In [None]:
!pip install annoy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting annoy
  Downloading annoy-1.17.2.tar.gz (647 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m647.4/647.4 kB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: annoy
  Building wheel for annoy (setup.py) ... [?25l[?25hdone
  Created wheel for annoy: filename=annoy-1.17.2-cp39-cp39-linux_x86_64.whl size=582234 sha256=e6e0329c2dc3084070dad89aba53d466fe1b2563ae749c50e98cdda0d27c0acc
  Stored in directory: /root/.cache/pip/wheels/f2/2e/e4/f3ae385c375b87982a2a70055061d4a6330ef4f60817e717e3
Successfully built annoy
Installing collected packages: annoy
Successfully installed annoy-1.17.2


In [None]:
import pandas as pd
import numpy as np
import random

from annoy import AnnoyIndex
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import cosine_distances


In [None]:
# Load the per-track feature table; each row describes one track.
data = pd.read_csv('tracks_features.csv')

# Keep the dataset's own track ids when the CSV provides an `id` column and
# only fall back to the row index when it does not. Assigning data.index
# unconditionally would overwrite the real ids, so the generated playlist
# would list bare row numbers instead of track ids.
if 'id' not in data.columns:
    data['id'] = data.index

# Columns that span the similarity space used by every later step.
features = ['acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 'speechiness', 'valence']
data_features = data[features].values

In [None]:
# Fit the scaler on the raw feature matrix, then apply it to that same matrix.
# The fitted `scaler` is kept so the identical transform can be reused later.
scaler = StandardScaler().fit(data_features)
scaled_data_features = scaler.transform(data_features)

In [None]:
# Index configuration: one dimension per feature column, 10 trees.
n_dimensions = len(features)
n_trees = 10

# Annoy index over the scaled feature vectors using the 'angular' metric.
index = AnnoyIndex(n_dimensions, 'angular')

# Item ids are the row positions in scaled_data_features, so an Annoy item id
# can be used directly to look a track's row up again.
for i in range(len(scaled_data_features)):
    feature_vector = scaled_data_features[i]
    index.add_item(i, feature_vector)

# Build the trees; left as the cell's last expression so its result is shown.
index.build(n_trees)

True

In [None]:
# How many neighbours to request per track.
n_neighbors = 20

# Map every row position to the item ids Annoy returns for it, in the order
# Annoy returns them.
nearest_neighbors = {
    item: index.get_nns_by_item(item, n_neighbors)
    for item in range(len(scaled_data_features))
}

In [None]:
# Forward map: track id -> row position in `data` (and in the feature matrix).
track_id_to_index = dict(zip(data['id'], range(len(data['id']))))

# Reverse map: row position -> track id, derived from the forward map.
index_to_track_id = dict(zip(track_id_to_index.values(), track_id_to_index.keys()))


In [None]:
seed_track_id = random.choice(data['id'].values)  # Replace this with the id of your seed track
playlist_length = 50

seed_index = track_id_to_index[seed_track_id]
playlist_indices = [seed_index]
# Set mirror of playlist_indices so the "already used?" check is O(1).
used_indices = {seed_index}
n_indexed_items = index.get_n_items()

# Greedy chain: repeatedly hop from the last track to its closest unused neighbour.
while len(playlist_indices) < playlist_length:
    last_index = playlist_indices[-1]

    # Choose the first nearest neighbor that is not already in the playlist
    candidates = nearest_neighbors[last_index]
    next_index = next((c for c in candidates if c not in used_indices), None)

    # Every precomputed neighbour is already used. Instead of re-appending a
    # stale track (or failing on an undefined name), ask Annoy for a
    # progressively larger neighbourhood until an unused track shows up.
    n_candidates = len(candidates)
    while next_index is None and n_candidates < n_indexed_items:
        n_candidates = min(max(2 * n_candidates, 1), n_indexed_items)
        candidates = index.get_nns_by_item(last_index, n_candidates)
        next_index = next((c for c in candidates if c not in used_indices), None)

    if next_index is None:
        # No unused track could be found even with the widest query, so stop
        # with a shorter playlist rather than repeating a track.
        break

    playlist_indices.append(next_index)
    used_indices.add(next_index)

# Convert the integer indices back to track IDs
playlist_track_ids = [index_to_track_id[position] for position in playlist_indices]

print("Generated Playlist:")
for i, track_id in enumerate(playlist_track_ids, start=1):
    print(f"{i}. {track_id}")


Generated Playlist:
1. 5gjXQKpoOwBQw8JEkdHH6W
2. 0akwcgWUz7pZkcPcUICRDs
3. 7oranklaTTX2UrIcxGHXi1
4. 0ciLIpQWuC3aHDZykpZ1dG
5. 4eiO5FmSmbVpLvGDliv68E
6. 66O3bJniDGLeMktjqKslLR
7. 5RuSGDiTcAB9BpB4RoyUM6
8. 4Hv84dG21gxsjvQpbRJaV4
9. 404GyiQgr8XZN4rV0bheCm
10. 2oalBSh5nkKE8wqTRnU6sj
11. 5nTApmXp3FaCDEwcCxt07y
12. 05ab5MZw1beOMcRH25St2W
13. 3TVTloSgya9jdU4YQQ2rka
14. 4pk4vM60G4UdvMWWBhG3OD
15. 0GHQk6q6TMTdeZvEUh1cOQ
16. 6BJnIWjduL82rWWei8F8yk
17. 7favZ8Oq42dbrDaUcZxRWL
18. 4pCDyKBSjfj3WuqNC2UTgR
19. 3voeG8mV9nydaNoAqX8TCq
20. 6wMySRMOVEsenA3AlggLT8
21. 4R1fzfWG1gzXw5qLJZgdfi
22. 5nTf9Qo0o9f1NSmBd5NCmO
23. 0xaUwO0jWmhnx2IZjbyWSx
24. 6CWniTfDkdXBiZt78WOA2c
25. 6iKfeyWPxeThQbimP7oTRZ
26. 5GZkRyxGukCkJ5ffcr8Hte
27. 6ieqsvPBIRw9TuN16bSzID
28. 3HnNpCEpYYowyxwXdnfCGL
29. 3GjXTvJ9gMx9CDx2ekDXeD
30. 660AbLGVCNOkxMAs8ngIei
31. 1XMj3ErRq2KKJpIBYccw9H
32. 6wmLipxvkbf6PrcZ8onQUq
33. 0sb8531hZkOyNuNbDROuHD
34. 6Gkhm9KC9ERna376oEPtKl
35. 3xKXEPiZOqPCqG4R6RrxWY
36. 3L8d8gS3YheMrtR9GEXnuT
37. 1TKMwyVYqZikZ

#### Transition Smoothness

In [None]:
def pairwise_cosine_similarity(playlist, data, data_features):
    """Cosine similarity between each pair of consecutive playlist tracks.

    Args:
        playlist: Sequence of track ids, in playlist order.
        data: DataFrame with an 'id' column; its row order must match the row
            order of ``data_features``.
        data_features: 2-D array of feature vectors, one row per row of ``data``.

    Returns:
        A list with ``len(playlist) - 1`` similarities, where entry ``k``
        compares track ``k`` with track ``k + 1``. Empty when the playlist has
        fewer than two tracks.

    Raises:
        IndexError: If a playlist id does not occur in ``data['id']``.
    """
    if len(playlist) < 2:
        return []

    # Resolve each track to its row *position* once. Using the DataFrame index
    # label as an array offset is only correct for a default RangeIndex, and
    # re-scanning the frame for both ends of every transition doubles the work.
    ids = data['id'].to_numpy()
    positions = []
    for track_id in playlist:
        matches = np.flatnonzero(ids == track_id)
        if matches.size == 0:
            raise IndexError(f"track id {track_id!r} not found in data['id']")
        # First match wins when an id occurs more than once.
        positions.append(matches[0])

    similarities = []
    for current_pos, following_pos in zip(positions, positions[1:]):
        similarity = cosine_similarity(
            [data_features[current_pos]], [data_features[following_pos]]
        )[0][0]
        similarities.append(similarity)
    return similarities

In [None]:
# Transition smoothness input: similarity of every consecutive pair in the
# generated playlist, measured in the scaled feature space.
similarities = pairwise_cosine_similarity(
    playlist=playlist_track_ids,
    data=data,
    data_features=scaled_data_features,
)

In [None]:
# Collapse the per-transition similarities into a single mean score.
average_similarity = np.asarray(similarities).mean()
print("Average Cosine Similarity:", average_similarity)

Average Cosine Similarity: 0.9984656308903094


In [None]:
# Translate the generated playlist's track ids (playlist_track_ids) back into
# row positions via the id -> position map.
playlist_indices = list(map(track_id_to_index.__getitem__, playlist_track_ids))

#### Intra-list Diversity 

In [None]:
def intra_list_diversity(playlist, features):
    """Mean cosine distance over all distinct pairs of playlist tracks.

    Args:
        playlist: Sequence of integer row positions into ``features``.
        features: 2-D array of feature vectors, one row per track.

    Returns:
        The average cosine distance between every unordered pair of distinct
        playlist tracks, or 0.0 when the playlist has fewer than two tracks
        (there is no pair to compare).
    """
    n_tracks = len(playlist)
    if n_tracks < 2:
        return 0.0

    distance_matrix = cosine_distances(features[playlist])

    # Average only the strict upper triangle. The full matrix also holds the
    # zero self-distances on the diagonal, which would pull the mean down by a
    # factor of (n - 1) / n, and it counts every pair twice.
    pair_rows, pair_cols = np.triu_indices(n_tracks, k=1)
    avg_distance = np.mean(distance_matrix[pair_rows, pair_cols])
    return avg_distance


In [None]:
# Score how spread out the generated playlist is in the scaled feature space.
diversity_score = intra_list_diversity(
    playlist=playlist_indices,
    features=scaled_data_features,
)
print(f"Intra-list Diversity: {diversity_score}")

Intra-list Diversity: 0.017547747941776784


#### Artist Diversity

In [None]:
def artist_diversity(playlist, track_to_artist):
    """Fraction of playlist entries that carry a distinct artist value.

    Args:
        playlist: Sequence of track ids.
        track_to_artist: Mapping from track id to a hashable artist value.

    Returns:
        Number of distinct artist values divided by the playlist length, or
        0.0 for an empty playlist (avoids a division by zero).

    Raises:
        KeyError: If a playlist track is missing from ``track_to_artist``.
    """
    if len(playlist) == 0:
        return 0.0

    unique_artists = {track_to_artist[track] for track in playlist}
    artist_diversity_score = len(unique_artists) / len(playlist)
    return artist_diversity_score

In [None]:
# Look-up table from track id to that row's 'artist_ids' value.
track_to_artist = {
    track_id: artist_ids
    for track_id, artist_ids in zip(data['id'], data['artist_ids'])
}

# Share of playlist entries whose artist value is distinct.
artist_diversity_score = artist_diversity(
    playlist=playlist_track_ids,
    track_to_artist=track_to_artist,
)
print(f"Artist Diversity: {artist_diversity_score}")

Artist Diversity: 0.98
