# Project Plan

After discussing with Robin Burke, the general model will include have multiple levels for detecting for the recommender system
- Last FM to start the cluster
- use eucliedean distance, and only draw connections between "djable" nodes

## Data to Test

- [🎹 Spotify Tracks Dataset](https://www.kaggle.com/datasets/maharshipandya/-spotify-tracks-dataset)

In [132]:
## IMPORTS ##
import seaborn as sns
import pandas as pd
import networkx as nx
import numpy as np
import requests
import re
from collections import Counter
import os
from kmodes.kmodes import KModes
import spotipy
import keys

In [133]:
# looking at dataset of songs
df = pd.read_csv('dataset.csv')

In [134]:
# All the genres in the dataframe
np.unique(df.track_genre)

array(['acoustic', 'afrobeat', 'alt-rock', 'alternative', 'ambient',
       'anime', 'black-metal', 'bluegrass', 'blues', 'brazil',
       'breakbeat', 'british', 'cantopop', 'chicago-house', 'children',
       'chill', 'classical', 'club', 'comedy', 'country', 'dance',
       'dancehall', 'death-metal', 'deep-house', 'detroit-techno',
       'disco', 'disney', 'drum-and-bass', 'dub', 'dubstep', 'edm',
       'electro', 'electronic', 'emo', 'folk', 'forro', 'french', 'funk',
       'garage', 'german', 'gospel', 'goth', 'grindcore', 'groove',
       'grunge', 'guitar', 'happy', 'hard-rock', 'hardcore', 'hardstyle',
       'heavy-metal', 'hip-hop', 'honky-tonk', 'house', 'idm', 'indian',
       'indie', 'indie-pop', 'industrial', 'iranian', 'j-dance', 'j-idol',
       'j-pop', 'j-rock', 'jazz', 'k-pop', 'kids', 'latin', 'latino',
       'malay', 'mandopop', 'metal', 'metalcore', 'minimal-techno', 'mpb',
       'new-age', 'opera', 'pagode', 'party', 'piano', 'pop', 'pop-film',
       'pow

[keys.py](https://github.com/schwartzadev/dj-recommender/blob/master/keys.py)


- Rules for Building Edges
    - BPM within 8 of each other
    - must be within similar key --> Camelot
    - must be at least danceability of 0.5, otherwise add and ignore
    - must be at least popularity of 0.7, otherwise add and ignore

In [135]:
# Helper Functions

def _calc_bpm_range(bpm,brange=8):
    '''
    returns list of bpm range
    '''
    return [bpm-brange,bpm+brange]

def song_id_search(_id):
    return df[df['track_id']==_id]

def find_neighbors(song,danceability=0.5,popularity=45):
    brange = _calc_bpm_range(song.tempo)
    small_df = df[(df['tempo']>brange[0]) & (df['tempo']<brange[1]) & (df['track_genre'] == song.track_genre) & (df['danceability']>danceability) & (df['popularity']>popularity) & (df['key'] == song.key) & (df['track_id'] != song.track_id)]
    return small_df['track_id'].tolist() 


In [136]:
bieb = df.iloc[30021]
print(bieb.track_name)
find_neighbors(bieb)

Family


['322TxW77VZdX9gHynK5Xue',
 '58kZ9spgxmlEznXGu6FPdQ',
 '4EZDJ5FMTj5pCJe2HBmd21',
 '1ZOVeidJCvkxOARWTHmWOL',
 '0HLhptvI8NozbOHRLNniFz']

In [138]:
song_id_search('4vP5AQEH20l7zXfkdMCtzX')

Unnamed: 0.1,Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
13303,13303,4vP5AQEH20l7zXfkdMCtzX,Barbara Tucker;Obskür,Beautiful People (Obskür Remix),Beautiful People - Obskür Remix,48,333771,False,0.832,0.677,...,-8.78,0,0.105,0.00338,0.0153,0.0494,0.384,128.017,4,chicago-house


In [139]:
song_id_search('6fSdR81YvNG8Wo6i2ytLPR')

Unnamed: 0.1,Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
13012,13012,6fSdR81YvNG8Wo6i2ytLPR,Roy Davis Jr.;Peven Everett,Gabriel,Gabriel - Live Garage Mix,52,443849,False,0.829,0.332,...,-13.837,0,0.047,0.0125,0.294,0.0918,0.448,128.627,4,chicago-house


In [140]:
# The Chainsmokers, So Far So Good
test = song_id_search('2FqkTu4FhwDWn9hzEaWWCE')
test

Unnamed: 0.1,Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,...,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
30403,30403,2FqkTu4FhwDWn9hzEaWWCE,The Chainsmokers,So Far So Good,I Love U,69,185522,False,0.651,0.719,...,-5.804,1,0.0318,0.143,3.6e-05,0.0948,0.81,103.981,4,edm
31455,31455,2FqkTu4FhwDWn9hzEaWWCE,The Chainsmokers,So Far So Good,I Love U,69,185522,False,0.651,0.719,...,-5.804,1,0.0318,0.143,3.6e-05,0.0948,0.81,103.981,4,electro
53355,53355,2FqkTu4FhwDWn9hzEaWWCE,The Chainsmokers,So Far So Good,I Love U,69,185522,False,0.651,0.719,...,-5.804,1,0.0318,0.143,3.6e-05,0.0948,0.81,103.981,4,house


In [141]:
# checking to see how many duplicates there are
len(df)-len(np.unique(df.track_id))

24259

In [142]:
# 25k duplicates?!?!! Let's drop those

In [143]:
# Drop unneed columns
df = df.drop(columns=[
    'Unnamed: 0', 
    'explicit',
    'loudness',
    'speechiness',
    'acousticness',
    'instrumentalness',
    'liveness']
            )


In [144]:
cols = list(df.columns)
cols.remove('track_genre')
print(cols)

TypeError: 'list' object is not callable

In [145]:
# combines columns that are similar except with different genre
#df = df.groupby(cols)['track_genre'].apply(','.join).reset_index()
df = df.groupby(cols)['track_genre'].apply(set).reset_index()

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,danceability,energy,key,mode,valence,tempo,time_signature,track_genre
0,0000vdREvCVMxbQTkS888c,Rill,Lolly,Lolly,44,160725,0.910,0.37400,8,0,0.432,104.042,4,{german}
1,000CC8EParg64OmTxVnZ0p,Glee Cast,Glee Love Songs,It's All Coming Back To Me Now (Glee Cast Vers...,47,322933,0.269,0.51600,0,1,0.341,178.174,4,{club}
2,000Iz0K615UepwSJ5z2RE5,Paul Kalkbrenner;Pig&Dan,X,Böxig Leise - Pig & Dan Remix,22,515360,0.686,0.56000,5,0,0.108,119.997,4,{minimal-techno}
3,000RDCYioLteXcutOjeweY,Jordan Sandhu,Teeje Week,Teeje Week,62,190203,0.679,0.77000,0,1,0.839,161.721,4,{hip-hop}
4,000qpdoc97IMTBvF8gwcpy,Paul Kalkbrenner,Zeit,Tief,19,331240,0.519,0.43100,6,0,0.234,129.971,4,{minimal-techno}
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90455,7zxHiMmVLt4LGWpOMqOpUh,Haricharan;Gopi Sundar,Bangalore Days,"Aethu Kari Raavilum - From ""Bangalore Days""",56,325156,0.766,0.38200,7,0,0.672,119.992,4,{pop-film}
90456,7zxpdh3EqMq2JCkOI0EqcG,Piano Genie,Disney Favourites,"Two Worlds (From ""Tarzan"")",23,109573,0.529,0.00879,10,1,0.510,82.694,4,{disney}
90457,7zyYmIdjqqiX6kLryb7QBx,Eric Chou,學著愛,以後別做朋友,61,260573,0.423,0.36000,3,1,0.291,130.576,4,{mandopop}
90458,7zybSU9tFO9HNlwmGF7stc,Stereoclip,Echoes,Sunset Drive,54,234300,0.649,0.83400,10,0,0.150,125.004,4,{electronic}


In [150]:
test = song_id_search('2FqkTu4FhwDWn9hzEaWWCE')
test

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,danceability,energy,key,mode,valence,tempo,time_signature,track_genre
26106,2FqkTu4FhwDWn9hzEaWWCE,The Chainsmokers,So Far So Good,I Love U,69,185522,0.651,0.719,8,1,0.81,103.981,4,"{edm, electro, house}"


In [151]:
# Lol, why are there still duplicates
len(df)-len(np.unique(df.track_id))

720

In [152]:
# list of unique ids
unique = np.unique(df.track_id)

# all ids
ids = df.track_id

In [153]:
# duplicates left
len(ids) - len(unique)

720

In [154]:
id_count = dict(Counter(ids))

In [155]:
dup_list = [key for key,val in id_count.items() if val > 1]

In [156]:
# list of duplicates captured
len(dup_list)

720

In [157]:
# list of duplicates
# dup_list

In [158]:
# slight difference in popularity, hm
song_id_search('00YwP3wJWiG8IxAA7OS9lo')

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,danceability,energy,key,mode,valence,tempo,time_signature,track_genre
109,00YwP3wJWiG8IxAA7OS9lo,Anupam Roy,Doorbiney Chokh Rakhbona,Amake Amar Moto Thakte Dao,46,319946,0.566,0.419,7,1,0.186,147.881,4,"{singer-songwriter, songwriter}"
110,00YwP3wJWiG8IxAA7OS9lo,Anupam Roy,Doorbiney Chokh Rakhbona,Amake Amar Moto Thakte Dao,47,319946,0.566,0.419,7,1,0.186,147.881,4,"{indie-pop, indie, indian, k-pop}"


In [159]:
# another difference in popularity
song_id_search('014SIjoLDG1Ku19c5FlDYh')

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,danceability,energy,key,mode,valence,tempo,time_signature,track_genre
189,014SIjoLDG1Ku19c5FlDYh,Creedence Clearwater Revival,Pumpkin Patch Hits,I Put A Spell On You,0,271786,0.393,0.732,4,0,0.621,100.41,4,{country}
190,014SIjoLDG1Ku19c5FlDYh,Creedence Clearwater Revival,Pumpkin Patch Hits,I Put A Spell On You,3,271786,0.393,0.732,4,0,0.621,100.41,4,{rock}


In [160]:
# ANOTHER difference in popularity, weird
song_id_search('0DqLuhTD1xI8mb2gY5YoLM')

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,danceability,energy,key,mode,valence,tempo,time_signature,track_genre
2547,0DqLuhTD1xI8mb2gY5YoLM,Håkan Hellström,Det är så jag säger det,Den fulaste flickan i världen,35,197826,0.292,0.812,9,1,0.502,108.782,4,{swedish}
2548,0DqLuhTD1xI8mb2gY5YoLM,Håkan Hellström,Det är så jag säger det,Den fulaste flickan i världen,36,197826,0.292,0.812,9,1,0.502,108.782,4,{goth}


At this point I am thinking of dropping the rows that are duplicates, and removing the one with higher popularity. I don't think it will be the end of the world, but because this data is weird, I don't want to recommend it probably.

In [161]:
df = df[~df['track_id'].isin(dup_list)]

In [162]:
# hell yeah, duplicate values are gone, totally removed for now
df[df['track_id']=='0DqLuhTD1xI8mb2gY5YoLM']

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,danceability,energy,key,mode,valence,tempo,time_signature,track_genre


### Sudo Code

I want to create a song object so that I can pass the data in a box to the visualization in D3. I'm hoping I can project the image and add the spotify link to the visualization.

1. Create Song Objects with attributes
    - Attributes
        - Song ID
        - Name
        - Artist
        - Spotify Link
        - spotify genre
        - popularity
        - key (convert to camelot)
        - tempo
        - lyrics
        - duration
        - explicit
    - Methods
        - Generate Spotify Link
        - get last fm track tag
        - from Song_ID (class method)
        - get lyrics
        - get valid tempo range
        - get neighbors (using filtering)
2. Go through entire track list
    - for a song in the song list
        - build a list of node list songs that share similar features
3. Playlist Object
    - Attributes
        - Song Objects
        - BPM range
        - Key Range
        - Genre
    - Methods
        - Create in Spotify(using Spotipy)
        - From 2 songs (class method)
            - short
            - different paths
        - Add (add to graph, use graph logic to create new playlist?)
4. SongGraph Object
   - Attributes
       - Last Update
   - Methods
       - Save to GraphML
       - From GraphML
       - Visualize
       - _add_song
       - _remove_song

Simple, just referenced when Playlist Object is implemented, contains metadata about last update, etc. It's nice because it will also have ways to save the current graph to graphml, etc.
 

In [184]:
class Song:
    def __init__(self, song_id, name, artists, popularity, key, tempo, duration, explicit=None,lyrics=None):
        self.song_id = song_id
        self.name = name
        self.artists = artists
        self.popularity = popularity
        self.key = key
        self.tempo = tempo
        self._duration = duration

    @classmethod
    def from_df_row(cls,df_row):
        return Song(df_row.track_id, df_row.track_name, df_row.artists, df_row.popularity, df_row.key, df_row.tempo, df_row.duration_ms)        

    @classmethod
    def from_spot_id(cls,_id):
        pass

    @property
    def spot_link(self):
        return f'https://open.spotify.com/track/{self.song_id}'
    
    @property
    def duration(self):
        _sec=int((self._duration/1000)%60)
        _min=int((self._duration/(1000*60))%60)
        return f'{_min}min {_sec}sec'
    
    @property
    def valid_bpm_range(self):
        pass
    
    
    @property
    def valid_keys(self):
        pass
    
    @property
    def stats(self):
        output = f'''Song ID: {self.song_id}
Song Name: {self.name}
Song Artists: {self.artists}
Song Key: {self.key}
Song Tempo: {self.tempo}
Song Duration {self.duration}'''
        print(output)
    
    def __str__(self):
        return f'Song Object: \'{self.name} by {self.artists}\''
    
    def __repr__(self):
        return f'ID: {self.song_id} | Name: {self.name} | Song Artists: {self.artists} | Song Key: {self.key} | Song Tempo: {self.tempo} | Song Duration {self.duration}'
    


In [185]:
# Creating Object

first = df.iloc[0]

song = Song.from_df_row(first)

In [186]:
song.duration

'2min 40sec'

In [187]:
song

ID: 0000vdREvCVMxbQTkS888c | Name: Lolly | Song Artists: Rill | Song Key: 8 | Song Tempo: 104.042 | Song Duration 2min 40sec

## TODOS:

- key to camelot conversion
- networkx the mf dataframe
    - convert to graphml [(link)](https://stackoverflow.com/questions/13159575/using-a-graphml-file-for-d3-js-force-directed-layout)

In [188]:
first['key']

8

In [189]:
first['mode']

0

In [190]:
df.iloc[0]

track_id          0000vdREvCVMxbQTkS888c
artists                             Rill
album_name                         Lolly
track_name                         Lolly
popularity                            44
duration_ms                       160725
danceability                        0.91
energy                             0.374
key                                    8
mode                                   0
valence                            0.432
tempo                            104.042
time_signature                         4
track_genre                     {german}
cam_key                               1A
Name: 0, dtype: object

In [191]:
key = keys.generate_camelot_key(first['mode'],first['key'])
key
keys._get_matching_keys(key)

['2A', '4A', '7A', '10A', '1A', '1B']

In [192]:
key

'1A'

In [193]:
import pandas as pd
pd.options.mode.chained_assignment = None  # default='warn'

In [194]:
# gave a warning, disabled warnings so we could get it to work

# assigned new values for all of the keys using two columns and function from my friends code
df['cam_key'] = [keys.generate_camelot_key(*a) for a in tuple(zip(df.loc[:,'mode'],df.loc[:,'key']))]

In [195]:
df

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,danceability,energy,key,mode,valence,tempo,time_signature,track_genre,cam_key
0,0000vdREvCVMxbQTkS888c,Rill,Lolly,Lolly,44,160725,0.910,0.37400,8,0,0.432,104.042,4,{german},1A
1,000CC8EParg64OmTxVnZ0p,Glee Cast,Glee Love Songs,It's All Coming Back To Me Now (Glee Cast Vers...,47,322933,0.269,0.51600,0,1,0.341,178.174,4,{club},8B
2,000Iz0K615UepwSJ5z2RE5,Paul Kalkbrenner;Pig&Dan,X,Böxig Leise - Pig & Dan Remix,22,515360,0.686,0.56000,5,0,0.108,119.997,4,{minimal-techno},4A
3,000RDCYioLteXcutOjeweY,Jordan Sandhu,Teeje Week,Teeje Week,62,190203,0.679,0.77000,0,1,0.839,161.721,4,{hip-hop},8B
4,000qpdoc97IMTBvF8gwcpy,Paul Kalkbrenner,Zeit,Tief,19,331240,0.519,0.43100,6,0,0.234,129.971,4,{minimal-techno},11A
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90455,7zxHiMmVLt4LGWpOMqOpUh,Haricharan;Gopi Sundar,Bangalore Days,"Aethu Kari Raavilum - From ""Bangalore Days""",56,325156,0.766,0.38200,7,0,0.672,119.992,4,{pop-film},6A
90456,7zxpdh3EqMq2JCkOI0EqcG,Piano Genie,Disney Favourites,"Two Worlds (From ""Tarzan"")",23,109573,0.529,0.00879,10,1,0.510,82.694,4,{disney},6B
90457,7zyYmIdjqqiX6kLryb7QBx,Eric Chou,學著愛,以後別做朋友,61,260573,0.423,0.36000,3,1,0.291,130.576,4,{mandopop},5B
90458,7zybSU9tFO9HNlwmGF7stc,Stereoclip,Echoes,Sunset Drive,54,234300,0.649,0.83400,10,0,0.150,125.004,4,{electronic},3A


In [196]:
second = df.iloc[1]

In [197]:
key = keys.generate_camelot_key(second['mode'],second['key'])

In [198]:
key = '12A'

In [199]:
keys._get_matching_keys(key)

['1A', '3A', '6A', '9A', '11A', '12A', '12B']

In [200]:
#df['valid_cam_keys'] = df['cam_key'].map(keys._get_matching_keys)
#df.drop('valid_cam_keys', axis=1, inplace=True)

In [201]:
df[df['cam_key'] == "8B"]

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,danceability,energy,key,mode,valence,tempo,time_signature,track_genre,cam_key
1,000CC8EParg64OmTxVnZ0p,Glee Cast,Glee Love Songs,It's All Coming Back To Me Now (Glee Cast Vers...,47,322933,0.269,0.516,0,1,0.341,178.174,4,{club},8B
3,000RDCYioLteXcutOjeweY,Jordan Sandhu,Teeje Week,Teeje Week,62,190203,0.679,0.770,0,1,0.839,161.721,4,{hip-hop},8B
23,006rHBBNLJMpQs8fRC2GDe,Calcinha Preta;Gusttavo Lima,CP 25 Anos (Ao Vivo em Aracaju),Agora Estou Sofrendo - Ao Vivo,47,260510,0.605,0.678,0,1,0.439,125.059,4,"{sertanejo, pagode, forro}",8B
33,009MGoCC568mI1yvsbmTxw,Justnormal,Guessing Game,Park Bench,8,128984,0.805,0.503,0,1,0.515,97.964,4,{study},8B
38,00BYitnjj9tACCkLapk5uS,Silverchair,Neon Ballroom,Satin Sheets,25,144333,0.345,0.844,0,1,0.384,161.465,4,{grunge},8B
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90421,7zo6hLIhyY7zGCR0sWKvth,Anitta;Missy Elliott,Daily Pop Mix,Lobby,0,157837,0.759,0.711,0,1,0.436,111.902,4,"{funk, pagode}",8B
90427,7zqdIo8k7YXvr0GNJN1xQT,Adriana Arydes,Adriana Arydes (Ao Vivo),Halellujah - Ao Vivo,45,244800,0.280,0.550,0,1,0.258,179.555,3,{brazil},8B
90429,7zr6d9QDDmpo0w0N5LPBjs,Thievery Corporation,Sounds From The Thievery Hi-Fi,Shaolin Satellite,49,384613,0.770,0.573,0,1,0.533,102.017,4,{trip-hop},8B
90434,7zsw78LtXUD7JfEwH64HK2,Pat Carroll;Disney,The Little Mermaid Special Edition,Poor Unfortunate Souls,63,291693,0.384,0.180,0,1,0.368,73.603,4,{disney},8B


In [241]:
def find_intersection(song_specific_genres,valid_set_of_genres):
    
    '''
    A function to find the 'genre intersection' between a target song and other 'valid' songs
    
    
    @param valid_set_of_genres: set of genres from dataframe
    
    @return: int indicating amount of genres in common 
    '''
    
    # just to make sure the input is a set
    song_specific_genres = set(song_specific_genres)
    valid_set_of_genres = set(valid_set_of_genres)
    
    # Return number of common genres
    return len(song_specific_genres & valid_set_of_genres)

In [302]:
def updated_find_neighbors(song,danceability=0.5,popularity=45):
    '''
    ** Need to convert to using track_id instead of row **
    
    function used to filter to valid DJ songs that work (hypothetically)
    
    @param song: row of df to match, e.g. df.iloc[0]
    @param danceability: float from 0 -> 1 indicating spotifies danceability metric
    @param danceability: float from 0 -> 1 indicating spotifies danceability metric    
    
    @return: list of valid ids
    '''
    
    # getting song from id
#     track_id = _track_id
#     song = df[df['track_id']==_track_id]
    
    
    # quick calculations to help with finding neighbors 
    cam_key = keys.generate_camelot_key(song.mode,song.key)
    valid_cam_keys = keys._get_matching_keys(cam_key)
    brange = _calc_bpm_range(song.tempo)
    
        
    # filtering rules for creating small df
    small_df = df[(df['tempo']>brange[0]) & (df['tempo']<brange[1]) & (df['danceability']>danceability) & (df['popularity']>popularity) & (df['cam_key'].isin(valid_cam_keys)) & (df['track_id'] != song.track_id)]
    
    # apply genre filtering rule with genres (create new column, make) #should I weight the amount of genres??
    small_df['weight'] = small_df.apply(lambda x: find_intersection(song_specific_genres = x['track_genre'],valid_set_of_genres = {'edm'}), axis=1)
    
    small_df = small_df[small_df['weight']>0]
    
    # return a list of valid track ids
    return small_df['track_id'].tolist()



## Takeaways

We might have to use NLP here, it is hard for us to process the difference between chicago house and progressive-house. Because these are classified as EDM, it might make sense to use a Kmode function to cluster these songs.

In [303]:
#should I use a set for genre checking? for now, we can just look at the first genre in the dataset

In [305]:
bieb

index                                   30505
track_id               2d4NfufMbawr8n1gBSyGOI
artists           Mark Farina;Homero Espinosa
album_name                   Somebody To Love
track_name                   Somebody To Love
popularity                                  7
duration_ms                            410269
danceability                            0.806
energy                                  0.666
key                                        11
mode                                        0
valence                                 0.257
tempo                                 123.446
time_signature                              4
track_genre                   {chicago-house}
cam_key                                   10A
Name: 30021, dtype: object

In [297]:
keys._get_matching_keys('10A')

['1A', '4A', '7A', '9A', '11A', '10A', '10B']

In [298]:
listy = [genres for genres in df['track_genre'][4114]]

In [299]:
listy

['psych-rock']

In [300]:
df[df['cam_key'].isin(keys._get_matching_keys('10A'))]

Unnamed: 0,index,track_id,artists,album_name,track_name,popularity,duration_ms,danceability,energy,key,mode,valence,tempo,time_signature,track_genre,cam_key
0,0,0000vdREvCVMxbQTkS888c,Rill,Lolly,Lolly,44,160725,0.9100,0.37400,8,0,0.432,104.042,4,{german},1A
2,2,000Iz0K615UepwSJ5z2RE5,Paul Kalkbrenner;Pig&Dan,X,Böxig Leise - Pig & Dan Remix,22,515360,0.6860,0.56000,5,0,0.108,119.997,4,{minimal-techno},4A
4,4,000qpdoc97IMTBvF8gwcpy,Paul Kalkbrenner,Zeit,Tief,19,331240,0.5190,0.43100,6,0,0.234,129.971,4,{minimal-techno},11A
5,5,0017XiMkqbTfF2AUOzlhj6,Chad Daniels,Busy Being Awesome,Thanksgiving Chicken,24,127040,0.5360,0.78000,5,0,0.452,173.912,3,{comedy},4A
7,7,001YQlnDSduXd5LgBd66gT,Soda Stereo,Soda Stereo (Remastered),El Tiempo Es Dinero - Remasterizado 2007,38,177266,0.5540,0.92100,2,1,0.700,183.571,1,"{punk-rock, ska}",10B
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
89004,90444,7zumacGldlmxpoP8bpaeLe,Squeeze,Greatest Hits,Slap & Tickle,24,256839,0.5850,0.80500,11,0,0.919,131.820,4,"{synth-pop, power-pop}",10A
89005,90445,7zuuWZo0MyOdG3VHg1Mgml,BABYMETAL,BABYMETAL,Headbangeeeeerrrrr!!!!!,55,240441,0.3710,0.98500,2,1,0.275,185.009,4,{j-idol},10B
89010,90450,7zwWkmiQaVt3AFXUnhBlZk,Study Music & Sounds,Study Music for Focus and Ambient Alpha Waves ...,Study Music for Focus (Alpha Waves),42,124849,0.0985,0.00755,2,1,0.039,71.227,4,{sleep},10B
89011,90451,7zwddyEol0Hg5K8HQd3srX,Sayulita,La Caoba,La Caoba,66,159744,0.0630,0.02950,8,0,0.117,65.408,3,{sleep},1A


In [323]:
# one additional recommendation!
bieb = df.iloc[30021]
print(bieb.track_name)
len(updated_find_neighbors(bieb))

Somebody To Love


45

In [257]:
# how to filter songs by genre
df[df['track_genre'] & {"edm"}]

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,danceability,energy,key,mode,valence,tempo,time_signature,track_genre,cam_key
431,02LmGMJG5yi2K1RsBOj5cR,ILLENIUM;Sueco;Trippie Redd,Gaming Songs 2022: Hard Mode,Story of My Life,0,191124,0.521,0.892,3,1,0.369,96.991,4,"{dubstep, edm, house, dub}",5B
519,02shCNmb6IvgB5jLqKjtkK,Vicetone;Cozi Zuehlsdorff,Nevada,Nevada,71,208561,0.605,0.783,3,1,0.105,124.015,4,"{edm, electro, progressive-house, house}",5B
551,0343bYQtSfznf6v90jRLRK,John De Sohn;LIAMOO,Made For You,Forever Young,65,167911,0.653,0.621,8,1,0.584,121.980,4,"{electro, progressive-house, swedish, edm, house}",4B
592,03FrsymZUDrFC3aBVrvKA1,MEDUZA;James Carter;Elley Duhé;FAST BOY,Ballermann Hits Party 2023,Bad Memories,0,148629,0.607,0.767,5,0,0.662,123.998,3,"{edm, electro, house}",4A
664,03aiQ6cX3yeHwiIA8rvDLu,Becky Hill;Shift K3Y,pov: it's 2020,Better Off Without You,0,198742,0.682,0.855,7,1,0.304,124.005,4,"{house, dance, edm}",9B
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90064,7xnElMPFCmSlMJ0AY2hnxc,Whales;Fraxo;Lox Chatterbox,Dead To Me Slow + Reverb,Dead To Me - Slow + Reverb,73,267227,0.682,0.543,1,1,0.112,127.526,4,"{dubstep, edm, dub}",3B
90094,7xxk2elVJN3IjhbV3T8Shz,Zaeden,days,days,43,150000,0.811,0.499,7,1,0.373,144.028,4,"{indie, indie-pop, edm, indian}",9B
90118,7y3c1oJMY1CwwtOZ84Qovu,Regard;Years & Years,Hallucination,Hallucination,74,174794,0.687,0.788,6,0,0.669,123.020,4,"{edm, house}",11A
90222,7yfhESjiagb2x1WEWlgpbb,Jim Yosef;Anna Yvette,Linked,Linked,55,223218,0.613,0.905,6,1,0.251,99.971,3,{edm},2B


In [258]:
song_id_search('03FrsymZUDrFC3aBVrvKA1')

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,danceability,energy,key,mode,valence,tempo,time_signature,track_genre,cam_key
592,03FrsymZUDrFC3aBVrvKA1,MEDUZA;James Carter;Elley Duhé;FAST BOY,Ballermann Hits Party 2023,Bad Memories,0,148629,0.607,0.767,5,0,0.662,123.998,3,"{edm, electro, house}",4A


In [264]:
df = df.reset_index()

In [265]:
song_id_search('03FrsymZUDrFC3aBVrvKA1')

Unnamed: 0,index,track_id,artists,album_name,track_name,popularity,duration_ms,danceability,energy,key,mode,valence,tempo,time_signature,track_genre,cam_key
586,592,03FrsymZUDrFC3aBVrvKA1,MEDUZA;James Carter;Elley Duhé;FAST BOY,Ballermann Hits Party 2023,Bad Memories,0,148629,0.607,0.767,5,0,0.662,123.998,3,"{edm, electro, house}",4A


In [324]:
meduza = df.iloc[586]

In [325]:
meduza.track_name

'Bad Memories'

In [326]:
song_id_search('0TDLuuLlV54CkRRUOahJb4')

Unnamed: 0,index,track_id,artists,album_name,track_name,popularity,duration_ms,danceability,energy,key,mode,valence,tempo,time_signature,track_genre,cam_key
5315,5383,0TDLuuLlV54CkRRUOahJb4,David Guetta;Sia,Nothing but the Beat (Ultimate Edition),Titanium (feat. Sia),80,245040,0.604,0.787,0,0,0.301,126.062,4,"{dance, edm}",5A


In [285]:
song.track_name

586    Bad Memories
Name: track_name, dtype: object

In [278]:
#df['edges'] = df['cam_key'].map(keys._get_matching_keys)

In [357]:
meduza_df = df[df['track_id'] == '0343bYQtSfznf6v90jRLRK']
meduza_df

Unnamed: 0,index,track_id,artists,album_name,track_name,popularity,duration_ms,danceability,energy,key,mode,valence,tempo,time_signature,track_genre,cam_key
545,551,0343bYQtSfznf6v90jRLRK,John De Sohn;LIAMOO,Made For You,Forever Young,65,167911,0.653,0.621,8,1,0.584,121.98,4,"{electro, progressive-house, swedish, edm, house}",4B


In [352]:
meduza_series = df.iloc[586]
type(meduza_series)

pandas.core.series.Series

In [353]:
updated_find_neighbors(meduza_df)

TypeError: unhashable type: 'Series'

In [359]:
len(updated_find_neighbors(meduza_series))

60

In [358]:
meduza_series

index                                                 592
track_id                           03FrsymZUDrFC3aBVrvKA1
artists           MEDUZA;James Carter;Elley Duhé;FAST BOY
album_name                     Ballermann Hits Party 2023
track_name                                   Bad Memories
popularity                                              0
duration_ms                                        148629
danceability                                        0.607
energy                                              0.767
key                                                     5
mode                                                    0
valence                                             0.662
tempo                                             123.998
time_signature                                          3
track_genre                         {edm, electro, house}
cam_key                                                4A
Name: 586, dtype: object