In [1]:
import pandas as pd
from sklearn.neighbors import NearestNeighbors

In [2]:
# Load the track table, drop the first CSV column, and keep only the first
# row seen for each track_id. Written as a non-mutating chain (no inplace=True)
# so the cell yields the same frame every time it is re-run.
df = pd.read_csv('dataset.csv')
df = df.drop(columns=df.columns[0]).drop_duplicates(subset=['track_id'])
df.shape

(89741, 20)

In [3]:
# Peek at the first five rows of the de-duplicated table.
df.head(n=5)

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,0.461,1,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,acoustic
1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,0.42,0.166,1,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,acoustic
2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,210826,False,0.438,0.359,0,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,acoustic
3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,201933,False,0.266,0.0596,0,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3,acoustic
4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,82,198853,False,0.618,0.443,2,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4,acoustic


In [4]:
# Show the column labels of df (the first CSV column was dropped at load time).
df.columns

Index(['track_id', 'artists', 'album_name', 'track_name', 'popularity',
       'duration_ms', 'explicit', 'danceability', 'energy', 'key', 'loudness',
       'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness',
       'valence', 'tempo', 'time_signature', 'track_genre'],
      dtype='object')

In [5]:
# Columns of df used to measure similarity between tracks.
selected_features = [
    'danceability', 'energy', 'loudness', 'speechiness',
    'acousticness', 'instrumentalness', 'liveness', 'valence',
]
X = df[selected_features]
# Z-score every column: subtract its mean, then divide by its standard deviation.
X_normalized = X.sub(X.mean()).div(X.std())

In [6]:
# Display the raw (un-normalized) feature columns.
df.loc[:, selected_features]

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence
0,0.676,0.4610,-6.746,0.1430,0.0322,0.000001,0.3580,0.7150
1,0.420,0.1660,-17.235,0.0763,0.9240,0.000006,0.1010,0.2670
2,0.438,0.3590,-9.734,0.0557,0.2100,0.000000,0.1170,0.1200
3,0.266,0.0596,-18.515,0.0363,0.9050,0.000071,0.1320,0.1430
4,0.618,0.4430,-9.681,0.0526,0.4690,0.000000,0.0829,0.1670
...,...,...,...,...,...,...,...,...
113995,0.172,0.2350,-16.393,0.0422,0.6400,0.928000,0.0863,0.0339
113996,0.174,0.1170,-18.318,0.0401,0.9940,0.976000,0.1050,0.0350
113997,0.629,0.3290,-10.895,0.0420,0.8670,0.000000,0.0839,0.7430
113998,0.587,0.5060,-10.889,0.0297,0.3810,0.000000,0.2700,0.4130


In [7]:
# Exhaustive (brute-force) Euclidean neighbour search over the normalized
# features; n_neighbors=11 is the default neighbour count for later queries.
knn_model = NearestNeighbors(
    n_neighbors=11,
    algorithm='brute',
    metric='euclidean',
)
knn_model.fit(X_normalized)

In [8]:
def recommend_songs(features, n_neighbors=None):
    """Return the rows of ``df`` whose features lie closest to ``features``.

    The query is z-scored with the column means and standard deviations of
    ``X`` (the same statistics used to build ``X_normalized``) and then passed
    to ``knn_model.kneighbors``.

    Parameters
    ----------
    features : sequence of float
        Raw (un-normalized) feature values, one per column of ``X`` and in
        the same order as those columns.
    n_neighbors : int, optional
        Number of rows to return. ``None`` (the default) uses the neighbour
        count ``knn_model`` was constructed with.

    Returns
    -------
    pandas.DataFrame
        Rows of ``df`` selected by position, in the order returned by
        ``knn_model.kneighbors``.

    Raises
    ------
    ValueError
        If ``features`` does not hold exactly one value per column of ``X``.
    """
    expected = X.shape[1]
    if len(features) != expected:
        raise ValueError(
            f"expected {expected} feature values "
            f"({', '.join(map(str, X.columns))}), got {len(features)}"
        )

    # Build a one-row frame labelled with the training columns: the values are
    # taken positionally (no accidental label re-alignment), and the model,
    # which was fitted on a DataFrame, receives matching feature names.
    query = pd.DataFrame([list(features)], columns=X.columns)
    normalized_query = (query - X.mean()) / X.std()

    _, indices = knn_model.kneighbors(normalized_query, n_neighbors=n_neighbors)
    # The model was fitted on rows in df's positional order, so iloc is the
    # correct lookup even though df's index labels have gaps.
    return df.iloc[indices[0]]

In [9]:
# Raw feature values, listed in the order of selected_features.
example_features = [
    0.676,     # danceability
    0.461,     # energy
    -6.746,    # loudness
    0.143,     # speechiness
    0.0322,    # acousticness
    0.000001,  # instrumentalness
    0.358,     # liveness
    0.715,     # valence
]
recommended_songs = recommend_songs(example_features)
recommended_songs



Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,0.461,1,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,acoustic
67860,2snbAel9GoxA6i6iH0Wvw9,Ozuna,OzuTochi,Te Pienso,66,147760,False,0.668,0.533,2,-7.409,1,0.098,0.117,0.0,0.305,0.676,163.733,4,latin
111951,6aBFjYfhZrgJtxiKaip1BH,Gramatik,"Beatz & Pieces, Vol. 1",Skylight,45,226333,False,0.667,0.608,0,-6.071,1,0.11,0.0983,0.00186,0.336,0.684,90.045,4,trip-hop
18213,5DPHLBx7WCVwKBe0eTNp5V,Aunty Donna,The Album,Fuccboi Anthem,25,202300,True,0.697,0.642,2,-7.038,1,0.161,0.0915,0.0,0.333,0.768,159.899,5,comedy
32700,7KAQwDyAcdiHZhQU01KLkN,TroyBoi,Left Is Right,ili,24,245000,False,0.634,0.588,9,-6.753,1,0.106,0.0575,0.00231,0.441,0.77,174.006,4,electronic
357,3IORLAm07yxo4T7B8cRET3,Jason Mraz,Look For The Good,Look For The Good (Single Version),21,239697,False,0.71,0.615,0,-8.23,1,0.133,0.0384,0.0,0.283,0.746,90.263,4,acoustic
39173,54XZSt4qoPwUh0s9q0A7oA,Trille,Blink 182,Blink 182,53,160076,True,0.73,0.629,10,-7.507,1,0.142,0.00976,0.0507,0.358,0.631,91.498,4,german
63905,007t1Fel5tcxHOEfSYWuGM,MFÖ,Ve MFÖ,Bu Aşk Olur Mu,33,198243,False,0.693,0.613,4,-6.304,0,0.114,0.142,0.0,0.295,0.764,71.967,4,j-rock
89881,4v7riPZWyAclrOfUSwD0zp,Camilo,De Adentro Pa Afuera,Aeropuerto,78,177626,False,0.69,0.58,0,-4.395,1,0.199,0.0328,0.0,0.328,0.708,197.635,4,reggaeton
99464,34evGV6SaoRLHTcXXUVOpb,Jax,90s Kids,90s Kids,64,163815,True,0.714,0.657,0,-6.436,1,0.118,0.0802,0.0,0.35,0.725,151.898,4,singer-songwriter


In [10]:
# Distinct genre labels, in order of first appearance.
df.loc[:, 'track_genre'].unique()

array(['acoustic', 'afrobeat', 'alt-rock', 'alternative', 'ambient',
       'anime', 'black-metal', 'bluegrass', 'blues', 'brazil',
       'breakbeat', 'british', 'cantopop', 'chicago-house', 'children',
       'chill', 'classical', 'club', 'comedy', 'country', 'dance',
       'dancehall', 'death-metal', 'deep-house', 'detroit-techno',
       'disco', 'disney', 'drum-and-bass', 'dub', 'dubstep', 'edm',
       'electro', 'electronic', 'emo', 'folk', 'forro', 'french', 'funk',
       'garage', 'german', 'gospel', 'goth', 'grindcore', 'groove',
       'grunge', 'guitar', 'happy', 'hard-rock', 'hardcore', 'hardstyle',
       'heavy-metal', 'hip-hop', 'honky-tonk', 'house', 'idm', 'indian',
       'indie-pop', 'indie', 'industrial', 'iranian', 'j-dance', 'j-idol',
       'j-pop', 'j-rock', 'jazz', 'k-pop', 'kids', 'latin', 'latino',
       'malay', 'mandopop', 'metal', 'metalcore', 'minimal-techno', 'mpb',
       'new-age', 'opera', 'pagode', 'party', 'piano', 'pop-film', 'pop',
       'pow

In [12]:
# Comedy tracks with popularity above 30.
genre_df = df[df['track_genre'].eq('comedy') & df['popularity'].gt(30)]
# Fixed random_state makes the displayed sample reproducible across re-runs;
# min() keeps the cell from raising if fewer than 10 rows match the filter.
genre_df.sample(n=min(10, len(genre_df)), random_state=42)

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
18045,6JwC7ufMUMXNhg5d0QyKft,Jim Norton,Despicable,An Importanta Relationship Lesson,33,82680,True,0.553,0.773,6,-11.262,0,0.94,0.863,0.0,0.755,0.364,120.517,5,comedy
18014,2SmXJEhm1yJvXK6Hx4XAYT,Pink Guy,Pink Season,Help,51,165040,True,0.837,0.554,2,-10.165,1,0.109,0.734,5.6e-05,0.129,0.676,117.012,4,comedy
18816,1MlmOUe89Q5YhuMRZgragu,Andy Griffith,American Originals (Remastered),The Fishin' Hole - Remastered 1993,44,116533,False,0.625,0.34,0,-10.678,1,0.0826,0.687,0.0,0.111,0.921,172.018,3,comedy
18951,2eUOoOf6q0e9FIK01SJMjT,Lil Dicky;T-Pain,Professional Rapper,Personality (feat. T Pain),47,258836,True,0.768,0.709,5,-4.87,0,0.196,0.227,0.0,0.504,0.824,95.972,4,comedy
18750,5HRO58hfVhTpGR87AzHaBa,Afroman,The Good Times,She Won't Let Me Fuck,52,361173,True,0.933,0.448,7,-8.391,1,0.347,0.239,0.0,0.0579,0.774,99.968,4,comedy
18032,2M3K38uN5Y7NIut1k0zqfF,Jim Norton,Despicable,Fuck Florida,31,72373,True,0.585,0.662,9,-11.179,1,0.954,0.857,0.0,0.688,0.519,129.887,3,comedy
18016,3IQSYqnKsCTMLWn2xE3HFT,The Lonely Island;T-Pain,Incredibad,I'm On A Boat,59,156266,True,0.623,0.871,0,-2.403,1,0.239,0.0465,0.0,0.111,0.653,150.079,4,comedy
18150,7zvKFw17XyoBUx9mHiwzPy,Pink Guy,Pink Season,Rice Balls,46,196173,True,0.717,0.587,7,-8.97,1,0.368,0.0795,1.9e-05,0.192,0.336,112.44,5,comedy
18053,3ji3RdsWWyuuZyehLmYj0b,Lil Dicky;Brain,Professional Rapper,Pillow Talking (feat. Brain),55,645876,True,0.919,0.362,8,-8.331,1,0.368,0.576,0.0,0.134,0.416,104.987,4,comedy
18550,7rE9wYXPgLqZsXVxn2JhU9,Pink Guy,Pink Season,Pink Life,41,190640,True,0.589,0.745,2,-7.883,1,0.41,0.248,0.0,0.246,0.328,131.22,5,comedy
