In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler,StandardScaler

In [2]:
df = pd.read_csv ('sussy.csv', encoding = 'utf-8')
print(df.head())
print('There are', df.shape[0], 'songs and', df.shape[1], 'features')

   acousticness                                       analysis_url  \
0        0.0140  https://api.spotify.com/v1/audio-analysis/14gm...   
1        0.4590  https://api.spotify.com/v1/audio-analysis/1Fhb...   
2        0.7650  https://api.spotify.com/v1/audio-analysis/6D6H...   
3        0.0935  https://api.spotify.com/v1/audio-analysis/2hA6...   
4        0.1940  https://api.spotify.com/v1/audio-analysis/7DF8...   

                    artist                                       artist_links  \
0                    keshi  https://open.spotify.com/artist/3pc0bOVB5whxmD...   
1                    keshi  https://open.spotify.com/artist/3pc0bOVB5whxmD...   
2                    keshi  https://open.spotify.com/artist/3pc0bOVB5whxmD...   
3               Juice WRLD  https://open.spotify.com/artist/4MCBfE4596Uoi2...   
4  Juice WRLD | SUGA | BTS  https://open.spotify.com/artist/4MCBfE4596Uoi2...   

   danceability  duration_ms  energy                      id  \
0         0.622       193373

In [3]:
print('There are', df.isnull().sum().sum(), 'NA values in the dataset')

There are 0 NA values in the dataset


In [4]:
df.nunique()

acousticness        518
analysis_url        613
artist              406
artist_links        406
danceability        375
duration_ms         584
energy              378
id                  613
instrumentalness    347
key                  12
liveness            392
loudness            578
mode                  2
name                596
release_date        395
popularity           80
speechiness         427
tempo               597
time_signature        4
track_href          613
type                  1
uri                 613
valence             429
dtype: int64

In [5]:
df = df.drop(columns = 'type')

In [6]:
print(df.info())
df = df.convert_dtypes()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 613 entries, 0 to 612
Data columns (total 22 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   acousticness      613 non-null    float64
 1   analysis_url      613 non-null    object 
 2   artist            613 non-null    object 
 3   artist_links      613 non-null    object 
 4   danceability      613 non-null    float64
 5   duration_ms       613 non-null    int64  
 6   energy            613 non-null    float64
 7   id                613 non-null    object 
 8   instrumentalness  613 non-null    float64
 9   key               613 non-null    int64  
 10  liveness          613 non-null    float64
 11  loudness          613 non-null    float64
 12  mode              613 non-null    int64  
 13  name              613 non-null    object 
 14  release_date      613 non-null    object 
 15  popularity        613 non-null    int64  
 16  speechiness       613 non-null    float64
 1

Select all relevant features

In [7]:
dropfeatures = ['analysis_url','artist','artist_links','uri','track_href','release_date','id','name','time_signature','key','mode','duration_ms']
libraryFeatures = df.drop(columns = dropfeatures)

In [8]:
libraryFeatures

Unnamed: 0,acousticness,danceability,energy,instrumentalness,liveness,loudness,popularity,speechiness,tempo,valence
0,0.014,0.622,0.671,0.0,0.304,-6.426,69,0.026,84.996,0.572
1,0.459,0.711,0.747,0.0,0.0957,-6.335,76,0.0523,136.0,0.852
2,0.765,0.691,0.389,0.0,0.0941,-8.247,72,0.0367,104.952,0.306
3,0.0935,0.619,0.622,0.0,0.226,-5.874,76,0.0461,150.062,0.194
4,0.194,0.603,0.771,0.0,0.13,-6.336,86,0.0627,139.988,0.332
...,...,...,...,...,...,...,...,...,...,...
608,0.65,0.709,0.548,0.000002,0.133,-8.493,85,0.353,83.995,0.543
609,0.00199,0.768,0.826,0.000374,0.031,-4.904,65,0.0335,114.997,0.926
610,0.00122,0.543,0.868,0.0,0.116,-3.836,57,0.0367,116.145,0.308
611,0.00647,0.695,0.818,0.000005,0.0219,-5.379,74,0.0334,119.965,0.916


In [9]:
current_track = pd.read_csv ('userTrack.csv')

In [10]:
track_features = current_track.drop(columns = dropfeatures)
track_features = track_features.drop(columns = ['type'])

In [11]:
concation = pd.concat([track_features,libraryFeatures])

In [12]:
standardscaler = StandardScaler()
features_scaled = standardscaler.fit_transform(concation)
tracks_scaled = pd.DataFrame(features_scaled)

In [13]:
tracks_scaled.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
count,614.0,614.0,614.0,614.0,614.0,614.0,614.0,614.0,614.0,614.0
mean,1.303698e-16,-3.348751e-16,1.171701e-16,1.352519e-16,-3.513521e-17,3.732085e-16,1.343478e-16,4.4119610000000005e-17,8.57801e-16,-3.409777e-16
std,1.000815,1.000815,1.000815,1.000815,1.000815,1.000815,1.000815,1.000815,1.000815,1.000815
min,-0.693709,-2.745944,-2.860847,-0.3379782,-1.123817,-3.626736,-2.628736,-0.8425657,-2.006568,-1.917139
25%,-0.6646805,-0.6804822,-0.6802201,-0.3379782,-0.657232,-0.633797,-0.3819917,-0.6491468,-0.7254368,-0.7633004
50%,-0.465362,0.0207547,0.007151272,-0.3379469,-0.3932079,0.1341687,0.2952566,-0.421906,-0.04887048,-0.06886069
75%,0.2211977,0.7713969,0.8826609,-0.331046,0.4141909,0.7158094,0.7252555,0.2429054,0.7605186,0.7637702
max,4.414888,2.092591,1.589291,4.30685,4.451185,2.371463,1.542253,3.984451,2.622769,2.413936


In [14]:
tracks_scaled

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-0.572671,1.219232,1.411522,3.360867,-0.446929,1.936279,-1.123740,0.528278,-0.012287,-0.322018
1,-0.618081,0.001630,-0.336534,-0.337978,0.659097,-0.339606,0.338256,-0.824598,-1.543638,0.565193
2,1.787581,0.568994,0.113812,-0.337978,-0.657390,-0.297296,0.639256,-0.546624,0.272336,1.865816
3,3.441812,0.441497,-2.007558,-0.337978,-0.667502,-1.186263,0.467256,-0.711506,-0.833114,-0.670399
4,-0.188305,-0.017495,-0.626890,-0.337978,0.166125,-0.082959,0.639256,-0.612154,0.773007,-1.190648
...,...,...,...,...,...,...,...,...,...,...
609,2.820124,0.556245,-1.065385,-0.337970,-0.421649,-1.300638,1.026255,2.631576,-1.579278,0.430486
610,-0.683007,0.932363,0.581936,-0.336106,-1.066304,0.368034,0.166257,-0.745328,-0.475466,2.209552
611,-0.687169,-0.501986,0.830812,-0.337978,-0.529091,0.864590,-0.177742,-0.711506,-0.434592,-0.661109
612,-0.658788,0.466996,0.534531,-0.337954,-1.123817,0.147187,0.553256,-0.746385,-0.298583,2.163101


In [15]:
similarity = pd.DataFrame(cosine_similarity(tracks_scaled[1:],tracks_scaled[0:1]),columns = ['cosine_similarity'])
similarity.describe()

Unnamed: 0,cosine_similarity
count,613.0
mean,-0.023146
std,0.322467
min,-0.663315
25%,-0.240503
50%,-0.055835
75%,0.152803
max,1.0


In [16]:
all_df = pd.concat([similarity,df],axis = 1)

In [22]:
print('Name: ', current_track['name'][0])
print('Artist: ', current_track['artist'][0])

Name:  DROP IT
Artist:  SLANDER | Basstrick


In [17]:
all_df.nlargest(10,['cosine_similarity'])

Unnamed: 0,cosine_similarity,acousticness,analysis_url,artist,artist_links,danceability,duration_ms,energy,id,instrumentalness,...,mode,name,release_date,popularity,speechiness,tempo,time_signature,track_href,uri,valence
384,1.0,0.0224,https://api.spotify.com/v1/audio-analysis/5sys...,SLANDER | Basstrick,https://open.spotify.com/artist/20DZAfCuP1TKZl...,0.813,191250,0.966,5sysgWS6HwrfhHDnqMh27l,0.739,...,1,DROP IT,2017-05-19,35,0.154,128.006,4,https://api.spotify.com/v1/tracks/5sysgWS6Hwrf...,spotify:track:5sysgWS6HwrfhHDnqMh27l,0.381
264,0.95397,0.00632,https://api.spotify.com/v1/audio-analysis/1KN7...,Rawtek | Schade,https://open.spotify.com/artist/6LiWlIUtgDUn83...,0.707,211200,0.96,1KN7IN8tKAQy6llwF7PEOo,0.692,...,0,My Vibe,2019-11-08,32,0.186,149.954,4,https://api.spotify.com/v1/tracks/1KN7IN8tKAQy...,spotify:track:1KN7IN8tKAQy6llwF7PEOo,0.421
395,0.950902,0.00419,https://api.spotify.com/v1/audio-analysis/6Ulv...,Valentino Khan,https://open.spotify.com/artist/0deIjoDjl9g9Zp...,0.792,184680,0.906,6UlvONcL4jXM0pC0CPA48S,0.586,...,1,Deep Down Low,2015-03-17,42,0.0592,126.016,4,https://api.spotify.com/v1/tracks/6UlvONcL4jXM...,spotify:track:6UlvONcL4jXM0pC0CPA48S,0.471
374,0.936478,0.00093,https://api.spotify.com/v1/audio-analysis/5IAN...,Mercer,https://open.spotify.com/artist/7aSsnDTH11xS2y...,0.732,193899,0.96,5IAN1gv4gFSirldHSMYSJq,0.881,...,0,Studio 54,2018-10-04,21,0.0686,120.008,4,https://api.spotify.com/v1/tracks/5IAN1gv4gFSi...,spotify:track:5IAN1gv4gFSirldHSMYSJq,0.557
585,0.919582,0.0262,https://api.spotify.com/v1/audio-analysis/7wC9...,Sikdope,https://open.spotify.com/artist/3EXfNuPuR3OFEd...,0.86,174375,0.956,7wC9cG5EgruIiQe6Xt6SFm,0.722,...,1,Snakes,2016-04-18,52,0.0727,128.013,4,https://api.spotify.com/v1/tracks/7wC9cG5EgruI...,spotify:track:7wC9cG5EgruIiQe6Xt6SFm,0.0794
180,0.916356,0.0142,https://api.spotify.com/v1/audio-analysis/67Vl...,Kayzo,https://open.spotify.com/artist/72iCiKwu6nu6Qq...,0.587,233600,0.966,67VlPt3to7DT4F2rFLWhw6,0.843,...,0,Say It,2020-09-25,19,0.0714,149.995,4,https://api.spotify.com/v1/tracks/67VlPt3to7DT...,spotify:track:67VlPt3to7DT4F2rFLWhw6,0.379
133,0.89007,0.00405,https://api.spotify.com/v1/audio-analysis/58qe...,Gerald Le Funk,https://open.spotify.com/artist/5oFnSQlO6c7Zzm...,0.741,268404,0.914,58qebZyV7A4Gka3vw51EUW,0.792,...,1,Legacy,2019-05-24,30,0.0808,149.998,4,https://api.spotify.com/v1/tracks/58qebZyV7A4G...,spotify:track:58qebZyV7A4Gka3vw51EUW,0.177
263,0.874986,0.00133,https://api.spotify.com/v1/audio-analysis/0yfg...,Borgeous,https://open.spotify.com/artist/4uiMn2g0pgTrhN...,0.678,203437,0.953,0yfgNfht7itLYslEOzmOwq,0.659,...,1,Night & Day,2020-02-28,24,0.0396,128.007,4,https://api.spotify.com/v1/tracks/0yfgNfht7itL...,spotify:track:0yfgNfht7itLYslEOzmOwq,0.564
410,0.872554,0.0266,https://api.spotify.com/v1/audio-analysis/70nT...,Riot Ten,https://open.spotify.com/artist/2Zxy5LwBatI5kw...,0.703,231305,0.972,70nTmaiN7Js36XCmURYUTF,0.775,...,1,Lost Your Mind,2020-02-25,25,0.153,125.975,4,https://api.spotify.com/v1/tracks/70nTmaiN7Js3...,spotify:track:70nTmaiN7Js36XCmURYUTF,0.178
113,0.852981,0.0111,https://api.spotify.com/v1/audio-analysis/0PJ4...,Steve Angello | BROHUG,https://open.spotify.com/artist/4FqPRilb0Ja0TK...,0.808,201120,0.752,0PJ4RVL5wCeHDO8wHpk3YG,0.927,...,0,KNAS - BROHUG Remix,2019-08-30,47,0.0759,125.005,4,https://api.spotify.com/v1/tracks/0PJ4RVL5wCeH...,spotify:track:0PJ4RVL5wCeHDO8wHpk3YG,0.469
