In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import MinMaxScaler,StandardScaler

In [2]:
# Read the song export; UTF-8 is requested explicitly so that non-ASCII
# (foreign-language) characters in the text columns are decoded correctly.
df = pd.read_csv('sussy.csv', encoding='utf-8')
print(df.head())
song_count, feature_count = df.shape
print('There are', song_count, 'songs and', feature_count, 'features')

   acousticness                                       analysis_url  \
0        0.0140  https://api.spotify.com/v1/audio-analysis/14gm...   
1        0.4590  https://api.spotify.com/v1/audio-analysis/1Fhb...   
2        0.7650  https://api.spotify.com/v1/audio-analysis/6D6H...   
3        0.0935  https://api.spotify.com/v1/audio-analysis/2hA6...   
4        0.1940  https://api.spotify.com/v1/audio-analysis/7DF8...   

                    artist                                       artist_links  \
0                    keshi  https://open.spotify.com/artist/3pc0bOVB5whxmD...   
1                    keshi  https://open.spotify.com/artist/3pc0bOVB5whxmD...   
2                    keshi  https://open.spotify.com/artist/3pc0bOVB5whxmD...   
3               Juice WRLD  https://open.spotify.com/artist/4MCBfE4596Uoi2...   
4  Juice WRLD | SUGA | BTS  https://open.spotify.com/artist/4MCBfE4596Uoi2...   

   danceability  duration_ms  energy                      id  \
0         0.622       193373

The `id` column holds only the last part of each track URL, so we prepend the base URL `https://open.spotify.com/track/` to turn it into a full link.

In [3]:
# Turn the bare value in `id` into a full track link by prepending the base URL.
# Values that already start with the prefix are left untouched, so re-running
# this cell without reloading `df` does not prepend the prefix a second time.
track_url_prefix = 'https://open.spotify.com/track/'
track_ids = df['id'].astype(str)
df['id'] = track_ids.where(
    track_ids.str.startswith(track_url_prefix),  # keep ids that are already links
    track_url_prefix + track_ids,                # otherwise build the link
)

In [4]:
# Total number of missing cells: per-column NA counts, summed over all columns.
na_total = df.isna().sum().sum()
print('There are', na_total, 'NA values in the dataset')

There are 0 NA values in the dataset


In [5]:
# Remove the `type` column; nothing further down in the notebook reads it.
df = df.drop(columns=['type'])

In [6]:
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line
# after the report.
df.info()
# Let pandas pick the best-fitting (nullable) extension dtype for each column.
df = df.convert_dtypes()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 669 entries, 0 to 668
Data columns (total 22 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   acousticness      669 non-null    float64
 1   analysis_url      669 non-null    object 
 2   artist            669 non-null    object 
 3   artist_links      669 non-null    object 
 4   danceability      669 non-null    float64
 5   duration_ms       669 non-null    int64  
 6   energy            669 non-null    float64
 7   id                669 non-null    object 
 8   instrumentalness  669 non-null    float64
 9   key               669 non-null    int64  
 10  liveness          669 non-null    float64
 11  loudness          669 non-null    float64
 12  mode              669 non-null    int64  
 13  name              669 non-null    object 
 14  release_date      669 non-null    object 
 15  popularity        669 non-null    int64  
 16  speechiness       669 non-null    float64
 1

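Select the features used for the similarity comparison: drop the link, identifier and text columns as well as `key`, `mode`, `time_signature` and `duration_ms`, and keep the remaining numeric columns.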

In [7]:
# Columns left out of the similarity comparison: links, identifiers, text
# fields, and the key / mode / time-signature / duration columns.
dropfeatures = [
    'analysis_url', 'artist', 'artist_links', 'uri', 'track_href',
    'release_date', 'id', 'name', 'time_signature', 'key', 'mode',
    'duration_ms',
]
# Everything that remains is the feature matrix for the library.
libraryFeatures = df.drop(columns=dropfeatures)

In [8]:
# Show the feature matrix that remains after dropping the non-feature columns
# (the rendered table is truncated by pandas for long frames).
libraryFeatures

Unnamed: 0,acousticness,danceability,energy,instrumentalness,liveness,loudness,popularity,speechiness,tempo,valence
0,0.014,0.622,0.671,0.0,0.304,-6.426,70,0.026,84.996,0.572
1,0.459,0.711,0.747,0.0,0.0957,-6.335,76,0.0523,136.0,0.852
2,0.765,0.691,0.389,0.0,0.0941,-8.247,73,0.0367,104.952,0.306
3,0.0935,0.619,0.622,0.0,0.226,-5.874,74,0.0461,150.062,0.194
4,0.194,0.603,0.771,0.0,0.13,-6.336,84,0.0627,139.988,0.332
...,...,...,...,...,...,...,...,...,...,...
664,0.0239,0.853,0.693,0.0,0.11,-6.87,79,0.275,95.967,0.662
665,0.189,0.769,0.787,0.0,0.129,-3.909,80,0.367,126.77,0.836
666,0.106,0.78,0.575,0.0,0.129,-5.628,75,0.139,81.502,0.273
667,0.00215,0.886,0.628,0.000012,0.31,-5.949,64,0.131,119.906,0.178


In [9]:
# Load the track that recommendations will be based on, then give it a full
# track link in `id`, in the same format used for the library frame.
current_track = pd.read_csv('userTrack.csv')
query_track_link = 'https://open.spotify.com/track/' + current_track['id'].astype(str)
current_track['id'] = query_track_link

In [10]:
# Reduce the query track to the same feature columns as the library: drop the
# shared non-feature columns plus `type` in a single call.
track_features = current_track.drop(columns=[*dropfeatures, 'type'])

In [11]:
# Stack the query track on top of the library so both are scaled together;
# the query track comes first and therefore ends up as the first row.
frames_to_stack = [track_features, libraryFeatures]
concation = pd.concat(frames_to_stack, axis='index')

In [12]:
# Standardise every feature column (zero mean, unit variance) so that columns
# on large scales such as tempo or loudness do not dominate the comparison.
standardscaler = StandardScaler()
features_scaled = standardscaler.fit_transform(concation)
# fit_transform returns a plain array by default, which loses the column
# labels; re-attach the feature names so describe() and the table below stay
# readable. The default RangeIndex is kept, so row positions are unchanged.
tracks_scaled = pd.DataFrame(features_scaled, columns=concation.columns)

In [13]:
# Sanity check on the scaling: summary statistics for every scaled column.
tracks_scaled.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
count,670.0,670.0,670.0,670.0,670.0,670.0,670.0,670.0,670.0,670.0
mean,-4.9877180000000005e-17,3.579226e-16,-1.58911e-16,-4.9877180000000005e-17,2.4358620000000002e-17,-5.965377e-17,2.175706e-16,1.362095e-16,-1.781328e-17,6.280217000000001e-17
std,1.000747,1.000747,1.000747,1.000747,1.000747,1.000747,1.000747,1.000747,1.000747,1.000747
min,-0.6974098,-2.759206,-2.892797,-0.331244,-1.122193,-3.629077,-2.690192,-0.8393046,-2.025117,-1.896616
25%,-0.6675203,-0.6942689,-0.6830506,-0.331244,-0.6510597,-0.6376093,-0.3333344,-0.6429069,-0.6702701,-0.7566731
50%,-0.4670096,0.004657449,0.05551703,-0.3312099,-0.4077654,0.1389436,0.2777027,-0.4148322,-0.06274328,-0.07832425
75%,0.2420871,0.7736356,0.8715152,-0.3240623,0.4111821,0.7133611,0.7141578,0.237187,0.7569451,0.7347884
max,4.457121,2.112581,1.580302,4.429228,4.499546,2.352004,1.499777,3.982997,2.614603,2.367808


In [14]:
# Show the scaled feature table (first row is the query track, the rest is the library).
tracks_scaled

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,-0.689502,-0.702229,-0.004045,-0.330755,-0.690095,-0.123230,1.368840,-0.464460,1.508030,-0.553961
1,-0.621102,-0.014448,-0.355460,-0.331244,0.675667,-0.351328,0.364994,-0.821354,-1.561149,0.524149
2,1.806191,0.552335,0.097210,-0.331244,-0.651856,-0.309139,0.626867,-0.543652,0.258898,1.792514
3,3.475295,0.424968,-2.035106,-0.331244,-0.662053,-1.195568,0.495930,-0.708373,-0.849031,-0.680798
4,-0.187462,-0.033553,-0.647313,-0.331244,0.178563,-0.095413,0.539576,-0.609118,0.760692,-1.188144
...,...,...,...,...,...,...,...,...,...,...
665,-0.567101,1.456641,-0.224424,-0.331244,-0.560721,-0.557172,0.757803,1.807840,-1.169655,0.931838
666,0.333451,0.921700,0.335458,-0.331244,-0.439631,0.815588,0.801449,2.779270,-0.070469,1.720036
667,-0.119279,0.991752,-0.927254,-0.331244,-0.439631,0.018636,0.583221,0.371814,-1.685830,-0.830284
668,-0.685739,1.666797,-0.611576,-0.331185,0.713906,-0.130184,0.103121,0.287342,-0.315407,-1.260622


In [15]:
# Row 0 of the scaled table is the query track; every later row is a library song.
query_vector = tracks_scaled.iloc[:1]
library_vectors = tracks_scaled.iloc[1:]
# One cosine score per library song, measured against the query track.
scores = cosine_similarity(library_vectors, query_vector)
similarity = pd.DataFrame(scores, columns=['cosine_similarity'])
similarity.describe()

Unnamed: 0,cosine_similarity
count,669.0
mean,0.036603
std,0.3393
min,-0.689458
25%,-0.219765
50%,-0.011458
75%,0.299926
max,1.0


In [16]:
# Put each score next to its song's metadata; the two frames are aligned on
# their index, so score row i is joined to library row i.
all_df = pd.concat([similarity, df], axis='columns')

In [17]:
# Drop songs whose score rounds to 1 (the query track itself, if it is in the
# library), keep only the columns needed for the report, and take the three
# highest-scoring songs.
not_query_track = all_df['cosine_similarity'].round(2).ne(1)
report_columns = ['cosine_similarity', 'name', 'artist', 'release_date', 'id']
all_df = all_df.loc[not_query_track, report_columns]
all_df = all_df.nlargest(3, 'cosine_similarity')

In [18]:
# Show the three best matches with their scores and links.
all_df

Unnamed: 0,cosine_similarity,name,artist,release_date,id
444,0.931096,STAY (with Justin Bieber),The Kid LAROI | Justin Bieber,2021-07-09,https://open.spotify.com/track/5HCyWlXZPP0y6Gq...
228,0.92241,Love On The Brain,Rihanna,2016-01-28,https://open.spotify.com/track/5oO3drDxtziYU2H...
49,0.915703,Secrets,OneRepublic,2009-01-01,https://open.spotify.com/track/1NhPKVLsHhFUHIO...


The filter that removes rows whose cosine similarity rounds to 1 is there in case the recommended song is also in my library: such a song has a cosine similarity of 1 with itself and would otherwise come back as its own top match.

In [19]:
# Header: details of the query track (row label 0 of `current_track`).
print('Printing songs similar to...')
header_fields = (
    ('Name: ', 'name'),
    ('Artist(s): ', 'artist'),
    ('Release Date:', 'release_date'),
    ('Link:', 'id'),
)
for label, column in header_fields:
    print(label, current_track[column][0])
print('___________________________________')

# Body: one entry per recommended song, followed by blank lines as a separator.
match_fields = (
    ('Name:', 'name'),
    ('Artist(s):', 'artist'),
    ('Release Date:', 'release_date'),
    ('Link:', 'id'),
)
for _, match in all_df.iterrows():
    print('Cosine Score:', round(match['cosine_similarity'], 2))
    for label, column in match_fields:
        print(label, match[column])
    print('\n')

Printing songs similar to...
Name:  Blinding Lights
Artist(s):  The Weeknd
Release Date: 2020-03-20
Link: https://open.spotify.com/track/0VjIjW4GlUZAMYd2vXMi3b
___________________________________
Cosine Score: 0.93
Name: STAY (with Justin Bieber)
Artist(s): The Kid LAROI | Justin Bieber
Release Date: 2021-07-09
Link: https://open.spotify.com/track/5HCyWlXZPP0y6Gqq8TgA20


Cosine Score: 0.92
Name: Love On The Brain
Artist(s): Rihanna
Release Date: 2016-01-28
Link: https://open.spotify.com/track/5oO3drDxtziYU2H1X23ZIp


Cosine Score: 0.92
Name: Secrets
Artist(s): OneRepublic
Release Date: 2009-01-01
Link: https://open.spotify.com/track/1NhPKVLsHhFUHIOZ32QnS2


