In [15]:
from pathlib import Path
import numpy as np
import pandas as pd
from songs_analyzer.config import get_root_path
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [2]:
# Location of the raw song-feature CSV, built from whatever get_root_path()
# returns (presumably the project root as a Path — confirm in songs_analyzer.config).
songs_data_path = get_root_path() / "data" / "raw" / "song_feature_data.csv"

# Read the whole file with pandas' default CSV settings.
songs_df = pd.read_csv(filepath_or_buffer=songs_data_path)

In [3]:
# Preview the first five rows of the freshly loaded (unscaled) table.
songs_df.head(n=5)

Unnamed: 0,uri,name,artist,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,spotify:track:7fgC7BPn2SzVno2d7ooLGI,Rufus Wainwright Spotify Pride Intro,Various Artists,0,0.0,0.629,10,-8.432,0,0.0,0.348,0.0,0.383,0.0,0.0,13750,0
1,spotify:track:3l6EkMrhwXPSVEuTDksWB8,Cigarettes And Chocolate Milk,Rufus Wainwright,44,0.328,0.357,4,-9.939,1,0.0274,0.791,0.0,0.0832,0.161,97.816,280360,4
2,spotify:track:2PwYIEV1H34mbQBvbnnmLx,Over the Rainbow,Judy Garland,0,0.231,0.0426,8,-21.141,1,0.0426,0.901,0.000136,0.153,0.199,77.74,166467,4
3,spotify:track:6bFrlORduDtYQ9BlPenV3o,My Baby Just Cares For Me,Nina Simone,0,0.76,0.196,11,-17.872,0,0.111,0.812,2e-06,0.0845,0.418,118.348,216689,4
4,spotify:track:3uxg8Hl1NFVySLDL0euMbu,Cre Spoda,Klaus Nomi,16,0.234,0.858,8,-6.145,0,0.071,0.337,0.459,0.228,0.2,127.745,183947,4


In [11]:
# Columns passed to the scaler. Note that "key" is not in this list, while
# "mode" and "time_signature" are.
numerical_features = [
    "popularity",
    "danceability",
    "energy",
    "loudness",
    "mode",
    "speechiness",
    "acousticness",
    "instrumentalness",
    "liveness",
    "valence",
    "tempo",
    "duration_ms",
    "time_signature",
]

# StandardScaler with default settings; fitted on, and applied to, the same rows.
scaler = StandardScaler()
numeric_dataset = scaler.fit_transform(X=songs_df[numerical_features])

In [12]:
# Overwrite the listed columns of songs_df with the scaler output computed in
# the previous cell.
# NOTE(review): this replaces the raw values in place, so the original
# popularity/tempo/etc. are no longer available from songs_df after this cell;
# consider writing to a separate frame if the raw values are needed later.
songs_df[numerical_features] = numeric_dataset

In [14]:
# Preview the first five rows again, now that the numeric feature columns
# have been replaced by the scaler output.
songs_df.head(n=5)

Unnamed: 0,uri,name,artist,popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,spotify:track:7fgC7BPn2SzVno2d7ooLGI,Rufus Wainwright Spotify Pride Intro,Various Artists,-1.308319,-2.944535,0.374847,10,0.372628,-1.445603,-0.614034,-0.155054,-0.540209,1.140318,-1.686256,-3.635329,-1.000919,-6.160424
1,spotify:track:3l6EkMrhwXPSVEuTDksWB8,Cigarettes And Chocolate Milk,Rufus Wainwright,0.320084,-1.183339,-0.570732,4,0.16722,0.691753,-0.497709,1.055987,-0.540209,-0.653236,-1.056495,-0.587299,0.120397,0.268232
2,spotify:track:2PwYIEV1H34mbQBvbnnmLx,Over the Rainbow,Judy Garland,-1.308319,-1.704181,-1.663711,8,-1.359644,0.691753,-0.433177,1.356697,-0.539799,-0.235657,-0.907856,-1.212884,-0.358618,0.268232
3,spotify:track:6bFrlORduDtYQ9BlPenV3o,My Baby Just Cares For Me,Nina Simone,-1.308319,1.136285,-1.130432,11,-0.91407,-1.445603,-0.142788,1.113396,-0.540204,-0.645458,-0.051224,0.052496,-0.147393,0.268232
4,spotify:track:3uxg8Hl1NFVySLDL0euMbu,Cre Spoda,Klaus Nomi,-0.716172,-1.688072,1.170942,8,0.684353,-1.445603,-0.312606,-0.185125,0.846488,0.21303,-0.903944,0.345315,-0.2851,0.268232


In [16]:
# Nearest-neighbour index over the scaled feature columns; 10 is the default
# number of neighbours returned per query. All other settings are sklearn defaults.
model = NearestNeighbors(n_neighbors=10)
# Kept as the last expression so the fitted estimator's repr is displayed.
model.fit(X=songs_df[numerical_features])

NearestNeighbors(n_neighbors=10)

In [28]:
# Query the fitted model for the 10 nearest neighbours of the row labelled 0.
#
# The query is passed as a one-row DataFrame (the double brackets keep it 2-D)
# instead of wrapping a list in np.matrix: np.matrix is discouraged by NumPy
# and rejected by current scikit-learn input validation, and a DataFrame slice
# keeps the same column names the model was fitted with.
query_features = songs_df[numerical_features].loc[[0]]

# kneighbors returns two arrays with one row per query row: the distances to
# the neighbours and their positional offsets in the fitted data.
distance, index = model.kneighbors(query_features, n_neighbors=10)



In [48]:
# Map the neighbour offsets of the first (only) query row back to song metadata.
# kneighbors reports positional offsets into the fitted data, so select with
# .iloc; label-based .loc only gives the same rows when songs_df has a default
# 0..n-1 index and would pick wrong rows (or raise) after any filter/reindex.
neighbors_songs = songs_df[["uri", "name", "artist"]].iloc[index[0]]

In [49]:
# Display the neighbour table; its first row is the query song itself
# (row 0 appears at the top of the output below).
neighbors_songs

Unnamed: 0,uri,name,artist
0,spotify:track:7fgC7BPn2SzVno2d7ooLGI,Rufus Wainwright Spotify Pride Intro,Various Artists
1522,spotify:track:5jBBOzlULSLS3J1WL646tB,Conchita Wurst Spotify Pride Intro,Various Artists
58818,spotify:track:3Om5CbQrsMF1UwC7dFVGA1,Kathy Griffin: Special Fun - #PressPlayForPride,Various Artists
3938,spotify:track:4ENuIjI1dQOQGYkgIwQgSX,Little Mix Spotify Pride Intro,Various Artists
11,spotify:track:71UlvGyFNbcQMTTXbCcSrr,Rufus Wainwright on Pride,Various Artists
66995,spotify:track:28loehMDri9eBOFsF0RNCK,Here's Some Sweet Music,Various Artists
58816,spotify:track:3oEGENBsi1DGRysE2ENu3S,Kathy Griffin: Great Track - #PressPlayForPride,Various Artists
66992,spotify:track:5T30zoNNgnVZrkCsHNtvD7,Seth Rogen - Classic Soul Intro,Various Artists
56581,spotify:track:51WUuwbqSq4MkG87wVg1eu,Rubber Duckie,Ernie
47704,spotify:track:5BaAl2ThitfLHMthC8nin4,Girls Rock on Mitski,Various Artists
