![](../images/bunker_studer.jpeg)
<br>
Photo: [*The Bunker*](https://www.thebunkerstudio.com/)

# Who Do You Sound Like?
### Notebook 4: Recommender System
#### Adam Zucker
---

## Contents
- **Section 1:** Package and data imports, preprocessing
- **Section 2:** Vector generation and dataframe creation
- **Section 3:** Recommender algorithm

---
---
### Section 1
#### Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy as sp

from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics.pairwise import pairwise_distances, cosine_distances, cosine_similarity, manhattan_distances

import librosa as lib
import librosa.display as libd

In [2]:
# Reading the cleaned Spotify dataframe from disk, then previewing
# its first five rows to confirm that it loaded correctly
df = pd.read_csv(filepath_or_buffer='../data_clean/spotify_kg_master.csv')
df.head(5)

Unnamed: 0,name,artists,tempo,key,mode,full_key,A minor,A# major,A# minor,B major,...,energy,instrumentalness,speechiness,acousticness,danceability,valence,popularity,liveness,year,id
0,Thunderstruck,['AC/DC'],133.5,4,1,E major,0,0,0,0,...,0.89,0.0117,0.0364,0.000147,0.502,0.259,83,0.217,1990,57bgtoPSgt236HzfBOd8kj
1,The Gift of Love,['Bette Midler'],157.5,8,1,G# major,0,0,0,0,...,0.467,0.0,0.0287,0.359,0.486,0.286,38,0.11,1990,7FUc1xVSKvABmVwI6kS5Y4
2,Thelma - Bonus Track,['Paul Simon'],94.0,5,1,F major,0,0,0,0,...,0.529,0.0845,0.077,0.872,0.71,0.882,29,0.093,1990,7pcEC5r1jVqWGRypo9D7f7
3,How I Need You,['Bad Boys Blue'],123.1,9,0,A minor,1,0,0,0,...,0.67,0.00347,0.0398,0.0724,0.652,0.963,44,0.119,1990,1yq8h4zD0IDT5X1YTaEwZh
4,Nunca Dudes De Mi,['El Golpe'],143.1,4,1,E major,0,0,0,0,...,0.49,0.0,0.0295,0.151,0.476,0.514,31,0.305,1990,5kNYkLFs3WFFgE6qhfWDEm


---

**BELOW:** Preprocessing and formatting the Spotify dataframe for conversion to a sparse matrix.

In [3]:
# Working on an independent copy so the recommender preprocessing below
# never alters the original `df`
temp_df = df.copy()

# Sanity check: the copy and the original should report the same shape
print(temp_df.shape, df.shape, sep='\n')

(56798, 41)
(56798, 41)


In [4]:
# Listing every column in the copied dataframe before choosing which ones to keep
temp_df.columns

Index(['name', 'artists', 'tempo', 'key', 'mode', 'full_key', 'A minor',
       'A# major', 'A# minor', 'B major', 'B minor', 'C major', 'C minor',
       'C# major', 'C# minor', 'D major', 'D minor', 'D# major', 'D# minor',
       'E major', 'E minor', 'F major', 'F minor', 'F# major', 'F# minor',
       'G major', 'G minor', 'G# major', 'G# minor', 'loudness', 'duration_s',
       'energy', 'instrumentalness', 'speechiness', 'acousticness',
       'danceability', 'valence', 'popularity', 'liveness', 'year', 'id'],
      dtype='object')

In [5]:
# Joining each song title to its artists with ' - ' to form a single label per row;
# this label is set as the dataframe index a few cells further down
temp_df['name_and_artists'] = temp_df['name'].add(' - ').add(temp_df['artists'])

In [6]:
# Removing the columns that play no part in similarity comparisons against new,
# incoming songs. `drop` returns a new frame, which is bound back to `temp_df`.
temp_df = temp_df.drop(
    ['name', 'artists', 'key', 'mode', 'full_key', 'popularity', 'year', 'id'],
    axis='columns',
)

In [7]:
# Confirming which columns remain after the drop (the new 'name_and_artists' label comes last)
temp_df.columns

Index(['tempo', 'A minor', 'A# major', 'A# minor', 'B major', 'B minor',
       'C major', 'C minor', 'C# major', 'C# minor', 'D major', 'D minor',
       'D# major', 'D# minor', 'E major', 'E minor', 'F major', 'F minor',
       'F# major', 'F# minor', 'G major', 'G minor', 'G# major', 'G# minor',
       'loudness', 'duration_s', 'energy', 'instrumentalness', 'speechiness',
       'acousticness', 'danceability', 'valence', 'liveness',
       'name_and_artists'],
      dtype='object')

In [8]:
# Moving the song label to the front; every other column keeps its existing relative order
temp_df = temp_df.loc[:, [
    'name_and_artists',
    'tempo',
    # 0/1 indicator columns, one per musical key
    'A minor', 'A# major', 'A# minor',
    'B major', 'B minor',
    'C major', 'C minor', 'C# major', 'C# minor',
    'D major', 'D minor', 'D# major', 'D# minor',
    'E major', 'E minor',
    'F major', 'F minor', 'F# major', 'F# minor',
    'G major', 'G minor', 'G# major', 'G# minor',
    # Remaining numeric audio features
    'loudness', 'duration_s', 'energy', 'instrumentalness', 'speechiness',
    'acousticness', 'danceability', 'valence', 'liveness',
]]

In [9]:
# Promoting the combined "title - artists" label to the index, which leaves
# only numeric feature columns in the body of the dataframe
temp_df = temp_df.set_index('name_and_artists')

In [10]:
# Previewing the feature dataframe, now indexed by "title - artists"
temp_df.head()

Unnamed: 0_level_0,tempo,A minor,A# major,A# minor,B major,B minor,C major,C minor,C# major,C# minor,...,G# minor,loudness,duration_s,energy,instrumentalness,speechiness,acousticness,danceability,valence,liveness
name_and_artists,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Thunderstruck - ['AC/DC'],133.5,0,0,0,0,0,0,0,0,0,...,0,-5.175,292.9,0.89,0.0117,0.0364,0.000147,0.502,0.259,0.217
The Gift of Love - ['Bette Midler'],157.5,0,0,0,0,0,0,0,0,0,...,0,-10.765,241.6,0.467,0.0,0.0287,0.359,0.486,0.286,0.11
Thelma - Bonus Track - ['Paul Simon'],94.0,0,0,0,0,0,0,0,0,0,...,0,-13.367,254.7,0.529,0.0845,0.077,0.872,0.71,0.882,0.093
How I Need You - ['Bad Boys Blue'],123.1,1,0,0,0,0,0,0,0,0,...,0,-12.096,218.3,0.67,0.00347,0.0398,0.0724,0.652,0.963,0.119
Nunca Dudes De Mi - ['El Golpe'],143.1,0,0,0,0,0,0,0,0,0,...,0,-8.606,176.0,0.49,0.0,0.0295,0.151,0.476,0.514,0.305


---
---
### Section 2
#### Vector Generation

In [11]:
# Standardizing every feature column so that no single feature dominates
# the distance calculation purely because of its scale.
# The fitted scaler stays available as `ss`.
ss = StandardScaler()
ss.fit(temp_df)

# NOTE: despite its name, `scaled_df` is the transformer's array output rather than a
# pandas DataFrame (with scikit-learn's default output settings) -- the song labels
# live only in `temp_df.index`
scaled_df = ss.transform(temp_df)

In [12]:
# Computing the cosine distance between every pair of scaled song vectors
# (0 = identical direction; larger values = less similar).
# n_jobs=-1 spreads the work over all available cores instead of a hard-coded
# worker count, so the cell is portable across machines; the distances are unchanged.
# NOTE: the result is a dense (n_songs x n_songs) array. With the 56,798 rows reported
# in Section 1 that is roughly 26 GB if stored as float64 -- confirm there is enough
# RAM before running this cell.
rec = pairwise_distances(scaled_df, metric='cosine', n_jobs=-1)

In [13]:
# Wrapping the distance matrix in a dataframe labelled on both axes by
# "title - artists", so any pair of songs can be looked up by label
rec_df = pd.DataFrame(rec, index=temp_df.index, columns=temp_df.index)

In [14]:
# Previewing the distance dataframe: rows and columns are both song labels,
# and each song's distance to itself (the diagonal) is 0.0
rec_df.head()

name_and_artists,Thunderstruck - ['AC/DC'],The Gift of Love - ['Bette Midler'],Thelma - Bonus Track - ['Paul Simon'],How I Need You - ['Bad Boys Blue'],Nunca Dudes De Mi - ['El Golpe'],"You And Me Of The 10,000 Wars - ['Indigo Girls']",Don't Laugh (i Love You) - ['Ween'],Railroad Worksong - ['The Notting Hillbillies'],Turn! Turn! Turn! - ['The Byrds'],"Siéntate Ahí - [""Oscar D'León""]",...,"I WANNA SEE SOME ASS (feat. jetsonmade) - ['Jack Harlow', 'jetsonmade']",MODUS - ['Joji'],Psycho! - ['MASN'],"COOLER THAN A BITCH (feat. Roddy Ricch) - ['Gunna', 'Roddy Ricch']",What If I Told You That I Love You - ['Ali Gatie'],"24 (feat. Lil Baby) - ['Money Man', 'Lil Baby']","Bean (Kobe) [feat. Chief Keef] - ['Lil Uzi Vert', 'Chief Keef']","Pardon (feat. Lil Baby) - ['T.I.', 'Lil Baby']",Long Live - ['Florida Georgia Line'],"Billetes Azules (with J Balvin) - ['KEVVO', 'J Balvin']"
name_and_artists,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Thunderstruck - ['AC/DC'],0.0,1.008766,1.200292,1.081286,0.164255,1.124453,1.05934,1.053503,1.039069,1.010914,...,1.193503,1.037507,1.180821,1.108988,1.186523,1.097103,1.0484,1.078885,1.027577,1.10389
The Gift of Love - ['Bette Midler'],1.008766,0.0,1.071814,1.067803,0.987457,0.935461,1.236937,0.989657,1.030825,1.156865,...,1.191703,0.980848,1.029696,1.081106,0.990258,1.083273,1.216447,1.199216,1.11603,1.238159
Thelma - Bonus Track - ['Paul Simon'],1.200292,1.071814,0.0,0.956827,1.127582,0.936057,0.973261,0.92909,1.028249,0.984139,...,0.959309,1.138327,0.954939,0.973634,0.906705,1.065129,1.137969,1.009152,1.163957,1.04301
How I Need You - ['Bad Boys Blue'],1.081286,1.067803,0.956827,0.0,1.027095,1.076275,0.777341,0.984242,0.907169,0.963341,...,0.894979,1.08729,1.064838,0.982186,1.123387,1.006332,1.132095,1.011595,1.011847,0.994169
Nunca Dudes De Mi - ['El Golpe'],0.164255,0.987457,1.127582,1.027095,0.0,1.042554,1.010274,1.079293,1.044738,1.069386,...,1.041934,1.043607,1.021067,1.0608,1.098322,1.080464,1.094262,1.040128,1.003542,1.110944


---
---
### Section 3
#### Recommender Algorithm