In [61]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


from scipy import sparse

from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity, cosine_distances
from sklearn.preprocessing import StandardScaler

In [62]:
df = pd.read_csv('datasets/checkpoint2.csv')
df.drop(columns=['Unnamed: 0'], inplace=True)

In [63]:
df.head(2)

Unnamed: 0,track_name,track_id,track_pop,artist_name,featured_artist,album_name,artist_id,danceability,energy,key,loudness,mode,speechiness,acousticness,intrumentalness,valence,tempo,track_lyrics,playlist_name
0,Codeine Dreaming (feat. Lil Wayne),4DTpngLjoHj5gFxEZFeD3J,66,Kodak Black,Lil Wayne,Project Baby 2: All Grown Up (Deluxe Edition),46SHBwWsqBkxI7EeeBEQG7,0.736,0.604,1,-7.357,0,0.222,0.0652,6.4e-05,0.574,155.007,Take that all the way up. I'm about to take of...,Best Hip-Hop Playlist
1,Bank Account,2fQrGHiQOvpL9UgPvtYy6G,18,21 Savage,,Issa Album,1URnnhqYAYcrqrcwql10ft,0.884,0.347,8,-8.227,0,0.35,0.015,7e-06,0.376,75.016,I tear down the mall with the bitch (for real)...,Best Hip-Hop Playlist


In [64]:
df.shape

(4037, 19)

In [100]:
df = df.drop_duplicates(subset = ['track_name'])

In [101]:
df.shape

(3351, 19)

### Drop unnecessary columns

In [66]:
data = df.drop(columns=[
    'track_id',
    'track_pop',
    'artist_name', 
    'featured_artist', 
    'album_name',
    'artist_id',
    'intrumentalness',
    'track_lyrics',
    'playlist_name',
])

In [67]:
data.head(2)

Unnamed: 0,track_name,danceability,energy,key,loudness,mode,speechiness,acousticness,valence,tempo
0,Codeine Dreaming (feat. Lil Wayne),0.736,0.604,1,-7.357,0,0.222,0.0652,0.574,155.007
1,Bank Account,0.884,0.347,8,-8.227,0,0.35,0.015,0.376,75.016


### Need to format the index as the track name for the cosine matrix to work correctly.

In [68]:
data.set_index('track_name', inplace=True)
#data.set_index('artist_name', inplace=True)

In [None]:
data.head(2)

### Standard scale the audio features before calculating the pairwise distances.

In [None]:
ss = StandardScaler()
scaled = ss.fit_transform(data)
scaled = pd.DataFrame(scaled)

In [None]:
scaled.head(2)

In [None]:
similairity_matrix = pairwise_distances(scaled, metric='cosine')


In [None]:
similairity_matrix

### Convert similarity matrix back to a Dataframe

In [None]:
recommender_df = pd.DataFrame(
    similairity_matrix,
    index=data.index,
    columns=data.index
)

recommender_df.head(2)

In [None]:
recommender_df.shape

In [None]:
recommender_df['Codeine Dreaming (feat. Lil Wayne)'].sort_values(ascending=False).head(10)

In [None]:
recommender_df['Bank Account'].sort_values(ascending=False).head(10)

In [None]:
recommender_df.loc['Bank Account', 'Codeine Dreaming (feat. Lil Wayne)']

### Questionaire Development

In [None]:
data['danceability'].describe()

In [None]:
data['acousticness'].describe()

Going to leave insturmentalness out becuase of the strange ranges.

In [None]:
data['intrumentalness'].describe()

In [None]:
data['acousticness'].sort_values(ascending=False)

In [None]:
data['acousticness'].describe()

In [None]:
data['tempo'].describe()

In [None]:
data['energy'].describe()

data['key'].describe()

data['loudness'].describe()

### Key Characteristics associated by mood 
https://www.wmich.edu/mus-theo/courses/keys.html

#### Pitch classes
0:  C  <br /> 
1:  C#, Db <br /> 
2:  D <br /> 
3:  D#, Eb <br /> 
4:  E <br /> 
5:  F <br /> 
6:  F#, Gb <br /> 
7:  G <br /> 
8:  G#, Ab <br /> 
9:  A <br /> 
10:  A#, Bb <br /> 
11:  B

In [None]:
data.shape

### The App will ask the user for input on key characteristics to fit their mood and audio feature levels to include. I will add the values as a new instance at the end of the dataframe so it will represent a track. From there, I can get the top 5 recommended songs.

In [69]:
#danceability	energy	key	loudness	mode	speechiness	acousticness	intrumentalness	valence	tempo
data.loc[len(df.index)+1] = [.8, .2, 0, -10, 1, .4, .2, .8, 60]

In [70]:
data.shape

(4038, 9)

In [None]:
data.tail(3)

In [13]:
# def scale_songs(data):
#     ss = StandardScaler()
#     scaled = ss.fit_transform(data)
#     scaled = pd.DataFrame(scaled)

#     similairity_matrix = pairwise_distances(scaled, metric='cosine')

#     recommender_df = pd.DataFrame(
#         similairity_matrix,
#         index=data.index,
#         columns=data.index
#     )
#     return recommender_df

In [14]:
scale_songs(data)

track_name,Codeine Dreaming (feat. Lil Wayne),Bank Account,PICK IT UP (feat. A$AP Rocky),Ric Flair Drip (with Metro Boomin),Roll in Peace (feat. XXXTENTACION),God's Plan,XO Tour Llif3,Mask Off,Rolex,Look At Me!,...,Run It! (feat. Juelz Santana),GDFR (feat. Sage the Gemini & Lookas),Beautiful,X Gon' Give It To Ya,El Chapo,All The Way Up (feat. Infared),Taste (feat. Offset),Everybody (Backstreet's Back) - Radio Edit,Black Betty,4038
track_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Codeine Dreaming (feat. Lil Wayne),0.000000,0.913147,0.739443,1.387041e+00,7.164531e-01,1.462009,0.777204,0.592449,0.912290,0.806652,...,0.622477,1.095188,0.575593,1.167715,0.774091,0.767305,0.995447,1.213987,1.149935,9.356407e-01
Bank Account,0.913147,0.000000,0.758586,2.895565e-01,3.922386e-01,0.382628,0.834757,0.603951,1.618733,1.185679,...,0.373827,1.699566,1.372508,0.793877,1.024890,1.171375,0.708839,0.964992,1.075137,3.783596e-01
PICK IT UP (feat. A$AP Rocky),0.739443,0.758586,0.000000,5.179203e-01,9.901108e-01,0.577799,1.271981,0.324178,0.978794,0.596276,...,0.425747,1.181759,1.208861,1.570619,1.528462,1.547824,0.268283,1.536863,1.710099,6.590834e-01
Ric Flair Drip (with Metro Boomin),1.387041,0.289556,0.517920,1.110223e-16,6.531295e-01,0.189838,1.055743,0.564839,1.509078,1.021270,...,0.682206,1.527370,1.750654,1.130320,1.450131,1.442127,0.622108,1.202821,1.338177,4.712461e-01
Roll in Peace (feat. XXXTENTACION),0.716453,0.392239,0.990111,6.531295e-01,1.110223e-16,0.942367,0.187399,0.703206,1.554454,1.326186,...,0.712425,1.383067,1.316806,0.756855,0.809322,0.499075,1.326464,0.766045,0.802436,1.096402e+00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
All The Way Up (feat. Infared),0.767305,1.171375,1.547824,1.442127e+00,4.990755e-01,1.614060,0.261799,1.001292,1.117439,1.062346,...,1.345706,0.790903,0.919289,0.719510,0.486184,0.000000,1.671223,0.778476,0.569633,1.540322e+00
Taste (feat. Offset),0.995447,0.708839,0.268283,6.221080e-01,1.326464e+00,0.501220,1.535573,0.592762,0.769824,0.545824,...,0.371979,1.063893,0.902700,1.159174,1.207218,1.671223,0.000000,1.275994,1.661400,5.145645e-01
Everybody (Backstreet's Back) - Radio Edit,1.213987,0.964992,1.536863,1.202821e+00,7.660448e-01,1.118522,0.516085,1.720456,0.785874,1.639932,...,1.025085,0.692816,0.575667,0.100729,0.378204,0.778476,1.275994,0.000000,0.420942,1.491786e+00
Black Betty,1.149935,1.075137,1.710099,1.338177e+00,8.024364e-01,1.088875,0.556643,1.575493,1.260671,1.716041,...,1.361457,0.923113,0.926014,0.536079,0.774954,0.569633,1.661400,0.420942,0.000000,1.373927e+00


In [15]:
recommender_df.loc[4038].sort_values(ascending=False).head(10)

NameError: name 'recommender_df' is not defined

NameError: name 'recommender_df' is not defined

In [None]:
df[recommender_df.loc[4038][0]]

In [71]:
ss = StandardScaler()
scaled = ss.fit_transform(data)
scaled = pd.DataFrame(scaled)

similairity_matrix = pairwise_distances(scaled, metric='cosine')

recommender_df = pd.DataFrame(
    similairity_matrix,
    index=data.index,
    columns=data.index
)

In [17]:
recommender_df.loc[4038].sort_values(ascending=False).head(5)

track_name
2019                              1.843000
Vibes                             1.830097
Sex on Fire                       1.815258
Looking                           1.807481
HELL BENT (with The Kid LAROI)    1.806494
Name: 4038, dtype: float64

In [72]:
recommender_df[len(data)].sort_values(ascending=False)

track_name
2019                              1.843000e+00
Vibes                             1.830097e+00
Sex on Fire                       1.815258e+00
Looking                           1.807481e+00
HELL BENT (with The Kid LAROI)    1.806494e+00
                                      ...     
Overdrive                         1.266563e-01
Deadly Combination                1.199891e-01
LOT OF ME                         8.671492e-02
Through Da Storm                  7.833390e-02
4038                              2.220446e-16
Name: 4038, Length: 4038, dtype: float64

In [18]:
rec_songs = recommender_df[len(data)].sort_values(ascending=False)

In [85]:
rec_songs.index[0]

'2019'

In [37]:
rec_songs.index[:5]

Index(['2019', 'Vibes', 'Sex on Fire', 'Looking',
       'HELL BENT (with The Kid LAROI)'],
      dtype='object', name='track_name')

### Testing how to return a data frame that includes the artist name within the app

In [92]:

for i in range(0,5):
    print(df[df['track_name'] == rec_songs.index[i]][['track_name', 'artist_name']])

    track_name artist_name
821       2019     Bazanji
     track_name   artist_name
1778      Vibes  Trippie Redd
       track_name    artist_name
3490  Sex on Fire  Kings of Leon
    track_name artist_name
334    Looking     On Froy
                          track_name      artist_name
1512  HELL BENT (with The Kid LAROI)  TOKYO’S REVENGE


In [95]:
d2 = pd.DataFrame({'track_name': [rec_songs.index[0],
                                 rec_songs.index[1],
                                 rec_songs.index[2],
                                 rec_songs.index[3],
                                 rec_songs.index[4]],
                  'arist_name': [df[df['track_name'] == rec_songs.index[0]][['artist_name']].iloc[0][0],
                                df[df['track_name'] == rec_songs.index[1]][['artist_name']].iloc[0][0],
                                 df[df['track_name'] == rec_songs.index[2]][['artist_name']].iloc[0][0],
                                df[df['track_name'] == rec_songs.index[3]][['artist_name']].iloc[0][0],
                                df[df['track_name'] == rec_songs.index[4]][['artist_name']].iloc[0][0]]})

In [97]:
d2

Unnamed: 0,track_name,arist_name
0,2019,Bazanji
1,Vibes,Trippie Redd
2,Sex on Fire,Kings of Leon
3,Looking,On Froy
4,HELL BENT (with The Kid LAROI),TOKYO’S REVENGE


In [55]:
df[df['track_name'] == rec_songs.index[i]][['track_name', 'artist_name']].iloc[0]

track_name     HELL BENT (with The Kid LAROI)
artist_name                   TOKYO’S REVENGE
Name: 1512, dtype: object

In [91]:
df[df['track_name'] == rec_songs.index[0]][['artist_name']].iloc[0][0]

'Bazanji'

In [None]:
#custom_ss = ss.fit_transform(recommender_df.loc[4038])
#custom_ss

In [None]:
df[df['track_name']== "Rebirth (2016)"]