In [1]:
from helper import *
!pip install plotly
%matplotlib inline



In [2]:
df = pd.read_csv('df_predictions.csv').drop(columns=['Unnamed: 0'])

In [3]:
## Removing all duplicate songs that might have been re-released every couple years

df.drop_duplicates(subset=['artists', 'name'], inplace = True)

### Rec Model - Eculidean Distance

In [4]:
'''
Creating variables for the column names I want for my vectors for cleaner code in the function later on. 
'''


song_title = df['name']
columns = ['popularity', 'duration_ms', 'explicit', 'danceability', 'energy',
           'key', 'loudness', 'mode', 'speechiness', 'acousticness',
           'instrumentalness', 'liveness', 'valence', 'tempo',
           'time_signature']

In [5]:
'''
This function takes in the user input of song_title and artist and returns the song features. 
This function is only to use in the recommendation function later on for comparison. 
The rec engine will compare the whole dataset for the chosen mood to just the one user input 
'''

def get_mean_vector(song_title, artist):
    
    song_vectors = []
    
    song_data = df[(df.name == song_title) & (df.artists == artist)][columns]
    song_vector = song_data.values
    song_vectors.append(song_vector)  
    
    song_matrix = np.array(list(song_vectors))
    return np.mean(song_matrix, axis=0)

In [15]:
'''
This is the function for the recommendation engine. It takes in the song name, artist and mood.
The song and artists are used to grab the vectors for that input and the mood is used to subset the main dataframe 
into just songs for that mood. Then with that subset, I find the most similar songs using the Eculidean distance. 

Since there is no real evaluation for this, I showed the top 10, mid 10 and last 10 recommendations to show how
far apart the distance values were. 
'''

def rec_songs(song_title, artist, mood):
    song_center = get_mean_vector(song_title, artist)
    
    matrix = df[df['mood']== mood].drop(columns=['name', 'id', 'id_artists','artists', 'release_year', 'release_datetime', 'mood'])
    
    df_mood = df[df['mood']== mood]
    
    distances = cdist(song_center, matrix, 'cosine')

    top_10_index = list(np.argsort(distances)[:, 0:10][0])
    top_10_dist = sorted(distances[0], reverse=True)[1:11]

    mid_10_index = list(np.argsort(distances)[:, 500:511][0])
    mid_10_dist = sorted(distances[0], reverse=True)[500:511]

    last_10_index = list(np.argsort(distances)[:, -10:][0])
    last_10_dist = sorted(distances[0], reverse=True)[-10:]

    rec_songs = df_mood.iloc[top_10_index][['name', 'artists', 'mood']]
    rec_songs['rec_tier'] = 'top'
    rec_songs['distance'] = list(top_10_dist)
    
    mid_ten_recs = df_mood.iloc[mid_10_index][['name', 'artists', 'mood']]
    mid_ten_recs['rec_tier'] = 'mid'
    mid_ten_recs['distance'] = list(mid_10_dist)

    last_ten_recs = df_mood.iloc[last_10_index][['name', 'artists', 'mood']]
    last_ten_recs['rec_tier'] = 'last'
    last_ten_recs['distance'] = list(last_10_dist)

    recommendations = pd.concat([rec_songs, mid_ten_recs, last_ten_recs], ignore_index = True, axis=0)
    
    return recommendations     

In [18]:
### Example:

rec_songs('We Belong Together', 'Mariah Carey', 'angry')

Unnamed: 0,name,artists,mood,rec_tier,distance
0,Talk (feat. Disclosure),"Khalid, Disclosure",angry,top,0.978645
1,Sağı Solu Kes,Gazapizm,angry,top,0.970298
2,Fendi,Rakhim,angry,top,0.963981
3,What A Shame,Leyla Blue,angry,top,0.963629
4,HAINE&SEX,Gazo,angry,top,0.963578
5,DOLLAZ ON MY HEAD (feat. Young Thug),"Gunna, Young Thug",angry,top,0.963185
6,Don't Wanna Fall In Love,KYLE,angry,top,0.962493
7,Come & Go (with Marshmello),"Juice WRLD, Marshmello",angry,top,0.962475
8,Drankin N Smokin,"Future, Lil Uzi Vert",angry,top,0.961602
9,Early,"Joy Crookes, Jafaris",angry,top,0.960916
