In [1]:
import pandas as pd
import numpy as np
import statistics
import seaborn as sns

import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings("ignore")

### Functions to predict the liked/disliked songs from Janna's library

**Note: Will recommend if song has probability of being liked at 0.8 or higher**


- When predicting songs for specific genres, call only songs from the specific genres
- When predicting songs for happy, valence > 0.7 . For sad, valence < 0.5 
- When predicting songs for energetic, energy > 0.7 + danceability > 0.7

In [2]:
def choose_mood(): 
    
    mood = ''
    
    try: 
        x = int(input('Which mood would you like this playlist to have? \n'
                             '0: No preference \n'
                             '1: Happy \n'
                             '2: Sad \n'
                             '3: Upbeat \n'))
    except (ValueError, TypeError):
        x = 0
    
    if x==1:
        mood = 'happy'
    elif x==2: 
        mood = 'sad'
    elif x==3:
        mood='upbeat'
    
    return mood

In [3]:
choose_mood()

Which mood would you like this playlist to have? 
0: No preference 
1: Happy 
2: Sad 
3: Upbeat 



''

In [4]:
def choose_genres():
    x = 10
    genres = []
   
    while x != 0:
        try:
            x = int(input('Which genres would you like included in this playlist? \n'
                                        '0: No preference/No more \n'
                                        '1: R&B \n'
                                        '2: pop \n'
                                        '3: rap \n'
                                        '4: hip hop \n'
                                        '5: funk \n'
                                        '6: rock \n'))
        except (ValueError, TypeError):
            x = 0
            
        if (x==1) and ('r&b' not in genres):
            genres.append('r&b')
        elif (x==2) and ('pop' not in genres):
            genres.append('pop')
        elif (x==3) and ('rap' not in genres):
            genres.append('rap')
        elif (x==4) and ('hip hop' not in genres):
            genres.append('hip hop')
        elif (x==5) and ('funk' not in genres):
            genres.append('funk')
        elif (x==6) and ('rock' not in genres):
            genres.append('rock')
        
    return genres   

In [5]:
choose_genres()

Which genres would you like included in this playlist? 
0: No preference/No more 
1: R&B 
2: pop 
3: rap 
4: hip hop 
5: funk 
6: rock 



[]

In [9]:
mood = choose_mood()

Which mood would you like this playlist to have? 
0: No preference 
1: Happy 
2: Sad 
3: Upbeat 
1


In [10]:
genres = choose_genres()

Which genres would you like included in this playlist? 
0: No preference/No more 
1: R&B 
2: pop 
3: rap 
4: hip hop 
5: funk 
6: rock 
1
Which genres would you like included in this playlist? 
0: No preference/No more 
1: R&B 
2: pop 
3: rap 
4: hip hop 
5: funk 
6: rock 
2
Which genres would you like included in this playlist? 
0: No preference/No more 
1: R&B 
2: pop 
3: rap 
4: hip hop 
5: funk 
6: rock 
6
Which genres would you like included in this playlist? 
0: No preference/No more 
1: R&B 
2: pop 
3: rap 
4: hip hop 
5: funk 
6: rock 
0


In [11]:
genres

['r&b', 'pop', 'rock']

**Note: Input dataset (param 'rdf') already has probability values for liked/disliked included as columns**

In [41]:
x = rec_df.loc[rec_df['r&b']==1]
print(x.shape)

(404, 27)


In [42]:
y = rec_df.loc[rec_df['pop']==1]
print(y.shape)

(578, 27)


In [45]:
print(x.index)
print(y.index)

Int64Index([   0,    2,    5,   12,   13,   15,   16,   17,   20,   21,
            ...
            1044, 1048, 1056, 1058, 1071, 1075, 1076, 1091, 1096, 1098],
           dtype='int64', length=404)
Int64Index([   0,    1,    2,    4,   12,   13,   18,   25,   96,   97,
            ...
            1079, 1082, 1084, 1085, 1089, 1091, 1093, 1094, 1095, 1098],
           dtype='int64', length=578)


Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,r&b,pop,rap,hip hop,funk,rock,duration_min,rf_pred_class,rf_pred_disliked,rf_pred_liked
0,0.350,0.120,3.0,-11.104,0.0,0.0393,0.9820,0.000289,0.1030,0.121,...,1.0,1.0,0.0,1.0,0.0,0.0,3.370000,0.0,0.59,0.41
2,0.538,0.640,7.0,-7.957,1.0,0.2810,0.0727,0.000027,0.8410,0.445,...,1.0,1.0,1.0,1.0,0.0,0.0,4.086767,0.0,0.64,0.36
5,0.839,0.475,0.0,-9.338,0.0,0.2470,0.4140,0.000000,0.2160,0.909,...,1.0,0.0,0.0,0.0,0.0,0.0,3.171200,1.0,0.45,0.55
12,0.471,0.411,0.0,-9.500,1.0,0.2950,0.8100,0.000000,0.1350,0.363,...,1.0,1.0,0.0,0.0,0.0,0.0,3.520450,0.0,0.58,0.42
13,0.866,0.679,6.0,-6.740,0.0,0.3880,0.2140,0.000005,0.1010,0.272,...,1.0,1.0,0.0,1.0,0.0,0.0,4.468117,0.0,0.73,0.27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1085,0.591,0.454,7.0,-8.851,1.0,0.0317,0.6360,0.000000,0.0793,0.198,...,0.0,1.0,0.0,0.0,0.0,0.0,6.546667,0.0,0.57,0.43
1089,0.651,0.439,0.0,-8.140,1.0,0.0238,0.7010,0.000000,0.0875,0.722,...,0.0,1.0,0.0,0.0,0.0,0.0,2.717333,0.0,0.57,0.43
1093,0.689,0.368,0.0,-7.340,1.0,0.0263,0.5880,0.000000,0.1010,0.738,...,0.0,1.0,0.0,0.0,0.0,0.0,2.240450,1.0,0.45,0.55
1094,0.516,0.154,4.0,-22.100,1.0,0.0383,0.9870,0.038300,0.1150,0.451,...,0.0,1.0,0.0,0.0,0.0,1.0,3.993917,1.0,0.25,0.75


In [54]:
temp = pd.DataFrame()

for gen in genres:
    if temp.empty:
        print('empty')
        print(gen)
        temp = rec_df.loc[rec_df[gen] == 1]
        print(temp.shape)
    else:
        print(gen)
        temp2 = rec_df.loc[rec_df[gen] == 1]
        temp = temp.append(temp2[temp2.isin(temp) == False].dropna())
        print(temp.shape)

temp.head()

empty
r&b
(404, 27)
pop
(693, 27)
rock
(733, 27)


Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,r&b,pop,rap,hip hop,funk,rock,duration_min,rf_pred_class,rf_pred_disliked,rf_pred_liked
0,0.35,0.12,3.0,-11.104,0.0,0.0393,0.982,0.000289,0.103,0.121,...,1.0,1.0,0.0,1.0,0.0,0.0,3.37,0.0,0.59,0.41
2,0.538,0.64,7.0,-7.957,1.0,0.281,0.0727,2.7e-05,0.841,0.445,...,1.0,1.0,1.0,1.0,0.0,0.0,4.086767,0.0,0.64,0.36
5,0.839,0.475,0.0,-9.338,0.0,0.247,0.414,0.0,0.216,0.909,...,1.0,0.0,0.0,0.0,0.0,0.0,3.1712,1.0,0.45,0.55
12,0.471,0.411,0.0,-9.5,1.0,0.295,0.81,0.0,0.135,0.363,...,1.0,1.0,0.0,0.0,0.0,0.0,3.52045,0.0,0.58,0.42
13,0.866,0.679,6.0,-6.74,0.0,0.388,0.214,5e-06,0.101,0.272,...,1.0,1.0,0.0,1.0,0.0,0.0,4.468117,0.0,0.73,0.27


In [56]:
def generate_playlist(rdf, genres, mood):
    
    # To return a playlist of 10 songs
    new_rec_df = pd.DataFrame()
    
    # GENRE -- if song is of a genre included in selected genres, add to 'filtered' dataframe
    for gen in genres:
        if new_rec_df.empty:
            new_rec_df = rdf.loc[rdf[gen] == 1]
        else:
            temp = rdf.loc[rdf[gen] == 1]
            new_rec_df = new_rec_df.append(temp[temp.isin(new_rec_df) == False].dropna())
            
    new_rec_df = new_rec_df.reset_index(drop=True)
        
    # MOOD -- Filter out songs based on mood 
    if mood=='happy':
        new_rec_df = new_rec_df.loc[new_rec_df['valence'] > 0.5]
    elif mood=='sad':
        new_rec_df = new_rec_df.loc[new_rec_df['valence'] < 0.5]
    elif mood=='upbeat':
        new_rec_df = new_rec_df.loc[new_rec_df['energy'] > 0.6 and new_rec_df['danceability'] > 0.6]
        
    
    # order output by probabilities (desc.)
    new_rec_df = new_rec_df.sort_values(by=['rf_pred_liked'], ascending=False)
    
    # pick top 10 songs to output 
    final_df = new_rec_df.head(10)
    
    return final_df

**Import the final_rec_df created with columns of probability liked/disliked**

In [4]:
rec_df = pd.read_csv('../datasets/final_rec_df.csv')
rec_df = rec_df.drop(columns=['Unnamed: 0'])
rec_df.head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,r&b,pop,rap,hip hop,funk,rock,duration_min,rf_pred_class,rf_pred_disliked,rf_pred_liked
0,0.35,0.12,3.0,-11.104,0.0,0.0393,0.982,0.000289,0.103,0.121,...,1,1,0,1,0,0,3.37,0,0.59,0.41
1,0.747,0.458,6.0,-6.891,1.0,0.303,0.451,0.0,0.252,0.47,...,0,1,0,0,0,0,3.033333,0,0.55,0.45
2,0.538,0.64,7.0,-7.957,1.0,0.281,0.0727,2.7e-05,0.841,0.445,...,1,1,1,1,0,0,4.086767,0,0.64,0.36
3,0.829,0.797,5.0,-5.428,1.0,0.109,0.0617,0.0,0.112,0.549,...,0,0,0,0,0,0,2.852217,1,0.31,0.69
4,0.518,0.762,7.0,-3.907,1.0,0.0305,0.0851,0.0,0.148,0.271,...,0,1,0,0,0,1,4.7523,0,0.68,0.32


In [5]:
sum(rec_df['rf_pred_class'])

430

In [6]:
rec_df['rf_pred_class'].value_counts()

0    669
1    430
Name: rf_pred_class, dtype: int64

**Test the functions on the dataset:**

In [57]:
# With mood = 0 (no preference) and genres including funk and rock, this is the recommended playlist
p1 = generate_playlist(rec_df, genres, mood)

In [58]:
p1

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,...,r&b,pop,rap,hip hop,funk,rock,duration_min,rf_pred_class,rf_pred_disliked,rf_pred_liked
700,0.666,0.668,0.0,-7.961,1.0,0.0523,0.251,6e-06,0.499,0.765,...,0.0,0.0,0.0,0.0,0.0,1.0,5.375817,1.0,0.11,0.89
698,0.813,0.46,1.0,-9.78,1.0,0.0346,0.56,0.0105,0.0724,0.729,...,0.0,0.0,0.0,0.0,0.0,1.0,4.23145,1.0,0.12,0.88
432,0.715,0.647,5.0,-5.965,0.0,0.321,0.707,0.0,0.179,0.729,...,0.0,1.0,1.0,1.0,0.0,0.0,3.90325,1.0,0.13,0.87
442,0.746,0.345,1.0,-10.963,1.0,0.0875,0.183,0.00367,0.121,0.598,...,0.0,1.0,1.0,1.0,0.0,0.0,5.448883,1.0,0.16,0.84
531,0.841,0.644,9.0,-3.284,0.0,0.083,0.594,0.00402,0.142,0.881,...,0.0,1.0,0.0,1.0,0.0,0.0,4.260133,1.0,0.17,0.83
705,0.462,0.595,2.0,-9.363,1.0,0.0347,0.215,0.0,0.184,0.64,...,0.0,0.0,0.0,0.0,0.0,1.0,2.54355,1.0,0.18,0.82
438,0.574,0.791,9.0,-4.708,1.0,0.0341,0.421,0.00813,0.369,0.776,...,0.0,1.0,0.0,0.0,0.0,1.0,3.064883,1.0,0.18,0.82
155,0.681,0.333,9.0,-8.582,1.0,0.0974,0.588,0.0,0.086,0.564,...,1.0,1.0,0.0,1.0,0.0,0.0,3.79425,1.0,0.2,0.8
729,0.369,0.199,1.0,-16.954,1.0,0.0505,0.922,0.000157,0.392,0.507,...,0.0,0.0,0.0,0.0,1.0,1.0,1.69155,1.0,0.2,0.8
677,0.705,0.474,10.0,-13.081,1.0,0.0391,0.443,0.0553,0.338,0.756,...,0.0,1.0,0.0,0.0,0.0,1.0,3.885467,1.0,0.24,0.76


In [59]:
# All songs are at least within the r&b, rap or pop genres (or all three)

p1.loc[:, ['r&b', 'pop', 'rock']]

Unnamed: 0,r&b,pop,rock
700,0.0,0.0,1.0
698,0.0,0.0,1.0
432,0.0,1.0,0.0
442,0.0,1.0,0.0
531,0.0,1.0,0.0
705,0.0,0.0,1.0
438,0.0,1.0,1.0
155,1.0,1.0,0.0
729,0.0,0.0,1.0
677,0.0,1.0,1.0


In [20]:
# All songs are below 0.5 valence (mood of song), indicating sadness

p1.loc[:, ['valence']]

Unnamed: 0,valence
472,0.497
162,0.433
150,0.382
157,0.176
169,0.43
53,0.471
91,0.274
5,0.112
518,0.413
621,0.393


In [21]:
p1.loc[:, ['rf_pred_liked']]

Unnamed: 0,rf_pred_liked
472,0.91
162,0.9
150,0.87
157,0.83
169,0.79
53,0.79
91,0.78
5,0.78
518,0.76
621,0.75


In [22]:
p1.loc[:, ['uri']]

Unnamed: 0,uri
472,spotify:track:4L7jMAP8UcIe309yQmkdcO
162,spotify:track:1CLmFKW99S8eJrebO3GB04
150,spotify:track:3QXm6zLOV9DKgLxmwqsysO
157,spotify:track:5Bjjf2Vj3dCaGetKlXS4be
169,spotify:track:5GZ4znceWfpTLrBBpr83DW
53,spotify:track:1hz7SRTGUNAtIQ46qiNv2p
91,spotify:track:2t8yVaLvJ0RenpXUIAC52d
5,spotify:track:1OuN92HcVG6NVpWbeESNB3
518,spotify:track:1KFtR58Hn1nQ9fR0DRnC9n
621,spotify:track:3ujuDsiyBLAXAB6dtNwpGu
