In [81]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt


from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.metrics import accuracy_score,roc_curve, auc, confusion_matrix, classification_report
from sklearn.metrics.pairwise import euclidean_distances, manhattan_distances, cosine_similarity

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

%matplotlib inline

### Get all pertenint data

In [82]:
#get nyoy's predicted dataset
nyoy = pd.read_csv("data/NyoyVolante_predicted_genres.csv")
nyoy['predicted_genre'].value_counts()

Acoustic     55
Rock         18
R&B           6
Classical     5
Name: predicted_genre, dtype: int64

In [83]:
#get spotify's predicted dataset
spotify = pd.read_csv('data/DailyCharts_predicted_genres.csv')
spotify = spotify[spotify['predicted_genre_prob']>=0.5]
spotify['predicted_genre'].value_counts()

R&B          857
Rock         836
Reggae       639
Acoustic     514
Classical     27
Name: predicted_genre, dtype: int64

In [84]:
#get OPM playlist
opm = pd.read_csv('data/OPM_playlist_tracks.csv')
opm = opm[['track_id','artist_id','artist_name']]
opm

Unnamed: 0,track_id,artist_id,artist_name
0,3VcVQmMivq2ISfKUmIuvMK,6ZgBJRjJsK1iOJGKGZxEUv,South Border
1,3OpGUlDmRUXh0NkIYWoIlD,2XHTklRsNMOOQT56Zm3WS4,Parokya Ni Edgar
2,761frfZtZ8I2g70UsjFfBK,3XEHRbR9NMWfNzQ6TQMS9M,Caleb Santos
3,1yDiru08Q6omDOGkZMPnei,4DAcJXcjX0zlQAZAPAx4Zb,Ben&Ben
4,54b8IPvheXDpro2VR2rWAS,2EGJbNf0Hva2C2N6hvhMXD,Soulstice
...,...,...,...
1659,30ol9908qZSemXA6zvtwZs,"['1mcqfNCReSFxun2vIWvC28', '6vBFkiC0HtwqFIbFZz...","['KZ Tandingan', 'Epy Quizon']"
1660,6OPVhRnnN4WuMWMGchljnp,2MhhosdKm5i6IlL4rPdDUt,Jaywalkers
1661,0K4lDbom7qAebFYnlhORJT,007MmXwT1HwcXwuyROgNJb,Fred Engay
1662,2IFNR24j14O8IuqLxFr0NT,0DouUeF604QbSdKYIL3xrq,Keiko Necesario


In [85]:
spotifyopm = spotify.merge(opm, how = 'inner', on = 'track_id')
spotifyopm['predicted_genre'].value_counts()

Acoustic    210
Rock        106
R&B          32
Reggae        6
Name: predicted_genre, dtype: int64

In [86]:
spotifyopm = spotifyopm.drop(columns = ['artist_id_x', 'artist_name_x'])
spotifyopm = spotifyopm.rename(columns = {'artist_id_y': 'artist_id', 'artist_name_y': 'artist_name'})
spotifyopm[['artist_id', 'artist_name']]

Unnamed: 0,artist_id,artist_name
0,7lIVjtsgz0y1oRQFBAVNzq,Mayonnaise
1,7lIVjtsgz0y1oRQFBAVNzq,Mayonnaise
2,"['40JlNF1w2OiSOyj1nC4y0I', '205CbtBaTjs0pxHmv2...","['Joseph Vincent', 'Jules Aurora']"
3,4nGp682WMiKS4X217kPw8C,Silent Sanctuary
4,4nGp682WMiKS4X217kPw8C,Silent Sanctuary
...,...,...
349,4DAcJXcjX0zlQAZAPAx4Zb,Ben&Ben
350,4HOEnLufwAqJ2qoJPVnL01,Callalily
351,4HOEnLufwAqJ2qoJPVnL01,Callalily
352,4HOEnLufwAqJ2qoJPVnL01,Callalily


In [87]:
spotifyopm['artist_name'].unique()

array(['Mayonnaise', "['Joseph Vincent', 'Jules Aurora']",
       'Silent Sanctuary', 'Kamikazee', 'Parokya Ni Edgar',
       "['Kamikazee', 'Kyla']", 'TJ Monterde', 'Nina', 'Autotelic',
       'Mark Carpio', "['Moira Dela Torre', 'Jason Marvin']", 'Hale',
       'Sponge Cola', 'Justin Vasquez', 'Ben&Ben', 'Moira Dela Torre',
       'Jana Garcia', 'Khel Pangilinan', 'Ebe Dancel', 'Joseph Vincent',
       "['Moira Dela Torre', 'I Belong to the Zoo']", 'Patch Quiwa',
       'SUD', 'Chlara', 'December Avenue', 'The Juans', 'Marion Aunor',
       'Michael Dutchi Libranda', 'IV Of Spades', 'Emman', 'Rivermaya',
       'Arthur Nery', 'Jimmy Bondoc', 'Michael Pangilinan',
       'Sarah Geronimo', 'John Roa', 'Eraserheads', 'Erik Santos',
       'This Band', 'Julie Anne San Jose', 'Bandang Lapis', 'Daryl Ong',
       "['Matthaios', 'Dudut']", 'Christian Bautista', 'Unique Salonga',
       'juan karlos', 'Just Hush', 'Music Hero', 'Kaye Cal', '6cyclemind',
       'krissy & ericka', "['Erik Sant

In [88]:
#removing artist that are not pinoy
pinoyartist = spotifyopm['artist_name'].unique()
notpinoy = np.array(['Soulstice'])
pinoyartist = np.setdiff1d(pinoyartist, notpinoy)
spotifyopm = spotifyopm[spotifyopm['artist_name'].isin(pinoyartist)]

In [89]:
spotifyopm.size

8119

In [90]:
#remove duplicates
spotifyopm = spotifyopm.drop_duplicates()
spotifyopm.size

4048

In [70]:
spotifyopm['predicted_genre'].value_counts()

Acoustic    97
Rock        55
R&B         20
Reggae       5
Name: predicted_genre, dtype: int64

In [91]:
#scale the features that need scaling
scaler = MinMaxScaler()
spotifyopm['loudness'] = scaler.fit_transform(spotifyopm[['loudness']])
spotifyopm['tempo'] =  scaler.fit_transform(spotifyopm[['tempo']])
nyoy['loudness'] = scaler.fit_transform(nyoy[['loudness']])
nyoy['tempo'] =  scaler.fit_transform(nyoy[['tempo']])

### Recommendation engine

In [92]:
#cosine distance
def recommendcos(seed_track, pool, genre):
    #compute cosine distances, audio features only
    pool['cosine_dist'] = pool.apply(lambda x: 1-cosine_similarity(x[feature].values.reshape(1, -1),\
                                                                  seed_track[feature].values.reshape(1, -1))\
                                                                  .flatten()[0], axis=1)
    result = pool[['artist_name','track_name','cosine_dist','popularity','predicted_genre']+feature].sort_values('cosine_dist')
    result = result[result['predicted_genre'].isin(genre)][:5]
    return result

In [93]:
#Euclidean distance
def recommendeuc(seed_track, pool, genre):
    #compute euclidian distances, audio features only
    pool['euclidean_dist'] = pool.apply(lambda x: 1-euclidean_distances(x[feature].values.reshape(1, -1),\
                                                                  seed_track[feature].values.reshape(1, -1))\
                                                                  .flatten()[0], axis=1)
    result = pool[['artist_name','track_name','euclidean_dist','popularity','predicted_genre']+feature].sort_values('euclidean_dist')
    result = result[result['predicted_genre'].isin(genre)][:5]
    return result

In [94]:
#Manhattan distance
def recommendman(seed_track, pool, genre):
    #compute manhattan distances, audio features only
    pool['manhattan_dist'] = pool.apply(lambda x: 1-manhattan_distances(x[feature].values.reshape(1, -1),\
                                                                  seed_track[feature].values.reshape(1, -1))\
                                                                  .flatten()[0], axis=1)
    result = pool[['artist_name','track_name','manhattan_dist','popularity','predicted_genre']+feature].sort_values('manhattan_dist')
    result = result[result['predicted_genre'].isin(genre)][:5]
    return result

### Inputs

In [95]:
#identify the features to use
feature = ['danceability',\
                'energy',\
                'loudness',\
                'speechiness',\
                'acousticness',\
                'instrumentalness',\
                #'liveness',\
                'valence',\
                'tempo'\
               ]

In [96]:
#indicate what is the preferred genre of the collaborator
collab_genre = ['R&B','Rock']

In [97]:
nyoy[feature].mean().reset_index().T

Unnamed: 0,0,1,2,3,4,5,6,7
index,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
0,0.561881,0.4036,0.659819,0.039808,0.669076,0.147132,0.384956,0.587727


In [98]:
#indicate values for the track
newfeature = {'danceability':[1],'energy':[1],'loudness':[1],'speechiness':[1],\
              'acousticness':[1],'instrumentalness':[1],'liveness':[1],\
              'valence':[1],'tempo':[1]}
desiredtrack = pd.DataFrame(newfeature)
desiredtrack

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,1,1,1,1,1,1,1,1,1


In [99]:
# or indicate the genre of Nyoy
nyoy_genre = ['Acoustic']

### Result

In [100]:
by_genre = nyoy[nyoy['predicted_genre'].isin(nyoy_genre)]
nyoy_feature_by_genre = by_genre[feature].mean().reset_index().T.reset_index()
nyoy_feature_by_genre = nyoy_feature_by_genre.rename(columns = nyoy_feature_by_genre.iloc[0]).drop(nyoy_feature_by_genre.index[0])
nyoy_feature_by_genre

Unnamed: 0,index,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
1,0,0.552618,0.333065,0.607464,0.0389,0.771,0.135961,0.328533,0.588478


#### result if by nyoy's track

In [101]:
recommendcos(seed_track = nyoy_feature_by_genre, pool = spotifyopm, genre = collab_genre)

Unnamed: 0,artist_name,track_name,cosine_dist,popularity,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
346,"['Powfu', 'beabadoobee']",death bed (coffee for your head),0.014762,92,R&B,0.726,0.431,0.65033,0.135,0.731,0.0,0.348,0.596847
345,Eraserheads,With A Smile,0.024861,64,Rock,0.328,0.465,0.599317,0.0334,0.72,2e-06,0.342,0.63393
271,KZ Tandingan,"Halik Sa Hangin - From ""The Killer Bride""",0.02989,60,R&B,0.505,0.552,0.522333,0.0973,0.623,5.2e-05,0.407,0.6355
307,"['Agsunta', 'Moira Dela Torre']",Kahit Kunwari Man Lang,0.031809,68,R&B,0.631,0.44,0.555187,0.0435,0.529,0.0,0.237,0.425892
266,Kyle Juliano,Nadarang - Campfire Mix,0.035653,50,Rock,0.816,0.539,0.675616,0.0378,0.765,8e-06,0.456,0.412366


In [102]:
recommendeuc(seed_track = nyoy_feature_by_genre, pool = spotifyopm, genre = collab_genre)

Unnamed: 0,artist_name,track_name,euclidean_dist,popularity,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
0,Mayonnaise,Tayo Na Lang Dalawa,-0.139577,57,Rock,0.52,0.969,0.901879,0.0331,4.5e-05,0.00232,0.63,0.267028
6,Kamikazee,Halik,-0.059205,58,Rock,0.525,0.908,0.915646,0.0413,0.0139,0.0,0.628,0.469084
31,Sponge Cola,Kunwari,-0.049925,43,Rock,0.642,0.845,0.827093,0.0261,0.0778,0.0,0.837,0.426226
89,December Avenue,Kahit Sa Panaginip,-0.028266,49,Rock,0.469,0.769,0.842324,0.033,0.0362,3e-05,0.474,0.11357
20,Autotelic,Laro,-0.023592,0,Rock,0.569,0.823,0.94225,0.0271,0.00193,0.00703,0.623,0.568162


In [103]:
recommendman(seed_track = nyoy_feature_by_genre, pool = spotifyopm, genre = collab_genre)

Unnamed: 0,artist_name,track_name,manhattan_dist,popularity,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
0,Mayonnaise,Tayo Na Lang Dalawa,-1.496283,57,Rock,0.52,0.969,0.901879,0.0331,4.5e-05,0.00232,0.63,0.267028
31,Sponge Cola,Kunwari,-1.333626,43,Rock,0.642,0.845,0.827093,0.0261,0.0778,0.0,0.837,0.426226
89,December Avenue,Kahit Sa Panaginip,-1.251419,49,Rock,0.469,0.769,0.842324,0.033,0.0362,3e-05,0.474,0.11357
6,Kamikazee,Halik,-1.225058,58,Rock,0.525,0.908,0.915646,0.0413,0.0139,0.0,0.628,0.469084
20,Autotelic,Laro,-1.065688,0,Rock,0.569,0.823,0.94225,0.0271,0.00193,0.00703,0.623,0.568162


#### Results if with the original theoretical track

In [104]:
recommendcos(seed_track = desiredtrack, pool = spotifyopm, genre = collab_genre)

Unnamed: 0,artist_name,track_name,cosine_dist,popularity,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
324,Up Dharma Down,Oo,0.109743,61,R&B,0.583,0.626,0.712814,0.153,0.465,4e-06,0.6,0.675729
117,"['Matthaios', 'Dudut']",Nararahuyo,0.111315,65,R&B,0.524,0.331,0.524433,0.326,0.622,0.0,0.407,0.190832
292,ALLMO$T,Heart React,0.113683,49,R&B,0.631,0.621,0.656822,0.15,0.381,5e-06,0.499,0.501819
271,KZ Tandingan,"Halik Sa Hangin - From ""The Killer Bride""",0.119175,60,R&B,0.505,0.552,0.522333,0.0973,0.623,5.2e-05,0.407,0.6355
346,"['Powfu', 'beabadoobee']",death bed (coffee for your head),0.130745,92,R&B,0.726,0.431,0.65033,0.135,0.731,0.0,0.348,0.596847


In [105]:
recommendeuc(seed_track = desiredtrack, pool = spotifyopm, genre = collab_genre)

Unnamed: 0,artist_name,track_name,euclidean_dist,popularity,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
108,This Band,'Di Na Babalik,-1.290129,59,Rock,0.358,0.435,0.723554,0.0349,0.00819,0.000936,0.14,0.114536
109,This Band,'Di Na Babalik,-1.290129,0,Rock,0.358,0.435,0.723554,0.0349,0.00819,0.000936,0.14,0.114536
154,This Band,Hindi Na Nga,-1.171319,1,Rock,0.396,0.413,0.513595,0.0334,0.0877,2.9e-05,0.108,0.545642
334,Freestyle,Before I Let You Go,-1.149523,65,R&B,0.655,0.348,0.487967,0.0356,0.176,0.0,0.0641,0.426247
115,Bandang Lapis,Kabilang Buhay,-1.123849,75,Rock,0.389,0.352,0.373883,0.0301,0.251,5e-06,0.203,0.565739


In [106]:
recommendman(seed_track = desiredtrack, pool = spotifyopm, genre = collab_genre)

Unnamed: 0,artist_name,track_name,manhattan_dist,popularity,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
109,This Band,'Di Na Babalik,-5.184884,0,Rock,0.358,0.435,0.723554,0.0349,0.00819,0.000936,0.14,0.114536
108,This Band,'Di Na Babalik,-5.184884,59,Rock,0.358,0.435,0.723554,0.0349,0.00819,0.000936,0.14,0.114536
154,This Band,Hindi Na Nga,-4.902633,1,Rock,0.396,0.413,0.513595,0.0334,0.0877,2.9e-05,0.108,0.545642
115,Bandang Lapis,Kabilang Buhay,-4.835272,75,Rock,0.389,0.352,0.373883,0.0301,0.251,5e-06,0.203,0.565739
334,Freestyle,Before I Let You Go,-4.807086,65,R&B,0.655,0.348,0.487967,0.0356,0.176,0.0,0.0641,0.426247
