In [229]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt


from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.metrics import accuracy_score,roc_curve, auc, confusion_matrix, classification_report
from sklearn.metrics.pairwise import euclidean_distances, manhattan_distances, cosine_similarity

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

%matplotlib inline

### Get all pertenint data

In [230]:
#get nyoy's predicted dataset
nyoy = pd.read_csv("data/NyoyVolante_predicted_genres.csv")
nyoy['predicted_genre'].value_counts()

Acoustic     55
Rock         18
R&B           6
Classical     5
Name: predicted_genre, dtype: int64

In [231]:
#get spotify's predicted dataset
spotify = pd.read_csv('data/DailyCharts_predicted_genres.csv')
spotify = spotify[spotify['predicted_genre_prob']>=0.5]
spotify['predicted_genre'].value_counts()

R&B          857
Rock         836
Reggae       639
Acoustic     514
Classical     27
Name: predicted_genre, dtype: int64

In [232]:
#get the total streams per track
daily = pd.read_csv('data/spotify_daily_charts.csv')
daily = daily.groupby(['track_id'])['streams'].mean().reset_index()
spotify = spotify.merge(daily, how = 'left', on = 'track_id')
spotify

Unnamed: 0,track_id,track_name,artist_id,artist_name,album_id,duration,release_date,popularity,danceability,energy,...,acousticness,instrumentalness,liveness,valence,tempo,predicted_genre_id,predicted_genre_prob,predicted_genre,classification_probability,streams
0,3CGZ7wfk4skmuyQgua1C1K,Chromatica I,1HY2Jd0NmPuamShAr6KMms,Lady Gaga,05c49JgPmL4Uz2ZeqRx5SP,60400,2020-05-29,64,0.231,0.457,...,0.012600,0.875000,0.327,0.0588,65.827,1,0.999643,Classical,99.96427297592163%,38468.333333
1,0oQc0F6KUE7QY7k5TU6bic,Chromatica II,1HY2Jd0NmPuamShAr6KMms,Lady Gaga,05c49JgPmL4Uz2ZeqRx5SP,41866,2020-05-29,67,0.184,0.297,...,0.473000,0.893000,0.527,0.1130,75.824,1,0.999565,Classical,99.95651841163635%,39926.500000
2,468iB1VWvEy9ln8M9zdde6,Tayo Na Lang Dalawa,7lIVjtsgz0y1oRQFBAVNzq,Mayonnaise,4V1wdlWzWsSPbgXrgipsPS,185066,2014-04-20,57,0.520,0.969,...,0.000045,0.002320,0.157,0.6300,97.598,5,0.999499,Rock,99.94986057281494%,24652.716216
3,5wQnmLuC1W7ATsArWACrgW,Welcome to the Black Parade,7FBcuc1gsnv6Y1nwFtNRCb,My Chemical Romance,0FZK97MXMm5mUQ8mtudjuK,311106,2006-10-23,79,0.217,0.905,...,0.000289,0.000110,0.222,0.2360,96.950,5,0.999404,Rock,99.9403715133667%,26172.000000
4,6f49kbOuQSOsStBpyGvQfA,A Head Full of Dreams,4gzpq5DPGxSnKTe4SA8HAU,Coldplay,3cfAM8b8KqJRoIzt3zLKqw,223773,2015-12-04,68,0.449,0.920,...,0.002100,0.016700,0.334,0.0772,122.984,5,0.999403,Rock,99.9402642250061%,29626.166667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2868,4Km5HrUvYTaSUfiSGPJeQR,Bad and Boujee (feat. Lil Uzi Vert),6oMuImdp5ZcFhWP0ESe6mG,Migos,2AvupjUeMnSffKEV05x222,343150,2017-01-27,78,0.927,0.665,...,0.061000,0.000000,0.123,0.1750,127.076,2,0.501514,Reggae,50.15139579772949%,24562.943925
2869,3VI7vR4nbDDhoF63XHSbmE,paper,49tQo2QULno7gxHutgccqF,LANY,6tmSIFaEjxAtuYwPq9FaFP,244466,2020-10-02,61,0.490,0.565,...,0.492000,0.000005,0.283,0.4520,153.841,5,0.501231,Rock,50.123125314712524%,42240.500000
2870,6GYZS99WOJ00mZ4lAJ5gDA,"Thank You, Ang Babait Ninyo",2H4bbbOohoe9zhqEsiRi7j,ABS-CBN Music All Star,5XTT0ss5Bb4VFRYvjbGevV,352395,2014-10-16,55,0.640,0.469,...,0.200000,0.000002,0.115,0.5740,93.993,5,0.500871,Rock,50.08708834648132%,48943.500000
2871,1mGo7MnaIQ9Bfp1nJDCi1z,Bad Things (with Camila Cabello),6TIYQ3jFPwQSRmorSezPxX,Machine Gun Kelly,1dRiSd1bcF1j9BmHDvn7Mq,239289,2017-05-12,27,0.680,0.697,...,0.231000,0.000000,0.178,0.2870,137.736,4,0.500862,R&B,50.08620023727417%,46626.500000


In [233]:
#get OPM playlist
opm = pd.read_csv('data/OPM_playlist_tracks.csv')
opm = opm[['track_id','artist_id','artist_name']]
opm

Unnamed: 0,track_id,artist_id,artist_name
0,3VcVQmMivq2ISfKUmIuvMK,6ZgBJRjJsK1iOJGKGZxEUv,South Border
1,3OpGUlDmRUXh0NkIYWoIlD,2XHTklRsNMOOQT56Zm3WS4,Parokya Ni Edgar
2,761frfZtZ8I2g70UsjFfBK,3XEHRbR9NMWfNzQ6TQMS9M,Caleb Santos
3,1yDiru08Q6omDOGkZMPnei,4DAcJXcjX0zlQAZAPAx4Zb,Ben&Ben
4,54b8IPvheXDpro2VR2rWAS,2EGJbNf0Hva2C2N6hvhMXD,Soulstice
...,...,...,...
1659,30ol9908qZSemXA6zvtwZs,"['1mcqfNCReSFxun2vIWvC28', '6vBFkiC0HtwqFIbFZz...","['KZ Tandingan', 'Epy Quizon']"
1660,6OPVhRnnN4WuMWMGchljnp,2MhhosdKm5i6IlL4rPdDUt,Jaywalkers
1661,0K4lDbom7qAebFYnlhORJT,007MmXwT1HwcXwuyROgNJb,Fred Engay
1662,2IFNR24j14O8IuqLxFr0NT,0DouUeF604QbSdKYIL3xrq,Keiko Necesario


In [234]:
spotifyopm = spotify.merge(opm, how = 'inner', on = 'track_id')
spotifyopm['predicted_genre'].value_counts()

Acoustic    210
Rock        106
R&B          32
Reggae        6
Name: predicted_genre, dtype: int64

In [235]:
spotifyopm = spotifyopm.drop(columns = ['artist_id_x', 'artist_name_x'])
spotifyopm = spotifyopm.rename(columns = {'artist_id_y': 'artist_id', 'artist_name_y': 'artist_name'})
spotifyopm[['artist_id', 'artist_name']]

Unnamed: 0,artist_id,artist_name
0,7lIVjtsgz0y1oRQFBAVNzq,Mayonnaise
1,7lIVjtsgz0y1oRQFBAVNzq,Mayonnaise
2,"['40JlNF1w2OiSOyj1nC4y0I', '205CbtBaTjs0pxHmv2...","['Joseph Vincent', 'Jules Aurora']"
3,4nGp682WMiKS4X217kPw8C,Silent Sanctuary
4,4nGp682WMiKS4X217kPw8C,Silent Sanctuary
...,...,...
349,4DAcJXcjX0zlQAZAPAx4Zb,Ben&Ben
350,4HOEnLufwAqJ2qoJPVnL01,Callalily
351,4HOEnLufwAqJ2qoJPVnL01,Callalily
352,4HOEnLufwAqJ2qoJPVnL01,Callalily


In [236]:
spotifyopm['artist_name'].unique()

array(['Mayonnaise', "['Joseph Vincent', 'Jules Aurora']",
       'Silent Sanctuary', 'Kamikazee', 'Parokya Ni Edgar',
       "['Kamikazee', 'Kyla']", 'TJ Monterde', 'Nina', 'Autotelic',
       'Mark Carpio', "['Moira Dela Torre', 'Jason Marvin']", 'Hale',
       'Sponge Cola', 'Justin Vasquez', 'Ben&Ben', 'Moira Dela Torre',
       'Jana Garcia', 'Khel Pangilinan', 'Ebe Dancel', 'Joseph Vincent',
       "['Moira Dela Torre', 'I Belong to the Zoo']", 'Patch Quiwa',
       'SUD', 'Chlara', 'December Avenue', 'The Juans', 'Marion Aunor',
       'Michael Dutchi Libranda', 'IV Of Spades', 'Emman', 'Rivermaya',
       'Arthur Nery', 'Jimmy Bondoc', 'Michael Pangilinan',
       'Sarah Geronimo', 'John Roa', 'Eraserheads', 'Erik Santos',
       'This Band', 'Julie Anne San Jose', 'Bandang Lapis', 'Daryl Ong',
       "['Matthaios', 'Dudut']", 'Christian Bautista', 'Unique Salonga',
       'juan karlos', 'Just Hush', 'Music Hero', 'Kaye Cal', '6cyclemind',
       'krissy & ericka', "['Erik Sant

In [237]:
#removing artist that are not pinoy
pinoyartist = spotifyopm['artist_name'].unique()
notpinoy = np.array(['Soulstice'])
pinoyartist = np.setdiff1d(pinoyartist, notpinoy)
spotifyopm = spotifyopm[spotifyopm['artist_name'].isin(pinoyartist)]

In [238]:
spotifyopm.size

8472

In [239]:
#remove duplicates
spotifyopm = spotifyopm.drop_duplicates()
spotifyopm.size

4224

In [240]:
spotifyopm['predicted_genre'].value_counts()

Acoustic    97
Rock        55
R&B         19
Reggae       5
Name: predicted_genre, dtype: int64

In [241]:
#scale the features that need scaling
scaler = MinMaxScaler()
spotifyopm['loudness'] = scaler.fit_transform(spotifyopm[['loudness']])
spotifyopm['tempo'] =  scaler.fit_transform(spotifyopm[['tempo']])
nyoy['loudness'] = scaler.fit_transform(nyoy[['loudness']])
nyoy['tempo'] =  scaler.fit_transform(nyoy[['tempo']])

### Recommendation engine

In [242]:
#cosine distance
def recommendcos(seed_track, pool, genre):
    #compute cosine distances, audio features only
    pool['cosine_dist'] = pool.apply(lambda x: 1-cosine_similarity(x[feature].values.reshape(1, -1),\
                                                                  seed_track[feature].values.reshape(1, -1))\
                                                                  .flatten()[0], axis=1)
    result = pool[['artist_name','track_name','cosine_dist','streams','predicted_genre']+feature].sort_values('cosine_dist')
    result = result[result['predicted_genre'].isin(genre)][:5]
    return result

In [243]:
#Euclidean distance
def recommendeuc(seed_track, pool, genre):
    #compute euclidian distances, audio features only
    pool['euclidean_dist'] = pool.apply(lambda x: 1-euclidean_distances(x[feature].values.reshape(1, -1),\
                                                                  seed_track[feature].values.reshape(1, -1))\
                                                                  .flatten()[0], axis=1)
    result = pool[['artist_name','track_name','euclidean_dist','streams','predicted_genre']+feature].sort_values('euclidean_dist')
    result = result[result['predicted_genre'].isin(genre)][:5]
    return result

In [244]:
#Manhattan distance
def recommendman(seed_track, pool, genre):
    #compute manhattan distances, audio features only
    pool['manhattan_dist'] = pool.apply(lambda x: 1-manhattan_distances(x[feature].values.reshape(1, -1),\
                                                                  seed_track[feature].values.reshape(1, -1))\
                                                                  .flatten()[0], axis=1)
    result = pool[['artist_name','track_name','manhattan_dist','streams','predicted_genre']+feature].sort_values('manhattan_dist')
    result = result[result['predicted_genre'].isin(genre)][:5]
    return result

### Inputs

In [245]:
#identify the features to use
feature = ['danceability',\
                'energy',\
                'loudness',\
                'speechiness',\
                'acousticness',\
                'instrumentalness',\
                #'liveness',\
                'valence',\
                'tempo'\
               ]

In [246]:
#indicate what is the preferred genre of the collaborator
collab_genre = ['R&B']

In [247]:
nyoy[feature].mean().reset_index().T

Unnamed: 0,0,1,2,3,4,5,6,7
index,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
0,0.561881,0.4036,0.659819,0.039808,0.669076,0.147132,0.384956,0.587727


In [248]:
#indicate values for the track
newfeature = {'danceability':[0.8],'energy':[0.8],'loudness':[0.66],'speechiness':[0.40],\
              'acousticness':[0.67],'instrumentalness':[0.15],'liveness':[1],\
              'valence':[0.38],'tempo':[0.58]}
desiredtrack = pd.DataFrame(newfeature)
desiredtrack

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,0.8,0.8,0.66,0.4,0.67,0.15,1,0.38,0.58


In [249]:
# or indicate the genre of Nyoy
nyoy_genre = ['Acoustic']

### Result

In [250]:
by_genre = nyoy[nyoy['predicted_genre'].isin(nyoy_genre)]
nyoy_feature_by_genre = by_genre[feature].mean().reset_index().T.reset_index()
nyoy_feature_by_genre = nyoy_feature_by_genre.rename(columns = nyoy_feature_by_genre.iloc[0]).drop(nyoy_feature_by_genre.index[0])
nyoy_feature_by_genre

Unnamed: 0,index,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
1,0,0.552618,0.333065,0.607464,0.0389,0.771,0.135961,0.328533,0.588478


#### result if by nyoy's track

In [251]:
recommendcos(seed_track = nyoy_feature_by_genre, pool = spotifyopm, genre = collab_genre)

Unnamed: 0,artist_name,track_name,cosine_dist,streams,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
346,"['Powfu', 'beabadoobee']",death bed (coffee for your head),0.014762,89342.571429,R&B,0.726,0.431,0.65033,0.135,0.731,0.0,0.348,0.596847
271,KZ Tandingan,"Halik Sa Hangin - From ""The Killer Bride""",0.02989,29088.753623,R&B,0.505,0.552,0.522333,0.0973,0.623,5.2e-05,0.407,0.6355
307,"['Agsunta', 'Moira Dela Torre']",Kahit Kunwari Man Lang,0.031809,44430.756757,R&B,0.631,0.44,0.555187,0.0435,0.529,0.0,0.237,0.425892
288,Arthur Nery,Higa,0.040104,67222.942222,R&B,0.657,0.337,0.563388,0.0433,0.45,0.0,0.183,0.462094
337,"['Matthaios', 'Calvin De Leon']",Binibini,0.052355,80957.112745,R&B,0.851,0.352,0.474054,0.0749,0.737,0.0,0.536,0.390849


In [252]:
recommendeuc(seed_track = nyoy_feature_by_genre, pool = spotifyopm, genre = collab_genre)

Unnamed: 0,artist_name,track_name,euclidean_dist,streams,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
199,Shanti Dope,Amatz,0.142295,32172.47191,R&B,0.825,0.496,0.660239,0.0503,0.00817,0.000511,0.185,0.482908
222,Janine Teñoso,'Di Na Muli,0.177699,112869.909091,R&B,0.413,0.561,0.927264,0.0517,0.192,0.0,0.352,0.203747
278,SUD,Sila,0.1908,44404.669065,R&B,0.635,0.376,0.780425,0.0457,0.0104,0.0,0.28,0.458634
339,ALLMO$T,Bagay Tayo,0.262631,88766.632877,R&B,0.804,0.557,0.655553,0.163,0.322,0.0,0.645,0.284468
174,Sarah Geronimo,Tala,0.264781,60902.104317,R&B,0.598,0.795,1.0,0.0497,0.623,0.000193,0.499,0.269877


In [253]:
recommendman(seed_track = nyoy_feature_by_genre, pool = spotifyopm, genre = collab_genre)

Unnamed: 0,artist_name,track_name,manhattan_dist,streams,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
339,ALLMO$T,Bagay Tayo,-0.852944,88766.632877,R&B,0.804,0.557,0.655553,0.163,0.322,0.0,0.645,0.284468
222,Janine Teñoso,'Di Na Muli,-0.823313,112869.909091,R&B,0.413,0.561,0.927264,0.0517,0.192,0.0,0.352,0.203747
174,Sarah Geronimo,Tala,-0.68349,60902.104317,R&B,0.598,0.795,1.0,0.0497,0.623,0.000193,0.499,0.269877
199,Shanti Dope,Amatz,-0.646875,32172.47191,R&B,0.825,0.496,0.660239,0.0503,0.00817,0.000511,0.185,0.482908
87,Sarah Geronimo,Isa Pang Araw,-0.425247,32745.591837,R&B,0.515,0.715,0.880986,0.046,0.277,0.0,0.416,0.580835


#### Results if with the original theoretical track

In [254]:
recommendcos(seed_track = desiredtrack, pool = spotifyopm, genre = collab_genre)

Unnamed: 0,artist_name,track_name,cosine_dist,streams,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
292,ALLMO$T,Heart React,0.033329,29180.166667,R&B,0.631,0.621,0.656822,0.15,0.381,5e-06,0.499,0.501819
307,"['Agsunta', 'Moira Dela Torre']",Kahit Kunwari Man Lang,0.035553,44430.756757,R&B,0.631,0.44,0.555187,0.0435,0.529,0.0,0.237,0.425892
271,KZ Tandingan,"Halik Sa Hangin - From ""The Killer Bride""",0.038111,29088.753623,R&B,0.505,0.552,0.522333,0.0973,0.623,5.2e-05,0.407,0.6355
346,"['Powfu', 'beabadoobee']",death bed (coffee for your head),0.039478,89342.571429,R&B,0.726,0.431,0.65033,0.135,0.731,0.0,0.348,0.596847
324,Up Dharma Down,Oo,0.046228,17976.368421,R&B,0.583,0.626,0.712814,0.153,0.465,4e-06,0.6,0.675729


In [255]:
recommendeuc(seed_track = desiredtrack, pool = spotifyopm, genre = collab_genre)

Unnamed: 0,artist_name,track_name,euclidean_dist,streams,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
278,SUD,Sila,0.089323,44404.669065,R&B,0.635,0.376,0.780425,0.0457,0.0104,0.0,0.28,0.458634
222,Janine Teñoso,'Di Na Muli,0.109513,112869.909091,R&B,0.413,0.561,0.927264,0.0517,0.192,0.0,0.352,0.203747
334,Freestyle,Before I Let You Go,0.118138,27223.134809,R&B,0.655,0.348,0.487967,0.0356,0.176,0.0,0.0641,0.426247
199,Shanti Dope,Amatz,0.149618,32172.47191,R&B,0.825,0.496,0.660239,0.0503,0.00817,0.000511,0.185,0.482908
42,Khel Pangilinan,Weak,0.196128,47678.388489,R&B,0.71,0.373,0.560215,0.0618,0.179,0.0,0.141,0.476351


In [256]:
recommendman(seed_track = desiredtrack, pool = spotifyopm, genre = collab_genre)

Unnamed: 0,artist_name,track_name,manhattan_dist,streams,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
222,Janine Teñoso,'Di Na Muli,-1.273817,112869.909091,R&B,0.413,0.561,0.927264,0.0517,0.192,0.0,0.352,0.203747
334,Freestyle,Before I Let You Go,-1.247086,27223.134809,R&B,0.655,0.348,0.487967,0.0356,0.176,0.0,0.0641,0.426247
278,SUD,Sila,-1.094691,44404.669065,R&B,0.635,0.376,0.780425,0.0457,0.0104,0.0,0.28,0.458634
42,Khel Pangilinan,Weak,-0.938634,47678.388489,R&B,0.71,0.373,0.560215,0.0618,0.179,0.0,0.141,0.476351
199,Shanti Dope,Amatz,-0.78235,32172.47191,R&B,0.825,0.496,0.660239,0.0503,0.00817,0.000511,0.185,0.482908
