In [77]:
import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt


from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.metrics import accuracy_score,roc_curve, auc, confusion_matrix, classification_report
from sklearn.metrics.pairwise import euclidean_distances, manhattan_distances, cosine_similarity

from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

%matplotlib inline

### Get all pertenint data

In [78]:
#get nyoy's predicted dataset
nyoy = pd.read_csv("data/NyoyVolante_predicted_genres.csv")
nyoy['predicted_genre'].value_counts()

Acoustic     55
Rock         18
R&B           6
Classical     5
Name: predicted_genre, dtype: int64

In [79]:
#get spotify's predicted dataset
spotify = pd.read_csv('data/DailyCharts_predicted_genres.csv')
spotify = spotify[spotify['predicted_genre_prob']>=0.5]
spotify['predicted_genre'].value_counts()

R&B          857
Rock         836
Reggae       639
Acoustic     514
Classical     27
Name: predicted_genre, dtype: int64

In [80]:
#get OPM playlist
opm = pd.read_csv('data/OPM_playlist_tracks.csv')
spotifyopm = spotify[spotify['track_id'].isin(opm['track_id'])]
spotifyopm['predicted_genre'].value_counts()

Acoustic    97
Rock        55
R&B         20
Reggae       5
Name: predicted_genre, dtype: int64

In [81]:
spotifyopm['artist_name'].unique().size

90

In [82]:
#scale the features that need scaling
scaler = MinMaxScaler()
spotifyopm['loudness'] = scaler.fit_transform(spotifyopm[['loudness']])
spotifyopm['tempo'] =  scaler.fit_transform(spotifyopm[['tempo']])
nyoy['loudness'] = scaler.fit_transform(nyoy[['loudness']])
nyoy['tempo'] =  scaler.fit_transform(nyoy[['tempo']])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  spotifyopm['loudness'] = scaler.fit_transform(spotifyopm[['loudness']])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  spotifyopm['tempo'] =  scaler.fit_transform(spotifyopm[['tempo']])


### Recommendation engine

In [163]:
#cosine distance
def recommendcos(seed_track, pool, genre):
    #compute cosine distances, audio features only
    pool['cosine_dist'] = pool.apply(lambda x: 1-cosine_similarity(x[feature].values.reshape(1, -1),\
                                                                  seed_track[feature].values.reshape(1, -1))\
                                                                  .flatten()[0], axis=1)
    result = pool[['artist_name','track_name','cosine_dist','popularity','predicted_genre']+feature].sort_values('cosine_dist')
    result = result[result['predicted_genre'].isin(genre)][:5]
    return result

In [172]:
#Euclidean distance
def recommendeuc(seed_track, pool, genre):
    #compute euclidian distances, audio features only
    pool['euclidean_dist'] = pool.apply(lambda x: 1-euclidean_distances(x[feature].values.reshape(1, -1),\
                                                                  seed_track[feature].values.reshape(1, -1))\
                                                                  .flatten()[0], axis=1)
    result = pool[['artist_name','track_name','euclidean_dist','popularity','predicted_genre']+feature].sort_values('euclidean_dist')
    result = result[result['predicted_genre'].isin(genre)][:5]
    return result

In [173]:
#Manhattan distance
def recommendman(seed_track, pool, genre):
    #compute manhattan distances, audio features only
    pool['manhattan_dist'] = pool.apply(lambda x: 1-manhattan_distances(x[feature].values.reshape(1, -1),\
                                                                  seed_track[feature].values.reshape(1, -1))\
                                                                  .flatten()[0], axis=1)
    result = pool[['artist_name','track_name','manhattan_dist','popularity','predicted_genre']+feature].sort_values('manhattan_dist')
    result = result[result['predicted_genre'].isin(genre)][:5]
    return result

### Inputs

In [84]:
#identify the features to use
feature = ['danceability',\
                'energy',\
                'loudness',\
                'speechiness',\
                'acousticness',\
                'instrumentalness',\
                #'liveness',\
                'valence',\
                'tempo'\
               ]

In [159]:
#indicate what is the preferred genre of the collaborator
collab_genre = ['R&B','Rock']

In [161]:
nyoy[feature].mean().reset_index().T

Unnamed: 0,0,1,2,3,4,5,6,7
index,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
0,0.561881,0.4036,0.659819,0.039808,0.669076,0.147132,0.384956,0.587727


In [111]:
#indicate values for the track
newfeature = {'danceability':[1],'energy':[1],'loudness':[1],'speechiness':[1],\
              'acousticness':[1],'instrumentalness':[1],'liveness':[1],\
              'valence':[1],'tempo':[1]}
desiredtrack = pd.DataFrame(newfeature)
desiredtrack

Unnamed: 0,danceability,energy,loudness,speechiness,acousticness,instrumentalness,liveness,valence,tempo
0,1,1,1,1,1,1,1,1,1


In [124]:
# or indicate the genre of Nyoy
nyoy_genre = ['Acoustic']

### Result

In [153]:
by_genre = nyoy[nyoy['predicted_genre'].isin(nyoy_genre)]
nyoy_feature_by_genre = by_genre[feature].mean().reset_index().T.reset_index()
nyoy_feature_by_genre = nyoy_feature_by_genre.rename(columns = nyoy_feature_by_genre.iloc[0]).drop(nyoy_feature_by_genre.index[0])
nyoy_feature_by_genre

Unnamed: 0,index,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
1,0,0.552618,0.333065,0.607464,0.0389,0.771,0.135961,0.328533,0.588478


#### result if by nyoy's track

In [178]:
recommendcos(seed_track = nyoy_feature_by_genre, pool = spotifyopm, genre = collab_genre)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pool['cosine_dist'] = pool.apply(lambda x: 1-cosine_similarity(x[feature].values.reshape(1, -1),\


Unnamed: 0,artist_name,track_name,cosine_dist,popularity,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
2821,Powfu,death bed (coffee for your head),0.014762,92,R&B,0.726,0.431,0.65033,0.135,0.731,0.0,0.348,0.596847
2796,Eraserheads,With A Smile,0.024861,64,Rock,0.328,0.465,0.599317,0.0334,0.72,2e-06,0.342,0.63393
2132,KZ Tandingan,"Halik Sa Hangin - From ""The Killer Bride""",0.02989,60,R&B,0.505,0.552,0.522333,0.0973,0.623,5.2e-05,0.407,0.6355
2522,Agsunta,Kahit Kunwari Man Lang,0.031809,68,R&B,0.631,0.44,0.555187,0.0435,0.529,0.0,0.237,0.425892
2052,Kyle Juliano,Nadarang - Campfire Mix,0.035653,50,Rock,0.816,0.539,0.675616,0.0378,0.765,8e-06,0.456,0.412366


In [179]:
recommendeuc(seed_track = nyoy_feature_by_genre, pool = spotifyopm, genre = collab_genre)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pool['euclidean_dist'] = pool.apply(lambda x: 1-euclidean_distances(x[feature].values.reshape(1, -1),\


Unnamed: 0,artist_name,track_name,euclidean_dist,popularity,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
2,Mayonnaise,Tayo Na Lang Dalawa,-0.139577,57,Rock,0.52,0.969,0.901879,0.0331,4.5e-05,0.00232,0.63,0.267028
25,Kamikazee,Halik,-0.059205,58,Rock,0.525,0.908,0.915646,0.0413,0.0139,0.0,0.628,0.469084
132,Sponge Cola,Kunwari,-0.049925,43,Rock,0.642,0.845,0.827093,0.0261,0.0778,0.0,0.837,0.426226
519,December Avenue,Kahit Sa Panaginip,-0.028266,49,Rock,0.469,0.769,0.842324,0.033,0.0362,3e-05,0.474,0.11357
63,Autotelic,Laro,-0.023592,0,Rock,0.569,0.823,0.94225,0.0271,0.00193,0.00703,0.623,0.568162


In [180]:
recommendman(seed_track = nyoy_feature_by_genre, pool = spotifyopm, genre = collab_genre)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pool['manhattan_dist'] = pool.apply(lambda x: 1-manhattan_distances(x[feature].values.reshape(1, -1),\


Unnamed: 0,artist_name,track_name,manhattan_dist,popularity,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
2,Mayonnaise,Tayo Na Lang Dalawa,-1.496283,57,Rock,0.52,0.969,0.901879,0.0331,4.5e-05,0.00232,0.63,0.267028
132,Sponge Cola,Kunwari,-1.333626,43,Rock,0.642,0.845,0.827093,0.0261,0.0778,0.0,0.837,0.426226
519,December Avenue,Kahit Sa Panaginip,-1.251419,49,Rock,0.469,0.769,0.842324,0.033,0.0362,3e-05,0.474,0.11357
25,Kamikazee,Halik,-1.225058,58,Rock,0.525,0.908,0.915646,0.0413,0.0139,0.0,0.628,0.469084
63,Autotelic,Laro,-1.065688,0,Rock,0.569,0.823,0.94225,0.0271,0.00193,0.00703,0.623,0.568162


#### Results if with the original theoretical track

In [177]:
recommendcos(seed_track = desiredtrack, pool = spotifyopm, genre = collab_genre)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pool['cosine_dist'] = pool.apply(lambda x: 1-cosine_similarity(x[feature].values.reshape(1, -1),\


Unnamed: 0,artist_name,track_name,cosine_dist,popularity,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
2670,Up Dharma Down,Oo,0.109743,61,R&B,0.583,0.626,0.712814,0.153,0.465,4e-06,0.6,0.675729
596,Matthaios,Nararahuyo,0.111315,65,R&B,0.524,0.331,0.524433,0.326,0.622,0.0,0.407,0.190832
2320,ALLMO$T,Heart React,0.113683,49,R&B,0.631,0.621,0.656822,0.15,0.381,5e-06,0.499,0.501819
2132,KZ Tandingan,"Halik Sa Hangin - From ""The Killer Bride""",0.119175,60,R&B,0.505,0.552,0.522333,0.0973,0.623,5.2e-05,0.407,0.6355
2821,Powfu,death bed (coffee for your head),0.130745,92,R&B,0.726,0.431,0.65033,0.135,0.731,0.0,0.348,0.596847


In [175]:
recommendeuc(seed_track = desiredtrack, pool = spotifyopm, genre = collab_genre)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pool['euclidean_dist'] = pool.apply(lambda x: 1-euclidean_distances(x[feature].values.reshape(1, -1),\


Unnamed: 0,artist_name,track_name,euclidean_dist,popularity,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
539,This Band,'Di Na Babalik,-1.290129,0,Rock,0.358,0.435,0.723554,0.0349,0.00819,0.000936,0.14,0.114536
538,This Band,'Di Na Babalik,-1.290129,59,Rock,0.358,0.435,0.723554,0.0349,0.00819,0.000936,0.14,0.114536
1018,This Band,Hindi Na Nga,-1.171319,1,Rock,0.396,0.413,0.513595,0.0334,0.0877,2.9e-05,0.108,0.545642
2745,Freestyle,Before I Let You Go,-1.149523,65,R&B,0.655,0.348,0.487967,0.0356,0.176,0.0,0.0641,0.426247
580,Bandang Lapis,Kabilang Buhay,-1.123849,75,Rock,0.389,0.352,0.373883,0.0301,0.251,5e-06,0.203,0.565739


In [176]:
recommendman(seed_track = desiredtrack, pool = spotifyopm, genre = collab_genre)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pool['manhattan_dist'] = pool.apply(lambda x: 1-manhattan_distances(x[feature].values.reshape(1, -1),\


Unnamed: 0,artist_name,track_name,manhattan_dist,popularity,predicted_genre,danceability,energy,loudness,speechiness,acousticness,instrumentalness,valence,tempo
538,This Band,'Di Na Babalik,-5.184884,59,Rock,0.358,0.435,0.723554,0.0349,0.00819,0.000936,0.14,0.114536
539,This Band,'Di Na Babalik,-5.184884,0,Rock,0.358,0.435,0.723554,0.0349,0.00819,0.000936,0.14,0.114536
1018,This Band,Hindi Na Nga,-4.902633,1,Rock,0.396,0.413,0.513595,0.0334,0.0877,2.9e-05,0.108,0.545642
580,Bandang Lapis,Kabilang Buhay,-4.835272,75,Rock,0.389,0.352,0.373883,0.0301,0.251,5e-06,0.203,0.565739
2745,Freestyle,Before I Let You Go,-4.807086,65,R&B,0.655,0.348,0.487967,0.0356,0.176,0.0,0.0641,0.426247
