In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In this notebook, I implemented a music artist recommender system using the spotify dataset. 

Data Loading

In [41]:
import numpy as np 
import pandas as pd 

In [42]:
data_artist = pd.read_csv('/content/drive/MyDrive/Technocolab/data_by_artist.csv')
print(data_artist.columns)
print("\n")
print(data_artist.shape)
data_artist.head()

Index(['artists', 'acousticness', 'danceability', 'duration_ms', 'energy',
       'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo',
       'valence', 'popularity', 'key', 'mode', 'count'],
      dtype='object')


(27621, 15)


Unnamed: 0,artists,acousticness,danceability,duration_ms,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,key,mode,count
0,"""Cats"" 1981 Original London Cast",0.575083,0.44275,247260.0,0.386336,0.022717,0.287708,-14.205417,0.180675,115.9835,0.334433,38.0,5,1,12
1,"""Cats"" 1983 Broadway Cast",0.862538,0.441731,287280.0,0.406808,0.081158,0.315215,-10.69,0.176212,103.044154,0.268865,33.076923,5,1,26
2,"""Fiddler On The Roof” Motion Picture Chorus",0.856571,0.348286,328920.0,0.286571,0.024593,0.325786,-15.230714,0.118514,77.375857,0.354857,34.285714,0,1,7
3,"""Fiddler On The Roof” Motion Picture Orchestra",0.884926,0.425074,262890.962963,0.24577,0.073587,0.275481,-15.63937,0.1232,88.66763,0.37203,34.444444,0,1,27
4,"""Joseph And The Amazing Technicolor Dreamcoat""...",0.605444,0.437333,232428.111111,0.429333,0.037534,0.216111,-11.447222,0.086,120.329667,0.458667,42.555556,11,1,9


Drop the columns not very useful for recommendation.

In [43]:
data_artist.drop(['duration_ms','key','mode','count'],axis=1,inplace=True)
data_artist.shape

(27621, 11)

Normalize columns which are already not.

In [44]:
data_artist['popularity'] = data_artist['popularity']/100
data_artist['tempo'] = (data_artist['tempo'] - 50)/100
data_artist['loudness'] = (data_artist['loudness'] + 60)/60

Creating a user profile and then create recommendation matrix based on user ratings of a few artists.

In [45]:
features = ['acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 
            'loudness', 'speechiness', 'tempo', 'valence', 'popularity']

def choice_of_user(artistRatingDict):
    artists = artistRatingDict.keys()
    artist_Mat = data_artist[data_artist['artists'].isin(artists)]
    #print(artist_Mat)
    
    for artist, rating in artistRatingDict.items():
        artist_Mat.loc[artist_Mat['artists']==
                   artist,features] = artist_Mat.loc[artist_Mat['artists']==
                                                 artist,features].mul(rating,axis=0)
    
  
    userProfile = artist_Mat.loc[:,features].sum(axis=0)
    normalized_userProfile = (userProfile/userProfile.sum())*10
    
    return normalized_userProfile

def createRecomMatrix(userProfile,artists):
    
    artist_Mat = data_artist[~data_artist['artists'].isin(artists)]
    artist_Mat.set_index('artists',inplace=True) 
    #print(userProfile)
    #print(artist_Mat.head())
    
    recomMat = pd.DataFrame(artist_Mat.values*userProfile.values, 
                            columns=artist_Mat.columns, index=artist_Mat.index)
    recomMat = recomMat.sum(axis=1)
    recomMat.sort_values(ascending = False,inplace=True)
    
    return recomMat

def recommend(artistRatingDict):
    
    userProfile = choice_of_user(artistRatingDict)
    
    recommendationMat = createRecomMatrix(userProfile,
                                          artistRatingDict.keys()) 
    
    return recommendationMat.head(10)

Generating random user ratings.

In [46]:
import random
artists = random.sample(list(data['artists']),k=10)
ratings = [10,10,8,5,9,2,3,7,6,10]
dictionary = dict(zip(artists, ratings))
print(dictionary)

{'Sotos Panagopoulos': 10, 'The Midnight': 10, 'Old Sea Brigade': 8, 'Blood Red Shoes': 5, 'Public Enemy': 9, 'Lee Spielman': 2, 'Andra Day': 3, 'Amistades Peligrosas': 7, 'Bonn': 6, 'Emerson String Quartet': 10}


Reporting top 10 recommended artists with predicted ratings.

In [47]:
recommend(dictionary)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, v)


artists
Hawkins C. All Star Jam Band    7.997701
The Jiants                      7.968844
Aymara pan-pipe orchestra       7.921963
Kala Balch                      7.912405
Marco Marinangeli               7.912405
DJ Mister Cee                   7.912194
Mndsgn                          7.830670
Sofie                           7.830670
Tim Armstrong                   7.794319
Edmundo Ros                     7.790437
dtype: float64