# Recommend artists
Build a content-based recommendation engine which suggests artists

In [10]:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

/kaggle/input/spotify-data/data_by_year.csv
/kaggle/input/spotify-data/data_by_genres.csv
/kaggle/input/spotify-data/data_w_genres.csv
/kaggle/input/spotify-data/data_by_artist.csv
/kaggle/input/spotify-data/data.csv


In [12]:
data = pd.read_csv('../input/spotify-data/data_by_artist.csv')
data.head()

Unnamed: 0,artists,acousticness,danceability,duration_ms,energy,instrumentalness,liveness,loudness,speechiness,tempo,valence,popularity,key,mode,count
0,"""Cats"" 1981 Original London Cast",0.575083,0.44275,247260.0,0.386336,0.022717,0.287708,-14.205417,0.180675,115.9835,0.334433,38.0,5,1,12
1,"""Cats"" 1983 Broadway Cast",0.862538,0.441731,287280.0,0.406808,0.081158,0.315215,-10.69,0.176212,103.044154,0.268865,33.076923,5,1,26
2,"""Fiddler On The Roof” Motion Picture Chorus",0.856571,0.348286,328920.0,0.286571,0.024593,0.325786,-15.230714,0.118514,77.375857,0.354857,34.285714,0,1,7
3,"""Fiddler On The Roof” Motion Picture Orchestra",0.884926,0.425074,262890.962963,0.24577,0.073587,0.275481,-15.63937,0.1232,88.66763,0.37203,34.444444,0,1,27
4,"""Joseph And The Amazing Technicolor Dreamcoat""...",0.605444,0.437333,232428.111111,0.429333,0.037534,0.216111,-11.447222,0.086,120.329667,0.458667,42.555556,11,1,9


In [13]:
data.drop(['duration_ms','key','mode','count'],axis=1,inplace=True)

In [14]:
data['popularity'] = data['popularity']/100
data['tempo'] = (data['tempo'] - 50)/100
data['loudness'] = (data['loudness'] + 60)/60

In [15]:
features = ['acousticness', 'danceability', 'energy', 'instrumentalness', 'liveness', 
            'loudness', 'speechiness', 'tempo', 'valence', 'popularity']
def createUserPrefMatrix(artistRatingDict):
    
    artists = artistRatingDict.keys()
    artMat = data[data['artists'].isin(artists)]
    #print(artMat)
    
    for artist, rating in artistRatingDict.items():
        artMat.loc[artMat['artists']==
                   artist,features] = artMat.loc[artMat['artists']==
                                                 artist,features].mul(rating,axis=0)
    
    userProfile = artMat.loc[:,features].sum(axis=0)
    normalized_userProfile = (userProfile/userProfile.sum())*10
    
    return normalized_userProfile

def createRecomMatrix(userProfile,artists):
    
    artMat = data[~data['artists'].isin(artists)]
    artMat.set_index('artists',inplace=True) 
    #print(userProfile)
    #print(artMat.head())
    
    recomMat = pd.DataFrame(artMat.values*userProfile.values, 
                            columns=artMat.columns, index=artMat.index)
    recomMat = recomMat.sum(axis=1)
    recomMat.sort_values(ascending = False,inplace=True)
    
    return recomMat

def recommend(artistRatingDict):
    
    userProfile = createUserPrefMatrix(artistRatingDict)
    
    recommendationMat = createRecomMatrix(userProfile,
                                          artistRatingDict.keys()) 
    
    return recommendationMat.head(10)

In [16]:
import random
artists = random.sample(list(data['artists']),k=10)
ratings = [10,10,8,5,9,2,3,7,6,10]
dictionary = dict(zip(artists, ratings))
print(dictionary)

{'Hélène Bouvier': 10, "Michel 'Le": 10, 'Strike Anywhere': 8, 'Alfred Apaka': 5, 'Tonic': 9, 'Pearl Bailey': 2, 'Del McCoury': 3, 'Combichrist': 7, 'Crystal Gayle': 6, 'Arrogant Worms': 10}


In [17]:
recommend(dictionary)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, v)


artists
Kala Balch                            8.325943
Marco Marinangeli                     8.325943
DJ Mister Cee                         8.241323
The Jiants                            8.204544
Henry Blair                           8.108183
Tim Armstrong                         8.102470
Aymara pan-pipe orchestra             7.974328
Gary U.S. Bonds                       7.926520
Shout Praises Kids                    7.920893
Jovanny Cadena Y Su Estilo Privado    7.912363
dtype: float64