In [1]:
import pandas as pd
import numpy as np
from surprise import SVD
from surprise.prediction_algorithms import KNNWithMeans, KNNBasic, KNNBaseline
from surprise import Dataset, Reader
from surprise import accuracy
from surprise.model_selection import cross_validate, train_test_split, GridSearchCV

In [2]:
df = pd.read_csv('modelingdata')

In [3]:
#See which artists have been rated the most 
df['artist'].value_counts().head()

Various Artists        467
Panic! At The Disco     63
Passion Pit             56
Bastille                51
Twenty One Pilots       43
Name: artist, dtype: int64

In [4]:
#See which tracks have been rated the most 
df['track_name'].value_counts().head()

Gold         8
Smile        8
Closer       8
Hurricane    8
Heaven       7
Name: track_name, dtype: int64

In [5]:
#View the distribution of ratings 
df['ratings'].value_counts()

3    2648
2    1981
4    1287
1     916
5     209
Name: ratings, dtype: int64

In [6]:
data = df[['artist_id','track_id', 'ratings']]

In [7]:
data.head()

Unnamed: 0,artist_id,track_id,ratings
0,6jJ0s89eD6GaHleKKya26X,4lCv7b86sLynZbXhfScfm2,4
1,6jJ0s89eD6GaHleKKya26X,6tS3XVuOyu10897O3ae7bi,4
2,6jJ0s89eD6GaHleKKya26X,455AfCsOhhLPRc68sE01D8,4
3,6jJ0s89eD6GaHleKKya26X,14iN3o8ptQ8cFVZTEmyQRV,4
4,6jJ0s89eD6GaHleKKya26X,1nZzRJbFvCEct3uzu04ZoL,4


In [8]:
#Check sparsity of matrix
numratings = len(data['ratings'])
numusers = len(data['artist_id'].unique())
numitems = len(data['track_id'].unique())

sparse = 1 - (numratings / (numusers*numitems))
sparse

0.9995669406116341

In [9]:
#Filter out artists who only appear once in dataset 
data = data.groupby('artist_id').filter(lambda x: len(x)>1)

In [10]:
# #Filter out any tracks who have less than 10 ratings
# data = data.groupby('track_id').filter(lambda x: len(x)>1)

In [11]:
#Check sparsity of matrix
numratings = len(data['ratings'])
numusers = len(data['artist_id'].unique())
numitems = len(data['track_id'].unique())

sparse = 1 - (numratings / (numusers*numitems))
sparse

0.9990345136236665

In [12]:
data['artist_id'].nunique()

1045

In [13]:
#Check for NA values 
data.isna().sum()

artist_id    0
track_id     0
ratings      0
dtype: int64

### Baseline Model

In [14]:
#Instansiate reader and data 
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(data, reader)

In [15]:
#Train test split with test sizre of 20% 
trainset, testset = train_test_split(data, test_size=.25)

In [16]:
# Print number of uses and items for the trainset 
print('Number of artists in train set : ', trainset.n_users, '\n')
print('Number of tracks in train set : ', trainset.n_items, '\n')

Number of artists in train set :  1016 

Number of tracks in train set :  4288 



In [17]:
#Instansiate a baseline model using KNNBaseline 
baseline = SVD(random_state=42)

In [18]:
#Fit model on the trainset 
baseline.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f835fa02780>

In [19]:
#Predict on the test set 
baselinepreds = baseline.test(testset)

In [20]:
#Check RMSE and MAE results 
accuracy.rmse(baselinepreds)
accuracy.mae(baselinepreds)

RMSE: 0.8888
MAE:  0.7036


0.7036158538260687

In [21]:
#Run 3-fold cross validation on the data and print results 
cv_baseline = cross_validate(baseline, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9064  0.8836  0.9017  0.8972  0.0098  
MAE (testset)     0.7198  0.6969  0.7203  0.7123  0.0109  
Fit time          0.24    0.23    0.26    0.24    0.01    
Test time         0.01    0.01    0.01    0.01    0.00    


In [22]:
# Print out the RMSE score for each fold 
for i in cv_baseline.items():
    print(i)

('test_rmse', array([0.90642268, 0.88362903, 0.9016602 ]))
('test_mae', array([0.71981633, 0.69689745, 0.72031076]))
('fit_time', (0.23783183097839355, 0.23102688789367676, 0.2576587200164795))
('test_time', (0.011801004409790039, 0.011889934539794922, 0.011161088943481445))


In [23]:
#Find the average test RMSE from the 3-Fold cross-validation
np.mean(cv_baseline['test_rmse'])

0.8972373023693465

### Model 1

In [24]:
#Set parameters for GridSearch on SVD model 
parameters = {'n_factors': [25, 50, 75, 100],
             'reg_all': [0.01, 0.02, 0.03, 0.04, 0.05],
             'n_epochs': [20, 30, 40, 50, 60],
             'lr_all': [.005, .01, .05, .1]}
gridsvd = GridSearchCV(SVD, param_grid=parameters, n_jobs=-1)

In [25]:
#Fit SVD model on data
gridsvd.fit(data)

In [26]:
#Print best score and best parameters from the GridSearch 
print(gridsvd.best_score)
print(gridsvd.best_params)

{'rmse': 0.847644943603882, 'mae': 0.663178750856742}
{'rmse': {'n_factors': 25, 'reg_all': 0.01, 'n_epochs': 40, 'lr_all': 0.05}, 'mae': {'n_factors': 25, 'reg_all': 0.01, 'n_epochs': 40, 'lr_all': 0.05}}


In [27]:
#Reinstantiate the model with the best parameters fromGridSearch 
svdtuned = SVD(n_factors=25,
               reg_all=0.01,
               n_epochs=20,
               lr_all=0.05)

In [28]:
#Fit and predict the model 
svdtuned.fit(trainset)
svdpreds = svdtuned.test(testset)

In [29]:
#Print RMSE and MAE results 
accuracy.rmse(svdpreds)
accuracy.mae(svdpreds)

RMSE: 0.8398
MAE:  0.6526


0.6525785102823046

In [30]:
#Perform 3-Fold cross validation for SVD tuned model
cv_svd_tuned = cross_validate(svdtuned, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Evaluating RMSE, MAE of algorithm SVD on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.8707  0.8701  0.8631  0.8680  0.0034  
MAE (testset)     0.6843  0.6770  0.6779  0.6797  0.0033  
Fit time          0.14    0.12    0.13    0.13    0.01    
Test time         0.02    0.01    0.01    0.01    0.00    


In [31]:
#Display the results for all 3-folds 
for i in cv_svd_tuned.items():
    print(i)

('test_rmse', array([0.87069354, 0.87010165, 0.86313407]))
('test_mae', array([0.68432099, 0.67702622, 0.67790116]))
('fit_time', (0.1398928165435791, 0.12325000762939453, 0.12575173377990723))
('test_time', (0.015025854110717773, 0.014170169830322266, 0.013131856918334961))


In [32]:
# Print out the average RMSE score for the test set
np.mean(cv_svd_tuned['test_rmse'])

0.8679764221797864

## Model 2 

In [33]:
# Set parameters to be used in KNN models 
knn_params = {'name': ['cosine', 'pearson'],
              'user_based':[True, False], 
              'min_support':[True, False],
            'min_k' : [1, 2]}

In [34]:

# Apply GridSearch to the KNN Basic model to identify the best parameters
gsknnbasic = GridSearchCV(KNNBasic, knn_params, measures=['rmse', 'mae'], cv=3)
gsknnbasic.fit(data)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi

In [35]:
#Display the best scores and parameters from GridSearch
print(gsknnbasic.best_score)
print(gsknnbasic.best_params)

{'rmse': 0.981964018765035, 'mae': 0.7898244448061341}
{'rmse': {'name': 'cosine', 'user_based': True, 'min_support': True, 'min_k': 1}, 'mae': {'name': 'cosine', 'user_based': True, 'min_support': True, 'min_k': 1}}


In [38]:
#Reinstantiate the model with the best parameters from GridSearch 
knnbasic_tuned = KNNBasic(sim_options={'name': 'cosine', 
                                       'user_based': True, 
                                       'min_support':True, 
                                       'min_k':1, })

In [39]:

#Fit on the train set and predict on the test set 
knnbasic_tuned.fit(trainset)
knnbpreds = knnbasic_tuned.test(testset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [40]:
#Print RMSE and MAE results 
accuracy.rmse(knnbpreds)
accuracy.mae(knnbpreds)

RMSE: 0.9765
MAE:  0.7872


0.7872217399691357

In [41]:
#Conduct cross validation for the KNNBasic tuned model 
cv_knn_basic = cross_validate(knnbasic_tuned, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBasic on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9883  0.9992  0.9615  0.9830  0.0158  
MAE (testset)     0.7921  0.8090  0.7717  0.7909  0.0152  
Fit time          0.06    0.02    0.02    0.03    0.02    
Test time         0.02    0.01    0.01    0.01    0.00    


In [42]:
# Print out results from the cross-valdiatoin 
for i in cv_knn_basic.items():
    print(i)

('test_rmse', array([0.98825074, 0.99924948, 0.96152103]))
('test_mae', array([0.79212958, 0.80900608, 0.77171021]))
('fit_time', (0.05665302276611328, 0.01958608627319336, 0.017940998077392578))
('test_time', (0.01582813262939453, 0.011268854141235352, 0.012523889541625977))


In [43]:
# Print out the average RMSE score for the test set
np.mean(cv_knn_basic['test_rmse'])

0.9830070829602042

### Model 3

In [44]:
#Apply KNN GridSearch parameters on the KNNBaseline model 
gsknnbaseline = GridSearchCV(KNNBaseline, knn_params, measures=['rmse', 'mae'], cv=3)
gsknnbaseline.fit(data)

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matr

In [45]:
#Display the best score and the best parameters 
print(gsknnbaseline.best_score)
print(gsknnbaseline.best_params)

{'rmse': 0.917323152776386, 'mae': 0.7296408532368761}
{'rmse': {'name': 'cosine', 'user_based': True, 'min_support': True, 'min_k': 1}, 'mae': {'name': 'cosine', 'user_based': True, 'min_support': True, 'min_k': 1}}


In [46]:
#Reinstantiate the model with the best parameters from GridSearch 
knnbaseline_tuned = KNNBaseline(sim_options={'name': 'cosine', 
                                       'user_based': True, 
                                       'min_support':True, 
                                       'min_k':1, })

In [48]:
#fit the trainset and predict on the test set 
knnbaseline_tuned.fit(trainset)
knnbaselinepreds = knnbaseline_tuned.test(testset)

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.


In [49]:
#Print the RMSE and MAE scores 
accuracy.rmse(knnbaselinepreds)
accuracy.mae(knnbaselinepreds)

RMSE: 0.9075
MAE:  0.7206


0.7205504467971151

In [50]:
#Perform 3 fold cross validation 
cv_knn_baseline = cross_validate(knnbaseline_tuned, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNBaseline on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9208  0.9130  0.9133  0.9157  0.0036  
MAE (testset)     0.7332  0.7287  0.7212  0.7277  0.0050  
Fit time          0.09    0.05    0.03    0.05    0.02    
Test time         0.02    0.01    0.01    0.02    0.01    


In [51]:
#Show the mean RMSE score for the test set 
np.mean(cv_knn_baseline['test_rmse'])

0.9156927929350115

### Model 4

Our final model will look at the KNN Wtih Means algorithm, and apply a GridSearch similar to the KNN models above to tune our hyperparameters further.

In [52]:
#Apply GridSearch to the KNNWithMeans model 
gsknnWM = GridSearchCV(KNNWithMeans, knn_params, measures=['rmse', 'mae'], cv=3)
gsknnWM.fit(data)

Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computi

In [53]:
#Display the best score and best parameters from GridSearch 
print(gsknnWM.best_score)
print(gsknnWM.best_params)

{'rmse': 0.982665083352496, 'mae': 0.7908704969618056}
{'rmse': {'name': 'cosine', 'user_based': True, 'min_support': True, 'min_k': 1}, 'mae': {'name': 'cosine', 'user_based': True, 'min_support': True, 'min_k': 1}}


In [54]:
#Reinstansiate the model with the best parameters 
knnwm_tuned = KNNWithMeans(sim_options={'name': 'cosine', 
                                       'user_based': True, 
                                       'min_support':True, 
                                       'min_k':1, })

In [55]:
#Fit on the trainset, predict on the testset 
knnwm_tuned.fit(trainset)
knnwmpreds = knnwm_tuned.test(testset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [56]:
#Print RMSE and MAE results
accuracy.rmse(knnwmpreds)
accuracy.mae(knnwmpreds)

RMSE: 0.9765
MAE:  0.7872


0.7872217399691357

In [57]:
#Perform 3-Fold cross validation on KNNWithMeans model 
cv_knn_wm = cross_validate(knnwm_tuned, data, measures=['RMSE', 'MAE'], cv=3, verbose=True)

Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Computing the cosine similarity matrix...
Done computing similarity matrix.
Evaluating RMSE, MAE of algorithm KNNWithMeans on 3 split(s).

                  Fold 1  Fold 2  Fold 3  Mean    Std     
RMSE (testset)    0.9738  0.9790  0.9942  0.9823  0.0086  
MAE (testset)     0.7857  0.7906  0.7934  0.7899  0.0032  
Fit time          0.06    0.04    0.03    0.04    0.01    
Test time         0.01    0.02    0.01    0.02    0.00    


In [58]:
#Print the average RMSE score for the test set 
np.mean(cv_knn_wm['test_rmse'])

0.9823400128845062

## All Results

In [59]:
#Create a dictionary for each models' results 
baselineresult = {'model': 'baseline','RMSE': accuracy.rmse(baselinepreds), 'MAE': accuracy.mae(baselinepreds), 'CV': np.mean(cv_baseline['test_rmse'])}
svdresult = {'model':'svd', 'RMSE': accuracy.rmse(svdpreds), 'MAE': accuracy.mae(svdpreds), 'CV': np.mean(cv_svd_tuned['test_rmse'])}
knnbasicresult = {'model':'knnbasic','RMSE': accuracy.rmse(knnbpreds), 'MAE': accuracy.mae(knnbpreds), 'CV': np.mean(cv_knn_basic['test_rmse'])}
knnbaselineresult = {'model':'knnbaseline','RMSE': accuracy.rmse(knnbaselinepreds), 'MAE': accuracy.mae(knnbaselinepreds), 'CV': np.mean(cv_knn_baseline['test_rmse'])}
knnwmresult = {'model':'knnwm','RMSE': accuracy.rmse(knnwmpreds), 'MAE': accuracy.mae(knnwmpreds), 'CV': np.mean(cv_knn_wm['test_rmse'])}

RMSE: 0.8888
MAE:  0.7036
RMSE: 0.8398
MAE:  0.6526
RMSE: 0.9765
MAE:  0.7872
RMSE: 0.9075
MAE:  0.7206
RMSE: 0.9765
MAE:  0.7872


In [60]:
#Combine all the results into a list 
result_list = [baselineresult, svdresult, knnbasicresult, knnbaselineresult, knnwmresult]

In [61]:
#Transform the results lists into a DataFrame 
df_results_updated = pd.DataFrame.from_dict(result_list, orient='columns')
df_results_updated = df_results_updated.set_index('model')

In [62]:
#Display the results for all of the models 
df_results_updated

Unnamed: 0_level_0,RMSE,MAE,CV
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
baseline,0.888788,0.703616,0.897237
svd,0.839809,0.652579,0.867976
knnbasic,0.976495,0.787222,0.983007
knnbaseline,0.90753,0.72055,0.915693
knnwm,0.976495,0.787222,0.98234


### Generating New Ratings

In [63]:
#Define function that can generate new user movie ratings 
def movie_rater(movie_df,num, genre=None):
    #Create new user with userId = 1000
    userID = 1000
    
    #Create an empty list of ratings 
    rating_list = []
    
    #For all number of ratings, provide a random movie sample within the specified genre for the user to rate 
    while num > 0:
        if genre:
            movie = movie_df[movie_df['genres'].str.contains(genre)].sample(1)
        else:
            movie = movie_df.sample(1)
        print(movie)
    
    #Provide user with a prompt to rate the movie, then print the userID, movieID, then title, then append 
    #results to the rating_list 
        rating = input('How do you rate this movie on a scale of 1-5, press n if you have not seen :\n')
        if rating == 'n':
            continue
        else:
            rating_one_movie = {'userId':userID,'movieId':movie['movieId'].values[0],'title':movie['title'].values[0], 'rating':rating}
            rating_list.append(rating_one_movie) 
            num -= 1
    return rating_list

In [110]:
#Define function that can generate new user movie ratings 
def song_rater(song_df,num, genre=None):
    #Create new artist with artist_id = 1000
    artistid = 1000
    
    #Create an empty list of ratings 
    rating_list = []
    
    #For all number of ratings, provide a random movie sample within the specified genre for the user to rate 
    while num > 0:
        if genre:
            song = song_df[song_df['genrecategory'].str.contains(genre)].sample(1)
        else:
            song = song_df.sample(1)
        print(song)
    
    #Provide user with a prompt to rate the movie, then print the userID, movieID, then title, then append 
    #results to the rating_list 
        rating = input('How do you rate this song on a scale of 1-5, press n if you have not listened to it :\n')
        if rating == 'n':
            continue
        else:
            rating_one_song = {'artist_id':artistid, 'track_id':song['track_id'].values[0], 
                               'ratings':rating,'track_name':song['track_name'],
                                'artist':song['artist'].values[0], 
                               'genrecategory':song['genrecategory'].values[0]}
            rating_list.append(rating_one_song) 
            num -= 1
    return rating_list

In [99]:
dfnew = df[['artist_id', 'track_id', 'ratings', 'track_name', 'artist','genrecategory']]
dfnew.head()

Unnamed: 0,artist_id,track_id,ratings,track_name,artist,genrecategory
0,6jJ0s89eD6GaHleKKya26X,4lCv7b86sLynZbXhfScfm2,4,Firework,Katy Perry,dance
1,6jJ0s89eD6GaHleKKya26X,6tS3XVuOyu10897O3ae7bi,4,California Gurls,Katy Perry,dance
2,6jJ0s89eD6GaHleKKya26X,455AfCsOhhLPRc68sE01D8,4,Last Friday Night (T.G.I.F.),Katy Perry,dance
3,6jJ0s89eD6GaHleKKya26X,14iN3o8ptQ8cFVZTEmyQRV,4,I Kissed A Girl,Katy Perry,dance
4,6jJ0s89eD6GaHleKKya26X,1nZzRJbFvCEct3uzu04ZoL,4,Part Of Me,Katy Perry,dance


In [113]:
artistrating = song_rater(dfnew, 3, 'dance')

                   artist_id                track_id  ratings  \
2200  6c4sUNBgdonFJz8Kx2VsGz  7eXrGBrl1mBKhxlWX0IoOQ        3   

                     track_name  artist genrecategory  
2200  Beggin - Original Version  Madcon         dance  
How do you rate this song on a scale of 1-5, press n if you have not listened to it :
3
                   artist_id                track_id  ratings track_name  \
4182  0Qnx1MPnHYt3jJCYrRFVwX  4r7raHoBR2YR8F6qJ1dpIO        2  Hit & Run   

              artist genrecategory  
4182  Greyson Chance         dance  
How do you rate this song on a scale of 1-5, press n if you have not listened to it :
4
                   artist_id                track_id  ratings     track_name  \
5214  2DuJi13MWHjRHrqRUwk8vH  3ZbtdyrzuKg0xv6pqzXoD5        2  Battle Sirens   

           artist genrecategory  
5214  Knife Party         dance  
How do you rate this song on a scale of 1-5, press n if you have not listened to it :
5


In [114]:
artistrating

[{'artist_id': 1000,
  'track_id': '7eXrGBrl1mBKhxlWX0IoOQ',
  'ratings': '3',
  'track_name': 2200    Beggin - Original Version
  Name: track_name, dtype: object,
  'artist': 'Madcon',
  'genrecategory': 'dance'},
 {'artist_id': 1000,
  'track_id': '4r7raHoBR2YR8F6qJ1dpIO',
  'ratings': '4',
  'track_name': 4182    Hit & Run
  Name: track_name, dtype: object,
  'artist': 'Greyson Chance',
  'genrecategory': 'dance'},
 {'artist_id': 1000,
  'track_id': '3ZbtdyrzuKg0xv6pqzXoD5',
  'ratings': '5',
  'track_name': 5214    Battle Sirens
  Name: track_name, dtype: object,
  'artist': 'Knife Party',
  'genrecategory': 'dance'}]

In [None]:
##Continue from here by looking at the rec system project 4 
## Also check the lab from this section because i didn't actually get the predictions previously; 
#shoudl aim to add this here 