# Hybrid Recommender
This part I'll be combining collaborative filtering with content based filtering to generate the recommendations. First we'll get similar users with Alternating Least Sqaure method then we'll get similar user's liked songs and calculate the most similar items with current user's items. 

In [32]:
import spotipy
import spotipy.util as util
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import numpy as np
import os
import requests
import glob
from scipy.sparse import csr_matrix,dok_matrix
from math import ceil
from tqdm import trange
from sklearn.metrics import roc_auc_score
from scipy.sparse import csr_matrix
import pickle
from sklearn import preprocessing
import warnings
from sklearn.metrics.pairwise import cosine_similarity
import implicit
warnings.filterwarnings('ignore')

In [33]:
user_id = '224kzdpjenqu4hu34ngs45noy' # the user we want to recommend songs for
filetype = 'recentTracks'             # based on user's recent tracks
user_dataset = pd.read_csv('datasets/'+user_id+"_"+filetype+".csv",index_col=[0])
user_dataset.drop_duplicates(inplace=True)
user_dataset.reset_index(inplace=True)

In [34]:
# get other users' dataset

def create_file(filetype):
    path = 'datasets/' 
    #user_files = glob.glob(path + "*_topTracks.csv")
    user_files = glob.glob(path + "*_"+ filetype+".csv")


    li_file = []

    for files in user_files:
        df = pd.read_csv(files, index_col=[0], header=0)
        li_file.append(df)

    frame = pd.concat(li_file, axis=0, ignore_index=True)

    return frame

top = create_file('topTracks')
saved = create_file('savedTracks')
others_dataset = pd.concat([top,saved],axis=0) # only take the top and saved tracks as we are sure those are the ones they like
others_dataset=others_dataset[others_dataset['user_id']!=user_id]
#others_dataset.drop(columns=['Unnamed: 0'],inplace=True)
others_dataset.drop_duplicates(inplace=True)
others_dataset.reset_index(inplace=True)

## Model based Collaborative Filtering - ALS

In [35]:
all_dataset = pd.concat([user_dataset,others_dataset])
df_als = all_dataset.copy()
df_als['listened'] = 1

# convert user_id and track_id to numeric categories
df_als['userid'] = df_als['user_id'].astype('category').cat.codes
df_als['itemid'] = df_als['track_id'].astype('category').cat.codes


# create user-item and item-user matrix
user_items = csr_matrix((df_als['listened'].astype(int),(df_als['userid'], df_als['itemid'])))
item_users = csr_matrix((df_als['listened'].astype(int),(df_als['itemid'], df_als['userid'])))

In [36]:
#Building the als model
als = implicit.als.AlternatingLeastSquares(factors=13, regularization=0.1, iterations=100,calculate_training_loss =True)
alpha_val = 40
data_conf = (item_users * alpha_val).astype('double')
als.fit(data_conf)

HBox(children=(HTML(value=''), FloatProgress(value=0.0), HTML(value='')))




In [37]:
# get the current user's category code
df_user = df_als[df_als['user_id']==user_id]
user_id = df_user.userid.values[0]

#Get top 15 recommendations based on ALS
recommended = als.recommend(user_id, user_items,filter_already_liked_items=True,N=user_items.shape[1])[:15]
print(recommended)

[(117, 0.000120755285), (81, 8.398108e-05), (137, 8.052122e-05), (91, 7.906556e-05), (122, 7.5146556e-05), (112, 7.356517e-05), (30, 6.902125e-05), (2, 5.3450465e-05), (66, 5.2511692e-05), (83, 5.00381e-05), (136, 4.476309e-05), (169, 4.2006373e-05), (20, 3.579259e-05), (57, 3.141351e-05), (107, 2.706796e-05)]


In [38]:
# get the track ids of recommeded songs
list_als = []
for i,j in enumerate(recommended):
    
    a = df_als.loc[df_als['itemid']==j[0]].track_id
    list_als.append(a.values[0])
list_als

['4jSE5cAaa5rwTyhDSXfwQN',
 '2yyluSgySg95O24RBkgrMR',
 '5tqNWWiySiABLrGAvSWsO5',
 '3Wu6MhnZuLuzfH5wFC2g9i',
 '4yJIECgIOgowfgAkPyvo2c',
 '4VPZX5qq0mujdIrVhLFROu',
 '0ri0Han4IRJhzvERHOZTMr',
 '00sPD3dShAg2v5BFDwgjlO',
 '2KNB0oKjKoITfAJXLCS8U6',
 '3ANpoLKfd3MLZOipWyoWA4',
 '5tFDgH6do20IXdSoNBAmXK',
 '72PFP54TZ9Tpj9dYQcb46D',
 '0ceeRNqbOzRiGE42KWSrRa',
 '24zaoEGmRjgA0zt47uLgRF',
 '4Qf9ljqJtUJFH9l2LwKzo7']

In [39]:
# get the top 2 users similar to current user 

similar_users = als.similar_users(user_id,3)
similar_userlist = []

for a in similar_users:
    user_cat  =a[0]
    username = df_als[df_als['userid']==user_cat]
    username1 = username.user_id.values[0]
    similar_userlist.append(username1)

## Content based -  Calculate the cosine similarity between user's items and similar user's item

In [40]:
# create another df storing only the songs of the 2 most similar users

temp = all_dataset[all_dataset['user_id']==similar_userlist[1]]
temp2 =  all_dataset[all_dataset['user_id']==similar_userlist[2]]

similaruser_dataset = pd.concat([temp,temp2],axis=0,ignore_index=True)
similaruser_dataset

Unnamed: 0,index,track_id,name,album,artist,release_date,song_length,popularity,acousticness,danceability,...,key,liveness,loudness,speechiness,tempo,time_signature,valence,mode,user_id,artist_id
0,142,3iH29NcCxYgI5shlkZrUoB,gone girl,Industry Plant,iann dior,2019-11-08,136568,79,0.1620,0.677,...,11,0.0717,-5.637,0.0287,94.956,4,0.3550,1,ci4mnwr8uu4piya6makrn4a7p,
1,143,3tjFYV6RSFtuktYl3ZtYcq,Mood (feat. iann dior),Mood (feat. iann dior),24kGoldn,2020-07-24,140525,99,0.2210,0.700,...,7,0.2720,-3.558,0.0369,90.989,4,0.7560,0,ci4mnwr8uu4piya6makrn4a7p,
2,144,72PFP54TZ9Tpj9dYQcb46D,NIGHTRIDER,NIGHTRIDER,Arizona Zervas,2020-07-24,156367,71,0.0876,0.660,...,0,0.1020,-4.501,0.0469,145.948,4,0.8620,0,ci4mnwr8uu4piya6makrn4a7p,
3,145,7ytR5pFWmSjzHJIeQkgog4,ROCKSTAR (feat. Roddy Ricch),BLAME IT ON BABY,DaBaby,2020-04-17,181733,93,0.2470,0.746,...,11,0.1010,-7.956,0.1640,89.977,4,0.4970,1,ci4mnwr8uu4piya6makrn4a7p,
4,146,0DILU9jjSTnCv1dcbUH3ib,Sick and Tired (ft. Machine Gun Kelly and Trav...,I'm Gone,iann dior,2020-06-12,144000,76,0.1510,0.748,...,11,0.1460,-8.584,0.0498,145.097,4,0.4830,1,ci4mnwr8uu4piya6makrn4a7p,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
158,377,6RuFOroO9VO0aMGEzirLHk,Med slutna ögon,Med slutna ögon,Najma Wallin,2020-02-21,133500,69,0.9740,0.175,...,7,0.1130,-35.072,0.0454,70.872,4,0.0838,1,lt47n0oprtpu0tjswd5s0rx6y,
159,378,6XQHlsNu6so4PdglFkJQRJ,"City Of Stars - From ""La La Land"" Soundtrack",La La Land (Original Motion Picture Soundtrack),Various Artists,2016-12-09,149706,65,0.8990,0.492,...,2,0.0889,-15.150,0.0379,111.787,4,0.4340,0,lt47n0oprtpu0tjswd5s0rx6y,
160,379,6XTpwvfm4jwnhhdGhClVRh,Affection,Affection,Fiji Blue,2020-10-22,174211,53,0.3210,0.827,...,10,0.0950,-8.370,0.0550,113.002,4,0.8360,0,lt47n0oprtpu0tjswd5s0rx6y,
161,380,7kmTnWH0Q4I2xblpOdEQjR,want u back - spring,cover girl (autumn),joan,2020-11-12,210800,41,0.4020,0.705,...,7,0.1290,-8.868,0.0516,119.964,4,0.1160,0,lt47n0oprtpu0tjswd5s0rx6y,


In [41]:
# normalize the features

min_max_scaler = preprocessing.MinMaxScaler()

# features of user's dataset
user_features = user_dataset.drop(columns=['track_id','name','artist','album','release_date','user_id'])
user_features = pd.DataFrame(min_max_scaler.fit_transform(user_features))

# features of similar user's dataset
others_features = similaruser_dataset.drop(columns=['track_id','name','artist','album','release_date','user_id','artist_id'])
others_features = pd.DataFrame(min_max_scaler.fit_transform(others_features))

In [42]:
# get the most similar songs based on cosine similarity

cosine_similarities_count = cosine_similarity(user_features,others_features)

top={}
cosine_score = {}
for i,j in user_features.iterrows():
    similar_indices = cosine_similarities_count[i].argsort()[:-cosine_similarities_count.shape[1]:-1]
    top[i]=similar_indices
    cosine_score[i] = cosine_similarities_count[i]

print("Based on your recent tracks:\n")

for i in top:
    most_similar = top[i][0]
    print('Recommendation for {0} : {1}'.format(user_dataset.iloc[i]['name'],similaruser_dataset.iloc[most_similar]['name']))

Based on your recent tracks:

Recommendation for ‘tis the damn season : Coke & Mentos
Recommendation for gold rush : Summer Nights
Recommendation for champagne problems : 我願意 - 管弦樂版
Recommendation for willow : What You Know Bout Love
Recommendation for no body, no crime (feat. HAIM) : He Wasn't Man Enough
Recommendation for coney island (feat. The National) : That Girl
Recommendation for dorothea : Hate Me!
Recommendation for happiness : free love - dream edit
Recommendation for willow : What You Know Bout Love
Recommendation for cardigan : hot girl bummer
Recommendation for cowboy like me : All Around the World - Remastered
Recommendation for Christmas Tree Farm : Me Vs. The World
Recommendation for long story short : Something Good - Sarm Acoustic Version
Recommendation for cowboy like me : When the Going Gets Tough, The Tough Get Going
Recommendation for dorothea : Plot Twist
Recommendation for champagne problems : Matter To You
Recommendation for invisible string : Honesty


In [43]:
# get the most similar songs id

cosine_similarities_count = cosine_similarity(user_features,others_features)

top={}
cosine_score = {}
similar_list = []
for i,j in user_features.iterrows():
    similar_indices = cosine_similarities_count[i].argsort()[:-cosine_similarities_count.shape[1]:-1]
    top[i]=similar_indices
    cosine_score[i] = cosine_similarities_count[i]

for i in top:
    most_similar = top[i][0]
    similar_list.append(similaruser_dataset.iloc[most_similar]['track_id'])
similar_list

['0XtJdLv5FZN09C2mTixh6S',
 '001GvRtc0EJt8fvzjymQbF',
 '0tlDH6UX4Bq9kG5rPWaiaF',
 '1tkg4EHVoqnhR6iFEXb60y',
 '7f1Dmr246cJ9uQYdbplTbh',
 '08fhbfRSg7eEH0IZT7pB6a',
 '6jmf4OxSGzdgthZruXtcqu',
 '3HAsf0o0TJY9WL4zKCzE3u',
 '1tkg4EHVoqnhR6iFEXb60y',
 '2mt1IqcFyY1zmYZT8Q3xw9',
 '1DapqOfkDAQCiKnvfwKo0g',
 '3CXMsoCv6gYlcRLBz7WkNO',
 '2XYR6mCxzGloPzMs8tB0nv',
 '5UU5FbITNm5OunvHQdsKME',
 '5wDbCy1UXyCV79jpTm4xyF',
 '1InPL1Qm8qJC3FIIvFruNt',
 '487kVkJYUzaFcpKciBcTQP']