In [None]:
!pip install scikit-surprise 
!pip install lightfm
from lightfm import LightFM
import pandas as pd
import numpy as np
import seaborn as sns
import random
import matplotlib.pyplot as plt
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
from surprise import BaselineOnly
from surprise import KNNBaseline
from surprise import SVD
from surprise import SVDpp
from surprise import accuracy
from surprise.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

Collecting scikit-surprise
[?25l  Downloading https://files.pythonhosted.org/packages/97/37/5d334adaf5ddd65da99fc65f6507e0e4599d092ba048f4302fe8775619e8/scikit-surprise-1.1.1.tar.gz (11.8MB)
[K     |████████████████████████████████| 11.8MB 245kB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp36-cp36m-linux_x86_64.whl size=1670933 sha256=e1dabcaac1d06397a3cfc112c5b03f13096f162e505c329d392d4b66039157d8
  Stored in directory: /root/.cache/pip/wheels/78/9c/3d/41b419c9d2aff5b6e2b4c0fc8d25c538202834058f9ed110d0
Successfully built scikit-surprise
Installing collected packages: scikit-surprise
Successfully installed scikit-surprise-1.1.1
Collecting lightfm
[?25l  Downloading https://files.pythonhosted.org/packages/5e/fe/8864d723daa8e5afc74080ce510c30f7ad52facf6a157d4b42dec83dfab4/lightfm-1.16.tar.gz (310kB)
[K     |██████████████████████████

In [None]:
# Location of the merged ratings CSV that the rest of the notebook works from.
path = "https://raw.githubusercontent.com/aztgmb/Recommender_System/master/new_data_merged(1).csv"
data = pd.read_csv(path)
# Discard the leading column (presumably an index written out with the CSV --
# TODO confirm against the file).
data = data.drop(columns=data.columns[0])

In [None]:
# Keep only the three columns the recommenders consume: rating, user and item.
ratings = data.loc[:, ['overall', 'userID', 'itemID']]

In [None]:
def get_interactions_matrix(df, shape=None):
    """Build a sparse user-by-item rating matrix from a ratings frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``overall`` (the stored value), ``userID``
        (row coordinate) and ``itemID`` (column coordinate). The IDs are used
        directly as matrix coordinates, so they must be non-negative integers.
    shape : tuple of int, optional
        Explicit ``(n_users, n_items)`` for the result. When omitted the shape
        is inferred from the largest IDs present in ``df``, which means two
        different subsets of the same data can produce matrices of different
        sizes. Pass a common shape when the matrices must be compatible.

    Returns
    -------
    scipy.sparse.csr_matrix
        Matrix whose ``[userID, itemID]`` entry holds the ``overall`` value.
    """
    return sparse.csr_matrix(
        (df['overall'], (df['userID'], df['itemID'])),
        shape=shape,
    )
# Seeded 50/50 split of the ratings used by the LightFM experiment.
train, test = train_test_split(ratings, random_state=100, test_size=0.5)

In [None]:
train_sparse_data = get_interactions_matrix(train)
test_sparse_data = get_interactions_matrix(test)
# Snapshot of the test matrix at its inferred shape, taken before it is resized
# in place below.
test_sparse_data_old = test_sparse_data.copy()
# LightFM needs both matrices to have the same shape. Each matrix infers its
# shape from the largest IDs in its own split, so pad both up to the larger
# extent on every axis. Growing a sparse matrix only adds empty rows/columns,
# so no user or interaction is dropped (a hard-coded row slice would discard
# test users and break as soon as the split changes).
common_shape = tuple(
    max(extent) for extent in zip(train_sparse_data.shape, test_sparse_data.shape)
)
train_sparse_data.resize(common_shape)
test_sparse_data.resize(common_shape)
assert train_sparse_data.shape == test_sparse_data.shape
print(train_sparse_data.shape)
print(test_sparse_data.shape)
print(test_sparse_data_old.shape)

(126998, 65969)
(126998, 65969)
(127006, 65969)


In [None]:
# Configure a 30-component WARP model and train it on the training
# interactions for 50 epochs; `fit` hands back the fitted model.
model = LightFM(
    no_components=30,
    loss='warp',
    learning_schedule='adagrad',
    learning_rate=0.01,
    random_state=100,
).fit(train_sparse_data, epochs=50, num_threads=10, verbose=False)

In [None]:
from lightfm.evaluation import auc_score
from lightfm.evaluation import precision_at_k
from lightfm.evaluation import recall_at_k
# Each metric is computed per user and then averaged across users.
train_auc = auc_score(model, train_sparse_data, num_threads=10).mean()
print('Collaborative filtering train AUC: %s' % train_auc)
test_auc = auc_score(model, test_sparse_data,  num_threads=10).mean()
print('Collaborative filtering test AUC: %s' % test_auc)
prec_at_k = precision_at_k(model, test_sparse_data,  num_threads=10,k=3).mean()
print('Collaborative filtering test precision at 3: %s' % prec_at_k)
# Store the result under its own name: assigning it to `recall_at_k` would
# overwrite the imported function and make this cell fail when re-run.
test_recall_at_k = recall_at_k(model, test_sparse_data,  num_threads=10, k=3).mean()
print('Collaborative filtering test recall at 3: %s' % test_recall_at_k)

Collaborative filtering train AUC: 0.99989
Collaborative filtering test AUC: 0.95163643
Collaborative filtering test precision at 3: 0.10690122
Collaborative filtering test recall at 3: 0.22227353965904173


In [None]:
def get_recommendations(model, data, userIDs):
    """Print known and top-scored items for each requested user.

    For every ID in ``userIDs`` this prints up to three column indices stored
    in that user's row of ``data``, followed by the three item indices with the
    highest ``model.predict`` score over all items. Items the user already has
    in ``data`` are not filtered out of the recommendations.

    Parameters
    ----------
    model : object
        Must expose ``predict(user_id, item_ids)`` returning one score per item.
    data : scipy.sparse.csr_matrix
        User-by-item matrix; its second dimension sets the number of items scored.
    userIDs : iterable of int
        Row indices of the users to report on.
    """
    _, n_items = data.shape
    for userID in userIDs:
        known_items = data[userID].indices
        # Negate the scores so that an ascending argsort ranks best-first.
        ranking = np.argsort(-model.predict(userID, np.arange(n_items)))

        report = ["User %s" % userID, "     Already listened:"]
        report.extend("        %s" % item for item in known_items[:3])
        report.append("     Recommended:")
        report.extend("        %s" % item for item in ranking[:3])
        print("\n".join(report))

In [None]:
# Report on three hand-picked users, using the test interactions as the
# "already listened" source.
userIDs = [98150, 16758, 117479]
get_recommendations(model=model, data=test_sparse_data, userIDs=userIDs)

User 98150
     Already listened:
     Recommended:
        8589
        7057
        46357
User 16758
     Already listened:
        8823
     Recommended:
        8823
        58447
        2516
User 117479
     Already listened:
        5146
        9970
        11325
     Recommended:
        9970
        22164
        11345


In [None]:
from surprise.model_selection.validation import cross_validate
from surprise.model_selection.search import GridSearchCV
from surprise.prediction_algorithms import SVD
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import GridSearchCV
from surprise.model_selection import cross_validate
from surprise import accuracy

In [None]:
# Seeded 80/20 split of the ratings used by the Surprise experiments.
train1, test1 = train_test_split(ratings, random_state=100, test_size=0.2)

In [None]:
from surprise import SVD
from surprise.model_selection import GridSearchCV
reader = Reader(rating_scale=(1, 5))
data_train = Dataset.load_from_df(train1[['userID', 'itemID', 'overall']], reader)
data_test = Dataset.load_from_df(test1[['userID', 'itemID', 'overall']], reader)
trainset = data_train.build_full_trainset()
testset1 = data_test.build_full_trainset()
# Evaluate on the ratings actually held out in `test1`. `build_anti_testset()`
# returns the user/item pairs that are NOT rated, filled with the global mean,
# so an RMSE computed on it says nothing about held-out accuracy.
testset2 = testset1.build_testset()
# 10-fold cross-validated grid search over the SVD hyper-parameters, scored by RMSE.
pgrid = {'n_factors':[50,100,150],'n_epochs':[30,50,100],  'lr_all':[0.01,0.05,0.1],'reg_all':[0.1,0.2, 0.5]}
grid = GridSearchCV(SVD, pgrid, measures=['rmse'], n_jobs=-1, cv=10)
grid.fit(data_train)
parameters = grid.best_params['rmse']
# Unfitted SVD configured with the best combination found above.
svdopt = SVD(n_factors=parameters['n_factors'], n_epochs=parameters['n_epochs'],lr_all=parameters['lr_all'], reg_all=parameters['reg_all'])

In [None]:
# Show the SVD hyper-parameter combination with the best cross-validated RMSE.
print(parameters)

{'n_factors': 150, 'n_epochs': 30, 'lr_all': 0.05, 'reg_all': 0.1}


In [None]:
# Train with the hyper-parameters chosen by the grid search rather than
# hand-copied values, which can drift from the search result (the original
# literals used n_factors=100 although the search selected 150). The fixed
# seed makes the reported RMSE reproducible across runs.
svdeval = SVD(random_state=100, **parameters)
svdeval.fit(trainset)
pred = svdeval.test(testset2)
accuracy.rmse(pred)

RMSE: 0.4146


0.4146196024741164

In [None]:
from surprise import CoClustering
# 10-fold cross-validated grid search over the CoClustering hyper-parameters,
# scored by RMSE.
pgrid1 = {'n_cltr_u':[3,5,7],'n_cltr_i':[3,5,7], 'n_epochs':[30,50,100]}
grid1 = GridSearchCV(CoClustering, pgrid1, measures=['rmse'], n_jobs=-1, cv=10)
grid1.fit(data_train)
parameters1 = grid1.best_params['rmse']
# Every hyper-parameter must come from this search (`parameters1`); reading
# n_epochs from `parameters` would pick up the SVD search result instead.
coclusteropt = CoClustering(n_cltr_u=parameters1['n_cltr_u'], n_cltr_i=parameters1['n_cltr_i'],n_epochs=parameters1['n_epochs'])

In [None]:
# Show the CoClustering hyper-parameter combination with the best cross-validated RMSE.
print(parameters1)

{'n_cltr_u': 7, 'n_cltr_i': 7, 'n_epochs': 30}


In [None]:
# Train with the hyper-parameters chosen by the grid search instead of
# hand-copied literals, so the evaluation follows the search result if the
# data or grid changes. The fixed seed makes the reported RMSE reproducible.
coclusteval = CoClustering(random_state=100, **parameters1)
coclusteval.fit(trainset)
pred1 = coclusteval.test(testset2)
accuracy.rmse(pred1)

RMSE: 0.7898


0.789849035975243