In [1]:
import numpy as np
import pandas as pd
import surprise
import matplotlib.pyplot as plt
from math import sqrt
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing
from surprise import KNNWithMeans
from surprise import accuracy
from surprise.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import pairwise_distances
from sklearn.neighbors import NearestNeighbors
from project_example import *

# Approach 1 

# News Article Recommendation using Collaborative Filtering 

In [2]:
data = load_data("active1000")
data.head()

Unnamed: 0,eventId,category,activeTime,title,url,userId,publishtime,time,documentId
0,2059407368,,9.0,,http://adressa.no,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y,,1488063609,
1,682091708,,,,http://adressa.no,cx:16lfhhoa0byyq3c06phiivxcas:qf0oz0oiyr1g,,1488063624,
2,2064579395,nyheter|okonomi,,- Landet bygges ikke bare mellom klokken 8 og 16,http://adressa.no/nyheter/okonomi/2017/02/25/l...,cx:13512028457711940833770:1cg93fqzhke70,2017-02-25T16:47:38.000Z,1488063624,cbfe5a35b79b2362796607c1e381a5f835394aa4
3,709984968,,32.0,,http://adressa.no,cx:htkipe5w759w5vw9:2souxtezokfus,,1488063625,
4,114179448,nyheter|trondheim,48.0,To innbruddsforsøk på tre dager,http://adressa.no/nyheter/trondheim/2017/02/25...,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y,2017-02-25T20:29:21.000Z,1488063629,b3aa4b3a8d5f22ea512f67fd2b9ce0a572de0323


### Identifying all the unique documents 

In [3]:
docuTitle = data[['documentId', 'title', 'time']].copy()
docuTitle = docuTitle[~docuTitle['documentId'].isnull()]
docuTitle.drop_duplicates(subset=['documentId'], inplace=True)
print(docuTitle.shape)

(20344, 3)


In [4]:
docuTitle.insert(0, 'DocuID', range(0, 20344))

In [5]:
docuTitle.head()

Unnamed: 0,DocuID,documentId,title,time
2,0,cbfe5a35b79b2362796607c1e381a5f835394aa4,- Landet bygges ikke bare mellom klokken 8 og 16,1488063624
4,1,b3aa4b3a8d5f22ea512f67fd2b9ce0a572de0323,To innbruddsforsøk på tre dager,1488063629
8,2,da0bc63984b8948df5f0b70b167b1f1c2b58e5a2,Gull-Marit: – Comebacket umulig uten Fred Børre,1488063659
9,3,411a1eaed3f10b453dd8d963fe9bf937d8a82b65,Så ofte bør du vaske tak og vegger i huset,1488063660
15,4,cdfcf41324251f750be0db169cb5ae1b739a15bf,Mulig å bygge superbusstasjoner til en fjerded...,1488063683


### Identifying all the unique users 

In [6]:
users = data[['userId']].copy()
users = users[~users['userId'].isnull()]
users.drop_duplicates(subset=['userId'], inplace=True)
print(users.shape)

(1000, 1)


In [7]:
users.insert(0, 'UserID', range(0, 1000))

In [8]:
users.head()

Unnamed: 0,UserID,userId
0,0,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y
1,1,cx:16lfhhoa0byyq3c06phiivxcas:qf0oz0oiyr1g
2,2,cx:13512028457711940833770:1cg93fqzhke70
3,3,cx:htkipe5w759w5vw9:2souxtezokfus
5,4,cx:ij8oq9zo22617jtj:358ocuqlq8g26


### Identifying all the informative events 

In [9]:
event_logs = data[['userId', 'documentId', 'activeTime']].copy()

In [10]:
event_logs.shape

(2207608, 3)

In [11]:
event_logs = event_logs[~event_logs['activeTime'].isnull()]

In [12]:
event_logs.shape

(1351878, 3)

In [13]:
event_logs.head()

Unnamed: 0,userId,documentId,activeTime
0,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y,,9.0
3,cx:htkipe5w759w5vw9:2souxtezokfus,,32.0
4,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y,b3aa4b3a8d5f22ea512f67fd2b9ce0a572de0323,48.0
7,cx:1mlm8olu6w8ayu4zlqnq777x:rsedmg95nj2q,,12.0
8,cx:htkipe5w759w5vw9:2souxtezokfus,da0bc63984b8948df5f0b70b167b1f1c2b58e5a2,168.0


In [14]:
event_logs = event_logs[~event_logs['documentId'].isnull()]

In [15]:
event_logs.shape

(409279, 3)

### Normalising/mapping the Active time to a value in range 0 to 1 from the actual spread of 0 to 899.

In [16]:
column_names_to_normalize = ['activeTime']
x = event_logs[column_names_to_normalize].values
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
df_temp = pd.DataFrame(x_scaled, columns=column_names_to_normalize, index = event_logs.index)
event_logs[column_names_to_normalize] = df_temp

In [17]:
event_logs.head()

Unnamed: 0,userId,documentId,activeTime
4,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y,b3aa4b3a8d5f22ea512f67fd2b9ce0a572de0323,0.052339
8,cx:htkipe5w759w5vw9:2souxtezokfus,da0bc63984b8948df5f0b70b167b1f1c2b58e5a2,0.185969
9,cx:1mlm8olu6w8ayu4zlqnq777x:rsedmg95nj2q,411a1eaed3f10b453dd8d963fe9bf937d8a82b65,0.082405
15,cx:htq14rid67bygskq:1cmzr15o51hq0,cdfcf41324251f750be0db169cb5ae1b739a15bf,0.025612
22,cx:3fjnxmsh9p4koj9lhzmb811gt:1qja31cvau6xt,cdfcf41324251f750be0db169cb5ae1b739a15bf,0.129176


#### Adding Document titles 

In [18]:
collabDocu = pd.merge(docuTitle, event_logs, on='documentId')

In [19]:
collabDocu.shape

(409279, 6)

#### Adding new user IDs 

In [20]:
collabUserDocu = pd.merge(users, collabDocu, on='userId')

In [21]:
collabUserDocu.shape

(409279, 7)

In [22]:
collabUserDocu.head()

Unnamed: 0,UserID,userId,DocuID,documentId,title,time,activeTime
0,0,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y,1,b3aa4b3a8d5f22ea512f67fd2b9ce0a572de0323,To innbruddsforsøk på tre dager,1488063629,0.052339
1,0,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y,2,da0bc63984b8948df5f0b70b167b1f1c2b58e5a2,Gull-Marit: – Comebacket umulig uten Fred Børre,1488063659,0.08686
2,0,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y,4,cdfcf41324251f750be0db169cb5ae1b739a15bf,Mulig å bygge superbusstasjoner til en fjerded...,1488063683,0.121381
3,0,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y,4,cdfcf41324251f750be0db169cb5ae1b739a15bf,Mulig å bygge superbusstasjoner til en fjerded...,1488063683,0.07461
4,0,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y,6,840af31130cac69eb1effeb60eedcc022f5c9adc,Langrennssjef Tor-Arne Hetland tok ikke hintet...,1488063706,0.488864


In [23]:
missing_pivot = collabUserDocu.pivot_table(values = 'activeTime', index = 'UserID', columns = 'title')

## User-Document Utility Matrix

In [24]:
missing_pivot.head()

title,"""Magic in Trondheim""",#ælsketrøndelag,&laquo;Fræm fra hæsjan!&raquo;,&laquo;Vår eneste hensikt er hyggelig samvær&raquo;,&ndash; Vi er luksusinnvandrere,&ndash; Østbyen mangler bydelspub,- 10 000 brannfarlige radioer i norske hjem,- 160-200 ansatte mister jobben sin ved Britannia,- 22. juli har blitt et tabu i Ap,- 36 000 studenter som skal finne kjæreste merkes i restauranten,...,– Vi har aldri vært i nærheten av så gode som i dag,"– Vi har ulike roller. Noen skal selge aviser, jeg skal lede norsk skisport.",– Vi investerer i fremtidig vekst,– Vi lurte på om Bendtner ville dra tilbake til Nottingham,– Vi sto på gaten utenfor sykehuset og ante ikke hvordan vi skulle gi guttene beskjeden om at pappaen kom til å dø,– Vil være den beste 3. divisjonsspilleren i landet om han signerer,– Vårt sosiale engasjement kan ikke vurderes ut ifra medieoppslag,– Ønsker definitivt å hente en midtstopper før vi drar til Gran Canaria,–Trump kommer med helsereform-advarsler,”Helikopterskandale” gir mer fregattrøbbel
UserID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,,,,,,0.045657,,,,...,,0.070156,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,0.266147,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,


## Identifying Documents each user has read.

In [25]:
read = {}
rows_indexes = {}
for i,row in missing_pivot.iterrows():
    rows = [x for x in range(0,len(missing_pivot.columns))]
    combine = list(zip(row.index, row.values, rows))
    readd = [(x,z) for x,y,z in combine if str(y) != 'nan']
    index = [i[1] for i in readd]
    row_names = [i[0] for i in readd]
    rows_indexes[i] = index
    read[i] = row_names

In [26]:
read[38]

['- Jeg havnet på sykehus etter råd fra personlig trener',
 '- Jeg må si jeg beundrer de damene som er gift med sånne som meg.',
 '- Sønnen er svært preget',
 '72-åring dømt til betinget fengsel etter dødsulykke',
 'Alvorlig trafikkulykke i Grillstadtunnelen',
 'Ansatte i Reinertsen flytter til Leangen',
 'Apple ruller ut Ios 10.3 til alle enheter – dette er nytt',
 'Astrid Uhrenholdt Jacobsen: – Jeg vil ikke bli savnet når jeg slutter',
 'Bil heist ned fra bergknaus etter utforkjøring',
 'Bjørgen ble glidd fra og røk ut. NRK-profil har aldri sett på maken.',
 'Bjørgen: – Det begynner å nærme seg slutten',
 'Borettslaget kastet rattkjelken til Simon (6) på dynga',
 'Bronsevinneren var så nervøs at han trente to økter før løpet',
 'Brå om Bjørgen: - Det var kanskje mer ille enn man trodde',
 'Byåsen-profil må opereres på grunn av hjertetrøbbel',
 'City Lade ble evakuert',
 'Da langrennsjentene kritiserte forbundet, skjedde det noe med kvinnesatsingen',
 'De skulle lage gullfest på hjemm

In [27]:
pivot_table = collabUserDocu.pivot_table(values = 'activeTime', index = 'UserID', columns = 'title').fillna(0)

In [28]:
pivot_table = pivot_table.apply(np.sign)

## User-Document Sparse Matrix

In [29]:
pivot_table.head()

title,"""Magic in Trondheim""",#ælsketrøndelag,&laquo;Fræm fra hæsjan!&raquo;,&laquo;Vår eneste hensikt er hyggelig samvær&raquo;,&ndash; Vi er luksusinnvandrere,&ndash; Østbyen mangler bydelspub,- 10 000 brannfarlige radioer i norske hjem,- 160-200 ansatte mister jobben sin ved Britannia,- 22. juli har blitt et tabu i Ap,- 36 000 studenter som skal finne kjæreste merkes i restauranten,...,– Vi har aldri vært i nærheten av så gode som i dag,"– Vi har ulike roller. Noen skal selge aviser, jeg skal lede norsk skisport.",– Vi investerer i fremtidig vekst,– Vi lurte på om Bendtner ville dra tilbake til Nottingham,– Vi sto på gaten utenfor sykehuset og ante ikke hvordan vi skulle gi guttene beskjeden om at pappaen kom til å dø,– Vil være den beste 3. divisjonsspilleren i landet om han signerer,– Vårt sosiale engasjement kan ikke vurderes ut ifra medieoppslag,– Ønsker definitivt å hente en midtstopper før vi drar til Gran Canaria,–Trump kommer med helsereform-advarsler,”Helikopterskandale” gir mer fregattrøbbel
UserID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [30]:
notread = {}
notread_indexes = {}
for i,row in pivot_table.iterrows():
    rows = [x for x in range(0,len(missing_pivot.columns))]
    combine = list(zip(row.index, row.values, row))
    idx_row = [(idx,col) for idx, val, col in combine if not val > 0]
    indices = [i[1] for i in idx_row]
    row_names = [i[0] for i in idx_row]
    notread_indexes[i] = indices
    notread[i] = row_names

In [31]:
notread[38]

['"Magic in Trondheim"',
 '#ælsketrøndelag',
 '&laquo;Fræm fra hæsjan!&raquo;',
 '&laquo;Vår eneste hensikt er hyggelig samvær&raquo;',
 '&ndash; Vi er luksusinnvandrere',
 '&ndash; Østbyen mangler bydelspub',
 '- 10 000 brannfarlige radioer i norske hjem',
 '- 160-200 ansatte mister jobben sin ved Britannia',
 '- 22. juli har blitt et tabu i Ap',
 '- 36 000 studenter som skal finne kjæreste merkes i restauranten',
 '- 4,2 enheter alkohol i uka? De bløffer!',
 '- AUF vil ikke ha en mur rundt Midtbyen',
 '- Adressa er del av trønderidentiteten',
 '- Adressa er en verdig konkurrent',
 '- Afghanske politikere er bare på TV',
 '- Agatha Christie var min første kjærlighet',
 '- Akkurat nå er det en hemsko å bo i London og ikke i Trondheim',
 '- Aldri mer samarbeid med Frp',
 '- Aldri sett noe liknende',
 '- Aldri tatt en prostituert med så mye penger',
 '- Aldri, aldri, aldri ta med rumpetaske',
 '- Alle bussene kan ikke gå innom Midtbyen',
 '- Alle fakta må på bordet',
 '- Alle forskere er

## Unsupervised Nearest Neighbor Recommender

In [32]:
n = 5
cosine_knn = NearestNeighbors(n_neighbors = n, algorithm = 'brute', metric = 'cosine')
docu_cosine_knn_fit = cosine_knn.fit(pivot_table.T.values)
docu_distances, docu_indices = docu_cosine_knn_fit.kneighbors(pivot_table.T.values)

In [33]:
docus_dic = {}
for i in range(len(pivot_table.T.index)):
    docu_idx = docu_indices[i]
    col_names = pivot_table.T.index[docu_idx].tolist()
    docus_dic[pivot_table.T.index[i]] = col_names

In [34]:
topRecs = {}
for k,v in rows_indexes.items():
    docu_idx = [j for i in docu_indices[v] for j in i]
    docu_dist = [j for i in docu_distances[v] for j in i]
    combine = list(zip(docu_dist, docu_idx))
    diction = {i:d for d,i in combine if i not in v}
    zipped = list(zip(diction.keys(),diction.values()))
    sort = sorted(zipped, key = lambda x: x[1])
    recommendations = [(pivot_table.columns[i], d) for i,d in sort]
    topRecs[k] = recommendations

In [35]:
def getrecommendations(user, number_of_recs = 10):
    if user > len(pivot_table.index):
        print('New User, Out of range'.format(len(pivot_table.index)))
    else:
        print("We recommend you these documents \n")
    for k,v in topRecs.items():
        if user == k:
            for i in v[:number_of_recs]:
                print('{} with similarity: {:.4f}'.format(i[0], 1 - i[1]))

## These are the top recommendations.

In [36]:
getrecommendations(38)

We recommend you these documents 

Even Brandsdal til årsmøtet: - Jeg blir skikkelig provosert! with similarity: 0.5774
Islandsk stopper på Viking-besøk: - Jeg vet en god del om klubben allerede with similarity: 0.5774
TUIL fikk hemmelig støtte til Johnsgård-avtale with similarity: 0.5774
TILs nye millionspiss er på plass - forteller om grepet som snudde karrieren with similarity: 0.5774
RBK fikk Europa midt i fleisen: - Vi fikk sjokk with similarity: 0.5274
Strøk på den medisinske testen - RBK vurderer å hente Spalvis likevel with similarity: 0.5212
Pasienter fra medisinsk senter i Trondheim ringer til Aslak fra Finnsnes hver dag with similarity: 0.5164
Niklas Dyrhaug trodde ikke på mammas hemmelighet with similarity: 0.5149
Opptil 100 lastebiler passerer hver time with similarity: 0.4987
Rettssaken etter dødsbrannen på Øysand ble stoppet. with similarity: 0.4968


## Evaluation using RMSE

In [37]:
docu_distances = 1 - docu_distances

In [38]:
predictions = docu_distances.T.dot(pivot_table.T.values) / np.array([np.abs(docu_distances.T).sum(axis = 1)]).T

In [39]:
ground_truth = pivot_table.T.values[docu_distances.argsort()[0]]

In [40]:
def rmse(prediction, ground_truth):
    prediction = prediction[ground_truth.nonzero()].flatten()
    ground_truth = ground_truth[ground_truth.nonzero()].flatten()
    return sqrt(mean_squared_error(prediction, ground_truth))

### RMSE

In [41]:
error_rate = rmse(predictions,ground_truth)
print(error_rate)

0.9747803277795116


### Accuracy

In [42]:
print(100 - error_rate)

99.02521967222049


In [43]:
len(topRecs)

994

In [44]:
ColRec = pd.DataFrame(list(topRecs.items()),columns = ['Users','Recommendations'])

In [45]:
ColRec.head()

Unnamed: 0,Users,Recommendations
0,0,"[(I dag fyller mannen bak dette bildet 70, 0.2..."
1,1,[(Slik avsløres alle passordene dine i nettles...
2,2,[(Romanias president erklærer full politisk kr...
3,3,"[(Graffiti er blitt stuerent, 0.29289321881345..."
4,4,"[(4000 så premieren på Stiklestad-spelet, 0.42..."


# Approach 2
# User-Document-based recommender using Surprise

In [46]:
collabUserDocu.head()

Unnamed: 0,UserID,userId,DocuID,documentId,title,time,activeTime
0,0,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y,1,b3aa4b3a8d5f22ea512f67fd2b9ce0a572de0323,To innbruddsforsøk på tre dager,1488063629,0.052339
1,0,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y,2,da0bc63984b8948df5f0b70b167b1f1c2b58e5a2,Gull-Marit: – Comebacket umulig uten Fred Børre,1488063659,0.08686
2,0,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y,4,cdfcf41324251f750be0db169cb5ae1b739a15bf,Mulig å bygge superbusstasjoner til en fjerded...,1488063683,0.121381
3,0,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y,4,cdfcf41324251f750be0db169cb5ae1b739a15bf,Mulig å bygge superbusstasjoner til en fjerded...,1488063683,0.07461
4,0,cx:2ry8kk0pg92r51msqcyu96z5u6:3mhvo7hdj9g6y,6,840af31130cac69eb1effeb60eedcc022f5c9adc,Langrennssjef Tor-Arne Hetland tok ikke hintet...,1488063706,0.488864


In [47]:
activeTimes = collabUserDocu.drop(['userId', 'documentId', 'title', 'time'], axis = 1)

In [48]:
activeTimes.shape

(409279, 3)

In [49]:
activeTimes.head()

Unnamed: 0,UserID,DocuID,activeTime
0,0,1,0.052339
1,0,2,0.08686
2,0,4,0.121381
3,0,4,0.07461
4,0,6,0.488864


## Fitting the model

In [50]:
activeTimes.columns = ['uid', 'iid', 'rating']

In [51]:
activeTimes.head()

Unnamed: 0,uid,iid,rating
0,0,1,0.052339
1,0,2,0.08686
2,0,4,0.121381
3,0,4,0.07461
4,0,6,0.488864


In [52]:
lower_rating = activeTimes['rating'].min()
upper_rating = activeTimes['rating'].max()
print(lower_rating, upper_rating)

0.0 1.0


In [53]:
reader = surprise.Reader(rating_scale = (0.0,1.))
Modeldata = surprise.Dataset.load_from_df(activeTimes, reader)

In [54]:
alg = surprise.SVDpp()
output = alg.fit(Modeldata.build_full_trainset())

In [55]:
pred = alg.predict(uid='50', iid='52')
score = pred.est
print(score)

0.08291083786534474


## Making Recommendations

In [56]:
iids = activeTimes['iid'].unique()
iids50 = activeTimes.loc[activeTimes['uid'] == 50, 'iid']
iids_to_pred = np.setdiff1d(iids, iids50)

In [57]:
testset = [[50, iid, 1.] for iid in iids_to_pred]
predictions = alg.test(testset)
predictions[50]

Prediction(uid=50, iid=52, r_ui=1.0, est=0.11499262586814175, details={'was_impossible': False})

## Evaluating the model

In [58]:
pred_ratings = np.array([pred.est for pred in predictions])
i_max = pred_ratings.argmax()
iid = iids_to_pred[i_max]

In [59]:
print ('Top item for user 50 has iid {0} with predicted rating {1}'.format(iid, pred_ratings[i_max]))

Top item for user 50 has iid 10804 with predicted rating 0.38559399467316435


In [60]:
param_grid = {'lr_all' : [.001, .01], 'reg_all' : [.1, .5]}
gs = surprise.model_selection.GridSearchCV(surprise.SVDpp, param_grid, measures=['rmse', 'mae'], cv=3)
gs.fit(Modeldata)
print(gs.best_params['rmse'])

{'lr_all': 0.01, 'reg_all': 0.1}


In [61]:
alg = surprise.SVDpp(lr_all = .001)
output = surprise.model_selection.cross_validate(alg, Modeldata, verbose = True)

Evaluating RMSE, MAE of algorithm SVDpp on 5 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Mean    Std     
RMSE (testset)    0.0841  0.0841  0.0837  0.0836  0.0835  0.0838  0.0003  
MAE (testset)     0.0583  0.0586  0.0584  0.0582  0.0580  0.0583  0.0002  
Fit time          1931.95 1950.68 1935.88 1922.20 1933.61 1934.86 9.18    
Test time         37.79   39.79   38.16   37.95   39.02   38.54   0.75    
