### Hybrid Recommendations with SURPRISE

In [1]:
import pandas as pd
from surprise import Reader, SVD, Dataset, NormalPredictor, KNNBasic
from surprise.model_selection import cross_validate

In [9]:
df = pd.read_csv('data/ratings.csv', index_col=0)
df.reset_index(inplace=True)
df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,39032b0f7a,1,3.0,1997-11-06 06:36:16
1,39032b0f7a,17,3.0,1997-11-06 05:26:18
2,39032b0f7a,21,3.0,2000-10-02 07:45:30
3,39032b0f7a,34,4.0,1997-11-06 05:45:08
4,39032b0f7a,62,4.5,2003-06-18 04:09:17


In [10]:
reader = Reader(line_format='item user rating')
data = Dataset.load_from_df(df[['movieId', 'userId', 'rating']], reader)
train = data.build_full_trainset()
test = train.build_testset()

In [11]:
# SVD Model

svd = SVD(random_state = 42)
svd.fit(train)
svd_preds = svd.test(test)

svd_preds[:5]

[Prediction(uid=1, iid='39032b0f7a', r_ui=3.0, est=3.9464241285042614, details={'was_impossible': False}),
 Prediction(uid=1, iid='adc34e8c1b', r_ui=3.0, est=3.5885030965016194, details={'was_impossible': False}),
 Prediction(uid=1, iid='e9f9759020', r_ui=4.0, est=4.343249702383939, details={'was_impossible': False}),
 Prediction(uid=1, iid='d6dc47fa4c', r_ui=4.0, est=3.8117017022376, details={'was_impossible': False}),
 Prediction(uid=1, iid='5efaf0ea51', r_ui=4.5, est=4.1224996286239, details={'was_impossible': False})]

In [13]:
# SlopeOne Model

from surprise import SlopeOne
slope_one = SlopeOne()
slope_one.fit(train)
slope_one_preds = slope_one.test(test)

slope_one_preds[:5]

[Prediction(uid=1, iid='39032b0f7a', r_ui=3.0, est=3.9621791839024643, details={'was_impossible': False}),
 Prediction(uid=1, iid='adc34e8c1b', r_ui=3.0, est=3.625052008390217, details={'was_impossible': False}),
 Prediction(uid=1, iid='e9f9759020', r_ui=4.0, est=3.6355004516085514, details={'was_impossible': False}),
 Prediction(uid=1, iid='d6dc47fa4c', r_ui=4.0, est=3.8264403298080363, details={'was_impossible': False}),
 Prediction(uid=1, iid='5efaf0ea51', r_ui=4.5, est=3.942173773526506, details={'was_impossible': False})]

In [14]:
# Hybrid Predictions
hybrid_preds = [0.5*i.est + 0.5*j.est for i,j in zip(slope_one_preds, svd_preds)]

hybrid_preds[:5]

[3.954301656203363,
 3.6067775524459185,
 3.989375076996245,
 3.819071016022818,
 4.032336701075203]

In [15]:
# DataFrame of predictions

data = {'Title': [i.iid for i in slope_one_preds],
       'user_id': [i.uid for i in slope_one_preds],
       'hybrid_rating': hybrid_preds,
       'svd_rating': [i.est for i in svd_preds],
       'slope_one_rating': [i.est for i in slope_one_preds]}

hybrid_df = pd.DataFrame(data)

hybrid_df.head()

Unnamed: 0,Title,user_id,hybrid_rating,svd_rating,slope_one_rating
0,39032b0f7a,1,3.954302,3.946424,3.962179
1,adc34e8c1b,1,3.606778,3.588503,3.625052
2,e9f9759020,1,3.989375,4.34325,3.6355
3,d6dc47fa4c,1,3.819071,3.811702,3.82644
4,5efaf0ea51,1,4.032337,4.1225,3.942174
