# Using `surprise`

See the documentation [here](https://surprise.readthedocs.io/en/stable/getting_started.html)!

In [None]:
import datetime as dt
from pathlib import Path

import numpy as np
import pandas as pd
import surprise
from surprise.prediction_algorithms import *

## Agenda

Students will be able to (SWBAT):

- use the `surprise` package to build recommendation engines.

In [None]:
# Fetch (or reuse the cached copy of) the MovieLens 100k ratings.
# prompt=False skips the interactive "download? [Y/n]" question that
# load_builtin asks by default, so the notebook can Run All on a fresh machine.
data = surprise.Dataset.load_builtin('ml-100k', prompt=False)

Now that we've downloaded the data, we can find it in a hidden directory:

In [None]:
# Ask surprise where it stores downloads instead of hard-coding ~/.surprise_data,
# so the cell still works when SURPRISE_DATA_FOLDER relocates the cache.
ratings_path = Path(surprise.get_dataset_dir()) / 'ml-100k' / 'ml-100k' / 'u.data'

# u.data is read as tab-separated with no header row, so column names are given here.
df = pd.read_csv(ratings_path, sep='\t', header=None,
                 names=['user', 'item', 'rating', 'timestamp'])
df

## Data Exploration

In [None]:
# Count of distinct user ids present in the ratings table.
n_users = df['user'].nunique()
n_users

In [None]:
# Count of distinct item ids present in the ratings table.
n_items = df['item'].nunique()
n_items

In [None]:
# Summary statistics for the rating and timestamp columns.
# `stats` is kept around because the timestamp range is read from it below.
summary_cols = ['rating', 'timestamp']
stats = df[summary_cols].describe()
stats

In [None]:
# Print the earliest and latest rating times. The timestamps are interpreted as
# Unix epoch seconds; converting with an explicit UTC tzinfo keeps the printed
# dates independent of the timezone of the machine running the notebook.
for bound in ('min', 'max'):
    epoch_seconds = stats.loc[bound, 'timestamp']
    print(dt.datetime.fromtimestamp(epoch_seconds, tz=dt.timezone.utc))

In [None]:
# Build the Reader preconfigured for the built-in 'ml-100k' dataset.
read = surprise.Reader(name='ml-100k')

In [None]:
# Inspect the rating scale this reader is configured with.
read.rating_scale

In [None]:
# Confirm which class the reader object is an instance of.
type(read)

## Modeling

In [None]:
# Hold out 20% of the ratings (surprise's default test_size, spelled out here)
# for evaluation; the fixed seed keeps the split identical between runs.
train, test = surprise.model_selection.train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Neighborhood-based collaborative filtering with KNNBasic's default settings.
# fit() returns the fitted estimator itself, so `model` is the trained KNN.
knn = KNNBasic()
model = knn.fit(train)

$\hat{r}_{ui} = \frac{
    \sum\limits_{v \in N^k_i(u)} \text{sim}(u, v) \cdot r_{vi}}
    {\sum\limits_{v \in N^k_i(u)} \text{sim}(u, v)}$
    OR (when similarities are computed between items instead of users)
$\hat{r}_{ui} = \frac{
    \sum\limits_{j \in N^k_u(i)} \text{sim}(i, j) \cdot r_{uj}}
    {\sum\limits_{j \in N^k_u(i)} \text{sim}(i, j)}$

In [None]:
# Matrix-factorization model. SVD initialises its factors randomly, so the seed
# is pinned (same value as the train/test split) to make the metrics repeatable.
model2 = SVD(random_state=42).fit(train)

$\sum_{r_{ui} \in R_{train}} \left(r_{ui} - \hat{r}_{ui} \right)^2 +
    \lambda\left(b_i^2 + b_u^2 + ||q_i||^2 + ||p_u||^2\right)$

In [None]:
# Non-negative matrix factorization. NMF also starts from random factors, so the
# seed is pinned (same value as the train/test split) to make the metrics repeatable.
model3 = NMF(random_state=42).fit(train)

$\hat{r}_{ui} = q_i^Tp_u$

In [None]:
# Ask the fitted KNN model for the single nearest neighbor of id 51.
# NOTE(review): get_neighbors takes and returns surprise *inner* ids, not the raw
# ids stored in u.data, and `model` was created with KNNBasic's default
# sim_options (user-based similarity), so `iid=51` most likely refers to an inner
# *user* id here. If item neighbors are intended, fit with
# sim_options={'user_based': False} and translate ids with train.to_inner_iid /
# train.to_raw_iid -- TODO confirm intent.
model.get_neighbors(iid=51, k=1)

In [None]:
# Show every rating given to item 51 or item 65, ordered by user, so the two
# items' raters can be compared side by side.
item_pair = [51, 65]
is_in_pair = df['item'].isin(item_pair)

df.loc[is_in_pair].sort_values('user')

## Evaluation

In [None]:
# model.test returns one Prediction per held-out rating; preview only the first
# few instead of dumping the whole list into the notebook output.
model.test(test)[:5]

In [None]:
# Score every fitted model on the held-out ratings.
# Predictions are generated once per model and reused for both metrics, rather
# than re-running .test() separately for each MAE and RMSE call.
fitted_models = {'KNNBasic': model, 'SVD': model2, 'NMF': model3}

scores = {}
for label, algo in fitted_models.items():
    predictions = algo.test(test)
    scores[label] = {
        # verbose=False: return the value without also printing it.
        'MAE': surprise.accuracy.mae(predictions, verbose=False),
        'RMSE': surprise.accuracy.rmse(predictions, verbose=False),
    }

# One row per model, one column per metric (lower is better for both).
pd.DataFrame.from_dict(scores, orient='index')