### Imports

In [44]:
from models import SurpriseModel, RecSys, TorchModel
from preprocessing import DataProcessor
import pandas as pd
from surprise import SVDpp, BaselineOnly, KNNBasic
import numpy as np
from score import score

In [45]:
def predict_3(model, user, title):
    """Query ``model`` three times for the same (user, title) pair.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(user, title)``. When the object's class
        is named ``'TorchModel'``, each result is unwrapped with ``.item()``.
    user, title
        Passed through to ``model.predict`` unchanged.

    Returns
    -------
    numpy.ndarray
        Array built from the three prediction results, in call order.
    """
    # Decide once whether results need unwrapping; the check is by class
    # name, not isinstance, so no import of the class is required here.
    needs_item = model.__class__.__name__ == 'TorchModel'
    raw_results = (model.predict(user, title) for _ in range(3))
    return np.array([res.item() if needs_item else res for res in raw_results])

In [46]:
# Load the augmented user-rating CSV into a DataFrame. The path is relative,
# so it resolves against the notebook's current working directory.
user_data = pd.read_csv("../Course_Scraper/assets/augumented_data/augmented_user_rating.csv")

In [47]:
# Project-local preprocessing helper (imported from `preprocessing`),
# constructed with its default arguments.
dp = DataProcessor()

In [48]:
# Run preprocessing on the raw ratings. The return value is not captured,
# so the processed data is presumably stored on `dp` itself — TODO confirm.
dp.preprocessing(user_data)

In [49]:
# Train/test split used by the surprise-based models below: `train` is fed to
# SurpriseModel.fit and `test` is iterated as (user, title, rating) triples.
train, test = dp.surprise_split()

### Baseline Model

In [50]:
# Baseline-estimate configuration: biases are fitted with SGD for 20 epochs
# at a learning rate of 0.001.
bsl_options = dict(
    n_epochs=20,
    method="sgd",
    learning_rate=0.001,
    # NOTE(review): `seed` does not appear among surprise's documented
    # bsl_options keys — confirm it actually has an effect.
    seed=67,
)
algo = BaselineOnly(bsl_options=bsl_options)

In [51]:
# Wrap the configured BaselineOnly algorithm in the project's SurpriseModel
# wrapper, which provides the fit / save_model / predict calls used below.
model = SurpriseModel(algo)

In [52]:
# Fit on the surprise training split, then persist the result. The chained
# call relies on fit() returning an object that exposes save_model().
model.fit(train).save_model()

Training Model: BaselineOnly...
Estimating biases using sgd...


In [53]:
# Walk the surprise test set once, recording each true rating alongside the
# baseline model's three predictions for that (user, title) pair.
predictions_bsl, ratings_bsl = [], []
for user, title, rating in test:
    ratings_bsl.append(rating)
    predictions_bsl.append(predict_3(model, user, title))

In [54]:
# Score the baseline predictions (one 3-element array per test row) against
# the true ratings using the project's `score` function.
print(f'Score for BaseLine mode: {score(ratings_bsl, predictions_bsl)}')

Score for BaseLine mode: 0.6641402179062056


### KNN

In [15]:
# User-based KNN: neighbours are users, compared with cosine similarity.
sim_options = dict(name='cosine', user_based=True)
knn = KNNBasic(sim_options=sim_options)

In [16]:
# Rebinds `model`: from here on it refers to the KNN wrapper, and any
# previously bound model object is no longer reachable under this name.
model = SurpriseModel(knn)

In [17]:
# Fit the KNN model on the surprise training split and persist it; relies on
# fit() returning an object that exposes save_model().
model.fit(train).save_model()

Training Model: KNNBasic...
Computing the cosine similarity matrix...
Done computing similarity matrix.


In [19]:
# Walk the surprise test set once, recording each true rating alongside the
# KNN model's three predictions for that (user, title) pair.
predictions_knn, ratings = [], []
for user, title, rating in test:
    ratings.append(rating)
    predictions_knn.append(predict_3(model, user, title))

In [20]:
# Score the KNN predictions against the ratings collected in the same loop.
print(f'Score for KNN mode: {score(ratings, predictions_knn)}')

Score for KNN mode: 0.6387967787778304


### Neural Network (NN)

In [21]:
# NOTE: this rebinds `train` and `test`, replacing the surprise split from
# dp.surprise_split(). From here on `test` is a DataFrame (it is iterated
# with .iterrows() below), so the surprise evaluation cells above cannot be
# re-run after this cell without re-running the surprise split first.
train, val, test = dp.split_data()
# Build train/validation loaders; 8192 is passed as the third argument
# (presumably the batch size — TODO confirm against DataProcessor.make_batches).
train_loader, val_loader = dp.make_batches(train, val, 8192)

In [22]:
# Size the network from the number of entries in the user and title mappings
# held by the DataProcessor, then move it to the GPU.
# NOTE(review): the device is hard-coded to 'cuda', so this cell fails on a
# machine without a CUDA device.
recsys = RecSys(len(dp.user_mapping), len(dp.title_mapping)).to('cuda')

In [23]:
# Rebinds `model` to the TorchModel wrapper around the network; predict_3
# detects this class by name and unwraps each prediction with .item().
model = TorchModel(recsys)

In [24]:
# Training is intentionally disabled: model.load_model() is called instead
# in a later cell. Uncomment the line below to retrain from scratch.
# train_losses, val_losses = model.fit(train_loader, val_loader)

In [25]:
# Restore a previously saved model state instead of training. No path is
# passed, so the location is whatever TorchModel.load_model defaults to.
model.load_model()

In [27]:
# Evaluate the neural model on the held-out DataFrame split.
#
# itertuples(index=False, name=None) yields one plain tuple per row with each
# column keeping its own dtype. The previous iterrows() call built a Series
# per row, which upcasts mixed columns to a single dtype (e.g. integer ids
# become floats next to a float rating) and is considerably slower.
#
# Assumes `test` has exactly three columns in (user, title, rating) order,
# as the original three-way unpacking already required.
predictions_nn = []
ratings = []
for u, t, r in test.itertuples(index=False, name=None):
    ratings.append(r)
    predictions_nn.append(predict_3(model, u, t))

In [28]:
# Score the neural-network predictions against the ratings from the
# DataFrame test split collected in the same loop.
print(f'Score for NN mode: {score(ratings, predictions_nn)}')

Score for NN mode: 0.6197519083969466


### Ensemble

In [55]:
# Ensemble members: the baseline and KNN predictions, each converted to an
# array. predictions_nn is not included here.
enb = [np.array(member) for member in (predictions_bsl, predictions_knn)]

In [57]:
# np.mean(..., axis=0) averages the two members element-wise, giving one
# blended prediction array per test row, which is then scored against the
# ratings collected during the baseline evaluation.
print(f'Ensemble Score is: {score(ratings_bsl, np.mean(enb, axis=0))}')

Ensemble Score is: 0.6563240170535292
