In [4]:
import pandas as pd
import numpy as np

from sklearn.metrics import root_mean_squared_error 
from surprise import Dataset, Reader
from surprise.dataset import DatasetAutoFolds
from surprise import SVD, NMF, KNNBasic
from surprise.accuracy import rmse, mse
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from typing import List

In [5]:
filePathReviewF: str = "reviews_filtered.csv"
authorId: str = "AuthorId"
recipeId: str = "RecipeId"

df_food_reviews = pd.read_csv(filePathReviewF, usecols=["RecipeId", "AuthorId", "Rating"])
df_food_reviews = df_food_reviews[["AuthorId", "RecipeId", "Rating"]]

reader = Reader(rating_scale=(1,5))
rd : DatasetAutoFolds = Dataset.load_from_df(df_food_reviews,reader)

In [6]:
train_set, test_test = train_test_split(rd, test_size=0.2)
default_measures: List[str] = ["RMSE", "MSE" ]
svd = SVD()
nmf = NMF()

svd_cv_results = cross_validate(svd, data=rd, cv=20,
                                verbose=True, measures=default_measures)
nmf_cv_results = cross_validate(nmf, data=rd, cv=20,
                                verbose=True, measures=default_measures)


Evaluating RMSE, MSE of algorithm SVD on 20 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Fold 11 Fold 12 Fold 13 Fold 14 Fold 15 Fold 16 Fold 17 Fold 18 Fold 19 Fold 20 Mean    Std     
RMSE (testset)    1.3516  1.5527  1.4245  1.4941  1.4509  1.3706  1.4529  1.3733  1.3046  1.6271  1.4553  1.3873  1.4597  1.4890  1.6282  1.5954  1.6103  1.5001  1.5982  1.5320  1.4829  0.0952  
MSE (testset)     1.8268  2.4108  2.0293  2.2323  2.1053  1.8785  2.1111  1.8860  1.7019  2.6474  2.1178  1.9245  2.1306  2.2170  2.6509  2.5452  2.5931  2.2502  2.5542  2.3470  2.2080  0.2823  
Fit time          0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.00    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.0

In [7]:
svd.fit(train_set)
nmf.fit(train_set)

<surprise.prediction_algorithms.matrix_factorization.NMF at 0x79f0f89425b0>

In [8]:
svd.test(test_test)

[Prediction(uid=23302, iid=15066, r_ui=5.0, est=4.048120444124036, details={'was_impossible': False}),
 Prediction(uid=9333, iid=11572, r_ui=5.0, est=4.0551073374226645, details={'was_impossible': False}),
 Prediction(uid=5899, iid=2923, r_ui=0.0, est=4.076083292800921, details={'was_impossible': False}),
 Prediction(uid=19799, iid=8765, r_ui=4.0, est=4.575572045642725, details={'was_impossible': False}),
 Prediction(uid=15712, iid=14787, r_ui=5.0, est=4.038598659500913, details={'was_impossible': False}),
 Prediction(uid=19008, iid=10281, r_ui=4.0, est=3.9170616715247215, details={'was_impossible': False}),
 Prediction(uid=21547, iid=566, r_ui=4.0, est=4.081559569719116, details={'was_impossible': False}),
 Prediction(uid=10216, iid=13103, r_ui=4.0, est=4.491631097334371, details={'was_impossible': False}),
 Prediction(uid=6357, iid=14942, r_ui=5.0, est=3.6705903827453774, details={'was_impossible': False}),
 Prediction(uid=15656, iid=11412, r_ui=0.0, est=4.052311426543937, details={'

In [16]:
def adam_optimizer(test_set, weights: List[float], lr: float = 0.1,
                   beta_1: float = 0.9, beta_2: float = 0.999,
                   epislon: float = 1e-8, h: float = 0.01,
                   n_range: int  = 5000):
    m1_dw = np.zeros_like(weights)
    v1_dw = np.zeros_like(weights)

    time_step: int = 0

    for i in range(n_range):
        grads: float = np.zeros_like(weights)

        # Calculate derivative
        for j in range(len(weights)):
            new_weights = np.copy(weights)
            new_weights[j] += h
            grads[j] = (obj_func(test_set, new_weights) - obj_func(test_set, weights)) / h # Implement Turunan
        time_step += 1
        
        # Update momentum Weights
        m1_dw = beta_1 * m1_dw + (1 - beta_1) * grads

        # Update rms Weight
        v1_dw = beta_2 * v1_dw + (1 - beta_2) * (grads ** 2)


        # Bias corrected momentum
        m1_dw_corrected = m1_dw / (1 - beta_1 ** time_step)
        v1_dw_corrected = v1_dw / (1 - beta_2 ** time_step)

        #update weights

        weights -= lr * m1_dw_corrected / (np.sqrt(v1_dw_corrected) + epislon)

    print("Opimizer Weights:", weights)


def cst_rmse(squared_error: List):
    return np.sqrt(np.mean(squared_error))

def parallel_weighted(preds_svd, preds_nmf,
                     weight_svd=0.5, weight_nmf=0.5):
    weighted_preds: List = []
    for pred_svd, pred_nmf in zip(preds_svd, preds_nmf):
        squared_error = (pred_svd.est * weight_svd - pred_nmf.est * weight_nmf)**2
        weighted_preds.append(squared_error)
    return weighted_preds

def obj_func(test_set, weights):
    predictions_svd: List = svd.test(test_set)
    predictions_nmf: List = nmf.test(test_set) 
    squared_error: List  = parallel_weighted(predictions_svd, predictions_nmf,
                                              weights[0], weights[1])
    return(cst_rmse(squared_error))

preds_svd = svd.test(test_test)
preds_nmf = nmf.test(test_test)


adam_optimizer(test_test, [0.5, 0.5])

Opimizer Weights: [-0.30260972 -0.32010963]


In [17]:
cst_rmse(parallel_weighted(preds_svd, preds_nmf, weight_nmf=-0.32010963, weight_svd=-0.30260972))

0.22087185215846591