In [34]:
import pandas as pd
import numpy as np

from sklearn.metrics import root_mean_squared_error 
from surprise import Dataset, Reader
from surprise.dataset import DatasetAutoFolds
from surprise import SVD, NMF, KNNBasic
from surprise.accuracy import rmse, mse
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split
from typing import List
np.random.seed(0)

In [35]:
filePathReviewF: str = "reviews_filtered.csv"
authorId: str = "AuthorId"
recipeId: str = "RecipeId"

df_food_reviews = pd.read_csv(filePathReviewF, usecols=["RecipeId", "AuthorId", "Rating"])
df_food_reviews = df_food_reviews[["AuthorId", "RecipeId", "Rating"]]

reader = Reader(rating_scale=(1,5))
rd : DatasetAutoFolds = Dataset.load_from_df(df_food_reviews,reader)

In [36]:
train_set, test_test = train_test_split(rd, test_size=0.2)
default_measures: List[str] = ["RMSE", "MSE" ]
svd = SVD()
nmf = NMF()

svd_cv_results = cross_validate(svd, data=rd, cv=20,
                                verbose=True, measures=default_measures)
nmf_cv_results = cross_validate(nmf, data=rd, cv=20,
                                verbose=True, measures=default_measures)

Evaluating RMSE, MSE of algorithm SVD on 20 split(s).

                  Fold 1  Fold 2  Fold 3  Fold 4  Fold 5  Fold 6  Fold 7  Fold 8  Fold 9  Fold 10 Fold 11 Fold 12 Fold 13 Fold 14 Fold 15 Fold 16 Fold 17 Fold 18 Fold 19 Fold 20 Mean    Std     
RMSE (testset)    1.5552  1.6753  1.5655  1.3757  1.6958  1.2040  1.4259  1.4979  1.4524  1.5820  1.4831  1.2807  1.4208  1.4452  1.5393  1.5396  1.4015  1.3171  1.4892  1.6935  1.4820  0.1282  
MSE (testset)     2.4186  2.8068  2.4508  1.8926  2.8759  1.4497  2.0333  2.2436  2.1095  2.5029  2.1996  1.6403  2.0186  2.0886  2.3694  2.3702  1.9642  1.7349  2.2177  2.8680  2.2128  0.3778  
Fit time          0.07    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.04    0.01    
Test time         0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.00    0.0

In [37]:
svd.fit(train_set)
nmf.fit(train_set)

<surprise.prediction_algorithms.matrix_factorization.NMF at 0x7c0d65cf7310>

In [38]:
svd.test(test_test)

[Prediction(uid=25925, iid=329, r_ui=4.0, est=4.155912640341106, details={'was_impossible': False}),
 Prediction(uid=25229, iid=12441, r_ui=5.0, est=4.0361596009975065, details={'was_impossible': False}),
 Prediction(uid=13763, iid=671, r_ui=4.0, est=4.03344398110565, details={'was_impossible': False}),
 Prediction(uid=6179, iid=6852, r_ui=0.0, est=3.9440716625037737, details={'was_impossible': False}),
 Prediction(uid=6357, iid=14130, r_ui=5.0, est=3.165602584696859, details={'was_impossible': False}),
 Prediction(uid=14613, iid=4840, r_ui=5.0, est=4.152028533389544, details={'was_impossible': False}),
 Prediction(uid=17721, iid=8579, r_ui=5.0, est=4.444954480084114, details={'was_impossible': False}),
 Prediction(uid=3166, iid=4165, r_ui=0.0, est=3.6972396990603693, details={'was_impossible': False}),
 Prediction(uid=9869, iid=8754, r_ui=4.0, est=4.781301770848249, details={'was_impossible': False}),
 Prediction(uid=5060, iid=13320, r_ui=5.0, est=4.1306640282041895, details={'was_imp

In [46]:
def adam_optimizer(svd_algo, nmf_algo,  test_set, weights: List[float], lr: float = 0.1,
                   beta_1: float = 0.9, beta_2: float = 0.999,
                   epislon: float = 1e-8, h: float = 0.01,
                   n_range: int  = 50):
    m1_dw = np.zeros_like(weights)
    v1_dw = np.zeros_like(weights)

    time_step: int = 0

    for i in range(n_range):
        grads: float = np.zeros_like(weights)

        # Calculate derivative
        for j in range(len(weights)):
            new_weights = np.copy(weights)
            new_weights[j] += h
            grads[j] = (obj_func(svd_algo, nmf_algo, 
                                  test_set, new_weights) - obj_func(svd_algo, nmf_algo, 
                                                                    test_set, new_weights)) / h # Implement Turunan
        time_step += 1
        
        # Update momentum Weights
        m1_dw = beta_1 * m1_dw + (1 - beta_1) * grads

        # Update rms Weight
        v1_dw = beta_2 * v1_dw + (1 - beta_2) * (grads ** 2)


        # Bias corrected momentum
        m1_dw_corrected = m1_dw / (1 - beta_1 ** time_step)
        v1_dw_corrected = v1_dw / (1 - beta_2 ** time_step)

        #update weights

        weights -= lr * m1_dw_corrected / (np.sqrt(v1_dw_corrected) + epislon)

    print("Opimizer Weights:", weights)
    return weights

def cst_rmse(squared_error: List):
    return np.sqrt(np.mean(squared_error))

def calculate_error(preds_svd, preds_nmf,
                     weight_svd=0.5,
                     weight_nmf=0.5,
                     ):
    weighted_preds: List = []
    for pred_svd, pred_nmf in zip(preds_svd, preds_nmf):
        squared_error = (pred_svd.est * weight_svd - pred_nmf.est * weight_nmf)**2
        weighted_preds.append(squared_error)
    return weighted_preds

def obj_func(svd_algo, nmf_algo, test_set, weights):
    predictions_svd: List = svd_algo.test(test_set)
    predictions_nmf: List = nmf_algo.test(test_set) 
    squared_error: List  = calculate_error(predictions_svd, predictions_nmf,
                                              weights[0], weights[1])
    return(cst_rmse(squared_error))


adam_optimizer(svd, nmf, test_test, [0.5, 0.5, 0.5])

Opimizer Weights: [-0.31736808 -0.32897743  0.5       ]


array([-0.31736808, -0.32897743,  0.5       ])

**This simple_gradient_descent function performs the following steps:**

1. Initialization: Initializes the gradient (grads) and sets the learning rate (lr) and number of iterations (n_range).
2. Gradient Calculation: Calculates the gradient for each weight by approximating the derivative using a small change h.
3. Weight Update: Updates the weights by subtracting the product of the learning rate and the gradient.
4. Iteration: Repeats the process for n_range iterations.

In [47]:
def simple_gradient_descent(svd_algo, nmf_algo, test_set, weights: List[float], lr: float = 0.01, h: float = 0.01, n_range: int = 100):
    for i in range(n_range):
        grads = np.zeros_like(weights)
        
        # Calculate gradient
        for j in range(len(weights)):
            new_weights = np.copy(weights)
            new_weights[j] += h
            grads[j] = (obj_func(svd_algo, nmf_algo, test_set, new_weights) - obj_func(svd_algo, nmf_algo, test_set, weights)) / h
        
        # Update weights
        weights -= lr * grads

    print("Optimized Weights:", weights)
    return weights

# Example usage:
svd.fit(train_set)
nmf.fit(train_set)
test_set = test_test
weights = [0.5, 0.5]
optimized_weights = simple_gradient_descent(svd, nmf, test_set, weights)

Optimized Weights: [-0.03127514 -0.07231824]


In [None]:
df_food_reviews

Unnamed: 0,AuthorId,RecipeId,Rating
0,1634,4384,4
1,2046,4523,2
2,1773,7435,5
3,2085,44,5
4,2046,5221,4
...,...,...,...
4005,21656,10125,5
4006,16827,10125,5
4007,5076,10125,5
4008,21267,10125,2


In [48]:
low = 0.01
high = 1.00
total_parallel : int = 2
starting_weights = [[np.random.uniform(low, high), np.random.uniform(low, high)] for i in range(total_parallel)]

def predict_recommendations(user_id, items: List, algo):
    preds : List = []
    preds_iid: List = []
    for item_indx in range(len(items)):
        pred = algo.predict(user_id, items[item_indx])
        if pred.iid not in preds_iid:
            preds_iid.append(pred.iid)
            preds.append(pred)
    preds = sorted(preds, key=lambda x : x.est, reverse=True)
    return preds

def parallel_weight(user_id, items: List, svd_algo, nmf_algo, train_set, list_of_h: List[float],
                    list_of_lr: List[float],
                    weights_param: List= [[0.2, 0.8, 0.1]]):

    results = {}
    for indx_weight in range(len(weights_param)):
        for indx_lr in range(len(list_of_lr)):
            for indx_h in range(len(list_of_h)):
                w1, w2 = adam_optimizer(svd_algo, nmf_algo, train_set, weights_param[indx_weight],
                                        h=list_of_h[indx_h], lr=list_of_lr[indx_lr])
                preds_svd = predict_recommendations(user_id, items, svd_algo)
                preds_nmf = predict_recommendations(user_id, items, nmf_algo)
                rmse_loc = cst_rmse(calculate_error(preds_svd, preds_nmf,
                                                    weight_svd=w1, weight_nmf=w2,))
                results[f"res_{indx_weight}-h-{list_of_h[indx_h]}-lr-{list_of_lr[indx_lr]}"] = {
                        "w1": w1,
                        "w2": w2,
                        "rmse": rmse_loc,
                        "preds_svd": preds_svd,
                        "preds_nmf": preds_nmf,
                    }
    
    return sorted(results.items(), key=lambda x : x[1]["rmse"])
user_id: int = 25925
items = df_food_reviews.iloc[:, 1]
train_list = list(train_set.all_ratings())
ll = parallel_weight(user_id, items, svd, nmf, train_list, [0.01], [0.001], starting_weights)

Opimizer Weights: [0.11185909 0.1374154 ]
Opimizer Weights: [0.09095848 0.78509699]


In [49]:
ll

[('res_0-h-0.01-lr-0.001',
  {'w1': 0.11185908710342993,
   'w2': 0.137415395463648,
   'rmse': 0.10721916814472972,
   'preds_svd': [Prediction(uid=25925, iid=9272, r_ui=None, est=4.827105501190952, details={'was_impossible': False}),
    Prediction(uid=25925, iid=10125, r_ui=None, est=4.731559519680006, details={'was_impossible': False}),
    Prediction(uid=25925, iid=9526, r_ui=None, est=4.7283506177501655, details={'was_impossible': False}),
    Prediction(uid=25925, iid=9836, r_ui=None, est=4.713587053374094, details={'was_impossible': False}),
    Prediction(uid=25925, iid=11287, r_ui=None, est=4.696645679695971, details={'was_impossible': False}),
    Prediction(uid=25925, iid=3596, r_ui=None, est=4.6804375081658245, details={'was_impossible': False}),
    Prediction(uid=25925, iid=10422, r_ui=None, est=4.659111493633144, details={'was_impossible': False}),
    Prediction(uid=25925, iid=5365, r_ui=None, est=4.651002899663747, details={'was_impossible': False}),
    Prediction(ui

In [50]:
user_id2 = 5060
le = parallel_weight(user_id2, items, svd, nmf, train_list, [0.1], [0.001], starting_weights)


Opimizer Weights: [0.0985726  0.13741411]
Opimizer Weights: [0.09095846 0.78509699]


In [51]:
le

[('res_0-h-0.1-lr-0.001',
  {'w1': 0.098572603301746,
   'w2': 0.1374141053273816,
   'rmse': 0.13082410405252837,
   'preds_svd': [Prediction(uid=5060, iid=9272, r_ui=None, est=4.9526778442015935, details={'was_impossible': False}),
    Prediction(uid=5060, iid=3587, r_ui=None, est=4.658783632262802, details={'was_impossible': False}),
    Prediction(uid=5060, iid=4330, r_ui=None, est=4.635210138401639, details={'was_impossible': False}),
    Prediction(uid=5060, iid=8579, r_ui=None, est=4.612629624249114, details={'was_impossible': False}),
    Prediction(uid=5060, iid=12583, r_ui=None, est=4.609125861399215, details={'was_impossible': False}),
    Prediction(uid=5060, iid=9836, r_ui=None, est=4.56799186944121, details={'was_impossible': False}),
    Prediction(uid=5060, iid=14580, r_ui=None, est=4.56262747758755, details={'was_impossible': False}),
    Prediction(uid=5060, iid=14927, r_ui=None, est=4.558157568490436, details={'was_impossible': False}),
    Prediction(uid=5060, iid=8

In [None]:
le

[('res_1-h-0.001-lr-0.1',
  {'w1': -0.006476645514650098,
   'w2': -0.0007178395035234509,
   'rmse': 0.02422949140437319,
   'preds_svd': [Prediction(uid=5060, iid=9272, r_ui=None, est=4.898335299086907, details={'was_impossible': False}),
    Prediction(uid=5060, iid=9836, r_ui=None, est=4.654987230458979, details={'was_impossible': False}),
    Prediction(uid=5060, iid=11181, r_ui=None, est=4.6315074582969675, details={'was_impossible': False}),
    Prediction(uid=5060, iid=15364, r_ui=None, est=4.631460493707017, details={'was_impossible': False}),
    Prediction(uid=5060, iid=8594, r_ui=None, est=4.618092339778605, details={'was_impossible': False}),
    Prediction(uid=5060, iid=10125, r_ui=None, est=4.6137290847918155, details={'was_impossible': False}),
    Prediction(uid=5060, iid=10422, r_ui=None, est=4.60953172922006, details={'was_impossible': False}),
    Prediction(uid=5060, iid=70137, r_ui=None, est=4.607945746062933, details={'was_impossible': False}),
    Prediction(uid