# General Code

In [1]:
import urllib.request
import math

NUM_USERS = 943
NUM_ITEMS = 1682

MIN_RATING = 1
MAX_RATING = 5

ratings = [[None for _ in range(NUM_ITEMS)] for _ in range(NUM_USERS)]

training_file = urllib.request.urlopen("https://drive.upm.es/s/tDdluElfGInyUnU/download")
for line in training_file:
  [u, i, rating] = line.decode("utf-8").split("::")
  ratings[int(u)][int(i)] = int(rating)
  
test_ratings = [[None for _ in range(NUM_ITEMS)] for _ in range(NUM_USERS)]

test_file = urllib.request.urlopen("https://drive.upm.es/s/Jn75Vg6okOPsgZu/download")
for line in test_file:
  [u, i, rating] = line.decode("utf-8").split("::")
  test_ratings[int(u)][int(i)] = int(rating)

In [2]:
import numpy as np

ratings_np = np.array([[float(x) if x is not None else np.nan for x in row] for row in ratings], dtype=np.float64)
test_ratings_np = np.array([[float(x) if x is not None else np.nan for x in row] for row in test_ratings], dtype=np.float64)

In [3]:
ratings_np = np.array([[np.nan if r is None else r for r in user_ratings] for user_ratings in ratings])

def rating_average(u):
    return np.nanmean(ratings_np[u])

## -- 1. KNN --

In [None]:
# Correlation similarity
# Haremos todas las correlaciones y la que mejor resultados de será la que pongamos en el poster, 
# explicando por qué funciona mejor que las demás y por qué solo aparece esa 
# (podríamos poner un gráfico de como funciona cada una según la profe).

def correlation_similarity(u, v):
    r_u = ratings_np[u]
    r_v = ratings_np[v]
    
    # Mask of items both users voted
    common_mask = ~np.isnan(r_u) & ~np.isnan(r_v)
    
    if not np.any(common_mask):
        return None  

    # Mean - centered valorations
    avg_u = np.nanmean(r_u)
    avg_v = np.nanmean(r_v)

    diff_u = r_u[common_mask] - avg_u
    diff_v = r_v[common_mask] - avg_v

    num = np.dot(diff_u, diff_v)
    den = np.linalg.norm(diff_u) * np.linalg.norm(diff_v)

    if den == 0:
        return None  
    return num / den

In [5]:
print(correlation_similarity(5,940))
print(correlation_similarity(113,534))
print(correlation_similarity(92,355))

0.010617834767124232
-0.08282165590325247
None


In [6]:
import numpy as np

def jmsd_similarity(u, v):
    r_u = ratings_np[u]
    r_v = ratings_np[v]
    
    # Masks
    rated_u = ~np.isnan(r_u)
    rated_v = ~np.isnan(r_v)
    both_rated = rated_u & rated_v
    either_rated = rated_u | rated_v

    union = np.sum(either_rated)
    intersection = np.sum(both_rated)

    if intersection == 0:
        return None

    # Normalized ratings
    norm_u = (r_u[both_rated] - MIN_RATING) / (MAX_RATING - MIN_RATING)
    norm_v = (r_v[both_rated] - MIN_RATING) / (MAX_RATING - MIN_RATING)

    # Mean Squared Difference
    msd = np.mean((norm_u - norm_v) ** 2)

    # Jaccard * (1 - MSD)
    jaccard = intersection / union
    return jaccard * (1 - msd)


In [7]:
print(jmsd_similarity(5,940))
print(jmsd_similarity(113,534))
print(jmsd_similarity(92,355))

0.04025900900900901
0.1138771186440678
None


In [None]:
k = 25

def get_neighbors(u, similarities, k):
    sims = np.array([np.nan if x is None else x for x in similarities], dtype=np.float64)

    valid = (~np.isnan(sims)) & (sims > 0)
    valid[u] = False

    valid_indices = np.where(valid)[0]
    sorted_indices = valid_indices[np.argsort(sims[valid_indices])[::-1]]

    return sorted_indices[:k].tolist()

In [12]:
u = 112
similarities_corr = [np.nan if u == v else correlation_similarity(u, v) for v in range(NUM_USERS)]
similarities_jmsd = [np.nan if u == v else jmsd_similarity(u, v) for v in range(NUM_USERS)]

neighbors_corr = get_neighbors(u, similarities_corr, k)
neighbors_jmsd = get_neighbors(u, similarities_jmsd, k)
print(neighbors_corr)
print(neighbors_jmsd)

[66, 186, 365, 121, 113, 97, 318, 50, 515, 521, 699, 40, 507, 659, 606, 87, 703, 797, 856, 134, 722, 875, 132, 379, 608]
[172, 769, 73, 936, 742, 469, 169, 581, 412, 702, 925, 509, 175, 643, 148, 154, 62, 833, 904, 322, 700, 577, 729, 673, 138]


In [None]:
def average_prediction(i, neighbors):
    # Neighbours not None and value for the i item
    valid_ratings = [ratings[n][i] for n in neighbors if n is not None and ratings[n][i] is not None]

    if len(valid_ratings) > 0:
        return np.mean(valid_ratings)
    else:
        return None

In [15]:
i = 324
avgpred_corr = average_prediction (i, neighbors_corr)
avgpred_jmsd = average_prediction (i, neighbors_jmsd)
print(avgpred_corr)
print(avgpred_jmsd)

1.0
1.75


In [16]:
def weighted_average_prediction(i, neighbors, similarities):
    sims = np.array([np.nan if x is None else x for x in similarities], dtype=np.float64)

    neighbors = np.array(neighbors)

    valid_mask = neighbors != None
    valid_neighbors = neighbors[valid_mask]

    ratings_array = np.array([ratings[n][i] if ratings[n][i] is not None else np.nan for n in valid_neighbors], dtype=np.float64)

    sims_array = sims[valid_neighbors]

    mask = (~np.isnan(sims_array)) & (~np.isnan(ratings_array))

    if np.any(mask):
        num = np.sum(sims_array[mask] * ratings_array[mask])
        den = np.sum(sims_array[mask])
        if den > 0:
            return num / den
    return None


In [17]:
wavgpred_corr = weighted_average_prediction (i, neighbors_corr, similarities_corr)
wavgpred_jmsd = weighted_average_prediction (i, neighbors_jmsd, similarities_jmsd)
print(wavgpred_corr)
print(wavgpred_jmsd)

1.0
1.7426994721014446


In [18]:
ratings_np = np.array(ratings, dtype=float)

def deviation_from_mean_prediction(u, i, neighbors):
    neighbors = np.array(neighbors)
    
    valid_neighbors = neighbors[neighbors != None]
    
    ratings_i = ratings_np[valid_neighbors, i]
    
    averages = np.array([rating_average(n) for n in valid_neighbors])
    
    mask = (~np.isnan(ratings_i)) & (~np.isnan(averages))
    if np.any(mask):
        deviations = ratings_i[mask] - averages[mask]
        avg_u = rating_average(u)
        if np.isnan(avg_u):
            return None
        prediction = avg_u + np.mean(deviations)
        return prediction
    else:
        return None


In [19]:
dfmpred_corr = deviation_from_mean_prediction (u, i, neighbors_corr)
dfmpred_jmsd = deviation_from_mean_prediction (u, i, neighbors_jmsd)
print(dfmpred_corr)
print(dfmpred_jmsd)

1.4452214452214451
2.367587967587968


In [20]:
N = 5

def get_recommendations(predictions, N):
    preds = np.array(predictions, dtype=float)
    # Reemplazamos None por -inf para que no interfieran en la búsqueda de máximos
    preds = np.where(np.isnan(preds), -np.inf, preds)

    # Obtener indices de los N valores más altos sin repetidos
    # argsort ordena ascendente, por eso usamos [::-1]
    top_indices = np.argsort(preds)[::-1][:N]

    return top_indices.tolist()

In [22]:
avg_predictions_corr = [None if ratings[u][i] != None else average_prediction(i, neighbors_corr) for i in range(NUM_ITEMS)]
avg_predictions_jmsd = [None if ratings[u][i] != None else average_prediction(i, neighbors_jmsd) for i in range(NUM_ITEMS)]
wavg_predictions_corr = [None if ratings[u][i] != None else weighted_average_prediction(i, neighbors_corr, similarities_corr) for i in range(NUM_ITEMS)]
wavg_predictions_jmsd = [None if ratings[u][i] != None else weighted_average_prediction(i, neighbors_jmsd, similarities_jmsd) for i in range(NUM_ITEMS)]
dfm_predictions_corr = [None if ratings[u][i] != None else deviation_from_mean_prediction(u, i, neighbors_corr) for i in range(NUM_ITEMS)]
dfm_predictions_jmsd = [None if ratings[u][i] != None else deviation_from_mean_prediction(u, i, neighbors_jmsd) for i in range(NUM_ITEMS)]
recommendations_avg_corr = get_recommendations(avg_predictions_corr, N)
recommendations_avg_jmsd = get_recommendations(avg_predictions_jmsd, N)
recommendations_wavg_corr = get_recommendations(wavg_predictions_corr, N)
recommendations_wavg_jmsd = get_recommendations(wavg_predictions_jmsd, N)
recommendations_dfm_corr = get_recommendations(dfm_predictions_corr, N)
recommendations_dfm_jmsd = get_recommendations(dfm_predictions_jmsd, N)
print(recommendations_avg_corr)
print(recommendations_avg_jmsd)
print(recommendations_wavg_corr)
print(recommendations_wavg_jmsd)
print(recommendations_dfm_corr)
print(recommendations_dfm_jmsd)

[191, 487, 903, 820, 353]
[994, 21, 202, 155, 466]
[142, 282, 820, 814, 273]
[10, 22, 994, 1136, 466]
[284, 286, 876, 407, 282]
[1136, 171, 650, 21, 22]


In [23]:
def has_test_ratings (u):
  for i in range(NUM_ITEMS):
    if test_ratings[u][i] != None:
      return True
  return False

test_ratings_np = np.array(test_ratings, dtype=float)

def has_test_ratings(u):
    user_test_ratings = test_ratings_np[u]
    return np.any(~np.isnan(user_test_ratings))

In [29]:
# Inicializamos matrices de predicción con np.nan
avg_predictions_corr = np.full((NUM_USERS, NUM_ITEMS), np.nan)
avg_predictions_jmsd = np.full((NUM_USERS, NUM_ITEMS), np.nan)
wavg_predictions_corr = np.full((NUM_USERS, NUM_ITEMS), np.nan)
wavg_predictions_jmsd = np.full((NUM_USERS, NUM_ITEMS), np.nan)
dfm_predictions_corr = np.full((NUM_USERS, NUM_ITEMS), np.nan)
dfm_predictions_jmsd = np.full((NUM_USERS, NUM_ITEMS), np.nan)

for u in range(NUM_USERS):
    if np.any(~np.isnan(test_ratings_np[u])):  # Usuario u tiene ratings en test
        similarities_corr = np.array([np.nan if u == v else correlation_similarity(u, v) for v in range(NUM_USERS)])
        similarities_jmsd = np.array([np.nan if u == v else jmsd_similarity(u, v) for v in range(NUM_USERS)])

        neighbors_corr = get_neighbors(u, similarities_corr, k)
        neighbors_jmsd = get_neighbors(u, similarities_jmsd, k)

        test_items = np.where(~np.isnan(test_ratings_np[u]))[0]

        for i in test_items:
            avg_predictions_corr[u, i] = average_prediction(i, neighbors_corr)
            avg_predictions_jmsd[u, i] = average_prediction(i, neighbors_jmsd)
            wavg_predictions_corr[u, i] = weighted_average_prediction(i, neighbors_corr, similarities_corr)
            wavg_predictions_jmsd[u, i] = weighted_average_prediction(i, neighbors_jmsd, similarities_jmsd)
            dfm_predictions_corr[u, i] = deviation_from_mean_prediction(u, i, neighbors_corr)
            dfm_predictions_jmsd[u, i] = deviation_from_mean_prediction(u, i, neighbors_jmsd)

In [32]:
def get_user_mae(u, predictions):
    mask = ~np.isnan(test_ratings_np[u]) & ~np.isnan(predictions[u])
    if np.any(mask):
        errors = np.abs(test_ratings_np[u][mask] - predictions[u][mask])
        return errors.mean()
    else:
        return None

def get_mae(predictions):
    maes = []
    for u in range(NUM_USERS):
        if np.any(~np.isnan(test_ratings_np[u])):
            user_mae = get_user_mae(u, predictions)
            if user_mae is not None:
                maes.append(user_mae)
    if maes:
        return np.mean(maes)
    else:
        return None

In [31]:
mae_avg_corr = get_mae(avg_predictions_corr)
mae_avg_jmsd = get_mae(avg_predictions_jmsd)
mae_wavg_corr = get_mae(wavg_predictions_corr)
mae_wavg_jmsd = get_mae(wavg_predictions_jmsd)
mae_dfm_corr = get_mae(dfm_predictions_corr)
mae_dfm_jmsd = get_mae(dfm_predictions_jmsd)
print("MAE Avg Prediction with Corr Similarity = " + str(mae_avg_corr))
print("MAE Avg Prediction with JMSD Similarity = " + str(mae_avg_jmsd))
print("MAE Weighted Avg Prediction with Corr Similarity = " + str(mae_wavg_corr))
print("MAE Weighted Avg Prediction with JMSD Similarity = " + str(mae_wavg_jmsd))
print("MAE Dev from Mean Prediction with Corr Similarity = " + str(mae_dfm_corr))
print("MAE Dev from Mean Prediction with JMSD Similarity = " + str(mae_dfm_jmsd))

MAE Avg Prediction with Corr Similarity = 1.0013447824660648
MAE Avg Prediction with JMSD Similarity = 0.8803027239233328
MAE Weighted Avg Prediction with Corr Similarity = 1.0018989085104146
MAE Weighted Avg Prediction with JMSD Similarity = 0.8816893193480325
MAE Dev from Mean Prediction with Corr Similarity = 1.0025785672182643
MAE Dev from Mean Prediction with JMSD Similarity = 0.8259244673950326


In [33]:
def get_user_rmse(u, predictions):
    mask = ~np.isnan(test_ratings_np[u]) & ~np.isnan(predictions[u])
    if np.any(mask):
        errors = test_ratings_np[u][mask] - predictions[u][mask]
        mse = np.mean(errors ** 2)
        return np.sqrt(mse)
    else:
        return None

def get_rmse(predictions):
    rmses = []
    for u in range(NUM_USERS):
        if np.any(~np.isnan(test_ratings_np[u])): 
            user_rmse = get_user_rmse(u, predictions)
            if user_rmse is not None:
                rmses.append(user_rmse)

    if len(rmses) > 0:
        return np.mean(rmses)
    else:
        return None

In [34]:
rmse_avg_corr = get_rmse(avg_predictions_corr)
rmse_avg_jmsd = get_rmse(avg_predictions_jmsd)
rmse_wavg_corr = get_rmse(wavg_predictions_corr)
rmse_wavg_jmsd = get_rmse(wavg_predictions_jmsd)
rmse_dfm_corr = get_rmse(dfm_predictions_corr)
rmse_dfm_jmsd = get_rmse(dfm_predictions_jmsd)
print("RMSE Avg Prediction with Corr Similarity = " + str(rmse_avg_corr))
print("RMSE Avg Prediction with JMSD Similarity = " + str(rmse_avg_jmsd))
print("RMSE Weighted Avg Prediction with Corr Similarity = " + str(rmse_wavg_corr))
print("RMSE Weighted Avg Prediction with JMSD Similarity = " + str(rmse_wavg_jmsd))
print("RMSE Dev from Mean Prediction with Corr Similarity = " + str(rmse_dfm_corr))
print("RMSE Dev from Mean Prediction with JMSD Similarity = " + str(rmse_dfm_jmsd))

RMSE Avg Prediction with Corr Similarity = 1.1918639939800388
RMSE Avg Prediction with JMSD Similarity = 1.0718794441589246
RMSE Weighted Avg Prediction with Corr Similarity = 1.1923278434784828
RMSE Weighted Avg Prediction with JMSD Similarity = 1.0725693999383665
RMSE Dev from Mean Prediction with Corr Similarity = 1.1764354008972988
RMSE Dev from Mean Prediction with JMSD Similarity = 1.0133916095034077


In [35]:
theta = 4

def get_user_precision(u, predictions, theta):
    precision = 0
    count = 0
    recommendations = get_recommendations(predictions[u], N)

    for i in recommendations:
        if i is not None and not np.isnan(test_ratings_np[u, i]):
            if test_ratings_np[u, i] >= theta:
                precision += 1
            count += 1

    if count > 0:
        return precision / count
    else:
        return None

def get_precision(predictions, theta):
    precisions = []
    for u in range(NUM_USERS):
        if np.any(~np.isnan(test_ratings_np[u])):
            user_precision = get_user_precision(u, predictions, theta)
            if user_precision is not None:
                precisions.append(user_precision)

    if len(precisions) > 0:
        return np.mean(precisions)
    else:
        return None


In [36]:
precision_avg_corr = get_precision(avg_predictions_corr, theta)
precision_avg_jmsd = get_precision(avg_predictions_jmsd, theta)
precision_wavg_corr = get_precision(wavg_predictions_corr, theta)
precision_wavg_jmsd = get_precision(wavg_predictions_jmsd, theta)
precision_dfm_corr = get_precision(dfm_predictions_corr, theta)
precision_dfm_jmsd = get_precision(dfm_predictions_jmsd, theta)
print("Precision Avg Prediction with Corr Similarity = " + str(precision_avg_corr))
print("Precision Avg Prediction with JMSD Similarity = " + str(precision_avg_jmsd))
print("Precision Weighted Avg Prediction with Corr Similarity = " + str(precision_wavg_corr))
print("Precision Weighted Avg Prediction with JMSD Similarity = " + str(precision_wavg_jmsd))
print("Precision Dev from Mean Prediction with Corr Similarity = " + str(precision_dfm_corr))
print("Precision Dev from Mean Prediction with JMSD Similarity = " + str(precision_dfm_jmsd))

Precision Avg Prediction with Corr Similarity = 0.6418848167539267
Precision Avg Prediction with JMSD Similarity = 0.6923931623931625
Precision Weighted Avg Prediction with Corr Similarity = 0.6387434554973822
Precision Weighted Avg Prediction with JMSD Similarity = 0.6944444444444444
Precision Dev from Mean Prediction with Corr Similarity = 0.6356020942408377
Precision Dev from Mean Prediction with JMSD Similarity = 0.6729059829059828


In [37]:
def get_user_recall(u, predictions, theta):
    recall = 0
    count = 0
    recommendations = get_recommendations(predictions[u], N)

    for i in range(NUM_ITEMS):
        if not np.isnan(test_ratings_np[u, i]) and not np.isnan(predictions[u, i]):
            if test_ratings_np[u, i] >= theta:
                recall += 1 if i in recommendations else 0
                count += 1

    if count > 0:
        return recall / count
    else:
        return None

def get_recall(predictions, theta):
    recalls = []
    for u in range(NUM_USERS):
        if np.any(~np.isnan(test_ratings_np[u])):
            user_recall = get_user_recall(u, predictions, theta)
            if user_recall is not None:
                recalls.append(user_recall)

    if len(recalls) > 0:
        return np.mean(recalls)
    else:
        return None

In [38]:
recall_avg_corr = get_recall(avg_predictions_corr, theta)
recall_avg_jmsd = get_recall(avg_predictions_jmsd, theta)
recall_wavg_corr = get_recall(wavg_predictions_corr, theta)
recall_wavg_jmsd = get_recall(wavg_predictions_jmsd, theta)
recall_dfm_corr = get_recall(dfm_predictions_corr, theta)
recall_dfm_jmsd = get_recall(dfm_predictions_jmsd, theta)
print("Recall Avg Prediction with Corr Similarity = " + str(recall_avg_corr))
print("Recall Avg Prediction with JMSD Similarity = " + str(recall_avg_jmsd))
print("Recall Weighted Avg Prediction with Corr Similarity = " + str(recall_wavg_corr))
print("Recall Weighted Avg Prediction with JMSD Similarity = " + str(recall_wavg_jmsd))
print("Recall Dev from Mean Prediction with Corr Similarity = " + str(recall_dfm_corr))
print("Recall Dev from Mean Prediction with JMSD Similarity = " + str(recall_dfm_jmsd))

Recall Avg Prediction with Corr Similarity = 0.8670666466163055
Recall Avg Prediction with JMSD Similarity = 0.558096676993848
Recall Weighted Avg Prediction with Corr Similarity = 0.8634193036211365
Recall Weighted Avg Prediction with JMSD Similarity = 0.5553598801471036
Recall Dev from Mean Prediction with Corr Similarity = 0.8542189414497597
Recall Dev from Mean Prediction with JMSD Similarity = 0.5458787138143124


In [39]:
def get_user_f1(u, predictions, theta):
    precision = get_user_precision(u, predictions, theta)
    recall = get_user_recall(u, predictions, theta)

    if precision is None or recall is None:
        return None
    elif precision == 0 and recall == 0:
        return 0
    else:
        return 2 * precision * recall / (precision + recall)

def get_f1(predictions, theta):
    f1_sum = 0
    count = 0

    for u in range(NUM_USERS):
        if has_test_ratings(u):
            user_f1 = get_user_f1(u, predictions, theta)
            if user_f1 is not None:
                f1_sum += user_f1
                count += 1

    if count > 0:
        return f1_sum / count
    else:
        return None


In [40]:
f1_avg_corr = get_f1(avg_predictions_corr, theta)
f1_avg_jmsd = get_f1(avg_predictions_jmsd, theta)
f1_wavg_corr = get_f1(wavg_predictions_corr, theta)
f1_wavg_jmsd = get_f1(wavg_predictions_jmsd, theta)
f1_dfm_corr = get_f1(dfm_predictions_corr, theta)
f1_dfm_jmsd = get_f1(dfm_predictions_jmsd, theta)
print("F1 Avg Prediction with Corr Similarity = " + str(f1_avg_corr))
print("F1 Avg Prediction with JMSD Similarity = " + str(f1_avg_jmsd))
print("F1 Weighted Avg Prediction with Corr Similarity = " + str(f1_wavg_corr))
print("F1 Weighted Avg Prediction with JMSD Similarity = " + str(f1_wavg_jmsd))
print("F1 Dev from Mean Prediction with Corr Similarity = " + str(f1_dfm_corr))
print("F1 Dev from Mean Prediction with JMSD Similarity = " + str(f1_dfm_jmsd))

F1 Avg Prediction with Corr Similarity = 0.7232889376936995
F1 Avg Prediction with JMSD Similarity = 0.5371481861383391
F1 Weighted Avg Prediction with Corr Similarity = 0.7196306823687777
F1 Weighted Avg Prediction with JMSD Similarity = 0.5359233759475605
F1 Dev from Mean Prediction with Corr Similarity = 0.7151756620804238
F1 Dev from Mean Prediction with JMSD Similarity = 0.5241469868215689


In [41]:
def get_ordered_test_items(u):
    rated_items = np.where(~np.isnan(test_ratings_np[u]))[0]
    ratings = test_ratings_np[u, rated_items]
    ordered_indices = rated_items[np.argsort(-ratings)]
    return ordered_indices

def get_user_idcg(u):
    items = get_ordered_test_items(u)
    if len(items) == 0:
        return 0
    ratings = test_ratings_np[u, items]
    positions = np.arange(1, len(items) + 1)
    idcg = np.sum((2**ratings - 1) / np.log2(positions + 1))
    return idcg

def get_user_dcg(u, recommendations):
    recommendations = np.array([i for i in recommendations if i is not None])
    known_mask = ~np.isnan(test_ratings_np[u, recommendations])
    valid_recs = recommendations[known_mask]
    if len(valid_recs) == 0:
        return 0
    ratings = test_ratings_np[u, valid_recs]
    positions = np.arange(1, len(valid_recs) + 1)
    dcg = np.sum((2**ratings - 1) / np.log2(positions + 1))
    return dcg

def get_user_ndcg(u, predictions):
    recommendations = get_recommendations(predictions[u], N)
    dcg = get_user_dcg(u, recommendations)
    idcg = get_user_idcg(u)
    if idcg == 0:
        return 0
    else:
        return dcg / idcg

def get_ndcg(predictions):
    ndcg_sum = 0
    count = 0
    for u in range(NUM_USERS):
        if has_test_ratings(u):
            user_ndcg = get_user_ndcg(u, predictions)
            if user_ndcg is not None:
                ndcg_sum += user_ndcg
                count += 1
    return ndcg_sum / count if count > 0 else None

In [42]:
ndcg_avg_corr = get_ndcg(avg_predictions_corr)
ndcg_avg_jmsd = get_ndcg(avg_predictions_jmsd)
ndcg_wavg_corr = get_ndcg(wavg_predictions_corr)
ndcg_wavg_jmsd = get_ndcg(wavg_predictions_jmsd)
ndcg_dfm_corr = get_ndcg(dfm_predictions_corr)
ndcg_dfm_jmsd = get_ndcg(dfm_predictions_jmsd)

print(f"nDCG Avg Prediction with Corr Similarity = {ndcg_avg_corr}")
print(f"nDCG Avg Prediction with JMSD Similarity = {ndcg_avg_jmsd}")
print(f"nDCG Weighted Avg Prediction with Corr Similarity = {ndcg_wavg_corr}")
print(f"nDCG Weighted Avg Prediction with JMSD Similarity = {ndcg_wavg_jmsd}")
print(f"nDCG Dev from Mean Prediction with Corr Similarity = {ndcg_dfm_corr}")
print(f"nDCG Dev from Mean Prediction with JMSD Similarity = {ndcg_dfm_jmsd}")

nDCG Avg Prediction with Corr Similarity = 0.4188074338619339
nDCG Avg Prediction with JMSD Similarity = 0.5485775755401524
nDCG Weighted Avg Prediction with Corr Similarity = 0.4178047157483982
nDCG Weighted Avg Prediction with JMSD Similarity = 0.5489139343403171
nDCG Dev from Mean Prediction with Corr Similarity = 0.41508830995089063
nDCG Dev from Mean Prediction with JMSD Similarity = 0.5450478654538947


## 2. -- Matrix Factorization --

In [44]:
ratings_np = np.array(ratings, dtype=float)
test_ratings_np = np.array(test_ratings, dtype=float)

ratings_np = np.where(ratings_np == None, np.nan, ratings_np)
test_ratings_np = np.where(test_ratings_np == None, np.nan, test_ratings_np)

In [46]:
NUM_FACTORS = 7
LEARNING_RATE = 0.001 # gamma
REGULARIZATION = 0.1 # lambda

p = np.random.rand(NUM_USERS, NUM_FACTORS)  # user itemas
q = np.random.rand(NUM_ITEMS, NUM_FACTORS)  # matrix items

In [47]:
def compute_prediction(p_u, q_i):
    return np.dot(p_u, q_i)

In [48]:
NUM_ITERATIONS = 10

for it in range(NUM_ITERATIONS):
  print("Iteración " + str(it + 1) + " de " + str(NUM_ITERATIONS))

  updated_p = list(p) # clone p matrix
  updated_q = list(q) # clone q matrix

  for u in range(NUM_USERS):
    for i in range(NUM_ITEMS):
      if ratings[u][i] != None:

        prediction = compute_prediction(p[u], q[i])
        rating = ratings[u][i]
        error = rating - prediction

        for k in range(NUM_FACTORS):
          updated_p[u][k] += LEARNING_RATE * (error * q[i][k] - REGULARIZATION * p[u][k])
          updated_q[i][k] += LEARNING_RATE * (error * p[u][k] - REGULARIZATION * q[i][k])

  p = updated_p
  q = updated_q

Iteración 1 de 10
Iteración 2 de 10
Iteración 3 de 10
Iteración 4 de 10
Iteración 5 de 10
Iteración 6 de 10
Iteración 7 de 10
Iteración 8 de 10
Iteración 9 de 10
Iteración 10 de 10


In [49]:
N = 5

def get_recommendations(predictions, N=10):
    predictions = np.array(predictions)
    
    predictions = np.where(predictions == None, -np.inf, predictions)

    top_indices = np.argpartition(-predictions, N)[:N]
    
    top_indices = top_indices[np.argsort(-predictions[top_indices])]
    return top_indices.tolist()

In [50]:
predictions = np.full((NUM_USERS, NUM_ITEMS), np.nan)

for u in range(NUM_USERS):
    for i in range(NUM_ITEMS):
        if test_ratings[u][i] is not None:
            predictions[u, i] = compute_prediction(p[u], q[i])

In [51]:
test_ratings_np = np.array([[np.nan if x is None else x for x in row] for row in test_ratings], dtype=np.float64)
predictions_np = np.array([[np.nan if x is None else x for x in row] for row in predictions], dtype=np.float64)

def get_user_mae(u, predictions_np):
    mask = ~np.isnan(test_ratings_np[u]) & ~np.isnan(predictions_np[u])
    if np.any(mask):
        errors = np.abs(test_ratings_np[u][mask] - predictions_np[u][mask])
        return errors.mean()
    else:
        return None

def get_mae(predictions_np):
    maes = []
    for u in range(NUM_USERS):
        if np.any(~np.isnan(test_ratings_np[u])):
            user_mae = get_user_mae(u, predictions_np)
            if user_mae is not None:
                maes.append(user_mae)
    if maes:
        return np.mean(maes)
    else:
        return None

In [52]:
def get_user_rmse(u, predictions):
    mask = ~np.isnan(test_ratings_np[u]) & ~np.isnan(predictions[u])
    if np.any(mask):
        errors = test_ratings_np[u][mask] - predictions[u][mask]
        mse = np.mean(errors ** 2)
        return np.sqrt(mse)
    else:
        return None

def get_rmse(predictions):
    rmses = []
    for u in range(NUM_USERS):
        if np.any(~np.isnan(test_ratings_np[u])): 
            user_rmse = get_user_rmse(u, predictions)
            if user_rmse is not None:
                rmses.append(user_rmse)

    if len(rmses) > 0:
        return np.mean(rmses)
    else:
        return None

In [53]:
theta = 4

def get_user_precision(u, predictions, theta):
    precision = 0
    count = 0
    recommendations = get_recommendations(predictions[u], N)

    for i in recommendations:
        if i is not None and not np.isnan(test_ratings_np[u, i]):
            if test_ratings_np[u, i] >= theta:
                precision += 1
            count += 1

    if count > 0:
        return precision / count
    else:
        return None

def get_precision(predictions, theta):
    precisions = []
    for u in range(NUM_USERS):
        if np.any(~np.isnan(test_ratings_np[u])):
            user_precision = get_user_precision(u, predictions, theta)
            if user_precision is not None:
                precisions.append(user_precision)

    if len(precisions) > 0:
        return np.mean(precisions)
    else:
        return None

In [54]:
def get_user_recall(u, predictions, theta):
    recall = 0
    count = 0
    recommendations = get_recommendations(predictions[u], N)

    for i in range(NUM_ITEMS):
        if not np.isnan(test_ratings_np[u, i]) and not np.isnan(predictions[u, i]):
            if test_ratings_np[u, i] >= theta:
                recall += 1 if i in recommendations else 0
                count += 1

    if count > 0:
        return recall / count
    else:
        return None

def get_recall(predictions, theta):
    recalls = []
    for u in range(NUM_USERS):
        if np.any(~np.isnan(test_ratings_np[u])):
            user_recall = get_user_recall(u, predictions, theta)
            if user_recall is not None:
                recalls.append(user_recall)

    if len(recalls) > 0:
        return np.mean(recalls)
    else:
        return None

In [55]:
def get_user_f1(u, predictions):
    precision = get_user_precision(u, predictions, theta)
    recall = get_user_recall(u, predictions, theta)

    if precision is None or recall is None:
        return None
    elif precision == 0 and recall == 0:
        return 0.0
    else:
        return 2 * precision * recall / (precision + recall)

def get_f1(predictions):
    user_f1_scores = np.array([get_user_f1(u, predictions) for u in range(NUM_USERS)], dtype=np.float64)
    valid_f1_scores = user_f1_scores[~np.isnan(user_f1_scores)]

    return np.mean(valid_f1_scores) if valid_f1_scores.size > 0 else None


In [56]:
def get_ordered_test_items(u):
    ratings_u = np.array(test_ratings[u], dtype=np.float64)
    valid_indices = np.where(~np.isnan(ratings_u))[0]
    sorted_indices = valid_indices[np.argsort(-ratings_u[valid_indices])]
    return sorted_indices.tolist()

def get_user_idcg(u):
    items = get_ordered_test_items(u)
    if len(items) == 0:
        return 0
    ratings = np.array([test_ratings[u][i] for i in items], dtype=np.float64)
    positions = np.arange(1, len(items) + 1)
    idcg = np.sum((2 ** ratings - 1) / np.log2(positions + 1))
    return idcg

def get_user_dcg(u, recommendations):
    valid_recs = [i for i in recommendations if i is not None and test_ratings[u][i] is not None]
    if len(valid_recs) == 0:
        return 0
    ratings = np.array([test_ratings[u][i] for i in valid_recs], dtype=np.float64)
    positions = np.arange(1, len(valid_recs) + 1)
    dcg = np.sum((2 ** ratings - 1) / np.log2(positions + 1))
    return dcg

def get_user_ndcg(u, predictions):
    recommendations = get_recommendations(predictions[u])
    dcg = get_user_dcg(u, recommendations)
    idcg = get_user_idcg(u)
    return dcg / idcg if idcg > 0 else 0

def get_ndcg(predictions):
    ndcgs = np.array([get_user_ndcg(u, predictions) for u in range(NUM_USERS)], dtype=np.float64)
    return np.mean(ndcgs[~np.isnan(ndcgs)]) if np.any(~np.isnan(ndcgs)) else None

In [57]:
predictions = np.array([[np.nan if x is None else x for x in row] for row in predictions], dtype=np.float64)

mae = get_mae(predictions)
rmse = get_rmse(predictions)
precision = get_precision(predictions, theta)
recall = get_recall(predictions, theta)
f1 = get_f1(predictions)
ndcg = get_ndcg(predictions)
print("MAE = " + str(mae))
print("RMSE = " + str(rmse))
print("Precision = " + str(precision))
print("Recall = " + str(recall))
print("F1 = " + str(f1))
print("nDCG = " + str(ndcg))

MAE = 0.9160203748609498
RMSE = 1.0865300065270902
Precision = 0.7109401709401709
Recall = 0.5305556466727257
F1 = 0.5275183811188838
nDCG = 0.14891946204071588


In [58]:
def compute_biased_prediction(avg, b_u, b_i, p_u, q_i):
    deviation = np.dot(p_u, q_i)
    prediction = avg + b_u + b_i + deviation
    return prediction


ratings_np = np.array(ratings, dtype=np.float64)
rating_average = np.nanmean(ratings_np)

In [59]:
p = np.random.rand(NUM_USERS, NUM_FACTORS)
q = np.random.rand(NUM_ITEMS, NUM_FACTORS)

bu = np.random.rand(NUM_USERS)
bi = np.random.rand(NUM_ITEMS)

In [60]:
for it in range(NUM_ITERATIONS):
    print("Iteración " + str(it + 1) + " de " + str(NUM_ITERATIONS))

    updated_p = p.copy()  # clone p matrix
    updated_q = q.copy()  # clone q matrix

    updated_bu = bu.copy()  # clone bu vector
    updated_bi = bi.copy()  # clone bi vector

    for u in range(NUM_USERS):
        for i in range(NUM_ITEMS):
            if ratings[u][i] is not None:
                prediction = rating_average + bu[u] + bi[i] + np.dot(p[u], q[i])
                rating = ratings[u][i]
                error = rating - prediction

                updated_p[u] += LEARNING_RATE * (error * q[i] - REGULARIZATION * p[u])
                updated_q[i] += LEARNING_RATE * (error * p[u] - REGULARIZATION * q[i])

                updated_bu[u] += LEARNING_RATE * (error - REGULARIZATION * bu[u])
                updated_bi[i] += LEARNING_RATE * (error - REGULARIZATION * bi[i])

    p = updated_p
    q = updated_q
    bu = updated_bu
    bi = updated_bi

Iteración 1 de 10
Iteración 2 de 10
Iteración 3 de 10
Iteración 4 de 10
Iteración 5 de 10
Iteración 6 de 10
Iteración 7 de 10
Iteración 8 de 10
Iteración 9 de 10
Iteración 10 de 10


In [None]:
predictions = np.full((NUM_USERS, NUM_ITEMS), None)

for u in range(NUM_USERS):
    for i in range(NUM_ITEMS):
        if test_ratings[u][i] is not None:
            predictions[u, i] = rating_average + bu[u] + bi[i] + np.dot(p[u], q[i])

In [None]:
test_ratings_np = np.array([[np.nan if x is None else x for x in row] for row in test_ratings])
predictions_np = np.array([[np.nan if x is None else x for x in row] for row in predictions])

def get_precision(predictions_np, threshold=3.5):
    relevant = test_ratings_np >= threshold
    recommended = predictions_np >= threshold
    true_positive = np.logical_and(relevant, recommended).sum()
    predicted_positive = recommended.sum()
    return true_positive / predicted_positive if predicted_positive > 0 else None

def get_recall(predictions_np, threshold=3.5):
    relevant = test_ratings_np >= threshold
    recommended = predictions_np >= threshold
    true_positive = np.logical_and(relevant, recommended).sum()
    actual_positive = relevant.sum()
    return true_positive / actual_positive if actual_positive > 0 else None

def get_f1(predictions_np, threshold=3.5):
    precision = get_precision(predictions_np, threshold)
    recall = get_recall(predictions_np, threshold)
    if precision is None or recall is None:
        return None
    if precision + recall == 0:
        return 0
    return 2 * (precision * recall) / (precision + recall)

mae = get_mae(predictions_np)
rmse = get_rmse(predictions_np)
precision = get_precision(predictions_np)
recall = get_recall(predictions_np)
f1 = get_f1(predictions_np)
ndcg = get_ndcg(predictions) 

print(f"MAE = {mae}")
print(f"RMSE = {rmse}")
print(f"Precision = {precision}")
print(f"Recall = {recall}")
print(f"F1 = {f1}")
print(f"nDCG = {ndcg}")


MAE = 0.9160203748609498
RMSE = 1.0865300065270902
Precision = 0.760725075528701
Recall = 0.5638154948499776
F1 = 0.6476337448559671
nDCG = 0.14891946204071588


## 3. -- Bernouilli matrix factorization --

In [62]:
SCORES = [1, 2, 3, 4, 5]

U = np.random.rand(len(SCORES), NUM_USERS, NUM_FACTORS)
V = np.random.rand(len(SCORES), NUM_ITEMS, NUM_FACTORS)

In [74]:
def logit (x):
  return 1 / (1 + np.exp(-x))

def compute_prediction(u, i):
    dots = np.array([np.dot(U[s][u], V[s][i]) for s in range(len(SCORES))])
    probs = logit(dots)
    
    s_max = np.argmax(probs)
    
    prediction = SCORES[s_max]
    prob = probs[s_max]
    return prediction, prob

In [75]:
NUM_ITERATIONS = 10

for it in range(NUM_ITERATIONS):
    print("Iteración " + str(it + 1) + " de " + str(NUM_ITERATIONS))

    for s in range(len(SCORES)):

        # update users
        for u in range(NUM_USERS):
            delta = [0.0] * NUM_FACTORS
            for i in range(NUM_ITEMS):
                if ratings[u][i] is not None:
                    dot = 0
                    for f in range(NUM_FACTORS):
                        dot += U[s][u][f] * V[s][i][f]

                    for f in range(NUM_FACTORS):
                        if ratings[u][i] == SCORES[s]:
                            delta[f] += (1 - logit(dot)) * V[s][i][f]
                        else:
                            delta[f] -= logit(dot) * V[s][i][f]

            for f in range(NUM_FACTORS):
                U[s][u][f] += LEARNING_RATE * (delta[f] - REGULARIZATION * U[s][u][f])

        # update items
        for i in range(NUM_ITEMS):
            theta = [0.0] * NUM_FACTORS
            for u in range(NUM_USERS):
                if ratings[u][i] is not None:
                    dot = 0
                    for f in range(NUM_FACTORS):
                        dot += U[s][u][f] * V[s][i][f]

                    for f in range(NUM_FACTORS):
                        if ratings[u][i] == SCORES[s]:
                            theta[f] += (1 - logit(dot)) * U[s][u][f]
                        else:
                            theta[f] -= logit(dot) * U[s][u][f]

            for f in range(NUM_FACTORS):
                V[s][i][f] += LEARNING_RATE * (theta[f] - REGULARIZATION * V[s][i][f])

Iteración 1 de 10
Iteración 2 de 10
Iteración 3 de 10
Iteración 4 de 10
Iteración 5 de 10
Iteración 6 de 10
Iteración 7 de 10
Iteración 8 de 10
Iteración 9 de 10
Iteración 10 de 10


In [76]:
N = 5

def get_recommendations(predictions, N=10):
    predictions = np.array(predictions)
    # Convertimos los None en -inf para que no se seleccionen
    predictions = np.where(predictions == None, -np.inf, predictions)
    # Obtenemos los índices de los N valores más altos
    top_indices = np.argpartition(-predictions, N)[:N]
    # Ordenamos los N mejores por valor descendente
    top_indices = top_indices[np.argsort(-predictions[top_indices])]
    return top_indices.tolist()

In [None]:
predictions = [[None for _ in range(NUM_ITEMS)] for _ in range(NUM_USERS)]

for u in range(NUM_USERS):
    for i in range(NUM_ITEMS):
        if not np.isnan(test_ratings_np[u, i]):  # <-- esta es la forma correcta
            pred, prob = compute_prediction(u, i)
            predictions[u][i] = pred

In [78]:
def get_user_mae(u, predictions_np):
    mask = ~np.isnan(test_ratings_np[u]) & ~np.isnan(predictions_np[u])
    if np.any(mask):
        errors = np.abs(test_ratings_np[u][mask] - predictions_np[u][mask])
        return errors.mean()
    else:
        return None

def get_mae(predictions_np):
    maes = []
    for u in range(NUM_USERS):
        if np.any(~np.isnan(test_ratings_np[u])):
            user_mae = get_user_mae(u, predictions_np)
            if user_mae is not None:
                maes.append(user_mae)
    if maes:
        return np.mean(maes)
    else:
        return None

In [79]:
def get_user_rmse(u, predictions):
    mask = ~np.isnan(test_ratings_np[u]) & ~np.isnan(predictions[u])
    if np.any(mask):
        errors = test_ratings_np[u][mask] - predictions[u][mask]
        mse = np.mean(errors ** 2)
        return np.sqrt(mse)
    else:
        return None

def get_rmse(predictions):
    rmses = []
    for u in range(NUM_USERS):
        if np.any(~np.isnan(test_ratings_np[u])):  
            user_rmse = get_user_rmse(u, predictions)
            if user_rmse is not None:
                rmses.append(user_rmse)

    if len(rmses) > 0:
        return np.mean(rmses)
    else:
        return None

In [80]:
theta = 4

def get_user_precision(u, predictions, theta):
    precision = 0
    count = 0
    recommendations = get_recommendations(predictions[u], N)

    for i in recommendations:
        if i is not None and not np.isnan(test_ratings_np[u, i]):
            if test_ratings_np[u, i] >= theta:
                precision += 1
            count += 1

    if count > 0:
        return precision / count
    else:
        return None

def get_precision(predictions, theta):
    precisions = []
    for u in range(NUM_USERS):
        if np.any(~np.isnan(test_ratings_np[u])):
            user_precision = get_user_precision(u, predictions, theta)
            if user_precision is not None:
                precisions.append(user_precision)

    if len(precisions) > 0:
        return np.mean(precisions)
    else:
        return None

In [81]:
def get_user_recall(u, predictions, theta):
    recall = 0
    count = 0
    recommendations = get_recommendations(predictions[u], N)

    for i in range(NUM_ITEMS):
        if not np.isnan(test_ratings_np[u, i]) and not np.isnan(predictions[u, i]):
            if test_ratings_np[u, i] >= theta:
                recall += 1 if i in recommendations else 0
                count += 1

    if count > 0:
        return recall / count
    else:
        return None

def get_recall(predictions, theta):
    recalls = []
    for u in range(NUM_USERS):
        if np.any(~np.isnan(test_ratings_np[u])):
            user_recall = get_user_recall(u, predictions, theta)
            if user_recall is not None:
                recalls.append(user_recall)

    if len(recalls) > 0:
        return np.mean(recalls)
    else:
        return None

In [82]:
def get_user_f1(u, predictions):
    precision = get_user_precision(u, predictions, theta)
    recall = get_user_recall(u, predictions, theta)

    if precision is None or recall is None:
        return None
    elif precision == 0 and recall == 0:
        return 0.0
    else:
        return 2 * precision * recall / (precision + recall)

def get_f1(predictions):
    user_f1_scores = np.array([get_user_f1(u, predictions) for u in range(NUM_USERS)], dtype=np.float64)
    valid_f1_scores = user_f1_scores[~np.isnan(user_f1_scores)]

    return np.mean(valid_f1_scores) if valid_f1_scores.size > 0 else None

In [83]:
def get_ordered_test_items(u):
    ratings_u = np.array(test_ratings[u], dtype=np.float64)
    valid_indices = np.where(~np.isnan(ratings_u))[0]
    sorted_indices = valid_indices[np.argsort(-ratings_u[valid_indices])]
    return sorted_indices.tolist()

def get_user_idcg(u):
    items = get_ordered_test_items(u)
    if len(items) == 0:
        return 0
    ratings = np.array([test_ratings[u][i] for i in items], dtype=np.float64)
    positions = np.arange(1, len(items) + 1)
    idcg = np.sum((2 ** ratings - 1) / np.log2(positions + 1))
    return idcg

def get_user_dcg(u, recommendations):
    valid_recs = [i for i in recommendations if i is not None and test_ratings[u][i] is not None]
    if len(valid_recs) == 0:
        return 0
    ratings = np.array([test_ratings[u][i] for i in valid_recs], dtype=np.float64)
    positions = np.arange(1, len(valid_recs) + 1)
    dcg = np.sum((2 ** ratings - 1) / np.log2(positions + 1))
    return dcg

def get_user_ndcg(u, predictions):
    recommendations = get_recommendations(predictions[u])
    dcg = get_user_dcg(u, recommendations)
    idcg = get_user_idcg(u)
    return dcg / idcg if idcg > 0 else 0

def get_ndcg(predictions):
    ndcgs = np.array([get_user_ndcg(u, predictions) for u in range(NUM_USERS)], dtype=np.float64)
    return np.mean(ndcgs[~np.isnan(ndcgs)]) if np.any(~np.isnan(ndcgs)) else None

In [85]:
predictions = np.array([[np.nan if x is None else x for x in row] for row in predictions], dtype=np.float64)

mae = get_mae(predictions)
rmse = get_rmse(predictions)
precision = get_precision(predictions, theta)
recall = get_recall(predictions, theta)
f1 = get_f1(predictions)
ndcg = get_ndcg(predictions)
print("MAE = " + str(mae))
print("RMSE = " + str(rmse))
print("Precision = " + str(precision))
print("Recall = " + str(recall))
print("F1 = " + str(f1))
print("nDCG = " + str(ndcg))

MAE = 1.1194441566401367
RMSE = 1.4539904914734938
Precision = 0.6104273504273504
Recall = 0.4976023732633204
F1 = 0.4799430595549093
nDCG = 0.1340347281437326


## Neural network

In [86]:
from keras.models import Model
from keras.layers import Embedding, Flatten, Input, Dense, Concatenate, Dot

In [87]:
NUM_USERS = 943
NUM_ITEMS = 1682

X_train = [np.array([], dtype=int), np.array([], dtype=int)]
y_train = np.array([], dtype=int)

training_file = urllib.request.urlopen("https://drive.upm.es/s/tDdluElfGInyUnU/download")
for line in training_file:
  [u, i, rating] = line.decode("utf-8").split("::")
  X_train[0] = np.append(X_train[0], int(u))
  X_train[1] = np.append(X_train[1], int(i))
  y_train = np.append(y_train, int(rating))

In [88]:
X_test = [np.array([], dtype=int), np.array([], dtype=int)]
y_test = np.array([], dtype=int)

test_file = urllib.request.urlopen("https://drive.upm.es/s/Jn75Vg6okOPsgZu/download")
for line in test_file:
  [u, i, rating] = line.decode("utf-8").split("::")
  X_test[0] = np.append(X_test[0], int(u))
  X_test[1] = np.append(X_test[1], int(i))
  y_test = np.append(y_test, int(rating))

In [89]:
latent_dim = 5
epochs = 10

In [90]:
user_input = Input(shape=[1])
user_embedding = Embedding(NUM_USERS, latent_dim)(user_input)
user_vec = Flatten()(user_embedding)

item_input = Input(shape=[1])
item_embedding =  Embedding(NUM_ITEMS, latent_dim)(item_input)
item_vec = Flatten()(item_embedding)

output = Dot(axes=1)([user_vec, item_vec])

GMF = Model([user_input, item_input], output)

In [91]:
GMF.compile(optimizer='adam', metrics=['mae'], loss='mean_squared_error')
GMF.summary()
GMF.fit(X_train, y_train, epochs=epochs, verbose=1)

Epoch 1/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 492us/step - loss: 12.5977 - mae: 3.3498
Epoch 2/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 492us/step - loss: 2.8906 - mae: 1.3788
Epoch 3/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 543us/step - loss: 1.3197 - mae: 0.8961
Epoch 4/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 468us/step - loss: 1.0253 - mae: 0.7908
Epoch 5/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 494us/step - loss: 0.9348 - mae: 0.7586
Epoch 6/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 508us/step - loss: 0.9022 - mae: 0.7467
Epoch 7/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 489us/step - loss: 0.8824 - mae: 0.7395
Epoch 8/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 447us/step - loss: 0.8779 - mae: 0.7373
Epoch 9/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x319d8aff0>

In [92]:
y_pred = GMF.predict(X_test)
y_pred

[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 517us/step


array([[3.5138578],
       [2.714233 ],
       [2.277063 ],
       ...,
       [3.2852485],
       [3.6452658],
       [3.394258 ]], dtype=float32)

In [93]:
from sklearn.metrics import mean_absolute_error
mean_absolute_error(y_test, y_pred)

0.7732337976556947

In [94]:
user_input = Input(shape=[1])
user_embedding = Embedding(NUM_USERS, latent_dim)(user_input)
user_vec = Flatten()(user_embedding)

item_input = Input(shape=[1])
item_embedding = Embedding(NUM_ITEMS, latent_dim)(item_input)
item_vec = Flatten()(item_embedding)

concat = Concatenate(axis=1)([user_vec, item_vec])

layer1 = Dense(20, 'relu')(concat)
layer2 = Dense(10, 'relu')(layer1)
output = Dense(1, activation='relu')(layer2)

MLP = Model([user_input, item_input], output)

In [95]:
MLP.compile(optimizer='adam', metrics=['mae'], loss='mean_squared_error')
MLP.summary()
MLP.fit(X_train, y_train, epochs=epochs, verbose=1)

Epoch 1/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 567us/step - loss: 2.5392 - mae: 1.1681
Epoch 2/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 566us/step - loss: 0.8735 - mae: 0.7401
Epoch 3/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 531us/step - loss: 0.8641 - mae: 0.7358
Epoch 4/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 495us/step - loss: 0.8408 - mae: 0.7237
Epoch 5/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 552us/step - loss: 0.8179 - mae: 0.7112
Epoch 6/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 569us/step - loss: 0.8025 - mae: 0.7060
Epoch 7/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 536us/step - loss: 0.7858 - mae: 0.6976
Epoch 8/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 542us/step - loss: 0.7689 - mae: 0.6884
Epoch 9/10
[1m2995/2995[0m [32m━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x31eb08b60>

In [96]:
y_pred = MLP.predict(X_test)
y_pred

[1m131/131[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 694us/step


array([[3.7254906],
       [2.931367 ],
       [2.2511451],
       ...,
       [3.1194272],
       [3.5537028],
       [2.9819207]], dtype=float32)

In [97]:
from sklearn.metrics import mean_absolute_error
mean_absolute_error(y_test, y_pred)

0.7490996851973708