In [1]:
%reload_ext autoreload
%autoreload 2

import os
import sys
# Add the parent of sys.path[0] to the module search path (at position 1) so
# the `src` package imported in the next cell can be resolved.
# NOTE(review): presumably sys.path[0] is the notebook's own directory, which
# would make '..' the repository root — confirm for the kernel in use.
sys.path.insert(1, os.path.join(sys.path[0], '..'))

In [2]:
import numpy as np
import pandas as pd
import time

from src.fitting import train_mf_native, reviews_dataset, evaluate_recommendations
from src.fitting.mf_native import preprocess, get_reviews_by_user, update_X, update_Y, get_loss

In [3]:
# Unpack the four values returned by reviews_dataset(): the train / validation /
# test review splits and `images`. Later cells pass `reviews_train` and `images`
# to the trainers and call `.groupby("user_index")` on `reviews_validation`;
# `reviews_test` is not used in the cells shown here.
reviews_train, reviews_validation, reviews_test, images = reviews_dataset()

In [None]:
# Manual, step-by-step run of the alternating matrix-factorisation fit using the
# building blocks from src.fitting.mf_native, with per-step timing and the loss
# printed after every ALS epoch.

# --- Hyperparameters -------------------------------------------------------
train_data = reviews_train
d = 20                      # number of latent dimensions (columns of X and Y)
alpha = .1                  # passed to update_X and get_loss
beta = .1                   # passed to update_Y and get_loss
max_als_epochs = 10         # number of outer alternations
sgd_learning_rate = .1
max_sgd_epochs = 1000000
sgd_batch_size = 1
seed = 0                    # fixes the random initialisation of X and Y


def _timed(label, step, **kwargs):
    """Print `label`, run `step(**kwargs)`, print the elapsed seconds, return the result."""
    print(label)
    # perf_counter is monotonic, so the interval cannot be skewed by clock adjustments.
    start = time.perf_counter()
    result = step(**kwargs)
    print(f"{time.perf_counter() - start:.2f}s")
    return result


# --- Data preparation ------------------------------------------------------
train_data, I, tags, n, m, k = preprocess(train_data, images)

image_indices, ratings, start_indices, end_indices = get_reviews_by_user(train_data)

# --- Initialisation --------------------------------------------------------
# A seeded generator makes the starting factors identical after a kernel restart.
# NOTE(review): this only controls the initial X and Y; any randomness inside
# update_Y's SGD is not seeded from here.
rng = np.random.default_rng(seed)
X = rng.normal(size=(n, d))
Y = rng.normal(size=(k, d))

# Keyword arguments common to update_X, update_Y and get_loss. The dict holds
# references to the same X and Y arrays, so the update steps (whose return
# values are not used — presumably they modify X / Y in place) stay visible to
# the later steps and to the cells below.
shared = dict(
    X=X,
    Y=Y,
    I=I,
    image_indices=image_indices,
    ratings=ratings,
    start_indices=start_indices,
    end_indices=end_indices,
    n=n,
    d=d,
)

# --- Alternating optimisation ----------------------------------------------
for als_epoch in range(max_als_epochs):
    print(f"{als_epoch=}")

    _timed("Computing X", update_X, **shared, alpha=alpha)

    _timed(
        "Computing Y",
        update_Y,
        **shared,
        k=k,
        beta=beta,
        learning_rate=sgd_learning_rate,
        max_epochs=max_sgd_epochs,
        batch_size=sgd_batch_size,
    )

    loss = _timed("Computing loss", get_loss, **shared, k=k, alpha=alpha, beta=beta)
    # Printed after the timing line so the measured interval covers only get_loss.
    print(f"{loss=}")

als_epoch=0
Computing X
136.79s
Computing Y
epoch=100000, elapsed: 18.14s, average: 18.14s, delta_Y_size=1.19e+04, Y_size=2.51e+04, percent_change=4.74e-01
epoch=200000, elapsed: 36.33s, average: 18.17s, delta_Y_size=8.54e+03, Y_size=2.28e+04, percent_change=3.74e-01
epoch=300000, elapsed: 53.83s, average: 17.94s, delta_Y_size=7.04e+03, Y_size=2.10e+04, percent_change=3.35e-01
epoch=400000, elapsed: 72.02s, average: 18.00s, delta_Y_size=6.26e+03, Y_size=1.95e+04, percent_change=3.21e-01
epoch=500000, elapsed: 90.52s, average: 18.10s, delta_Y_size=5.61e+03, Y_size=1.83e+04, percent_change=3.06e-01
epoch=600000, elapsed: 108.59s, average: 18.10s, delta_Y_size=5.12e+03, Y_size=1.73e+04, percent_change=2.96e-01
epoch=700000, elapsed: 126.14s, average: 18.02s, delta_Y_size=4.88e+03, Y_size=1.65e+04, percent_change=2.96e-01
epoch=800000, elapsed: 143.64s, average: 17.95s, delta_Y_size=4.44e+03, Y_size=1.57e+04, percent_change=2.83e-01
epoch=900000, elapsed: 161.02s, average: 17.89s, delta_Y_

In [None]:
# Fit the matrix-factorisation recommender through the packaged entry point.
# The hyperparameters are collected in one mapping so they can be read (and
# tweaked) in a single place, then unpacked into the call.
mf_hyperparameters = {
    "d": 20,
    "alpha": 0.1,
    "beta": 0.1,
    "max_als_epochs": 10,
    "sgd_learning_rate": 0.1,
    "max_sgd_epochs": 1_000_000,
    "sgd_batch_size": 1,
}
recommender = train_mf_native(train_data=reviews_train, images=images, **mf_hyperparameters)

als_epoch=0
Computing X


In [None]:
# For every user_index in the validation split, collect a pair of NumPy arrays:
# (image_index values, rating values) taken from that user's rows.
reviews_validation_by_user = (
    reviews_validation
    .groupby("user_index")
    .apply(
        lambda user_reviews: (
            user_reviews["image_index"].to_numpy(),
            user_reviews["rating"].to_numpy(),
        )
    )
)

In [None]:
# Validation loss: for each validation user with at least one review, take the
# mean squared error between the observed ratings and the predictions
# tags[image_ids] @ (Y @ X[u]); then sum over users and divide by n (the user
# count returned by preprocess on the training data).
#
# NOTE(review): the original expression indexed `image_tags`, which is not
# defined in any cell of this notebook and raised NameError on a fresh kernel.
# `tags` (returned by preprocess) is the only tag object in scope and is
# assumed to be the intended (num_images, k) matrix — confirm.
per_user_mse = [
    np.mean((user_ratings - tags[image_ids] @ (Y @ X[u])) ** 2)
    for u, (image_ids, user_ratings) in reviews_validation_by_user.items()
    if len(image_ids) > 0  # skip users without validation reviews (avoids an empty mean)
]
validation_loss = sum(per_user_mse) / n
validation_loss

In [None]:
# Train a k-NN based recommender on the training reviews; this rebinds
# `recommender`, replacing the model produced by the train_mf_native cell.
# NOTE(review): neither `train_knn` nor `word_embedding` is imported or defined
# in any cell above, so this cell raises NameError on Restart & Run All —
# add the missing import / definition before relying on it.
recommender = train_knn(reviews_train, word_embedding)

In [None]:
# Score whichever model `recommender` currently refers to (the most recently
# executed training cell) against the validation reviews; the returned value is
# displayed as the cell output.
evaluate_recommendations(recommender, reviews_validation)