In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

In [2]:
# Load the ratings table from disk and keep it as a plain NumPy array.
# The rest of the notebook treats entries equal to 0 as "no rating".
# NOTE(review): read_csv takes the first line of the file as column headers
# by default -- confirm database.csv really has a header row.
matrix = np.array(pd.read_csv('database.csv').values)

In [3]:
def mse(matrix, predictions):
    """Mean squared error restricted to the non-zero entries of ``matrix``.

    Both inputs are flattened, the positions where ``matrix`` is non-zero are
    selected from each, and the two selections are handed to scikit-learn's
    ``mean_squared_error``. Zero entries of ``matrix`` never contribute.

    Parameters
    ----------
    matrix : numpy.ndarray
        Reference values; zeros are skipped.
    predictions : numpy.ndarray
        Predicted values, read at the same flattened positions as ``matrix``.

    Returns
    -------
    The value returned by ``mean_squared_error`` for the selected entries.
    """
    truth = matrix.reshape(-1)
    estimate = predictions.reshape(-1)
    # Index tuple of the positions that actually hold a rating.
    observed = np.nonzero(truth)
    y_true = truth[observed]
    y_pred = estimate[observed]
    return mean_squared_error(y_true, y_pred)

In [4]:
def predict(global_bias, user_bias, item_bias, user_embedding, item_embedding, u, i):
    """Score a single (user, item) pair.

    The score is ``global_bias + user_bias[u] + item_bias[i]`` plus the dot
    product of row ``u`` of ``user_embedding`` and row ``i`` of
    ``item_embedding``.

    Parameters
    ----------
    global_bias : scalar offset added to every prediction.
    user_bias, item_bias : sequences indexed by ``u`` and ``i`` respectively.
    user_embedding, item_embedding : arrays whose rows ``u`` / ``i`` are the
        latent vectors combined with ``np.dot``.
    u, i : index of the user and of the item.

    Returns
    -------
    The predicted value for the pair.
    """
    baseline = global_bias + user_bias[u] + item_bias[i]
    interaction = np.dot(user_embedding[u], item_embedding[i])
    return baseline + interaction

In [5]:
def predictAll(global_bias, user_bias, item_bias, user_embedding, item_embedding):
    """Predict the score of every (user, item) pair in one vectorized pass.

    Entry ``[u, i]`` of the result is
    ``global_bias + user_bias[u] + item_bias[i] + dot(user_embedding[u], item_embedding[i])``,
    i.e. the same quantity ``predict`` computes for one pair, but obtained
    with broadcasting and a single matrix product instead of one Python-level
    call per pair.

    Parameters
    ----------
    global_bias : scalar offset added to every prediction.
    user_bias : 1-D sequence with at least ``n_users`` entries.
    item_bias : 1-D sequence with at least ``n_items`` entries.
    user_embedding : array of shape ``(n_users, k)``.
    item_embedding : array of shape ``(n_items, k)``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_users, n_items)``.
    """
    user_embedding = np.asarray(user_embedding)
    item_embedding = np.asarray(item_embedding)
    n_users = user_embedding.shape[0]
    n_items = item_embedding.shape[0]
    # Only the first n_users / n_items bias entries are used, matching what a
    # per-pair loop over the embedding rows would read.
    user_offsets = np.asarray(user_bias)[:n_users, np.newaxis]
    item_offsets = np.asarray(item_bias)[np.newaxis, :n_items]
    baseline = global_bias + user_offsets + item_offsets
    # Row u of the product holds the dot products of user u with every item.
    interactions = np.dot(user_embedding, item_embedding.T)
    # Always hand back a float array, whatever the input dtypes were.
    return np.asarray(baseline + interactions, dtype=float)

In [8]:
def train(ratings, k, learning_rate, regularization, epochs, seed=None):
    """Fit a biased matrix-factorization model with stochastic gradient descent.

    Entries of ``ratings`` equal to zero are treated as unobserved and are
    never visited. Each epoch shuffles the observed (user, item) pairs and
    applies one SGD step per pair to the two biases and the two latent
    vectors involved.

    Parameters
    ----------
    ratings : 2-D array of shape ``(n_users, n_items)``.
    k : int
        Number of latent dimensions; embeddings are initialised from a normal
        distribution with standard deviation ``1 / k``.
    learning_rate : float
        SGD step size.
    regularization : float
        Weight of the shrinkage term applied to biases and embeddings.
    epochs : int
        Number of passes over the observed ratings.
    seed : int or None, optional
        When given, initialisation and shuffling use a private
        ``numpy.random.RandomState(seed)`` so the run is repeatable. When
        ``None`` (the default) the global ``numpy.random`` state is used.

    Returns
    -------
    tuple
        ``(global_bias, user_bias, item_bias, user_embedding, item_embedding)``.
        ``global_bias`` is the mean of the observed ratings, or ``0.0`` when
        there are none.
    """
    ratings = np.asarray(ratings)
    n_users, n_items = ratings.shape
    rng = np.random if seed is None else np.random.RandomState(seed)

    # 1.0 / k keeps the scale a true fraction even under integer division.
    init_scale = 1.0 / k
    user_embedding = rng.normal(scale=init_scale, size=(n_users, k))
    item_embedding = rng.normal(scale=init_scale, size=(n_items, k))

    rows, cols = np.nonzero(ratings)
    # Guard the empty case: the mean of zero ratings would be NaN.
    global_bias = ratings[rows, cols].mean() if len(rows) else 0.0
    user_bias = np.zeros(n_users)
    item_bias = np.zeros(n_items)

    for _ in range(epochs):
        order = rng.permutation(len(rows))
        rows, cols = rows[order], cols[order]
        for u, i in zip(rows, cols):
            prediction = predict(global_bias, user_bias, item_bias, user_embedding, item_embedding, u, i)
            e = ratings[u, i] - prediction
            user_bias[u] += learning_rate * (e - regularization * user_bias[u])
            item_bias[i] += learning_rate * (e - regularization * item_bias[i])
            # Snapshot the user vector so that BOTH embedding updates are
            # computed from the pre-step values (a simultaneous gradient step);
            # without it the item update would see the already-moved user vector.
            user_vector = user_embedding[u].copy()
            user_embedding[u] += learning_rate * (e * item_embedding[i] - regularization * user_vector)
            item_embedding[i] += learning_rate * (e * user_vector - regularization * item_embedding[i])
    return global_bias, user_bias, item_bias, user_embedding, item_embedding

In [9]:
# train() draws its initial embeddings and its per-epoch shuffle from
# np.random, so seed the global generator first; otherwise every run of this
# cell produces a different model and a different error below.
np.random.seed(0)
global_bias, user_bias, item_bias, user_embedding, item_embedding = train(
    matrix, k=40, learning_rate=0.0001, regularization=0, epochs=1
)

In [10]:
# Dense (n_users x n_items) table of predicted scores from the fitted model.
predictions = predictAll(
    global_bias=global_bias,
    user_bias=user_bias,
    item_bias=item_bias,
    user_embedding=user_embedding,
    item_embedding=item_embedding,
)

In [11]:
# Replace every non-finite prediction (NaN, +inf, -inf) with 0 in one pass.
# NOTE(review): this hides any divergence of the training run -- worth
# checking how many entries are affected before trusting the score.
predictions = np.where(np.isfinite(predictions), predictions, 0)

In [12]:
# Error over the non-zero entries of `matrix` only; note this is the same data
# the model was fitted on, so it is a training error.
mse(matrix=matrix, predictions=predictions)

1.0616311378966825