In [1]:
### Imports
import gzip
from collections import defaultdict
import math
import numpy as np
import string
import random
import string
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import pandas as pd
import time
import itertools
from datetime import datetime
import copy

import warnings
warnings.filterwarnings("ignore")

In [2]:
### EVALUATION / METRICS
######################################
def convert_to_np_array(A):
    """
    Coerce A to a numpy array.
    An input that already is an ndarray is handed back as-is (no copy);
    anything else is wrapped with np.array.
    """
    return A if isinstance(A, np.ndarray) else np.array(A)

def get_MSE(A, B):
    """
    Mean squared error between two equally shaped sequences A and B.
    Both inputs are coerced to numpy arrays before subtracting.
    """
    residuals = convert_to_np_array(A) - convert_to_np_array(B)
    return np.mean(residuals**2)

def inner(A, B):
    """
    Dot product of A and B (each coerced to a numpy array first).
    """
    lhs = convert_to_np_array(A)
    rhs = convert_to_np_array(B)
    return np.dot(lhs, rhs)

def get_SSE(A, B):
    """
    Sum of squared errors between A and B.
    Both inputs are coerced to numpy arrays before subtracting.
    """
    residuals = convert_to_np_array(A) - convert_to_np_array(B)
    return np.sum(residuals**2)

def get_SE(A,B):
    """
    Element-wise squared error between A and B.
    Returns an array with one squared difference per element.
    """
    residuals = convert_to_np_array(A) - convert_to_np_array(B)
    return residuals**2

def get_accuracy(A,B):
    """
    Fraction of positions at which A and B hold equal values.
    The number of matches is divided by len(A).
    """
    matches = convert_to_np_array(A) == convert_to_np_array(B)
    return np.sum(matches) / len(A)

def get_BER(y_actual, y_predicted):
    """
    Return the balanced error rate between positive (1) and negative (0) instances.

    An instance is positive when its actual label equals 1; every other
    actual label is counted as negative. The result is

        BER = (FPR + FNR) / 2

    with FPR = FP / (FP + TN) and FNR = FN / (FN + TP).

    When y_actual contains no negatives (or no positives) the corresponding
    rate is undefined; it is taken to be 0.0 instead of raising
    ZeroDivisionError. Empty input therefore yields 0.0.
    """
    TP, FP, TN, FN = 0, 0, 0, 0
    for actual, pred in zip(y_actual, y_predicted):
        if actual == 1:
            if actual == pred:
                TP += 1
            else:
                FN += 1
        else:
            if actual == pred:
                TN += 1
            else:
                FP += 1

    n_neg = FP + TN
    n_pos = FN + TP
    # Guard the denominators: a class that never occurs contributes no error.
    FPR = FP / n_neg if n_neg else 0.0
    FNR = FN / n_pos if n_pos else 0.0

    return (1/2) * (FPR + FNR)

def get_errorMetrics_binary(y_actual, y_predicted, beta=1):
    """
    Return a dictionary of error metrics for a binary classification task.

    An instance is positive when its actual label equals 1; every other
    actual label is counted as negative.

    Keys of the returned dictionary:
        "TP", "FP", "TN", "FN"        --> confusion counts
        "TPR", "FPR", "FNR", "TNR"    --> per-class rates
        "precision", "recall"
        "BER"                         --> (FPR + FNR) / 2
        f"F{beta}_Score"              --> F-beta score for the given beta
        "F_Score"                     --> F1 score

    Any ratio whose denominator is zero (a class missing from y_actual, no
    positive predictions, or precision + recall == 0) is reported as 0.0
    instead of raising ZeroDivisionError.
    """

    def _ratio(numer, denom):
        # Undefined ratios are reported as 0.0 rather than crashing.
        return numer / denom if denom else 0.0

    output = {}
    TP, FP, TN, FN = 0, 0, 0, 0
    n_pos, n_neg = 0, 0
    for actual, pred in zip(y_actual, y_predicted):
        if actual == 1:
            n_pos += 1
            if actual == pred:
                TP += 1
            else:
                FN += 1
        else:
            n_neg += 1
            if actual == pred:
                TN += 1
            else:
                FP += 1
    ###
    TPR, FNR = _ratio(TP, n_pos), _ratio(FN, n_pos)
    FPR, TNR = _ratio(FP, n_neg), _ratio(TN, n_neg)
    prec = _ratio(TP, TP + FP)
    recall = _ratio(TP, TP + FN)
    output["TP"], output["FP"], output["TN"], output["FN"] = TP, FP, TN, FN
    output["TPR"], output["FPR"], output["FNR"], output["TNR"] = TPR, FPR, FNR, TNR
    output["precision"], output["recall"] = prec, recall
    output["BER"] = (1/2) * (FPR + FNR)
    output[f"F{beta}_Score"] = _ratio((1 + beta**2) * (prec * recall), (beta**2)*prec + recall)
    output["F_Score"] = _ratio(2 * (prec * recall), prec + recall)

    return output

In [3]:
### FACTORIZATION MACHINE
#
#

def initialize_weighted_params(user_weights, item_weights, k, init_bounds):
    """
    Build randomly initialized bias and gamma tables for users and items.

    Every id found in user_weights / item_weights receives
        bias[id]  = (uniform(lower, upper), weight)
        gamma[id] = (array of k uniform(lower, upper) draws, weight)
    where (lower, upper) = init_bounds and weight is the value stored for
    that id in the corresponding weights dictionary.

    Returns the tuple (user_bias, item_bias, user_gamma, item_gamma).
    """
    lo, hi = init_bounds

    def _draw_tables(weights):
        # For each id: draw the bias first, then the k latent values.
        bias_table, gamma_table = {}, {}
        for entity_id, weight in weights.items():
            bias_table[entity_id] = (random.uniform(lo, hi), weight)
            latent = np.array([random.uniform(lo, hi) for _ in range(k)])
            gamma_table[entity_id] = (latent, weight)
        return bias_table, gamma_table

    # Users are drawn before items.
    user_bias, user_gamma = _draw_tables(user_weights)
    item_bias, item_gamma = _draw_tables(item_weights)

    return (user_bias, item_bias, user_gamma, item_gamma)

def get_id_weights(df, id_col, weight_col=None, weight_type=None, only_nonzero=False):
    """
    Build a {id: weight} dictionary from the rows of df.

    weight_type == 1 --> every unique id in id_col gets the weight 1
    otherwise        --> each id is paired with its value in weight_col
                         (a later row overrides an earlier one for the same id);
                         with only_nonzero set, rows whose weight is 0 are
                         dropped first
    """
    if weight_type == 1:
        return dict.fromkeys(df[id_col].unique(), 1)

    source = df
    if only_nonzero:
        nonzero_rows = df[weight_col] != 0
        source = df[nonzero_rows][[id_col, weight_col]]
    return dict(zip(source[id_col], source[weight_col]))

def predict_latent_factor_batch(Xtrain, theta, k, value_bounds=None, avgParams=None, training=None):
    """
    Return one prediction per row of Xtrain.

    Xtrain       --> sequence of rows; row[0] is the user id, row[1] the item id
    theta        --> (alpha, user_bias, item_bias, user_gamma, item_gamma,
                      guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys)
    k            --> number of latent factors per gamma vector
    value_bounds --> (lower, upper) range the predictions are clipped to;
                     None means no clipping
    avgParams, training --> accepted for interface compatibility, not used here

    For each row the weighted bias and gamma terms of the user and of the item
    are collected (the base table plus one table per g_key). An id missing
    from a table contributes 0 (bias) or a zero vector (gamma). The prediction is

        alpha + sum(biases) + 1/2 * sum_f[(sum_j gamma_jf)^2 - sum_j gamma_jf^2]

    i.e. the bias terms plus all pairwise interactions between the collected
    gamma vectors.
    """
    n_rows = len(Xtrain)
    if value_bounds is None: value_bounds = (-np.inf, np.inf)
    alpha, user_bias, item_bias, user_gamma, item_gamma, guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys = theta

    # The lookup tables do not depend on the row, so build them once instead
    # of once per row.
    user_bias_tables = [user_bias] + get_gterms(guser_bias, g_keys)
    user_gamma_tables = [user_gamma] + get_gterms(guser_gamma, g_keys)
    item_bias_tables = [item_bias] + get_gterms(gitem_bias, g_keys)
    item_gamma_tables = [item_gamma] + get_gterms(gitem_gamma, g_keys)

    n_terms = (1 + len(g_keys)) * 2
    biases = np.zeros((n_rows, n_terms))
    gammas = np.zeros((n_rows, n_terms, k))
    for i, row in enumerate(Xtrain):
        u_id, item_id = row[0], row[1]
        biases[i,:] = get_biases(u_id, user_bias_tables) + get_biases(item_id, item_bias_tables)
        gammas[i,:,:] = get_gammas(u_id, k, user_gamma_tables) + get_gammas(item_id, k, item_gamma_tables)

    # Pairwise interaction identity: 1/2 * ((sum of vectors)^2 - sum of squares)
    C2 = np.square(np.sum(gammas, axis=1))
    D  = np.sum(np.square(gammas), axis=1)
    gamma_term = (1/2) * (C2 - D).sum(axis=1)
    bias_term = biases.sum(axis=1)
    predictions = alpha + bias_term + gamma_term

    # Keep the output inside the allowed value range
    return np.clip(predictions, value_bounds[0], value_bounds[1])

def get_cost_mse(theta, lambdas, Xtrain, ytrain, k, value_bounds=None):
    """
    Evaluate the regularized training cost for the given theta.
    Xtrain must be of form [(user, item), (user, item), ...]

    cost = SSE(predictions, ytrain)
           + lambda * (sum of squared bias / gamma parameters)
    where the base tables and every g_key table are penalized with the
    user/item bias/gamma lambda found in `lambdas`.

    Returns the tuple (cost, mse). value_bounds is accepted but not used:
    the predictions are computed without clipping.
    """
    (alpha, user_bias, item_bias, user_gamma, item_gamma,
     guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys) = theta
    reg_user_bias = lambdas["lambda_user_bias"]
    reg_user_gamma = lambdas["lambda_user_gamma"]
    reg_item_bias = lambdas["lambda_item_bias"]
    reg_item_gamma = lambdas["lambda_item_gamma"]

    def _bias_penalty(table):
        # Sum of squared (unweighted) bias parameters of one table
        return np.sum(np.array([entry[0]**2 for entry in table.values()]))

    def _gamma_penalty(table):
        # Sum of squared norms of the (unweighted) gamma vectors of one table
        return np.sum(np.array([np.dot(entry[0], entry[0]) for entry in table.values()]))

    # Predict using the current theta values, then score the fit
    predictions = predict_latent_factor_batch(Xtrain, theta, k, training=True)
    mse = get_MSE(predictions, ytrain)
    cost = get_SSE(predictions, ytrain)

    # Regularization of the base tables
    base_terms = [
        (reg_user_bias, _bias_penalty, user_bias),
        (reg_item_bias, _bias_penalty, item_bias),
        (reg_user_gamma, _gamma_penalty, user_gamma),
        (reg_item_gamma, _gamma_penalty, item_gamma),
    ]
    for reg, penalty, table in base_terms:
        cost += reg * penalty(table)

    # Regularization of the tables attached to each g_key
    for g_key in g_keys:
        cost += reg_user_bias * _bias_penalty(guser_bias[g_key])
        cost += reg_item_bias * _bias_penalty(gitem_bias[g_key])
        cost += reg_user_gamma * _gamma_penalty(guser_gamma[g_key])
        cost += reg_item_gamma * _gamma_penalty(gitem_gamma[g_key])

    return (cost, mse)

def get_biases(id, bias_terms):
    """
    Collect the weighted bias of `id` from every table in bias_terms.
    A table stores (value, weight) per id; the result holds value * weight,
    or 0 for a table that does not contain `id`.
    """
    weighted = []
    for table in bias_terms:
        if id in table:
            value, weight = table[id][0], table[id][1]
            weighted.append(value * weight)
        else:
            weighted.append(0)
    return weighted

def get_gammas(id, k, gamma_terms):
    """
    Collect the weighted gamma vector of `id` from every table in gamma_terms.
    A table stores (vector, weight) per id; the result holds vector * weight,
    or a zero vector of length k for a table that does not contain `id`.
    """
    weighted = []
    for table in gamma_terms:
        if id in table:
            vector, weight = table[id][0], table[id][1]
            weighted.append(vector * weight)
        else:
            weighted.append(np.zeros(k))
    return weighted

def get_gterms(gterms, g_keys):
    """
    Return, in g_keys order, the table stored under each key of gterms.
    A key that is missing from gterms yields an empty dictionary.
    """
    tables = []
    for g_key in g_keys:
        if g_key in gterms:
            tables.append(gterms[g_key])
        else:
            tables.append({})
    return tables

def get_batches(x, n):
    """
    Yield consecutive slices of x holding at most n elements each.
    The last slice is shorter when len(x) is not a multiple of n.
    """
    total = len(x)
    for start in range(0, total, n):
        stop = start + n
        if stop > total:
            stop = total
        yield x[start:stop]

def fit_parameters(Xtrain, ytrain, theta, ep=0.005, iter_limit=200, quiet=True, quiet2=True, value_bounds=(-np.inf, np.inf), mini_batch=False, n_mini_batch=None, step=None, **kwargs):
    """
    Fit the parameters until convergence (when the change in train MSE between
    two consecutive epochs is at most ep), divergence, or the epoch limit.

    Xtrain, ytrain --> training rows and their target values
    theta          --> initial parameter tuple
    ep             --> The threshold for early stopping between mse checks
    iter_limit     --> The maximum number of iterations allowed (epochs)
    quiet, quiet2  --> Silence the per-epoch / per-cycle progress output
    value_bounds   --> The expected range of values expected to be outputted by the model
    mini_batch     --> Update on shuffled mini batches instead of the full data
    n_mini_batch   --> Number of mini batches per epoch (default 5)
    step           --> Accepted for interface compatibility, not used here

    Arguments packed into **kwargs:
    lambdas --> Dictionary of regularization parameters ("lambda_user_bias",
                "lambda_item_bias", "lambda_user_gamma", "lambda_item_gamma")
    k       --> Number of latent parameters to use per user/item vector
    When either one is not passed, the module-level variable of the same name
    is used.

    Returns (theta, cost, mse). On convergence these are the values of the
    final epoch; on divergence or when the epoch limit is hit they are the
    values of the epoch with the lowest train MSE.
    """
    # Prefer explicitly passed hyper-parameters; otherwise fall back to the
    # module-level `lambdas` / `k`.
    reg_lambdas = kwargs["lambdas"] if "lambdas" in kwargs else lambdas
    n_factors = kwargs["k"] if "k" in kwargs else k
    # Train MSE above which the run is considered to have diverged
    divergence_mse = 500000

    t_fit = time.time()
    last_mse = np.inf
    # The update routines may modify the parameter dictionaries in place, so
    # the best parameters must be stored as an independent copy.
    best_params = [copy.deepcopy(theta), np.inf, np.inf]
    epoch_count = 0
    if mini_batch:
        if n_mini_batch is None: n_mini_batch = 5
        batch_size = math.ceil(len(ytrain) / n_mini_batch)

    if not quiet:
        print("Fitting parameters...\n-----")
    while True:
        epoch_count += 1
        ### Update theta
        t_theta = time.time()
        if mini_batch:
            X_y = list(zip(Xtrain, ytrain))
            random.shuffle(X_y)
            Xtrain, ytrain = zip(*X_y)
            for i,(X_batch, y_batch) in enumerate(zip(get_batches(Xtrain, batch_size), get_batches(ytrain, batch_size))):
                print(f"Batch {i + 1}/{n_mini_batch}")
                theta = update_params(theta, reg_lambdas, X_batch, y_batch, n_factors)
        else:
            # Alternative full-batch updates: update_params_hybrid,
            # update_params_blockCoordinateDescent, update_params_gradientDescent (uses step)
            theta = update_params_coordinateDescent(theta, reg_lambdas, Xtrain, ytrain, n_factors, quiet2=quiet2)

        cost, mse = get_cost_mse(theta, reg_lambdas, Xtrain, ytrain, n_factors, value_bounds)
        if not quiet:
            print(f"-----> Epoch {epoch_count}: Cost = {cost}, Train MSE = {mse}, Time Elapsed = {time.time() - t_theta}")
        ### Save current params as best_params if the mse is less than the last mse
        if mse < best_params[1]:
            best_params = [copy.deepcopy(theta), mse, cost]
        ### Check if cost is too high - sometimes the algorithm diverges
        ### A NaN/inf mse is a divergence as well, not a convergence
        if not np.isfinite(mse) or mse > divergence_mse:
            theta, mse, cost = best_params
            print(f"Training MSE too high: Best Train MSE = {best_params[1]}, Total Time Elapsed = {time.time() - t_fit}")
            break
        ### Early stop if the ep condition is met
        if (abs(last_mse - mse) > ep):
            last_mse = mse
        else:
            print(f"Convergence after {epoch_count} epochs: Cost = {cost}, Train MSE = {mse}, Total Time Elapsed = {time.time() - t_fit}")
            break
        ### If the iteration limit is reached, stop and return the best parameters
        if epoch_count >= iter_limit:
            theta, mse, cost = best_params
            print(f"Iteration limit reached after {epoch_count} epochs: Best Train MSE = {best_params[1]}, Total Time Elapsed = {time.time() - t_fit}")
            break

    return (theta, cost, mse)


In [4]:
### Coordinate Descent for FM --> ALPHA
def update_alpha(theta, lambdas, Xtrain, ytrain, k):
    """
    Coordinate descent step for the global offset alpha.

    With every bias and gamma parameter held fixed, alpha is set to the mean
    of (rating - bias terms - gamma interaction term) over the training rows.

    theta   --> (alpha, user_bias, item_bias, user_gamma, item_gamma,
                 guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys)
    lambdas --> accepted for a uniform update signature; alpha is not regularized
    Xtrain  --> sequence of rows; row[0] is the user id, row[1] the item id
    ytrain  --> target value per row
    k       --> number of latent factors per gamma vector

    Returns theta with alpha replaced; all other entries are passed through.
    With no training rows alpha is left unchanged.
    """
    alpha, user_bias, item_bias, user_gamma, item_gamma, guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys = theta

    # Use the ytrain argument (not a notebook-level variable) as the targets
    n_rows = len(ytrain)
    if n_rows == 0:
        # Nothing to average over: keep the current alpha
        return alpha, user_bias, item_bias, user_gamma, item_gamma, guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys
    ratings = np.array(ytrain)

    # The lookup tables do not depend on the row, so build them once
    user_bias_tables = [user_bias] + get_gterms(guser_bias, g_keys)
    user_gamma_tables = [user_gamma] + get_gterms(guser_gamma, g_keys)
    item_bias_tables = [item_bias] + get_gterms(gitem_bias, g_keys)
    item_gamma_tables = [item_gamma] + get_gterms(gitem_gamma, g_keys)

    n_terms = (1 + len(g_keys)) * 2
    biases = np.zeros((n_rows, n_terms))
    gammas = np.zeros((n_rows, n_terms, k))
    for i, (row, _) in enumerate(zip(Xtrain, ytrain)):
        u_id, item_id = row[0], row[1]
        biases[i,:] = get_biases(u_id, user_bias_tables) + get_biases(item_id, item_bias_tables)
        gammas[i,:,:] = get_gammas(u_id, k, user_gamma_tables) + get_gammas(item_id, k, item_gamma_tables)

    # Pairwise interaction identity: 1/2 * ((sum of vectors)^2 - sum of squares)
    C2 = np.square(np.sum(gammas, axis=1))
    D  = np.sum(np.square(gammas), axis=1)
    gamma_term = (1/2) * (C2 - D).sum(axis=1)
    bias_term = biases.sum(axis=1)
    alpha = (ratings - bias_term - gamma_term).sum() / n_rows
    return alpha, user_bias, item_bias, user_gamma, item_gamma, guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys

In [5]:
### Coordinate Descent for FM --> BIASES
def update_biases(theta, lambdas, k, update_type):
    """
    Coordinate descent step for the bias parameters of one side (users or items).

    theta       --> (alpha, user_bias, item_bias, user_gamma, item_gamma,
                     guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys)
    lambdas     --> dictionary holding "lambda_user_bias", "lambda_user_gamma",
                    "lambda_item_bias" and "lambda_item_gamma"
    k           --> number of latent factors per gamma vector
    update_type --> "user" updates the user side; any other value updates the
                    item side

    For every id on the chosen side, the base bias and each g_key bias that
    contains the id are re-solved one after another while everything else is
    held fixed:

        new_bias = w * sum(rating - alpha - other biases - gamma term)
                   / (n_rows * w**2 + lambda_b)

    where w is the weight stored next to the bias and n_rows is the number of
    ratings recorded for the id.

    The ratings are read from the module-level dictionaries itemsPerUser and
    usersPerItem (not passed in); each entry is iterated as (other id, rating)
    pairs. The bias dictionaries inside theta are modified in place.

    Returns theta in its original (user first, item second) ordering.
    """
    alpha, user_bias, item_bias, user_gamma, item_gamma, guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys = theta
    lambda_user_bias, lambda_user_gamma = lambdas["lambda_user_bias"], lambdas["lambda_user_gamma"]
    lambda_item_bias, lambda_item_gamma = lambdas["lambda_item_bias"], lambdas["lambda_item_gamma"]
    # Side "1" is the side being updated, side "2" is the opposite side.
    # NOTE(review): dict2 and the gamma lambdas are assigned but never read below.
    if update_type == "user":
        dict1, bias1, gamma1 = itemsPerUser, user_bias, user_gamma
        dict2, bias2, gamma2 = usersPerItem, item_bias, item_gamma
        gbias1, ggamma1 = guser_bias, guser_gamma
        gbias2, ggamma2 = gitem_bias, gitem_gamma
        lambda_b = lambda_user_bias
    else:
        dict1, bias1, gamma1 = usersPerItem, item_bias, item_gamma
        dict2, bias2, gamma2 = itemsPerUser, user_bias, user_gamma
        gbias1, ggamma1 = gitem_bias, gitem_gamma
        gbias2, ggamma2 = guser_bias, guser_gamma
        lambda_b = lambda_item_bias
    #
    for id1 in dict1.keys():
        n_rows = len(dict1[id1])
        ratings = np.zeros(n_rows)
        alphas = np.full(n_rows, alpha)
        # Column layout of `biases`: side-1 terms first (base, then one per
        # g_key), followed by the side-2 terms in the same order.
        biases, gammas = np.zeros((n_rows, (1 + len(g_keys))*2)), np.zeros((n_rows, (1 + len(g_keys))*2, k))
        # NOTE(review): G and gammas are filled below but never read afterwards.
        G, G2, H = np.zeros((n_rows,k)), np.zeros((n_rows,k)), np.zeros((n_rows,k))
        # gamma_term = np.zeros(n_rows)
        ### Get id1 terms (weighted values; identical for every row of id1)
        biases1 = get_biases(id1, [bias1] + get_gterms(gbias1, g_keys))
        gammas1 = get_gammas(id1, k, [gamma1] + get_gterms(ggamma1, g_keys))
        #
        for i,(id2, rating) in enumerate(dict1[id1]):
            ### Get id2 terms
            biases2 = get_biases(id2, [bias2] + get_gterms(gbias2, g_keys))
            gammas2 = get_gammas(id2, k, [gamma2] + get_gterms(ggamma2, g_keys))
            ### Extract values
            ratings[i] = rating
            biases[i,:] = np.array(biases1 + biases2)
            gammas_i = np.array(gammas1 + gammas2)
            gammas[i,:,:] = gammas_i
            G[i,:] = np.sum(gammas_i, axis=0)
            G2[i,:] = np.sum(gammas_i, axis=0)**2
            H[i,:]  = np.sum(gammas_i**2, axis=0)
        # Pairwise interaction term per row: 1/2 * ((sum of vectors)^2 - sum of squares)
        gamma_term = (1/2) * (G2 - H).sum(axis=1)
        ### Update each bias
        # Index 0 (key None) is the base bias; index j >= 1 is g_keys[j - 1].
        # The index doubles as the column of that bias inside `biases`.
        for exclude_ind,exclude_bias in enumerate([None] + g_keys):
            # NOTE(review): the truthiness test also routes a falsy g_key
            # (e.g. 0 or "") to the base-bias branch -- confirm g_keys are truthy.
            if exclude_bias: 
                if id1 not in gbias1[exclude_bias]: continue
                bias_val = gbias1[exclude_bias][id1][1]
            else: 
                bias_val = bias1[id1][1]
            # Remove the bias being solved for from the residual ...
            biases[:,exclude_ind] = np.zeros(n_rows)
            bias_term = biases.sum(axis=1)
            bias_numer = bias_val * (ratings - alphas - bias_term - gamma_term).sum()
            bias_denom = (n_rows * bias_val**2) + lambda_b
            new_bias = bias_numer / bias_denom
            # ... then store the new value and restore its column so the next
            # bias is solved against the updated one.
            if exclude_bias: 
                gbias1[exclude_bias][id1] = (new_bias, bias_val)
                biases[:,exclude_ind] = np.full(n_rows, new_bias * bias_val)
            else:
                bias1[id1] = (new_bias, bias_val)
                biases[:,exclude_ind] = np.full(n_rows, new_bias * bias_val)

    # Map the side-1 / side-2 names back to the user-first layout of theta
    if update_type == "user":
        return (alpha, bias1, bias2, gamma1, gamma2, gbias1, gbias2, ggamma1, ggamma2, g_keys)
    else:
        return (alpha, bias2, bias1, gamma2, gamma1, gbias2, gbias1, ggamma2, ggamma1, g_keys)

In [6]:
# ### Coordinate Descent for FM --> GAMMA
# def update_gamma(theta, lambdas, k, update_type, exclude_key=None):
#     """
#     Update bias1 to dbias1
#     """
#     alpha, user_bias, item_bias, user_gamma, item_gamma, guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys = theta
#     if update_type == "user":
#         dict1, bias1, gamma1 = itemsPerUser, user_bias, user_gamma
#         dict2, bias2, gamma2 = usersPerItem, item_bias, item_gamma
#         gbias1, ggamma1 = guser_bias, guser_gamma
#         gbias2, ggamma2 = gitem_bias, gitem_gamma
#         lambda_b, lambda_g = lambdas["lambda_user_bias"], lambdas["lambda_user_gamma"]
#     else:
#         dict1, bias1, gamma1 = usersPerItem, item_bias, item_gamma
#         dict2, bias2, gamma2 = itemsPerUser, user_bias, user_gamma
#         gbias1, ggamma1 = gitem_bias, gitem_gamma
#         gbias2, ggamma2 = guser_bias, guser_gamma
#         lambda_b, lambda_g = lambdas["lambda_item_bias"], lambdas["lambda_item_gamma"]
#     #
#     if exclude_key:
#         gamma = ggamma1[exclude_key].copy()
#         exclude_ind = g_keys.index(exclude_key)
#         print(exclude_ind)
#     else:
#         gamma = gamma1.copy()
#     # print(exclude_key, exclude_ind)
#     for id1 in gamma.keys():
#         n_rows = len(dict1[id1])
#         gamma_val = gamma[id1][1]
#         #
#         numer_term = 0
#         denom_term = 0
#         # Get id1 terms
#         bias1_0, gamma1_0 = get_bias(id1, bias1), get_gamma(id1, k, gamma1)
#         gbiases1, ggammas1 = get_other_bias_gamma(id1, k, g_keys, gbias1, ggamma1)
#         if exclude_key: 
#             ggammas1[g_keys.index(exclude_key)] = np.zeros(k)
#         else: 
#             gamma1_0  = np.zeros(k)
#         gammas1 = [gamma1_0] + ggammas1
#         for i,(id2,rating) in enumerate(dict1[id1]):
#             # Get id2 terms
#             bias2_0, gamma2_0 = get_bias(id2, bias2), get_gamma(id2, k, gamma2)
#             gbiases2, ggammas2 = get_other_bias_gamma(id2, k, g_keys, gbias2, ggamma2)
#             # Get summations
#             biases = np.array([bias1_0, bias2_0] + gbiases1 + gbiases2)
#             gammas = np.array(gammas1 + [gamma2_0] + ggammas2)
#             A = rating - alpha - biases.sum()
#             C = gammas.sum(axis=0)
#             C2 = np.square(gammas.sum(axis=0))
#             D = np.sum(np.square(gammas), axis=0)
#             numer_term += C * (A - ((1/2) * (C2 - D)))
#             denom_term += C2
#         gamma[id1] = ((gamma_val * numer_term) / ((denom_term * gamma_val**2) + lambda_g), gamma_val)
#     if exclude_key:
#         ggamma1[exclude_key] = gamma.copy()
#     else:
#         gamma1 = gamma.copy()

#     if update_type == "user":
#         return (alpha, bias1, bias2, gamma1, gamma2, gbias1, gbias2, ggamma1, ggamma2, g_keys)
#     else:
#         return (alpha, bias2, bias1, gamma2, gamma1, gbias2, gbias1, ggamma2, ggamma1, g_keys)

# def update_gamma(theta, lambdas, k, update_type, exclude_key=None):
#     """
#     Update bias1 to dbias1
#     """
#     alpha, user_bias, item_bias, user_gamma, item_gamma, guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys = theta
#     if update_type == "user":
#         dict1, bias1, gamma1 = itemsPerUser, user_bias, user_gamma
#         dict2, bias2, gamma2 = usersPerItem, item_bias, item_gamma
#         gbias1, ggamma1 = guser_bias, guser_gamma
#         gbias2, ggamma2 = gitem_bias, gitem_gamma
#         lambda_b, lambda_g = lambdas["lambda_user_bias"], lambdas["lambda_user_gamma"]
#     else:
#         dict1, bias1, gamma1 = usersPerItem, item_bias, item_gamma
#         dict2, bias2, gamma2 = itemsPerUser, user_bias, user_gamma
#         gbias1, ggamma1 = gitem_bias, gitem_gamma
#         gbias2, ggamma2 = guser_bias, guser_gamma
#         lambda_b, lambda_g = lambdas["lambda_item_bias"], lambdas["lambda_item_gamma"]
#     #
#     if exclude_key:
#         gamma = ggamma1[exclude_key].copy()
#         exclude_ind = g_keys.index(exclude_key)
#         print(exclude_ind)
#     else:
#         gamma = gamma1.copy()
#     # print(exclude_key, exclude_ind)
#     for id1 in gamma.keys():
#         n_rows = len(dict1[id1])
#         gamma_val = gamma[id1][1]
#         #
#         numer_term = 0
#         denom_term = 0
#         # Get id1 terms
#         bias1_0, gamma1_0 = get_bias(id1, bias1), get_gamma(id1, k, gamma1)
#         gbiases1, ggammas1 = get_other_bias_gamma(id1, k, g_keys, gbias1, ggamma1)
#         if exclude_key: 
#             ggammas1[g_keys.index(exclude_key)] = np.zeros(k)
#         else: 
#             gamma1_0  = np.zeros(k)
#         gammas1 = [gamma1_0] + ggammas1
#         for i,(id2,rating) in enumerate(dict1[id1]):
#             # Get id2 terms
#             bias2_0, gamma2_0 = get_bias(id2, bias2), get_gamma(id2, k, gamma2)
#             gbiases2, ggammas2 = get_other_bias_gamma(id2, k, g_keys, gbias2, ggamma2)
#             # Get summations
#             biases = np.array([bias1_0, bias2_0] + gbiases1 + gbiases2)
#             gammas = np.array(gammas1 + [gamma2_0] + ggammas2)
#             A = rating - alpha - biases.sum()
#             C = gammas.sum(axis=0)
#             C2 = np.square(gammas.sum(axis=0))
#             D = np.sum(np.square(gammas), axis=0)
#             numer_term += C * (A - ((1/2) * (C2 - D)))
#             denom_term += C2
#         gamma[id1] = ((gamma_val * numer_term) / ((denom_term * gamma_val**2) + lambda_g), gamma_val)
#     if exclude_key:
#         ggamma1[exclude_key] = gamma.copy()
#     else:
#         gamma1 = gamma.copy()

#     if update_type == "user":
#         return (alpha, bias1, bias2, gamma1, gamma2, gbias1, gbias2, ggamma1, ggamma2, g_keys)
#     else:
#         return (alpha, bias2, bias1, gamma2, gamma1, gbias2, gbias1, ggamma2, ggamma1, g_keys)



In [7]:
# ### Coordinate descent update (TERMS AND ALG, individual gammas)

# ### Coordinate Descent for FM --> TERMS
# def update_terms(theta, lambdas, k, update_type, exclude_key=None):
#     """
#     Update bias1 to dbias1
#     """
#     alpha, user_bias, item_bias, user_gamma, item_gamma, guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys = theta
#     lambda_user_bias, lambda_user_gamma = lambdas["lambda_user_bias"], lambdas["lambda_user_gamma"]
#     lambda_item_bias, lambda_item_gamma = lambdas["lambda_item_bias"], lambdas["lambda_item_gamma"]
#     if update_type == "user":
#         dict1, bias1, gamma1 = itemsPerUser, user_bias, user_gamma
#         dict2, bias2, gamma2 = usersPerItem, item_bias, item_gamma
#         gbias1, ggamma1 = guser_bias, guser_gamma
#         gbias2, ggamma2 = gitem_bias, gitem_gamma
#         lambda_b, lambda_g = lambda_user_bias, np.full(k, lambda_user_gamma)
#     else:
#         dict1, bias1, gamma1 = usersPerItem, item_bias, item_gamma
#         dict2, bias2, gamma2 = itemsPerUser, user_bias, user_gamma
#         gbias1, ggamma1 = gitem_bias, gitem_gamma
#         gbias2, ggamma2 = guser_bias, guser_gamma
#         lambda_b, lambda_g = lambda_item_bias, np.full(k, lambda_item_gamma)
#     # Define the excluded gamma index
#     if exclude_key: exclude_ind = g_keys.index(exclude_key) + 1
#     else: exclude_ind = 0

#     for id1 in dict1.keys():
#         n_rows = len(dict1[id1])
#         ratings = np.zeros(n_rows)
#         alphas = np.full(n_rows, alpha)
#         biases, gammas = np.zeros((n_rows, (1 + len(g_keys))*2)), np.zeros((n_rows, (1 + len(g_keys))*2, k))
#         C, C2, D = np.zeros((n_rows,k)), np.zeros((n_rows,k)), np.zeros((n_rows,k))
#         # gamma_term = np.zeros(n_rows)
#         ### Get id1 terms
#         biases1 = get_biases(id1, [bias1] + get_gterms(gbias1, g_keys))
#         gammas1 = get_gammas(id1, k, [gamma1] + get_gterms(ggamma1, g_keys))
#         #
#         for i,(id2, rating) in enumerate(dict1[id1]):
#             ### Get id2 terms
#             biases2 = get_biases(id2, [bias2] + get_gterms(gbias2, g_keys))
#             gammas2 = get_gammas(id2, k, [gamma2] + get_gterms(ggamma2, g_keys))
#             ### Extract values
#             ratings[i] = rating
#             biases[i,:] = np.array(biases1 + biases2)
#             gammas_i = np.array(gammas1 + gammas2)
#             gammas[i,:,:] = gammas_i
#             C[i,:] = np.sum(gammas_i, axis=0)
#             C2[i,:] = np.sum(gammas_i, axis=0)**2
#             D[i,:]  = np.sum(gammas_i**2, axis=0)
#         gamma_term = (1/2) * (C2 - D).sum(axis=1)
#         ### Update each bias
#         for exclude_ind,exclude_bias in enumerate([None] + g_keys):
#             if exclude_bias: 
#                 if id1 not in gbias1[exclude_bias]: continue
#                 bias_val = gbias1[exclude_bias][id1][1]
#             else: 
#                 bias_val = bias1[id1][1]
#             biases[:,exclude_ind] = np.zeros(n_rows)
#             bias_term = biases.sum(axis=1)
#             bias_numer = bias_val * (ratings - alphas - bias_term - gamma_term).sum()
#             bias_denom = (n_rows * bias_val**2) + lambda_b
#             new_bias = bias_numer / bias_denom
#             if exclude_bias: 
#                 gbias1[exclude_bias][id1] = (new_bias, bias_val)
#                 biases[:,exclude_ind] = np.full(n_rows, new_bias * bias_val)
#             else:
#                 bias1[id1] = (new_bias, bias_val)
#                 biases[:,exclude_ind] = np.full(n_rows, new_bias * bias_val)
#         ### GAMMA ACTIONS
#         rating_term = np.repeat(ratings[:, np.newaxis], k, axis=1)
#         alpha_term = np.repeat(alphas[:, np.newaxis], k, axis=1)
#         bias_term = np.repeat(biases.sum(axis=1)[:, np.newaxis], k, axis=1)
#         ### Update the specified gamma term if id1 is in gamma
#         if exclude_key:
#             if id1 not in ggamma1[exclude_key]: continue
#             else:
#                 gamma_val = ggamma1[exclude_key][id1][1]
#                 exclude_ind = g_keys.index(exclude_key) + 1
#         else: 
#             gamma_val = gamma1[id1][1]
#             exclude_ind = 0
#         gammas[:,exclude_ind,:] = np.zeros((n_rows, k))
#         C  = np.sum(gammas, axis=1)
#         C2 = C**2
#         D  = np.sum(gammas**2, axis=1)
#         #
#         C_forGamma = np.repeat(C.sum(axis=1)[:, np.newaxis], k, axis=1)
#         C2_forGamma = np.repeat(C2.sum(axis=1)[:, np.newaxis], k, axis=1)
#         D_forGamma = np.repeat(D.sum(axis=1)[:, np.newaxis], k, axis=1)
#         gamma_term2 = (1/2) * (D_forGamma - C2_forGamma)
#         # print(C.shape, rating_term.shape, alpha_term.shape, bias_term.shape, gamma_term2.shape, C2.shape)
#         #
#         gamma_numer = np.sum(C_forGamma * (gamma_term2 + rating_term - alpha_term - bias_term), axis=0) * gamma_val
#         gamma_denom = np.sum(np.repeat(C2_forGamma.sum(axis=1)[:, np.newaxis], k, axis=1), axis=0) * gamma_val**2
#         # print(gamma_numer.shape, gamma_denom.shape)
#         if exclude_key:
#             ggamma1[exclude_key][id1] = (np.array(gamma_numer / (gamma_denom + lambda_g)), gamma_val)
#         else:
#             gamma1[id1] = (np.array(gamma_numer / (gamma_denom + lambda_g)), gamma_val)

#     if update_type == "user":
#         return (alpha, bias1, bias2, gamma1, gamma2, gbias1, gbias2, ggamma1, ggamma2, g_keys)
#     else:
#         return (alpha, bias2, bias1, gamma2, gamma1, gbias2, gbias1, ggamma2, ggamma1, g_keys)


# def update_params_coordinateDescent(theta, lambdas, Xtrain, ytrain, k, quiet2=True):
#     """
#     Update parameters based on how well they predict in their CURRENT states
#     Coordinate descent instead of gradient descent for faster convergence
#     ###
#     Latent factor model
#          Fix user_gamma --> iterate and update alpha, user_bias, item_gamma
#          Fix item_gamma --> iterate and update alpha, user_bias, user_gamma
#     Fix user_gamma... Fix item_gamma...
#     Repeat the above until model have converged
#     """
#     alpha, user_bias, item_bias, user_gamma, item_gamma, guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys = theta
#     # Get cycle count
#     exclude_keys = [None, None]
#     for key in g_keys:
#         if len(guser_gamma[key]) > 0:
#             exclude_keys.append(key)
#         if len(gitem_gamma[key]) > 0:
#             exclude_keys.append(key)
#     n_cycles = len(exclude_keys)
#     # Get iteration params
#     exclude_keys = [None, None]
#     n_guser_gammas = len(guser_gamma)
#     for key in g_keys:
#         exclude_keys.extend([key, key])
#     # n_cycles = len(exclude_keys)

#     iter_counter = 0
#     for cycle,exclude_key in enumerate(exclude_keys):
#         if (cycle > 1):
#             if cycle % 2:
#                 # Check guser gamma
#                 if len(guser_gamma[exclude_key]) == 0: continue
#             else:
#                 # Check gitem gamma
#                 if len(gitem_gamma[exclude_key]) == 0: continue
#         iter_counter += 1
#         if not quiet2:
#             print(f"Starting Cycle {iter_counter}/{n_cycles} --> Exclude key {exclude_key}")
#         # Set iteration parameters
        
#         t_cycle = time.time()
#         ### ALPHA
#         t0 = time.time()
#         theta = update_alpha(theta, lambdas, Xtrain, ytrain, k)
#         if not quiet2:
#             print(f"   Alpha Complete --> Time elapsed: {time.time() - t0}")
#             # print("   ", get_cost_mse(theta, lambdas, Xtrain, ytrain, k)) ###
#         ### Cycle between user/item updates
#         if not cycle % 2:
#             # Item biases
#             t0 = time.time()
#             theta = update_biases(theta, lambdas, k, "item")
#             if not quiet2:
#                 print(f"   Item Biases complete --> Time elapsed: {time.time() - t0}")
#                 # print("   ", get_cost_mse(theta, lambdas, Xtrain, ytrain, k)) ###
#             # User biases and gamma
#             t0 = time.time()
#             theta = update_terms(theta, lambdas, k, "user", exclude_key)
#             if not quiet2:
#                 print(f"   User Biases and Gamma ({exclude_key}) Complete --> Time elapsed: {time.time() - t0}")
#                 # print("   ", get_cost_mse(theta, lambdas, Xtrain, ytrain, k)) ###
#         else:
#             ### User biases
#             t0 = time.time()
#             theta = update_biases(theta, lambdas, k, "user")
#             if not quiet2:
#                 print(f"   User Biases complete --> Time elapsed: {time.time() - t0}")
#                 # print("   ", get_cost_mse(theta, lambdas, Xtrain, ytrain, k)) ###
#             # Item biases and gamma
#             t0 = time.time()
#             theta = update_terms(theta, lambdas, k, "item", exclude_key)
#             if not quiet2:
#                 print(f"   Item Biases and Gamma ({exclude_key}) Complete --> Time elapsed: {time.time() - t0}")
#                 # print("   ", get_cost_mse(theta, lambdas, Xtrain, ytrain, k)) ###

#         if not quiet2:
#             print(f"Cycle {iter_counter}/{n_cycles} Complete --> Time elapsed: {round(time.time() - t_cycle, 3)}s")
#         print("   ", get_cost_mse(theta, lambdas, Xtrain, ytrain, k)) ###

#     return theta

In [8]:
### Coordinate descent update (TERMS AND ALG, gamma simultaneous)

### Coordinate Descent for FM --> TERMS
def update_terms(theta, lambdas, k, update_type):
    """
    Coordinate-descent update of the bias and gamma (latent factor) terms for one side
    of the model, including every per-category (g_keys) term of that side.

    theta: 10-tuple (alpha, user_bias, item_bias, user_gamma, item_gamma,
           guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys)
    lambdas: dict with keys "lambda_user_bias", "lambda_user_gamma",
             "lambda_item_bias", "lambda_item_gamma" (regularization strengths)
    k: number of latent dimensions of each gamma vector
    update_type: "user" updates the user-side terms; any other value updates the item-side terms

    Notes:
      - Reads the module-level itemsPerUser / usersPerItem dictionaries (they are not parameters).
      - Every bias/gamma entry is a 2-tuple (learned value, fixed multiplier); only the learned
        value is refit here, the multiplier at index 1 is written back unchanged.
      - The parameter dictionaries inside theta are modified in place.

    Returns theta with its elements in the same order in which they were received.
    """
    alpha, user_bias, item_bias, user_gamma, item_gamma, guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys = theta
    lambda_user_bias, lambda_user_gamma = lambdas["lambda_user_bias"], lambdas["lambda_user_gamma"]
    lambda_item_bias, lambda_item_gamma = lambdas["lambda_item_bias"], lambdas["lambda_item_gamma"]
    # Suffix 1 = the side being updated, suffix 2 = the opposite (held fixed) side
    if update_type == "user":
        dict1, bias1, gamma1 = itemsPerUser, user_bias, user_gamma
        dict2, bias2, gamma2 = usersPerItem, item_bias, item_gamma
        gbias1, ggamma1 = guser_bias, guser_gamma
        gbias2, ggamma2 = gitem_bias, gitem_gamma
        lambda_b, lambda_g = lambda_user_bias, lambda_user_gamma
    else:
        dict1, bias1, gamma1 = usersPerItem, item_bias, item_gamma
        dict2, bias2, gamma2 = itemsPerUser, user_bias, user_gamma
        gbias1, ggamma1 = gitem_bias, gitem_gamma
        gbias2, ggamma2 = guser_bias, guser_gamma
        lambda_b, lambda_g = lambda_item_bias, lambda_item_gamma

    # Handle one id at a time: gather all of its interactions, then refit its terms
    for id1 in dict1.keys():
        n_rows = len(dict1[id1])
        ratings = np.zeros(n_rows)
        alphas = np.full(n_rows, alpha)
        # One column per term: (base + one per category) for id1, followed by the same for id2
        # (presumably get_biases/get_gammas return one contribution per term in that order -- confirm)
        biases, gammas = np.zeros((n_rows, (1 + len(g_keys))*2)), np.zeros((n_rows, (1 + len(g_keys))*2, k))
        # Per-row scratch buffers; the values written into C below are never read before C is rebound
        C, C2, D = np.zeros((n_rows,k)), np.zeros((n_rows,k)), np.zeros((n_rows,k))
        ### Get id1 terms
        biases1 = get_biases(id1, [bias1] + get_gterms(gbias1, g_keys))
        gammas1 = get_gammas(id1, k, [gamma1] + get_gterms(ggamma1, g_keys))
        # Fill one row per (id2, rating) interaction of id1
        for i,(id2, rating) in enumerate(dict1[id1]):
            ### Get id2 terms
            biases2 = get_biases(id2, [bias2] + get_gterms(gbias2, g_keys))
            gammas2 = get_gammas(id2, k, [gamma2] + get_gterms(ggamma2, g_keys))
            ### Extract values
            ratings[i] = rating
            biases[i,:] = np.array(biases1 + biases2)
            gammas_i = np.array(gammas1 + gammas2)
            gammas[i,:,:] = gammas_i
            C[i,:] = np.sum(gammas_i, axis=0)
            C2[i,:] = np.sum(gammas_i, axis=0)**2
            D[i,:]  = np.sum(gammas_i**2, axis=0)
        # Pairwise interaction term per row: 0.5 * sum_f [ (sum_t g_tf)^2 - sum_t g_tf^2 ]
        # Computed once here and reused unchanged for every bias update below
        gamma_term = (1/2) * (C2 - D).sum(axis=1)
        ### Update each bias
        # Index 0 (None) is the base bias; indices 1.. follow g_keys
        for exclude_ind,exclude_bias in enumerate([None] + g_keys):
            if exclude_bias: 
                # id1 has no parameter for this category --> nothing to update
                if id1 not in gbias1[exclude_bias]: continue
                bias_val = gbias1[exclude_bias][id1][1]
            else: 
                bias_val = bias1[id1][1]
            # Remove this term's own contribution so the residual excludes it
            biases[:,exclude_ind] = np.zeros(n_rows)
            bias_term = biases.sum(axis=1)
            # Regularized closed-form solution for this single coordinate
            bias_numer = bias_val * (ratings - alphas - bias_term - gamma_term).sum()
            bias_denom = (n_rows * bias_val**2) + lambda_b
            new_bias = bias_numer / bias_denom
            # Store the new value and put the refreshed contribution back for the next coordinate
            if exclude_bias: 
                gbias1[exclude_bias][id1] = (new_bias, bias_val)
                biases[:,exclude_ind] = np.full(n_rows, new_bias * bias_val)
            else:
                bias1[id1] = (new_bias, bias_val)
                biases[:,exclude_ind] = np.full(n_rows, new_bias * bias_val)
        ### GAMMA ACTIONS
        # Same coordinate ordering as the bias loop: base gamma first, then one per category
        for exclude_ind,exclude_key in enumerate([None] + g_keys):
            if exclude_key: 
                # id1 has no gamma for this category --> nothing to update
                if id1 not in ggamma1[exclude_key]: continue
                gamma_val = ggamma1[exclude_key][id1][1]
            else: 
                gamma_val = gamma1[id1][1]
            # Broadcast the per-row scalars to (n_rows, k); these three do not change between iterations
            rating_term = np.repeat(ratings[:, np.newaxis], k, axis=1)
            alpha_term = np.repeat(alphas[:, np.newaxis], k, axis=1)
            bias_term = np.repeat(biases.sum(axis=1)[:, np.newaxis], k, axis=1)
            # Zero out the gamma being refit, then recompute the sums over the remaining terms
            gammas[:,exclude_ind,:] = np.zeros((n_rows, k))
            C  = np.sum(gammas, axis=1)
            C2 = C**2
            D  = np.sum(gammas**2, axis=1)
            # NOTE(review): each *_forGamma array is a per-row total over all k factors repeated
            # k times, so every operand below has identical columns and all k components of
            # new_gamma come out equal. Confirm whether a per-factor update (C[:, f]) was intended.
            C_forGamma = np.repeat(C.sum(axis=1)[:, np.newaxis], k, axis=1)
            C2_forGamma = np.repeat(C2.sum(axis=1)[:, np.newaxis], k, axis=1)
            D_forGamma = np.repeat(D.sum(axis=1)[:, np.newaxis], k, axis=1)
            gamma_term2 = (1/2) * (D_forGamma - C2_forGamma)
            # Regularized closed-form update; lambda_g is added to the denominator
            gamma_numer = np.sum(C_forGamma * (gamma_term2 + rating_term - alpha_term - bias_term), axis=0) * gamma_val
            gamma_denom = np.sum(np.repeat(C2_forGamma.sum(axis=1)[:, np.newaxis], k, axis=1), axis=0) * gamma_val**2
            new_gamma = np.array(gamma_numer / (gamma_denom + lambda_g))
            if exclude_key:
                ggamma1[exclude_key][id1] = (new_gamma, gamma_val)
            else:
                gamma1[id1] = (new_gamma, gamma_val)
            # Put the refreshed contribution back so the next coordinate sees the new value
            gammas[:,exclude_ind,:] = np.repeat((new_gamma * gamma_val)[np.newaxis, :], n_rows, axis=0) 

    # Re-assemble theta in its canonical (user first, item second) order
    if update_type == "user":
        return (alpha, bias1, bias2, gamma1, gamma2, gbias1, gbias2, ggamma1, ggamma2, g_keys)
    else:
        return (alpha, bias2, bias1, gamma2, gamma1, gbias2, gbias1, ggamma2, ggamma1, g_keys)

def update_params_coordinateDescent(theta, lambdas, Xtrain, ytrain, k, quiet2=True, n_cycles=2):
    """
    Run a fixed number of coordinate-descent cycles and return the updated theta.

    Every cycle refits alpha first and then alternates which side receives the
    bias-only update and which side receives the combined bias + gamma update:
        even cycles (0, 2, ...): update_biases on users, then update_terms on items
        odd cycles  (1, 3, ...): update_biases on items, then update_terms on users

    theta: parameter tuple, passed through update_alpha / update_biases / update_terms
    lambdas: dict of regularization strengths, forwarded to the update helpers
    Xtrain, ytrain: training data, forwarded to update_alpha only
    k: number of latent dimensions
    quiet2: when False, print per-step and per-cycle timings
    n_cycles: number of cycles to run (default 2, the previously hard-coded value)

    There is no convergence check in this function; it always runs exactly n_cycles cycles.

    NOTE: a later cell defines another function with this same name; once that cell
    has been executed, this definition is shadowed.
    """
    for cycle in range(n_cycles):
        if not quiet2:
            print(f"Starting Cycle {cycle + 1}/{n_cycles}")
        t_cycle = time.time()

        ### ALPHA
        t0 = time.time()
        theta = update_alpha(theta, lambdas, Xtrain, ytrain, k)
        if not quiet2:
            print(f"   Alpha Complete --> Time elapsed: {time.time() - t0}")

        ### Cycle between user/item updates
        if cycle % 2:
            # Odd cycle: item biases first
            t0 = time.time()
            theta = update_biases(theta, lambdas, k, "item")
            if not quiet2:
                print(f"   Item Biases complete --> Time elapsed: {time.time() - t0}")
            # ... then user biases and gammas
            t0 = time.time()
            theta = update_terms(theta, lambdas, k, "user")
            if not quiet2:
                print(f"   User Biases and Gammas Complete --> Time elapsed: {time.time() - t0}")
        else:
            # Even cycle: user biases first
            t0 = time.time()
            theta = update_biases(theta, lambdas, k, "user")
            if not quiet2:
                print(f"   User Biases complete --> Time elapsed: {time.time() - t0}")
            # ... then item biases and gammas
            t0 = time.time()
            theta = update_terms(theta, lambdas, k, "item")
            if not quiet2:
                print(f"   Item Biases and Gammas Complete --> Time elapsed: {time.time() - t0}")

        if not quiet2:
            print(f"Cycle {cycle + 1}/{n_cycles} Complete --> Time elapsed: {round(time.time() - t_cycle, 3)}s")

    return theta

In [9]:
### Coordinate descent update (Fastest)
def update_params_coordinateDescent(theta, lambdas, Xtrain, ytrain, k, quiet2=True):
    """
    Run one coordinate-descent pass over all parameters and return the updated theta.

    The pass is always the same three steps, each starting from the result of the previous one:
        1. update_alpha  (global offset)
        2. update_terms on the item side (item biases and gammas)
        3. update_terms on the user side (user biases and gammas)

    theta: parameter tuple, passed through update_alpha / update_terms
    lambdas: dict of regularization strengths, forwarded to the update helpers
    Xtrain, ytrain: training data, forwarded to update_alpha only
    k: number of latent dimensions
    quiet2: when False, print the time taken by each step

    There is no loop or convergence check here; the caller decides how often to repeat the pass.

    NOTE: this redefines the function of the same name from the previous cell, so
    whichever of the two cells ran last determines the behavior.
    """
    ### ALPHA
    t0 = time.time()
    theta = update_alpha(theta, lambdas, Xtrain, ytrain, k)
    if not quiet2:
        print(f"   Alpha Complete --> Time elapsed: {time.time() - t0}")

    ### Item biases and gammas
    t0 = time.time()
    theta = update_terms(theta, lambdas, k, "item")
    if not quiet2:
        print(f"   Item Biases and Gammas complete --> Time elapsed: {time.time() - t0}")

    ### User biases and gammas
    t0 = time.time()
    theta = update_terms(theta, lambdas, k, "user")
    if not quiet2:
        print(f"   User Biases and Gammas Complete --> Time elapsed: {time.time() - t0}")

    return theta

In [10]:
### OTHER HELPFUL FUNCTIONS
######################################
def get_rec_structs(train_data):
    """
    Build the lookup structures of a recommender from (user_id, item_id, value) triples.
    The value is typically a rating, but can be anything numeric (e.g., hours played).

    Returns a dict with the following keys:
        itemsPerUser: user -> list of (item, value) seen for that user
        usersPerItem: item -> list of (user, value) seen for that item
        userItemDict: (user, item) -> value (a later duplicate overwrites an earlier one)
        itemUserDict: (item, user) -> value (a later duplicate overwrites an earlier one)
        userAverages: user -> mean value over that user's interactions
        itemAverages: item -> mean value over that item's interactions
    """
    ### Single pass over the data to fill all four interaction lookups
    itemsPerUser, usersPerItem = defaultdict(list), defaultdict(list)
    userItemDict, itemUserDict = {}, {}
    for user, item, value in train_data:
        itemsPerUser[user].append((item, value))
        usersPerItem[item].append((user, value))
        userItemDict[(user, item)] = value
        itemUserDict[(item, user)] = value

    def _mean_value(pairs):
        # Mean of the value component over a list of (id, value) pairs
        return sum(value for _, value in pairs) / len(pairs)

    ### Per-user and per-item averages
    userAverages = {user: _mean_value(pairs) for user, pairs in itemsPerUser.items()}
    itemAverages = {item: _mean_value(pairs) for item, pairs in usersPerItem.items()}

    return {
        "itemsPerUser": itemsPerUser,
        "usersPerItem": usersPerItem,
        "userItemDict": userItemDict,
        "itemUserDict": itemUserDict,
        "userAverages": userAverages,
        "itemAverages": itemAverages,
    }

def unpack_rec_structs(rec_structs):
    """
    Return the contents of a rec_structs dict as a 6-tuple in the fixed order
    (itemsPerUser, usersPerItem, userItemDict, itemUserDict, userAverages, itemAverages)
    """
    field_order = (
        "itemsPerUser",
        "usersPerItem",
        "userItemDict",
        "itemUserDict",
        "userAverages",
        "itemAverages",
    )
    return tuple(rec_structs[field] for field in field_order)

def pd_get_rec_structs(df, user_col, item_col, val_col, user_limit=None, item_limit=None, only_nonzero=None, nested_dicts=False):
    """
    pandas counterpart of get_rec_structs(): derive the recommender lookups from a dataframe
    that has (at least) a user column, an item column and a value column.
    The value is typically a rating, but can be anything numeric (e.g., hours played).

    Parameters
        user_limit: keep at most this many users per item (default: number of rows in df)
        item_limit: keep at most this many items per user (default: number of rows in df)
        only_nonzero: when truthy, rows whose value is not > 0 are dropped before any aggregation
        nested_dicts: when truthy, itemsPerUser / usersPerItem map to {id: value} dicts
                      instead of lists of (id, value) tuples

    Returns (user_structs, item_structs, valueDict, meanValue, medianValue) where
        user_structs = [itemsPerUser, userAverages]
        item_structs = [usersPerItem, itemAverages]
        valueDict:   (user, item) -> value
        meanValue / medianValue: mean / median over the values of valueDict
    """
    pair_limit_users = len(df) if user_limit is None else user_limit
    pair_limit_items = len(df) if item_limit is None else item_limit

    # Attach the (id, value) and (id, id) tuples as helper columns on independent copies
    by_user = df.assign(
        itemsPerUser=list(zip(df[item_col], df[val_col])),
        valueDict_userItem=list(zip(df[user_col], df[item_col])),
    )
    by_item = df.assign(
        usersPerItem=list(zip(df[user_col], df[val_col])),
        valueDict_itemUser=list(zip(df[item_col], df[user_col])),  # kept for parity; not read below
    )

    # Optionally restrict everything that follows to strictly positive values
    if only_nonzero is not None and only_nonzero:
        by_user = by_user[by_user[val_col] > 0]
        by_item = by_item[by_item[val_col] > 0]

    def _collect_pairs(frame, group_col, pair_col, limit):
        # group_col id -> first `limit` pairs, either as a list or as a dict
        grouped = frame.groupby(group_col)[pair_col].apply(lambda pairs: list(pairs)[:limit])
        if nested_dicts:
            grouped = grouped.apply(lambda pairs: {key: val for key, val in pairs})
        return grouped.to_dict()

    itemsPerUser = _collect_pairs(by_user, user_col, "itemsPerUser", pair_limit_items)
    usersPerItem = _collect_pairs(by_item, item_col, "usersPerItem", pair_limit_users)

    # (user, item) -> value, followed by the global statistics over those values
    pair_values = by_user[["valueDict_userItem", val_col]].drop_duplicates()
    valueDict = pair_values.set_index("valueDict_userItem", drop=True).to_dict()[val_col]
    all_values = list(valueDict.values())
    meanValue = np.mean(all_values)
    medianValue = np.median(all_values)

    userAverages = by_user.groupby(user_col)[val_col].mean().to_dict()
    itemAverages = by_item.groupby(item_col)[val_col].mean().to_dict()

    return [itemsPerUser, userAverages], [usersPerItem, itemAverages], valueDict, meanValue, medianValue


### Change number of genres


In [11]:
##### Params
# Tunable settings read by the data-preparation and model-fitting cells below

# Category params
# The category columns are ordered by descending total; the slice [offset : offset + n_categories] is kept
offset = 0
n_categories = 21 # 0 disables the category weights entirely
include_other = True # True: all remaining categories are condensed into an "Other" column --> False: ONLY the selected categories are included
user_weight_cutoff = 0 # Only rows with playtime_log above this value contribute to the user weights (0 = every played game; e.g. 3.8 = only games above Q1)

# weight_df_type --> "genres" for genres, "tags" for tags
weight_df_type = "tags"
# Other genre --> weight type = 1 (indicates presence of other genre) or "count" (weight based on number of other genres)
# NOTE(review): no visible cell reads this variable; the get_id_weights calls below pass weight_type=1 explicitly -- confirm it is still needed
weight_type = "count"

### Model params
k = 5 # Number of latent dimensions per gamma vector
lambda_bias = 1 # Regularization strength for the bias terms (used for both users and items)
lambda_gamma = 5 # Regularization strength for the gamma terms (used for both users and items)
# ep, iter_limit and output_bounds are forwarded to fit_parameters
# (presumably convergence tolerance, maximum number of epochs, and clipping range for predictions -- confirm)
ep = 0.0005
iter_limit = 300
output_bounds = (0, np.inf)
# Bounds passed to initialize_weighted_params (presumably the range of the random initial values -- confirm)
init_bounds1 = (-.1, .1) # For regular params
init_bounds2 = (-.5, .5) # For genre params

# Verbosity flags forwarded to fit_parameters (quiet2 also silences the per-step timing prints)
quiet = False
quiet2 = True

### FACTORIZATION MACHINE BASED ON GENRES

In [12]:
### Load review data and set up additional weight dataframes
# Reads n_categories, offset, include_other, weight_df_type and user_weight_cutoff from the params cell
seed = 100

# Train/valid/train2/test
train_filepath = "data/train_df2.csv"
valid_filepath = "data/valid_df.csv"
test_filepath = "data/test_df.csv"

# Item data
item_filepath1 = "data/game_genres_df.csv"
item_filepath2 = "data/game_tags_df.csv"

# Set up columns
user_col, item_col = "user_id", "item_id"
class_col = "playtime_log"
binary_class_col = "playtime_binary"

# Load data
train_df = pd.read_csv(train_filepath)
valid_df = pd.read_csv(valid_filepath)
test_df = pd.read_csv(test_filepath)
game_genres_df = pd.read_csv(item_filepath1, index_col=item_col)
game_tags_df = pd.read_csv(item_filepath2, index_col=item_col)

### Set up additional weight dataframes

if n_categories==0:
    # No category terms at all --> empty weight frames
    item_weight_df = pd.DataFrame()
    user_weight_df = pd.DataFrame()
else:
    if weight_df_type == "genres":
        category_source_df = game_genres_df
    elif weight_df_type == "tags":
        category_source_df = game_tags_df
    else:
        # Fail loudly: merely printing here would let the cell continue and either hit a
        # NameError or silently reuse an item_weight_df left over from an earlier run
        raise ValueError(f"Invalid weight_df_type {weight_df_type!r}: expected 'genres' or 'tags'")
    # Order the category columns from most to least frequent
    item_weight_df = category_source_df.loc[:, category_source_df.sum(axis=0).sort_values(ascending=False).index]

    ### Set up item category columns
    all_category_cols = item_weight_df.columns.to_list()
    include_cols = all_category_cols[offset:offset + n_categories]
    exclude_cols = list(set(all_category_cols).difference(set(include_cols)))
    other_col = "Other"
    include_other_cols = include_cols + [other_col]

    ### Set up item_weight_df
    # Limit to only items from train_df
    item_weight_df = item_weight_df[item_weight_df.index.isin(train_df[item_col].unique())]
    # For each item, weights are determined by which categories are applied to the item
    # "Other" is the number of non-selected categories that apply to the item
    other_item_weight_df = item_weight_df[exclude_cols].sum(axis=1).rename(other_col)
    item_weight_df = item_weight_df[include_cols]
    if include_other:
        item_weight_df = pd.merge(item_weight_df, other_item_weight_df, on=item_col, how="left")
    # Normalize so the weights of each item add up to one (items with no category get all zeros)
    item_weight_df = item_weight_df.div(item_weight_df.sum(axis=1), axis=0).fillna(0).reset_index(drop=False)

    ### Set up user_weight_df
    # For each user, weights are built from the category weights of the games that user played
    # (only rows with class_col above user_weight_cutoff count); again, all weights add up to one
    user_weight_df2 = train_df[[user_col, item_col, class_col, binary_class_col]]
    user_weight_df2 = pd.merge(user_weight_df2, item_weight_df, on=item_col, how="left").fillna(0)
    if include_other:
        agg_cols = include_other_cols
    else:
        agg_cols = include_cols
    played_by_user = user_weight_df2[user_weight_df2[class_col] > user_weight_cutoff].groupby(user_col)
    play_agg = played_by_user[agg_cols].sum()
    count_agg = played_by_user[agg_cols].count()
    # NOTE(review): after the fillna(0) above, count_agg holds the same per-user row count in every
    # column, so this factor cancels in the row normalization on the next line -- confirm the intent
    user_weight_df = play_agg * count_agg
    user_weight_df = user_weight_df.div(user_weight_df.sum(axis=1), axis=0).fillna(0).reset_index(drop=False)

# Final list of category columns that later cells iterate over
if n_categories==0:
    include_cols = []
elif include_other:
    include_cols = include_other_cols

In [13]:
# Sanity check: first rows of the per-item category weights
item_weight_df.head(6)

Unnamed: 0,item_id,Indie,Action,Adventure,Singleplayer,Casual,Strategy,RPG,Simulation,Multiplayer,...,Atmospheric,Sci-fi,Platformer,Co-op,Open World,Shooter,Story Rich,Fantasy,Horror,Other
0,282010.0,0.125,0.125,0.0,0.125,0.0,0.0,0.0,0.0,0.125,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5
1,70.0,0.0,0.05,0.05,0.05,0.0,0.0,0.0,0.0,0.05,...,0.05,0.05,0.0,0.0,0.0,0.05,0.05,0.0,0.0,0.55
2,1640.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.076923,0.0,0.076923,...,0.076923,0.0,0.0,0.0,0.0,0.0,0.0,0.076923,0.0,0.538462
3,1630.0,0.0,0.0,0.0,0.0,0.0,0.125,0.125,0.0,0.0,...,0.125,0.0,0.0,0.0,0.0,0.0,0.0,0.125,0.0,0.5
4,2400.0,0.05,0.05,0.05,0.05,0.0,0.05,0.05,0.05,0.05,...,0.0,0.0,0.0,0.0,0.0,0.05,0.0,0.0,0.0,0.55
5,3800.0,0.0,0.166667,0.0,0.166667,0.0,0.0,0.0,0.0,0.0,...,0.0,0.166667,0.0,0.0,0.0,0.0,0.166667,0.0,0.0,0.166667


In [14]:
# Sanity check: first rows of the per-user category weights
user_weight_df.head(6) 

Unnamed: 0,user_id,Indie,Action,Adventure,Singleplayer,Casual,Strategy,RPG,Simulation,Multiplayer,...,Atmospheric,Sci-fi,Platformer,Co-op,Open World,Shooter,Story Rich,Fantasy,Horror,Other
0,u0,0.028645,0.044587,0.041304,0.04676,0.011253,0.017775,0.01087,0.022123,0.033717,...,0.019565,0.006522,0.006522,0.021739,0.01087,0.019565,0.015942,0.006522,0.015217,0.580648
1,u1,0.016667,0.055556,0.047222,0.030556,0.025,0.016667,0.038889,0.016667,0.055556,...,0.0,0.008333,0.008333,0.041667,0.038889,0.025,0.0,0.022222,0.0,0.519444
2,u10,0.013636,0.036364,0.034266,0.043357,0.013636,0.034266,0.018182,0.025175,0.038811,...,0.022727,0.02972,0.0,0.031818,0.02972,0.031818,0.013636,0.004545,0.0,0.56014
3,u100,0.053571,0.042857,0.035714,0.039286,0.0,0.032143,0.028571,0.007143,0.042857,...,0.035714,0.046429,0.014286,0.028571,0.035714,0.021429,0.0,0.0,0.014286,0.485714
4,u1000,0.003846,0.045455,0.050155,0.039316,0.003846,0.016239,0.038617,0.003846,0.033916,...,0.026923,0.011538,0.003846,0.026923,0.03007,0.026923,0.03547,0.023232,0.007692,0.564452
5,u10000,0.059259,0.064815,0.037037,0.059259,0.022222,0.037037,0.033333,0.022222,0.042593,...,0.016667,0.011111,0.005556,0.027778,0.016667,0.011111,0.011111,0.005556,0.005556,0.457407


In [15]:
##### Further split the data into X/y pairs for convenience
# Only the id columns are used as features; the category information enters through the weight dicts below
X_cols = [user_col, item_col]
X_train, y_train = train_df[X_cols], train_df[class_col]
# The validation split is not used in this run
# X_valid, y_valid = valid_df[X_cols], valid_df[class_col]
X_test, y_test = test_df[X_cols], test_df[class_col]

print(f"# rows in training set: {len(X_train)}")
print(f"# rows in test set: {len(X_test)}")
print()

##### GET GENRE REC STRUCTS
# category -> (user weights, item weights), as returned by get_id_weights
# (presumably id -> weight dicts restricted to non-zero weights -- confirm against get_id_weights)
category_dicts = {}
weight_cols = include_cols
for weight_col in weight_cols:
    category_dicts[weight_col] = (get_id_weights(user_weight_df, user_col, weight_col, only_nonzero=True), 
                               get_id_weights(item_weight_df, item_col, weight_col, only_nonzero=True))
    # Alternative set-ups kept for reference (user-only, train_df-based, item-only):
    # category_dicts[weight_col] = (get_id_weights(user_weight_df, user_col, weight_col, only_nonzero=True), 
    #                            {})
    # category_dicts[weight_col] = (get_id_weights(train_df, user_col, weight_col, only_nonzero=True), 
    #                            get_id_weights(train_df, item_col, weight_col, only_nonzero=True))
    # category_dicts[weight_col] = ({}, 
    #                            get_id_weights(train_df, item_col, weight_col, only_nonzero=True))

print(f"k = {k}")
# print(f"Included {weight_df_type}: {include_cols}")
for category in include_cols:
    print(f"# Params for {category} --> Users: {len(category_dicts[category][0])} Items: {len(category_dicts[category][1])}")
print()

##### USERS/ITEMS
# NOTE(review): weight_col is reassigned here but not read again in this cell -- confirm it is still needed
weight_col = binary_class_col
rec_structs = pd_get_rec_structs(train_df, user_col, item_col, class_col, only_nonzero=False)
# itemsPerUser / usersPerItem become module-level names that update_terms reads directly
(itemsPerUser, userAverages), (usersPerItem, itemAverages), valueDict, meanValue, medianValue = rec_structs
# weight_type=1 is passed explicitly (the weight_type variable from the params cell is not used here)
userWeights = get_id_weights(train_df, user_col, weight_type=1)
itemWeights = get_id_weights(train_df, item_col, weight_type=1)

print(f"# of regular user weights: {len(userWeights)}")
print(f"# of regular item weights: {len(itemWeights)}")
print()

##### Run the code
### Initialize parameters
# The same regularization strengths are used for the user side and the item side
lambda_user_bias = lambda_bias
lambda_user_gamma = lambda_gamma
lambda_item_bias = lambda_bias
lambda_item_gamma = lambda_gamma
lambdas = {"lambda_user_bias":lambda_user_bias, "lambda_user_gamma":lambda_user_gamma,
          "lambda_item_bias":lambda_item_bias, "lambda_item_gamma":lambda_item_gamma}

# Keyword arguments that are forwarded to fit_parameters
params = {
         "lambdas":lambdas, "k": k, "ep": ep, "iter_limit": iter_limit, "output_bounds": output_bounds
         }

# Get theta
# alpha starts at the mean value of the training data
alpha = meanValue
# The empty dicts on the next line are immediately replaced by the initialized parameters
user_bias, item_bias, user_gamma, item_gamma = {}, {}, {}, {}
user_bias, item_bias, user_gamma, item_gamma = initialize_weighted_params(userWeights, itemWeights, k, init_bounds1)
# One set of bias/gamma parameters per category, keyed by category name; g_keys fixes their order
guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys = {}, {}, {}, {}, []
for category in include_cols:
    ubias, ibias, ugamma, igamma = initialize_weighted_params(category_dicts[category][0], category_dicts[category][1], k, init_bounds2)
    guser_bias[category] = ubias
    gitem_bias[category] = ibias
    guser_gamma[category] = ugamma
    gitem_gamma[category] = igamma
    g_keys.append(category)
# theta bundles all model parameters in the order expected by the update functions
theta = alpha, user_bias, item_bias, user_gamma, item_gamma, guser_bias, gitem_bias, guser_gamma, gitem_gamma, g_keys
print(f"g_keys: {g_keys}")
print()

# rows in training set: 676910
# rows in test set: 169227

k = 5
# Params for Indie --> Users: 13196 Items: 4958
# Params for Action --> Users: 14238 Items: 3952
# Params for Adventure --> Users: 13936 Items: 3391
# Params for Singleplayer --> Users: 13995 Items: 2923
# Params for Casual --> Users: 11920 Items: 2452
# Params for Strategy --> Users: 13229 Items: 2013
# Params for RPG --> Users: 12550 Items: 1617
# Params for Simulation --> Users: 12612 Items: 1509
# Params for Multiplayer --> Users: 14194 Items: 1546
# Params for Great Soundtrack --> Users: 12082 Items: 1300
# Params for Puzzle --> Users: 9115 Items: 1131
# Params for 2D --> Users: 10519 Items: 1121
# Params for Atmospheric --> Users: 13000 Items: 1125
# Params for Sci-fi --> Users: 11792 Items: 885
# Params for Platformer --> Users: 9557 Items: 835
# Params for Co-op --> Users: 14026 Items: 841
# Params for Open World --> Users: 13141 Items: 827
# Params for Shooter --> Users: 13811 Items: 826
# Params for Story Rich -

In [16]:
### Fit the model to the training data
# NOTE: theta is both the input and the output here, so re-running this cell continues
# from the already-fitted parameters instead of the fresh initialization
print(datetime.now())
theta, cost, mse = fit_parameters(X_train.values.tolist(), y_train, theta, quiet=quiet, quiet2=quiet2, **params)

### Predict using the learned parameters on the test set
t0 = time.time()
predictions = predict_latent_factor_batch(X_test.values.tolist(), theta, k, output_bounds)
print(f"Prediction time: {time.time() - t0}")
# Mean squared error between the predictions and the held-out test values
testMSE = get_MSE(predictions, y_test)

print()
print(f"Test MSE = {testMSE}")

2025-01-03 02:16:19.471445
Fitting parameters...
-----
-----> Epoch 1: Cost = 3092170.163531657, Train MSE = 4.436093898834942, Time Elapsed = 193.72585487365723
-----> Epoch 2: Cost = 2980946.1868234114, Train MSE = 4.21012081775692, Time Elapsed = 183.6024534702301
-----> Epoch 3: Cost = 2938226.405641048, Train MSE = 4.135051584968841, Time Elapsed = 168.87027597427368
-----> Epoch 4: Cost = 2914789.685964188, Train MSE = 4.09831519089181, Time Elapsed = 170.07898569107056
-----> Epoch 5: Cost = 2899996.647315487, Train MSE = 4.077388032427215, Time Elapsed = 168.8663091659546
-----> Epoch 6: Cost = 2889593.2900178987, Train MSE = 4.064203709117427, Time Elapsed = 170.2659933567047
-----> Epoch 7: Cost = 2881838.6600503284, Train MSE = 4.0550792981586365, Time Elapsed = 170.35956382751465
-----> Epoch 8: Cost = 2875534.820130243, Train MSE = 4.048390600881008, Time Elapsed = 170.59467720985413
-----> Epoch 9: Cost = 2870176.6508963173, Train MSE = 4.043080943836841, Time Elapsed = 1

In [17]:
### Re-run the test-set prediction with the current theta
# Same statements as the prediction step at the end of the fitting cell; useful to
# re-evaluate without refitting
t0 = time.time()
predictions = predict_latent_factor_batch(X_test.values.tolist(), theta, k, output_bounds)
print(f"Prediction time: {time.time() - t0}")
testMSE = get_MSE(predictions, y_test)

print()
print(f"Test MSE = {testMSE}")

Prediction time: 10.191340208053589

Test MSE = 4.255251569932521


In [18]:
### Save results
# Test rows plus one prediction column; the column name and the file name encode the model configuration
output = test_df[["user_id", "item_id", "playtime_log"]].copy()
no_category_weights = (len(user_weight_df)==0) and (len(item_weight_df)==0)
if no_category_weights:
    # Plain model without any category parameters
    prediction_column = f"pred_FM_NoOtherParams__k{k}_lb{lambda_bias}_lg{lambda_gamma}"
    results_csv_path = f"data/test_results_FM_NoOtherParams_k{k}_lb{lambda_bias}_lg{lambda_gamma}.csv"
elif include_other:
    # Category model including the condensed "Other" column
    prediction_column = f"pred_{n_categories}{weight_df_type}_withOther__k{k}_lb{lambda_bias}_lg{lambda_gamma}"
    results_csv_path = f"data/test_results_FM_{n_categories}{weight_df_type}_withOther_k{k}_lb{lambda_bias}_lg{lambda_gamma}.csv"
else:
    # Category model restricted to the selected categories
    prediction_column = f"pred_{n_categories}{weight_df_type}__k{k}_lb{lambda_bias}_lg{lambda_gamma}"
    results_csv_path = f"data/test_results_FM_{n_categories}{weight_df_type}_k{k}_lb{lambda_bias}_lg{lambda_gamma}.csv"
output[prediction_column] = predictions
output.to_csv(results_csv_path, index=False)
output

Unnamed: 0,user_id,item_id,playtime_log,pred_21tags_withOther__k5_lb1_lg5
0,u5084,31130,0.000000,0.873749
1,u11861,233270,6.573680,3.437156
2,u4949,220200,5.451038,6.513286
3,u1808,4000,5.446737,7.794845
4,u5401,1280,0.000000,1.397015
...,...,...,...,...
169222,u11442,238960,3.044522,3.077500
169223,u12710,4000,9.467847,8.135887
169224,u10496,8190,5.337538,4.922942
169225,u6112,48700,0.000000,6.590327
