<a href="https://colab.research.google.com/github/dymiyata/erdos2023_million_playlist_challenge/blob/master/matrix_factorization/matrix_factorization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Smoke-test cell: prints a fixed message so a successful run is visible in the output.
print("Testing Colab and Github integration")

In [2]:
import numpy as np
import pandas as pd
import json
import copy
import random
from numba import njit

# Old functions
I'm keeping the old code here in case we need it again.

In [None]:
# use gradient descent to minimize MSE (with l2 regularization)
def update_params_loop(R_list, P, Q, alpha, llambda):
    """Run one pass of gradient updates over the observed (u, i) pairs.

    Every pair in ``R_list`` is treated as having a target value of 1.
    Pairs are processed in order, and each step sees the updates made by
    the pairs before it. Within a single pair the row of P is stepped
    first, and the row of Q is then stepped using that already-updated
    row of P. After all pairs have been processed, both matrices are
    shrunk by the L2 penalty term.

    The inputs ``P`` and ``Q`` are not modified; the work is done on copies.

    Args:
        R_list: iterable of ``(u, i)`` index pairs; ``u`` indexes rows of
            ``P`` and ``i`` indexes rows of ``Q``.
        P: 2-D array whose rows are indexed by ``u``.
        Q: 2-D array whose rows are indexed by ``i``.
        alpha: step size.
        llambda: L2 regularisation strength.

    Returns:
        Tuple ``(newP, newQ)`` holding the updated copies.
    """
    # Copy first so the caller's arrays are not changed as a side effect.
    newP = P.copy()
    newQ = Q.copy()

    for u, i in R_list:
        newP[u, :] -= alpha * 2 * (newP[u, :] @ newQ[i, :] - 1) * newQ[i, :]
        # The Q step reads the row of P updated on the line above
        # (sequential update, not a simultaneous one).
        newQ[i, :] -= alpha * 2 * (newP[u, :] @ newQ[i, :] - 1) * newP[u, :]

    # L2 shrinkage applied to every row at once; the right-hand side is
    # fully evaluated before the in-place subtraction.
    newP -= alpha * 2 * llambda * newP
    newQ -= alpha * 2 * llambda * newQ
    return (newP, newQ)

# use gradient descent where R_list has triples (u, i, score)
def update_params_loop_score(R_list, P, Q, alpha, llambda):
    """Run one pass of gradient updates over scored (u, i, score) triples.

    Each triple supplies its own target ``score`` for the product of row
    ``u`` of P and row ``i`` of Q. Triples are processed in order, and
    each step sees the updates made by the triples before it. Within a
    single triple the row of P is stepped first, and the row of Q is then
    stepped using that already-updated row of P. After all triples have
    been processed, both matrices are shrunk by the L2 penalty term.

    The inputs ``P`` and ``Q`` are not modified; the work is done on copies.

    Args:
        R_list: iterable of ``(u, i, score)`` triples; ``u`` indexes rows
            of ``P`` and ``i`` indexes rows of ``Q``.
        P: 2-D array whose rows are indexed by ``u``.
        Q: 2-D array whose rows are indexed by ``i``.
        alpha: step size.
        llambda: L2 regularisation strength.

    Returns:
        Tuple ``(newP, newQ)`` holding the updated copies.
    """
    # Copy first so the caller's arrays are not changed as a side effect.
    newP = P.copy()
    newQ = Q.copy()

    for u, i, score in R_list:
        newP[u, :] -= alpha * 2 * (newP[u, :] @ newQ[i, :] - score) * newQ[i, :]
        # The Q step reads the row of P updated on the line above
        # (sequential update, not a simultaneous one).
        newQ[i, :] -= alpha * 2 * (newP[u, :] @ newQ[i, :] - score) * newP[u, :]

    # L2 shrinkage applied to every row at once; the right-hand side is
    # fully evaluated before the in-place subtraction.
    newP -= alpha * 2 * llambda * newP
    newQ -= alpha * 2 * llambda * newQ
    return (newP, newQ)

#runs the gradient descent loop in batches
def gd_batch(R_list, P, Q, alpha, llambda, batch_num, iterations, R = None, verbose=False):
    """Run ``update_params_loop`` over shuffled batches of ``R_list``.

    The pairs are shuffled once, split into at most ``batch_num`` batches
    of equal size (the last one may be shorter), and the same batches are
    then reused in the same order on every iteration.

    Neither ``P``, ``Q`` nor ``R_list`` is modified: the matrices are
    copied up front and the shuffle is done on a private list copy.

    Args:
        R_list: sequence of ``(u, i)`` index pairs.
        P: initial P matrix; a copy is updated.
        Q: initial Q matrix; a copy is updated.
        alpha: step size passed to ``update_params_loop``.
        llambda: L2 regularisation strength passed to ``update_params_loop``.
        batch_num: number of batches to split ``R_list`` into; 0 raises
            ``ZeroDivisionError``.
        iterations: number of full passes over all batches.
        R: matrix of target values handed to ``error_function``; only
            read when ``verbose`` is true.
        verbose: when true, print the error function before each iteration.

    Returns:
        Tuple ``(P_current, Q_current)`` after the final iteration.
    """
    #make copies of P and Q
    P_current = P.copy()
    Q_current = Q.copy()

    # Shuffle a private copy so the caller's R_list keeps its order. Going
    # through list() also keeps the shuffle correct when R_list is a 2-D
    # array, whose rows are views that an in-place swap would overwrite.
    shuffled = list(R_list)
    random.shuffle(shuffled)

    # max(1, ...) keeps the range() step valid when R_list is empty.
    batch_size = max(1, int(np.ceil(len(shuffled) / batch_num)))
    R_batch = [shuffled[start:start + batch_size]
               for start in range(0, len(shuffled), batch_size)]

    #loop over total iterations
    for i in range(iterations):
        #if verbose == true print out error function
        if verbose:
            print(f'Step {i*batch_num}: Error function={error_function(shuffled,R , P_current, Q_current)}')
        #loop over batch_num
        for batch in R_batch:
            #run update_param_loop on batch
            P_current, Q_current = update_params_loop(batch, P_current, Q_current, alpha, llambda)

    return (P_current, Q_current)

#error function without l2 normalization factor
def error_function( R_list,R, P , Q ):
    """Sum of squared residuals over the index pairs in ``R_list``.

    For each ``(u, i)`` pair the residual is ``R[u, i]`` minus the dot
    product of row ``u`` of ``P`` with row ``i`` of ``Q``. No
    regularisation term is added.

    Returns:
        The accumulated squared error; the integer 0 when ``R_list`` is empty.
    """
    total = 0
    for u, i in R_list:
        residual = R[u, i] - P[u, :] @ Q[i, :]
        total += residual ** 2
    return total

# Old validation result
def val_error(R_list_new, Pval, Q, llambda):
    """Regularised squared error of ``Pval @ Q.T`` against a target of 1.

    For each ``(pid, tid)`` pair the squared difference between 1 and the
    dot product of row ``pid`` of ``Pval`` with row ``tid`` of ``Q`` is
    accumulated. The L2 penalty ``llambda * (||Pval||^2 + ||Q||^2)`` is
    then added once, using the norms of the whole matrices.

    Args:
        R_list_new: iterable of ``(pid, tid)`` index pairs.
        Pval: 2-D array indexed by ``pid`` along its rows.
        Q: 2-D array indexed by ``tid`` along its rows.
        llambda: L2 regularisation strength.

    Returns:
        The data term plus the L2 penalty.
    """
    result = 0

    for pid, tid in R_list_new:
        result += (1 - Pval[pid, :] @ Q[tid, :])**2

    result += llambda * (np.linalg.norm(Pval)**2 + np.linalg.norm(Q)**2)
    return result

def new_user_vec_old(tid_list, Q, llambda):
    """Closed-form regularised least-squares vector for one new row of P.

    With ``Y`` the rows of ``Q`` selected by ``tid_list``, this solves
    ``(Y^T Y + llambda * I) x = Y^T 1`` for ``x``, where ``1`` is a column
    of ones with one entry per selected row.

    Args:
        tid_list: row indices into ``Q``.
        Q: 2-D array with ``f`` columns.
        llambda: L2 regularisation strength added to the diagonal.

    Returns:
        Array of shape ``(1, f)`` holding the solution as a row vector.

    Raises:
        numpy.linalg.LinAlgError: if the regularised system is singular.
    """
    Y = Q[tid_list, :]
    f = np.shape(Q)[1]
    gram = Y.T @ Y + llambda * np.identity(f)
    rhs = Y.T @ np.ones((len(tid_list), 1))
    # Solve the linear system directly rather than forming an explicit
    # inverse: it is cheaper and numerically better behaved.
    return np.linalg.solve(gram, rhs).T

def make_Pval_old(R_list_new, Q, llambda):
    """Build the P matrix for new playlists, one row per distinct pid.

    Column 0 of ``R_list_new`` holds pids and column 1 holds the indices
    passed to ``new_user_vec_old``. For each distinct pid the matching
    column-1 entries are collected and the resulting vector is stored in
    row ``pid`` of the output.

    The output has one row per distinct pid but is indexed by the pid
    value itself, so this presumably expects pids to be 0..k-1 — TODO confirm.

    Returns:
        Array of shape ``(number of distinct pids, f)``, with ``f`` the
        number of columns of ``Q``.
    """
    _, n_features = np.shape(Q)

    pid_column = R_list_new[:, 0]
    distinct_pids = np.unique(pid_column)
    P_val = np.zeros((len(distinct_pids), n_features))

    for pid in distinct_pids:
        # column-1 entries belonging to this playlist
        playlist_tids = R_list_new[pid_column == pid, 1]

        # the solver returns a (1, f) array; copy its single row into place
        solved = new_user_vec_old(playlist_tids, Q, llambda)
        P_val[pid, :] = solved[0, :]

    return P_val

Run the next cell to check that `make_Pval` and `make_Pval_old` give the same answer.

In [None]:
# Build P_val with make_Pval (llambda = 0.005), print its MSE and the elapsed time.
# NOTE(review): `time`, `make_Pval`, `MSE`, `R_idx_val` and `Q_trained` are not
# defined in this part of the notebook — presumably they come from other cells.
# The timed span includes the MSE computation and the print, not just make_Pval.
start_time = time()
P_val = make_Pval(R_idx_val, Q_trained, 0.005)
print(MSE(R_idx_val, P_val, Q_trained))
end_time = time()
print('Done (numba): {:.3f} sec'.format(end_time-start_time))
print()

# Same measurement for the older implementation, make_Pval_old, with the same inputs.
start_time = time()
P_val_old = make_Pval_old(R_idx_val, Q_trained, 0.005)
print(MSE(R_idx_val, P_val_old, Q_trained))
end_time = time()
print('Done (old): {:.3f} sec'.format(end_time-start_time))
print()

# True only if every entry of the two matrices agrees within np.isclose's default tolerances.
print('Same answer:', np.isclose(P_val, P_val_old).all())