# Alternating Least Squares Implementation

Matrix factorization by alternating least squares.

In [1]:
import numpy as np
import pandas as pd

## Intuition

First, build some intuition by manually executing two iterations of the algorithm.

In [6]:
# Load the critics ratings table (columns: User, Movie, Rating).
critics_path = "../data/critics/critics.csv"
df = pd.read_csv(critics_path)
df.head()

Unnamed: 0,User,Movie,Rating
0,Lisa Rose,Lady in the Water,2.5
1,Lisa Rose,Snakes on a Plane,3.5
2,Lisa Rose,Just My Luck,3.0
3,Lisa Rose,Superman Returns,3.5
4,Lisa Rose,"You, Me and Dupree",2.5


In [9]:
# Reshape the long ratings table into a wide grid: one row per user, one
# column per movie. User/movie pairs without a rating become NaN.
ratings_wide = df.pivot(index="User", columns="Movie", values="Rating")
user_product_matrix = ratings_wide.to_numpy()
user_product_matrix

array([[3. , nan, 3.5, 4. , 4.5, 2.5],
       [1.5, 3. , 3.5, 5. , 3. , 3.5],
       [nan, 3. , 4. , 5. , 3. , 3.5],
       [3. , 2.5, 3.5, 3.5, 3. , 2.5],
       [nan, 2.5, 3. , 3.5, 4. , nan],
       [2. , 3. , 4. , 3. , 3. , 2. ],
       [nan, nan, 4.5, 4. , nan, 1. ]])

We will try to factorize the matrix above with 3 latent factors.

### 1. Initialize User Matrix

In [17]:
# Initialize the user matrix: one row per user, one column per latent factor.
# A fixed seed makes the walkthrough reproducible on a fresh kernel
# (Restart & Run All); without it every run starts from different values.
n_factors = 3
rng = np.random.default_rng(42)

# Entries are drawn from a normal distribution with mean 0 and standard
# deviation 1/sqrt(n_factors).
u_init = rng.normal(
    0,
    1 / np.sqrt(n_factors),
    size=(user_product_matrix.shape[0], n_factors),
)
u_init

array([[ 0.1216306 , -0.10749638,  0.72123486],
       [ 0.0926787 ,  0.21991047,  0.34158726],
       [-0.96480358,  0.07098226, -0.32778319],
       [ 0.258449  , -0.53302782,  0.12613497],
       [ 0.21969446,  0.21058718,  0.04744654],
       [-0.09961878,  0.57733349, -0.0266567 ],
       [ 0.03578411, -0.63026162, -1.32150535]])

In [12]:
def calculate_ols_coefficients(X, y, l=0):
    """Return least-squares coefficients for ``y ~ X`` with an optional ridge penalty.

    Solves the (regularized) normal equations
    ``(X.T @ X + l * I) @ coeff = X.T @ y``.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Design matrix.
    y : ndarray of shape (n_samples,) or (n_samples, n_targets)
        Target values.
    l : float, default 0
        L2 (ridge) penalty added to the diagonal of ``X.T @ X``.
        ``l=0`` gives ordinary least squares.

    Returns
    -------
    ndarray of shape (n_features,) or (n_features, n_targets)
        The fitted coefficients.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``X.T @ X + l * I`` is singular.
    """
    gram = X.T @ X
    moment = X.T @ y
    penalty = l * np.eye(gram.shape[0])

    # Solve the linear system directly instead of forming an explicit inverse:
    # it is numerically more stable and does less work.
    return np.linalg.solve(gram + penalty, moment)

### 2. Calculate product matrix v with initialized user matrix

In [24]:
# Fit one regularized regression per movie: regress the movie's observed
# ratings on the initial user factors to obtain that movie's factor row.
movie_factors = []

for movie_ratings in user_product_matrix.T:
    # Put the user factors and this movie's ratings side by side, then keep
    # only the rows without any NaN (i.e. users who rated the movie).
    stacked = np.column_stack((u_init, movie_ratings))
    complete_rows = stacked[~np.isnan(stacked).any(axis=1)]
    features, targets = complete_rows[:, :-1], complete_rows[:, -1]
    movie_factors.append(calculate_ols_coefficients(features, targets, 0.1))

v = np.array(movie_factors)
v


array([[ 3.38897536e+00,  6.24595334e-01,  3.35804637e+00],
       [-2.64180009e+00,  1.67681628e+00,  4.28579224e+00],
       [-1.37250691e+00, -6.96831085e-01, -7.48507916e-01],
       [-2.22513505e+00, -1.15503910e+00,  3.72720980e-03],
       [-2.81082578e+00,  1.66450288e+00,  6.13050280e+00],
       [-2.77827116e+00, -1.41178583e+00,  1.14745125e+00]])

### 3. (2nd iteration) Calculate user matrix u with the v matrix estimated in the previous iteration

In [26]:
# Transposed ratings: rows are movies, columns are users.
user_product_matrix_T = user_product_matrix.T

# Fit one regularized regression per user: regress the user's observed
# ratings on the current movie factors to obtain that user's factor row.
user_factors = []

for user_idx in range(user_product_matrix_T.shape[1]):
    user_ratings = user_product_matrix_T[:, user_idx]
    # Pair the movie factors with this user's ratings and drop every row
    # containing a NaN (i.e. movies the user has not rated).
    stacked = np.column_stack((v, user_ratings))
    complete_rows = stacked[~np.isnan(stacked).any(axis=1)]
    features, targets = complete_rows[:, :-1], complete_rows[:, -1]
    user_factors.append(calculate_ols_coefficients(features, targets, 0.1))

u = np.array(user_factors)
u

array([[-0.14297498, -1.87299564,  1.1360763 ],
       [-0.43361733, -1.95649191,  0.94335882],
       [-2.32432532,  0.74513015, -0.8848415 ],
       [-0.09645928, -1.7778043 ,  1.01528145],
       [-1.27765585, -0.93977701,  0.2588414 ],
       [-0.28141686, -1.22609567,  0.78573035],
       [-1.28162354, -0.72834402, -2.92816591]])

### 4. (2nd iteration) Calculate product matrix v with the u matrix estimated in the first half of the iteration

In [27]:
# Re-fit the movie factors against the updated user factors: one
# regularized regression per movie over the users who rated it.
movie_factors = []

for movie_ratings in user_product_matrix.T:
    # Put the user factors and this movie's ratings side by side, then keep
    # only the rows without any NaN.
    stacked = np.column_stack((u, movie_ratings))
    complete_rows = stacked[~np.isnan(stacked).any(axis=1)]
    features, targets = complete_rows[:, :-1], complete_rows[:, -1]
    movie_factors.append(calculate_ols_coefficients(features, targets, 0.1))

v = np.array(movie_factors)
v


array([[ 1.3506325 , -0.52797871,  1.80305739],
       [-1.81097424, -0.53246773,  1.31526758],
       [-1.97503966, -1.75499814, -0.192945  ],
       [-2.46165792, -1.69624775,  0.14125572],
       [-2.14828396, -0.63967846,  1.80613601],
       [-2.01718059, -0.83394851,  0.73425451]])

The alternating least squares algorithm repeats these alternating updates until the values of the U and V matrices converge.

As you can see below, `u` and the transpose of `v` are then multiplied to reconstruct the user-product rating matrix.

In [32]:
# Reconstruct the ratings as the product of user factors and movie factors.
reconstructed = u @ v.T
print(reconstructed)
reconstructed.shape

[[ 2.97780298  3.26109754  3.3720582   3.87163753  4.21840481  3.05218037]
 [ 1.32594373  2.82892904  4.04579241  4.44389266  3.5317617   3.0864871 ]
 [-2.41890596  2.7812106   4.1626996   4.80664741  3.22892769  3.45009578]
 [ 2.89715273  2.40758435  3.35832076  3.57677321  3.12118589  2.43015023]
 [ 0.70264408  2.91280236  2.92844464  3.5280586   3.64773771  2.89374734]
 [ 2.01533196  2.39903968  3.27930011  3.56869592  3.0578991   2.48769636]
 [-2.40568572 -0.36996645  4.37200495  3.92553673 -0.81352464  1.19652474]]


(7, 6)

In [33]:
# Show the original ratings and their shape for comparison with the reconstruction.
print(user_product_matrix, user_product_matrix.shape, sep="\n")

[[3.  nan 3.5 4.  4.5 2.5]
 [1.5 3.  3.5 5.  3.  3.5]
 [nan 3.  4.  5.  3.  3.5]
 [3.  2.5 3.5 3.5 3.  2.5]
 [nan 2.5 3.  3.5 4.  nan]
 [2.  3.  4.  3.  3.  2. ]
 [nan nan 4.5 4.  nan 1. ]]
(7, 6)
