Numerical Methods Term Project
Ali Davut Eskiocak

Necessary Libraries

In [121]:
import json
from numba import njit
from scipy import linalg
import jupyter
import pandas
import numpy as np 
k = 20

Necessary Functions

Data Cleaning

In [122]:
data = pandas.read_json("Desktop/sample.json")
data = data[['reviewer', 'movie', 'rating']]

Elimination of some users

In [123]:
user_counts = data['reviewer'].value_counts()
valid_users = user_counts[user_counts >= 20].index.tolist()
filtered_data = data[data['reviewer'].isin(valid_users)]
data = filtered_data
#print(data.head())

Creating a matrix

In [124]:
pivot_table = data.pivot_table(index='reviewer', columns='movie', values='rating')

pivot_table = pivot_table.apply(lambda row: row.fillna(row.mean()), axis=1)

old_matrix = pivot_table.to_numpy()
matrix = old_matrix

Subtracting Row_avg

In [125]:
   average_rows = np.nanmean(matrix, axis=1)
   matrix_norm = matrix
   for i in range(matrix.shape[0]):
       matrix_norm[i] = matrix[i] - average_rows[i]

In [126]:
matrix = matrix_norm

SVD Decomposition

In [127]:
U, S, Vt = np.linalg.svd(matrix)
Sd = np.diag(S)

In [140]:
U_rows, U_cols = U.shape
print("Number of rows:", U_rows)
print("Number of columns:", U_cols)

Number of rows: 362
Number of columns: 362


In [141]:
Sd_rows, Sd_cols = Sd.shape
print("Number of rows:", Sd_rows)
print("Number of columns:", Sd_cols)

Number of rows: 362
Number of columns: 362


In [142]:
Vt_rows, Vt_cols = Vt.shape
print("Number of rows:", Vt_rows)
print("Number of columns:", Vt_cols)

Number of rows: 11321
Number of columns: 11321


Expanding S to enable multiplication

In [131]:
new_cols = Vt_rows
expanded_S = np.zeros((Sd_rows, new_cols))

# Fill the diagonal of the expanded matrix with the diagonal elements of the original S
np.fill_diagonal(expanded_S[:, :Sd_rows], Sd)

In [132]:
Uk = np.transpose(U)[:k].T
Sk = np.diag(S[:k])                     
Vtk = Vt[:k]

In [133]:
R_reduced = Uk @ Sk @ Vtk

Users and Movies

In [134]:
movies = np.sqrt(Sk) @ Vtk

Function that calculates similarity

In [135]:
@njit
def similarity(i_J, i_k, array):
    a, b, c = 0, 0, 0 
    global k
    for i in range(k):
        a += array[i][i_J] * array[i][i_k] 
        b += array[i][i_k] * array[i][i_k]
        c += array[i][i_J] * array[i][i_J]
    
    s = a / ((b * c) ** (1/2))
    return s

Function which makes prediction

In [136]:
@njit
def predict(a, j, array): 
    
    s = np.array([similarity(j, i, array) for i in range(array.shape[1])])
    m, n = 0, 0
    neighborhood = 20
    sorted_list = np.flip(np.argsort(s))
    for i in range(neighborhood):
        index = sorted_list[i]
        m += similarity(j, index, array) * (array[a][index] + average_rows[a])
        n += np.abs(similarity(j, index, array))
    x = m/n
    return x


Creating Prediction Table

In [143]:
@njit
def table(array):
    Prediction = [[0] * len(array[0]) for _ in range(len(array))]
    for i in range(len(array)):
        for j in range(len(array[0])):
            Prediction[i][j] = predict(i, j, matrix)
    return Prediction

In [138]:
@njit
def calculate_mae(matrix):
    mae = 0
    a = table(matrix)
    for i in range(matrix.shape[0]):
        for j in range(matrix.shape[1]):
            mae += np.abs(a[i][j] - (matrix[i][j] + average_rows[i]))
    mae /= float(matrix.size)
    return mae

In [139]:
print(calculate_mae(R_reduced))

0.46976290663945197
