In [15]:
import numpy as np
import scipy.sparse as sp
from scipy.sparse import csc_matrix as csc
import pandas as pd

In [16]:
# Experiment name; it is embedded in the prediction file name and in the log sub-directory.
NAME = r'BasicMatrixFactorization'

# Input locations. DATA_DIR already ends with a '/', so file names are appended
# directly; inserting another separator would yield './data//...'.
DATA_DIR = r'./data/'
Y_HAT_PATH = DATA_DIR+NAME+r'-y_hat.npz'
TEST_DATA_PATH = DATA_DIR+r'uxm_test.npz'

# Output location of the metrics CSV.
# NOTE(review): absolute, Windows-style path with hard-coded backslashes; adjust
# LOG_DIR when running this notebook on another machine or OS.
LOG_DIR = r'C:\TensorLogs'
TEST_RESULTS_PATH = LOG_DIR+'\\'+NAME+'\\test-results.csv'

In [17]:
# Empty float series that the metric cells fill with one labelled value each.
log = pd.Series(dtype='float64')

# Load the predictions first, then the ground-truth test matrix.
y_hat, y = (sp.load_npz(path) for path in (Y_HAT_PATH, TEST_DATA_PATH))

# Element-wise comparison below is only meaningful for equally shaped matrices.
assert y_hat.shape == y.shape, (
    'The shape of Y and Y_hat must match, otherwise they are not comparable.'
)

# Short summary: common shape plus the number of stored entries in each matrix.
print(
    f"Shape of the matrices: {y.shape}",
    "Number of non-zero values:",
    f"Y: {y.nnz:8,}",
    f"Ŷ: {y_hat.nnz:8,}",
    sep="\n",
)

Shape of the matrices: (177592, 44780)
Number of non-zero values:
Y:  552,255
Ŷ:  552,255


In [18]:
# Switch both matrices to Compressed Sparse Column storage before doing arithmetic on them.
# Rule of thumb followed here: CSC when rows outnumber columns, CSR when columns outnumber rows.
y_hat, y = y_hat.tocsc(), y.tocsc()

In [19]:
# Mean squared error: sum of squared differences divided by the number of
# entries stored in y.
# NOTE(review): every stored entry of (y_hat - y) contributes to the sum, so this
# assumes y_hat only holds predictions at positions present in y - TODO confirm.
MSE = (y_hat - y).power(2).sum() / y.nnz
log["MSE"] = MSE
print(f"Mean Square Error: {MSE}")

Mean Square Error: 6.32073362848684


In [21]:
# Root mean squared error, derived from the MSE value computed in an earlier cell.
RMSE = np.sqrt(MSE)
log["RMSE"] = RMSE
print(f"Root Mean Square Error: {RMSE}")

Root Mean Square Error: 2.5141069246328485


In [22]:
# Mean absolute error: sum of absolute differences divided by the number of
# entries stored in y.
# NOTE(review): like the MSE, this assumes y_hat has no stored predictions
# outside the positions present in y - TODO confirm.
MAE = abs(y_hat - y).sum() / y.nnz
log["MAE"] = MAE
print(f"Mean Absolute Error: {MAE}")

Mean Absolute Error: 1.7183024599143513


In [23]:
# Show the metrics gathered in `log` so far as the cell's rich output.
log

MSE     6.320734
RMSE    2.514107
MAE     1.718302
dtype: float64

In [24]:
# Persist the metrics as a header-less CSV with one "label,value" row per metric.
# The target directory must already exist; to_csv does not create it.
log.to_csv(path_or_buf=TEST_RESULTS_PATH, header=False, index=True)