In [1]:
import os

import numpy as np
import pandas as pd
import scipy.sparse as sp
from scipy.sparse import csc_matrix as csc
from sklearn.metrics import (
    mean_squared_error,
    mean_absolute_error,
    r2_score,
    explained_variance_score,
    roc_auc_score,
    log_loss,
)
from sklearn.preprocessing import minmax_scale

from constants import (SEED, EPSILON, EVENT_THRESHOLD, DEFAULT_K, DEFAULT_THRESHOLD, LOG_DIR, 
                       DATA_DIR, TEST_DATA_PATH, DATA_OCT, DATA_NOV, USECOLS, USER, ITEM, RATING, PREDICTION)
from utilities.ms_evaluation import (rmse, auc, logloss, precision_at_k, recall_at_k, ndcg_at_k, map_at_k, mae, rsquared, exp_var)

pd.options.display.float_format = '{:,.6f}'.format

In [2]:
# Name of the evaluated model; it selects both the prediction file and the log folder.
NAME = r'BasicMatrixFactorization'

# Paths are assembled with os.path.join so that they work whether or not DATA_DIR / LOG_DIR
# end with a separator, and on every operating system (the previous version hard-coded
# '/' in one path and the Windows-only '\\' in the other).
Y_HAT_PATH = os.path.join(DATA_DIR, NAME + r'-y_hat.npz')
TEST_RESULTS_PATH = os.path.join(LOG_DIR, NAME, 'test-results.csv')

# Suffixes appended to the printed metrics to tell the reader how each value was obtained.
SKL = "(calculated using sklearn.metrics on non-zero values of sparse matrices)"
SPA = "(calculated using CSC sparse matrix operations)"
# NOTE: "MSE" here labels the Microsoft Evaluation helpers, not the mean squared error.
MSE = "(calculated using the Microsoft Evaluation method)"

In [3]:
# Collects every metric computed in this notebook; written to disk in the last cell.
log = pd.Series(dtype='float64')

# Explicit switch instead of commenting lines in and out:
#   True  -> load the ground truth as the "prediction" to test the data preparation
#            (error metrics computed on the matrices themselves are then trivially perfect),
#   False -> load the model's stored predictions from Y_HAT_PATH.
# The default keeps the behaviour this notebook was last run with.
USE_TEST_DATA_AS_PREDICTION = True
y_hat = sp.load_npz(TEST_DATA_PATH if USE_TEST_DATA_AS_PREDICTION else Y_HAT_PATH)

# Ground truth.
y = sp.load_npz(TEST_DATA_PATH)
assert y_hat.shape == y.shape, 'The shape of Y and Y_hat must match, otherwise they are not comparable.'
print(f"Shape of the matrices: {y.shape}")
print("Number of non-zero values:")
print(f"Y: {y.nnz:8,}")
print(f"Ŷ: {y_hat.nnz:8,}")

Shape of the matrices: (177592, 44780)
Number of non-zero values:
Y:  552,255
Ŷ:  552,255


In [4]:
# Usually, the CSC is used when there are more rows than columns. (If there are more columns, use CSR instead.)
y_hat = y_hat.tocsc()
y = y.tocsc()

# Coordinates of the non-zero ground-truth cells. Both value vectors are read at these
# SAME coordinates, so element i of y_nz and element i of y_hat_nz always describe the
# same (row, column) cell. Previously y_hat was indexed by its own non-zero pattern,
# which yields vectors of different length / order as soon as the patterns differ.
obs_rows, obs_cols = y.nonzero()
y_nz = np.asarray(y[obs_rows, obs_cols]).reshape(-1)
y_hat_nz = np.asarray(y_hat[obs_rows, obs_cols]).reshape(-1)

# Test run for the recommender engine

In [5]:
# Load the two monthly event files, restricted to the columns listed in USECOLS.
input_df = pd.concat([
    pd.read_csv(DATA_OCT, engine='c', sep=',', usecols=USECOLS),
    pd.read_csv(DATA_NOV, engine='c', sep=',', usecols=USECOLS),
])

# Remove visitors with fewer than EVENT_THRESHOLD events (counts computed once).
events_per_user = input_df.user_id.value_counts()
drop_visitors = set(events_per_user[events_per_user < EVENT_THRESHOLD].index)
input_df = input_df[~input_df.user_id.isin(drop_visitors)]
input_df = input_df.reset_index(drop=True)

# Lookup tables: the index is the id found in the event files, the value is the id used
# to address the matrices. `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and
# removed in 2.0; `.squeeze("columns")` is the documented replacement and also works on
# older versions.
new_user_id = pd.read_csv(DATA_DIR+r'new_user_id.csv', index_col=1).squeeze("columns").astype('int32')
new_product_id = pd.read_csv(DATA_DIR+r'new_product_id.csv', index_col=1).squeeze("columns").astype('int32')

# Keep purchase events only; the event type column is no longer needed afterwards.
input_df = input_df[input_df.event_type=='purchase']
input_df = input_df.drop(columns=['event_type'])

# Translate the ids in one vectorised pass instead of one Series lookup per row.
mapped_users = input_df.user_id.map(new_user_id)
mapped_items = input_df.product_id.map(new_product_id)
# `map` turns unknown ids into NaN; fail loudly, as the per-row lookup did.
n_unmapped = int((mapped_users.isna() | mapped_items.isna()).sum())
if n_unmapped:
    raise KeyError(f"{n_unmapped} purchase events refer to a user or product id missing from the lookup tables")

# Set of distinct (user, item) pairs that have at least one purchase event.
purchases = set(zip(mapped_users.astype('int32').tolist(), mapped_items.astype('int32').tolist()))
print(f"Number of purchase events in test the dataset: {len(purchases)}")

Number of purchase events in test the dataset: 547912


In [6]:
# Ground-truth frame for the evaluation helpers: one record per non-zero cell of y,
# labelled 1 when the (row, column) pair is a known purchase and 0 otherwise.
ydok = y.todok()
rows, cols = ydok.nonzero()
df_true = pd.DataFrame(
    data=[[r, c, 1 if (r, c) in purchases else 0] for r, c in zip(rows, cols)],
    columns=[USER, ITEM, RATING],
)
df_true.head(1)

Unnamed: 0,userID,itemID,rating
0,0,0,1


In [7]:
# Prediction frame for the evaluation helpers: one record per non-zero cell of y_hat,
# carrying the stored value of that cell as the prediction.
y_hat_dok = y_hat.todok()
rows, cols = y_hat_dok.nonzero()
df_pred = pd.DataFrame(
    data=[[r, c, y_hat_dok[r, c]] for r, c in zip(rows, cols)],
    columns=[USER, ITEM, PREDICTION],
)
df_pred.head(1)  

Unnamed: 0,userID,itemID,prediction
0,0,0,0.831493


In [8]:
# AUC and logloss expect scores on a [0, 1] scale, so a min-max scaled copy of the
# predictions is kept next to the untouched df_pred.
scaled_prediction = minmax_scale(df_pred[PREDICTION].astype(float))
df_pred_bin = df_pred.assign(**{PREDICTION: scaled_prediction})
df_pred_bin.head(1) 

Unnamed: 0,userID,itemID,prediction
0,0,0,0.858894


# Standard metrics

### Mean Square Error

In [9]:
# The mean is taken over the non-zero cells of y, so the squared error must be summed over
# exactly those cells as well. The prediction is therefore masked with y's non-zero
# pattern first; otherwise predictions stored outside that pattern would inflate the
# numerator without being counted in the denominator.
observed = y != 0                    # sparse boolean mask of the non-zero ground-truth cells
n_observed = y.count_nonzero()       # number of cells selected by the mask
mse_spa = (y_hat.multiply(observed) - y).power(2).sum() / n_observed

# Cross-check with sklearn on the flattened value vectors.
mse_skl = mean_squared_error(y_nz,y_hat_nz)
print(f"Mean Square Error: {mse_spa} {SPA}")
print(f"Mean Square Error: {mse_skl} {SKL}")
print('Note: The smaller the better.')
log["mse"]=mse_spa

Mean Square Error: 0.0 (calculated using CSC sparse matrix operations)
Mean Square Error: 0.0 (calculated using sklearn.metrics on non-zero values of sparse matrices)
Note: The smaller the better.


### Root Mean Square Error

In [10]:
# RMSE: square root of the two MSE values computed in the previous cell, plus the figure
# returned by the rmse helper from utilities.ms_evaluation. The helper is given
# df_true / df_pred rather than the sparse matrices, so its figure need not equal the
# other two.
rmse_spa, rmse_skl = np.sqrt(mse_spa), np.sqrt(mse_skl)
rmse_mse = rmse(df_true, df_pred)

# Only the sparse-matrix variant is recorded in the results log.
log["rmse"] = rmse_spa

print(
    f"Root Mean Square Error: {rmse_spa} {SPA}",
    f"Root Mean Square Error: {rmse_skl} {SKL}",
    f"Root Mean Square Error: {rmse_mse} {MSE}",
    'Note: The smaller the better.',
    sep="\n",
)

Root Mean Square Error: 0.0 (calculated using CSC sparse matrix operations)
Root Mean Square Error: 0.0 (calculated using sklearn.metrics on non-zero values of sparse matrices)
Root Mean Square Error: 0.19223674526303544 (calculated using the Microsoft Evaluation method)
Note: The smaller the better.


### Mean Absolute Error

In [11]:
# The mean is taken over the non-zero cells of y, so the absolute error must be summed
# over exactly those cells as well. The prediction is therefore masked with y's non-zero
# pattern first; otherwise predictions stored outside that pattern would inflate the
# numerator without being counted in the denominator.
observed = y != 0                    # sparse boolean mask of the non-zero ground-truth cells
n_observed = y.count_nonzero()       # number of cells selected by the mask
mae_spa = abs(y_hat.multiply(observed) - y).sum() / n_observed

# Cross-checks: sklearn on the flattened value vectors, and the helper from
# utilities.ms_evaluation on the df_true / df_pred frames.
mae_skl = mean_absolute_error(y_nz,y_hat_nz)
mae_mse = mae(df_true,df_pred)
print(f"Mean Absolute Error: {mae_spa} {SPA}")
print(f"Mean Absolute Error: {mae_skl} {SKL}")
print(f"Mean Absolute Error: {mae_mse} {MSE}")
print('Note: The smaller the better.')
log["mae"]=mae_spa

Mean Absolute Error: 0.0 (calculated using CSC sparse matrix operations)
Mean Absolute Error: 0.0 (calculated using sklearn.metrics on non-zero values of sparse matrices)
Mean Absolute Error: 0.15693475282212682 (calculated using the Microsoft Evaluation method)
Note: The smaller the better.


### R²

In [12]:
# Coefficient of determination, once via sklearn on the flattened value vectors and once
# via the rsquared helper from utilities.ms_evaluation on the df_true / df_pred frames.
r2_skl = r2_score(y_nz, y_hat_nz)
r2_mse = rsquared(df_true, df_pred)

# The sklearn figure is the one recorded in the results log.
log["r-squared"] = r2_skl

print(
    f"Coefficient of determination (R\u00B2): {r2_skl} {SKL}",
    f"Coefficient of determination (R\u00B2): {r2_mse} {MSE}",
    "Note: The closer to 1 the better.",
    sep="\n",
)

Coefficient of determination (R²): 1.0 (calculated using sklearn.metrics on non-zero values of sparse matrices)
Coefficient of determination (R²): 0.7082944397370843 (calculated using the Microsoft Evaluation method)
Note: The closer to 1 the better.


### Explained variance

In [13]:
# Explained variance, once via sklearn on the flattened value vectors and once via the
# exp_var helper from utilities.ms_evaluation on the df_true / df_pred frames.
exp_var_skl = explained_variance_score(y_nz,y_hat_nz)
exp_var_mse = exp_var(df_true,df_pred)
# The sklearn line now carries the SKL provenance label like every other metric cell,
# so the reader can tell the two figures apart.
print(f"Explained variance: {exp_var_skl} {SKL}")
print(f"Explained variance: {exp_var_mse} {MSE}")
print("Note: The closer to 1 the better.")
# The sklearn figure is the one recorded in the results log.
log["exp_var"]=exp_var_skl

Explained variance: 1.0
Explained variance: 0.7994728055627172 (calculated using the Microsoft Evaluation method)
Note: The closer to 1 the better.


### Area Under the Curve (AUC) - the area under the receiver operating characteristic (ROC) curve


In [14]:
# AUC from the auc helper in utilities.ms_evaluation; it is given the min-max scaled
# predictions (df_pred_bin), not the raw ones.
auc_mse = auc(df_true, df_pred_bin)
log["auc"] = auc_mse

print(
    f"Arear Under Curve (AUC): {auc_mse} {MSE}",
    "Note: The closer to 1 the better. 0.5 indicates an uninformative classifier",
    sep="\n",
)

Arear Under Curve (AUC): 0.9999187163872164 (calculated using the Microsoft Evaluation method)
Note: The closer to 1 the better. 0.5 indicates an uninformative classifier


### Logistic loss (logloss)

In [15]:
# Logistic loss from the logloss helper in utilities.ms_evaluation; it is given the
# min-max scaled predictions (df_pred_bin), not the raw ones.
logloss_mse = logloss(df_true, df_pred_bin)
log["logloss"] = logloss_mse

print(
    f"Logistic loss (logloss): {logloss_mse} {MSE}",
    "Note: The closer to 0 the better.",
    sep="\n",
)

Logistic loss (logloss): 0.14514958198559486 (calculated using the Microsoft Evaluation method)
Note: The closer to 0 the better.


### Precision @ K

In [16]:
# Precision@K from the helper in utilities.ms_evaluation. No k is passed, so the helper's
# own default applies; the label below assumes that default equals DEFAULT_K - TODO confirm.
precision_at_k_mse = precision_at_k(df_true, df_pred)
log[f"precision-at-{DEFAULT_K}"] = precision_at_k_mse

print(
    f"Precision @ {DEFAULT_K}: {precision_at_k_mse} {MSE}",
    "Note: The closer to 1 the better.",
    sep="\n",
)

Precision @ 10: 0.34191752705560396 (calculated using the Microsoft Evaluation method)
Note: The closer to 1 the better.


### Recall @ K

In [17]:
# Recall@K from the helper in utilities.ms_evaluation. No k is passed, so the helper's
# own default applies; the label below assumes that default equals DEFAULT_K - TODO confirm.
recall_at_k_mse = recall_at_k(df_true, df_pred)
log[f"recall-at-{DEFAULT_K}"] = recall_at_k_mse

print(
    f"Recall @ {DEFAULT_K}: {recall_at_k_mse} {MSE}",
    "Note: The closer to 1 the better.",
    sep="\n",
)

Recall @ 10: 0.9668412602660089 (calculated using the Microsoft Evaluation method)
Note: The closer to 1 the better.


### Normalized Discounted Cumulative Gain (nDCG)

In [18]:
# nDCG@K from the helper in utilities.ms_evaluation. No k is passed, so the helper's
# own default applies; the label below assumes that default equals DEFAULT_K - TODO confirm.
ndcg_mse = ndcg_at_k(df_true, df_pred)
log[f"ndcg-at-{DEFAULT_K}"] = ndcg_mse

print(
    f"normalized Discounted Cumulative Gain (nDCG@{DEFAULT_K}): {ndcg_mse} {MSE}",
    "Note: The closer to 1 the better.",
    sep="\n",
)

normalized Discounted Cumulative Gain (nDCG@10): 1.0 (calculated using the Microsoft Evaluation method)
Note: The closer to 1 the better.


### mAP (mean Average Precision) 

In [19]:
# mAP@K from the helper in utilities.ms_evaluation. No k is passed, so the helper's
# own default applies; the label below assumes that default equals DEFAULT_K - TODO confirm.
map_mse = map_at_k(df_true, df_pred)
log[f"map-at-{DEFAULT_K}"] = map_mse

print(
    f"mean Average Precision (mAP@{DEFAULT_K}): {map_mse} {MSE}",
    "Note: The closer to 1 the better.",
    sep="\n",
)

mean Average Precision (mAP@10): 0.9668412602660089 (calculated using the Microsoft Evaluation method)
Note: The closer to 1 the better.


In [20]:
# Persist the collected metrics as "name,value" rows (no header line).
# to_csv does not create missing folders, so make sure the target folder exists first.
# An empty dirname means the path has no folder component and nothing needs creating.
results_dir = os.path.dirname(TEST_RESULTS_PATH)
if results_dir:
    os.makedirs(results_dir, exist_ok=True)
log.to_csv(TEST_RESULTS_PATH, index = True, header=False)
log

mse               0.000000
rmse              0.000000
mae               0.000000
r-squared         1.000000
exp_var           1.000000
auc               0.999919
logloss           0.145150
precision-at-10   0.341918
recall-at-10      0.966841
ndcg-at-10        1.000000
map-at-10         0.966841
dtype: float64