In [396]:
import numpy as np
import scipy
from scipy.sparse import csc_matrix, bmat, load_npz
from scipy.sparse.linalg import svds
import matplotlib.pyplot as plt
from tqdm import tqdm
import random

## Load Data

In [397]:
# Load the four sparse matrices in one pass: `s` plus the train / test /
# validation variants of `a`.
s, a_train, a_test, a_val = (
    load_npz(path)
    for path in ("data/s.npz", "data/a_train.npz", "data/a_test.npz", "data/a_val.npz")
)
# Sizes used by the evaluation code: rows of `s` and columns of `a_train`.
n_users, n_groups = s.shape[0], a_train.shape[1]

In [398]:
def prepare_train_data(alpha, s, a_train, a_val):
    """Build the block matrices used for training and validation.

    Each result has the layout ``[[alpha * s, a], [a.T, 0]]`` where ``a`` is
    ``a_train`` for the first returned matrix and ``a_val`` for the second.

    Returns:
        Tuple ``(c_train, c_val)`` of sparse matrices as produced by ``bmat``.
    """
    def assemble(a):
        # A None entry tells bmat to leave the bottom-right block all zeros.
        return bmat([[alpha * s, a], [a.transpose(), None]])

    return assemble(a_train), assemble(a_val)

## SVD

In [427]:
"vrati listu (za k = 1:n) precisiona i recalla na testu za jednog usera"
def evaluate_latent_factor_user(i, n, u, sig, vt, c_train, c_val, n_groups):
    """Precision, sensitivity and specificity at cutoffs 1..n for one user.

    The candidate scores are the last ``n_groups`` entries of row ``i`` of the
    low-rank reconstruction ``u @ diag(sig) @ vt``. Entries that are non-zero
    in row ``i`` of ``c_train`` are excluded from the ranking; the remaining
    candidates are ranked by descending score and compared with row ``i`` of
    ``c_val``.

    Args:
        i: Row index of the user for whom recommendations are generated.
        n: Largest cutoff; statistics are produced for 1, 2, ..., n items.
        u, sig, vt: SVD factors (left vectors, singular values, right vectors).
        c_train: Sparse matrix whose row ``i`` marks already-known items.
        c_val: Sparse matrix whose row ``i`` holds the labels to predict.
        n_groups: Number of trailing columns that are ranked.

    Returns:
        List of ``n`` tuples ``(precision, sensitivity, specificity)``.
    """
    # Only the trailing n_groups columns are ranked, so reconstruct just those
    # instead of the whole row.
    score = (u[i, :] * sig) @ vt[:, -n_groups:]
    true_labels = c_val.getrow(i).toarray().flatten()[-n_groups:]
    train_labels = c_train.getrow(i).toarray().flatten()[-n_groups:]
    # Known training items must rank last. Zeroing their score is not enough:
    # a zero still outranks every unseen item with a negative score.
    score = np.where(train_labels != 0, -np.inf, score)
    score_index = np.flip(np.argsort(score))
    positives = np.sum(true_labels)
    negatives = n_groups - positives
    user_i_stats = []
    for predictions in range(1, n + 1):
        recommendations = score_index[:predictions]
        true_positives = np.sum(true_labels[recommendations] == 1)
        true_negatives = negatives - (predictions - true_positives)
        precision = true_positives / predictions
        # With no positives (or no negatives) the rate is undefined; use 1,
        # the same convention for both, rather than dividing by zero.
        sensitivity = true_positives / positives if positives != 0 else 1
        specificity = true_negatives / negatives if negatives != 0 else 1
        user_i_stats.append((precision, sensitivity, specificity))
    return user_i_stats

In [428]:
def evaluate_latent_factors(u, sig, vt, c_train, c_val, n_users, n_groups, n_recommendations=100):
    """Average per-user precision, sensitivity and specificity over all users.

    Args:
        u, sig, vt: SVD factors passed through to the per-user evaluation.
        c_train, c_val: Sparse train / validation block matrices.
        n_users: Number of users; rows ``0 .. n_users - 1`` are evaluated.
        n_groups: Number of trailing columns that are ranked.
        n_recommendations: Largest cutoff evaluated per user (default 100,
            the value that was previously hard-coded).

    Returns:
        Three 1-D arrays of length ``n_recommendations``: mean precision,
        mean sensitivity and mean specificity at each cutoff, where the mean
        is taken over users.
    """
    # One entry per user; each entry is a list of n_recommendations
    # (precision, sensitivity, specificity) tuples.
    per_user_stats = [
        evaluate_latent_factor_user(user, n_recommendations, u, sig, vt, c_train, c_val, n_groups)
        for user in tqdm(range(n_users))
    ]
    # Stacked shape is (n_users, n_recommendations, 3); averaging over axis 0
    # leaves an (n_recommendations, 3) array with one column per metric.
    mean_stats = np.asarray(per_user_stats, dtype=np.float64).mean(axis=0)
    return mean_stats[:, 0], mean_stats[:, 1], mean_stats[:, 2]

In [1]:
def get_score(precision, sensitivity, specificity):
    """Area under the (1 - specificity, sensitivity) curve by the trapezoid rule.

    Args:
        precision: Unused; kept so the three metric arrays can be passed as-is.
        sensitivity: Sequence of sensitivity values (y axis).
        specificity: Sequence of specificity values, same length as
            ``sensitivity``; the x axis is ``1 - specificity``.

    Returns:
        Absolute value of the integrated area, so the result does not depend
        on whether the x values are increasing or decreasing.
    """
    false_positive_rate = 1 - np.asarray(specificity, dtype=np.float64)
    # NumPy 2.0 renamed trapz to trapezoid and deprecated the old name; use
    # whichever this NumPy version provides.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    area = integrate(y=sensitivity, x=false_positive_rate)
    return abs(area)

In [430]:
def latent_factors_model(alpha, svd_rank, s, a_train, a_val, n_users, n_groups):
    """Fit a truncated SVD on the training block matrix and score it on validation.

    Args:
        alpha: Weight applied to ``s`` when the block matrices are assembled.
        svd_rank: Number of singular triplets requested from ``svds``.
        s, a_train, a_val: Sparse inputs forwarded to ``prepare_train_data``.
        n_users, n_groups: Sizes forwarded to the evaluation.

    Returns:
        Dict with keys ``alpha``, ``svd_rank``, ``score``, ``precision``,
        ``sensitivity`` and ``specificity``.
    """
    c_train, c_val = prepare_train_data(alpha, s, a_train, a_val)
    # bmat returns COO, which has no efficient row access, while the
    # evaluation extracts one row per user. Convert to CSR once up front.
    c_train = c_train.astype(np.float64).tocsr()
    c_val = c_val.astype(np.float64).tocsr()
    u, sig, vt = svds(c_train, k=svd_rank)
    precision, sensitivity, specificity = evaluate_latent_factors(
        u, sig, vt, c_train, c_val, n_users, n_groups
    )
    score = get_score(precision, sensitivity, specificity)
    return {
        "alpha": alpha,
        "svd_rank": svd_rank,
        "score": score,
        "precision": precision,
        "sensitivity": sensitivity,
        "specificity": specificity,
    }

In [431]:
def validate_latent_factor_model(alphas, svd_ranks, s, a_train, a_val, n_users, n_groups):
    """Evaluate the latent-factor model for every (alpha, svd_rank) combination.

    Combinations are visited with ``alphas`` as the outer loop and
    ``svd_ranks`` as the inner loop.

    Returns:
        List of the result dicts returned by ``latent_factors_model``, one per
        combination, in visiting order.
    """
    return [
        latent_factors_model(weight, rank, s, a_train, a_val, n_users, n_groups)
        for weight in alphas
        for rank in svd_ranks
    ]

In [435]:
# Hyper-parameter grids for the validation run; each list currently holds a
# single candidate value.
alphas = [3]
svd_ranks = [10]

In [436]:
# Evaluate every (alpha, svd_rank) pair from the grids on the validation data;
# the result is a list with one dict per pair.
validation_scores = validate_latent_factor_model(alphas, svd_ranks, s, a_train, a_val, n_users, n_groups)

100%|██████████| 10000/10000 [01:47<00:00, 93.19it/s]


In [445]:
validation_scores

[{'alpha': 3,
  'svd_rank': 10,
  'score': 0.28565990285781884,
  'precision': array([0.0557    , 0.0448    , 0.0385    , 0.03515   , 0.03176   ,
         0.02913333, 0.02717143, 0.0252625 , 0.02362222, 0.02259   ,
         0.02165455, 0.02071667, 0.01977692, 0.01897857, 0.01825333,
         0.0176125 , 0.01705294, 0.01647778, 0.01598947, 0.01553   ,
         0.01521429, 0.01486364, 0.0145087 , 0.014225  , 0.013868  ,
         0.01362308, 0.01332222, 0.01304286, 0.01273448, 0.01247667,
         0.01220323, 0.01199375, 0.01176667, 0.01160588, 0.01142   ,
         0.01121111, 0.01103243, 0.01084737, 0.01066154, 0.01052   ,
         0.01034634, 0.01016429, 0.01001395, 0.00986591, 0.00973333,
         0.00957391, 0.00942979, 0.00931667, 0.00920612, 0.00907   ,
         0.00896471, 0.00884808, 0.00874717, 0.00863148, 0.00851273,
         0.00841964, 0.00832105, 0.00822759, 0.00812881, 0.00805333,
         0.00796066, 0.00787742, 0.00778571, 0.00770781, 0.00761846,
         0.00757121, 0.007

## Katz

In [21]:
def katz(t, beta, k):
    """Truncated Katz sum ``beta*t + beta**2 * t@t + ... + beta**k * t**k``.

    Args:
        t: Square matrix supporting ``*`` with a scalar, ``@`` and ``+``.
        beta: Scalar damping factor applied once per power of ``t``.
        k: Number of terms; for ``k <= 1`` only ``beta * t`` is returned.
    """
    term = beta * t
    total = term
    for _ in range(1, k):
        # Next power: scale the previous term by beta, then multiply by t.
        term = (beta * term) @ t
        total = total + term
    return total

In [22]:
def katz_2(t, beta, k, rank=3):
    """Truncated Katz sum computed through a low-rank SVD of ``t``.

    With ``t ~= u @ S @ vt`` (``S`` diagonal, ``rank`` singular triplets), the
    j-th power is ``u @ S @ (vt @ u @ S)**(j-1) @ vt``. The whole sum can then
    be accumulated on small ``rank x rank`` matrices and expanded once at the
    end. The result equals ``katz(t, beta, k)`` only to the extent that ``t``
    is reproduced by its rank-``rank`` SVD.

    Args:
        t: Matrix accepted by ``svds`` and providing ``astype``.
        beta: Scalar damping factor applied once per power of ``t``.
        k: Number of terms in the sum; for ``k <= 1`` only the first term is used.
        rank: Number of singular triplets requested from ``svds`` (default 3,
            the value that was previously hard-coded).

    Returns:
        Dense array ``u @ core @ vt`` with the same shape as ``t``.
    """
    t = t.astype(np.float64)
    u, singular_values, vt = svds(t, k=rank)
    sigma = np.diag(singular_values)
    vtu = vt @ u

    # `term` holds beta**j * S @ (vtu @ S)**(j-1); `core` is the running sum.
    term = beta * sigma
    core = term
    for _ in range(k - 1):
        term = beta * term @ vtu @ sigma
        core = core + term
    return u @ core @ vt