In [None]:
import numpy as np
import scipy
from scipy.sparse import csc_matrix, bmat, load_npz
from scipy.sparse.linalg import svds
import matplotlib.pyplot as plt
from tqdm import tqdm
import random

## Load Data

In [None]:
# Sparse inputs stored as .npz files under data/.
# NOTE(review): `s` is presumably the user-user matrix and the `a_*` matrices
# the user-group matrices for each split -- confirm against the data pipeline.
s = load_npz("data/yt_s.npz")

a_train = load_npz("data/yt_a_train.npz")
a_val = load_npz("data/yt_a_val.npz")
a_test = load_npz("data/yt_a_test.npz")

# Rows of `s` are counted as users, columns of `a_train` as groups.
n_users, n_groups = s.shape[0], a_train.shape[1]

In [None]:
def prepare_train_data(alpha, s, a_train, a_val):
    """Assemble the block matrices used for training and validation.

    Each result has the layout ``[[alpha * s, a], [a.T, 0]]`` where ``a`` is
    ``a_train`` for the first matrix and ``a_val`` for the second. The
    bottom-right block is left empty (``None``), which ``bmat`` fills with zeros.

    Returns:
        tuple: ``(c_train, c_val)`` sparse block matrices.
    """
    def _block(a):
        # Same layout for every split; only the off-diagonal block changes.
        return bmat([[alpha * s, a], [a.transpose(), None]])

    return _block(a_train), _block(a_val)

## SVD

In [None]:
"vrati listu (za k = 1:n) precisiona i recalla na testu za jednog usera"
def evaluate_latent_factor_user(i, n, u, sig, vt, c_train, c_val, n_groups):
    """Score one user's recommendations at every cutoff from 1 to ``n``.

    Args:
        i: Row index of the user for whom recommendations are generated.
        n: Largest number of recommendations to evaluate.
        u, sig, vt: Factors of the low-rank approximation; the user's scores
            are row ``i`` of ``u @ diag(sig) @ vt``.
        c_train: Sparse matrix whose last ``n_groups`` columns in row ``i``
            mark the groups already known for the user (non-zero = known).
        c_val: Sparse matrix whose last ``n_groups`` columns in row ``i`` hold
            the held-out labels (an entry equal to 1 counts as a positive).
        n_groups: Number of trailing columns that correspond to groups.

    Returns:
        list[tuple]: ``n`` tuples ``(precision, sensitivity, specificity)``,
        one per cutoff ``1..n``. Sensitivity is reported as 1 when the user
        has no positives, and specificity as 1 when the user has no negatives.
    """
    # Only the group columns are needed, so slice vt before multiplying
    # instead of reconstructing the full row.
    score = (u[i, :] * sig) @ vt[:, -n_groups:]
    true_labels = c_val.getrow(i).toarray().flatten()[-n_groups:]
    train_labels = c_train.getrow(i).toarray().flatten()[-n_groups:]

    # Push groups seen in training to the very end of the ranking. Zeroing
    # their score is not enough: a zero still outranks every unseen group
    # whose predicted score is negative.
    score = np.where(train_labels != 0, -np.inf, score)
    score_index = np.flip(np.argsort(score))

    positives = np.sum(true_labels)
    negatives = n_groups - positives

    user_i_stats = []
    true_positives = 0
    for predictions in range(1, n + 1):
        # Running hit count: only the newly added recommendation is inspected.
        # Past the end of the ranking there is nothing new to add.
        if predictions <= score_index.size:
            true_positives += int(true_labels[score_index[predictions - 1]] == 1)
        true_negatives = negatives - (predictions - true_positives)
        precision = true_positives / predictions
        sensitivity = true_positives / positives if positives != 0 else 1
        specificity = true_negatives / negatives if negatives != 0 else 1
        user_i_stats.append((precision, sensitivity, specificity))
    return user_i_stats

In [None]:
def evaluate_latent_factors(u, sig, vt, c_train, c_val, n_users, n_groups, n=100):
    """Average per-user ranking metrics over the first ``n_users`` users.

    Args:
        u, sig, vt: Factors forwarded to ``evaluate_latent_factor_user``.
        c_train, c_val: Matrices forwarded to ``evaluate_latent_factor_user``.
        n_users: Number of users (row indices ``0..n_users-1``) to evaluate.
        n_groups: Number of group columns, forwarded unchanged.
        n: Largest recommendation cutoff evaluated per user (default 100,
            the value that used to be hard-coded).

    Returns:
        tuple: ``(precision, sensitivity, specificity)``, each a 1-D array of
        length ``n`` holding the mean over users at cutoffs ``1..n``.
    """
    # One entry per user; each entry is a list of n (pr, se, sp) tuples.
    stats = [
        evaluate_latent_factor_user(user, n, u, sig, vt, c_train, c_val, n_groups)
        for user in tqdm(range(n_users))
    ]
    # Stack to shape (n_users, n, 3) and average over the user axis, giving an
    # (n, 3) array whose columns are mean precision / sensitivity / specificity.
    pss = np.asarray(stats, dtype=float).mean(axis=0)
    return pss[:, 0], pss[:, 1], pss[:, 2]

In [None]:
def get_score(precision, sensitivity, specificity):
    """Area under the curve of sensitivity against ``1 - specificity``.

    Args:
        precision: Unused; kept so the three metric arrays can be passed
            together.
        sensitivity: Sequence of sensitivity values (curve y-coordinates).
        specificity: Sequence of specificity values of the same length; the
            x-coordinates are ``1 - specificity``.

    Returns:
        The absolute value of the trapezoidal integral, so the result does not
        depend on whether the x-coordinates increase or decrease.
    """
    x = 1.0 - np.asarray(specificity, dtype=float)
    # np.trapz is deprecated since NumPy 2.0 in favour of np.trapezoid; prefer
    # the new name and fall back to the old one on older NumPy versions.
    integrate = getattr(np, "trapezoid", None) or np.trapz
    area = integrate(y=sensitivity, x=x)
    return abs(area)

In [None]:
def latent_factors_model(alpha, svd_rank, s, a_train, a_val, n_users, n_groups):
    """Fit a truncated SVD for one ``(alpha, svd_rank)`` pair and evaluate it.

    The training and validation block matrices are built with
    ``prepare_train_data``, cast to float64, and the training matrix is
    factorised with ``svds``. The factors are then evaluated against the
    validation matrix.

    Returns:
        dict: the two hyper-parameters, the ``score`` from ``get_score`` and
        the averaged ``precision`` / ``sensitivity`` / ``specificity`` arrays.
    """
    train_block, val_block = (
        block.astype(np.float64)
        for block in prepare_train_data(alpha, s, a_train, a_val)
    )
    factors = svds(train_block, k=svd_rank)
    metrics = evaluate_latent_factors(*factors, train_block, val_block, n_users, n_groups)
    precision, sensitivity, specificity = metrics
    return {
        "alpha": alpha,
        "svd_rank": svd_rank,
        "score": get_score(precision, sensitivity, specificity),
        "precision": precision,
        "sensitivity": sensitivity,
        "specificity": specificity,
    }

In [None]:
def validate_latent_factor_model(alphas, svd_ranks, s, a_train, a_val, n_users, n_groups):
    """Evaluate ``latent_factors_model`` on every ``(alpha, svd_rank)`` pair.

    Pairs are visited with ``alphas`` as the outer loop and ``svd_ranks`` as
    the inner loop.

    Returns:
        list: one result dict per pair, in visiting order.
    """
    return [
        latent_factors_model(alpha, rank, s, a_train, a_val, n_users, n_groups)
        for alpha in alphas
        for rank in svd_ranks
    ]

In [None]:
# Hyper-parameter grid: candidate alpha values and candidate SVD ranks.
alphas, svd_ranks = [3], [10]

In [None]:
# Run the grid search on the validation split.
validation_scores = validate_latent_factor_model(
    alphas=alphas,
    svd_ranks=svd_ranks,
    s=s,
    a_train=a_train,
    a_val=a_val,
    n_users=n_users,
    n_groups=n_groups,
)

In [None]:
# Display the value assigned to validation_scores as this cell's output.
validation_scores

## Katz

In [None]:
def katz(t, beta, k):
    """Truncated Katz sum ``beta*t + beta^2*t^2 + ... + beta^k*t^k``.

    Args:
        t: Square matrix supporting ``*`` with a scalar and ``@`` with itself.
        beta: Scalar damping factor applied once per power.
        k: Number of terms; for ``k <= 1`` only ``beta * t`` is returned.
    """
    term = beta * t
    total = term
    for _ in range(1, k):
        # Next power: damp the previous term, then multiply by t.
        term = (beta * term) @ t
        total = total + term
    return total

In [None]:
def katz_2(t, beta, k, rank=3):
    """Truncated Katz sum computed through a low-rank SVD of ``t``.

    With ``t ~= U S Vt`` (``rank`` singular triplets from ``svds``), each power
    is ``t^j ~= U S (Vt U S)^(j-1) Vt``. The sum over ``j = 1..k`` of
    ``beta^j t^j`` is therefore accumulated on small ``rank x rank`` matrices
    and only expanded with ``U`` and ``Vt`` once at the end.

    Args:
        t: Matrix accepted by ``svds`` and providing ``astype``.
        beta: Scalar damping factor applied once per power.
        k: Number of terms in the sum.
        rank: Number of singular triplets kept (default 3, the value that used
            to be hard-coded). Limited by what ``svds`` accepts for ``t``.

    Returns:
        Dense array ``U @ core @ Vt`` with the shape of ``t``.
    """
    t = t.astype(np.float64)
    u, sigma, vt = svds(t, k=rank)
    s = np.diag(sigma)
    vtu = vt @ u

    # core accumulates sum_j beta^j * S (Vt U S)^(j-1); the first term is beta*S.
    core = beta * s
    # carry always holds beta^j * S (Vt U S)^(j-1) Vt U, i.e. the last term
    # already extended by Vt U and waiting for the next factor S.
    carry = core @ vtu
    for _ in range(k - 1):
        carry = beta * carry @ s
        core = core + carry
        carry = carry @ vtu
    return u @ core @ vt