In [None]:
import numpy as np
import scipy
from scipy.sparse import csc_matrix, bmat, load_npz, csr_matrix
from scipy.sparse.linalg import svds
import matplotlib.pyplot as plt
from tqdm import tqdm
import random
import implicit

## Load Data

In [None]:
# Load the precomputed sparse matrices from .npz files under data/.
# NOTE(review): judging by how they are combined in prepare_train_data, `s` looks like a
# square user-user matrix and the `a_*` matrices look like user-group matrices for the
# train / test / validation splits — confirm against the data preparation step.
s = load_npz("data/yt_s.npz")
a_train = load_npz("data/yt_a_train.npz")
a_test = load_npz("data/yt_a_test.npz")
a_val = load_npz("data/yt_a_val.npz")
n_groups = a_train.shape[1]  # number of columns of the training matrix
n_users = s.shape[0]  # number of rows of s

In [None]:
def prepare_train_data(alpha, s, a_train, a_val):
    """Assemble the block matrices used for training and for validation.

    For an interaction matrix ``a`` the assembled matrix is::

        [[alpha * s, a   ],
         [a.T,       None]]

    where ``None`` is an all-zero block whose shape is inferred by ``bmat``.

    Args:
        alpha: scalar weight applied to ``s``.
        s: square sparse matrix placed in the top-left block.
        a_train: sparse matrix placed in the top-right block of the training matrix.
        a_val: sparse matrix placed in the top-right block of the validation matrix.

    Returns:
        Tuple ``(c_train, c_val)`` of float64 sparse matrices.
    """
    def _assemble(a):
        # Same layout for both splits; only the off-diagonal block differs.
        blocks = [[alpha * s, a], [a.transpose(), None]]
        return bmat(blocks).astype(np.float64)

    return _assemble(a_train), _assemble(a_val)

## SVD

In [None]:
def svd_get_recs(i, model, train_labels, n_groups):
    """Rank the groups for row ``i`` using a truncated SVD factorisation.

    Args:
        i: row index into ``u`` of the user to score.
        model: tuple ``(u, sig, vt)`` as returned by ``scipy.sparse.linalg.svds``.
        train_labels: length-``n_groups`` array; non-zero entries mark groups that
            are already present in the training data for this user.
        n_groups: number of trailing columns of the reconstruction that are groups.

    Returns:
        1-D integer array of group indices (0 .. n_groups-1) sorted by descending
        reconstructed score, with the groups flagged in ``train_labels`` last.
    """
    u, sig, vt = model
    # Row i of U @ diag(sig) @ Vt restricted to the trailing n_groups columns.
    # Scaling the row by sig is equivalent to multiplying by diag(sig) without
    # materialising the diagonal matrix.
    score = (u[i, :] * sig) @ vt[:, -n_groups:]
    # Push groups seen in training to the very end of the ranking. Zeroing their
    # score instead would place them above every group whose score is negative.
    already_seen = np.asarray(train_labels).astype(bool)
    score = np.where(already_seen, -np.inf, score)
    return np.flip(np.argsort(score))

In [None]:
"vrati listu (za k = 1:n) precisiona i recalla na testu za jednog usera"
def evaluate_model_user(i, n, model, c_train, c_val, n_groups, model_type):
    """Compute precision, sensitivity and specificity at k = 1..n for one user.

    Args:
        i: index of the user for whom recommendations are generated.
        n: maximum number of recommendations to evaluate.
        model: model object forwarded to the model-specific ranking function.
        c_train: sparse training matrix; the last ``n_groups`` entries of row ``i``
            are the user's training labels.
        c_val: sparse validation matrix; the last ``n_groups`` entries of row ``i``
            are the user's true labels.
        n_groups: number of trailing columns that correspond to groups.
        model_type: ``"SVD"`` or ``"ALS"``.

    Returns:
        List of ``n`` tuples ``(precision, sensitivity, specificity)``, one per
        cut-off k = 1..n.

    Raises:
        ValueError: if ``model_type`` is not a supported model name.
    """
    true_labels = c_val.getrow(i).toarray().flatten()[-n_groups:]
    train_labels = c_train.getrow(i).toarray().flatten()[-n_groups:]

    # New models are added here: each branch must produce score_index, the group
    # indices sorted from best to worst according to the model.
    if model_type == "SVD":
        score_index = svd_get_recs(i, model, train_labels, n_groups)
    elif model_type == "ALS":
        score_index = als_get_recs(i, n, model, c_train, n_groups)
    else:
        raise ValueError("unknown model_type: {!r}".format(model_type))

    positives = np.sum(true_labels)
    negatives = n_groups - positives
    user_i_stats = []
    for predictions in range(1, n + 1):
        recommendations = score_index[:predictions]
        true_positives = np.sum(true_labels[recommendations] == 1)
        # predictions - true_positives is the number of false positives.
        true_negatives = negatives - (predictions - true_positives)
        precision = true_positives / predictions
        # With no positives (resp. negatives) the ratio is undefined; use 1.
        sensitivity = true_positives / positives if positives != 0 else 1
        specificity = true_negatives / negatives if negatives != 0 else 1
        user_i_stats.append((precision, sensitivity, specificity))
    return user_i_stats

In [None]:
def evaluate_model(model, c_train, c_val, n_users, n_groups, model_type, n_recs=50):
    """Average per-user precision / sensitivity / specificity curves.

    Only users with at least one non-zero validation label among the last
    ``n_groups`` columns of ``c_val`` are evaluated.

    Args:
        model: model object forwarded to ``evaluate_model_user``.
        c_train: sparse training matrix.
        c_val: sparse validation matrix.
        n_users: number of leading rows to iterate over.
        n_groups: number of trailing columns that correspond to groups.
        model_type: model name understood by ``evaluate_model_user``.
        n_recs: number of recommendation cut-offs (k = 1..n_recs) to evaluate.

    Returns:
        Tuple ``(precision, sensitivity, specificity)`` of 1-D arrays of length
        ``n_recs``; entry k-1 is the mean over the evaluated users at cut-off k.

    Raises:
        ValueError: if no user has a non-zero validation label.
    """
    # Convert once so the per-user row extraction below works on CSR matrices.
    c_train = csr_matrix(c_train)
    c_val = csr_matrix(c_val)

    stats = []
    for i in tqdm(range(n_users)):
        if np.sum(c_val.getrow(i).toarray().flatten()[-n_groups:]) != 0:
            stats.append(evaluate_model_user(i, n_recs, model, c_train, c_val, n_groups, model_type))
    if not stats:
        raise ValueError("no user has a non-zero validation label; nothing to evaluate")

    # stats is a list with one entry per evaluated user; each entry is a list of
    # n_recs tuples (pr, se, sp). Stacking gives shape (users, n_recs, 3), and the
    # mean over axis 0 is the per-cut-off average over users.
    pss = np.mean(np.array(stats, dtype=np.float64), axis=0)
    # Columns of pss correspond to (pr, se, sp).
    return pss[:, 0], pss[:, 1], pss[:, 2]

In [None]:
def get_score(precision, sensitivity, specificity):
    """Area under the (1 - specificity, sensitivity) curve (trapezoidal rule).

    Args:
        precision: unused; kept so the function can be called with all three curves.
        sensitivity: sequence of sensitivity values (y axis).
        specificity: sequence of specificity values; ``1 - specificity`` is the x axis.

    Returns:
        Absolute value of the trapezoidal area, so the result does not depend on
        whether the x values are increasing or decreasing.
    """
    x = 1 - np.asarray(specificity, dtype=float)
    # np.trapz was renamed to np.trapezoid in NumPy 2.0; use whichever is available.
    trapezoid = np.trapezoid if hasattr(np, "trapezoid") else np.trapz
    area = trapezoid(y=sensitivity, x=x)
    return abs(area)

In [None]:
def svd_model(alpha, svd_rank, s, a_train, a_val, n_users, n_groups):
    """Fit a truncated SVD on the training block matrix and evaluate it.

    Args:
        alpha: weight passed to ``prepare_train_data``.
        svd_rank: number of singular triplets requested from ``svds``.
        s, a_train, a_val: matrices passed to ``prepare_train_data``.
        n_users, n_groups: sizes passed to ``evaluate_model``.

    Returns:
        Dict with keys ``alpha``, ``svd_rank``, ``score``, ``precision``,
        ``sensitivity`` and ``specificity``.
    """
    c_train, c_val = prepare_train_data(alpha, s, a_train, a_val)
    factorization = svds(c_train, k=svd_rank)
    curves = evaluate_model(factorization, c_train, c_val, n_users, n_groups, model_type="SVD")

    result = {"alpha": alpha, "svd_rank": svd_rank, "score": get_score(*curves)}
    # curves is (precision, sensitivity, specificity), in that order.
    result.update(zip(("precision", "sensitivity", "specificity"), curves))
    return result

In [None]:
def validate_svd_model(alphas, svd_ranks, s, a_train, a_val, n_users, n_groups):
    """Evaluate ``svd_model`` on every (alpha, svd_rank) combination.

    Combinations are visited with ``alphas`` as the outer loop and ``svd_ranks``
    as the inner loop.

    Returns:
        List of the result dicts returned by ``svd_model``, in visiting order.
    """
    return [
        svd_model(alpha, rank, s, a_train, a_val, n_users, n_groups)
        for alpha in alphas
        for rank in svd_ranks
    ]

In [None]:
# Hyper-parameter grid for the SVD model: every alpha is combined with every rank.
alphas = [3]
svd_ranks = [10]

In [None]:
# Run the SVD validation over the grid; one result dict per (alpha, svd_rank) pair.
validation_scores_svd = validate_svd_model(alphas, svd_ranks, s, a_train, a_val, n_users, n_groups)

In [None]:
# Display the list of SVD result dicts.
validation_scores_svd

In [None]:
# Plot sensitivity against (1 - specificity) for the first SVD result.
plt.plot(1 - validation_scores_svd[0]["specificity"], validation_scores_svd[0]["sensitivity"])

## EVALUACIJA GENERALNO

In [None]:
"treba u evaluate_model_user za svaki model dodati granu u if-u u funkciji evaluate_model_user u kojoj se napravi score_index"
"score_index je lista/np.array koji sadrži indekse grupa sortirane po scoreu koji model daje, dakle sortirana lista grupa za recommendat"
"ideja je da se dotad sve sto ti treba za evaluirati model prenosi u varijabli model, a onda unutar tog ifa se pozove neka funkcija koja evaluira"
"za validaciju i kreiranje modela predlazem da se rade posebne funkcije za svaki jer nije bas zgodno napravit generalno, mogu biti po uzoru na ove"
# Generic evaluation recipe: obtain the three curves from evaluate_model, then reduce
# them to a single score with get_score.
# NOTE(review): `model`, `c_train` and `c_val` are not assigned at notebook scope in any
# cell visible above (they are locals of svd_model), so this cell looks like a usage
# template and would raise NameError on a fresh kernel — confirm before Restart & Run All.
precision, sensitivity, specificity = evaluate_model(model, c_train, c_val, n_users, n_groups, model_type = "SVD")
score = get_score(precision, sensitivity, specificity)
"dole primjer za als - 3 modificirane funkcije i onda se samo pozove - nije bas savrseno al mislim da ce bit skroz ok za nasih par modela"

## ALS

In [None]:
def als_get_recs(i, n, model, c_train, n_groups):
    """Rank the groups for user ``i`` using a fitted ALS model.

    Args:
        i: index of the user passed to ``model.recommend``.
        n: number of recommendations requested from the model.
        model: fitted model exposing ``recommend(...)`` that yields
            ``(item_id, score)`` pairs.
        c_train: square training matrix whose last ``n_groups`` rows/columns are
            the groups; all leading indices are excluded from the recommendations.
        n_groups: number of trailing indices that correspond to groups.

    Returns:
        1-D integer array of group indices (0 .. n_groups-1); the recommended
        groups come first in descending score order, all other groups follow.
    """
    # Offset of the first group id inside c_train. Derived from the arguments so
    # the function does not depend on a notebook-level n_users variable.
    n_leading = c_train.shape[0] - n_groups
    recs = model.recommend(
        i,
        user_items=csr_matrix(c_train),
        N=n,
        filter_already_liked_items=True,
        filter_items=list(range(n_leading)),
    )
    # Groups that were not recommended get -inf so they can never outrank a
    # recommended group, even when the model returns negative scores.
    score = np.full(n_groups, -np.inf)
    for item_id, item_score in recs:
        score[item_id - n_leading] = item_score
    return np.flip(np.argsort(score))

In [None]:
def als_model(alpha, svd_rank, s, a_train, a_val, n_users, n_groups):
    """Fit an implicit ALS model on the training block matrix and evaluate it.

    Args:
        alpha: weight passed to ``prepare_train_data``.
        svd_rank: number of latent factors for the ALS model. The parameter name
            mirrors ``svd_model`` and is kept for backward compatibility.
        s, a_train, a_val: matrices passed to ``prepare_train_data``.
        n_users, n_groups: sizes passed to ``evaluate_model``.

    Returns:
        Dict with keys ``alpha``, ``n_factors``, ``score``, ``precision``,
        ``sensitivity`` and ``specificity``.
    """
    # Use the value passed by the caller; previously the body read the
    # notebook-level ``n_factors`` list instead of this argument.
    n_factors = svd_rank
    c_train, c_val = prepare_train_data(alpha, s, a_train, a_val)
    model = implicit.als.AlternatingLeastSquares(factors = n_factors, regularization = 2)
    model.fit(c_train)
    precision, sensitivity, specificity = evaluate_model(model, c_train, c_val, n_users, n_groups, model_type = "ALS")
    score = get_score(precision, sensitivity, specificity)
    return {"alpha" : alpha, "n_factors" : n_factors, "score" : score, "precision" : precision, "sensitivity" : sensitivity, "specificity" : specificity}

In [None]:
def validate_als_model(alphas, n_factors, s, a_train, a_val, n_users, n_groups):
    """Evaluate ``als_model`` on every (alpha, number-of-factors) combination.

    Combinations are visited with ``alphas`` as the outer loop and ``n_factors``
    as the inner loop.

    Returns:
        List of the result dicts returned by ``als_model``, in visiting order.
    """
    return [
        als_model(alpha, factor_count, s, a_train, a_val, n_users, n_groups)
        for alpha in alphas
        for factor_count in n_factors
    ]

In [None]:
# Hyper-parameter grid for the ALS model: every alpha is combined with every factor count.
alphas = [3]
n_factors = [10]

In [None]:
# Run the ALS validation over the grid; one result dict per (alpha, n_factors) pair.
validation_scores_als = validate_als_model(alphas, n_factors, s, a_train, a_val, n_users, n_groups)

In [None]:
# Display the list of ALS result dicts.
validation_scores_als

In [None]:
# Plot sensitivity against (1 - specificity) for the first ALS result.
plt.plot(1 - validation_scores_als[0]["specificity"], validation_scores_als[0]["sensitivity"])

## Katz

In [None]:
def katz(t, beta, k):
    """Truncated Katz sum  beta*t + beta^2*t^2 + ... + beta^k*t^k.

    Args:
        t: square matrix supporting scalar multiplication, ``@`` and ``+``.
        beta: scalar damping factor.
        k: number of terms; for ``k <= 1`` only the first term is returned.

    Returns:
        The accumulated sum, of the same kind of matrix the operations on ``t`` produce.
    """
    term = beta * t
    total = term
    for _ in range(1, k):
        # Next power: multiply the previous term by beta and by t on the right.
        term = (beta * term) @ t
        total = total + term
    return total

In [None]:
def katz_2(t, beta, k, rank=3):
    """Low-rank approximation of the truncated Katz sum  sum_{i=1..k} beta^i t^i.

    ``t`` is replaced by its truncated SVD ``U S Vt``. Since
    ``(U S Vt)^i = U S (Vt U S)^(i-1) Vt``, every power can be accumulated on
    small ``rank x rank`` matrices and expanded with ``U`` and ``Vt`` once at the end.

    Args:
        t: square matrix (dense or sparse) accepted by ``scipy.sparse.linalg.svds``.
        beta: scalar damping factor.
        k: number of terms; for ``k <= 1`` only the first term is returned.
        rank: number of singular triplets used for the approximation (default 3,
            the value that was previously hard-coded).

    Returns:
        Dense array ``U @ core @ Vt`` approximating the Katz sum.
    """
    t = t.astype(np.float64)
    u, sing, vt = svds(t, k=rank)
    s = np.diag(sing)

    # Factor that advances one power inside the small space: (Vt U) S.
    step = (vt @ u) @ s

    term = beta * s  # i = 1 term: beta * S
    core = term
    for _ in range(k - 1):
        # beta^(i+1) S (Vt U S)^i  from  beta^i S (Vt U S)^(i-1)
        term = beta * (term @ step)
        core = core + term
    return u @ core @ vt