In [None]:
import numpy as np
from scipy.sparse.linalg import svds
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split
from scipy.linalg import lu
from numpy.linalg import svd

In [None]:
# Load the ratings array from disk and report its shape and value range.

data_path = "lastfmlog.npy"
ratings = np.load(data_path)

print("Loaded dataset: {}".format(ratings.shape))

# The array must be two-dimensional for this unpacking to succeed;
# presumably rows are users and columns are items (artists) -- confirm against the data source.
n_users, n_items = ratings.shape

ratings_max = np.max(ratings)
ratings_min = np.min(ratings)
print("ratings: max {0} - min {1}".format(ratings_max, ratings_min))

In [None]:
# Load the learned representation (features + parameter vector) and derive
# per-row statistics: values implied by the representation, gaps to the
# row-wise best value, and deviation from the observed ratings.

repr_path = "representations/lastfm_k103_d15_maxdev0.62.npz"

# np.load on an .npz archive returns a lazily-read NpzFile that keeps the file
# open; read both arrays inside a `with` block so the handle is closed.
with np.load(repr_path) as f:
    phi = f["features"]
    theta = f["theta"]

# Values implied by the representation: contraction of the last axis of phi
# with theta. NOTE(review): phi is indexed as phi[i, :, :] further down, so it
# is presumably (n_users, n_items, d) with theta of length d -- confirm.
mu = phi.dot(theta)

# gap[r, c] = (max of row r of mu) - mu[r, c]; always >= 0, and 0 at the row's argmax.
gap = np.max(mu, axis=1)[:, np.newaxis] - mu

# Smallest non-zero gap per row. Zero gaps are masked with +inf instead of a
# finite sentinel, so the result stays correct whatever the scale of mu; a row
# whose entries all tie with the max yields inf ("no positive gap") rather
# than an arbitrary finite number.
gap_tmp = np.where(gap == 0, np.inf, gap)
min_gaps = np.min(gap_tmp, axis=1)

# Absolute deviation between the observed ratings and the values implied by
# the representation, and its maximum over each row.
dev = np.abs(ratings - mu)
max_dev = np.max(dev, axis=1)

In [None]:
activity_thresh = 35

# Count, for every row of `ratings`, how many entries are strictly positive.
user_activity = (ratings > 0).sum(axis=1)

# Keep only the rows whose count is strictly greater than activity_thresh.
most_active_users = np.flatnonzero(user_activity > activity_thresh)

# Generate one problem file per selected user: take that user's feature
# matrix, drop the directions with (numerically) zero singular value, and
# save the reduced features together with the matching parameter vector and
# the user's observed ratings.

for n, i in enumerate(most_active_users):
    print("[User {0}] max_dev: {1} min_gap: {2}".format(i, max_dev[i], min_gaps[i]))
    ground_truth = ratings[i]

    fmat = phi[i, :, :]
    n_items, d = fmat.shape

    # Thin SVD of the feature matrix; `sp` is the number of singular values
    # above the 1e-8 cutoff, i.e. the numerical rank that is kept.
    left_vecs, sing_vals, right_vecs_t = svd(fmat, full_matrices=False)
    sp = (sing_vals > 1e-8).sum()
    print("[Dim reduction] d={0}, span={1}".format(d, sp))

    # Reduced features are the kept left singular vectors scaled by their
    # singular values; the kept right singular vectors map theta into the
    # reduced space.
    right_vecs_t = right_vecs_t[:sp, :]
    new_phi = np.dot(left_vecs[:, :sp], np.diag(sing_vals[:sp]))

    # Sanity check: the truncated factors should reproduce the original features.
    reconstruction = new_phi.dot(right_vecs_t)
    squared_err = np.abs(reconstruction - fmat) ** 2
    rmse = np.sqrt(np.mean(squared_err))
    print("[Dim reduction] Reconstruction rmse: {}".format(rmse))

    # Sanity check: the reduced problem should yield the same values as mu for this user.
    new_theta = right_vecs_t.dot(theta)
    new_mu = new_phi.dot(new_theta)
    mu_squared_err = np.abs(mu[i, :] - new_mu) ** 2
    mu_rmse = np.sqrt(np.mean(mu_squared_err))
    print("[Dim reduction] mu rmse: {}".format(mu_rmse))
    print()

    # The file name encodes the problem index, number of items, reduced
    # dimension and the user's max deviation rounded to 3 decimals.
    out_name = 'lastfm_problem{0}_k{1}_d{2}_maxdev{3}.npz'.format(n, n_items, sp, round(max_dev[i],3))
    np.savez_compressed(out_name, features=new_phi, theta=new_theta, ground_truth=ground_truth)
