In [None]:
import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import svd
from scipy.linalg import lu
from scipy.sparse.linalg import svds
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor

In [None]:
# load data

data_path = "lastfmlog.npy"

# The matrix is used exactly as stored: no centring or rescaling is applied here.
ratings = np.load(data_path)
if ratings.ndim != 2:
    # Fail early with a clear message instead of at the shape unpacking below.
    raise ValueError(
        "expected a 2-D (users x items) array, got shape {}".format(ratings.shape)
    )

# Histogram over every entry of the matrix (ravel avoids an extra copy).
fig, ax = plt.subplots()
ax.hist(ratings.ravel(), bins=10)
ax.set_xlabel("rating value")
ax.set_ylabel("number of (user, item) entries")
ax.set_title("Distribution of ratings")
plt.show()

print("Loaded dataset: {}".format(ratings.shape))

# Rows are users, columns are items; both sizes are reused by later cells.
n_users, n_items = ratings.shape
print("ratings: max {0} - min {1}".format(ratings.max(), ratings.min()))

In [None]:
# Truncated SVD of the rating matrix, keeping K singular triplets

K = 80
U, s, Vt = svds(ratings, k=K)
# Fold the singular values into the left factor so that ratings ~ U @ Vt.
s = np.diag(s)
U = U @ s

# Reconstruction quality of the rank-K approximation
Yhat = U @ Vt
abs_err = np.abs(Yhat - ratings)
rmse = np.sqrt(np.mean(abs_err ** 2))
print("K: ", K)
print("RMSE:", rmse)
print("MAX_ERR:", abs_err.max())

In [None]:
def save_model(net, normalize=False, rank_tol=1e-8):
    """Export a fitted MLP as a linear model over its last hidden layer and save it.

    The network is evaluated on the notebook-level design matrix ``X``. The
    activations of the last hidden layer, extended with a constant-1 column,
    become the features ``phi``; the output-layer weights with the output bias
    appended become ``theta``, so that ``phi.dot(theta)`` reproduces the
    network output. Diagnostics are printed, the features are projected onto
    the singular directions whose singular value exceeds ``rank_tol``, and the
    reduced pair is written to ``lastfm_k{n_items}_d{span}_maxdev{max_dev}.npz``
    (arrays ``features`` and ``theta``) in the current working directory.

    Reads the notebook globals ``X``, ``y``, ``ratings``, ``n_users`` and
    ``n_items``. Rows of ``X`` must be ordered user-major, i.e. row
    ``t * n_items + z`` belongs to user ``t`` and item ``z``.

    Parameters
    ----------
    net : fitted regressor
        Must expose ``hidden_layer_sizes``, ``n_layers_``, ``coefs_``,
        ``intercepts_`` and ``_forward_pass`` (e.g. scikit-learn's
        ``MLPRegressor``) and have a single output.
    normalize : bool, optional
        Currently unused; kept so existing calls keep working.
    rank_tol : float, optional
        Absolute threshold on the singular values of the feature matrix;
        directions at or below it are dropped. The default keeps the original
        behaviour. NOTE(review): the features are float32, so numerically
        redundant directions usually have singular values well above 1e-8 -
        pass a larger value if they should actually be removed.

    Returns
    -------
    numpy.ndarray
        ``mu`` of shape ``(n_users, n_items)``: the predictions of the
        full-dimensional linear model ``phi.dot(theta)``.
    """
    # Build features
    X_pred = X
    n_samples = X_pred.shape[0]

    # scikit-learn also accepts a bare int for a single hidden layer.
    hidden = net.hidden_layer_sizes
    hidden_layer_sizes = list(hidden) if hasattr(hidden, "__iter__") else [hidden]

    # Pre-allocate one activation buffer per layer, as _forward_pass expects.
    layer_units = [X_pred.shape[1]] + hidden_layer_sizes + [1]
    activations = [X_pred]
    for i in range(net.n_layers_ - 1):
        activations.append(np.empty((n_samples, layer_units[i + 1])))

    # NOTE: _forward_pass is a private scikit-learn method; it fills/replaces
    # the entries of `activations` in place.
    net._forward_pass(activations)
    y_pred = activations[-1]
    print("MSE (original):", np.mean((y_pred.flatten() - y) ** 2))

    # get weights: output-layer weights with the output bias appended as last row
    last_w = net.coefs_[-1]
    bias = np.array(net.intercepts_[-1]).reshape((1, 1))
    last_w = np.concatenate([last_w, bias])

    # get last-layer features, with a constant column that multiplies the bias
    last_feat = np.array(activations[-2], dtype=np.float32)
    last_feat = np.concatenate([last_feat, np.ones((n_samples, 1))], axis=1)

    # get prediction
    pred = last_feat.dot(last_w)
    print("MSE (recomputed with last layer only):", np.mean((pred.flatten() - y) ** 2))

    # get feature matrix: (user, item, feature), relying on user-major row order
    d = hidden_layer_sizes[-1] + 1
    print("d={0}".format(d))
    assert last_feat.shape[0] == n_users * n_items, (
        "X must contain exactly one row per (user, item) pair"
    )
    phi = last_feat.reshape(n_users, n_items, d).astype(np.float32)

    # get param
    theta = np.array(last_w, dtype=np.float32).squeeze()

    phi_norm = round(np.linalg.norm(phi, axis=2).max(), 2)
    print("phi max norm:", phi_norm)
    theta_norm = round(np.linalg.norm(theta), 2)
    print("theta norm:", theta_norm)

    # check predictions
    mu = phi.dot(theta)
    l1_dev = np.abs(ratings - mu).flatten()
    l2_dev = l1_dev ** 2
    print("l2 deviation (mu): max {} min {} mean {}".format(l2_dev.max(), l2_dev.min(), l2_dev.mean()))
    print("l1 deviation (mu): max {} min {} mean {}".format(l1_dev.max(), l1_dev.min(), l1_dev.mean()))
    max_dev = round(l1_dev.max(), 3)
    print("mu: max {0} - min {1}".format(mu.max(), mu.min()))

    # Per-user gap between the best item and every other item.
    gap = np.max(mu, axis=1)[:, np.newaxis] - mu
    print("gap max:", gap.max())
    # Mask the zero gap of the best item(s) with +inf so it can never win the
    # minimum below, whatever the scale of the ratings is.
    gap[gap == 0] = np.inf
    print("gap min:", gap.min())
    gap = np.min(gap, axis=1)
    print("# contexts with gap_min > 0.001:", np.sum(gap > 0.001))
    print("# contexts with gap_min > 0.01:", np.sum(gap > 0.01))
    print("# contexts with gap_min > 0.1:", np.sum(gap > 0.1))

    # remove redundant dimensions
    fmat = phi.reshape(-1, d)
    left, sing, right_t = svd(fmat, full_matrices=False)
    sp = np.sum(sing > rank_tol)
    print("[Dim reduction] d={0}, span={1}".format(d, sp))
    # Keep the leading `sp` directions and fold the singular values into the
    # left factor, so that fmat ~ left.dot(right_t).
    left = left[:, :sp] * sing[:sp]
    right_t = right_t[:sp, :]
    M = left.dot(right_t)
    rmse = np.sqrt(np.mean(np.abs(M - fmat) ** 2))
    print("[Dim reduction] Reconstruction rmse: {}".format(rmse))
    new_phi = left.reshape(phi.shape[0], phi.shape[1], sp)
    # Rotating theta by right_t keeps new_phi.dot(new_theta) ~ phi.dot(theta).
    new_theta = right_t.dot(theta)
    new_mu = new_phi.dot(new_theta)
    mu_rmse = np.sqrt(np.mean(np.abs(mu - new_mu) ** 2))
    print("[Dim reduction] mu rmse: {}".format(mu_rmse))

    np.savez_compressed('lastfm_k{0}_d{1}_maxdev{2}.npz'.format(n_items, sp, max_dev), features=new_phi, theta=new_theta)

    return mu

In [None]:
# generate datasets

# One sample per (user, item) pair, ordered user-major: row t * n_items + z is
# the user factor U[t] followed by the item factor Vt[:, z]. Built with
# repeat/tile instead of a Python loop over every pair.
user_part = np.repeat(U[:n_users], n_items, axis=0)
item_part = np.tile(Vt[:, :n_items].T, (n_users, 1))
X = np.concatenate([user_part, item_part], axis=1)

# Standardise every feature column; a constant column keeps its centred value
# (0) instead of turning into NaN/inf through a division by zero.
col_mean = np.mean(X, axis=0, keepdims=True)
col_std = np.std(X, axis=0, keepdims=True)
col_std[col_std == 0] = 1.0
X = (X - col_mean) / col_std

# Targets in the same user-major order as the rows of X (flatten returns a copy).
y = ratings[:n_users, :n_items].flatten()

In [None]:
# Train one network per candidate last-hidden-layer width d and export each one.
hidden = [256, 256]
ds = [8, 16, 32, 64]
test_size = 0.25
seed = 0  # fixes the train/test split and the weight initialisation
nets = {}  # fitted network for each d

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, random_state=seed
)

best_d = None
# R^2 can be negative, so start below any attainable score; starting at 0
# would leave best_d as None when every network scores below zero.
max_score = float("-inf")

for j in ds:
    size = hidden + [j]
    print("Training NN -- Size {0}".format(size))
    net = MLPRegressor(
        hidden_layer_sizes=size,
        max_iter=500,
        tol=1e-4,
        verbose=True,
        random_state=seed,
    ).fit(X_train, y_train)
    nets[j] = net
    print("R^2 full (size {0}): {1}".format(j, net.score(X, y)))
    score = net.score(X_test, y_test)
    print("R^2 test (size {0}): {1}".format(j, score))
    if score > max_score:
        max_score = score
        best_d = j
    print("Best so far: d={0} with R^2: {1}".format(best_d, max_score))
    print()
    print("Saving model...")
    save_model(net)
    print()

print("Maximum R^2: {0} - d={1}".format(max_score, best_d))