In [51]:
import pickle 
import matplotlib.pyplot as plt

from operator import itemgetter
from blending import blend
from baselines import *
from matrix_factorization import *

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Load common datasets

In [2]:
with open('../data/pickle/train.pickle', 'rb') as file:
    train = pickle.load(file)
with open('../data/pickle/test.pickle', 'rb') as file:
    test = pickle.load(file)

### Train all the models on it

In [5]:
g_mean = global_mean(train)
user_means = compute_user_means(train)
item_means = compute_item_means(train)

with open(b'../data/pickle/blending/g_mean_train.pickle', 'wb') as f:
    pickle.dump(g_mean, f)
with open(b'../data/pickle/blending/user_means_train.pickle', 'wb') as f:
    pickle.dump(user_means, f)
with open(b'../data/pickle/blending/item_means_train.pickle', 'wb') as f:
    pickle.dump(item_means, f)

In [None]:
_, user_features_sgd, item_features_sgd = matrix_factorization_sgd(train, test=None, num_features=25,
                                                                   lambda_user=0.011, lambda_item=0.25,
                                                                   verbose=True, num_epochs=50)
with open(b'../data/pickle/blending/sgd_item_train.pickle', 'wb') as f:
    pickle.dump(item_features_sgd, f)
with open(b'../data/pickle/blending/sgd_user_train.pickle', 'wb') as f:
    pickle.dump(user_features_sgd, f)

In [6]:
_, user_features_als, item_features_als = matrix_factorization_als(train, test=None, num_features=20,
                                                                   lambda_user=0.014, lambda_item=0.575,
                                                                   stop_criterion=0.00001, verbose=True)
with open(b'../data/pickle/blending/als_item_train.pickle', 'wb') as f:
    pickle.dump(item_features_als, f)
with open(b'../data/pickle/blending/als_user_train.pickle', 'wb') as f:
    pickle.dump(user_features_als, f)

Learning the matrix factorization using ALS...


### Load all the trained models

In [3]:
with open(b'../data/pickle/blending/g_mean_train.pickle', 'rb') as f:
    g_mean = pickle.load(f)
with open(b'../data/pickle/blending//user_means_train.pickle', 'rb') as f:
    user_means = pickle.load(f)
with open(b'../data/pickle/blending/item_means_train.pickle', 'rb') as f:
    item_means = pickle.load(f)
with open(b'../data/pickle/blending/als_item_train.pickle', 'rb') as f:
    item_features_als = pickle.load(f)
with open(b'../data/pickle/blending/als_user_train.pickle', 'rb') as f:
    user_features_als = pickle.load(f)
with open(b'../data/pickle/blending/sgd_item_train.pickle', 'rb') as f:
    item_features_sgd = pickle.load(f)
with open(b'../data/pickle/blending/sgd_user_train.pickle', 'rb') as f:
    user_features_sgd = pickle.load(f)

In [4]:
cf_item = load_data('../data/films_CF_train_inf.csv')

In [5]:
cf_user = load_data('../data/users_CF_train_inf.csv')

### Compute blending coefficients

In [6]:
nnz_row, nnz_col = train.nonzero()
nnz_train = list(zip(nnz_row, nnz_col))

nnz_row, nnz_col = test.nonzero()
nnz_test = list(zip(nnz_row, nnz_col))

als = np.zeros(shape=train.shape)
sgd = np.zeros(shape=train.shape)
for i, (item, user) in enumerate(nnz_train):
    als[item, user] = user_features_als[:, user].T.dot(item_features_als[:, item])
    sgd[item, user] = user_features_sgd[:, user].T.dot(item_features_sgd[:, item])    
for i, (item, user) in enumerate(nnz_test):
    als[item, user] = user_features_als[:, user].T.dot(item_features_als[:, item])
    sgd[item, user] = user_features_sgd[:, user].T.dot(item_features_sgd[:, item])

In [7]:
methods = ['als', 'cf_item', 'cf_user']

In [8]:
blend(methods, 1, nnz_train, nnz_test, train, test, 
      g_mean, user_means, item_means, sgd, als, cf_item, cf_user, predict=False)

(array([ 1.02889231,  0.34081636, -0.34453513]),
 0.89749167682417741,
 0.9780226355600159)

### Finding the best combination

In [9]:
all_methods = ['global_mean', 'user_mean', 'item_mean', 'sgd', 'als', 'cf_item', 'cf_user']

In [10]:
def sub_lists(my_list):
    if len(my_list) > 0:
        tmp = sub_lists(my_list[1:])
        return tmp + [[my_list[0]] + k for k in tmp]
    else:
        return [my_list]

In [11]:
l = []
for methods in sub_lists(all_methods):
    if methods == []:
        continue
    w, tr, te = blend(methods, 1, nnz_train, nnz_test, train, test, 
                g_mean, user_means, item_means, sgd, als, cf_item, cf_user, predict=False)
    l.append([tr, te, methods, w])

In [19]:
ll = sorted(l, key=itemgetter(1))

In [195]:
best_methods = ll[1]

In [196]:
best_methods[2]

['als']