In [1]:
import pickle
from lightfm import LightFM
from scipy.io import mmread
from collections import defaultdict
import logging
from sklearn.metrics import roc_auc_score


# Configure root logging: INFO level, each record rendered as
# "<timestamp> <level>:<message>".
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s:%(message)s',
)



In [None]:
def measure_auc(X, model, user_features=None, item_features=None):
    """Return the ROC AUC of ``model``'s predictions on the entries of ``X``.

    Parameters
    ----------
    X
        Sparse interaction matrix exposing ``row``, ``col`` and ``data``
        attributes (such as a scipy COO matrix). ``row``/``col`` are passed
        to the model as user/item ids and ``data`` is used as the true labels.
    model
        Object with a ``predict(user_ids=..., item_ids=..., user_features=...,
        item_features=...)`` method, e.g. a fitted LightFM model.
    user_features, item_features
        Optional feature matrices forwarded to ``model.predict``. When left as
        ``None`` the notebook-level ``member_features`` / ``song_features``
        are used, which is what the original two-argument call did.

    Returns
    -------
    The value of ``roc_auc_score(X.data, predictions)``.
    """
    # Fall back to the notebook globals so existing `measure_auc(X, model)`
    # calls behave exactly as before. The globals are looked up at call time,
    # so they only need to exist by the time this function is invoked.
    if user_features is None:
        user_features = member_features
    if item_features is None:
        item_features = song_features

    preds = model.predict(user_ids=X.row,
                          item_ids=X.col,
                          user_features=user_features,
                          item_features=item_features)
    return roc_auc_score(X.data, preds)

In [2]:
# Train / validation matrices, read from MatrixMarket files.
X_train, X_val = (
    mmread(mtx_path)
    for mtx_path in ('data/processed/X_train.mtx',
                     'data/processed/X_validation.mtx')
)
# Feature matrices; these are handed to the model as user_features and
# item_features respectively.
member_features, song_features = (
    mmread(mtx_path)
    for mtx_path in ('data/processed/member_X.mtx',
                     'data/processed/song_X.mtx')
)

In [None]:
# Accumulator: one dict of settings and AUC scores is appended per fitted model.
results = []

# Cumulative epoch counts at which train/validation AUC is recorded.
iterations = [5, 20]

# Values tried for the model's `no_components`.
components = [1, 5, 10]

# Regularisation strengths 3**-6 ... 3**-1, applied to both user_alpha and
# item_alpha.
alphas = [3**x for x in range(-6, 0)]

In [None]:
    
# Grid search: for every (alpha, no_components) pair, train one LightFM model
# incrementally and record train/validation AUC at each epoch checkpoint listed
# in `iterations`. Each model's scores are appended to `results` as one dict
# with keys 'alpha', 'no_components', 'train_auc<N>' and 'val_auc<N>'.
for a in alphas:
    for nc in components:
        logging.info('fitting alpha = %f, %d components', a, nc)
        these_results = {'alpha': a, 'no_components': nc}
        # NOTE(review): no seed is passed to the model, so repeated runs may
        # give different scores. Consider passing `random_state` — confirm the
        # installed LightFM version supports it.
        model = LightFM(no_components=nc,
                        learning_schedule='adagrad',
                        loss='logistic',
                        user_alpha=a,
                        item_alpha=a)
        # Checkpoints are cumulative epoch counts: train only the epochs still
        # missing to reach the next one instead of refitting from scratch.
        # Sorting and de-duplicating keeps the increment positive.
        epochs_done = 0
        for checkpoint in sorted(set(iterations)):
            model.fit_partial(X_train,
                              user_features=member_features,
                              item_features=song_features,
                              epochs=checkpoint - epochs_done,
                              verbose=False)
            epochs_done = checkpoint
            these_results['train_auc%d' % checkpoint] = measure_auc(X_train, model)
            these_results['val_auc%d' % checkpoint] = measure_auc(X_val, model)
        results.append(these_results)

2017-09-30 11:48:01,171 INFO:fitting alpha = 0.001372, 1 components
2017-09-30 11:50:52,958 INFO:fitting alpha = 0.001372, 5 components
2017-09-30 11:54:39,906 INFO:fitting alpha = 0.001372, 10 components
2017-09-30 12:00:23,100 INFO:fitting alpha = 0.004115, 1 components
2017-09-30 12:03:15,263 INFO:fitting alpha = 0.004115, 5 components
2017-09-30 12:09:30,289 INFO:fitting alpha = 0.004115, 10 components
2017-09-30 12:18:36,095 INFO:fitting alpha = 0.012346, 1 components
2017-09-30 12:20:49,781 INFO:fitting alpha = 0.012346, 5 components
2017-09-30 12:24:37,918 INFO:fitting alpha = 0.012346, 10 components
2017-09-30 12:30:33,562 INFO:fitting alpha = 0.037037, 1 components
2017-09-30 12:33:05,887 INFO:fitting alpha = 0.037037, 5 components
2017-09-30 12:38:07,355 INFO:fitting alpha = 0.037037, 10 components
2017-09-30 12:45:59,805 INFO:fitting alpha = 0.111111, 1 components
2017-09-30 12:50:33,273 INFO:fitting alpha = 0.111111, 5 components
2017-09-30 13:04:18,932 INFO:fitting alpha =

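The full grid search took XXX minutes running single-threaded. (TODO: fill in the measured runtime. For reference, the log above shows the first 14 of the 18 fits taking about 76 minutes.)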