# Kiva collaborative filtering
## Polara framework

***
### Imports

In [1]:
# essentials
import os
import sys
import csv
import itertools
import copy
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
from scipy.sparse import csr_matrix, lil_matrix

# polara framework
from polara.recommender.data import RecommenderData
from polara.recommender.models import SVDModel, PopularityModel, RandomModel, CooccurrenceModel
from polara.recommender.external.implicit.ialswrapper import ImplicitALS
from polara.recommender.external.implicit.ibprwrapper import ImplicitBPR
from polara.datasets.movielens import get_movielens_data
from polara.evaluation import evaluation_engine as ee
from polara.evaluation.plotting import show_hit_rates, show_precision_recall, show_ranking, show_relevance

# utilities
import codecs
import logging
import time
import tqdm

# visualization libraries
import seaborn as sns
import matplotlib.pyplot as plt

# serialization
import pickle

In [19]:
pd.set_option('display.max_columns', 40)
pd.set_option('display.max_rows', 300)

In [3]:
%env MKL_NUM_THREADS=1
logging.basicConfig(level=logging.DEBUG)

env: MKL_NUM_THREADS=1


***
### Pickle loads

In [None]:
# Load the cached loan tables, closing every file handle right after reading.
# NOTE(review): unpickling can execute arbitrary code from the file -- only
# load pickle files that were produced by this notebook.
with open("pickle/loans_table.p", "rb") as pickle_file:
    loans_table = pickle.load(pickle_file)
with open("pickle/funded_loans_table.p", "rb") as pickle_file:
    funded_loans_table = pickle.load(pickle_file)
with open("pickle/funded_loan_ids_set.p", "rb") as pickle_file:
    funded_loan_ids_set = pickle.load(pickle_file)

In [None]:
# Load the cached utility matrix; the `with` block closes the file handle.
# NOTE(review): only unpickle files produced by this notebook.
with open("pickle/utility_matrix.p", "rb") as pickle_file:
    utility_matrix = pickle.load(pickle_file)

In [None]:
# Load the cached utility-matrix data frame; the `with` block closes the file handle.
# NOTE(review): only unpickle files produced by this notebook.
with open("pickle/utility_matrix_df.p", "rb") as pickle_file:
    utility_matrix_df = pickle.load(pickle_file)

In [None]:
utility_matrix_df.head()

***
### Dataset reading

In [None]:
# Read the raw loans snapshot and order its rows by the 'raised_time' column.
loans_table = (
    pd.read_csv('additional-kiva-snapshot/loans.csv')
    .sort_values(by='raised_time')
)

In [None]:
# Keep only the loans whose status is 'funded'.
funded_loans_table = loans_table.loc[loans_table['status'] == 'funded']

In [None]:
# Observation window: START_DATE is exclusive, END_DATE is inclusive.
START_DATE = '2013-10-01'
END_DATE = '2015-05-01'

# Restrict the funded loans to the observation window.
funded_loans_table = funded_loans_table.loc[
    funded_loans_table['raised_time'].gt(START_DATE)
    & funded_loans_table['raised_time'].le(END_DATE)
]

# Ids of the remaining loans, kept as a set for fast membership tests.
funded_loan_ids_set = set(funded_loans_table['loan_id'])

In [None]:
# pickle.dump(loans_table, open("pickle/loans_table.p", "wb"))
# pickle.dump(funded_loans_table, open("pickle/funded_loans_table.p", "wb"))
# pickle.dump(funded_loan_ids_set, open("pickle/funded_loan_ids_set.p", "wb"))

In [None]:
# free ram
del loans_table
del funded_loans_table

In [None]:
# Collect, for every funded loan inside the observation window, the set of
# lenders that contributed to it.
loans = set()
lenders = set()
loans_lenders_dict = {}

with open('additional-kiva-snapshot/loans_lenders.csv', newline='', encoding="utf8") as csvfile:
    csv_reader = csv.reader(csvfile)
    next(csv_reader, None)  # skip the header row (no-op on an empty file)
    for row in csv_reader:
        if not row:
            # csv.reader yields an empty list for blank lines
            continue
        loan_id, lender_ids = row
        loan_id = int(loan_id)
        if loan_id not in funded_loan_ids_set:
            continue

        # An empty lender field would otherwise split into [''] and register
        # a phantom lender with an empty id.
        new_lenders = {lender for lender in lender_ids.split(", ") if lender}
        loans.add(loan_id)
        loans_lenders_dict[loan_id] = new_lenders
        lenders.update(new_lenders)

# Sort so that row/column indices derived from these lists are identical
# across runs (plain set iteration order of strings changes with hash
# randomisation).
loans = sorted(loans)
lenders = sorted(lenders)

print('Loans-lenders dict filled')

***
### Utility matrix creation

In [None]:
# Map every lender id to its position in `lenders` (used as the column index).
lenders_reverse_index = dict(zip(lenders, range(len(lenders))))
# One row per loan, one column per lender; filled in incrementally afterwards.
utility_matrix = lil_matrix(
    (len(loans), len(lenders)),
    dtype=np.float64,
)

In [None]:
# Set a 1.0 for every (loan, lender) pair found in the lending records.
for loan_index, loan in enumerate(loans):
    for lender_index in (lenders_reverse_index[lender]
                         for lender in loans_lenders_dict[loan]):
        utility_matrix[loan_index, lender_index] = 1.0

print('Filled utility matrix')

In [None]:
utility_matrix = utility_matrix.tocsr()

In [None]:
# pickle.dump(utility_matrix, open("pickle/utility_matrix.p", "wb"))

#### Conversion to SparseDataFrame

In [None]:
# pd.SparseDataFrame was removed in pandas 1.0. Keep using it where it still
# exists, otherwise build a DataFrame with sparse columns via the accessor.
if hasattr(pd, 'SparseDataFrame'):
    utility_matrix_df = pd.SparseDataFrame(utility_matrix)
else:
    utility_matrix_df = pd.DataFrame.sparse.from_spmatrix(utility_matrix)

In [None]:
# pickle.dump(utility_matrix_df, open("pickle/utility_matrix_df.p", "wb"))

In [None]:
def write_matrix_to_csv(matrix, filename):
    """Write the stored entries of a sparse matrix to a CSV file.

    The file starts with the header ``itemid,userid,feedback`` followed by one
    ``row,column,value`` line per stored entry of the matrix.

    Parameters
    ----------
    matrix : scipy sparse matrix
        Any object providing ``tocoo()``.
    filename : str
        Path of the CSV file to create (overwritten if it exists).
    """
    # tocoo() hands back a COO matrix unchanged, so no type check is needed.
    # The previous isinstance test referenced `scipy.sparse.coo_matrix`
    # although `scipy` itself is never imported, which raised NameError.
    coo = matrix.tocoo()

    with open(filename, 'w', encoding='utf-8') as f:
        f.write("%s,%s,%s\n" % ('itemid', 'userid', 'feedback'))
        # writelines batches the rows instead of issuing one write per entry
        f.writelines("%s,%s,%s\n" % entry
                     for entry in zip(coo.row, coo.col, coo.data))

#### Load Kiva dataframe

In [4]:
kiva_dataframe = pd.read_csv('kiva_dataframe.csv', engine='c')

In [5]:
kiva_dataframe.shape

(5339455, 3)

In [6]:
kiva_dataframe.head()

Unnamed: 0,itemid,userid,feedback
0,0,7703,1.0
1,0,8354,1.0
2,0,9000,1.0
3,0,10247,1.0
4,0,21284,1.0


***
### Polara example

In [None]:
# Polara example on the MovieLens data: build three models on one shared
# data model and evaluate them with cross-validation.
# define models
ml_data = get_movielens_data(get_genres=False)
data_model = RecommenderData(ml_data, 'userid', 'movieid', 'rating')

# data_model = RecommenderData(kiva_dataframe, 'userid', 'itemid', 'feedback')

svd = SVDModel(data_model)
popular = PopularityModel(data_model)
# NOTE(review): the name `random` would shadow the stdlib module if it were ever imported here
random = RandomModel(data_model)
models = [svd, popular, random]

metrics = ['ranking', 'relevance'] # metrics for evaluation: nDCG, Precision, Recall, etc.
folds = [1, 2, 3, 4, 5] # use all 5 folds for cross-validation (default)
topk_values = [1, 5, 10, 20, 50] # values of k to experiment with

# run 5-fold CV experiment
result = ee.run_cv_experiment(models, folds, metrics,
                              fold_experiment=ee.topk_test,
                              topk_list=topk_values)

In [None]:
# calculate average values across all folds for e.g. relevance metrics
# (groupby replaces the `level=` argument of DataFrame.mean, which was removed
#  in pandas 2.0; sort=False keeps the groups in order of first appearance)
scores = result.groupby(level=['top-n', 'model'], sort=False).mean() # use .std() instead of .mean() for standard deviation
# scores.xs('nDCG', level='metric', axis=1).unstack('model')

In [None]:
scores

***
### Cross-validation

In [7]:
# Build the recommender data model on the Kiva interactions
# (columns: 'userid', 'itemid', 'feedback').
# NOTE(review): seed=0 presumably fixes the random split -- confirm in the Polara docs.
data_model = RecommenderData(kiva_dataframe, 'userid', 'itemid', 'feedback', seed=0)
# warm_start is switched off before prepare() so the setting applies to the split
data_model.warm_start = False

# time the data preparation step and report it through the debug log
start = time.time()
data_model.prepare()
logging.debug("Prepared data in %0.2fs", time.time() - start)

Preparing data...
207 unique itemid's within 228 holdout interactions were filtered. Reason: not in the training data.
85452 unique userid's within 136096 holdout interactions were filtered. Reason: not in the training data.
114 of 44225 userid's were filtered out from holdout. Reason: incompatible number of items.
Done.
There are 5070574 events in the training and 132333 events in the holdout.


DEBUG:root:Prepared data in 132.61s


In [None]:
# Baseline models, all built on the shared data model.
# NOTE(review): the name `random` would shadow the stdlib module if it were ever imported here
random = RandomModel(data_model)
popular = PopularityModel(data_model)
svd = SVDModel(data_model)

# BPR model with 200 factors, trained for 100 epochs without GPU support
bpr = ImplicitBPR(data_model)
bpr.rank = 200
bpr.num_epochs = 100
bpr.use_gpu = False

In [None]:
bpr.learning_rate

Generate a list of ALS models by parameter grid:

In [9]:
def get_base_model():
    """Return a fresh ImplicitALS model on `data_model` with the shared defaults.

    The defaults applied are epsilon=1e-8, weight_func=np.log2 and
    use_gpu=False; grid-specific parameters are set by the caller.
    """
    als_model = ImplicitALS(data_model)
    shared_defaults = (
        ('epsilon', 1e-8),
        ('weight_func', np.log2),
        ('use_gpu', False),
    )
    for attribute, setting in shared_defaults:
        setattr(als_model, attribute, setting)
    return als_model

def get_grid_models(cv_param_grid):
    """Build one ALS model per combination of the parameter grid.

    Parameters
    ----------
    cv_param_grid : dict
        Maps a model attribute name to the list of values to try.

    Returns
    -------
    list
        One model from ``get_base_model()`` per element of the Cartesian
        product of the value lists. Each model has the combination's values
        set as attributes and its ``method`` set to a descriptive name such
        as ``ALS_rank-200_alpha-50``. An empty grid yields a single base
        model named ``ALS`` instead of failing on tuple unpacking.
    """
    param_names = list(cv_param_grid)
    models = []
    # product() with no iterables yields one empty combination, which covers
    # the empty-grid case without special handling.
    for combination in itertools.product(*cv_param_grid.values()):
        next_model = get_base_model()
        name_parts = ['ALS']
        # distinct loop names: the old inner `v` shadowed the outer loop variable
        for param_name, param_value in zip(param_names, combination):
            setattr(next_model, param_name, param_value)
            name_parts.append('%s-%s' % (param_name, param_value))

        next_model.method = '_'.join(name_parts)
        models.append(next_model)
    return models

Cross-validation setup:

In [37]:
cv_param_grid = {
    'rank': [200, 250],
    'regularization': [0.01],
    'alpha': [50],
    'num_epochs': [20, 50, 100],
}

In [38]:
# Models to compare: the fixed baselines plus one ALS model per grid combination.
basic_models = [random, popular, svd, bpr]
als_models = get_grid_models(cv_param_grid)
models = basic_models + als_models

metrics = ['ranking', 'relevance'] # metrics for evaluation: nDCG, Precision, Recall, etc.
folds = [1, 2, 3] # folds passed to the CV experiment
topk_values = [5, 10, 20] # values of k to experiment with

In [39]:
# wall-clock timer for the whole experiment
start = time.time()

# run the CV experiment over the folds listed in `folds`
result = ee.run_cv_experiment(models, folds, metrics,
                              fold_experiment=ee.topk_test,
                              topk_list=topk_values)


logging.debug("Cross-validation experiment finished in %0.2fs", time.time() - start)

Preparing data...
211 unique itemid's within 234 holdout interactions were filtered. Reason: not in the training data.
85443 unique userid's within 136172 holdout interactions were filtered. Reason: not in the training data.
100 of 44237 userid's were filtered out from holdout. Reason: incompatible number of items.
Done.
There are 5070446 events in the training and 132411 events in the holdout.
PureSVD training time: 3.653s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.250s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 1.2831928730010986
DEBUG:implicit:Running 40 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 40.0/40 [01:45<00:00,  2.43s/it]


ALS_rank-100_regularization-0.01_alpha-50_num_epochs-40 training time: 01m:47s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 1.2496850490570068
DEBUG:implicit:Running 60 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 60.0/60 [02:39<00:00,  2.50s/it]


ALS_rank-100_regularization-0.01_alpha-50_num_epochs-60 training time: 02m:41s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 1.2653238773345947
DEBUG:implicit:Running 80 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 80.0/80 [03:31<00:00,  2.65s/it]


ALS_rank-100_regularization-0.01_alpha-50_num_epochs-80 training time: 03m:33s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 1.3121559619903564
DEBUG:implicit:Running 100 ALS iterations
100%|██████████████████████████████████████████████████████████████████████████████| 100.0/100 [04:26<00:00,  2.43s/it]


ALS_rank-100_regularization-0.01_alpha-50_num_epochs-100 training time: 04m:28s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 1.8937618732452393
DEBUG:implicit:Running 40 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 40.0/40 [02:33<00:00,  3.40s/it]


ALS_rank-150_regularization-0.01_alpha-50_num_epochs-40 training time: 02m:35s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 1.952660322189331
DEBUG:implicit:Running 60 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 60.0/60 [03:49<00:00,  3.46s/it]


ALS_rank-150_regularization-0.01_alpha-50_num_epochs-60 training time: 03m:52s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.219s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 1.8643686771392822
DEBUG:implicit:Running 80 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 80.0/80 [05:03<00:00,  3.42s/it]


ALS_rank-150_regularization-0.01_alpha-50_num_epochs-80 training time: 05m:05s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 1.9228496551513672
DEBUG:implicit:Running 100 ALS iterations
100%|██████████████████████████████████████████████████████████████████████████████| 100.0/100 [06:12<00:00,  3.53s/it]


ALS_rank-150_regularization-0.01_alpha-50_num_epochs-100 training time: 06m:15s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 2.6883513927459717
DEBUG:implicit:Running 40 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 40.0/40 [03:05<00:00,  4.32s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-40 training time: 03m:09s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.203s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 2.6087849140167236
DEBUG:implicit:Running 60 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 60.0/60 [04:42<00:00,  4.28s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-60 training time: 04m:46s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 3.561655282974243
DEBUG:implicit:Running 80 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 80.0/80 [06:13<00:00,  4.39s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-80 training time: 06m:17s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 2.7024545669555664
DEBUG:implicit:Running 100 ALS iterations
100%|██████████████████████████████████████████████████████████████████████████████| 100.0/100 [08:04<00:00,  4.33s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-100 training time: 08m:07s
Evaluated model in 260.15s
Evaluated model in 108.11s
Evaluated model in 168.75s
Evaluated model in 203.08s
Evaluated model in 179.54s
Evaluated model in 181.71s
Evaluated model in 197.90s
Evaluated model in 203.98s
Evaluated model in 174.26s
Evaluated model in 174.17s
Evaluated model in 187.64s
Evaluated model in 218.63s
Evaluated model in 201.76s
Evaluated model in 211.66s
Evaluated model in 195.28s
Evaluated model in 0.50s
Evaluated model in 0.47s
Evaluated model in 0.50s
Evaluated model in 0.48s
Evaluated model in 0.50s
Evaluated model in 0.48s
Evaluated model in 0.48s
Evaluated model in 0.49s
Evaluated model in 0.49s
Evaluated model in 0.48s
Evaluated model in 0.48s
Evaluated model in 0.48s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.48s
Evaluated model in 0.47s
Evaluated model in 0.45s
Evaluated model in 0.45s
Evaluated model in 0.51s
Evaluated model in 0.48s
Evaluated model

DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 1.2496976852416992
DEBUG:implicit:Running 40 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 40.0/40 [01:42<00:00,  2.35s/it]


ALS_rank-100_regularization-0.01_alpha-50_num_epochs-40 training time: 01m:44s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 1.2028393745422363
DEBUG:implicit:Running 60 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 60.0/60 [02:40<00:00,  2.36s/it]


ALS_rank-100_regularization-0.01_alpha-50_num_epochs-60 training time: 02m:42s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.187s
DEBUG:implicit:Initialized factors in 1.259425401687622
DEBUG:implicit:Running 80 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 80.0/80 [03:24<00:00,  2.40s/it]


ALS_rank-100_regularization-0.01_alpha-50_num_epochs-80 training time: 03m:26s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 1.2367868423461914
DEBUG:implicit:Running 100 ALS iterations
100%|██████████████████████████████████████████████████████████████████████████████| 100.0/100 [04:23<00:00,  2.49s/it]


ALS_rank-100_regularization-0.01_alpha-50_num_epochs-100 training time: 04m:26s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 1.8745579719543457
DEBUG:implicit:Running 40 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 40.0/40 [02:27<00:00,  3.41s/it]


ALS_rank-150_regularization-0.01_alpha-50_num_epochs-40 training time: 02m:30s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 1.9682509899139404
DEBUG:implicit:Running 60 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 60.0/60 [03:43<00:00,  3.46s/it]


ALS_rank-150_regularization-0.01_alpha-50_num_epochs-60 training time: 03m:46s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 1.8737566471099854
DEBUG:implicit:Running 80 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 80.0/80 [04:56<00:00,  3.53s/it]


ALS_rank-150_regularization-0.01_alpha-50_num_epochs-80 training time: 04m:59s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 1.8588998317718506
DEBUG:implicit:Running 100 ALS iterations
100%|██████████████████████████████████████████████████████████████████████████████| 100.0/100 [06:04<00:00,  3.40s/it]


ALS_rank-150_regularization-0.01_alpha-50_num_epochs-100 training time: 06m:07s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.158s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 2.45253586769104
DEBUG:implicit:Running 40 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 40.0/40 [03:04<00:00,  4.26s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-40 training time: 03m:07s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.156s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 2.923424482345581
DEBUG:implicit:Running 60 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 60.0/60 [04:36<00:00,  4.34s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-60 training time: 04m:40s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 3.392364025115967
DEBUG:implicit:Running 80 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 80.0/80 [06:09<00:00,  4.33s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-80 training time: 06m:13s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 3.280439853668213
DEBUG:implicit:Running 100 ALS iterations
100%|██████████████████████████████████████████████████████████████████████████████| 100.0/100 [07:37<00:00,  4.40s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-100 training time: 07m:41s
Evaluated model in 250.93s
Evaluated model in 104.55s
Evaluated model in 179.87s
Evaluated model in 162.09s
Evaluated model in 159.58s
Evaluated model in 160.62s
Evaluated model in 161.12s
Evaluated model in 171.53s
Evaluated model in 180.33s
Evaluated model in 169.47s
Evaluated model in 174.57s
Evaluated model in 184.98s
Evaluated model in 185.06s
Evaluated model in 185.19s
Evaluated model in 180.98s
Evaluated model in 0.46s
Evaluated model in 0.48s
Evaluated model in 0.48s
Evaluated model in 0.48s
Evaluated model in 0.47s
Evaluated model in 0.48s
Evaluated model in 0.49s
Evaluated model in 0.48s
Evaluated model in 0.52s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.45s
Evaluated model in 0.45s
Evaluated model in 0.45s
Evaluated model in 0.46s
Evaluated model in 0.47s
Evaluated model

DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 1.2965328693389893
DEBUG:implicit:Running 40 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 40.0/40 [01:42<00:00,  2.34s/it]


ALS_rank-100_regularization-0.01_alpha-50_num_epochs-40 training time: 01m:44s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.156s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 1.23410964012146
DEBUG:implicit:Running 60 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 60.0/60 [02:33<00:00,  2.38s/it]


ALS_rank-100_regularization-0.01_alpha-50_num_epochs-60 training time: 02m:35s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 1.2093708515167236
DEBUG:implicit:Running 80 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 80.0/80 [03:25<00:00,  2.35s/it]


ALS_rank-100_regularization-0.01_alpha-50_num_epochs-80 training time: 03m:27s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.193s
DEBUG:implicit:Calculated transpose in 0.156s
DEBUG:implicit:Initialized factors in 1.2340517044067383
DEBUG:implicit:Running 100 ALS iterations
100%|██████████████████████████████████████████████████████████████████████████████| 100.0/100 [04:17<00:00,  2.41s/it]


ALS_rank-100_regularization-0.01_alpha-50_num_epochs-100 training time: 04m:19s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 1.8589308261871338
DEBUG:implicit:Running 40 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 40.0/40 [02:24<00:00,  3.35s/it]


ALS_rank-150_regularization-0.01_alpha-50_num_epochs-40 training time: 02m:26s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.203s
DEBUG:implicit:Calculated transpose in 0.156s
DEBUG:implicit:Initialized factors in 2.436920642852783
DEBUG:implicit:Running 60 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 60.0/60 [03:37<00:00,  3.53s/it]


ALS_rank-150_regularization-0.01_alpha-50_num_epochs-60 training time: 03m:40s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 1.9370312690734863
DEBUG:implicit:Running 80 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 80.0/80 [04:49<00:00,  3.41s/it]


ALS_rank-150_regularization-0.01_alpha-50_num_epochs-80 training time: 04m:52s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.156s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 1.9838993549346924
DEBUG:implicit:Running 100 ALS iterations
100%|██████████████████████████████████████████████████████████████████████████████| 100.0/100 [05:59<00:00,  3.33s/it]


ALS_rank-150_regularization-0.01_alpha-50_num_epochs-100 training time: 06m:01s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.203s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 3.3453621864318848
DEBUG:implicit:Running 40 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 40.0/40 [03:06<00:00,  4.37s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-40 training time: 03m:10s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 3.4679253101348877
DEBUG:implicit:Running 60 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 60.0/60 [04:36<00:00,  4.22s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-60 training time: 04m:41s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 2.5462698936462402
DEBUG:implicit:Running 80 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 80.0/80 [06:04<00:00,  4.38s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-80 training time: 06m:07s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.156s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 2.858694314956665
DEBUG:implicit:Running 100 ALS iterations
100%|██████████████████████████████████████████████████████████████████████████████| 100.0/100 [07:36<00:00,  4.24s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-100 training time: 07m:40s
Evaluated model in 254.70s
Evaluated model in 107.96s
Evaluated model in 164.45s
Evaluated model in 156.68s
Evaluated model in 159.96s
Evaluated model in 159.74s
Evaluated model in 159.77s
Evaluated model in 172.31s
Evaluated model in 172.61s
Evaluated model in 169.88s
Evaluated model in 172.45s
Evaluated model in 184.49s
Evaluated model in 185.18s
Evaluated model in 183.39s
Evaluated model in 181.58s
Evaluated model in 0.47s
Evaluated model in 0.45s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.48s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.48s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.47s
Evaluated model in 0.45s
Evaluated model in 0.45s
Evaluated model

DEBUG:root:Cross-validation experiment finished in 17790.02s


In [40]:
# Persist the evaluation results; the `with` block guarantees the file is
# flushed and closed (a bare open() handle may never be written out fully).
with open("eval_results/result_4.p", "wb") as result_file:
    pickle.dump(result, result_file)

In [None]:
# holdout_sizes = [1, 2, 5]

# result = ee.run_cv_experiment(models, folds, metrics,
#                               fold_experiment=ee.holdout_test,
#                               holdout_sizes=holdout_sizes)

In [None]:
# calculate average values across all folds for e.g. relevance metrics
# (groupby replaces the `level=` argument of DataFrame.mean, which was removed
#  in pandas 2.0; sort=False keeps the groups in order of first appearance)
scores = result.groupby(level=['top-n', 'model'], sort=False).mean() # use .std() instead of .mean() for standard deviation
# scores.xs('recall', level='metric', axis=1).unstack('model')
scores

***
### Variable sizes

In [14]:
import sys
def sizeof_fmt(num, suffix='B'):
    '''Format a byte count with binary prefixes, e.g. 1536 -> "1.5KiB".

    By Fred Cirera, after https://stackoverflow.com/a/1094933/1870254
    '''
    scaled = num
    for prefix in ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi'):
        # stop at the first prefix for which the magnitude drops below 1024
        if abs(scaled) < 1024.0:
            return "%3.1f%s%s" % (scaled, prefix, suffix)
        scaled = scaled / 1024.0
    # anything still >= 1024 after 'Zi' is reported in 'Yi'
    return "%.1f%s%s" % (scaled, 'Yi', suffix)

# Print the ten largest entries of the current namespace, biggest first.
# NOTE: sys.getsizeof is shallow -- it does not follow references into
# contained objects, so container and model objects report only their own
# header size, not the data they hold.
for name, size in sorted(((name, sys.getsizeof(value)) for name,value in locals().items()),
                         key= lambda x: -x[1])[:10]:
    print("{:>30}: {:>8}".format(name,sizeof_fmt(size)))

               RecommenderData:   2.0KiB
                   ImplicitALS:   2.0KiB
                      SVDModel:   1.4KiB
               PopularityModel:   1.4KiB
                   RandomModel:   1.4KiB
                    csr_matrix:   1.0KiB
                    lil_matrix:   1.0KiB
             CooccurrenceModel:   1.0KiB
                           _i1:   917.0B
                    als_models:   768.0B


In [13]:
del kiva_dataframe