# Kiva collaborative filtering
## Polara framework

***
### Imports

In [1]:
# essentials
import os
import sys
import csv
import itertools
import copy
import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
from scipy.sparse import csr_matrix, lil_matrix

# polara framework
from polara.recommender.data import RecommenderData
from polara.recommender.models import SVDModel, PopularityModel, RandomModel, CooccurrenceModel
from polara.recommender.external.implicit.ialswrapper import ImplicitALS
from polara.recommender.external.implicit.ibprwrapper import ImplicitBPR
from polara.datasets.movielens import get_movielens_data
from polara.evaluation import evaluation_engine as ee
from polara.evaluation.plotting import show_hit_rates, show_precision_recall, show_ranking, show_relevance

# utilities
import codecs
import logging
import time
import tqdm

# visualization libraries
import seaborn as sns
import matplotlib.pyplot as plt

# serialization
import pickle

In [2]:
# Show up to 40 columns and 300 rows when a DataFrame is displayed.
pd.options.display.max_columns = 40
pd.options.display.max_rows = 300

In [3]:
# IPython magic: set MKL_NUM_THREADS=1 in the kernel's environment.
# NOTE(review): presumably this keeps MKL single-threaded so it does not
# compete with the parallelism of the `implicit` models - confirm.
%env MKL_NUM_THREADS=1
# Root logger at DEBUG level, so DEBUG records from libraries are shown too.
logging.basicConfig(level=logging.DEBUG)

env: MKL_NUM_THREADS=1


***
### Pickle loads

In [None]:
# Restore the cached loan tables and the funded-loan id set.
# Each file is opened in a context manager so its handle is closed right
# after loading instead of being left to the garbage collector.
# NOTE: pickle can execute arbitrary code while loading - only load files
# that were produced by this notebook.
with open("pickle/loans_table.p", "rb") as pickle_file:
    loans_table = pickle.load(pickle_file)
with open("pickle/funded_loans_table.p", "rb") as pickle_file:
    funded_loans_table = pickle.load(pickle_file)
with open("pickle/funded_loan_ids_set.p", "rb") as pickle_file:
    funded_loan_ids_set = pickle.load(pickle_file)

In [None]:
# Restore the cached utility matrix; the context manager closes the file
# handle as soon as loading finishes.
# NOTE: pickle can execute arbitrary code while loading - only load files
# that were produced by this notebook.
with open("pickle/utility_matrix.p", "rb") as pickle_file:
    utility_matrix = pickle.load(pickle_file)

In [None]:
# Restore the cached DataFrame form of the utility matrix; the context
# manager closes the file handle as soon as loading finishes.
# NOTE: pickle can execute arbitrary code while loading - only load files
# that were produced by this notebook.
with open("pickle/utility_matrix_df.p", "rb") as pickle_file:
    utility_matrix_df = pickle.load(pickle_file)

In [None]:
utility_matrix_df.head()

***
### Dataset reading

In [None]:
# Load the raw loans snapshot and order it by the `raised_time` column.
loans_table = pd.read_csv('additional-kiva-snapshot/loans.csv').sort_values(by='raised_time')

In [None]:
# Keep only the loans whose `status` column equals 'funded'.
is_funded = loans_table['status'] == 'funded'
funded_loans_table = loans_table[is_funded]
del is_funded

In [None]:
# Study window: START_DATE is exclusive, END_DATE is inclusive.
START_DATE = '2013-10-01'
END_DATE = '2015-05-01'

# Keep the funded loans raised inside the window.
# NOTE(review): `raised_time` is compared against plain strings; presumably
# the column holds ISO-formatted text so lexicographic order matches
# chronological order - confirm.
raised_time = funded_loans_table['raised_time']
funded_loans_table = funded_loans_table.loc[raised_time.gt(START_DATE) & raised_time.le(END_DATE)]
del raised_time

# Ids of the retained loans, as a set for fast membership tests.
funded_loan_ids_set = set(funded_loans_table['loan_id'])

In [None]:
# pickle.dump(loans_table, open("pickle/loans_table.p", "wb"))
# pickle.dump(funded_loans_table, open("pickle/funded_loans_table.p", "wb"))
# pickle.dump(funded_loan_ids_set, open("pickle/funded_loan_ids_set.p", "wb"))

In [None]:
# free ram: drop both loan tables from the namespace
del loans_table, funded_loans_table

In [None]:
# Parse the loan -> lenders CSV and collect, for the funded loans only:
#   loans_lenders_dict : loan id (int) -> set of lender ids (str)
#   lenders            : every lender id seen on a retained loan
#   loans              : every retained loan id
loans_lenders_dict = {}
lenders = set()

with open('additional-kiva-snapshot/loans_lenders.csv', newline='', encoding="utf8") as csvfile:
    csv_reader = csv.reader(csvfile)
    next(csv_reader, None)  # skip the header row (no-op on an empty file)
    for loan_id, lender_ids in csv_reader:
        loan_id = int(loan_id)
        if loan_id not in funded_loan_ids_set:
            continue

        # The second column is a single ", "-separated string of lender ids.
        new_lenders = set(lender_ids.split(", "))
        loans_lenders_dict[loan_id] = new_lenders
        lenders.update(new_lenders)

# Sort instead of list(set): the iteration order of a set of strings changes
# between interpreter runs (hash randomization), which would make the matrix
# row/column indices derived from these lists irreproducible.
loans = sorted(loans_lenders_dict)
lenders = sorted(lenders)

print('Loans-lenders dict filled')

***
### Utility matrix creation

In [None]:
# Map every lender id to its position in `lenders` (its matrix column).
lenders_reverse_index = dict(zip(lenders, range(len(lenders))))
# Empty loans x lenders matrix in LIL format, to be filled entry by entry.
matrix_shape = (len(loans), len(lenders))
utility_matrix = lil_matrix(matrix_shape, dtype=np.float64)

In [None]:
# Mark every (loan, lender) pair found in the lending records with 1.0.
for row, loan_id in enumerate(loans):
    for lender_id in loans_lenders_dict[loan_id]:
        utility_matrix[row, lenders_reverse_index[lender_id]] = 1.0

print('Filled utility matrix')

In [None]:
# Filling is done: convert the LIL matrix to CSR format.
utility_matrix = utility_matrix.tocsr()

In [None]:
# pickle.dump(utility_matrix, open("pickle/utility_matrix.p", "wb"))

#### Conversion to SparseDataFrame

In [None]:
# pd.SparseDataFrame was removed in pandas 1.0. Prefer the sparse accessor
# (available since pandas 0.25) and fall back to the legacy class only on
# older installations that lack it.
# NOTE(review): the two constructors may treat missing entries differently
# (fill value) - confirm before comparing frames built by different versions.
if hasattr(pd.DataFrame, 'sparse'):
    utility_matrix_df = pd.DataFrame.sparse.from_spmatrix(utility_matrix)
else:
    utility_matrix_df = pd.SparseDataFrame(utility_matrix)

In [None]:
# pickle.dump(utility_matrix_df, open("pickle/utility_matrix_df.p", "wb"))

In [None]:
def write_matrix_to_csv(matrix, filename):
    """Write the stored entries of a sparse matrix to a CSV file.

    The file starts with the header ``itemid,userid,feedback`` followed by
    one ``row,col,value`` line per stored entry of the matrix.

    Args:
        matrix: a scipy sparse matrix; anything that is not already a
            ``coo_matrix`` is converted with ``.tocoo()``.
        filename: path of the CSV file to create (overwritten if present).
    """
    # Imported locally: the notebook only imports specific names from
    # scipy.sparse, so the bare name `scipy` is not available here.
    from scipy.sparse import coo_matrix

    if not isinstance(matrix, coo_matrix):
        matrix = matrix.tocoo()

    with open(filename, 'w', encoding='utf-8') as f:
        f.write("%s,%s,%s\n" % ('itemid', 'userid', 'feedback'))
        f.writelines(
            "%s,%s,%s\n" % (i, j, v)
            for i, j, v in zip(matrix.row, matrix.col, matrix.data)
        )

#### Load Kiva dataframe

In [4]:
# Load the interaction table (columns: itemid, userid, feedback) with the C parser.
kiva_dataframe = pd.read_csv('kiva_dataframe.csv', engine='c')

In [5]:
kiva_dataframe.shape

(5339455, 3)

In [6]:
kiva_dataframe.head()

Unnamed: 0,itemid,userid,feedback
0,0,7703,1.0
1,0,8354,1.0
2,0,9000,1.0
3,0,10247,1.0
4,0,21284,1.0


***
### Polara example

In [None]:
# define models
# NOTE: this cell runs on the MovieLens data; the commented-out line below
# shows the equivalent data model for the Kiva interactions.
ml_data = get_movielens_data(get_genres=False)
data_model = RecommenderData(ml_data, 'userid', 'movieid', 'rating')

# data_model = RecommenderData(kiva_dataframe, 'userid', 'itemid', 'feedback')

svd = SVDModel(data_model)
popular = PopularityModel(data_model)
# NOTE: `random` names a model instance here, not the stdlib `random` module.
random = RandomModel(data_model)
models = [svd, popular, random]

metrics = ['ranking', 'relevance'] # metrics for evaluation: nDCG, Precision, Recall, etc.
folds = [1, 2, 3, 4, 5] # use all 5 folds for cross-validation (default)
topk_values = [1, 5, 10, 20, 50] # values of k to experiment with

# run 5-fold CV experiment
result = ee.run_cv_experiment(models, folds, metrics,
                              fold_experiment=ee.topk_test,
                              topk_list=topk_values)

In [None]:
# calculate average values across all folds for e.g. relevance metrics
# (grouping on the index levels replaces the `level=` argument of
# DataFrame.mean, which newer pandas releases no longer accept;
# sort=False keeps the groups in their order of first appearance)
scores = result.groupby(level=['top-n', 'model'], sort=False).mean() # use .std instead of .mean for standard deviation
# scores.xs('nDCG', level='metric', axis=1).unstack('model')

In [None]:
scores

***
### Cross-validation

In [7]:
# Wrap the Kiva interactions in a Polara data model; the three strings are
# the names of the user, item and feedback columns (presumably in that order,
# as in the MovieLens example - confirm against the Polara signature).
data_model = RecommenderData(kiva_dataframe, 'userid', 'itemid', 'feedback', seed=0)
# NOTE(review): presumably switches off Polara's warm-start test scenario - confirm.
data_model.warm_start = False

# Time the data preparation step and log its duration.
start = time.time()
data_model.prepare()
logging.debug("Prepared data in %0.2fs", time.time() - start)

Preparing data...
207 unique itemid's within 228 holdout interactions were filtered. Reason: not in the training data.
85452 unique userid's within 136096 holdout interactions were filtered. Reason: not in the training data.
114 of 44225 userid's were filtered out from holdout. Reason: incompatible number of items.
Done.
There are 5070574 events in the training and 132333 events in the holdout.


DEBUG:root:Prepared data in 166.46s


In [8]:
# Baseline recommenders sharing the same data model.
# NOTE: `random` names a model instance here, not the stdlib `random` module.
random = RandomModel(data_model)
popular = PopularityModel(data_model)
svd = SVDModel(data_model)

# BPR model from the `implicit` wrapper, trained on CPU for 100 epochs.
bpr = ImplicitBPR(data_model)
bpr.rank = 200  # presumably the number of latent factors - confirm
bpr.num_epochs = 100
bpr.use_gpu = False

Generate a list of ALS models by parameter grid:

In [9]:
def get_base_model():
    """Return a fresh ImplicitALS model on `data_model` with the shared base settings.

    The settings applied to every model are ``epsilon = 1e-8``,
    ``weight_func = np.log2`` and ``use_gpu = False``.
    """
    base_settings = (
        ('epsilon', 1e-8),
        ('weight_func', np.log2),
        ('use_gpu', False),
    )
    als = ImplicitALS(data_model)
    for attribute, setting in base_settings:
        setattr(als, attribute, setting)
    return als

def get_grid_models(cv_param_grid, base_model_factory=None):
    """Build one model per combination of the values in a parameter grid.

    Args:
        cv_param_grid: mapping of attribute name -> iterable of candidate
            values. Every combination of values produces one model.
        base_model_factory: optional zero-argument callable returning a
            fresh model to configure. Defaults to ``get_base_model``.

    Returns:
        List of configured models in ``itertools.product`` order. Each grid
        key is set as an attribute on the model, and ``model.method`` is set
        to ``'ALS'`` followed by one ``_<key>-<value>`` part per parameter.
        An empty grid yields a single unmodified base model named ``'ALS'``.
    """
    if base_model_factory is None:
        # Resolved at call time so the default always follows get_base_model.
        base_model_factory = get_base_model

    param_names = list(cv_param_grid)
    value_lists = [cv_param_grid[param_name] for param_name in param_names]

    models = []
    for combination in itertools.product(*value_lists):
        next_model = base_model_factory()
        name = 'ALS'
        # Distinct loop names: the original reused `v` for both the
        # combination and the single value, shadowing the outer variable.
        for param_name, param_value in zip(param_names, combination):
            setattr(next_model, param_name, param_value)
            name += '_%s-%s' % (param_name, param_value)

        next_model.method = name
        models.append(next_model)
    return models

Cross-validation setup:

In [10]:
# Parameter grid for the ALS models: each key is set as a model attribute and
# every combination of the listed values becomes one model (2 * 1 * 1 * 3 = 6).
cv_param_grid = {
    'rank': [200, 250],
    'regularization': [0.01],
    'alpha': [50],
    'num_epochs': [20, 50, 100],
}

In [11]:
# Models under comparison: the four baselines plus one ALS model per grid point.
basic_models = [random, popular, svd, bpr]
als_models = get_grid_models(cv_param_grid)
models = basic_models + als_models

metrics = ['ranking', 'relevance'] # metrics for evaluation: nDCG, Precision, Recall, etc.
folds = [1, 2, 3]  # only these three folds are evaluated
topk_values = [5, 10, 20] # values of k to experiment with

In [12]:
start = time.time()

# run the CV experiment over the folds listed in `folds`
result = ee.run_cv_experiment(models, folds, metrics,
                              fold_experiment=ee.topk_test,
                              topk_list=topk_values)


logging.debug("Cross-validation experiment finished in %0.2fs", time.time() - start)

Preparing data...
211 unique itemid's within 234 holdout interactions were filtered. Reason: not in the training data.
85443 unique userid's within 136172 holdout interactions were filtered. Reason: not in the training data.
100 of 44237 userid's were filtered out from holdout. Reason: incompatible number of items.
Done.
There are 5070446 events in the training and 132411 events in the holdout.
PureSVD training time: 3.435s


DEBUG:implicit:Running 100 BPR training epochs
100%|█████████████████████████████████████████████████| 100/100 [05:18<00:00,  4.15s/it, correct=93.81%, skipped=0.73%]


BPR training time: 05m:21s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.200s
DEBUG:implicit:Calculated transpose in 0.169s
DEBUG:implicit:Initialized factors in 2.6365675926208496
DEBUG:implicit:Running 20 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 20.0/20 [02:18<00:00,  6.67s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-20 training time: 02m:21s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.376s
DEBUG:implicit:Calculated transpose in 0.202s
DEBUG:implicit:Initialized factors in 3.4615726470947266
DEBUG:implicit:Running 50 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 50.0/50 [04:41<00:00,  4.25s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-50 training time: 04m:45s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.219s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 2.6556150913238525
DEBUG:implicit:Running 100 ALS iterations
100%|██████████████████████████████████████████████████████████████████████████████| 100.0/100 [07:41<00:00,  4.34s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-100 training time: 07m:45s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 3.735739231109619
DEBUG:implicit:Running 20 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 20.0/20 [02:06<00:00,  5.95s/it]


ALS_rank-250_regularization-0.01_alpha-50_num_epochs-20 training time: 02m:11s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 4.033125877380371
DEBUG:implicit:Running 50 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 50.0/50 [05:18<00:00,  6.06s/it]


ALS_rank-250_regularization-0.01_alpha-50_num_epochs-50 training time: 05m:23s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 5.210134506225586
DEBUG:implicit:Running 100 ALS iterations
100%|██████████████████████████████████████████████████████████████████████████████| 100.0/100 [10:34<00:00,  6.03s/it]


ALS_rank-250_regularization-0.01_alpha-50_num_epochs-100 training time: 10m:40s
Evaluated model RND in 256.59s
Evaluated model MP in 107.93s
Evaluated model PureSVD in 169.58s
Evaluated model BPR in 193.76s
Evaluated model ALS_rank-200_regularization-0.01_alpha-50_num_epochs-20 in 185.07s
Evaluated model ALS_rank-200_regularization-0.01_alpha-50_num_epochs-50 in 185.96s
Evaluated model ALS_rank-200_regularization-0.01_alpha-50_num_epochs-100 in 182.36s
Evaluated model ALS_rank-250_regularization-0.01_alpha-50_num_epochs-20 in 195.61s
Evaluated model ALS_rank-250_regularization-0.01_alpha-50_num_epochs-50 in 196.37s
Evaluated model ALS_rank-250_regularization-0.01_alpha-50_num_epochs-100 in 194.43s
Evaluated model RND in 0.50s
Evaluated model MP in 0.50s
Evaluated model PureSVD in 0.50s
Evaluated model BPR in 0.48s
Evaluated model ALS_rank-200_regularization-0.01_alpha-50_num_epochs-20 in 0.52s
Evaluated model ALS_rank-200_regularization-0.01_alpha-50_num_epochs-50 in 0.48s
Evaluated mo

DEBUG:implicit:Running 100 BPR training epochs
100%|█████████████████████████████████████████████████| 100/100 [04:58<00:00,  2.97s/it, correct=93.83%, skipped=0.73%]


BPR training time: 05m:01s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.219s
DEBUG:implicit:Calculated transpose in 0.172s
DEBUG:implicit:Initialized factors in 2.6881814002990723
DEBUG:implicit:Running 20 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 20.0/20 [01:32<00:00,  4.33s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-20 training time: 01m:36s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.156s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 2.7517035007476807
DEBUG:implicit:Running 50 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 50.0/50 [03:49<00:00,  4.31s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-50 training time: 03m:53s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.156s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 2.514996290206909
DEBUG:implicit:Running 100 ALS iterations
100%|██████████████████████████████████████████████████████████████████████████████| 100.0/100 [07:41<00:00,  4.29s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-100 training time: 07m:44s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.156s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 4.626121520996094
DEBUG:implicit:Running 20 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 20.0/20 [02:06<00:00,  5.87s/it]


ALS_rank-250_regularization-0.01_alpha-50_num_epochs-20 training time: 02m:11s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 3.76010799407959
DEBUG:implicit:Running 50 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 50.0/50 [05:13<00:00,  6.07s/it]


ALS_rank-250_regularization-0.01_alpha-50_num_epochs-50 training time: 05m:18s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.187s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 4.1262781620025635
DEBUG:implicit:Running 100 ALS iterations
100%|██████████████████████████████████████████████████████████████████████████████| 100.0/100 [10:30<00:00,  6.12s/it]


ALS_rank-250_regularization-0.01_alpha-50_num_epochs-100 training time: 10m:35s
Evaluated model RND in 251.47s
Evaluated model MP in 106.28s
Evaluated model PureSVD in 182.22s
Evaluated model BPR in 170.79s
Evaluated model ALS_rank-200_regularization-0.01_alpha-50_num_epochs-20 in 184.06s
Evaluated model ALS_rank-200_regularization-0.01_alpha-50_num_epochs-50 in 180.88s
Evaluated model ALS_rank-200_regularization-0.01_alpha-50_num_epochs-100 in 181.74s
Evaluated model ALS_rank-250_regularization-0.01_alpha-50_num_epochs-20 in 193.14s
Evaluated model ALS_rank-250_regularization-0.01_alpha-50_num_epochs-50 in 194.86s
Evaluated model ALS_rank-250_regularization-0.01_alpha-50_num_epochs-100 in 190.34s
Evaluated model RND in 0.47s
Evaluated model MP in 0.49s
Evaluated model PureSVD in 0.48s
Evaluated model BPR in 0.47s
Evaluated model ALS_rank-200_regularization-0.01_alpha-50_num_epochs-20 in 0.47s
Evaluated model ALS_rank-200_regularization-0.01_alpha-50_num_epochs-50 in 0.48s
Evaluated mo

DEBUG:implicit:Running 100 BPR training epochs
100%|█████████████████████████████████████████████████| 100/100 [04:57<00:00,  2.98s/it, correct=93.79%, skipped=0.74%]


BPR training time: 05m:01s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.161s
DEBUG:implicit:Calculated transpose in 0.141s
DEBUG:implicit:Initialized factors in 2.624375581741333
DEBUG:implicit:Running 20 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 20.0/20 [01:30<00:00,  4.25s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-20 training time: 01m:34s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.173s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 2.9523916244506836
DEBUG:implicit:Running 50 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 50.0/50 [03:53<00:00,  4.40s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-50 training time: 03m:57s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 2.7805871963500977
DEBUG:implicit:Running 100 ALS iterations
100%|██████████████████████████████████████████████████████████████████████████████| 100.0/100 [07:36<00:00,  4.28s/it]


ALS_rank-200_regularization-0.01_alpha-50_num_epochs-100 training time: 07m:40s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.172s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 3.6103036403656006
DEBUG:implicit:Running 20 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 20.0/20 [02:07<00:00,  6.01s/it]


ALS_rank-250_regularization-0.01_alpha-50_num_epochs-20 training time: 02m:12s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.156s
DEBUG:implicit:Calculated transpose in 0.125s
DEBUG:implicit:Initialized factors in 4.420793771743774
DEBUG:implicit:Running 50 ALS iterations
100%|████████████████████████████████████████████████████████████████████████████████| 50.0/50 [05:16<00:00,  5.98s/it]


ALS_rank-250_regularization-0.01_alpha-50_num_epochs-50 training time: 05m:21s


DEBUG:implicit:Converting input to CSR format
DEBUG:implicit:Converted input to CSR in 0.203s
DEBUG:implicit:Calculated transpose in 0.156s
DEBUG:implicit:Initialized factors in 3.5460293292999268
DEBUG:implicit:Running 100 ALS iterations
100%|██████████████████████████████████████████████████████████████████████████████| 100.0/100 [10:31<00:00,  6.10s/it]


ALS_rank-250_regularization-0.01_alpha-50_num_epochs-100 training time: 10m:36s
Evaluated model RND in 252.11s
Evaluated model MP in 106.78s
Evaluated model PureSVD in 159.81s
Evaluated model BPR in 187.59s
Evaluated model ALS_rank-200_regularization-0.01_alpha-50_num_epochs-20 in 191.48s
Evaluated model ALS_rank-200_regularization-0.01_alpha-50_num_epochs-50 in 184.43s
Evaluated model ALS_rank-200_regularization-0.01_alpha-50_num_epochs-100 in 186.86s
Evaluated model ALS_rank-250_regularization-0.01_alpha-50_num_epochs-20 in 227.17s
Evaluated model ALS_rank-250_regularization-0.01_alpha-50_num_epochs-50 in 221.90s
Evaluated model ALS_rank-250_regularization-0.01_alpha-50_num_epochs-100 in 212.92s
Evaluated model RND in 0.52s
Evaluated model MP in 0.50s
Evaluated model PureSVD in 0.51s
Evaluated model BPR in 0.52s
Evaluated model ALS_rank-200_regularization-0.01_alpha-50_num_epochs-20 in 0.51s
Evaluated model ALS_rank-200_regularization-0.01_alpha-50_num_epochs-50 in 0.51s
Evaluated mo

DEBUG:root:Cross-validation experiment finished in 12736.37s


In [13]:
# Persist the cross-validation results. The context manager makes sure the
# file is flushed and closed as soon as the dump finishes, rather than
# whenever the anonymous file object happens to be garbage collected.
with open("eval_results/result_5.p", "wb") as results_file:
    pickle.dump(result, results_file)

In [14]:
result

Unnamed: 0_level_0,Unnamed: 1_level_0,type,relevance,relevance,relevance,relevance,relevance,ranking,ranking
Unnamed: 0_level_1,Unnamed: 1_level_1,metric,precision,recall,fallout,specifity,miss_rate,nDCG,nDCL
fold,top-n,model,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
1,5,RND,2.3e-05,3.8e-05,,,0.999962,2.4e-05,
1,5,MP,0.001115,0.001858,,,0.998142,0.001654,
1,5,PureSVD,0.001355,0.002258,,,0.997742,0.002124,
1,5,BPR,0.000517,0.000861,,,0.999139,0.000733,
1,5,ALS_rank-200_regularization-0.01_alpha-50_num_epochs-20,0.004776,0.00796,,,0.99204,0.00724,
1,5,ALS_rank-200_regularization-0.01_alpha-50_num_epochs-50,0.00508,0.008466,,,0.991534,0.007657,
1,5,ALS_rank-200_regularization-0.01_alpha-50_num_epochs-100,0.005139,0.008564,,,0.991436,0.007684,
1,5,ALS_rank-250_regularization-0.01_alpha-50_num_epochs-20,0.005406,0.00901,,,0.99099,0.008191,
1,5,ALS_rank-250_regularization-0.01_alpha-50_num_epochs-50,0.005646,0.00941,,,0.99059,0.008617,
1,5,ALS_rank-250_regularization-0.01_alpha-50_num_epochs-100,0.005637,0.009395,,,0.990605,0.008483,


In [None]:
# holdout_sizes = [1, 2, 5]

# result = ee.run_cv_experiment(models, folds, metrics,
#                               fold_experiment=ee.holdout_test,
#                               holdout_sizes=holdout_sizes)

In [None]:
# calculate average values across all folds for e.g. relevance metrics
# (grouping on the index levels replaces the `level=` argument of
# DataFrame.mean, which newer pandas releases no longer accept;
# sort=False keeps the groups in their order of first appearance)
scores = result.groupby(level=['top-n', 'model'], sort=False).mean() # use .std instead of .mean for standard deviation
# scores.xs('recall', level='metric', axis=1).unstack('model')
scores

***
### Variable sizes

In [14]:
import sys
def sizeof_fmt(num, suffix='B'):
    """Format a byte count with binary (1024-based) unit prefixes.

    By Fred Cirera, after https://stackoverflow.com/a/1094933/1870254

    The value is divided by 1024 until its magnitude drops below 1024 and is
    then rendered with one decimal, the matching prefix and `suffix`.
    Values too large for the 'Zi' prefix are reported in 'Yi'.
    """
    prefixes = ('', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi')
    value = num
    for prefix in prefixes:
        if not abs(value) < 1024.0:
            value = value / 1024.0
            continue
        return "%3.1f%s%s" % (value, prefix, suffix)
    return "%.1f%s%s" % (value, 'Yi', suffix)

# Print the ten largest names in the current namespace, biggest first.
# NOTE: sys.getsizeof is shallow - it does not follow references into
# contained objects, so containers and model wrappers are under-reported.
for name, size in sorted(((name, sys.getsizeof(value)) for name,value in locals().items()),
                         key= lambda x: -x[1])[:10]:
    print("{:>30}: {:>8}".format(name,sizeof_fmt(size)))

               RecommenderData:   2.0KiB
                   ImplicitALS:   2.0KiB
                      SVDModel:   1.4KiB
               PopularityModel:   1.4KiB
                   RandomModel:   1.4KiB
                    csr_matrix:   1.0KiB
                    lil_matrix:   1.0KiB
             CooccurrenceModel:   1.0KiB
                           _i1:   917.0B
                    als_models:   768.0B


In [13]:
# free ram: drop the interaction table from the namespace
del kiva_dataframe