## Imports 

In [None]:
# Make the import
import os, sys
import pandas as pd
import joblib
import numpy as np
from caserec.utils.split_database import SplitDatabase
import matplotlib.pyplot as plt
%matplotlib inline 
from IPython.display import clear_output

# Folder that contains the project's helper packages imported right below.
lib_path = './../Sources/Utilities'
# Register the folder only once, so re-running this cell adds no duplicates.
if lib_path not in sys.path:
    sys.path.append(lib_path)

from messaging.print_functions import ProgressBar
from messaging.telegrambot import Bot
# Notification bot from the project's messaging.telegrambot module, configured
# through the ./JFGS.json credentials file.
bot = Bot(user_credentials='./JFGS.json')

# Checking if bot is ok
bot.send_message(text="Hello, John")
# Shared progress bar; later cells call its update_progress() and
# get_elapsed_time() methods.
progbar = ProgressBar(bar_length=20, bar_fill='#', elapsed_time=True)

## Setting Dataset

In [None]:
# Dataset selection: keep exactly one (dataset, dataset_type) pair uncommented.
# dataset, dataset_type = 'MovieLens', '100k'
# dataset, dataset_type = 'BookCrossing', 'Standard'
# dataset, dataset_type = 'Amazon', 'MoviesTV'
dataset, dataset_type = 'Amazon', 'InstantVideo'
# dataset, dataset_type = 'Jester', 'jester'

# Both folders keep their trailing slash because later cells build file paths
# by plain string concatenation.
dataset_folder = "../Datasets/{}/{}/".format(dataset, dataset_type)
dataset_output_folder = "{}outputs/".format(dataset_folder)

# Tab-separated sparsity summary with a header row; later cells read its
# 'uss_limit' and 'iss_limit' columns.
df_overall_sparsity = pd.read_csv(
    '{}df_overall_sparsity.tsv'.format(dataset_output_folder),
    sep='\t',
    header=0,
)
df_overall_sparsity.head()

In [None]:
sparsity_datasets_folder = dataset_output_folder + 'sparsity_datasets/'
# Names found in the sparsity datasets folder, minus the 'desktop.ini' entry
# that may sit next to the real sub-folders.
sparsity_folders = [
    entry for entry in os.listdir(sparsity_datasets_folder)
    if entry != 'desktop.ini'
]

## Creating Evaluation Function

In [None]:

def eval_model(**kwargs):
    """Train one Case Recommender model on a train/test split and return its metrics.

    Keyword Args:
        model_name (str): 'item-knn', 'nnmf' or 'bprmf' (case-insensitive).
        train_filepath (str): Path of the training split.
        test_filepath (str): Path of the test split.
        top_n (int): Ranking length handed to the model / evaluator.
        uss_limit: Copied unchanged into the returned results.
        iss_limit: Copied unchanged into the returned results.
        k_neighbors: Neighbourhood size ('item-knn' only).
        similarity_metric (str): Similarity measure ('item-knn' only).
        n_factors (int): Number of latent factors ('nnmf' only).
        metrics (iterable of str): Metric names for the evaluator ('nnmf' only).
        predictions_output_filepath (str, optional): File in which NNMF writes
            its predictions ('nnmf' only). Defaults to
            './predictions_output.dat'.

    Returns:
        The evaluation results produced by the library (callers use it as a
        mapping), extended with the 'uss_limit' and 'iss_limit' entries.

    Raises:
        KeyError: If a keyword required by the selected model is missing.
        ValueError: If model_name is not one of the supported models.
    """
    model_name = kwargs['model_name'].lower()

    # The caserec imports are kept inside each branch so that only the
    # selected algorithm is loaded.
    if model_name == 'item-knn':
        from caserec.recommenders.item_recommendation.itemknn import ItemKNN

        # as_binary is left at the library default (explicit feedback is not
        # binarised here).
        model = ItemKNN(
            train_file=kwargs['train_filepath'],
            test_file=kwargs['test_filepath'],
            k_neighbors=kwargs['k_neighbors'],
            similarity_metric=kwargs['similarity_metric'],
            rank_length=kwargs['top_n'])

        model.compute(
            metrics=None,
            as_table=True,
            n_ranks=[kwargs['top_n']],
            verbose=False)

        eval_results = model.evaluation_results

    elif model_name == 'nnmf':
        from caserec.recommenders.rating_prediction.nnmf import NNMF
        from caserec.utils.process_data import ReadFile
        from caserec.evaluation.rating_prediction import RatingPredictionEvaluation

        # Overridable so that concurrent runs need not share one output file.
        predictions_output_filepath = kwargs.get('predictions_output_filepath',
                                                 './predictions_output.dat')

        model = NNMF(kwargs['train_filepath'],
                     kwargs['test_filepath'],
                     factors=kwargs['n_factors'],
                     output_file=predictions_output_filepath)

        model.compute(verbose=False)

        # Read the predictions back from the file the model has just written.
        reader = ReadFile(input_file=predictions_output_filepath)
        predictions = reader.read()

        # Evaluate the rating predictor with ranking metrics (as_rank=True).
        evaluator = RatingPredictionEvaluation(sep='\t',
                                               n_rank=[kwargs['top_n']],
                                               as_rank=True,
                                               metrics=list(kwargs['metrics']))

        eval_results = evaluator.evaluate(predictions['feedback'], model.test_set)

    elif model_name == 'bprmf':
        from caserec.recommenders.item_recommendation.bprmf import BprMF

        model = BprMF(kwargs['train_filepath'],
                      kwargs['test_filepath'],
                      batch_size=30,
                      rank_length=kwargs['top_n'])

        model.compute(
            metrics=None,
            as_table=True)

        eval_results = model.evaluation_results

    else:
        # Without this branch an unknown name only failed further down with an
        # UnboundLocalError on eval_results.
        raise ValueError(
            "Unsupported model_name {!r}; expected 'item-knn', 'nnmf' or 'bprmf'".format(
                kwargs['model_name']))

    # Tag the results with the sparsity limits of the evaluated dataset.
    eval_results['uss_limit'] = kwargs['uss_limit']
    eval_results['iss_limit'] = kwargs['iss_limit']

    return eval_results


## Creating the list of arguments for the evaluation function

In [None]:
# Hyper-parameters shared by every evaluation run.
top_n = 10
k_neighbors = 30
n_factors = 30
similarity_metric= 'cosine'
metrics = ('PREC', 'RECALL', 'NDCG', 'MAP')
model_name = 'nnmf'

# Cross-validation setup: every dataset is split into n_folds folds and only
# fold number fold_num is evaluated.
n_folds = 2
fold_num = 0

arr_eval_args = []

for index, row in df_overall_sparsity.iterrows():
    progbar.update_progress(index/float(df_overall_sparsity.shape[0]))

    uss_limit = row['uss_limit']
    iss_limit = row['iss_limit']

    target_folder = 'usslimit_{}_isslimit_{}'.format(uss_limit, iss_limit)

    # Stop at the first sparsity level whose folder is missing.
    if target_folder not in sparsity_folders:
        print ("Error findind " + target_folder + " folder")
        break

    ratings_filepath = sparsity_datasets_folder + target_folder + '/u.data'
    cross_validation_folder = sparsity_datasets_folder + target_folder + '/'

    try:
        SplitDatabase(input_file=ratings_filepath,
                      dir_folds=cross_validation_folder,
                      n_splits=n_folds).k_fold_cross_validation()
    except Exception as err:
        # Catch Exception rather than everything so Ctrl+C still interrupts
        # the cell, and report the cause instead of hiding it.
        print ("Erro em ", ratings_filepath)
        print (repr(err))
        break

    fold_path = cross_validation_folder + 'folds/' + str(fold_num)

    train_filepath = fold_path + '/train.dat'
    test_filepath = fold_path + '/test.dat'

    # 'k_neighbors' is always included: eval_model reads it for 'item-knn'
    # and the other models simply ignore it.
    temp_eval_args = {'uss_limit': uss_limit,
                      'iss_limit': iss_limit,
                      'model_name': model_name,
                      'train_filepath': train_filepath,
                      'test_filepath': test_filepath,
                      'top_n': top_n,
                      'k_neighbors': k_neighbors,
                      'n_factors': n_factors,
                      'similarity_metric': similarity_metric,
                      'metrics': metrics}

    arr_eval_args.append(temp_eval_args)

# df_overall_sparsity and sparsity_folders are intentionally kept alive so
# that this cell can be re-run and other cells can still read them.
text = "Finished creating arr_eval_args for {} on {}/{} dataset in {}".format(model_name, dataset, dataset_type, progbar.get_elapsed_time())
bot.send_message(text=text)
joblib.dump(arr_eval_args, dataset_output_folder + 'arr_eval_args_' + model_name + '.joblib')

### Testing evaluation function

In [None]:
# List one generated sparsity folder of the currently selected dataset
# (the path follows the dataset selection instead of being hard-coded).
os.listdir(sparsity_datasets_folder + 'usslimit_1.0_isslimit_1.0')

In [None]:
# Display the last argument dict of the list as a quick sanity check.
arr_eval_args[-1]

In [None]:
# Run the evaluation function once, on the last argument dict of the list.
eval_model(**arr_eval_args[-1])

In [None]:
# Persist the argument list under the output folder, one file per model name.
joblib.dump(arr_eval_args, dataset_output_folder + 'arr_eval_args_' + model_name + '.joblib')


In [None]:
# Reload the argument list for the selected model from the output folder.
arr_eval_args = joblib.load(dataset_output_folder + 'arr_eval_args_' + model_name + '.joblib')

In [None]:

max_evals = len(arr_eval_args)

# Defined up front so the name exists even when there is nothing to evaluate.
df_eval_metadata = pd.DataFrame()
colnames = []

for index, row in enumerate(arr_eval_args):
    clear_output()
    progbar.update_progress(index/float(max_evals))

    print (row)

    eval_result = eval_model(**row)

    if (index == 0):
        # The first result fixes the column layout of the table.
        colnames = list(eval_result.keys())
        df_eval_metadata = pd.DataFrame(columns = colnames)

    # Filled row by row so that partial results survive an interrupted run.
    df_eval_metadata.loc[index] = [eval_result[x] for x in colnames]

text = "Finished creating df_eval_metadata for {} on {}/{} dataset".format(model_name, dataset, dataset_type)
# Same notification call as the other cells of this notebook.
bot.send_message(text=text)

In [None]:
# Show the last rows of the evaluation table.
df_eval_metadata.tail()

### Checking memory usage

In [None]:
import sys

# Names that IPython itself places in the namespace, plus this list.
ipython_vars = ['In', 'Out', 'exit', 'quit', 'get_ipython', 'ipython_vars']

# Shallow size (sys.getsizeof) of every remaining public global, largest first.
# The helper name starts with an underscore so it is filtered out of the listing.
_user_names = [
    name for name in dir()
    if not name.startswith('_')
    and name not in sys.modules
    and name not in ipython_vars
]
sorted(((name, sys.getsizeof(globals().get(name))) for name in _user_names),
       key=lambda pair: pair[1], reverse=True)

## Plotting Results

In [None]:
uss_limits = df_eval_metadata['uss_limit'].unique()
iss_limits = df_eval_metadata['iss_limit'].unique()

# One (uss_limit x iss_limit) grid per metric, saved to disk for plotting.
for column in ['PREC@' + str(top_n), 'RECALL@' + str(top_n), 'NDCG@' + str(top_n), 'MAP@' + str(top_n)]:

    arr_prec = np.zeros([len(uss_limits), len(iss_limits)])
    for uss_index, uss_limit in enumerate(uss_limits):
        for iss_index, iss_limit in enumerate(iss_limits):
            # The mask must come from df_eval_metadata itself: that is the
            # frame being filtered and the one that holds the metric columns.
            mask = (df_eval_metadata['uss_limit'] == uss_limit) & (df_eval_metadata['iss_limit'] == iss_limit)
            # First matching row; raises if the combination was not evaluated.
            arr_prec[uss_index, iss_index] = df_eval_metadata.loc[mask, column].iloc[0]

    joblib.dump(arr_prec, dataset_output_folder + 'arr_' + column.lower() + '_' + model_name + '.joblib')

In [None]:
# Draw, save and send one heatmap per metric from the grids stored on disk.
for column in ['PREC@' + str(top_n), 'RECALL@' + str(top_n), 'NDCG@' + str(top_n), 'MAP@' + str(top_n)]:

    arr_prec = joblib.load(dataset_output_folder + 'arr_' + column.lower() + '_' + model_name + '.joblib')

    cmapping = "jet"
    fig, ax = plt.subplots(figsize=(10,10))

    # The grid is indexed [uss_index, iss_index] and imshow draws the first
    # axis as rows (y). Transposing puts the user sparsity on the x axis,
    # which is how the axes are labelled below.
    cax = plt.imshow(arr_prec.T, cmap=cmapping)
    plt.gca().invert_yaxis()
    cbar = plt.colorbar(cax, ticks = [x/100.0 for x in np.arange(0,1000,10)], shrink = 0.83)

    tick_step = int(10)

    # Positions and labels are set together so they cannot drift apart, and
    # each axis is sliced with its own length.
    plt.xticks(np.arange(0, len(uss_limits), tick_step),
               uss_limits[0:len(uss_limits):tick_step],
               rotation = 'vertical')
    plt.yticks(np.arange(0, len(iss_limits), tick_step),
               iss_limits[0:len(iss_limits):tick_step])

    # Fixed colour range so the figures of different metrics are comparable.
    plt.clim(0, 1)

    tick_fontsize = 20
    label_fontsize = 25

    # Setting labels
    ax.set_xlabel('Last User Specific Sparsity', fontsize = label_fontsize)
    ax.set_ylabel('Last Item Specific Sparsity', fontsize = label_fontsize)

    ax.tick_params(axis='both', which='major', labelsize=tick_fontsize)
    cbar.set_label(column.title(), labelpad=-50,  y=1.08, rotation=0, fontsize = label_fontsize)
    cbar.ax.tick_params(labelsize = tick_fontsize)

    # Saving figure
    filename = '2d-' + column + '-' + model_name + '.png'
    fullpath = dataset_output_folder+'Figures/'
    # savefig does not create missing folders.
    os.makedirs(fullpath, exist_ok=True)
    print ("[*] Saving " + filename + " figure to " + fullpath + " folder...")
    fig.savefig(fullpath + filename, bbox_inches = 'tight')
    print ("[+] Results saved.")

    # NOTE(review): bot_credentials is not defined in the visible notebook and
    # the wrapper's send_photo signature is not shown here - confirm the call.
    # The context manager closes the image file even if the call fails.
    with open(fullpath + filename, 'rb') as photo:
        bot.send_photo(bot_credentials['chat_id'], photo=photo)

In [None]:
from multiprocessing import Pool

def f(x):
    """Return the square of x (toy workload for the pool check below)."""
    return x*x


# Guarded so that a worker process importing this code does not start a pool
# of its own; in a notebook or script __name__ is '__main__', so this runs.
# NOTE(review): on platforms that spawn workers, functions defined
# interactively may not be importable by the workers - confirm on Windows.
if __name__ == '__main__':
    # The context manager shuts down the 4 worker processes when done.
    with Pool(processes=4) as pool:
        # Expected output: [1]
        print (pool.map(f, [1]))

In [None]:
# Evaluate temp_eval_args, the argument dict left over from the last
# iteration of the loop that builds arr_eval_args.
eval_model(**temp_eval_args)    

In [None]:
# NOTE(review): df_ratings is not defined anywhere in the visible notebook;
# confirm where it is supposed to come from or drop this cell.
df_ratings.head()

In [None]:
# Display the list of sparsity folder names (requires sparsity_folders to
# still be defined in the session).
sparsity_folders

In [None]:
# filepath = "{}ratings.csv".format(folder)
# output_folder = '../Datasets/' + dataset + '/' + dataset_type + '/outputs/'
# dataset_output_folder = output_folder + 'sparsity_dataset/'

# # Visualize file content
# # df_whole = pd.read_csv(filepath, sep='\t', header=0, names=['user_id', 'item_id', 'rating', 'timestamp']) 
# df_whole = pd.read_csv(filepath, sep='\t', header=0) 
# df_whole.head()

In [None]:

# Create the output folder if needed; exist_ok avoids the gap between a
# separate existence check and the creation.
os.makedirs(dataset_output_folder, exist_ok=True)

In [None]:
from multiprocessing import Pool

def f(x):
    """Return the square of x (toy workload for the pool check below)."""
    return x*x


# Guarded so that a worker process importing this code does not start a pool
# of its own; in a notebook or script __name__ is '__main__', so this runs.
# NOTE(review): on platforms that spawn workers, functions defined
# interactively may not be importable by the workers - confirm on Windows.
if __name__ == '__main__':
    # The context manager shuts down the 4 worker processes when done.
    with Pool(processes=4) as pool:
        # Expected output: [0, 1, 4, ..., 81]
        print (pool.map(f, range(10)))