In [1]:
import pandas as pd
import numpy as np
import os.path
import recsys as rs
import notipy
import json
import math
import sys
import TopSimilarRecommender as TSR
import ItemBasedRecommender as IBR
import UserBasedRecommender as UBR
import random
from scipy import sparse as sps
import importlib

# Test split

In [2]:
# Load the raw tab-separated competition files.
# The separator is passed by keyword: pandas deprecated positional arguments
# after the file path and newer releases reject them outright.
data = pd.read_csv('Data/train_final.csv', sep='\t')
tracks = pd.read_csv('Data/tracks_final.csv', sep='\t')
pl_info = pd.read_csv('Data/playlists_final.csv', sep='\t')
tgt_playlists = pd.read_csv('Data/target_playlists.csv', sep='\t')
tgt_tracks = pd.read_csv('Data/target_tracks.csv', sep='\t')

In [3]:
# Build a local train/test split from the interactions in `data`.
# NOTE: this rebinds `tgt_tracks` and `tgt_playlists`, so the frames read from
# the target CSV files in the loading cell are no longer reachable afterwards.
# NOTE(review): the meaning of the positional arguments 10, 20, 5 and 2517 is
# defined by rs.train_test_split_interface and is not visible here; 2517 is
# presumably a random seed -- TODO confirm and pass them by keyword.
# The recorded run of this cell took roughly five minutes.
train, test, tgt_tracks, tgt_playlists = rs.train_test_split_interface(data, 10, 20, 5, 2517)

100%|██████████| 45649/45649 [05:13<00:00, 145.69it/s]


# Models fitting

### Parameters

In [4]:
# Keyword arguments forwarded to each recommender's constructor.
# Key order is kept stable because these dicts are also dumped to the run log.

# TopSimilarRecommender settings
fit_dict_tsr = dict(
    attributes=['artist_id', 'album', 'playcount'],
    n_min_attr=90,
    idf=True,
    measure='dot',
    shrinkage=0,
    n_el_sim=65,
)

# ItemBasedRecommender settings
fit_dict_ibr = dict(
    idf=True,
    measure='dot',
    shrinkage=10,
    n_el_sim=65,
)

# UserBasedRecommender settings
fit_dict_ubr = dict(
    idf=True,
    measure='imp_cos',
    shrinkage=20,
    n_el_sim=50,
)

## Precomputed similarities loading

In [5]:
# Instantiate each recommender with its fit parameters and attach the
# similarity matrix previously saved to disk, instead of fitting from scratch.

# Item-based collaborative filtering
ibr = IBR.ItemBasedRecommender(**fit_dict_ibr)
ibr.S = sps.load_npz('BuiltStructures/ibr_Smatrix_Luca.npz')

# Content-based (top-similar) recommender
tsr = TSR.TopSimilarRecommender(**fit_dict_tsr)
tsr.S = sps.load_npz('BuiltStructures/cbf_Smatrix_Luca.npz')

# User-based collaborative filtering
ubr = UBR.UserBasedRecommender(**fit_dict_ubr)
ubr.S = sps.load_npz('BuiltStructures/ubr_Smatrix_Luca.npz')

# Auxiliary structures

In [6]:
# Index lookups for all items, the target items and the target playlists.
IX_items, IX_tgt_items, IX_tgt_playlists, _ = rs.create_sparse_indexes(
    tracks_info=tracks,
    playlists=tgt_playlists,
    tracks_reduced=tgt_tracks,
)
# Index lookup for every playlist that appears in the training interactions.
_, _, IX_playlists, _ = rs.create_sparse_indexes(playlists=train)

# Rating matrix built from the target-playlist and item indexes, stored as CSR
# because rows are sliced one playlist at a time later on.
URM = rs.create_tgt_URM(IX_tgt_playlists, IX_items, train).tocsr()

# Rating matrix built from the all-playlist and target-item indexes, used by
# the user-based scores; also converted to CSR.
UBR_URM = rs.create_UBR_URM(IX_playlists, IX_tgt_items, train).tocsr()

100%|██████████| 9129/9129 [00:33<00:00, 270.55it/s]
100%|██████████| 45649/45649 [05:48<00:00, 131.07it/s]


# Iterative ensemble parameters fast evaluation

## Cycle setup

In [7]:
# One row per ensemble configuration to evaluate:
#   (alpha1, alpha2, beta1, beta2)
# alpha1 / alpha2 weight the content-based and item-based scores in the
# ratings-level ensemble (the user-based score gets 1 - alpha1 - alpha2);
# beta1 is handed to rs.merge_similarities, and beta2 weights the merged
# similarity score against the user-based score in the bi-level ensemble.
# Keeping each configuration on a single row means the four lists derived
# below can never drift out of alignment.
_WEIGHT_GRID = [
    (0.3,  0.2,  0.6, 0.01),
    (0.3,  0.1,  0.6, 0.001),
    (0.3,  0.3,  0.6, 0.1),
    (0.3,  0.05, 0.6, 0.05),
    (0.2,  0.2,  0.5, 0.01),
    (0.1,  0.2,  0.7, 0.01),
    (0.4,  0.2,  0.4, 0.01),
    (0.05, 0.2,  0.8, 0.01),
    (0.4,  0.3,  0.7, 0.001),
    (0.4,  0.2,  0.7, 0.1),
    (0.4,  0.4,  0.7, 0.15),
    (0.4,  0.1,  0.7, 0.05),
    (0.5,  0.3,  0.6, 0.15),
    (0.2,  0.3,  0.5, 0.15),
    (0.1,  0.3,  0.7, 0.15),
    (0.6,  0.3,  0.8, 0.15),
]

# Column-wise views, kept as plain lists under their original names.
alpha1, alpha2, beta1, beta2 = (list(column) for column in zip(*_WEIGHT_GRID))

## Iteration

In [9]:
# Evaluate every (alpha1, alpha2, beta1, beta2) configuration with two
# ensembling strategies and append the MAP of each to 'runs_data.json'.
#   * "only ratings": weighted sum of the three recommenders' scores.
#   * "bilevel": content/item similarities merged first (beta1), then the
#     resulting scores blended with the user-based scores (beta2).

# The four weight lists are indexed in lock-step, so fail early if they differ.
if not (len(alpha1) == len(alpha2) == len(beta1) == len(beta2)):
    raise ValueError('alpha1, alpha2, beta1 and beta2 must have the same length')

# --- Work that does not depend on the configuration: computed once. ---
# Additive smoothing constants applied to the similarity column sums.
H_t = 10
H_i = 30
H_u = 20
div_t = tsr.S.sum(axis=0)
div_i = ibr.S.sum(axis=0)
div_u = ubr.S.sum(axis=0)
den_t = div_t + H_t
den_i = div_i + H_i
inv_den_u = 1/(div_u + H_u)


def _top5_formatted(scores, p):
    """Return the submission-formatted top-5 tracks for playlist index `p`."""
    top = rs.top5_outside_playlist(scores, p, train, IX_tgt_playlists, IX_tgt_items, False, False)
    return rs.sub_format(top)


for i in range(len(alpha1)):
    S_ensemble = rs.merge_similarities(tsr.S, ibr.S, beta1[i])

    # Plain lists instead of repeated np.append: linear time, and no float
    # array being coerced to strings on the first append.
    recommendations_only_ratings = []
    recommendations_bilevel = []

    for p in IX_tgt_playlists.values:
        playlist_row = URM[p,:]

        # User-based scores are shared by both strategies, so compute them once.
        # NOTE(review): `p` comes from IX_tgt_playlists but also indexes ubr.S,
        # whose row space is not visible here -- confirm both use the same index.
        avg_sims_u = (ubr.S[p,:].multiply(inv_den_u).dot(UBR_URM).toarray().ravel())

        # Recommendation with only ratings-level combination
        avg_sims_t = (playlist_row.dot(tsr.S).toarray().ravel())/den_t
        avg_sims_i = (playlist_row.dot(ibr.S).toarray().ravel())/den_i
        # np.array(...).ravel() flattens the np.matrix produced by the
        # division by the (matrix-typed) column sums.
        avg_sims = np.array(avg_sims_t*alpha1[i] + avg_sims_i*alpha2[i] + avg_sims_u*(1-alpha1[i]-alpha2[i])).ravel()
        recommendations_only_ratings.append(_top5_formatted(avg_sims, p))

        # Recommendation with S-matrix and ratings-level combination.
        # The merged-similarity scores are intentionally left un-normalised
        # (the column-sum normalisation was disabled in the original code).
        avg_sims_e = (playlist_row.dot(S_ensemble).toarray().ravel())
        avg_sims = np.array(avg_sims_e*beta2[i] + avg_sims_u*(1-beta2[i])).ravel()
        recommendations_bilevel.append(_top5_formatted(avg_sims, p))

    rec_only_ratings = pd.DataFrame({'playlist_id' : IX_tgt_playlists.index.values, 'track_ids' : recommendations_only_ratings})
    rec_bilevel = pd.DataFrame({'playlist_id' : IX_tgt_playlists.index.values, 'track_ids' : recommendations_bilevel})


    # Evaluation

    map_eval_only_ratings = rs.evaluate(rec_only_ratings, test, 'MAP')
    map_eval_bilevel = rs.evaluate(rec_bilevel, test, 'MAP')


    # Saving run data

    run_data = {'recommender_1' : tsr.__class__.__name__,
                'recommender_2' : ibr.__class__.__name__,
                'recommender_3' : ubr.__class__.__name__,
                'fit_parameters_1' : fit_dict_tsr,
                'fit_parameters_2' : fit_dict_ibr,
                'fit_parameters_3' : fit_dict_ubr,
                'alpha1_only_ratings' : alpha1[i],
                'alpha2_only_ratings' : alpha2[i],
                'beta1_bilevel' : beta1[i],
                'beta2_bilevel' : beta2[i],
                'evaluation_only_ratings' : map_eval_only_ratings,
                'evaluation_bilevel' : map_eval_bilevel}

    # Append mode: each run adds one pretty-printed JSON object, so the file
    # is a sequence of objects rather than a single JSON document.
    with open('runs_data.json', 'a') as fp:
        json.dump(run_data, fp, indent=2)
        fp.write('\n')

    print('Completed iteration #' + str(i) + ' over ' + str(len(alpha1)))

Completed iteration #0 over 16
Completed iteration #1 over 16
Completed iteration #2 over 16
Completed iteration #3 over 16
Completed iteration #4 over 16
Completed iteration #5 over 16
Completed iteration #6 over 16
Completed iteration #7 over 16
Completed iteration #8 over 16
Completed iteration #9 over 16
Completed iteration #10 over 16
Completed iteration #11 over 16
Completed iteration #12 over 16
Completed iteration #13 over 16
Completed iteration #14 over 16
Completed iteration #15 over 16
