In [1]:
import pandas as pd
import numpy as np
import os.path
import recsys as rs
import notipy
import json
import math
import sys
import TopSimilarRecommender as TSR
import ItemBasedRecommender as IBR
import UserBasedRecommender as UBR
import random
from scipy import sparse as sps
import importlib

# Test split

In [2]:
# Load the raw competition files. All of them are tab-separated.
# The separator is passed by keyword: pandas deprecated positional arguments
# other than the path for read_csv, and pandas 2.x rejects them.
data = pd.read_csv('Data/train_final.csv', sep='\t')
tracks = pd.read_csv('Data/tracks_final.csv', sep='\t')
pl_info = pd.read_csv('Data/playlists_final.csv', sep='\t')
tgt_playlists = pd.read_csv('Data/target_playlists.csv', sep='\t')
tgt_tracks = pd.read_csv('Data/target_tracks.csv', sep='\t')

In [3]:
# Build the local train/test split from the full interaction table.
# NOTE: this rebinds tgt_tracks and tgt_playlists, replacing the frames read
# from the target CSV files with the targets returned by the split helper.
# NOTE(review): the meaning of the numeric arguments (10, 20, 5, 2517) is
# defined by rs.train_test_split_interface and is not visible here -- confirm
# against the recsys module before changing them.
train, test, tgt_tracks, tgt_playlists = rs.train_test_split_interface(
    data,
    10,
    20,
    5,
    2517,
)

100%|██████████| 45649/45649 [05:32<00:00, 137.14it/s]


# Models fitting

### Parameters

In [4]:
# Keyword arguments forwarded to TopSimilarRecommender (see the TSR cell).
fit_dict_tsr = dict(
    attributes=['artist_id', 'album', 'playcount'],
    n_min_attr=90,
    idf=True,
    measure='dot',
    shrinkage=0,
    n_el_sim=65,
)

# Keyword arguments forwarded to ItemBasedRecommender.
fit_dict_ibr = dict(
    idf=True,
    measure='dot',
    shrinkage=10,
    n_el_sim=65,
)

# Keyword arguments forwarded to UserBasedRecommender.
fit_dict_ubr = dict(
    idf=True,
    measure='imp_cos',
    shrinkage=20,
    n_el_sim=50,
)

# Ratings-level ensemble weights: alpha1 weighs the TSR scores, alpha2 the IBR
# scores, and the UBR scores get the remainder (1 - alpha1 - alpha2).
alpha1, alpha2 = 0.3, 0.2

# Bilevel ensemble: beta1 is passed to rs.merge_similarities when combining the
# TSR and IBR similarity matrices; beta2 weighs the merged-similarity scores
# against the UBR scores (1 - beta2).
beta1, beta2 = 0.6, 0.01

In [6]:
# Item-based recommender configured from fit_dict_ibr, fitted on the track
# metadata, the training interactions and the target tracks.
# The recorded run below shows a progress bar of roughly one hour, so consider
# reloading a saved similarity matrix instead of refitting.
ibr = IBR.ItemBasedRecommender(**fit_dict_ibr)
ibr.fit(
    tracks,
    train,
    tgt_tracks,
)

  0%|          | 33/45649 [00:00<02:19, 326.05it/s]

Calculated Indices


100%|██████████| 45649/45649 [05:05<00:00, 149.55it/s]


(45649, 100000)
Model URM built
Model URM regularized with IDF!


100%|██████████| 100000/100000 [1:03:11<00:00, 26.38it/s]

(100000, 30319)





In [7]:
# Top-similar recommender configured from fit_dict_tsr. It is fitted on
# the track metadata and the target tracks only (no interaction data is passed).
tsr = TSR.TopSimilarRecommender(**fit_dict_tsr)
tsr.fit(
    tracks,
    tgt_tracks,
)

Fixed dataset
Calculated Indices


  0%|          | 35/100000 [00:00<04:51, 342.77it/s]

ICM built
ICM regularized with IDF!


100%|██████████| 100000/100000 [41:58<00:00, 39.71it/s]

Similarity built





In [8]:
# User-based recommender configured from fit_dict_ubr, fitted on the track
# metadata, the training interactions and the target playlists.
ubr = UBR.UserBasedRecommender(**fit_dict_ubr)
ubr.fit(
    tracks,
    train,
    tgt_playlists,
)

  0%|          | 0/45649 [00:00<?, ?it/s]

Calculated Indices


100%|██████████| 45649/45649 [04:55<00:00, 154.62it/s]


(45649, 100000)
Model URM built
Model URM regularized with IDF!


100%|██████████| 45649/45649 [14:03<00:00, 54.11it/s]


Similarity built


# Auxiliary structures

In [8]:
# Index lookups used to address rows/columns of the sparse matrices below.
# The first call yields the item, target-item and target-playlist indexes; the
# second call is only used for the index over the playlists found in `train`.
IX_items, IX_tgt_items, IX_tgt_playlists, _ = rs.create_sparse_indexes(
    tracks_info=tracks,
    playlists=tgt_playlists,
    tracks_reduced=tgt_tracks,
)
_, _, IX_playlists, _ = rs.create_sparse_indexes(playlists=train)

# Interaction matrices in CSR format, which the recommendation loops slice by row.
# NOTE(review): the recorded output of this cell ends in KeyboardInterrupt during
# the second progress bar, so UBR_URM may not have been rebuilt by this run --
# re-execute this cell fully before running the recommendation cells.
URM = rs.create_tgt_URM(IX_tgt_playlists, IX_items, train).tocsr()
UBR_URM = rs.create_UBR_URM(IX_playlists, IX_tgt_items, train).tocsr()

100%|██████████| 9129/9129 [00:35<00:00, 255.29it/s]
 16%|█▌        | 7103/45649 [00:47<04:16, 150.55it/s]

KeyboardInterrupt: 

 16%|█▌        | 7103/45649 [01:00<05:25, 118.35it/s]

In [10]:
# Transpose the user-based similarity so that the recommendation loops can take
# one row per target playlist (ubr.S[p, :]).
# WARNING: this mutates ubr.S in place and is NOT idempotent -- running the cell
# a second time flips the matrix back. Run it exactly once per fitted model.
ubr.S = ubr.S.transpose()

In [None]:
# Similarity-level ensemble of the TSR and IBR similarity matrices, controlled
# by beta1 (the exact merge rule lives in rs.merge_similarities).
# NOTE(review): this cell has no execution count in the saved notebook, yet the
# bilevel recommendation cell reads S_ensemble -- make sure it is run first.
S_ensemble = rs.merge_similarities(
    tsr.S,
    ibr.S,
    beta1,
)

# Recommendation with only ratings level combination

In [12]:
# Ratings-level ensemble: score every target playlist with the three models
# separately, then blend the scores with weights alpha1 (TSR), alpha2 (IBR) and
# 1 - alpha1 - alpha2 (UBR).
URM = URM.tocsr()
tsr.S = tsr.S.tocsr()
ibr.S = ibr.S.tocsr()
ubr.S = ubr.S.tocsr()

# Column sums of each similarity matrix; the H_* constants are added to them
# before they are used as normalisers.
div_t = tsr.S.sum(axis=0)
div_i = ibr.S.sum(axis=0)
div_u = ubr.S.sum(axis=0)
H_t=10
H_i=30
H_u=20

# Loop-invariant terms, computed once instead of once per playlist.
norm_t = div_t + H_t
norm_i = div_i + H_i
inv_norm_u = 1/(div_u+H_u)
weight_u = 1-alpha1-alpha2
n_targets = IX_tgt_playlists.values.shape[0]

# Collect the formatted rows in a list: np.append inside the loop copies the
# whole accumulated array on every iteration (quadratic in the number of playlists).
formatted_rows = []
for p in IX_tgt_playlists.values:
    playlist_row = URM[p,:]
    avg_sims_t = (playlist_row.dot(tsr.S).toarray().ravel())/norm_t
    avg_sims_i = (playlist_row.dot(ibr.S).toarray().ravel())/norm_i
    avg_sims_u = (ubr.S[p,:].multiply(inv_norm_u).dot(UBR_URM).toarray().ravel())

    # The divisions above involve the (1, n) result of sum(axis=0); flatten the
    # blended scores back to a 1-D array before ranking.
    avg_sims = np.array(avg_sims_t*alpha1 + avg_sims_i*alpha2 + avg_sims_u*weight_u).ravel()

    top = rs.top5_outside_playlist(avg_sims, p, train, IX_tgt_playlists, tsr.IX_tgt_items, False, False)
    formatted_rows.append(rs.sub_format(top))
    if (p % 1000 == 0):
        print('Recommended ' + str(p) + ' users over ' + str(n_targets))

# Same flat array the incremental np.append used to produce (empty input still
# yields an empty 1-D array).
recommendetions = np.asarray(formatted_rows).ravel()

rec_only_ratings =  pd.DataFrame({'playlist_id' : IX_tgt_playlists.index.values, 'track_ids' : recommendetions})

Recommended 0 users over 9129
Recommended 1000 users over 9129
Recommended 2000 users over 9129
Recommended 3000 users over 9129
Recommended 4000 users over 9129
Recommended 5000 users over 9129
Recommended 6000 users over 9129
Recommended 7000 users over 9129
Recommended 8000 users over 9129
Recommended 9000 users over 9129


# Recommendation with S matrix and ratings level combination

In [13]:
# Bilevel ensemble: scores from the merged TSR/IBR similarity (S_ensemble) are
# blended with the user-based scores using weights beta2 and 1 - beta2.
# Requires S_ensemble, URM and UBR_URM from the earlier cells.
ubr.S = ubr.S.tocsr()
div_u = ubr.S.sum(axis=0)
H_u=20

# NOTE(review): div_e / H_e are currently unused because the normalisation of
# avg_sims_e is disabled below; kept so it can be re-enabled.
div_e = S_ensemble.sum(axis=0)
H_e=30

# Loop-invariant terms, computed once instead of once per playlist.
inv_norm_u = 1/(div_u+H_u)
weight_u = 1-beta2
n_targets = IX_tgt_playlists.values.shape[0]

# Collect the formatted rows in a list: np.append inside the loop copies the
# whole accumulated array on every iteration (quadratic in the number of playlists).
formatted_rows = []
for p in IX_tgt_playlists.values:
    avg_sims_e = (URM[p,:].dot(S_ensemble).toarray().ravel())#/(div_e+H_e)
    avg_sims_u = (ubr.S[p,:].multiply(inv_norm_u).dot(UBR_URM).toarray().ravel())

    avg_sims = np.array(avg_sims_e*beta2 + avg_sims_u*weight_u).ravel()

    top = rs.top5_outside_playlist(avg_sims, p, train, IX_tgt_playlists, tsr.IX_tgt_items, False, False)
    formatted_rows.append(rs.sub_format(top))
    if (p % 1000 == 0):
        print('Recommended ' + str(p) + ' users over ' + str(n_targets))

# Same flat array the incremental np.append used to produce (empty input still
# yields an empty 1-D array).
recommendetions = np.asarray(formatted_rows).ravel()

rec_bilevel =  pd.DataFrame({'playlist_id' : IX_tgt_playlists.index.values, 'track_ids' : recommendetions})

Recommended 0 users over 9129
Recommended 1000 users over 9129
Recommended 2000 users over 9129
Recommended 3000 users over 9129
Recommended 4000 users over 9129
Recommended 5000 users over 9129
Recommended 6000 users over 9129
Recommended 7000 users over 9129
Recommended 8000 users over 9129
Recommended 9000 users over 9129


# Evaluation

In [14]:
# Score the ratings-level recommendations against the `test` split using the
# metric selected by the 'MAP' argument.
map_eval_only_ratings = rs.evaluate(
    rec_only_ratings,
    test,
    'MAP',
)

In [15]:
# Score the bilevel recommendations against the `test` split using the metric
# selected by the 'MAP' argument.
map_eval_bilevel = rs.evaluate(
    rec_bilevel,
    test,
    'MAP',
)

In [16]:
# Show both scores, one per line: ratings-level ensemble first, bilevel second.
print(map_eval_only_ratings, map_eval_bilevel, sep='\n')

0.08813378610289664
0.08361923540365943


# Saving run data

In [17]:
# Record of this run: which recommender classes were combined, the parameters
# they were fitted with, the ensemble weights and the two evaluation scores.
run_data = dict(
    recommender_1=tsr.__class__.__name__,
    recommender_2=ibr.__class__.__name__,
    recommender_3=ubr.__class__.__name__,
    fit_parameters_1=fit_dict_tsr,
    fit_parameters_2=fit_dict_ibr,
    fit_parameters_3=fit_dict_ubr,
    alpha1_only_ratings=alpha1,
    alpha2_only_ratings=alpha2,
    beta1_bilevel=beta1,
    beta2_bilevel=beta2,
    evaluation_only_ratings=map_eval_only_ratings,
    evaluation_bilevel=map_eval_bilevel,
)

In [18]:
# Append this run to the log file as one pretty-printed JSON object followed by
# a newline. (The file is a sequence of JSON objects, not one JSON document.)
# Serialise BEFORE opening the file: json.dump streams chunks into the file, so
# a value that cannot be serialised would leave a truncated object in the
# append-only log. With json.dumps the failure happens before anything is written.
serialized_run = json.dumps(run_data, indent=2)
with open('runs_data.json', 'a') as fp:
    fp.write(serialized_run + '\n')

# Save similarities for reuse

In [19]:
# Persist the three similarity matrices so later sessions can skip the fits.
# NOTE: ubr.S is written in whatever orientation it has at this point; if the
# in-place transpose cell has been run, the transposed matrix is what is stored.
similarity_dumps = (
    ('BuiltStructures/cbf_Smatrix_Luca.npz', tsr.S),
    ('BuiltStructures/ibr_Smatrix_Luca.npz', ibr.S),
    ('BuiltStructures/ubr_Smatrix_Luca.npz', ubr.S),
)
for dump_path, similarity in similarity_dumps:
    sps.save_npz(dump_path, similarity)

# Auxiliary code to speed up computation

In [None]:
# Shortcut that replaces the fitting cells: build unfitted recommenders and
# attach the similarity matrices saved by a previous run.
# NOTE(review): only .S is restored. Other attributes set by fit() (for example
# tsr.IX_tgt_items, which the recommendation cells read) are not loaded here --
# confirm they are available before using this path.
# NOTE(review): ubr.S comes back in the orientation it was saved in; check
# whether the transpose cell still needs to be applied.
ibr = IBR.ItemBasedRecommender(**fit_dict_ibr)
ibr.S = sps.load_npz('BuiltStructures/ibr_Smatrix_Luca.npz')

tsr = TSR.TopSimilarRecommender(**fit_dict_tsr)
tsr.S = sps.load_npz('BuiltStructures/cbf_Smatrix_Luca.npz')

ubr = UBR.UserBasedRecommender(**fit_dict_ubr)
ubr.S = sps.load_npz('BuiltStructures/ubr_Smatrix_Luca.npz')