In [19]:
# Reload the local `recsys` helper module so that edits made to it on disk
# are picked up without restarting the kernel.
# The module is imported in this cell first: the cell sits above the main
# import cell, so on a fresh kernel `rs` would otherwise be unbound here and
# the reload would raise NameError.
import importlib

import recsys as rs

rs = importlib.reload(rs)

In [2]:
import numpy as np
import pandas as pd
import recsys as rs
import json
import notipy
import random
import TopSimilarRecommender as TSR
import ItemBasedRecommender as IBR
import UserBasedRecommender as UBR

# Train/Test split

In [3]:
# Load the two tab-separated input files: the training data and the tracks
# data. The separator is passed by keyword because recent pandas versions
# accept only the file path as a positional argument of read_csv.
train = pd.read_csv('Data/train_final.csv', sep='\t')
tr_info = pd.read_csv('Data/tracks_final.csv', sep='\t')

In [4]:
# Split the loaded data into a train part and a test part, and obtain the
# target tracks and target playlists used by the cells below.
# NOTE: `train` is rebound to the training part, so re-running this cell
# without re-running the loading cell would split already-split data.
# The numeric arguments are positional; their meaning is defined by
# recsys.split_train_test, which is not visible in this notebook.
train, test, tgt_tracks, tgt_playlists = rs.split_train_test(train, 10, 20, 5, 2517)

# Ensemble parameters

In [5]:
# Keyword arguments forwarded to each recommender's constructor in the
# model-fitting cells (one dict per recommender).

# TopSimilarRecommender
fit_dict_tsr = dict(
    attributes=['artist_id', 'album', 'playcount'],
    n_min_attr=90,
    idf=True,
    measure='dot',
    shrinkage=0,
    n_el_sim=20,
)

# ItemBasedRecommender
fit_dict_ibr = dict(
    idf=True,
    measure='dot',
    shrinkage=10,
    n_el_sim=20,
)

# UserBasedRecommender
fit_dict_ubr = dict(
    idf=True,
    measure='imp_cos',
    shrinkage=20,
    n_el_sim=50,
)

# Ensemble weights: `alpha` is handed to rs.merge_similarities, while `beta`
# weights the merged-similarity scores against the user-based scores
# (beta and 1 - beta respectively) in the recommendation loop.
alpha = 0.6
beta = 0.6

# Models fitting

In [6]:
# Instantiate the TopSimilarRecommender with the parameters from
# fit_dict_tsr and fit it on the tracks data for the target tracks.
# The progress bar in the saved output shows this taking about 15 minutes.
tsr = TSR.TopSimilarRecommender(**fit_dict_tsr)
tsr.fit(tr_info, tgt_tracks)

Fixed dataset
Calculated Indices
Eliminated low frequency attributes!
ICM built
ICM regularized with IDF!


  return np.log10(n_items / frequencies)
100%|██████████| 100000/100000 [14:47<00:00, 112.69it/s]


Similarity built


In [7]:
# Instantiate the ItemBasedRecommender with the parameters from fit_dict_ibr
# and fit it on the tracks data, the training data and the target tracks.
# The progress bar in the saved output shows the long step taking about
# 13.5 minutes.
ibr = IBR.ItemBasedRecommender(**fit_dict_ibr)
ibr.fit(tr_info, train, tgt_tracks)

  1%|          | 48/4420 [00:00<00:09, 474.50it/s]

Calculated Indices


100%|██████████| 4420/4420 [00:09<00:00, 442.08it/s]
  0%|          | 40/100000 [00:00<04:15, 391.76it/s]

Model URM built
Model URM regularized with IDF!


100%|██████████| 100000/100000 [13:29<00:00, 123.49it/s]


Similarity built


In [None]:
# Instantiate the UserBasedRecommender with the parameters from fit_dict_ubr
# and fit it for the target playlists.
# The tracks data frame loaded by this notebook is named `tr_info`; no
# variable called `tracks` is defined anywhere in it, so the first argument
# uses `tr_info`, matching the other two fit calls.
ubr = UBR.UserBasedRecommender(**fit_dict_ubr)
ubr.fit(tr_info, train, tgt_playlists)

# Similarity level ensemble

In [92]:
# Merge the similarity matrices of the TopSimilarRecommender and the
# ItemBasedRecommender into a single matrix. `alpha` is the mixing parameter
# handed to the helper; how exactly it weights the two inputs is defined in
# recsys.merge_similarities (not visible here).
S_ensemble = rs.merge_similarities(tsr.S, ibr.S, alpha)

# Recommendation with ratings-level ensemble

In [None]:
# Index objects for the target tracks and target playlists; the first and
# last values returned by the helper are not used in this notebook.
_, IX_tgt_items, IX_tgt_playlists, _ = rs.create_sparse_indexes(
    playlists=tgt_playlists,
    tracks_reduced=tgt_tracks,
)

# Matrix for the target playlists, converted to CSR straight away because the
# recommendation loop slices it one row at a time.
URM = rs.create_tgt_URM(IX_tgt_playlists, tsr.IX_items, train).tocsr()

# Matrix used on the user-based side of the ensemble, also stored as CSR.
UBR_URM = rs.create_UBR_URM(ubr.IX_playlists, IX_tgt_items, train).tocsr()

In [93]:
# Ratings-level ensemble. For every target playlist, scores derived from the
# merged similarity matrix (S_ensemble) are blended with scores derived from
# the user-based model, and rs.top5_outside_playlist picks the tracks to
# recommend from the blended scores.
recommendetions = np.array([])

# NOTE: this rebinds ubr.S to its own transpose, so the cell is not
# idempotent; re-fit `ubr` before running this cell a second time.
ubr.S = ubr.S.T.tocsr()

# Column sums used as normalisers. H_e and H_u are constants added to the
# denominators below.
div_e = S_ensemble.sum(axis=0)
# Keyword fixed: it was misspelled `aixs`, which raises TypeError.
# NOTE(review): confirm that div_u has one entry per element of the
# user-based score vector it divides inside the loop.
div_u = ubr.S.sum(axis=0)
H_e = 30
H_u = 20

for p in IX_tgt_playlists.values:
    # Scores from the merged item similarity and from the user-based model.
    avg_sims_e = (URM[p, :].dot(S_ensemble).toarray().ravel()) / (div_e + H_e)
    avg_sims_u = (ubr.S[p, :].dot(UBR_URM).toarray().ravel()) / (div_u + H_u)

    # Convex combination of the two score vectors, flattened to 1-D.
    avg_sims = np.array(avg_sims_e * beta + avg_sims_u * (1 - beta)).ravel()
    top = rs.top5_outside_playlist(avg_sims, p, train, IX_tgt_playlists, tsr.IX_tgt_items, False, False)
    recommendetions = np.append(recommendetions, rs.sub_format(top))
    if (p % 1000 == 0):
        print('Recommended ' + str(p) + ' users over ' + str(IX_tgt_playlists.values.shape[0]))

# One row per target playlist, pairing its id with the formatted recommendations.
rec_ensemble = pd.DataFrame({'playlist_id' : IX_tgt_playlists.index.values, 'track_ids' : recommendetions})

100%|██████████| 4420/4420 [00:08<00:00, 505.73it/s]
  1%|          | 25/4420 [00:00<00:18, 242.07it/s]

URM built


100%|██████████| 4420/4420 [00:19<00:00, 227.56it/s]


# Evaluation

In [94]:
# Score the ensemble's recommendations against the held-out test split using
# the 'MAP' metric option of rs.evaluate.
map_eval = rs.evaluate(rec_ensemble, test, 'MAP')

# Saving run data

In [95]:
# Record of this run: the class name and constructor parameters of each of
# the three recommenders, the two ensemble weights and the evaluation score.
run_data = dict(
    recommender_1=tsr.__class__.__name__,
    recommender_2=ibr.__class__.__name__,
    recommender_3=ubr.__class__.__name__,
    fit_parameters_1=fit_dict_tsr,
    fit_parameters_2=fit_dict_ibr,
    fit_parameters_3=fit_dict_ubr,
    alpha_ensemble=alpha,
    beta_ensemble=beta,
    evaluation_result=map_eval,
)

In [96]:
# Append this run's record to runs_data.json as a pretty-printed JSON object
# followed by a newline. Because the file is opened in append mode, it
# accumulates one object per run and is therefore a sequence of JSON objects
# rather than a single JSON document.
with open('runs_data.json', 'a') as fp:
    json.dump(run_data, fp, indent=2)
    fp.write('\n')

In [97]:
# Display the score returned by rs.evaluate for this run.
map_eval

0.08471568627450975