In [1]:
import pandas as pd
import numpy as np
import os.path
import recsys as rs
import notipy
import json
import math
import sys
import TopSimilarRecommender as TSR
import ItemBasedRecommender as IBR
import UserBasedRecommender as UBR
import random
from scipy import sparse as sps
import importlib

# Data loading and train/test split

In [2]:
# Load the raw tables; every file is tab-separated.
# `sep` is passed by keyword: positional arguments after the path were
# deprecated in pandas 1.3 and are rejected by pandas 2.0+.
data = pd.read_csv('Data/train_final.csv', sep='\t')
tracks = pd.read_csv('Data/tracks_final.csv', sep='\t')
pl_info = pd.read_csv('Data/playlists_final.csv', sep='\t')
# NOTE(review): both target frames are rebound by the train/test split in the
# next cell, so these file-based versions are not the ones used downstream.
tgt_playlists = pd.read_csv('Data/target_playlists.csv', sep='\t')
tgt_tracks = pd.read_csv('Data/target_tracks.csv', sep='\t')

In [3]:
# Build a local train/test split from the full interaction table.
# This rebinds `tgt_tracks` and `tgt_playlists` to the values returned by the
# split, replacing any frames previously stored under those names.
# NOTE(review): the meaning of 10, 20, 5 and 2517 is not visible here --
# confirm against recsys.train_test_split_interface before tuning them.
(train,
 test,
 tgt_tracks,
 tgt_playlists) = rs.train_test_split_interface(data, 10, 20, 5, 2517)

100%|██████████| 45649/45649 [05:24<00:00, 140.51it/s]


# Model fitting

### Parameters

In [4]:
# Keyword arguments forwarded to TopSimilarRecommender(**fit_dict_tsr).
fit_dict_tsr = dict(
    attributes=['artist_id', 'album', 'playcount'],
    n_min_attr=90,
    idf=True,
    measure='imp_cos',
    shrinkage=0,
    n_el_sim=65,
)

# Keyword arguments forwarded to ItemBasedRecommender(**fit_dict_ibr).
fit_dict_ibr = dict(
    idf=True,
    measure='imp_cos',
    shrinkage=10,
    n_el_sim=65,
)

In [5]:
# Item-based recommender configured from fit_dict_ibr, fitted on the training
# interactions. The recorded run shows this cell taking over an hour.
ibr = IBR.ItemBasedRecommender(
    **fit_dict_ibr
)
ibr.fit(
    tracks,
    train,
    tgt_tracks,
)

  0%|          | 35/45649 [00:00<02:11, 347.06it/s]

Calculated Indices


100%|██████████| 45649/45649 [05:10<00:00, 146.87it/s]


(45649, 100000)
Model URM built
Model URM regularized with IDF!


100%|██████████| 100000/100000 [1:07:29<00:00, 24.69it/s]

(100000, 30319)





In [6]:
# Content-based recommender configured from fit_dict_tsr. Unlike the
# item-based model, fit() here receives only the track table and the target
# tracks, not the training interactions.
tsr = TSR.TopSimilarRecommender(
    **fit_dict_tsr
)
tsr.fit(
    tracks,
    tgt_tracks,
)

Fixed dataset
Calculated Indices


  0%|          | 22/100000 [00:00<07:46, 214.54it/s]

ICM built
ICM regularized with IDF!


100%|██████████| 100000/100000 [44:45<00:00, 37.24it/s]

Similarity built





# Recommending

In [7]:
# Content-based recommendations for the target playlists, with the optional
# similarity check and secondary sorting both switched off.
rec_cbf = tsr.recommend(
    tgt_playlists,
    train,
    sim_check=False,
    secondary_sorting=False,
)

100%|██████████| 9129/9129 [00:34<00:00, 267.91it/s]
  0%|          | 0/9129 [00:00<?, ?it/s]

URM built


100%|██████████| 9129/9129 [30:17<00:00,  5.02it/s]


In [8]:
# Item-based recommendations for the target playlists, using the same flags
# as the content-based run so the two results are comparable.
rec_ibr = ibr.recommend(
    tgt_playlists,
    train,
    sim_check=False,
    secondary_sorting=False,
)

100%|██████████| 9129/9129 [00:34<00:00, 262.37it/s]


(9129, 100000)
URM built


# Evaluation

In [9]:
# Score the item-based recommendations against the held-out test set using
# the 'MAP' metric (presumably mean average precision -- confirm in recsys).
map_eval_ibr = rs.evaluate(
    rec_ibr,
    test,
    'MAP',
)

In [10]:
# Score the content-based recommendations against the held-out test set using
# the 'MAP' metric (presumably mean average precision -- confirm in recsys).
map_eval_cbf = rs.evaluate(
    rec_cbf,
    test,
    'MAP',
)

In [11]:
# Show both scores, one per line: item-based first, then content-based.
print(map_eval_ibr, map_eval_cbf, sep='\n')

0.065837075984957
0.06567093876656833


# Saving run data

In [12]:
# One record per model: the recommender class name, the hyper-parameters it
# was built with, and the evaluation score obtained above.
run_data_cbf = dict(
    recommender=tsr.__class__.__name__,
    fit_parameters_1=fit_dict_tsr,
    evaluation_bilevel=map_eval_cbf,
)

run_data_ibr = dict(
    recommender=ibr.__class__.__name__,
    fit_parameters_1=fit_dict_ibr,
    evaluation_bilevel=map_eval_ibr,
)

In [13]:
# Append both run records to the shared log file, content-based first.
# Each record is written as an indented JSON object followed by a newline, so
# the file is a sequence of JSON documents rather than one JSON value.
with open('runs_data.json', 'a') as fp:
    for run_record in (run_data_cbf, run_data_ibr):
        json.dump(run_record, fp, indent=2)
        fp.write('\n')

# Save similarities for reuse

In [14]:
# Persist both similarity matrices so later runs can reload them instead of
# recomputing (the recorded fits above took roughly 45 and 67 minutes).
cbf_similarity_path = 'BuiltStructures/cbf_imp_cos_Smatrix_Luca.npz'
sps.save_npz(cbf_similarity_path, tsr.S)

ibr_similarity_path = 'BuiltStructures/ibr_imp_cos_Smatrix_Luca.npz'
sps.save_npz(ibr_similarity_path, ibr.S)