In [2]:
import pandas as pd
import numpy as np
import os.path
import recsys as rs
import notipy
import math
import sys
import TopSimilarRecommender as TSR
import ItemBasedRecommender as IBR
import random
from scipy import sparse as sps
import importlib

In [3]:
# Load the raw tab-separated datasets.
# The separator is passed by keyword: pandas deprecated positional arguments
# after the path in 1.3 and made them keyword-only in 2.0, where
# pd.read_csv(path, '\t') raises TypeError.
train = pd.read_csv('Data/train_final.csv', sep='\t')
tracks = pd.read_csv('Data/tracks_final.csv', sep='\t')
pl_info = pd.read_csv('Data/playlists_final.csv', sep='\t')
# NOTE: tgt_playlists and tgt_tracks are rebound later in this notebook by the
# values returned from rs.split_train_test.
tgt_playlists = pd.read_csv('Data/target_playlists.csv', sep='\t')
tgt_tracks = pd.read_csv('Data/target_tracks.csv', sep='\t')

In [4]:
# Build a local train/test split and rebind the target tracks/playlists to the
# ones returned by the split.
# NOTE: `train` is both the input and an output, so re-running this cell
# splits the already-split frame again.
# NOTE(review): the meaning of the positional arguments 10, 20, 5 and 2517 is
# defined inside rs.split_train_test — confirm against that helper.
(
    train,
    test,
    tgt_tracks,
    tgt_playlists,
) = rs.split_train_test(train, 10, 20, 5, 2517)
tgt_playlists.shape

(4420, 1)

In [5]:
# Item-based recommender, fitted on the track metadata, the training
# interactions and the target tracks.
# NOTE(review): `saved_similarity` presumably points at a precomputed
# similarity matrix that fit() loads instead of rebuilding — confirm.
ibr = IBR.ItemBasedRecommender(
    idf=True,
    shrinkage=10,
    n_el_sim=65,
)
ibr.fit(
    tracks,
    train,
    tgt_tracks,
    saved_similarity='BuiltStructures/ibr_sim_65el_h10_idfTrue.npz',
)
print('Model fitted!')

  2%|▏         | 84/4420 [00:00<00:05, 838.69it/s]

Calculated Indices


100%|██████████| 4420/4420 [00:08<00:00, 524.29it/s]


(4420, 100000)
Model URM built
Model URM regularized with IDF!
Model fitted!


In [7]:
# Top-similar recommender configured with the artist, album and playcount
# track attributes, fitted on the track metadata and the target tracks.
# NOTE(review): `saved_similarity` presumably points at a precomputed
# similarity matrix that fit() loads instead of rebuilding — confirm.
tsr = TSR.TopSimilarRecommender(
    attributes=['artist_id', 'album', 'playcount'],
    idf=True,
    n_min_attr=90,
    n_el_sim=65,
)
tsr.fit(
    tracks,
    tgt_tracks,
    saved_similarity='BuiltStructures/tsr_sim_65el_idfTrue_artist_album_playcount.npz',
)
print('Model fitted!')

Fixed dataset
Calculated Indices
ICM built
ICM regularized with IDF!
Model fitted!


# Recommendation

In [None]:
# Only the third value returned by create_sparse_indexes (the index of the
# target playlists) is needed here; the other three are discarded.
_, _, IX_tgt_playlists, _ = rs.create_sparse_indexes(playlists=tgt_playlists)

# Build the target URM from the training interactions and convert it to CSR
# in the same statement; the loop below slices it one row at a time.
URM = rs.create_tgt_URM(IX_tgt_playlists, tsr.IX_items, train).tocsr()
print('URM built')

In [6]:
# Re-import the helper module so that edits made to it since the first import
# are picked up by the calls below.
rs = importlib.reload(rs)

# Column sums of each similarity matrix, used to normalise the raw scores.
div_t = tsr.S.sum(axis=0)
div_i = ibr.S.sum(axis=0)
H = 20        # shrinkage term added to every normalisation denominator
ALPHA = 0.75  # weight of the TSR scores; the IBR scores get 1 - ALPHA

# The denominators do not depend on the playlist, so compute them once.
# Flattening to 1-D ndarrays keeps the per-playlist scores 1-D as well
# (dividing by the raw column sums would yield a 1 x n matrix when they are
# np.matrix objects).
denom_t = np.asarray(div_t).ravel() + H
denom_i = np.asarray(div_i).ravel() + H

n_playlists = IX_tgt_playlists.values.shape[0]

# Collect the formatted recommendations in a list: np.append copies the whole
# array on every call, which makes the loop quadratic in the playlist count.
rec_rows = []
for p in IX_tgt_playlists.values:
    playlist_row = URM[p, :]
    avg_sims_t = playlist_row.dot(tsr.S).toarray().ravel() / denom_t
    avg_sims_i = playlist_row.dot(ibr.S).toarray().ravel() / denom_i
    # Weighted blend of the two recommenders' normalised scores.
    avg_sims = avg_sims_t * ALPHA + avg_sims_i * (1 - ALPHA)
    top = rs.top5_outside_playlist(avg_sims, p, train, IX_tgt_playlists, tsr.IX_tgt_items, False, False)
    rec_rows.append(rs.sub_format(top))
    if (p % 1000 == 0):
        print('Recommended ' + str(p) + ' users over ' + str(n_playlists))

# Keep the original variable name and array type for any later cell using it.
recommendetions = np.array(rec_rows)
rec_tsr = pd.DataFrame({'playlist_id': IX_tgt_playlists.index.values, 'track_ids': recommendetions})

Recommended 0 users over 10000
Recommended 1000 users over 10000
Recommended 2000 users over 10000
Recommended 3000 users over 10000
Recommended 4000 users over 10000
Recommended 5000 users over 10000
Recommended 6000 users over 10000
Recommended 7000 users over 10000
Recommended 8000 users over 10000
Recommended 9000 users over 10000


# Evaluation

In [44]:
def evaluate(results, test, eval_metric='MAP'):
    """Score a recommendation table against the held-out interactions.

    Parameters
    ----------
    results : pandas.DataFrame
        One row per playlist; each row is passed to ``calculate_AP``.
    test : pandas.DataFrame
        Held-out interactions, forwarded unchanged to ``calculate_AP``.
    eval_metric : str, default 'MAP'
        Name of the metric. Only ``'MAP'`` is implemented.

    Returns
    -------
    float
        The mean of the per-row average precision, or ``0.0`` when
        ``results`` has no rows.

    Raises
    ------
    ValueError
        If ``eval_metric`` is not a supported metric name.
    """
    # Fail loudly on an unknown metric instead of hitting an unassigned local.
    if eval_metric != 'MAP':
        raise ValueError(
            "Unsupported eval_metric %r; only 'MAP' is implemented" % (eval_metric,)
        )

    n_rows = results.shape[0]
    if n_rows == 0:
        # Nothing to average; avoid dividing by zero.
        return 0.0

    APs = results.apply(calculate_AP, axis=1, args=(test,))
    return APs.sum() / n_rows

def calculate_AP(row, test):
    """Compute the average precision of one playlist's recommendations.

    Parameters
    ----------
    row : mapping
        Must provide ``'playlist_id'`` and ``'track_ids'``; the latter is a
        space-separated string of recommended track ids, in rank order.
    test : pandas.DataFrame
        Held-out interactions with ``'playlist_id'`` and ``'track_id'``
        columns.

    Returns
    -------
    float
        Sum of precision@i over the ranks i that hold a relevant track,
        divided by ``min(number of held-out tracks of the playlist, 5)``.
        ``0.0`` when the playlist has no held-out tracks or no
        recommendations.
    """
    # Convert once so that every comparison uses the same id type.
    p_id = int(row['playlist_id'])
    recs = np.fromstring(row['track_ids'], dtype=float, sep=' ')

    # Extract this playlist's held-out tracks once instead of re-scanning the
    # whole test frame for every recommended track.
    relevant_tracks = test.loc[test['playlist_id'] == p_id, 'track_id'].to_numpy()
    n_rel_items = min(relevant_tracks.size, 5)
    if n_rel_items == 0 or recs.size == 0:
        # No relevant items (would divide by zero) or nothing recommended.
        return 0.0

    hits = np.isin(recs, relevant_tracks)
    # precision_at_i[i] = (# relevant among the first i+1 recs) / (i+1)
    precision_at_i = np.cumsum(hits) / np.arange(1, recs.size + 1)
    return float((precision_at_i * hits).sum() / n_rel_items)

In [45]:
# Score the blended recommendations against the local test split.
map_eval = evaluate(results=rec_tsr, test=test, eval_metric='MAP')
print('Evaluation completed!')

Evaluation completed!


In [46]:
# Display the score returned by evaluate(rec_tsr, test, 'MAP').
map_eval

0.10397209653092009

# Save to CSV

In [10]:
# Write the recommendation table to disk without the DataFrame index column.
# NOTE(review): rec_tsr was produced for the playlists returned by
# rs.split_train_test, not for the original target_playlists.csv — confirm
# this is the file that is meant to be submitted.
submission_path = (
    'Submissions/ensemble_submission_notags_alpha075_nel65,shr=20_final_ensemble'
    + str(0)
    + '.csv'
)
rec_tsr.to_csv(submission_path, index=False)
print('Results saved as csv!')

Results saved as csv!
