In [1]:
import pandas as pd
import numpy as np
import os.path
import recsys as rs
import notipy
import math
import sys
import TopSimilarRecommender as TSR
import ItemBasedRecommender as IBR
import UserBasedRecommender as UBR
import random
from scipy import sparse as sps
import importlib

In [2]:
# All input files are tab-separated. The separator is passed by keyword because
# recent pandas releases accept only the file path as a positional argument of
# read_csv; the old positional form raises a TypeError there.
train = pd.read_csv('Data/train_final.csv', sep='\t')
tracks = pd.read_csv('Data/tracks_final.csv', sep='\t')
pl_info = pd.read_csv('Data/playlists_final.csv', sep='\t')
tgt_playlists = pd.read_csv('Data/target_playlists.csv', sep='\t')
tgt_tracks = pd.read_csv('Data/target_tracks.csv', sep='\t')

In [3]:
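# Local validation only: uncomment the next line to build a held-out `test` split from `train`
# (it also replaces tgt_tracks and tgt_playlists). The Evaluation section needs that `test` frame.
#train, test, tgt_tracks, tgt_playlists = rs.split_train_test(train, 10, 20, 5, 2517)
#tgt_playlists.shape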

In [3]:
# Configure and fit the item-based recommender on the training interactions.
# NOTE(review): n_el_sim is 20 here while the similarity file name says "65el";
# confirm whether the saved matrix or the constructor setting wins inside fit().
ibr = IBR.ItemBasedRecommender(
    idf=True,
    shrinkage=10,
    n_el_sim=20,
)
ibr.fit(
    tracks,
    train,
    tgt_tracks,
    saved_similarity='BuiltStructures/ibr_sim_65el_h10_idfTrue.npz',
)
print('Model fitted!')

  0%|          | 19/45649 [00:00<04:09, 183.11it/s]

Calculated Indices


100%|██████████| 45649/45649 [03:42<00:00, 205.50it/s]


(45649, 100000)
Model URM built
Model URM regularized with IDF!
Model fitted!


In [10]:
# Configure and fit the top-similar recommender on the selected track attributes.
# NOTE(review): n_el_sim is 20 here while the similarity file name says "65el";
# confirm whether the saved matrix or the constructor setting wins inside fit().
tsr = TSR.TopSimilarRecommender(
    attributes=['artist_id', 'album', 'playcount'],
    idf=True,
    n_min_attr=90,
    n_el_sim=20,
)
tsr.fit(
    tracks,
    tgt_tracks,
    saved_similarity='BuiltStructures/tsr_sim_65el_idfTrue_artist_album_playcount.npz',
)
print('Model fitted!')

Fixed dataset
Calculated Indices
ICM built
ICM regularized with IDF!
Model fitted!


In [None]:
# Configure and fit the user-based recommender against the target playlists.
ubr = UBR.UserBasedRecommender(
    measure='imp_cos',
    shrinkage=20,
    n_el_sim=65,
)
ubr.fit(
    tracks,
    train,
    tgt_playlists,
    saved_similarity='BuiltStructures/ubr_sim_65el_impcos.npz',
)
print('Model fitted!')

  0%|          | 20/45649 [00:00<03:48, 199.35it/s]

Calculated Indices


 38%|███▊      | 17266/45649 [01:19<02:10, 217.82it/s]

In [None]:
# Combine the similarity matrices of the top-similar (tsr) and item-based (ibr) models.
# NOTE(review): 0.6 is presumably the mixing weight; confirm in
# rs.merge_similarities which of the two matrices it is applied to.
similarity_matrix = rs.merge_similarities(tsr.S, ibr.S, 0.6)

# Recommendation

In [None]:
# Index lookups: one built from the target playlists only, one built from the
# full training interactions (only the third returned value is used in each case).
_, _, IX_tgt_playlists, _ = rs.create_sparse_indexes(playlists=tgt_playlists)
_, _, IX_playlists, _ = rs.create_sparse_indexes(
    tracks_info=tracks,
    playlists=train,
    tracks_reduced=tracks,
)

# Interaction matrix for the item-side models, converted to CSR straight away
# because the recommendation loop slices it one row at a time.
URM = rs.create_tgt_URM(IX_tgt_playlists, tsr.IX_items, train).tocsr()

# Interaction matrix for the user-based model.
URM_UBR = rs.create_UBR_URM(IX_playlists, tsr.IX_tgt_items, train)
print('URM built')

In [None]:
# The recommendation loop reads `ubr.S` one row at a time, so the matrix is
# replaced by its transpose in CSR form. The flag keeps this cell idempotent:
# without it, running the cell a second time would transpose the matrix back
# and silently break the loop below.
if not getattr(ubr, '_S_transposed', False):
    ubr.S = ubr.S.T.tocsr()
    ubr._S_transposed = True

In [None]:
# Reload the helper module so edits made to it since the first import are used.
rs = importlib.reload(rs)
URM_UBR = URM_UBR.tocsr()

# Formatted recommendations are gathered in a plain list and converted once at
# the end; calling np.append inside the loop would copy the whole array on
# every iteration.
collected = []
n_targets = IX_tgt_playlists.values.shape[0]
for p in IX_tgt_playlists.values:
    # Score from the merged item similarity matrix.
    avg_sims = URM[p, :].dot(similarity_matrix).toarray().ravel()
    # Score from the user-based model.
    avg_sims_u = ubr.S[p, :].dot(URM_UBR).toarray().ravel()
    # Equal-weight blend of the two score vectors.
    avg_sims = avg_sims * 0.5 + avg_sims_u * 0.5
    top = rs.top5_outside_playlist(avg_sims, p, train, IX_tgt_playlists, tsr.IX_tgt_items, False, False)
    collected.append(rs.sub_format(top))
    if p % 1000 == 0:
        print('Recommended ' + str(p) + ' users over ' + str(n_targets))

# Same array the incremental np.append used to produce, built in one step.
recommendetions = np.array(collected)

rec_df = pd.DataFrame({'playlist_id': IX_tgt_playlists.index.values, 'track_ids': recommendetions})

# Evaluation

In [14]:
def evaluate(results, test, eval_metric='MAP'):
    """Score a recommendation table against the held-out interactions.

    Parameters
    ----------
    results : pandas.DataFrame
        One row per playlist; each row is handed to ``calculate_AP``.
    test : pandas.DataFrame
        Held-out interactions, forwarded unchanged to ``calculate_AP``.
    eval_metric : str, default 'MAP'
        Metric to compute. Only 'MAP' (mean of the per-row average precision)
        is implemented.

    Returns
    -------
    float
        Sum of the per-row average precisions divided by the number of rows,
        or 0.0 when ``results`` has no rows.

    Raises
    ------
    ValueError
        If ``eval_metric`` is not a supported metric.
    """
    # Reject unknown metrics up front instead of failing later on an unbound
    # result variable.
    if eval_metric != 'MAP':
        raise ValueError("Unsupported eval_metric: " + repr(eval_metric) + " (only 'MAP' is implemented)")

    n_rows = results.shape[0]
    # Nothing to average: avoid dividing by zero.
    if n_rows == 0:
        return 0.0

    APs = results.apply(calculate_AP, axis=1, args=(test,))
    return APs.sum() / n_rows

def calculate_AP(row, test):
    """Average precision of one playlist's recommendations.

    Parameters
    ----------
    row : mapping (e.g. a DataFrame row)
        Must provide 'playlist_id' and 'track_ids', the latter being a
        space-separated string of recommended track ids in rank order.
    test : pandas.DataFrame
        Held-out interactions with 'playlist_id' and 'track_id' columns.

    Returns
    -------
    float
        Sum over the ranks holding a relevant track of precision-at-rank,
        divided by min(number of test rows for the playlist, 5). Returns 0.0
        when the playlist has no rows in ``test``.
    """
    p_id = row['playlist_id']
    recs = np.fromstring(row['track_ids'], dtype=float, sep=' ')

    # Filter the test frame once per playlist rather than once per
    # recommended track.
    relevant_tracks = test.loc[test['playlist_id'] == p_id, 'track_id'].to_numpy()
    n_rel_items = min(relevant_tracks.shape[0], 5)
    # No ground truth for this playlist: nothing can be relevant, and the
    # normalisation below would divide by zero.
    if n_rel_items == 0:
        return 0.0

    hits = np.isin(recs, relevant_tracks)

    AP = 0.0
    rel_sum = 0
    for rank, hit in enumerate(hits, start=1):
        if hit:
            rel_sum += 1
            # Precision at this rank, counted only for relevant positions.
            AP += (rel_sum / rank) / n_rel_items

    return AP

In [15]:
# `test` is only defined when the split_train_test cell near the top has been
# run. On a full-data (submission) run it does not exist, so the evaluation is
# skipped instead of stopping the notebook with a NameError.
if 'test' in globals():
    map_eval = evaluate(rec_df, test, 'MAP')
    print('Evaluation completed!')
else:
    map_eval = None
    print('Evaluation skipped: no held-out `test` split is defined.')

Evaluation completed!


In [None]:
# Display the MAP score stored in `map_eval` by the evaluation cell.
map_eval

# Save to csv

In [None]:
# Create the output folder if it is missing; to_csv does not create parent
# directories and would otherwise fail on a fresh checkout.
os.makedirs('Submissions', exist_ok=True)
rec_df.to_csv('Submissions/ensemble_submission_notags_alpha_0605_mixed_ensemble_noDivision.csv', index=False)
print('Results saved as csv!')