# Imports

In [1]:
import pandas as pd
import numpy as np
import recsys as rs
import TopSimilarRecommender as TSR
import ItemBasedRecommender as IBR
import UserBasedRecommender as UBR
from scipy import sparse as sps

In [2]:
# Load the challenge datasets; every file is tab-separated.
# The separator is passed by keyword: pandas deprecated positional arguments
# after the path in 1.x and raises a TypeError for them from 2.0 onwards.
train = pd.read_csv('Data/train_final.csv', sep='\t')
tracks = pd.read_csv('Data/tracks_final.csv', sep='\t')
pl_info = pd.read_csv('Data/playlists_final.csv', sep='\t')
tgt_playlists = pd.read_csv('Data/target_playlists.csv', sep='\t')
tgt_tracks = pd.read_csv('Data/target_tracks.csv', sep='\t')

# Content-Based Recommender

In [3]:
# Content-based model built on the tracks' artist and album attributes,
# with IDF regularisation switched on.
# NOTE(review): n_min_attr / n_el_sim are tuning knobs of TopSimilarRecommender;
# their exact meaning is not visible in this notebook -- confirm in the module.
tsr = TSR.TopSimilarRecommender(
    attributes=['artist_id', 'album'],
    idf=True,
    n_min_attr=90,
    n_el_sim=120,
)
tsr.fit(
    tracks,
    tgt_tracks,
    multiprocessing=True,
)
print('Model fitted!')

Fixed dataset
Calculated Indices
ICM built
ICM regularized with IDF!
Computed 0 similarities over chunk of 25000 elements.
Computed 0 similarities over chunk of 25000 elements.
Computed 0 similarities over chunk of 25000 elements.
Computed 0 similarities over chunk of 25000 elements.
Computed 5000 similarities over chunk of 25000 elements.
Computed 5000 similarities over chunk of 25000 elements.
Computed 5000 similarities over chunk of 25000 elements.
Computed 5000 similarities over chunk of 25000 elements.
Computed 10000 similarities over chunk of 25000 elements.
Computed 10000 similarities over chunk of 25000 elements.
Computed 10000 similarities over chunk of 25000 elements.
Computed 10000 similarities over chunk of 25000 elements.
Computed 15000 similarities over chunk of 25000 elements.
Computed 15000 similarities over chunk of 25000 elements.
Computed 15000 similarities over chunk of 25000 elements.
Computed 15000 similarities over chunk of 25000 elements.
Computed 20000 similari

# Item-Based Recommender

In [4]:
# Item-based model fitted on the training interactions, with IDF
# regularisation and a shrinkage term of 10.
# NOTE(review): n_el_sim presumably caps the similarities kept per item -- confirm.
ibr = IBR.ItemBasedRecommender(
    idf=True,
    shrinkage=10,
    n_el_sim=140,
)
ibr.fit(
    tracks,
    train,
    tgt_tracks,
    multiprocessing=True,
)
print('Model fitted!')

Calculated Indices
Model URM built
Model URM regularized with IDF!
Computed 0 similarities over chunk of 25000 elements.
Computed 0 similarities over chunk of 25000 elements.
Computed 0 similarities over chunk of 25000 elements.
Computed 0 similarities over chunk of 25000 elements.
Computed 5000 similarities over chunk of 25000 elements.
Computed 5000 similarities over chunk of 25000 elements.
Computed 5000 similarities over chunk of 25000 elements.
Computed 5000 similarities over chunk of 25000 elements.
Computed 10000 similarities over chunk of 25000 elements.
Computed 10000 similarities over chunk of 25000 elements.
Computed 10000 similarities over chunk of 25000 elements.
Computed 10000 similarities over chunk of 25000 elements.
Computed 15000 similarities over chunk of 25000 elements.
Computed 15000 similarities over chunk of 25000 elements.
Computed 15000 similarities over chunk of 25000 elements.
Computed 15000 similarities over chunk of 25000 elements.
Computed 20000 similariti

# User-Based Recommender

In [5]:
# User-based model using the 'imp_cos' similarity measure and a shrinkage
# term of 10; unlike the other two models it is fitted against the target
# playlists rather than the target tracks.
# NOTE(review): n_el_sim presumably caps the neighbours kept per playlist -- confirm.
ubr = UBR.UserBasedRecommender(
    measure='imp_cos',
    shrinkage=10,
    n_el_sim=10,
)
ubr.fit(
    tracks,
    train,
    tgt_playlists,
    multiprocessing=True,
)
print('Model fitted!')

Calculated Indices
(45649, 100000)
Model URM built
Computed 0 similarities over chunk of 11412 elements.
Computed 0 similarities over chunk of 11412 elements.
Computed 0 similarities over chunk of 11412 elements.
Computed 0 similarities over chunk of 11413 elements.
Computed 5000 similarities over chunk of 11412 elements.
Computed 5000 similarities over chunk of 11412 elements.
Computed 5000 similarities over chunk of 11412 elements.
Computed 5000 similarities over chunk of 11413 elements.
Computed 10000 similarities over chunk of 11412 elements.
Computed 10000 similarities over chunk of 11412 elements.
Computed 10000 similarities over chunk of 11412 elements.
Computed 10000 similarities over chunk of 11413 elements.
Similarity built
Model fitted!


# URM Creation

In [6]:
# create_sparse_indexes returns four values; only the third one (the playlist
# index) is kept from each call.
_, _, IX_tgt_playlists, _ = rs.create_sparse_indexes(
    playlists=tgt_playlists,
)
_, _, IX_playlists, _ = rs.create_sparse_indexes(
    tracks_info=tracks,
    playlists=train,
    tracks_reduced=tracks,
)

# URM_UBR is indexed by all playlists and the target tracks (used by the
# user-based scores); URM is indexed by the target playlists and all tracks.
# NOTE(review): row/column orientation is inferred from the index arguments -- confirm in recsys.
URM_UBR = rs.create_UBR_URM(
    IX_playlists,
    tsr.IX_tgt_items,
    train,
)
URM = rs.create_tgt_URM(
    IX_tgt_playlists,
    tsr.IX_items,
    train,
)
print('URM built')

URM built


# Recommendation

In [7]:
# Convert every matrix used in the recommendation loop to CSR, which the loop
# slices row by row.
tsr.S = tsr.S.tocsr()
ibr.S = ibr.S.tocsr()
# WARNING: the user-based similarity is transposed in place, so running this
# cell twice without refitting `ubr` flips it back to its original orientation.
ubr.S = ubr.S.transpose().tocsr()
URM, URM_UBR = URM.tocsr(), URM_UBR.tocsr()

In [8]:
# Column sums of the content-based and item-based similarity matrices; the
# recommendation loop divides the raw scores by these (plus a constant).
div_t, div_i = tsr.S.sum(axis=0), ibr.S.sum(axis=0)
# Constants added to the column sums in those denominators.
H_t, H_i = 60, 10
# Starts empty; the recommendation loop fills it with the formatted results.
recommendetions = np.empty(0)

In [9]:
# Blend the three models' scores for every target playlist and collect the
# formatted recommendations into the `rec_tsr` submission frame.

# The shrunk normalisers do not depend on the playlist, so build them once as
# flat arrays instead of recomputing them on every iteration.
norm_t = np.asarray(div_t + H_t + 1e-6).ravel()
norm_i = np.asarray(div_i + H_i + 1e-6).ravel()
n_targets = IX_tgt_playlists.values.shape[0]

# Accumulate in a list created here rather than np.append-ing to an array from
# an earlier cell: np.append copies the whole array on every call (quadratic),
# and re-running this cell alone used to append a second copy of the results.
collected = []
for p in IX_tgt_playlists.values:
    # Content-based and item-based scores: playlist row times similarity,
    # normalised by the shrunk column sums.
    avg_sims_t = URM[p, :].dot(tsr.S).toarray().ravel() / norm_t
    avg_sims_i = URM[p, :].dot(ibr.S).toarray().ravel() / norm_i
    # User-based scores: similarity row for this playlist times the URM.
    avg_sims_u = ubr.S[p, :].dot(URM_UBR).toarray().ravel()
    # Fixed-weight blend of the three models (0.4 / 0.5 / 0.1).
    avg_sims = np.array(avg_sims_t*0.4 + avg_sims_i*0.5 + avg_sims_u*0.1).ravel()
    # NOTE(review): presumably picks the 5 best-scoring target tracks not
    # already in the playlist -- confirm in recsys.
    top = rs.top5_outside_playlist(avg_sims, p, train, IX_tgt_playlists, tsr.IX_tgt_items, False, False)
    collected.append(rs.sub_format(top))
    if p % 1000 == 0:
        print('Recommended ' + str(p) + ' users over ' + str(n_targets))

# Flattened like the original np.append accumulation.
# NOTE(review): assumes rs.sub_format yields one scalar per playlist, so the
# result lines up with the playlist ids below.
recommendetions = np.asarray(collected).ravel()
rec_tsr = pd.DataFrame({'playlist_id': IX_tgt_playlists.index.values, 'track_ids': recommendetions})

Recommended 0 users over 10000
Recommended 1000 users over 10000
Recommended 2000 users over 10000
Recommended 3000 users over 10000
Recommended 4000 users over 10000
Recommended 5000 users over 10000
Recommended 6000 users over 10000
Recommended 7000 users over 10000
Recommended 8000 users over 10000
Recommended 9000 users over 10000


# Save to CSV

In [10]:
# Write the submission frame to disk without the pandas row index.
rec_tsr.to_csv(
    'Submissions/submission_x.csv',
    index=False,
)
print('Results saved as csv!')

Results saved as csv!
