In [1]:
from SLIM_BPR.Cython.SLIM_BPR_Cython import SLIM_BPR_Cython
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import sparse
import random
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfTransformer
import utils
import CF_CBF_Ensemble as cf_cbf
 
# Value passed as `alfa` to New_Splitted_Ensemble.fit: the weight given to the
# URM-based item similarities, with (1 - alfa) going to the ICM-based ones.
# NOTE(review): presumably the best value found in an earlier tuning run — confirm.
BEST_ALFA = 0.9

In [2]:
class New_Splitted_Ensemble(object):
    """Item-similarity ensemble blended with the scores of a SLIM recommender.

    `fit` builds an item-item similarity matrix as a weighted sum (`alfa`) of
    a TF-IDF/cosine similarity computed from the interaction matrix and one
    computed from the item-content matrix. `recommend` scores the items for a
    user with that matrix and blends the result (`beta`) with the scores
    returned by `slim_recommender.compute_item_score`.
    """

    def fit(self, URM_csr, URM_train, ICM_csr, slim_recommender, alfa, beta=0.5):
        """Compute the ensemble item similarities and store the blend weight.

        Parameters
        ----------
        URM_csr : sparse matrix
            Interaction matrix used both to compute the collaborative
            similarities and to build user profiles in `recommend`.
        URM_train : sparse matrix
            Matrix the TF-IDF weights are fitted on before being applied to
            `URM_csr`; it must have the same number of columns as `URM_csr`.
        ICM_csr : sparse matrix
            Item-content matrix, one row per item (one per column of `URM_csr`).
        slim_recommender : object
            Recommender exposing `compute_item_score(user_id)`.
        alfa : float
            Weight of the URM-based similarities; the ICM-based ones get
            `1 - alfa`.
        beta : float, optional
            Weight of the similarity-based scores in `recommend`; the SLIM
            scores get `1 - beta`. It can also be changed after fitting by
            assigning to `self.beta`.
        """
        self.slim_recommender = slim_recommender
        # `recommend` reads this attribute; it used to be left unset.
        self.beta = beta
        self.URM_csr = URM_csr

        # Collaborative part: TF-IDF weights learned on URM_train, applied to
        # URM_csr, then cosine similarity between the columns (items).
        urm_transformer = TfidfTransformer()
        urm_transformer.fit(URM_train)
        tf_idf_csr = urm_transformer.transform(URM_csr)
        IRM = sparse.csr_matrix(tf_idf_csr.transpose())
        csr_similarities = sparse.csr_matrix(cosine_similarity(IRM, dense_output=False))

        # Content part: TF-IDF on the item features, then cosine similarity
        # between the rows (items).
        tf_idf_icm = TfidfTransformer().fit_transform(ICM_csr)
        icm_similarities = sparse.csr_matrix(cosine_similarity(tf_idf_icm, dense_output=False))

        print("COMPUTING ENSEMBLE SIMILARITIES")
        self.item_similarities = alfa*csr_similarities + (1-alfa)*icm_similarities

    @staticmethod
    def _as_flat_array(scores):
        """Return `scores` (sparse, matrix or array-like) as a 1-D ndarray."""
        if sparse.issparse(scores):
            scores = scores.toarray()
        return np.asarray(scores).ravel()

    def recommend(self, user_id, at=10, remove_seen=True):
        """Return the indices of the `at` best-scoring items for `user_id`.

        Parameters
        ----------
        user_id : int
            Row of the fitted interaction matrix to recommend for.
        at : int, optional
            Number of items to return.
        remove_seen : bool, optional
            When True, items already stored in the user's row are excluded.

        Returns
        -------
        numpy.ndarray
            Item indices sorted by decreasing blended score.

        Raises
        ------
        ValueError
            If the SLIM recommender returns a different number of scores than
            there are items in the similarity matrix.
        """
        user_profile = self.URM_csr.getrow(user_id)

        # Both score vectors are flattened to 1-D dense arrays so that they
        # can be blended element-wise regardless of their original container.
        similarity_scores = self._as_flat_array(user_profile.dot(self.item_similarities))
        slim_scores = self._as_flat_array(self.slim_recommender.compute_item_score(user_id))
        if slim_scores.shape != similarity_scores.shape:
            raise ValueError(
                "SLIM scores have shape %s, expected %s"
                % (slim_scores.shape, similarity_scores.shape)
            )

        # Rank on the blended scores (the ranking previously ignored the blend
        # and sorted the similarity-based scores only).
        item_scores = self.beta*similarity_scores + (1-self.beta)*slim_scores
        ranking = np.flip(np.argsort(item_scores), axis=0)

        if remove_seen:
            unseen_items_mask = np.isin(ranking, self.URM_csr[user_id].indices,
                                        assume_unique=True, invert=True)
            ranking = ranking[unseen_items_mask]

        return ranking[0:at]

In [4]:
# Load the four input tables in one pass; the order of the paths matches the
# order of the names on the left-hand side.
tracks, train, target, sequential = map(pd.read_csv, (
    '../input/tracks.csv',
    '../input/train.csv',
    '../input/target_playlists.csv',
    '../input/train_sequential.csv',
))

In [14]:
# Build one interaction matrix per input table.
# NOTE: `sequential` is rebound here from the DataFrame loaded earlier to the
# object returned by utils.build_urm_csr, so re-running this cell alone would
# pass that object back into utils.build_urm_csr instead of the DataFrame.
not_sequential = utils.build_urm_csr(train)
sequential = utils.build_urm_csr(sequential)

# Split each matrix into a training part and a test part.
# NOTE(review): no random seed is set in this notebook — if utils.split is
# randomized the split is not reproducible; confirm.
training_set_ns, test_set_ns = utils.split(not_sequential)
training_set_s, test_set_s = utils.split(sequential)

# Item-content matrix built from the tracks table.
icm_csr = utils.build_icm_csr(tracks)

In [21]:
# Indices of the rows of `sequential` that hold at least one stored entry.
np.nonzero(sequential.getnnz(axis=1))

(array([    7,    25,    29, ..., 50420, 50428, 50431], dtype=int64),)

In [6]:
# Sorted, de-duplicated row indices that have at least one non-zero entry in
# the test matrix (presumably the playlists that can be evaluated — confirm).
test_set_playlists = np.unique(test_set_ns.nonzero()[0])

In [7]:
# Number of stored entries in each row of the training matrix; used by the
# following cell to bucket the rows by profile length.
occurrencies_ns = training_set_ns.getnnz(axis = 1)

In [8]:
# Row-index tuples for three profile-length buckets:
# fewer than 15 entries, 15 to 29 entries, and 30 or more entries.
mask1_ns = np.nonzero(occurrencies_ns < 15)
mask2_ns = np.nonzero((15 <= occurrencies_ns) & (occurrencies_ns < 30))
mask3_ns = np.nonzero(occurrencies_ns >= 30)


In [9]:
# Restrict each profile-length bucket to the rows that appear in the test set.
test_set_1_ns, test_set_2_ns, test_set_3_ns = (
    np.intersect1d(bucket_rows, test_set_playlists)
    for bucket_rows in (mask1_ns, mask2_ns, mask3_ns)
)


In [10]:
# Three independent empty float32 matrices with the shape of `not_sequential`,
# one per profile-length bucket.
below_train_ns, middle_train_ns, above_train_ns = (
    sparse.csr_matrix(not_sequential.shape, dtype=np.float32)
    for _bucket in range(3)
)



In [11]:
# Training rows of each profile-length bucket (rows selected by the masks).
below_train_ns, middle_train_ns, above_train_ns = (
    training_set_ns[bucket_rows]
    for bucket_rows in (mask1_ns, mask2_ns, mask3_ns)
)


In [12]:
# One ensemble per profile-length bucket (below / middle / above).
ensemble1 = New_Splitted_Ensemble()
ensemble2 = New_Splitted_Ensemble()
ensemble3 = New_Splitted_Ensemble()

# SLIM-BPR model shared by the three ensembles. The following cells call
# `slim1.fit(...)` and pass `slim1` to every ensemble, so it must be created
# here; it was previously never defined.
# NOTE(review): only the training matrix is passed; confirm whether the
# constructor needs further options for this dataset.
# NOTE(review): the former `Sequential_Recommender()` instantiation was
# dropped: that class is neither defined nor imported in this notebook and
# its instance was not used by any cell.
slim1 = SLIM_BPR_Cython(training_set_ns)


SLIM_BPR_Cython: Estimated memory required for similarity matrix of 20635 items is 1703.21 MB


In [13]:
# Train the shared SLIM-BPR model with the chosen hyper-parameters.
print("FITTING SLIM...")
slim1.fit(
    epochs=150,
    batch_size=5,
    sgd_mode='adam',
    learning_rate=1e-4,
    topK=80,
)


FITTING SLIM...


ModuleNotFoundError: No module named 'SLIM_BPR.Cython.SLIM_BPR_Cython_Epoch'

In [None]:
# Sweep the blend weight `beta` (similarity scores vs. SLIM scores) and record
# the evaluation result of each bucket-specific ensemble.
betas = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
maps1 = []
maps2 = []
maps3 = []

# The fitted similarities do not depend on beta, so each ensemble is fitted
# once, outside the loop. The bucket matrices and test sets are the `_ns`
# variables defined in the earlier cells.
ensemble1.fit(training_set_ns, below_train_ns, icm_csr, slim1, alfa = BEST_ALFA)
ensemble2.fit(training_set_ns, middle_train_ns, icm_csr, slim1, alfa = BEST_ALFA)
ensemble3.fit(training_set_ns, above_train_ns, icm_csr, slim1, alfa = BEST_ALFA)

for beta in betas:
    print("EVALUATING WITH BETA = " + str(beta))
    # `recommend` reads the blend weight from the `beta` attribute, so the
    # value under test has to be set on every ensemble before evaluating.
    for ensemble in (ensemble1, ensemble2, ensemble3):
        ensemble.beta = beta
    print("EVALUATING FIRST ALGORITHM")
    maps1.append(utils.evaluate_algorithm(test_set_ns, ensemble1, test_set_1_ns))
    print("EVALUATING SECOND ALGORITHM")
    maps2.append(utils.evaluate_algorithm(test_set_ns, ensemble2, test_set_2_ns))
    print("EVALUATING THIRD ALGORITHM")
    maps3.append(utils.evaluate_algorithm(test_set_ns, ensemble3, test_set_3_ns))