In [29]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import sparse
import random
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfTransformer
import utils
from Notebooks_utils import Compute_Similarity_Python as sim

In [2]:
# Load the three input tables; the files are read in the order listed and
# bound, in that same order, to `tracks`, `train` and `target`.
tracks, train, target = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tracks.csv',
        '../input/train.csv',
        '../input/target_playlists.csv',
    )
)

In [10]:
# Build the user-rating matrix (URM) from the training table with the project
# helper. The recommender below calls `getrow` on it and reads `.indices` of a
# row, so the result is presumably a SciPy CSR matrix — TODO confirm in utils.
urm_csr = utils.build_urm_csr(train)

In [34]:
class CollaborativeItemBasedRecommender(object):
    """Top-N recommender that scores items through a precomputed similarity matrix.

    ``fit`` re-weights the user-rating matrix (URM) with TF-IDF, hands the
    transposed result to the project's ``Compute_Similarity_Python`` helper and
    keeps the matrix that helper returns.  ``recommend`` multiplies one URM row
    by that matrix and returns the indices with the highest scores.
    """

    def fit(self, URM_csr, block_size = 100, **args):
        """Compute and store the similarity matrix used by ``recommend``.

        Parameters
        ----------
        URM_csr : scipy.sparse matrix
            User-rating matrix.  ``recommend`` calls ``getrow`` on it and reads
            ``.indices`` of the returned row, so it is expected to be in CSR
            format with one row per user.
        block_size : int, default 100
            Forwarded to the helper's ``compute_similarity`` call.
        **args
            Forwarded unchanged to the ``Compute_Similarity_Python``
            constructor.

        Side effects
        ------------
        Sets ``self.item_similarities`` (the helper's result) and
        ``self.URM_csr`` (the matrix passed in, not a copy).
        """
        # fit_transform learns the IDF weights and applies them in one call.
        tf_idf_csr = TfidfTransformer().fit_transform(URM_csr)

        # NOTE(review): the helper receives the *transpose* of the weighted URM.
        # If Compute_Similarity_Python compares the columns of its input, the
        # result is indexed by users rather than items and would not be
        # conformable with the product in `recommend` — confirm against the
        # helper before relying on this.
        IRM = sparse.csr_matrix(tf_idf_csr.transpose())

        similarity_object = sim.Compute_Similarity_Python(IRM, **args)

        self.item_similarities = similarity_object.compute_similarity(block_size = block_size)
        self.URM_csr = URM_csr

    def recommend(self, user_id, at=10, remove_seen=True):
        """Return the indices of the best-scoring items for one user.

        Parameters
        ----------
        user_id : int
            Row index of the user in the URM given to ``fit``.
        at : int, default 10
            Maximum number of indices to return.
        remove_seen : bool, default True
            When true, indices already stored in the user's URM row are
            dropped before the result is truncated to ``at``.

        Returns
        -------
        numpy.ndarray
            Item indices ordered from highest to lowest score; may be shorter
            than ``at`` if fewer candidates remain.
        """
        user_profile = self.URM_csr.getrow(user_id)

        # Row vector times similarity matrix -> one score per column.
        item_scores = user_profile.dot(self.item_similarities)

        # argsort is ascending, so reverse it to put the best score first.
        ranked_items = np.argsort(np.array(item_scores.todense())[0])[::-1]

        if remove_seen:
            # np.isin replaces the deprecated np.in1d.  `ranked_items` is a
            # permutation, hence unique; `assume_unique` additionally presumes
            # the URM row stores each column index only once.
            unseen_mask = np.isin(ranked_items, user_profile.indices,
                                  assume_unique=True, invert=True)
            ranked_items = ranked_items[unseen_mask]

        return ranked_items[:at]

In [37]:
# Instantiate the recommender and build its similarity model from the URM.
# All keywords after `urm_csr` are passed through `fit(**args)` to the
# project's similarity helper.
# NOTE(review): `tversky_alpha` is supplied together with similarity="jaccard";
# whether the helper uses it in that mode is not visible here — confirm.
rec = CollaborativeItemBasedRecommender()
rec.fit(
    urm_csr,
    similarity="jaccard",
    topK=50,
    tversky_alpha=1.5,
    shrink=50,
)

Similarity column 9700 ( 19 % ), 321.17 column/sec, elapsed time 0.50 min
Similarity column 19700 ( 39 % ), 326.21 column/sec, elapsed time 1.01 min
Similarity column 29600 ( 59 % ), 326.92 column/sec, elapsed time 1.51 min
Similarity column 39500 ( 78 % ), 327.19 column/sec, elapsed time 2.01 min
Similarity column 49400 ( 98 % ), 327.25 column/sec, elapsed time 2.52 min
Similarity column 50400 ( 100 % ), 327.36 column/sec, elapsed time 2.57 min
50
