# Recommender Systems 2017/18

### Practice 4 - Similarity with Cython


### Cython is a superset of Python, allowing you to use C-like operations and import C code. Cython files (.pyx) are compiled and support static typing.

In [2]:
import time
import numpy as np

### Let's implement something simple

In [3]:
def isPrime(n):
    
    i = 2
    
    # Usually you loop up to sqrt(n)
    while i < n:
        if n % i == 0:
            return False
        
        i += 1
        
    return True

In [4]:
print("Is prime 2? {}".format(isPrime(2)))
print("Is prime 3? {}".format(isPrime(3)))
print("Is prime 5? {}".format(isPrime(5)))
print("Is prime 15? {}".format(isPrime(15)))
print("Is prime 20? {}".format(isPrime(20)))

Is prime 2? True
Is prime 3? True
Is prime 5? True
Is prime 15? False
Is prime 20? False


In [5]:
start_time = time.time()

result = isPrime(50000017)

print("Is Prime 50000017? {}, time required {:.2f} sec".format(result, time.time()-start_time))

Is Prime 50000017? True, time required 4.71 sec


#### Load Cython magic command, this takes care of the compilation step. If you are writing code outside Jupyter you'll have to compile using other tools

In [6]:
%load_ext Cython

#### Declare Cython function, paste the same code as before. The function will be compiled and then executed with a Python interface

In [7]:
%%cython
def isPrime(n):
    
    i = 2
    
    # Usually you loop up to sqrt(n)
    while i < n:
        if n % i == 0:
            return False
        
        i += 1
        
    return True

In [8]:
start_time = time.time()

result = isPrime(50000017)

print("Is Prime 50000017? {}, time required {:.2f} sec".format(result, time.time()-start_time))

Is Prime 50000017? True, time required 2.45 sec


#### As you can see by just compiling the same code we got some improvement.
#### To go seriously higher, we have to use some static tiping

In [9]:
%%cython
# Declare the tipe of the arguments
def isPrime(long n):
    
    # Declare index of for loop
    cdef long i
    
    i = 2
    
    # Usually you loop up to sqrt(n)
    while i < n:
        if n % i == 0:
            return False
        
        i += 1
        
    return True

In [10]:
start_time = time.time()

result = isPrime(50000017)

print("Is Prime 50000017? {}, time required {:.2f} sec".format(result, time.time()-start_time))

Is Prime 50000017? True, time required 0.43 sec


#### Cython code with two tipe declaration, for n and i, runs 50x faster than Python

#### Main benefits of Cython:
* Compiled, no interpreter
* Static typing, no overhead
* Fast loops, no need to vectorize. Vectorization sometimes performes lots of useless operations
* Numpy, which is fast in python, becomes often slooooow compared to a carefully written Cython code

### Similarity with Cython

#### Load the usual data. I created a reader to encapsulate the data-specific format

In [11]:
from Movielens10MReader import Movielens10MReader

dataReader = Movielens10MReader()

URM_train = dataReader.get_URM_train()
URM_test = dataReader.get_URM_test()


Movielens10MReader: loading data...
Processed 1000000 cells
Processed 2000000 cells
Processed 3000000 cells
Processed 4000000 cells
Processed 5000000 cells
Processed 6000000 cells
Processed 7000000 cells
Processed 8000000 cells
Processed 1000000 cells


In [12]:
URM_train

<71568x65134 sparse matrix of type '<class 'numpy.float32'>'
	with 8000085 stored elements in Compressed Sparse Row format>

#### Since we cannot store in memory the whole similarity, we compute it one row at a time

In [13]:
itemIndex=1
item_ratings = URM_train[:,itemIndex]
item_ratings = item_ratings.toarray().squeeze()

item_ratings.shape

(71568,)

In [14]:
this_item_weights = URM_train.T.dot(item_ratings)
this_item_weights.shape

(65134,)

#### Once we have the scores for that row, we get the TopK

In [15]:
k=10

top_k_idx = np.argsort(this_item_weights) [-k:]
top_k_idx

array([1196,  110,  480, 1210,  593,  318,  356,  296,  260,    1])

In [16]:
import scipy.sparse as sps

In [17]:
# Function hiding some conversion checks
def check_matrix(X, format='csc', dtype=np.float32):
    if format == 'csc' and not isinstance(X, sps.csc_matrix):
        return X.tocsc().astype(dtype)
    elif format == 'csr' and not isinstance(X, sps.csr_matrix):
        return X.tocsr().astype(dtype)
    elif format == 'coo' and not isinstance(X, sps.coo_matrix):
        return X.tocoo().astype(dtype)
    elif format == 'dok' and not isinstance(X, sps.dok_matrix):
        return X.todok().astype(dtype)
    elif format == 'bsr' and not isinstance(X, sps.bsr_matrix):
        return X.tobsr().astype(dtype)
    elif format == 'dia' and not isinstance(X, sps.dia_matrix):
        return X.todia().astype(dtype)
    elif format == 'lil' and not isinstance(X, sps.lil_matrix):
        return X.tolil().astype(dtype)
    else:
        return X.astype(dtype)

#### Create a Basic Collaborative filtering recommender using only cosine similarity

In [18]:
class BasicItemKNN_CF_Recommender(object):
    """ ItemKNN recommender with cosine similarity and no shrinkage"""

    def __init__(self, URM, k=50, shrinkage=100):
        self.dataset = URM
        self.k = k
        self.shrinkage = shrinkage
        
    def __str__(self):
        return "ItemKNN(similarity={},k={},shrinkage={})".format(
            'cosine', self.k, self.shrinkage)
    
    def compute_similarity(self, URM):
        
        # We explore the matrix column-wise
        URM = check_matrix(URM, 'csc')     
                    
        values = []
        rows = []
        cols = []
        
        start_time = time.time()
        processedItems = 0
        
        # Compute all similarities for each item using vectorization
        for itemIndex in range(URM.shape[0]):
            
            processedItems += 1
            
            if processedItems % 100==0:
                
                itemPerSec = processedItems/(time.time()-start_time)
                
                print("Similarity item {}, {:.2f} item/sec, required time {:.2f} min".format(
                    processedItems, itemPerSec, URM.shape[0]/itemPerSec/60))
            
            # All ratings for a given item
            item_ratings = URM[:,itemIndex]
            item_ratings = item_ratings.toarray().squeeze()
            
            # Compute item similarities
            this_item_weights = URM_train.T.dot(item_ratings)
            
            # Sort indices and select TopK
            top_k_idx = np.argsort(this_item_weights) [-self.k:]
            
            # Incrementally build sparse matrix
            values.extend(this_item_weights[top_k_idx])
            rows.extend(np.arange(URM.shape[0])[top_k_idx])
            cols.extend(np.ones(self.k) * itemIndex)
            
        self.W_sparse = sps.csc_matrix((values, (rows, cols)),
                                       shape=(URM.shape[0], URM.shape[0]),
                                       dtype=np.float32)

        

    def fit(self):
        item_weights = self.compute_similarity(self.dataset)
        
        item_weights = check_matrix(item_weights, 'csr')
        
        
    def recommend(self, user_id, at=None, exclude_seen=True):
        
        # compute the scores using the dot product
        user_profile = self.dataset[user_id]
        scores = user_profile.dot(self.W_sparse).toarray().ravel()

        # rank items
        ranking = scores.argsort()[::-1]
        if exclude_seen:
            ranking = self._filter_seen(user_id, ranking)
            
        return ranking[:at]
    
    def _filter_seen(self, user_id, ranking):
        user_profile = self.dataset[user_id]
        seen = user_profile.indices
        unseen_mask = np.in1d(ranking, seen, assume_unique=True, invert=True)
        return ranking[unseen_mask]

#### Let's isolate the compute_similarity function 

In [19]:
def compute_similarity(URM, k=100):

    # We explore the matrix column-wise
    URM = check_matrix(URM, 'csc')
    
    n_items = URM.shape[0]

    values = []
    rows = []
    cols = []

    start_time = time.time()
    processedItems = 0

    # Compute all similarities for each item using vectorization
    # for itemIndex in range(n_items):
    for itemIndex in range(1000):

        processedItems += 1

        if processedItems % 100==0:

            itemPerSec = processedItems/(time.time()-start_time)

            print("Similarity item {}, {:.2f} item/sec, required time {:.2f} min".format(
                processedItems, itemPerSec, n_items/itemPerSec/60))

        # All ratings for a given item
        item_ratings = URM[:,itemIndex]
        item_ratings = item_ratings.toarray().squeeze()

        # Compute item similarities
        this_item_weights = URM.T.dot(item_ratings)

        # Sort indices and select TopK
        top_k_idx = np.argsort(this_item_weights) [-k:]

        # Incrementally build sparse matrix
        values.extend(this_item_weights[top_k_idx])
        rows.extend(np.arange(URM.shape[0])[top_k_idx])
        cols.extend(np.ones(k) * itemIndex)

    W_sparse = sps.csc_matrix((values, (rows, cols)),
                            shape=(n_items, n_items),
                            dtype=np.float32)

    return W_sparse
        

In [20]:
compute_similarity(URM_train)

Similarity item 100, 78.08 item/sec, required time 15.28 min
Similarity item 200, 77.20 item/sec, required time 15.45 min
Similarity item 300, 76.19 item/sec, required time 15.66 min
Similarity item 400, 77.18 item/sec, required time 15.45 min
Similarity item 500, 77.51 item/sec, required time 15.39 min
Similarity item 600, 77.61 item/sec, required time 15.37 min
Similarity item 700, 77.83 item/sec, required time 15.33 min
Similarity item 800, 77.70 item/sec, required time 15.35 min
Similarity item 900, 77.75 item/sec, required time 15.34 min
Similarity item 1000, 77.48 item/sec, required time 15.39 min


<71568x71568 sparse matrix of type '<class 'numpy.float32'>'
	with 100000 stored elements in Compressed Sparse Column format>

### We see that computing the similarity takes more or less 15 minutes
### Now we use the same identical code, but we compile it

In [21]:
%%cython
import time
import numpy as np
import scipy.sparse as sps

def compute_similarity_compiled(URM, k=100):

    # We explore the matrix column-wise
    URM = URM.tocsc()
    
    n_items = URM.shape[0]

    values = []
    rows = []
    cols = []

    start_time = time.time()
    processedItems = 0

    # Compute all similarities for each item using vectorization
    # for itemIndex in range(n_items):
    for itemIndex in range(1000):

        processedItems += 1

        if processedItems % 100==0:

            itemPerSec = processedItems/(time.time()-start_time)

            print("Similarity item {}, {:.2f} item/sec, required time {:.2f} min".format(
                processedItems, itemPerSec, n_items/itemPerSec/60))

        # All ratings for a given item
        item_ratings = URM[:,itemIndex]
        item_ratings = item_ratings.toarray().squeeze()

        # Compute item similarities
        this_item_weights = URM.T.dot(item_ratings)

        # Sort indices and select TopK
        top_k_idx = np.argsort(this_item_weights) [-k:]

        # Incrementally build sparse matrix
        values.extend(this_item_weights[top_k_idx])
        rows.extend(np.arange(URM.shape[0])[top_k_idx])
        cols.extend(np.ones(k) * itemIndex)

    W_sparse = sps.csc_matrix((values, (rows, cols)),
                            shape=(n_items, n_items),
                            dtype=np.float32)

    return W_sparse
        

In [22]:
compute_similarity_compiled(URM_train)

Similarity item 100, 79.84 item/sec, required time 14.94 min
Similarity item 200, 80.12 item/sec, required time 14.89 min
Similarity item 300, 79.39 item/sec, required time 15.03 min
Similarity item 400, 79.29 item/sec, required time 15.04 min
Similarity item 500, 79.43 item/sec, required time 15.02 min
Similarity item 600, 79.60 item/sec, required time 14.99 min
Similarity item 700, 79.87 item/sec, required time 14.93 min
Similarity item 800, 80.11 item/sec, required time 14.89 min
Similarity item 900, 80.17 item/sec, required time 14.88 min
Similarity item 1000, 80.14 item/sec, required time 14.88 min


<71568x71568 sparse matrix of type '<class 'numpy.float32'>'
	with 100000 stored elements in Compressed Sparse Column format>

#### As opposed to the previous example, compilation by itself is not very helpful. Why?
#### Because the compiler is just porting in C all operations that the python interpreter would have to perform, dynamic tiping included

### Now try to add some tipes

In [23]:
%%cython
import time
import numpy as np
import scipy.sparse as sps

cimport numpy as np

def compute_similarity_compiled(URM, int k=100):
    
    cdef int itemIndex, processedItems
    
    # We use the numpy syntax, allowing us to perform vectorized operations
    cdef np.ndarray[float, ndim=1] item_ratings, this_item_weights
    cdef np.ndarray[long, ndim=1] top_k_idx

    # We explore the matrix column-wise
    URM = URM.tocsc()
    
    n_items = URM.shape[0]

    values = []
    rows = []
    cols = []

    start_time = time.time()
    processedItems = 0

    # Compute all similarities for each item using vectorization
    # for itemIndex in range(n_items):
    for itemIndex in range(1000):

        processedItems += 1

        if processedItems % 100==0:

            itemPerSec = processedItems/(time.time()-start_time)

            print("Similarity item {}, {:.2f} item/sec, required time {:.2f} min".format(
                processedItems, itemPerSec, n_items/itemPerSec/60))

        # All ratings for a given item
        item_ratings = URM[:,itemIndex].toarray().squeeze()

        # Compute item similarities
        this_item_weights = URM.T.dot(item_ratings)

        # Sort indices and select TopK
        top_k_idx = np.argsort(this_item_weights) [-k:]

        # Incrementally build sparse matrix
        values.extend(this_item_weights[top_k_idx])
        rows.extend(np.arange(URM.shape[0])[top_k_idx])
        cols.extend(np.ones(k) * itemIndex)

    W_sparse = sps.csc_matrix((values, (rows, cols)),
                            shape=(n_items, n_items),
                            dtype=np.float32)

    return W_sparse

In [24]:
compute_similarity_compiled(URM_train)

Similarity item 100, 82.45 item/sec, required time 14.47 min
Similarity item 200, 81.82 item/sec, required time 14.58 min
Similarity item 300, 81.51 item/sec, required time 14.63 min
Similarity item 400, 81.14 item/sec, required time 14.70 min
Similarity item 500, 81.16 item/sec, required time 14.70 min
Similarity item 600, 81.28 item/sec, required time 14.68 min
Similarity item 700, 80.79 item/sec, required time 14.76 min
Similarity item 800, 79.44 item/sec, required time 15.02 min
Similarity item 900, 78.56 item/sec, required time 15.18 min
Similarity item 1000, 78.69 item/sec, required time 15.16 min


<71568x71568 sparse matrix of type '<class 'numpy.float32'>'
	with 100000 stored elements in Compressed Sparse Column format>

### Still no luck! Why?
### There are a few reasons:
* We are getting the data from the sparse matrix using its interface, which is SLOW
* We are transforming sparse data into a dense array, which is SLOW
* We are performing a dot product against a dense vector

#### You colud find a workaround... here we do something different

### Proposed solution
### Change the algorithm!

### Instead of performing the dot product, let's implement somenting that computes the similarity using directly sparse data

### We loop through the data and update selectively the similarity matrix cells. 
### Underlying idea:
* When I select an item I can know which users rated it
* Instead of looping through the other items trying to find common users, I use the URM to find which other items that user rated
* The user I am considering will be common between the two, so I increment the similarity of the two items
* Instead of following the path item1 -> loop item2 -> find user, i go item1 -> loop user -> loop item2

In [31]:
data_matrix = np.array([[1,1,0,1],[0,1,1,1],[1,0,1,0]])
data_matrix = sps.csc_matrix(data_matrix)
data_matrix.todense()

matrix([[1, 1, 0, 1],
        [0, 1, 1, 1],
        [1, 0, 1, 0]], dtype=int64)

### Example: Compute the similarities for item 1

#### Step 1: get users that rated item 1

In [32]:
users_rated_item = data_matrix[:,1]
users_rated_item.indices

array([0, 1], dtype=int32)

#### Step 2: count how many times those users rated other items

In [42]:
item_similarity = data_matrix[users_rated_item.indices].sum(axis = 0)
np.array(item_similarity).squeeze()

array([1, 2, 1, 2])

#### Verify our result against the common method. We can see that the similarity values for col 1 are identical

In [38]:
similarity_matrix_product = data_matrix.T.dot(data_matrix)
similarity_matrix_product.toarray()

array([[2, 1, 1, 1],
       [1, 2, 1, 2],
       [1, 1, 2, 1],
       [1, 2, 1, 2]], dtype=int64)

In [45]:
# The following code works for implicit feedback only
def compute_similarity_new_algorithm(URM, k=100):

    # We explore the matrix column-wise
    URM = check_matrix(URM, 'csc')
    URM.data = np.ones_like(URM.data)
    
    n_items = URM.shape[0]

    values = []
    rows = []
    cols = []

    start_time = time.time()
    processedItems = 0

    # Compute all similarities for each item using vectorization
    # for itemIndex in range(n_items):
    for itemIndex in range(1000):

        processedItems += 1

        if processedItems % 100==0:

            itemPerSec = processedItems/(time.time()-start_time)

            print("Similarity item {}, {:.2f} item/sec, required time {:.2f} min".format(
                processedItems, itemPerSec, n_items/itemPerSec/60))

        # All ratings for a given item
        users_rated_item = URM.indices[URM.indptr[itemIndex]:URM.indptr[itemIndex+1]]

        # Compute item similarities
        this_item_weights = URM[users_rated_item].sum(axis = 0)
        this_item_weights = np.array(this_item_weights).squeeze()

        # Sort indices and select TopK
        top_k_idx = np.argsort(this_item_weights) [-k:]

        # Incrementally build sparse matrix
        values.extend(this_item_weights[top_k_idx])
        rows.extend(np.arange(URM.shape[0])[top_k_idx])
        cols.extend(np.ones(k) * itemIndex)

    W_sparse = sps.csc_matrix((values, (rows, cols)),
                            shape=(n_items, n_items),
                            dtype=np.float32)

    return W_sparse
        

In [46]:
compute_similarity_new_algorithm(URM_train)

Similarity item 100, 22.65 item/sec, required time 52.67 min
Similarity item 200, 23.71 item/sec, required time 50.32 min
Similarity item 300, 24.07 item/sec, required time 49.55 min
Similarity item 400, 23.50 item/sec, required time 50.75 min
Similarity item 500, 23.90 item/sec, required time 49.90 min
Similarity item 600, 23.43 item/sec, required time 50.90 min
Similarity item 700, 24.32 item/sec, required time 49.04 min
Similarity item 800, 24.73 item/sec, required time 48.24 min
Similarity item 900, 25.42 item/sec, required time 46.92 min
Similarity item 1000, 25.53 item/sec, required time 46.73 min


<71568x71568 sparse matrix of type '<class 'numpy.float32'>'
	with 100000 stored elements in Compressed Sparse Column format>

#### Slower but expected, dot product operations are implemented in an efficient way and here we are using an indirect approach

### Now let's write this algorithm in Cython

In [47]:
%%cython

import time

import numpy as np
cimport numpy as np
from cpython.array cimport array, clone

import scipy.sparse as sps


cdef class Cosine_Similarity:

    cdef int TopK
    cdef long n_items

    # Arrays containing the sparse data
    cdef int[:] user_to_item_row_ptr, user_to_item_cols
    cdef int[:] item_to_user_rows, item_to_user_col_ptr
    cdef double[:] user_to_item_data, item_to_user_data

    # In case you select no TopK
    cdef double[:,:] W_dense

    
    def __init__(self, URM, TopK = 100):
        """
        Dataset must be a matrix with items as columns
        :param dataset:
        :param TopK:
        """

        super(Cosine_Similarity, self).__init__()

        self.n_items = URM.shape[1]

        self.TopK = min(TopK, self.n_items)

        URM = URM.tocsr()
        self.user_to_item_row_ptr = URM.indptr
        self.user_to_item_cols = URM.indices
        self.user_to_item_data = np.array(URM.data, dtype=np.float64)

        URM = URM.tocsc()
        self.item_to_user_rows = URM.indices
        self.item_to_user_col_ptr = URM.indptr
        self.item_to_user_data = np.array(URM.data, dtype=np.float64)

        if self.TopK == 0:
            self.W_dense = np.zeros((self.n_items,self.n_items))



    cdef int[:] getUsersThatRatedItem(self, long item_id):
        return self.item_to_user_rows[self.item_to_user_col_ptr[item_id]:self.item_to_user_col_ptr[item_id+1]]

    cdef int[:] getItemsRatedByUser(self, long user_id):
        return self.user_to_item_cols[self.user_to_item_row_ptr[user_id]:self.user_to_item_row_ptr[user_id+1]]

    
    
    cdef double[:] computeItemSimilarities(self, long item_id_input):
        """
        For every item the cosine similarity against other items depends on whether they have users in common. 
        The more common users the higher the similarity.
        
        The basic implementation is:
        - Select the first item
        - Loop through all other items
        -- Given the two items, get the users they have in common
        -- Update the similarity considering all common users
        
        That is VERY slow due to the common user part, in which a long data structure is looped multiple times.
        
        A better way is to use the data structure in a different way skipping the search part, getting directly
        the information we need.
        
        The implementation here used is:
        - Select the first item
        - Initialize a zero valued array for the similarities
        - Get the users who rated the first item
        - Loop through the users
        -- Given a user, get the items he rated (second item)
        -- Update the similarity of the items he rated
        
        
        """

        # Create template used to initialize an array with zeros
        # Much faster than np.zeros(self.n_items)
        cdef array[double] template_zero = array('d')
        cdef array[double] result = clone(template_zero, self.n_items, zero=True)


        cdef long user_index, user_id, item_index, item_id_second

        cdef int[:] users_that_rated_item = self.getUsersThatRatedItem(item_id_input)
        cdef int[:] items_rated_by_user

        cdef double rating_item_input, rating_item_second

        # Get users that rated the items
        for user_index in range(len(users_that_rated_item)):

            user_id = users_that_rated_item[user_index]
            rating_item_input = self.item_to_user_data[self.item_to_user_col_ptr[item_id_input]+user_index]

            # Get all items rated by that user
            items_rated_by_user = self.getItemsRatedByUser(user_id)

            for item_index in range(len(items_rated_by_user)):

                item_id_second = items_rated_by_user[item_index]

                # Do not compute the similarity on the diagonal
                if item_id_second != item_id_input:
                    # Increment similairty
                    rating_item_second = self.user_to_item_data[self.user_to_item_row_ptr[user_id]+item_index]

                    result[item_id_second] += rating_item_input*rating_item_second

        return result


    def compute_similarity(self):

        cdef int itemIndex, innerItemIndex
        cdef long long topKItemIndex

        cdef long long[:] top_k_idx

        # Declare numpy data type to use vetor indexing and simplify the topK selection code
        cdef np.ndarray[long, ndim=1] top_k_partition, top_k_partition_sorting
        cdef np.ndarray[np.float64_t, ndim=1] this_item_weights_np

        #cdef long[:] top_k_idx
        cdef double[:] this_item_weights

        cdef long processedItems = 0

        # Data structure to incrementally build sparse matrix
        # Preinitialize max possible length
        cdef double[:] values = np.zeros((self.n_items*self.TopK))
        cdef int[:] rows = np.zeros((self.n_items*self.TopK,), dtype=np.int32)
        cdef int[:] cols = np.zeros((self.n_items*self.TopK,), dtype=np.int32)
        cdef long sparse_data_pointer = 0


        start_time = time.time()

        # Compute all similarities for each item
        for itemIndex in range(self.n_items):

            processedItems += 1

            if processedItems % 10000==0 or processedItems==self.n_items:

                itemPerSec = processedItems/(time.time()-start_time)

                print("Similarity item {} ( {:2.0f} % ), {:.2f} item/sec, required time {:.2f} min".format(
                    processedItems, processedItems*1.0/self.n_items*100, itemPerSec, (self.n_items-processedItems) / itemPerSec / 60))

            this_item_weights = self.computeItemSimilarities(itemIndex)

            if self.TopK == 0:

                for innerItemIndex in range(self.n_items):
                    self.W_dense[innerItemIndex,itemIndex] = this_item_weights[innerItemIndex]

            else:

                # Sort indices and select TopK
                # Using numpy implies some overhead, unfortunately the plain C qsort function is even slower
                # top_k_idx = np.argsort(this_item_weights) [-self.TopK:]

                # Sorting is done in three steps. Faster then plain np.argsort for higher number of items
                # because we avoid sorting elements we already know we don't care about
                # - Partition the data to extract the set of TopK items, this set is unsorted
                # - Sort only the TopK items, discarding the rest
                # - Get the original item index

                this_item_weights_np = - np.array(this_item_weights)
                
                # Get the unordered set of topK items
                top_k_partition = np.argpartition(this_item_weights_np, self.TopK-1)[0:self.TopK]
                # Sort only the elements in the partition
                top_k_partition_sorting = np.argsort(this_item_weights_np[top_k_partition])
                # Get original index
                top_k_idx = top_k_partition[top_k_partition_sorting]



                # Incrementally build sparse matrix
                for innerItemIndex in range(len(top_k_idx)):

                    topKItemIndex = top_k_idx[innerItemIndex]

                    values[sparse_data_pointer] = this_item_weights[topKItemIndex]
                    rows[sparse_data_pointer] = topKItemIndex
                    cols[sparse_data_pointer] = itemIndex

                    sparse_data_pointer += 1


        if self.TopK == 0:

            return np.array(self.W_dense)

        else:

            values = np.array(values[0:sparse_data_pointer])
            rows = np.array(rows[0:sparse_data_pointer])
            cols = np.array(cols[0:sparse_data_pointer])

            W_sparse = sps.csr_matrix((values, (rows, cols)),
                                    shape=(self.n_items, self.n_items),
                                    dtype=np.float32)

            return W_sparse




In [48]:
cosine_cython = Cosine_Similarity(URM_train, TopK=100)

start_time = time.time()

cosine_cython.compute_similarity()

print("Similarity computed in {:.2f} seconds".format(time.time()-start_time))

Similarity item 10000 ( 15 % ), 542.42 item/sec, required time 1.69 min
Similarity item 20000 ( 31 % ), 956.34 item/sec, required time 0.79 min
Similarity item 30000 ( 46 % ), 1276.94 item/sec, required time 0.46 min
Similarity item 40000 ( 61 % ), 1525.23 item/sec, required time 0.27 min
Similarity item 50000 ( 77 % ), 1726.64 item/sec, required time 0.15 min
Similarity item 60000 ( 92 % ), 1884.95 item/sec, required time 0.05 min
Similarity item 65134 ( 100 % ), 1968.23 item/sec, required time 0.00 min
Similarity computed in 33.89 seconds


### Better... much better. There are a few other things you could do, but at this point it is not worth the effort