In [29]:
from implicit.nearest_neighbours import CosineRecommender
from implicit.als import AlternatingLeastSquares
import pandas as pd
import os

In [24]:
# Load the raw ratings table from the local MovieLens "ml-latest" dump.
# The columns used further down are 'userId', 'movieId' and 'rating'.
ratings = pd.read_csv(filepath_or_buffer='D:/Datasets/ml-latest/ratings.csv')

In [25]:
from scipy.sparse import coo_matrix
import numpy as np

# Build a binary user x item interaction matrix in COO format.
# A rating of 4 or higher becomes 1.0 (positive feedback); anything lower
# becomes 0.0.  Rows are indexed by userId, columns by movieId; no explicit
# shape is passed, so scipy derives it from the largest ids.
user_item_matrix = coo_matrix(
    (
        ratings['rating'].ge(4).astype(np.float32),
        (ratings['userId'], ratings['movieId']),
    )
)
# Drop the explicitly stored 0.0 entries so only positive interactions remain.
user_item_matrix.eliminate_zeros()

In [26]:
# Split the stored entries of the sparse matrix into a training and a test part.
total_len = user_item_matrix.data.size
train_len = int(total_len * .8)  # 80% of the stored entries go to training
all_indices = np.arange(total_len)
np.random.seed(42)  # fixed seed so the split is reproducible
train_indices = np.random.choice(all_indices, train_len, replace=False)
# Boolean mask over the stored entries: True -> training, False -> test.
# Because all_indices is simply 0..total_len-1, marking the sampled positions
# directly gives the same mask as np.in1d(all_indices, train_indices) without
# the membership search (np.in1d is also deprecated since NumPy 2.0).
train_mask = np.zeros(total_len, dtype=bool)
train_mask[train_indices] = True

In [27]:
def get_masked(arr, mask):
    """Return a COO matrix holding only the stored entries of ``arr`` selected by ``mask``.

    Parameters
    ----------
    arr : COO-style sparse matrix
        Must expose ``data``, ``row``, ``col`` and ``shape``.
    mask : boolean array
        One flag per stored entry of ``arr``; True keeps the entry.

    Returns
    -------
    scipy.sparse.coo_matrix
        float32 matrix with the same shape as ``arr``.
    """
    # Vectorised cast instead of a per-element Python loop; this also keeps
    # the dtype float32 when the mask selects nothing.
    values = arr.data[mask].astype(np.float32)
    return coo_matrix(
        (values, (arr.row[mask], arr.col[mask])),
        shape=arr.shape,
    )

In [28]:
# Training interactions as a user x item CSR matrix.
train_csr = get_masked(user_item_matrix, train_mask).tocsr()
# Transposed (item x user) view of the training data, used for model fitting.
train = train_csr.transpose()
# Held-out interactions: every stored entry that is not in the training mask.
test_coo = get_masked(user_item_matrix, np.logical_not(train_mask))
test_csr = test_coo.tocsr()

In [30]:
# Nearest-neighbour recommender with library-default settings.
cosine = CosineRecommender()
# Set OPENBLAS_NUM_THREADS to '1' for this process before the ALS model is
# created.  NOTE(review): presumably to avoid BLAS thread oversubscription
# during ALS training -- confirm against the implicit documentation.
os.environ['OPENBLAS_NUM_THREADS'] = '1'
# ALS model with 10 latent factors, trained for 10 iterations.
als = AlternatingLeastSquares(
    iterations=10,
    factors=10,
)

In [None]:
%%time
# Fit the cosine model on `train` (the transposed training matrix) and let
# the %%time cell magic report how long the fit takes.
cosine.fit(train)

  0%|          | 0/283229 [00:00<?, ?it/s]

In [None]:
# Fit the ALS model on the same transposed training matrix as the cosine model.
als.fit(train)

In [None]:
# Distinct row indices (user ids) that have at least one held-out interaction.
users = [*set(test_coo.row)]
# Evaluation subset: the first 10,000 of those users, in set-iteration order.
small_users = users[:10_000]

def get_recs(users, model, user_items=None, N=50):
    """Collect top-N recommendations from ``model`` for every user in ``users``.

    Parameters
    ----------
    users : iterable
        User ids to request recommendations for.
    model : object
        Fitted recommender exposing ``recommend(userid=..., user_items=..., N=...)``.
    user_items : sparse matrix, optional
        User x item interactions passed through to ``model.recommend``.
        Defaults to the notebook-level ``train_csr``.
    N : int, optional
        Number of recommendations requested per user (default 50).

    Returns
    -------
    dict
        Maps each user id to whatever ``model.recommend`` returned for it.
    """
    if user_items is None:
        # Keep the original two-argument call working.
        user_items = train_csr
    # Fixes: the method is `recommend` and its keyword is `userid` (both were
    # misspelled), and the `users` argument is now actually iterated instead
    # of the global `small_users`.
    return {
        user: model.recommend(userid=user, user_items=user_items, N=N)
        for user in users
    }

In [None]:
def hitrate(k, recs, users, test=None):
    """Fraction of ``users`` with at least one held-out item among their top-``k`` recommendations.

    Parameters
    ----------
    k : int
        Number of leading recommendations to consider per user.
    recs : mapping
        user id -> sequence of ``(item, score)`` pairs, best first.
        An empty sequence counts as a miss.
    users : sized iterable
        User ids to evaluate; each must be a key of ``recs``.
    test : CSR sparse matrix, optional
        Held-out user x item interactions.  Defaults to the notebook-level
        ``test_csr``.

    Returns
    -------
    float
        Hits divided by ``len(users)``; 0.0 when ``users`` is empty.
    """
    if test is None:
        # Keep the original three-argument call working.
        test = test_csr
    if len(users) == 0:
        # Avoid ZeroDivisionError on an empty evaluation set.
        return 0.0
    hits = 0
    for user in users:
        user_recs = recs[user]
        if not user_recs:
            continue
        rec_items, _ = zip(*user_recs)
        # Column indices of the user's row are the held-out item ids.
        relevant = test[user].indices
        # Fix: the set method is `intersection`, not `intersections`.
        if set(rec_items[:k]).intersection(relevant):
            hits += 1
    return hits / len(users)