# Домашнее задание

1. Перенесите метрики в модуль src.metrics.py
2. Перенесите функцию prefilter_items в модуль src.utils.py
3. Создайте модуль src.recommenders.py. Напишите код для класса ниже
(задание обсуждали на вебинаре, для первой функции практически сделали) и положите его в src.recommenders.py
4. Проверьте, что все модули корректно импортируются

In [1]:
!pip install implicit

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
# %load_ext autoreload

In [3]:
import os
import sys

# Put the parent directory on the import path (once) so that modules located
# there can be imported from this notebook.
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)
    
# from content.src.metrics import precision_at_k, recall_at_k
# from content.src.utils import prefilter_items

In [4]:
import pandas as pd
import numpy as np

from scipy.sparse import csr_matrix

from implicit.als import AlternatingLeastSquares
from implicit.nearest_neighbours import ItemItemRecommender
from implicit.nearest_neighbours import bm25_weight, tfidf_weight



In [5]:
# Load the transaction log, lower-case the column names and switch to the
# user_id / item_id naming used in the rest of the notebook.
data = pd.read_csv('retail_train.csv')
data.columns = list(map(str.lower, data.columns))
data = data.rename(columns={'household_key': 'user_id', 'product_id': 'item_id'})

# Time-based split: rows whose week_no is within `test_size_weeks` of the last
# week (boundary week included, i.e. max-3 .. max) form the test set.
test_size_weeks = 3
first_test_week = data['week_no'].max() - test_size_weeks

data_train = data[data['week_no'] < first_test_week]
data_test = data[data['week_no'] >= first_test_week]

data_train.head(2)

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc
0,2375,26984851472,1,1004906,1,1.39,364,-0.6,1631,1,0.0,0.0
1,2375,26984851472,1,1033142,1,0.82,364,0.0,1631,1,0.0,0.0


In [6]:
# Product catalogue: lower-case the column names and expose the product key
# as item_id.
item_features = pd.read_csv('product.csv')
item_features.columns = list(map(str.lower, item_features.columns))
item_features = item_features.rename(columns={'product_id': 'item_id'})

item_features.head(2)

Unnamed: 0,item_id,manufacturer,department,brand,commodity_desc,sub_commodity_desc,curr_size_of_product
0,25671,2,GROCERY,National,FRZN ICE,ICE - CRUSHED/CUBED,22 LB
1,26081,2,MISC. TRANS.,National,NO COMMODITY DESCRIPTION,NO SUBCOMMODITY DESCRIPTION,


In [7]:
# One row per test-period user with the array of distinct items they bought.
actual_items_by_user = data_test.groupby('user_id')['item_id'].unique()
result = actual_items_by_user.reset_index()
result.columns = ['user_id', 'actual']

result.head(2)

Unnamed: 0,user_id,actual
0,1,"[821867, 834484, 856942, 865456, 889248, 90795..."
1,3,"[835476, 851057, 872021, 878302, 879948, 90963..."


In [8]:
class MainRecommender(ItemItemRecommender):
    """Prepare purchase data and query ``implicit`` models for recommendations.

    The instance stores the user-item matrix and the id lookup tables built by
    ``prepare_matrix`` and ``prepare_dicts``. The ``fit*`` and
    ``get_similar_*`` helpers are static and only use their arguments.
    """

    def __init__(self, weighting=None, similarity=None,
                 filter_already_liked_items=False,
                 num_threads=4, fake_id=999999):
        """Store the recommender settings.

        Args:
            weighting: stored as ``self.weighting``; not used elsewhere in
                this class.
            similarity: stored as ``self.similarity``; not used elsewhere in
                this class.
            filter_already_liked_items: forwarded to ``model.recommend`` by
                ``get_recommendations``.
            num_threads: stored as ``self.num_threads`` and passed to the
                parent constructor.
            fake_id: original item id to exclude from the output of
                ``get_recommendations``; ``None`` disables the exclusion.
        """
        # Initialise the parent first so its attributes exist; the values
        # below are assigned afterwards and therefore take precedence.
        super().__init__(num_threads=num_threads)
        self.weighting = weighting
        self.similarity = similarity
        self.filter_already_liked_items = filter_already_liked_items
        self.num_threads = num_threads
        self.fake_id = fake_id

    def prepare_matrix(self, data_train):
        """Build the dense and sparse user-item matrices.

        Args:
            data_train: DataFrame with ``user_id``, ``item_id`` and
                ``quantity`` columns.

        Returns:
            Tuple ``(user_item_matrix, sparse_user_item)``: the float pivot
            table (users in rows, items in columns) and its CSR counterpart.
            Both are also stored on the instance.
        """
        # aggfunc='count' counts rows per (user, item) pair, so a cell holds
        # the number of transaction rows rather than the summed quantity.
        pivot = pd.pivot_table(data_train,
                               index='user_id', columns='item_id',
                               values='quantity',
                               aggfunc='count',
                               fill_value=0)

        self.user_item_matrix = pivot.astype(float)
        self.sparse_user_item = csr_matrix(self.user_item_matrix).tocsr()

        return self.user_item_matrix, self.sparse_user_item

    def prepare_dicts(self, data):
        """Build lookup tables between original ids and matrix positions.

        Args:
            data: the pivoted user-item matrix; user ids are read from its
                index and item ids from its columns.

        Returns:
            Tuple ``(id_to_itemid, id_to_userid, itemid_to_id, userid_to_id)``.
            The same dictionaries are stored on the instance.
        """
        self.userids = data.index.values
        self.itemids = data.columns.values

        self.matrix_userids = np.arange(len(self.userids))
        self.matrix_itemids = np.arange(len(self.itemids))

        self.id_to_itemid = dict(zip(self.matrix_itemids, self.itemids))
        self.id_to_userid = dict(zip(self.matrix_userids, self.userids))

        self.itemid_to_id = dict(zip(self.itemids, self.matrix_itemids))
        self.userid_to_id = dict(zip(self.userids, self.matrix_userids))

        return self.id_to_itemid, self.id_to_userid, self.itemid_to_id, self.userid_to_id

    def get_recommendations(self, user, model, N=5):
        """Recommend the top-N items for ``user``.

        Requires ``prepare_matrix`` and ``prepare_dicts`` to have been called.

        Args:
            user: original user id (a key of ``self.userid_to_id``).
            model: fitted model exposing ``recommend`` that returns
                ``(item_indices, scores)``.
            N: number of items to return.

        Returns:
            List of original item ids ordered by descending score.

        Raises:
            KeyError: if ``user`` is not present in ``self.userid_to_id``.
        """
        user_idx = self.userid_to_id[user]

        # Exclude the placeholder item only when it is configured and really
        # present in the matrix; otherwise nothing extra is filtered.
        filter_items = None
        if self.fake_id is not None and self.fake_id in self.itemid_to_id:
            filter_items = [self.itemid_to_id[self.fake_id]]

        # The model needs the user's row of the CSR matrix; indexing the
        # pandas frame with [] would look up a column label instead.
        item_idx, scores = model.recommend(
            userid=user_idx,
            user_items=self.sparse_user_item[user_idx],
            N=N,
            filter_already_liked_items=self.filter_already_liked_items,
            filter_items=filter_items,
            recalculate_user=True)

        order = scores.argsort()[::-1]
        return [self.id_to_itemid[rec] for rec in item_idx[order]]

    @staticmethod
    def fit_own_recommender(user_item_matrix, num_threads=4):
        """Fit an item-item model with ``K=1``.

        With a single neighbour the model is meant to recommend among the
        items the user has already bought.

        Args:
            user_item_matrix: user-item matrix, anything ``csr_matrix``
                accepts (users in rows, items in columns).
            num_threads: number of threads passed to the model.

        Returns:
            The fitted ``ItemItemRecommender``.
        """
        own_recommender = ItemItemRecommender(K=1, num_threads=num_threads)
        # Fit on the argument itself rather than on a notebook-level global.
        own_recommender.fit(csr_matrix(user_item_matrix).tocsr(), show_progress=True)

        return own_recommender

    @staticmethod
    def fit(sparse_user_item, n_factors=20, regularization=0.001, iterations=15, num_threads=4):
        """Fit an ALS model on the sparse user-item matrix.

        Args:
            sparse_user_item: CSR matrix passed to the model's ``fit`` as is.
            n_factors: number of latent factors.
            regularization: regularization coefficient.
            iterations: number of ALS iterations.
            num_threads: number of threads passed to the model.

        Returns:
            The fitted ``AlternatingLeastSquares`` model.
        """
        model = AlternatingLeastSquares(factors=n_factors,
                                        regularization=regularization,
                                        iterations=iterations,
                                        num_threads=num_threads)
        model.fit(sparse_user_item, show_progress=True)

        return model

    @staticmethod
    def _recommend_item_ids(model, user, id_to_itemid, userid_to_id, sparse_user_item, N):
        """Call ``model.recommend`` for one user and map results to item ids."""
        user_idx = userid_to_id[user]
        # user_items is the single row of the requested user; the whole
        # matrix does not match a scalar userid.
        item_idx = model.recommend(userid=user_idx,
                                   user_items=sparse_user_item[user_idx],
                                   N=N,
                                   filter_already_liked_items=False,
                                   filter_items=None,
                                   recalculate_user=True)[0]
        res = [id_to_itemid[rec] for rec in item_idx]

        assert len(res) == N, 'Количество рекомендаций != {}'.format(N)
        return res

    @staticmethod
    def get_similar_items_recommendation(model, user, id_to_itemid,
                                         userid_to_id,
                                         sparse_user_item, N=5):
        """Recommend N items for ``user`` (similar-items strategy).

        Intended to recommend items similar to the user's top-N purchases.
        The current implementation delegates to ``model.recommend`` on the
        user's row of ``sparse_user_item``.

        Args:
            model: fitted model exposing ``recommend``.
            user: original user id (a key of ``userid_to_id``).
            id_to_itemid: mapping from matrix column index to item id.
            userid_to_id: mapping from user id to matrix row index.
            sparse_user_item: CSR user-item matrix.
            N: number of items to return.

        Returns:
            List of N original item ids.

        Raises:
            AssertionError: if the model returns a number of items other
                than N.
        """
        return MainRecommender._recommend_item_ids(
            model, user, id_to_itemid, userid_to_id, sparse_user_item, N)

    @staticmethod
    def get_similar_users_recommendation(model, user, id_to_itemid,
                                         userid_to_id,
                                         sparse_user_item, N=5):
        """Recommend N items for ``user`` (similar-users strategy).

        Intended to recommend top-N items among those bought by similar
        users. The current implementation delegates to ``model.recommend``
        on the user's row of ``sparse_user_item``.

        Args:
            model: fitted model exposing ``recommend``.
            user: original user id (a key of ``userid_to_id``).
            id_to_itemid: mapping from matrix column index to item id.
            userid_to_id: mapping from user id to matrix row index.
            sparse_user_item: CSR user-item matrix.
            N: number of items to return.

        Returns:
            List of N original item ids.

        Raises:
            AssertionError: if the model returns a number of items other
                than N.
        """
        return MainRecommender._recommend_item_ids(
            model, user, id_to_itemid, userid_to_id, sparse_user_item, N)

Проверка кода на работоспособность

In [9]:
# Create the recommender with its default settings.
recommender = MainRecommender()

In [10]:
id_to_itemid, id_to_userid, itemid_to_id, userid_to_id = recommender.prepare_dicts(data_train)

In [11]:
%%time
user_item_matrix, sparse_user_item = recommender.prepare_matrix(data_train)

CPU times: user 33 s, sys: 10.6 s, total: 43.6 s
Wall time: 48.5 s


In [12]:
# Fit ALS with 10 factors and a single iteration.
model = recommender.fit(sparse_user_item, n_factors=10, regularization=0.001, iterations=1, num_threads=4)

  0%|          | 0/1 [00:00<?, ?it/s]

In [13]:
# Fit the K=1 item-item model.
own_recommender = recommender.fit_own_recommender(user_item_matrix)

  0%|          | 0/86865 [00:00<?, ?it/s]

In [None]:
# %%time
    
# result['als'] = result['user_id'].map(lambda x: recommender.get_recommendations(x, model, N=5))
# result.apply(lambda row: precision_at_k(row['als'], row['actual'], k=5), axis=1).mean()

In [None]:
# %%time
    
# result['als'] = result['user_id'].map(lambda x: recommender.get_similar_items_recommendation(model, x, id_to_itemid, userid_to_id, sparse_user_item, N=5))
# result.apply(lambda row: precision_at_k(row['als'], row['actual'], k=5), axis=1).mean()