In [1]:
import pandas as pd
import numpy as np

# Для работы с матрицами
from scipy.sparse import csr_matrix

# Матричная факторизация
from implicit.als import AlternatingLeastSquares
from implicit.nearest_neighbours import ItemItemRecommender  # нужен для одного трюка
from implicit.nearest_neighbours import bm25_weight, tfidf_weight

In [18]:
class MainRecommender:
    """Рекоммендации, которые можно получить из ALS

    Input
    -----
    user_item_matrix: pd.DataFrame
        Матрица взаимодействий user-item
    """

    def __init__(self, data, weighting=True):

        # Топ покупок каждого юзера
        self.top_purchases = data.groupby(['user_id', 'item_id'])['quantity'].count().reset_index()
        self.top_purchases.sort_values('quantity', ascending=False, inplace=True)
        self.top_purchases = self.top_purchases[self.top_purchases['item_id'] != 999999]

        # Топ покупок по всему датасету
        self.overall_top_purchases = data.groupby('item_id')['quantity'].count().reset_index()
        self.overall_top_purchases.sort_values('quantity', ascending=False, inplace=True)
        self.overall_top_purchases = self.overall_top_purchases[self.overall_top_purchases['item_id'] != 999999]
        self.overall_top_purchases = self.overall_top_purchases.item_id.tolist()

        self.user_item_matrix = self._prepare_matrix(data)  # pd.DataFrame
        self.id_to_itemid, self.id_to_userid, \
            self.itemid_to_id, self.userid_to_id = self._prepare_dicts(self.user_item_matrix)

        if weighting:
            self.user_item_matrix = bm25_weight(self.user_item_matrix.T).T

        self.model = self.fit(self.user_item_matrix)
        self.own_recommender = self.fit_own_recommender(self.user_item_matrix)

    @staticmethod
    def _prepare_matrix(data):
        """Готовит user-item матрицу"""
        user_item_matrix = pd.pivot_table(data,
                                          index='user_id', columns='item_id',
                                          values='quantity',  # Можно пробовать другие варианты
                                          aggfunc='count',
                                          fill_value=0
                                          )

        user_item_matrix = user_item_matrix.astype(float)  # необходимый тип матрицы для implicit

        return user_item_matrix

    @staticmethod
    def _prepare_dicts(user_item_matrix):
        """Подготавливает вспомогательные словари"""

        userids = user_item_matrix.index.values
        itemids = user_item_matrix.columns.values

        matrix_userids = np.arange(len(userids))
        matrix_itemids = np.arange(len(itemids))

        id_to_itemid = dict(zip(matrix_itemids, itemids))
        id_to_userid = dict(zip(matrix_userids, userids))

        itemid_to_id = dict(zip(itemids, matrix_itemids))
        userid_to_id = dict(zip(userids, matrix_userids))

        return id_to_itemid, id_to_userid, itemid_to_id, userid_to_id

    @staticmethod
    def fit_own_recommender(user_item_matrix):
        """Обучает модель, которая рекомендует товары, среди товаров, купленных юзером"""

        own_recommender = ItemItemRecommender(K=1, num_threads=4)
        own_recommender.fit(csr_matrix(user_item_matrix).T.tocsr())

        return own_recommender

    @staticmethod
    def fit(user_item_matrix, n_factors=20, regularization=0.001, iterations=15, num_threads=4):
        """Обучает ALS"""

        model = AlternatingLeastSquares(factors=n_factors,
                                        regularization=regularization,
                                        iterations=iterations,
                                        num_threads=num_threads)
        model.fit(csr_matrix(user_item_matrix).T.tocsr())

        return model

    def _update_dict(self, user_id):
        """Если появился новыю user / item, то нужно обновить словари"""

        if user_id not in self.userid_to_id.keys():

            max_id = max(list(self.userid_to_id.values()))
            max_id += 1

            self.userid_to_id.update({user_id: max_id})
            self.id_to_userid.update({max_id: user_id})

    def _get_similar_item(self, item_id):
        """Находит товар, похожий на item_id"""
        recs = self.model.similar_items(self.itemid_to_id[item_id], N=2)  # Товар похож на себя -> рекомендуем 2 товара
        top_rec = recs[1][0]  # И берем второй (не товар из аргумента метода)
        return self.id_to_itemid[top_rec]

    def _extend_with_top_popular(self, recommendations, N=5):
        """Если кол-во рекоммендаций < N, то дополняем их топ-популярными"""

        if len(recommendations) < N:
            recommendations.extend(self.overall_top_purchases[:N])
            recommendations = recommendations[:N]

        return recommendations

    def _get_recommendations(self, user, model, N=5):
        """Рекомендации через стардартные библиотеки implicit"""

        self._update_dict(user_id=user)
        res = [self.id_to_itemid[rec[0]] for rec in model.recommend(userid=self.userid_to_id[user],
                                        user_items=csr_matrix(self.user_item_matrix).tocsr(),
                                        N=N,
                                        filter_already_liked_items=False,
                                        filter_items=[self.itemid_to_id[999999]],
                                        recalculate_user=True)]

        res = self._extend_with_top_popular(res, N=N)

        assert len(res) == N, 'Количество рекомендаций != {}'.format(N)
        return res

    def get_als_recommendations(self, user, N=5):
        """Рекомендации через стардартные библиотеки implicit"""

        self._update_dict(user_id=user)
        return self._get_recommendations(user, model=self.model, N=N)

    def get_own_recommendations(self, user, N=5):
        """Рекомендуем товары среди тех, которые юзер уже купил"""

        self._update_dict(user_id=user)
        return self._get_recommendations(user, model=self.own_recommender, N=N)

    def get_similar_items_recommendation(self, user, N=5):
        """Рекомендуем товары, похожие на топ-N купленных юзером товаров"""

        top_users_purchases = self.top_purchases[self.top_purchases['user_id'] == user].head(N)

        res = top_users_purchases['item_id'].apply(lambda x: self._get_similar_item(x)).tolist()
        res = self._extend_with_top_popular(res, N=N)

        assert len(res) == N, 'Количество рекомендаций != {}'.format(N)
        return res

    def get_similar_users_recommendation(self, user, N=5):
        """Рекомендуем топ-N товаров, среди купленных похожими юзерами"""

        res = []

        # Находим топ-N похожих пользователей
        similar_users = self.model.similar_users(self.userid_to_id[user], N=N+1)
        similar_users = [rec[0] for rec in similar_users]
        similar_users = similar_users[1:]   # удалим юзера из запроса

        for user in similar_users:
            res.extend(self.get_own_recommendations(user, N=1))

        res = self._extend_with_top_popular(res, N=N)

        assert len(res) == N, 'Количество рекомендаций != {}'.format(N)
        return res

In [4]:
data = pd.read_csv('/home/actrec/.virtualenvs/recomm/recommenders/seq_quantity.csv')
data.drop(columns=['catLevel1'], inplace=True) 
# data.drop_duplicates(inplace=True)
# data.rename(columns = {'total_orders_of_this_product' : 'rating'}, inplace = True)
data.rename(columns = {'userID':'user_id', 'itemID':'item_id'}, inplace = True)
data.head()

Unnamed: 0,user_id,item_id,day,quantity
0,0,3,31,1.072
1,0,3,53,2.428
2,0,3,58,1.946
3,0,3,65,0.235
4,0,3,66,0.21


In [56]:
out = MainRecommender(data)



  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/2511 [00:00<?, ?it/s]

In [20]:
us2 = out.get_similar_items_recommendation(user=2, N=10)
us2

[349, 4370, 3369, 4370, 851, 397, 3278, 2606, 32, 844]

In [22]:
us3 = out.get_similar_items_recommendation(user=24, N=10)
us3

[349, 3056, 7106, 254, 4344, 2635, 50, 3863, 712, 537]

In [53]:
def predict_ranking(
    userlist
):
    userlist.sort_values(by='userID',inplace=True, ignore_index=True)
    users1 = userlist.assign(rec1='', rec2='', rec3='', rec4='', rec5='', rec6='', rec7='', rec8='', rec9='', rec10='')
    index = 0
    for c, uid in userlist.iterrows():
#         print (uid['userID'])
        temp = out.get_similar_items_recommendation(user=uid['userID'], N=10)
        users1.loc[index, 'rec1':'rec10'] = temp
        index = index + 1
    return users1

In [40]:
users = pd.read_csv('/home/actrec/.virtualenvs/recomm/recommenders/test_users_intermediate.csv')
users.sort_values(by='userID',inplace=True, ignore_index=True)
users1 = users.assign(rec1='', rec2='', rec3='', rec4='', rec5='', rec6='', rec7='', rec8='', rec9='', rec10='')
# data.rename(columns = {'total_orders_of_this_product' : 'rating'}, inplace = True)
# merged_i = merged_i.assign(num=a[np.arange(len(merged_i)) % len(a)])
users1.loc[0, 'rec1':'rec10'] = us2
users1.head(10)

Unnamed: 0,userID,rec1,rec2,rec3,rec4,rec5,rec6,rec7,rec8,rec9,rec10
0,2,349.0,4370.0,3369.0,4370.0,851.0,397.0,3278.0,2606.0,32.0,844.0
1,24,,,,,,,,,,
2,28,,,,,,,,,,
3,48,,,,,,,,,,
4,55,,,,,,,,,,
5,71,,,,,,,,,,
6,75,,,,,,,,,,
7,76,,,,,,,,,,
8,80,,,,,,,,,,
9,93,,,,,,,,,,


In [57]:
users = pd.read_csv('/home/actrec/.virtualenvs/recomm/recommenders/test_users_intermediate.csv')
fin = predict_ranking(users)
fin.head(20)

Unnamed: 0,userID,rec1,rec2,rec3,rec4,rec5,rec6,rec7,rec8,rec9,rec10
0,2,349,851,3369,397,397,397,3278,2606,32,4554
1,24,349,3056,1787,254,4344,2635,477,3863,1579,5237
2,28,269,349,1338,32,23,870,1867,2315,251,280
3,48,349,32,80,1162,38,1377,243,438,1270,542
4,55,349,1806,243,1878,243,3156,957,1741,5264,4964
5,71,1253,1253,4752,8551,786,4145,349,5126,3319,1558
6,75,349,3484,2104,4298,1548,3546,3168,2924,438,2578
7,76,349,3010,1766,1,1907,911,2139,438,2836,3671
8,80,349,1,65,1173,319,1618,285,2694,243,251
9,93,349,703,4281,869,4229,2352,32,150,6980,6980


In [58]:
fin.to_csv('/media/actrec/DATA/submits/androbaza_131.csv', index=False)