**Imports**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from scipy.sparse import csr_matrix

In [None]:
from implicit import als

In [None]:
from src.metrics import precision_at_k, recall_at_k
from src.utils import prefilter_items
from src.recommenders import MainRecommender

In [None]:
from src.recommenders_alt import alt_recommender

In [None]:
%load_ext autoreload

In [None]:
%autoreload

**Reading of data**

In [None]:
# Load the four CSV inputs into DataFrames. Paths are relative to the
# notebook's working directory; the order of names matches the order of files.
data, data_test, item_features, user_features = map(
    pd.read_csv,
    ('retail_train.csv', 'retail_test.csv', 'product.csv', 'hh_demographic.csv'),
)

**Processing of train data**

In [None]:
# Column processing: lower-case every column name, then align the key columns
# with the transaction data (`product_id` -> `item_id`, `household_key` -> `user_id`).
item_features = (
    item_features
    .rename(columns=str.lower)
    .rename(columns={'product_id': 'item_id'})
)
user_features = (
    user_features
    .rename(columns=str.lower)
    .rename(columns={'household_key': 'user_id'})
)

# The train/validation scheme matters!
# -- old purchases -- | -- 6 weeks -- | -- 3 weeks --
# Tune the size of the 2nd dataset (6 weeks) --> learning curve
# (recall@k as a function of dataset size).
val_lvl_1_size_weeks = 6
val_lvl_2_size_weeks = 3

# Compute the week boundaries once instead of re-evaluating max() in every mask.
week_no = data['week_no']
last_week = week_no.max()
lvl_1_start = last_week - (val_lvl_1_size_weeks + val_lvl_2_size_weeks)
lvl_2_start = last_week - val_lvl_2_size_weeks

data_train_lvl_1 = data[week_no < lvl_1_start]
data_val_lvl_1 = data[(week_no >= lvl_1_start) & (week_no < lvl_2_start)]

# Kept as a separate copy for clarity; changes will be added to it later, so
# the two frames will diverge.
data_train_lvl_2 = data_val_lvl_1.copy()

# NOTE(review): `>= last_week - 3` selects week numbers last_week-3 .. last_week,
# i.e. 4 distinct values if week_no is an integer — confirm this matches the
# intended "3 weeks" window.
data_val_lvl_2 = data[week_no >= lvl_2_start]

data_train_lvl_1.head(2)

In [None]:
# Size of the level-1 training frame before item prefiltering.
data_train_lvl_1.shape

In [None]:
# Report how many distinct items survive the prefilter.
# Gotcha: this cell overwrites data_train_lvl_1, so re-running it filters the
# already-filtered frame and the "before" count is no longer the raw one.
n_items_before = data_train_lvl_1['item_id'].nunique()
data_train_lvl_1 = prefilter_items(
    data_train_lvl_1,
    take_n_popular=5000,
    item_features=item_features,
)
n_items_after = data_train_lvl_1['item_id'].nunique()

print(f'Decreased # items from {n_items_before} to {n_items_after}')

In [None]:
# Size of the level-1 training frame after item prefiltering.
data_train_lvl_1.shape

In [None]:
# Build the main recommender from the prefiltered level-1 training data.
# Later cells call methods on `recommender`, so it has to be defined for the
# notebook to run top to bottom on a fresh kernel.
recommender = MainRecommender(data_train_lvl_1)

In [None]:
# One row per user: the array of distinct items that user has in the level-1
# validation frame, stored in the `actual` column.
result_lvl_1 = (
    data_val_lvl_1
    .groupby('user_id')['item_id']
    .unique()
    .rename('actual')
    .reset_index()
)
result_lvl_1.head(2)

In [None]:
# Users to iterate over, minus the explicitly excluded ids.
# Filtering (instead of calling list.remove per id) keeps this cell safe to
# re-run and does not raise ValueError when an id is absent from the list.
# NOTE(review): the cell that drops rows from result_lvl_1 further down also
# excludes user 296 — confirm whether it should be excluded here as well.
excluded_user_ids = {1813, 1984}
user_ids = [
    user_id
    for user_id in result_lvl_1['user_id'].tolist()
    if user_id not in excluded_user_ids
]

In [None]:
# Inspect the row index of the prefiltered level-1 training frame.
data_train_lvl_1.index

In [None]:
# Inspect the user_id column of the prefiltered level-1 training frame.
data_train_lvl_1.user_id

In [None]:
# Peek at the first few user ids (a slice, to avoid dumping the entire list).
user_ids[:10]

In [None]:
# Build the alternative recommender from the prefiltered level-1 training data.
alt = alt_recommender(data_train_lvl_1)

In [None]:
# NOTE(review): positional hyper-parameters; their meaning is defined in
# src.recommenders_alt (presumably factors / regularization / iterations) —
# TODO confirm and pass them by keyword.
alt.train_model(20, 0.001, 15)

In [None]:
# Spot-check each recommendation method of `alt` for a single user (2378).
alt.get_als_recommendations(2378)

In [None]:
alt.get_similar_items_recommendation(2378)

In [None]:
alt.get_similar_users_recommendation(2378)

In [None]:
alt.get_own_recommendations(2378)

In [None]:
# Row labels of result_lvl_1 that belong to users 296, 1813 and 1984.
idx = result_lvl_1.loc[result_lvl_1['user_id'].isin([296, 1813, 1984])].index

In [None]:
# Show which row labels were selected.
idx

In [None]:
# Remove the selected rows from result_lvl_1 in place.
result_lvl_1.drop(idx, axis = 0, inplace = True)

In [None]:
 k=20
for x in user_ids:
    print(x, user_ids.index(x))
    recs = recommender.get_als_recommendations(x, N=k)
    recs
    #result_lvl_1[result_lvl_1['user_id'] == x][f'als_{k}'] = recs

In [None]:
%time

#k=50
for k in [20, 50, 100, 200, 500]:
#for k in [50]:    
    print(f'Iteration k={k}')
    result_lvl_1[f'als_{k}'] = result_lvl_1['user_id'].apply(lambda x: recommender.get_als_recommendations(x, N=k))
    print(f'...als finished')
    result_lvl_1[f'own_{k}'] = result_lvl_1['user_id'].apply(lambda x: recommender.get_own_recommendations(x, N=k))
    print(f'...own finished')
    result_lvl_1[f'sim_user_{k}'] = result_lvl_1['user_id'].apply(lambda x: recommender.get_similar_users_recommendation(x, N=k))
    print(f'...sim_user finished')
    result_lvl_1[f'sim_item_{k}'] = result_lvl_1['user_id'].apply(lambda x: recommender.get_similar_items_recommendation(x, N=k))
    print(f'...sim_item finished')

result_lvl_1.head(2)