# Импорт библиотек

In [1]:
# Make the parent directory importable so that project-local modules
# (imported as `src.*` in this notebook) can be resolved.
import sys

PROJECT_ROOT = '../'
# Guard keeps the cell idempotent: re-running it does not pile up
# duplicate entries in sys.path.
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

In [2]:
# основные модули
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# загрузка собственных модулей
from src.utils import prefilter_items
from src.metrics import precision_at_k, recall_at_k, evaluete_rec
from src.myf import reduction_memory
from src.recommenders import MainRecommender

# Disable warnings: every warning category is ignored from this point on.
# NOTE(review): a blanket 'ignore' also hides pandas warnings (for example
# SettingWithCopyWarning) that can point at real problems; consider
# narrowing the filter to specific categories.
import warnings
warnings.filterwarnings('ignore')

# Загрузка данных

In [3]:
# Read the raw tables: transactions for both model levels, the final
# hold-out transactions, and the item / household feature tables.
train_test = pd.read_csv('../data/retail_train.csv')
test_lvl2 = pd.read_csv('../data/retail_test.csv')
item_features = pd.read_csv('../data/product.csv')
user_features = pd.read_csv('../data/hh_demographic.csv')

# Shrink the memory footprint of the three frames below, in this order
# (reduction_memory prints a before/after report for each frame).
# user_features is not passed through reduction_memory.
train_test, test_lvl2, item_features = (
    reduction_memory(frame)
    for frame in (train_test, test_lvl2, item_features)
)

before:		230.09 MB
after:		141.41 MB
reduсed:	88.68 MB
before:		8.52 MB
after:		5.24 MB
reduсed:	3.28 MB
before:		5.17 MB
after:		4.25 MB
reduсed:	0.92 MB


In [4]:
# Lower-case every column name in both feature tables
item_features.columns = list(map(str.lower, item_features.columns))
user_features.columns = list(map(str.lower, user_features.columns))

# Rename the key columns: product_id -> item_id, household_key -> user_id
item_features = item_features.rename(columns={'product_id': 'item_id'})
user_features = user_features.rename(columns={'household_key': 'user_id'})

# Разбиение на тестовые и трейновые фреймы

In [5]:
weeks_lvl1 = 9

# Level 1: split the transactions by week number.
# Rows with week_no strictly below the threshold go to the level-1 train set,
# rows at or above it go to the level-1 test set.
# NOTE(review): because the test side uses >=, it covers week numbers
# max - weeks_lvl1 .. max inclusive, i.e. weeks_lvl1 + 1 distinct values if
# week numbers are consecutive integers - confirm this is intended.
week_no = train_test['week_no']
split_week = week_no.max() - weeks_lvl1

train_lvl1 = train_test.loc[week_no < split_week]
test_lvl1 = train_test.loc[week_no >= split_week]

# Level 2: trained on the level-1 test period (independent copy)
train_lvl2 = test_lvl1.copy()

# Обучение двухуровневой модели

## Модель 1го уровня

In [6]:
# Prefilter the level-1 training set (the filtering rules live in the
# project-local src.utils.prefilter_items and are not visible here).
# Note: the result is assigned back to train_lvl1, so re-running this cell
# applies the filter to already filtered data.
train_lvl1 = prefilter_items(train_lvl1, item_features=item_features)
train_lvl1.head()

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc,price
0,2375,26984851472,1,1004906,1,1.39,364,-0.6,1631,1,0.0,0.0,1.39
1,2375,26984851472,1,1033142,1,0.82,364,0.0,1631,1,0.0,0.0,0.82
2,2375,26984851472,1,1036325,1,0.99,364,-0.3,1631,1,0.0,0.0,0.99
3,2375,26984851472,1,1082185,1,1.21,364,0.0,1631,1,0.0,0.0,1.21
4,2375,26984851472,1,8160430,1,1.5,364,-0.39,1631,1,0.0,0.0,1.5


In [7]:
# Train the level-1 model on the prefiltered level-1 training set.
# Presumably the MainRecommender constructor does the fitting itself
# (progress bars appear in the cell output) - confirm in src.recommenders.
recommender_lvl1 = MainRecommender(train_lvl1)



HBox(children=(IntProgress(value=0, max=15), HTML(value='')))




HBox(children=(IntProgress(value=0, max=5001), HTML(value='')))




### Прогнозирование рекомендаций моделью 1го уровня

In [8]:
# Build the level-1 results frame: one row per user with the array of
# unique items that user bought in the level-1 test period.
result_lvl1 = test_lvl1.groupby('user_id')['item_id'].unique().reset_index()
result_lvl1.columns = ['user_id', 'actual']

# Drop users that are absent from the level-1 training set (the model was
# not trained on them). The explicit .copy() makes the filtered frame an
# independent object, so adding columns to it afterwards is not a chained
# assignment on a slice of the unfiltered frame.
known_users = result_lvl1['user_id'].isin(train_lvl1['user_id'])
result_lvl1 = result_lvl1[known_users].copy()
result_lvl1.head()

Unnamed: 0,user_id,actual
0,1,"[853529, 865456, 867607, 872137, 874905, 87524..."
1,2,"[15830248, 838136, 839656, 861272, 866211, 870..."
2,3,"[835476, 851057, 872021, 878302, 879948, 90963..."
3,4,"[883932, 970760, 1035676, 1055863, 1097610, 67..."
4,6,"[1024306, 1102949, 6548453, 835394, 940804, 96..."


In [9]:
k = 5

# Result column -> level-1 recommender method that produces it.
# Insertion order defines the column order in the results frame.
lvl1_sources = {
    'similar_items': recommender_lvl1.get_similar_items_recommendation,
    'als': recommender_lvl1.get_als_recommendations,
    'own': recommender_lvl1.get_own_recommendations,
}

# Request k recommendations per user from each source and store them
# as a new column of the results frame.
for column, recommend in lvl1_sources.items():
    result_lvl1[column] = result_lvl1['user_id'].apply(
        lambda user_id: recommend(user=user_id, N=k)
    )

result_lvl1.head()

Unnamed: 0,user_id,actual,similar_items,als,own
0,1,"[853529, 865456, 867607, 872137, 874905, 87524...","[12352293, 999999, 1082185, 9526410, 7135183]","[1004390, 7467039, 1082212, 1062572, 1082185]","[856942, 9297615, 5577022, 1074612, 9655212]"
1,2,"[15830248, 838136, 839656, 861272, 866211, 870...","[8090509, 1133018, 7025275, 1106523, 985999]","[880150, 1009449, 5569230, 1041259, 916122]","[1076580, 911974, 826784, 1083296, 838136]"
2,3,"[835476, 851057, 872021, 878302, 879948, 90963...","[1076875, 909396, 960318, 1075979, 1133018]","[1110244, 951590, 5569230, 8090521, 883404]","[835476, 921345, 998206, 1092937, 964594]"
3,4,"[883932, 970760, 1035676, 1055863, 1097610, 67...","[951590, 1119454, 856772, 1055425, 1133312]","[8090521, 1119454, 891423, 902172, 873627]","[891423, 910109, 887003, 1121367, 951821]"
4,6,"[1024306, 1102949, 6548453, 835394, 940804, 96...","[999999, 904360, 825541, 845208, 948650]","[1082185, 1042616, 863632, 1024306, 878996]","[13003092, 1119051, 9911484, 8203834, 1108094]"


### Оценка рекомендаций 1го уровня

In [10]:
# Comparison table: one row per model level, one column per recommendation
# source. The index is named after the metric, built from k so the label
# cannot drift from the cutoff actually used.
metric_label = 'recall@{}'.format(k)
recall_lvls = pd.DataFrame(
    # 'own' must not carry a leading space, otherwise recall_lvls['own'] fails
    columns=[metric_label, 'similar_items', 'als', 'own']
).set_index(metric_label)

# evaluete_rec is project-local; presumably it returns one recall value per
# recommendation column of result_lvl1, in column order - confirm in src.metrics.
recall_lvls.loc['level 1'] = evaluete_rec(data=result_lvl1, true='actual', metric='recall@k', k=k)
recall_lvls

Unnamed: 0_level_0,similar_items,als,own
recall@5,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
level 1,0.008587,0.013422,0.022427


## Модель 2го уровня

In [11]:
# Prefilter the level-2 training set (the filtering rules live in the
# project-local src.utils.prefilter_items and are not visible here).
# Note: the result is assigned back to train_lvl2, so re-running this cell
# applies the filter to already filtered data.
train_lvl2 = prefilter_items(data=train_lvl2, item_features=item_features)
train_lvl2.head()

Unnamed: 0,user_id,basket_id,day,item_id,quantity,sales_value,store_id,retail_disc,trans_time,week_no,coupon_disc,coupon_match_disc,price
2104867,2070,40618492260,594,999999,1,1.0,311,-0.29,40,86,0.0,0.0,1.0
2107468,2021,40618753059,594,840361,1,0.99,443,0.0,101,86,0.0,0.0,0.99
2107469,2021,40618753059,594,856060,1,1.77,443,-0.09,101,86,0.0,0.0,1.77
2107470,2021,40618753059,594,869344,1,1.67,443,-0.22,101,86,0.0,0.0,1.67
2107471,2021,40618753059,594,896862,2,5.0,443,-2.98,101,86,0.0,0.0,2.5


In [12]:
# Train the level-2 model on the prefiltered level-2 training set.
# Presumably the MainRecommender constructor does the fitting itself
# (progress bars appear in the cell output) - confirm in src.recommenders.
recommender_lvl2 = MainRecommender(train_lvl2)

HBox(children=(IntProgress(value=0, max=15), HTML(value='')))




HBox(children=(IntProgress(value=0, max=5001), HTML(value='')))




### Прогнозирование рекомендаций моделью 2го уровня

In [13]:
# Build the level-2 results frame: one row per user with the array of
# unique items that user bought in the level-2 test data.
result_lvl2 = test_lvl2.groupby('user_id')['item_id'].unique().reset_index()
result_lvl2.columns = ['user_id', 'actual']

# Drop users that are absent from the level-2 training set (the model was
# not trained on them). The explicit .copy() makes the filtered frame an
# independent object, so adding columns to it afterwards is not a chained
# assignment on a slice of the unfiltered frame.
known_users = result_lvl2['user_id'].isin(train_lvl2['user_id'])
result_lvl2 = result_lvl2[known_users].copy()
result_lvl2.head()

Unnamed: 0,user_id,actual
0,1,"[880007, 883616, 931136, 938004, 940947, 94726..."
1,2,"[820165, 820291, 826784, 826835, 829009, 85784..."
2,3,"[827683, 908531, 989069, 1071377, 1080155, 109..."
3,6,"[956902, 960791, 1037863, 1119051, 1137688, 84..."
4,7,"[847270, 855557, 859987, 863407, 895454, 90663..."


In [14]:
# Result column -> level-2 recommender method that produces it.
# Insertion order defines the column order in the results frame.
lvl2_sources = {
    'similar_items': recommender_lvl2.get_similar_items_recommendation,
    'als': recommender_lvl2.get_als_recommendations,
    'own': recommender_lvl2.get_own_recommendations,
}

# Request k recommendations per user from each source and store them
# as a new column of the results frame.
for column, recommend in lvl2_sources.items():
    result_lvl2[column] = result_lvl2['user_id'].apply(
        lambda user_id: recommend(user=user_id, N=k)
    )

result_lvl2.head()

Unnamed: 0,user_id,actual,similar_items,als,own
0,1,"[880007, 883616, 931136, 938004, 940947, 94726...","[1041796, 1005576, 961554, 951590, 878285]","[1082185, 856942, 878285, 8293439, 1105488]","[8293439, 1074612, 856942, 5577022, 1050310]"
1,2,"[820165, 820291, 826784, 826835, 829009, 85784...","[1044805, 860776, 8065410, 12301073, 937276]","[12301109, 981760, 1139142, 889774, 1068708]","[1128698, 1138132, 909396, 6633224, 1139142]"
2,3,"[827683, 908531, 989069, 1071377, 1080155, 109...","[9526185, 987480, 7167249, 1083328, 925514]","[7167218, 9526886, 6463658, 7167249, 9526563]","[909638, 984088, 872021, 1083328, 878302]"
3,6,"[956902, 960791, 1037863, 1119051, 1137688, 84...","[951412, 878996, 845208, 923746, 1041796]","[1037863, 1051516, 1015296, 1022254, 962568]","[1015296, 946489, 1017061, 1126203, 1010259]"
4,7,"[847270, 855557, 859987, 863407, 895454, 90663...","[961554, 1031833, 16809471, 938700, 932182]","[13987153, 1082185, 1068258, 5591154, 1056973]","[966058, 1001525, 1018769, 971660, 840386]"


### Оценка рекомендаций 2го уровня

In [15]:
# Add the level-2 row to the comparison table, using the same cutoff k as
# for level 1. evaluete_rec is project-local; presumably it returns one
# recall value per recommendation column of result_lvl2 - confirm in src.metrics.
recall_lvls.loc['level 2'] = evaluete_rec(data=result_lvl2, true='actual', metric='recall@k', k=k)
recall_lvls

Unnamed: 0_level_0,similar_items,als,own
recall@5,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
level 1,0.008587,0.013422,0.022427
level 2,0.01156,0.024069,0.032365
