In [1]:
import numpy as np
import pandas as pd
from hnmchallenge.constant import *
from hnmchallenge.data_reader import DataReader
from hnmchallenge.datasets.last_month_last_day import LMLDDataset
from hnmchallenge.datasets.last_month_last_week_dataset import LMLWDataset
from hnmchallenge.datasets.last_week_last_week import LWLWDataset
from hnmchallenge.evaluation.python_evaluation import map_at_k, recall_at_k
from hnmchallenge.models_prediction.bought_items_recs import BoughtItemsRecs
from hnmchallenge.models_prediction.ease_recs import EaseRecs
from hnmchallenge.models_prediction.itemknn_recs import ItemKNNRecs
from hnmchallenge.models_prediction.popularity_recs import PopularityRecs
from hnmchallenge.models_prediction.recs_interface import RecsInterface
from hnmchallenge.models_prediction.time_pop import TimePop
from hnmchallenge.models_prediction.ensemble_recs import EnsembleRecs

In [2]:
# Create the dataset object (LMLDDataset comes from hnmchallenge.datasets.last_month_last_day).
# NOTE(review): the next cell instantiates LMLDDataset again before using it,
# so this cell looks redundant -- confirm and keep only one of the two.
dataset = LMLDDataset()

In [3]:
# Names of the stored recommendation sets that EnsembleRecs is asked to combine.
# Commented-out candidates that are currently not part of the ensemble:
#   cutf_100_PSGE_tw_True_rs_False_k_256, cutf_100_Popularity_cutoff_100,
#   cutf_100_EASE_tw_True_rs_False_l2_0.001, cutf_40_Popularity_cutoff_40,
#   cutf_0_BoughtItemsRecs
models = [
    "cutf_100_TimePop_alpha_1.0",
    "cutf_100_ItemKNN_tw_True_rs_False",
]

dataset = LMLDDataset()

# Build the ensemble with kind="train" and fetch its recommendations frame.
ensemble = EnsembleRecs(models_list=models, kind="train", dataset=dataset)
recs_df = ensemble.get_recommendations()

Creating ensemble with:
cutf_100_TimePop_alpha_1.0
cutf_100_ItemKNN_tw_True_rs_False


[1;36mloading recs model:
 cutf_100_TimePop_alpha_1.0[0m
[1;36mloading recs model:
 cutf_100_ItemKNN_tw_True_rs_False[0m
TimePop_alpha_1.0_recs
ItemKNN_tw_True_rs_False_recs
['customer_id', 'TimePop_alpha_1.0_recs', 'relevance']
['customer_id', 'ItemKNN_tw_True_rs_False_recs', 'relevance']
dropping cols: ['TimePop_alpha_1.0_recs', 'ItemKNN_tw_True_rs_False_recs']
Average recs per user: 128.70881887967144


In [4]:
# Second name for the ensemble output; the following cells rebind `recs`
# while `recs_df` keeps referring to the frame returned by the ensemble.
recs = recs_df

In [5]:
# Display the ensemble output as returned, before any NaN handling or scaling.
recs

Unnamed: 0,customer_id,TimePop_alpha_1.0_score,TimePop_alpha_1.0_rank,relevance,ItemKNN_tw_True_rs_False_score,ItemKNN_tw_True_rs_False_rank,recs,rank
0,25,3.863502,1.0,0,,,4136.0,1
72,25,-0.316614,73.0,0,,,23245.0,2
71,25,-0.316613,72.0,0,,,22158.0,3
70,25,-0.316613,71.0,0,,,20453.0,4
69,25,-0.316613,70.0,0,,,23722.0,5
...,...,...,...,...,...,...,...,...
7897115,1166002,,,0,0.012082,29.0,22775.0,96
7897114,1166002,,,0,0.012365,28.0,21577.0,97
7897113,1166002,,,0,0.013280,27.0,21314.0,98
7897123,1166002,,,0,0.010438,37.0,14917.0,99


In [6]:
# Replace every NaN in the frame with 0. This touches the per-model score
# columns and the per-model rank columns alike.
# NOTE(review): 0 is not a neutral value for TimePop, whose real scores go
# negative (about -0.3166 in the tail). After this fill, an item TimePop never
# recommended carries a higher TimePop score than items it ranked ~70th.
# Consider filling each score column with its own minimum instead -- confirm
# the intended semantics before changing, since all downstream numbers move.
recs = recs.fillna(value=0)

In [7]:
# Collect the names of all columns whose label contains the substring "score"
# (taken from recs_df, the frame returned by the ensemble).
score_cols = list(filter(lambda name: "score" in name, recs_df.columns))

In [8]:
# Show which columns were picked up as score columns.
score_cols

['TimePop_alpha_1.0_score', 'ItemKNN_tw_True_rs_False_score']

In [9]:
# Z-score every score column over the whole frame (all rows together, not per
# customer): subtract the column mean, then divide by the column standard
# deviation, so the two models' scores share a comparable scale.
for score_col in score_cols:
    raw_scores = recs[score_col]
    recs[score_col] = (raw_scores - raw_scores.mean()) / raw_scores.std()

In [10]:
# Display the frame after NaN filling and score standardisation.
recs

Unnamed: 0,customer_id,TimePop_alpha_1.0_score,TimePop_alpha_1.0_rank,relevance,ItemKNN_tw_True_rs_False_score,ItemKNN_tw_True_rs_False_rank,recs,rank
0,25,3.131902,1.0,0,-0.175645,0.0,4136.0,1
72,25,-0.535086,73.0,0,-0.175645,0.0,23245.0,2
71,25,-0.535086,72.0,0,-0.175645,0.0,22158.0,3
70,25,-0.535086,71.0,0,-0.175645,0.0,20453.0,4
69,25,-0.535086,70.0,0,-0.175645,0.0,23722.0,5
...,...,...,...,...,...,...,...,...
7897115,1166002,-0.257339,0.0,0,0.351245,29.0,22775.0,96
7897114,1166002,-0.257339,0.0,0,0.363612,28.0,21577.0,97
7897113,1166002,-0.257339,0.0,0,0.403515,27.0,21314.0,98
7897123,1166002,-0.257339,0.0,0,0.279578,37.0,14917.0,99


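Standalone evaluation of each model (these figures are reused below to derive the ensemble weights):

ItemKNN:
Users: 39205
Recall@100: 0.1397332847340381
MAP@100: 0.04217208947113406

TimePop:
Users: 47340
Recall@100: 0.1839606121190764
MAP@100: 0.0970550910163751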

In [11]:
# Standalone evaluation figures of the two models, copied from the summary
# in the notebook: number of users, Recall@100 and MAP@100.
ITEMKNN_USERS = 39205
ITEMKNN_RECALL_AT_100 = 0.1397332847340381
ITEMKNN_MAP_AT_100 = 0.04217208947113406

TIMEPOP_USERS = 47340
TIMEPOP_RECALL_AT_100 = 0.1839606121190764
TIMEPOP_MAP_AT_100 = 0.0970550910163751

# Unnormalised importance of each model: users * Recall@100 * MAP@100.
# alpha belongs to ItemKNN, beta to TimePop.
alpha = ITEMKNN_USERS * ITEMKNN_RECALL_AT_100 * ITEMKNN_MAP_AT_100
beta = TIMEPOP_USERS * TIMEPOP_RECALL_AT_100 * TIMEPOP_MAP_AT_100

In [12]:
# Normalise the two importances so that the blend weights add up to 1:
# X is the share of alpha, Y the share of beta.
importance_total = alpha + beta
X = alpha / importance_total
Y = beta / importance_total

In [14]:
# Blend the two standardised scores into one column:
# X weights the ItemKNN score, Y weights the TimePop score.
itemknn_part = X * recs["ItemKNN_tw_True_rs_False_score"]
timepop_part = Y * recs["TimePop_alpha_1.0_score"]
recs["weighted_score"] = itemknn_part + timepop_part

In [16]:
# The per-model score/rank columns and the existing `rank` column are no longer
# needed once `weighted_score` exists; remove them from the frame.
# Re-running this cell on the already-reduced frame raises KeyError, because
# the listed columns are gone by then.
obsolete_cols = [
    "TimePop_alpha_1.0_score",
    "TimePop_alpha_1.0_rank",
    "ItemKNN_tw_True_rs_False_score",
    "ItemKNN_tw_True_rs_False_rank",
    "rank",
]
recs = recs.drop(columns=obsolete_cols)

In [17]:
# Display the reduced frame that now carries the blended `weighted_score`.
recs

Unnamed: 0,customer_id,relevance,recs,weighted_score
0,25,0,4136.0,2.421902
72,25,0,23245.0,-0.457929
71,25,0,22158.0,-0.457929
70,25,0,20453.0,-0.457929
69,25,0,23722.0,-0.457928
...,...,...,...,...
7897115,1166002,0,22775.0,-0.126700
7897114,1166002,0,21577.0,-0.124045
7897113,1166002,0,21314.0,-0.115479
7897123,1166002,0,14917.0,-0.142084


In [18]:
# Recompute `rank` inside each DEFAULT_USER_COL group from the blended score:
# rank 1 is the highest weighted_score (ascending=False), and tied scores all
# receive the lowest rank of their tie group (method="min").
# Row order is left as it was; only the `rank` column is written.
scores_by_user = recs.groupby(DEFAULT_USER_COL)["weighted_score"]
recs["rank"] = scores_by_user.rank(method="min", ascending=False).astype(int)

In [19]:
# Display the final frame with the rank derived from `weighted_score`.
recs

Unnamed: 0,customer_id,relevance,recs,weighted_score,rank
0,25,0,4136.0,2.421902,1
72,25,0,23245.0,-0.457929,73
71,25,0,22158.0,-0.457929,72
70,25,0,20453.0,-0.457929,71
69,25,0,23722.0,-0.457928,70
...,...,...,...,...,...
7897115,1166002,0,22775.0,-0.126700,29
7897114,1166002,0,21577.0,-0.124045,28
7897113,1166002,0,21314.0,-0.115479,27
7897123,1166002,0,14917.0,-0.142084,37
