In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
# Register tqdm's pandas integration (adds `progress_apply` to pandas objects).
# NOTE(review): no `progress_apply` call appears in the cells shown here —
# confirm this registration is still needed.
tqdm.pandas()

In [2]:
# Input tables for this notebook (paths are relative to the notebook's folder).

# Logged decisions: one row per event, with the slots that were shown.
decisions = pd.read_csv(
    "../data/processed/decisions.csv",
)

# Book table; the CSV's unnamed first column is used as the index.
# NOTE(review): `books` is not referenced by the cells shown here.
books = pd.read_csv(
    "../data/processed/books.csv",
    index_col="Unnamed: 0",
)

# Evaluation items; their `itemID` values key the recommendation dict below.
evaluation = pd.read_csv(
    "../data/external/evaluation.csv",
)

In [3]:
# Reshape to long format: one row per (decision event, shown slot).
# The slot's column name lands in `variable` and the shown id in `value`
# (pandas' default melt column names).
decisions_melt = decisions.melt(
    id_vars=["book_id", "decision", "time"],
    value_vars=[
        "recommendation_shown_0",
        "recommendation_shown_1",
        "recommendation_shown_2",
    ],
)

In [4]:
# Drop repeated (book, decision, time, shown id) rows, keeping the first one.
# The slot column (`variable`) is not part of the key, so an id that appears
# in several slots of the same event survives only once.
# NOTE(review): if the surviving slot is not the one that was picked, the
# later `chosen` flag will be 0 for that event — confirm this is intended.
decisions_melt.drop_duplicates(
    subset=["book_id", "decision", "time", "value"],
    keep="first",
    inplace=True,
)

In [5]:
# Rewrite the slot labels from "recommendation_shown_<k>" to
# "recommendation_<k>" so they can be compared directly with `decision`.
decisions_melt["variable"] = decisions_melt["variable"].replace(
    {
        "recommendation_shown_0": "recommendation_0",
        "recommendation_shown_1": "recommendation_1",
        "recommendation_shown_2": "recommendation_2",
    }
)

In [6]:
# 1 when the row's slot label equals the event's `decision`, otherwise 0.
decisions_melt["chosen"] = (
    decisions_melt["decision"].eq(decisions_melt["variable"]).astype(int)
)

In [7]:
# Preview the first five rows of the long-format table.
decisions_melt.head(n=5)

Unnamed: 0,book_id,decision,time,variable,value,chosen
0,77554,recommendation_1,2021-06-11 11:24:00.379000+00:00,recommendation_0,39220,0
1,46634,recommendation_2,2021-06-23 10:45:29.685000+00:00,recommendation_0,4277,0
2,18756,recommendation_1,2021-06-23 07:54:56.901000+00:00,recommendation_0,35061,0
3,25488,recommendation_1,2021-06-12 13:04:27.267000+00:00,recommendation_0,9911,0
4,39627,recommendation_1,2021-06-16 07:27:36.338000+00:00,recommendation_0,19702,0


In [8]:
# Per (book, shown id) pair:
#   average -> mean of the 0/1 `chosen` flag (share of showings that were picked)
#   chosen  -> sum of the flag (number of times it was picked)
# Only the numeric `chosen` column is aggregated. Aggregating the whole frame
# would also apply `mean` to the string columns (`decision`, `time`,
# `variable`); older pandas silently dropped them, pandas >= 2.0 raises
# TypeError.
decisions_grouped = (
    decisions_melt
    .groupby(["book_id", "value"])["chosen"]
    .agg(average="mean", chosen="sum")
    .reset_index()
)

In [9]:
# Assign flat column names; the melted `value` column becomes
# `recommendation_id`.
decisions_grouped.columns = [
    "book_id",
    "recommendation_id",
    "average",
    "chosen",
]

In [10]:
# Preview the first five aggregated (book, recommendation) rows.
decisions_grouped.head(n=5)

Unnamed: 0,book_id,recommendation_id,average,chosen
0,12,1500,0.0,0
1,12,6452,0.0,0
2,12,10698,0.0,0
3,12,15978,0.0,0
4,12,16316,0.75,3


In [11]:
# Start every evaluation item with an empty recommendation list, keyed by id.
recommendation = {item_id: [] for item_id in evaluation.itemID}

In [12]:
def update_recommendation(level, recommendation, max_items=5):
    """Fill each book's recommendation list from the rows of ``level``.

    Rows are visited in their stored order, so ``level`` should already be
    sorted best-first. A candidate is appended only while the book's list has
    fewer than ``max_items`` entries and does not already contain it.

    Rows are read by position, so ``level`` does not need a default
    ``RangeIndex`` (e.g. a filtered frame works without ``reset_index``).

    Parameters
    ----------
    level : pandas.DataFrame
        Must have ``book_id`` and ``recommendation_id`` columns.
    recommendation : dict
        Maps book id -> list of recommendation ids. Mutated in place.
    max_items : int, default 5
        Maximum number of recommendations kept per book.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If a ``book_id`` in ``level`` is not a key of ``recommendation``.
    """
    pairs = zip(level["book_id"], level["recommendation_id"])
    # `total` keeps the progress bar informative; a zip has no length.
    for book_id, recommendation_id in tqdm(pairs, total=len(level)):
        picked = recommendation[book_id]
        if len(picked) < max_items and recommendation_id not in picked:
            picked.append(recommendation_id)

In [13]:
# Level one: pairs chosen more than once AND in more than half of their
# showings, ordered by `average` then `chosen`, both descending.
level_one = (
    decisions_grouped[
        (decisions_grouped["chosen"] > 1) & (decisions_grouped["average"] > 0.5)
    ]
    .sort_values(by=["average", "chosen"], ascending=False)
    .reset_index(drop=True)
)

In [14]:
# Preview the five top-ranked level-one pairs.
level_one.head(n=5)

Unnamed: 0,book_id,recommendation_id,average,chosen
0,59879,28285,1.0,9
1,70627,73314,1.0,8
2,12209,31533,1.0,7
3,30806,55964,1.0,7
4,38197,78067,1.0,7


In [15]:
# First pass: fill the lists from the level-one pairs.
update_recommendation(level=level_one, recommendation=recommendation)

100%|██████████| 2153/2153 [00:00<00:00, 50804.72it/s]


In [16]:
# Level two: pairs chosen exactly once OR chosen in at most half of their
# showings, ordered by `average` then `chosen`, both descending.
level_two = (
    decisions_grouped[
        (decisions_grouped["chosen"] == 1) | (decisions_grouped["average"] <= 0.5)
    ]
    .sort_values(by=["average", "chosen"], ascending=False)
    .reset_index(drop=True)
)

In [17]:
# Second pass: top up the lists that are not yet full with level-two pairs.
update_recommendation(level=level_two, recommendation=recommendation)

100%|██████████| 21946/21946 [00:00<00:00, 82576.43it/s]


In [18]:
# Turn {book_id: [rec, ...]} into one row per book with five slot columns.
# `from_dict(orient="index")` accepts lists of different lengths (missing
# slots become NaN), whereas `pd.DataFrame(recommendation)` raises ValueError
# as soon as one book has fewer than five entries. The reindex guarantees that
# all five slot columns exist even when no book filled them.
result = (
    pd.DataFrame.from_dict(recommendation, orient="index")
    .reindex(columns=range(5))
    .reset_index()
    .rename(
        columns={
            "index": "book_id",
            0: "recommendation_1",
            1: "recommendation_2",
            2: "recommendation_3",
            3: "recommendation_4",
            4: "recommendation_5",
        }
    )
)

In [19]:
# Tag every row with the constant team and model identifiers.
result = result.assign(team_id="dataminerz", model_id="ensemble")

In [20]:
# Write the table to disk. The DataFrame index is written as well (pandas'
# default), so the file starts with an unnamed index column.
# NOTE(review): confirm the consumer of this file expects that extra column.
result.to_csv(
    "../data/processed/ensemble_dataminerz.csv",
    index=True,
)