In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm
# Register tqdm's pandas integration (tqdm's documented hook that adds
# `progress_apply`-style methods to pandas objects).
# NOTE(review): none of the cells visible here call those methods — confirm
# this registration is still needed.
tqdm.pandas()

In [None]:
# Input tables, all resolved relative to the notebook's directory.
decisions = pd.read_csv(filepath_or_buffer="../data/processed/decisions.csv")
# books.csv carries its saved row index in the "Unnamed: 0" column.
books = pd.read_csv(
    filepath_or_buffer="../data/processed/books.csv",
    index_col="Unnamed: 0",
)
evaluation = pd.read_csv(filepath_or_buffer="../data/external/evaluation.csv")

In [None]:
# Wide -> long: one row per (decision, shown-recommendation slot). The slot
# name lands in the default `variable` column, the shown id in `value`.
decisions_melt = decisions.melt(
    id_vars=["book_id", "team", "decision", "time"],
    value_vars=[
        "recommendation_shown_0",
        "recommendation_shown_1",
        "recommendation_shown_2",
    ],
)

In [None]:
# Relabel the slot names in `variable` so they can be compared directly
# against the values stored in the `decision` column.
decisions_melt = decisions_melt.replace(
    {
        "variable": {
            "recommendation_shown_0": "recommendation_0",
            "recommendation_shown_1": "recommendation_1",
            "recommendation_shown_2": "recommendation_2",
        }
    }
)

In [None]:
# 1 when the slot on this row is the one named in `decision`, else 0.
decisions_melt["chosen"] = (
    decisions_melt["decision"].eq(decisions_melt["variable"]).astype(int)
)

In [None]:
# Per (book, shown recommendation) pair: share of showings in which it was
# picked (`average`) and the absolute number of picks (`chosen`).
#
# Only the `chosen` flag is aggregated. Applying mean/sum to the whole frame
# would also hit the non-numeric columns (team, decision, variable), which
# raises on recent pandas, and any extra numeric column would add output
# columns and break the four-name column assignment that follows.
decisions_grouped = (
    decisions_melt
    .groupby(["book_id", "value"])["chosen"]
    .agg(average="mean", chosen="sum")
    .reset_index()
)

In [None]:
# Final flat column names: the two group keys (the melted `value` becomes
# `recommendation_id`) followed by the two aggregates.
decisions_grouped.columns = [
    "book_id",
    "recommendation_id",
    "average",
    "chosen",
]

In [None]:
# One empty (and independent) recommendation list per item to be evaluated.
recommendation = {book_id: [] for book_id in evaluation.itemID}

In [None]:
def update_recommendation(level, recommendation, max_items=5):
    """Append recommendation ids to each book's list, in place.

    Rows of ``level`` are visited in their current order, so the frame should
    already be sorted best-first. A recommendation id is appended to its
    book's list only if it is not already present and the list still holds
    fewer than ``max_items`` entries.

    Parameters
    ----------
    level : pandas.DataFrame
        Must provide ``book_id`` and ``recommendation_id`` columns. Rows are
        read positionally, so any index (not only 0..n-1) is accepted.
    recommendation : dict
        Maps book id -> list of recommendation ids. Mutated in place. Every
        ``book_id`` in ``level`` must already be a key.
    max_items : int, default 5
        Maximum number of recommendations kept per book.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If ``level`` contains a ``book_id`` that is not a key of
        ``recommendation``.
    """
    # Walk the two columns as plain arrays: this avoids one label lookup per
    # row and does not depend on the frame's index labels.
    pairs = zip(level["book_id"].to_numpy(), level["recommendation_id"].to_numpy())
    for book_id, recommendation_id in tqdm(pairs, total=len(level)):
        picked = recommendation[book_id]
        if len(picked) < max_items and recommendation_id not in picked:
            picked.append(recommendation_id)

In [None]:
# Tier 1: pairs picked more than once AND picked in more than half of their
# showings, ordered best-first (highest share, then highest pick count).
level_one = (
    decisions_grouped
    .loc[(decisions_grouped["chosen"] > 1) & (decisions_grouped["average"] > 0.5)]
    .sort_values(by=["average", "chosen"], ascending=False)
    .reset_index(drop=True)
)

In [None]:
# Fill the per-book lists from the first tier.
update_recommendation(level=level_one, recommendation=recommendation)

In [None]:
# Tier 2: pairs picked exactly once OR picked in at most half of their
# showings, ordered the same way (highest share, then highest pick count).
level_two = (
    decisions_grouped
    .loc[(decisions_grouped["chosen"] == 1) | (decisions_grouped["average"] <= 0.5)]
    .sort_values(by=["average", "chosen"], ascending=False)
    .reset_index(drop=True)
)

In [None]:
# Top up the per-book lists from the second tier.
update_recommendation(level=level_two, recommendation=recommendation)

In [None]:
# One row per book, one column per recommendation rank.
#
# `from_dict(..., orient="index")` accepts lists of unequal length, whereas
# building the frame column-wise from the dict raises as soon as one book has
# fewer recommendations than another. Missing ranks are left as NaN (which
# may upcast an integer id column to float). The reindex guarantees that all
# five rank columns exist even if no book reached five entries.
result = (
    pd.DataFrame.from_dict(recommendation, orient="index")
    .reindex(columns=range(5))
    .reset_index()
    .rename(
        columns={
            "index": "book_id",
            0: "recommendation_1",
            1: "recommendation_2",
            2: "recommendation_3",
            3: "recommendation_4",
            4: "recommendation_5",
        }
    )
)

In [None]:
# Variant with the rank labels rotated by one: each column takes the next
# rank's name and the last one wraps around to rank 1. Only the labels
# change; the column order and the values stay as they are in `result`.
result_shifted = result.rename(
    columns={
        "recommendation_1": "recommendation_2",
        "recommendation_2": "recommendation_3",
        "recommendation_3": "recommendation_4",
        "recommendation_4": "recommendation_5",
        "recommendation_5": "recommendation_1",
    }
)

In [None]:
# Stamp both frames with the team name and their respective model id.
for submission, model_name in (
    (result, "ensemble_v2"),
    (result_shifted, "ensemble_v2_shifted"),
):
    submission["team_id"] = "dataminerz"
    submission["model_id"] = model_name

In [None]:
# Persist both variants (the default row index is written as the first column).
result.to_csv(path_or_buf="../data/processed/ensemble_v2_dataminerz.csv")
result_shifted.to_csv(
    path_or_buf="../data/processed/ensemble_v2_shifted_dataminerz.csv"
)