In [1]:
from src.data_reader import DataReader
from src.constant import *
import matplotlib.pyplot as plt
import numpy as np
from src.datasets.dataset import Dataset
from src.models.itemknn.itemknn import ItemKNN
from src.models.ease.ease import EASE
from src.models.content_ease.content_ease import CEASE
from src.models.cb_iknn.cb_iknn import CBItemKNN
from src.models.hybrid_item_sim.hybrid_item_sim import HybridItemSimilarity
import pandas as pd
from src.evaluation import compute_mrr

In [2]:
# Project dataset object: later cells ask it for the splits and session frames,
# pass it to every model constructor, and use it to write the submission.
dataset = Dataset()

In [3]:
# Fetch the split mapping once, then unpack the two-element value stored under
# each split key into its (data, label) names.
split_dict = dataset.get_split()
(train, train_label), (val, val_label), (test, test_label) = (
    split_dict[part] for part in (TRAIN, VAL, TEST)
)

In [4]:
from datetime import datetime, timedelta

# Length, in days, of the most recent slice of `train` used for fitting.
# Tune it here rather than editing the arithmetic below.
TRAIN_WINDOW_DAYS = 120

# Cutoff = latest date present in `train` minus the window.
max_date = train[DATE].max()
train_limit_date = max_date - timedelta(days=TRAIN_WINDOW_DAYS)

In [5]:
# Rows of `train` dated strictly after the cutoff (copied so later column
# assignments do not touch `train`).
is_recent = train[DATE] > train_limit_date
filtered_train = train.loc[is_recent].copy()

# Label rows are kept only for sessions that survived the date filter.
id_filtered_train = filtered_train[SESS_ID].unique()
has_kept_session = train_label[SESS_ID].isin(id_filtered_train)
filtered_train_label = train_label.loc[has_kept_session].copy()

In [6]:
# Load the train, leaderboard-test and final-test session frames from the dataset.
full_data = dataset.get_train_sessions()
# we cannot use that for the final submission
# NOTE(review): "that" is ambiguous — presumably the leaderboard sessions loaded
# on the next line; confirm and reword.
lead_data = dataset.get_test_leaderboard_sessions()
final_data = dataset.get_test_final_sessions()

In [7]:
# Weight written to the "sample_weight" column of the session frames.
SESSION_SAMPLE_WEIGHT = 1
# Weight written to the "sample_weight" column of the *_label frames
# (presumably the target item of each session — confirm).
LABEL_SAMPLE_WEIGHT = 5

# Tag every frame in place with a constant "sample_weight" column.
# The leaderboard/final test frames have no label counterpart in this notebook.
for _frame, _weight in (
    (train, SESSION_SAMPLE_WEIGHT),
    (train_label, LABEL_SAMPLE_WEIGHT),
    (filtered_train, SESSION_SAMPLE_WEIGHT),
    (filtered_train_label, LABEL_SAMPLE_WEIGHT),
    (val, SESSION_SAMPLE_WEIGHT),
    (val_label, LABEL_SAMPLE_WEIGHT),
    (test, SESSION_SAMPLE_WEIGHT),
    (test_label, LABEL_SAMPLE_WEIGHT),
    (lead_data, SESSION_SAMPLE_WEIGHT),
    (final_data, SESSION_SAMPLE_WEIGHT),
):
    _frame["sample_weight"] = _weight

In [8]:
# Fitting data for the validation run: the date-filtered train rows stacked
# row-wise with their label rows (row-wise is pd.concat's default axis).
train_pur = pd.concat([filtered_train, filtered_train_label])

In [9]:
# Both base models are built with the same settings: time_weight=None, l2=0.1.
base_model_params = dict(time_weight=None, l2=0.1)
cease = CEASE(dataset, **base_model_params)
ease = EASE(dataset, **base_model_params)


In [44]:
# Hybrid of the two base models; weights are listed in the same order as the
# models (presumably EASE -> 0.3, CEASE -> 0.7 — confirm against the class).
# NOTE(review): the saved output of this cell prints [0.6, 0.4], which suggests it
# was last executed with different weights than the ones written here. Re-run this
# cell and the following ones before trusting the reported validation MRR.
hybrid_m = HybridItemSimilarity(
    dataset=dataset,
    model_list=[ease, cease],
    model_weight_list=[0.3, 0.7],
    normalization=None,
    normalization_axis=1,
    time_weight=50,
)

[0.6, 0.4]


In [45]:
# Compute the hybrid model's similarity matrix from the validation-run fitting data.
hybrid_m.compute_similarity_matrix(train_pur)

(23691, 23691)
Computing inverse


In [46]:
# Recommendations for the validation sessions (cutoff=100, remove_seen=True).
# NOTE(review): leaderboard=True is passed here as well as in the submission run —
# confirm that this is the intended setting when scoring on `val`.
recommend_options = dict(remove_seen=True, cutoff=100, leaderboard=True)
recs = hybrid_m.recommend(interactions=val, **recommend_options)

Using Time Weight on Interaction matrix
Considering white list items...


In [47]:
# Score the validation recommendations against `val_label`; the value is shown as the cell output.
compute_mrr(recs, val_label)

MRR: 0.17282309385684277


0.17282309385684277

## SUBMISSION

In [50]:
# Fitting data for the submission run: the date-filtered train rows plus the
# validation and test splits, each followed by its label rows, stacked row-wise.
concat_full_data = pd.concat(
    [
        filtered_train,
        filtered_train_label,
        val,
        val_label,
        test,
        test_label,
    ]
)

In [51]:
# Re-create both base models for the submission run, with the same settings
# for each: time_weight=None, l2=0.1.
base_model_params = dict(time_weight=None, l2=0.1)
cease = CEASE(dataset, **base_model_params)
ease = EASE(dataset, **base_model_params)


In [52]:
# Hybrid used for the submission; weights are listed in the same order as the
# models (presumably EASE -> 0.3, CEASE -> 0.7 — confirm against the class).
hybrid_m = HybridItemSimilarity(
    dataset=dataset,
    model_list=[ease, cease],
    model_weight_list=[0.3, 0.7],
    normalization=None,
    normalization_axis=1,
    time_weight=50,
)

[0.3, 0.7]


In [54]:
# Compute the hybrid model's similarity matrix from the submission-run fitting data.
hybrid_m.compute_similarity_matrix(concat_full_data)

(23691, 23691)
Computing inverse


In [55]:
# Recommendations for the leaderboard test sessions (cutoff=100, remove_seen=True).
# Note that this rebinds `recs`, which earlier held the validation recommendations.
recommend_options = dict(remove_seen=True, cutoff=100, leaderboard=True)
recs = hybrid_m.recommend(interactions=lead_data, **recommend_options)

Using Time Weight on Interaction matrix
Considering white list items...


In [56]:
# The name appears to encode the configuration above (time_weight=50, model
# weights 0.3 / 0.7); it is a hand-written string, so keep it in sync manually.
submission_name = "CEASE_tw50_0.3_0.7"
dataset.create_submission(recs, sub_name=submission_name)

Submission with name: CEASE_tw50_0.3_0.7 created succesfully!
