In [1]:
from src.data_reader import DataReader
from src.constant import *
import matplotlib.pyplot as plt
import numpy as np
from src.datasets.dataset import Dataset
from src.models.itemknn.itemknn import ItemKNN
from src.models.ease.ease import EASE
from src.models.content_ease.content_ease import CEASE
import pandas as pd
from src.evaluation import compute_mrr
from src.utils.sparse_matrix import interactions_to_sparse_matrix

In [2]:
# Project dataset wrapper: the split, the session/purchase loaders and the
# submission writer used in the cells below are all called on this object.
dataset = Dataset()

In [3]:
# The split is indexed by TRAIN / VAL; each entry is a two-element pair that is
# unpacked here as (interactions, labels) — presumably the labels are the
# purchase rows of those sessions; confirm against Dataset.get_split.
split_dict = dataset.get_split()
(train, train_label), (val, val_label) = split_dict[TRAIN], split_dict[VAL]

In [4]:
# Stack the training interactions and their label rows into one frame.
# NOTE(review): `train_pur` is rebuilt from `full_data` two cells below before
# it is used anywhere, so this version is currently superseded.
train_pur = pd.concat(objs=[train, train_label], axis="index")

In [5]:
# Load the training sessions through the dataset wrapper (not the TRAIN part of the split).
full_data = dataset.get_train_sessions()

In [6]:
# Fitting frame: the training sessions stacked with the TRAIN label rows only.
# NOTE(review): `full_data` presumably also contains the sessions that the
# split assigns to VAL — confirm that fitting on them is intended when the
# model is later scored on `val`.
train_pur = pd.concat(objs=[full_data, train_label], axis="index")

In [42]:
# Recency filter: within each SESS_ID group, score every row by how long before
# the group's latest DATE it happened and keep only the recent ones.
#   time_score = 1 / (hours_before_latest + 1)
# so the latest row of a group scores 1.0 and a row exactly one hour earlier
# scores 0.5.
MIN_TIME_SCORE = 0.5  # keep rows at most one hour older than the group's latest row

session_dates = train_pur.groupby(SESS_ID)[DATE]
# Aggregations are passed by name: handing the Python builtins `max` / `min`
# to `transform` is deprecated in recent pandas releases.
train_pur["last_buy"] = session_dates.transform("max")
train_pur["first_buy"] = session_dates.transform("min")

# Vectorised timedelta -> hours conversion instead of a per-row Python lambda.
# Assumes DATE is a datetime64 column, so the difference is timedelta64 — TODO confirm.
hours_before_latest = (
    train_pur["last_buy"] - train_pur[DATE]
).dt.total_seconds() / 3600
train_pur["time_score"] = 1 / (hours_before_latest + 1)

train_pur = train_pur[train_pur["time_score"] >= MIN_TIME_SCORE]

In [13]:
# Validation model: CEASE with time weighting disabled (time_weight=None).
# `l2` is presumably the regularisation strength — confirm against the CEASE class.
model = CEASE(dataset, time_weight=None, l2=1e-1)

In [14]:
# Build the model's similarity matrix from the recency-filtered frame.
model.compute_similarity_matrix(train_pur)

(23691, 23691)
Computing inverse


In [15]:
# Produce recommendations for the validation interactions with the fitted model.
recs = model.recommend(
    interactions=val,
    remove_seen=True,
    cutoff=100,  # presumably the number of recommendations kept per session — confirm
    leaderboard=False
)

In [16]:
# Score the validation recommendations against the validation labels (MRR).
# Kept as the cell's last expression so the returned value is displayed.
compute_mrr(recs, val_label)

MRR: 0.05441777072544032


0.05441777072544032

# Create Submission

In [30]:
# Test sessions of the leaderboard set; these are the sessions recommended for below.
lead_data = dataset.get_test_leaderboard_sessions()

In [31]:
# Test sessions of the final set; in this notebook they are only added to the fitting data.
final_data = dataset.get_test_final_sessions()

In [32]:
# Fitting frame for the submission model: all training sessions and training
# purchases, plus the interactions of both test sets (leaderboard and final).
full_data = dataset.get_train_sessions()
full_label = dataset.get_train_purchases()
concat_full_data = pd.concat(
    objs=[full_data, full_label, lead_data, final_data],
    axis="index",
)

In [33]:
# Submission model: EASE with time weighting enabled (time_weight=50).
# NOTE(review): the configuration validated earlier in this notebook was CEASE
# with time_weight=None — confirm this EASE setup was validated separately.
model = EASE(dataset, time_weight=50, l2=1e-1)

In [34]:
# Build the similarity matrix from the combined train + test-session frame.
model.compute_similarity_matrix(concat_full_data)

In [35]:
# Produce recommendations for the leaderboard sessions only.
recs = model.recommend(
    interactions=lead_data,
    remove_seen=True,
    cutoff=100,  # presumably the number of recommendations kept per session — confirm
    leaderboard=True  # the log output mentions "white list items" in this mode — presumably a candidate-item filter; confirm
)

Using Time Weight on Interaction matrix
Considering white list items...


In [36]:
# Create the submission for the leaderboard recommendations under the given name.
dataset.create_submission(recs, sub_name="EASE_tw_f")

Submission with name: EASE_tw_f created succesfully!
