In [1]:
from src.data_reader import DataReader
from src.constant import *
import matplotlib.pyplot as plt
import numpy as np
from src.datasets.dataset import Dataset
from src.models.itemknn.itemknn import ItemKNN
from src.models.ease.ease import EASE
import pandas as pd
from src.evaluation import compute_mrr, map_at_k
from src.utils.sparse_matrix import interactions_to_sparse_matrix

In [2]:
# Project dataset helper. Later cells use it to obtain the train/val/test
# split, the leaderboard and final test sessions, the output folders for the
# recommendation frames, and to write the submission.
dataset = Dataset()

In [3]:
# Fetch the pre-computed splits; each entry of split_dict is unpacked into a
# (sessions, labels) pair.
split_dict = dataset.get_split()
(train, train_label), (val, val_label), (test, test_label) = (
    split_dict[split_name] for split_name in (TRAIN, VAL, TEST)
)

# Combined hold-out: validation rows followed by test rows, built the same
# way for the session frames and for the label frames.
val_test, val_test_label = pd.concat([val, test]), pd.concat([val_label, test_label])

In [6]:
# Distinct session ids that occur in the validation split.
unique_sess = val[SESS_ID].unique()

In [7]:
# Exploratory and currently disabled: cut the validation session ids into five
# chunks (the output recorded under this cell comes from an earlier run).
# np.split raises unless the array length is divisible by the number of
# sections; np.array_split accepts uneven lengths.
# np.split(unique_sess, 5)

[array([   115,    526,    782, ..., 895553, 895560, 895565]),
 array([ 895747,  895829,  895934, ..., 1770208, 1770222, 1770264]),
 array([1770356, 1770393, 1770441, ..., 2669870, 2669939, 2670002]),
 array([2670054, 2670065, 2670336, ..., 3557835, 3557844, 3557903]),
 array([3558022, 3558201, 3558836, ..., 4439576, 4439823, 4439986])]

In [22]:
# ease all
# Use the whole training split for fitting. These are aliases, not copies:
# columns added in place to final_train_data / final_train_label later also
# appear on train / train_label.
# NOTE(review): the cell that follows in the notebook rebinds both names to a
# recent-sessions subset. Its execution count ([4]) is lower than this cell's
# ([22]), so the saved outputs were presumably produced with the full split,
# while a top-to-bottom run ends up with the subset - confirm which is intended.
final_train_data = train
final_train_label = train_label

In [4]:
from datetime import datetime, timedelta

# Length, in days, of the trailing window of training data to keep.
TRAIN_WINDOW_DAYS = 150

# Cut-off date: everything strictly after (latest train date - window) counts
# as recent.
max_date = train[DATE].max()
train_limit_date = max_date - timedelta(days=TRAIN_WINDOW_DAYS)

# Rows newer than the cut-off, and the ids of the sessions they belong to.
filtered_train = train[train[DATE] > train_limit_date].copy()
id_filtered_train = filtered_train[SESS_ID].unique()

# Keep every row (and every label row) of a session that has at least one
# recent row. The explicit .copy() matters: a later cell adds a
# "sample_weight" column to both frames in place, and doing that on a
# boolean-mask slice of train / train_label triggers pandas'
# SettingWithCopyWarning.
# NOTE(review): this cell sits after the "ease all" cell in the notebook but
# has a lower execution count ([4] vs [22]); a top-to-bottom run therefore
# ends with this windowed subset rather than the full split - confirm.
final_train_data = train[train[SESS_ID].isin(id_filtered_train)].copy()
final_train_label = train_label[train_label[SESS_ID].isin(id_filtered_train)].copy()

In [23]:
# Load the session tables exposed by the dataset helper: all training
# sessions, the leaderboard test sessions and the final test sessions.
# full_data is not referenced again in the visible part of the notebook.
full_data = dataset.get_train_sessions()
# we can not use that for the final submission
# NOTE(review): it is unclear whether the remark above is about full_data or
# about lead_data - clarify.
lead_data = dataset.get_test_leaderboard_sessions()
final_data = dataset.get_test_final_sessions()

In [24]:
# Attach a constant "sample_weight" column, in place, to every frame used
# below: 1 for the session frames, 5 for the *_label frames. The pairs are
# listed in the order in which the assignments are performed.
# NOTE(review): val_test / val_test_label were concatenated in an earlier
# cell, so they do not receive this column - confirm that is intended.
for _frame, _weight in (
    (train, 1),
    (train_label, 5),
    (final_train_data, 1),
    (final_train_label, 5),
    (val, 1),
    (val_label, 5),
    (test, 1),
    (test_label, 5),
    (lead_data, 1),
    (final_data, 1),
):
    _frame["sample_weight"] = _weight

In [25]:
# Fitting data for the validation run: the selected training sessions stacked
# row-wise with their label rows. Both inputs already carry the
# "sample_weight" column added in the previous cell.
train_pur = pd.concat([final_train_data, final_train_label], axis=0)

In [26]:
# EASE model used for the offline validation run.
# NOTE(review): time_weight=50 and l2=1e-1 are bare literals repeated in the
# submission section; presumably tuned values - consider naming them once.
model = EASE(dataset, time_weight=50, l2=1e-1)

In [27]:
# Fit the model on train_pur. The return value is not kept; the fitted model
# is used afterwards through model.recommend.
model.compute_similarity_matrix(train_pur)

In [28]:
# Recommendations for the combined validation + test sessions, requested with
# cutoff=100 and leaderboard=False.
# NOTE(review): the exact effect of remove_seen / cutoff / leaderboard is
# defined inside EASE.recommend and is not visible here - confirm there.
recs = model.recommend(
    interactions=val_test,
    remove_seen=True,
    cutoff=100,
    leaderboard=False
)

Using Time Weight on Interaction matrix
Considering white list items...


In [29]:
# Score the recommendations against the held-out labels. As the recorded
# output shows, the helper prints the MRR and also returns it.
compute_mrr(recs, val_test_label)

MRR: 0.16395229272131795


0.16395229272131795

In [12]:
# Show the recommendation frame (session_id, item_id, score, rank).
# NOTE(review): this cell's execution count ([12]) is lower than that of the
# cell producing `recs` ([28]), so the table below may come from an older run.
recs

Unnamed: 0,session_id,item_id,score,rank
0,115,7392,0.094777,1
1,115,10592,0.036394,2
2,115,10531,0.032017,3
3,115,12390,0.029069,4
4,115,4135,0.027007,5
...,...,...,...,...
8161995,4439949,4742,0.001568,96
8161996,4439949,21143,0.001563,97
8161997,4439949,6511,0.001560,98
8161998,4439949,23279,0.001559,99


In [30]:
# Persist the validation/test recommendations in the train-recs folder.
# The index is reset first because DataFrame.to_feather requires a default
# index.
recs.reset_index(drop=True).to_feather(dataset.get_train_recs_df_folder() / "EASE_tw_full.feather")

# Create Submission

In [31]:
# Fitting data for the submission model: the selected training sessions plus
# the validation and test splits, each together with its label rows.
# The commented-out variant underneath uses the unfiltered train / train_label.
concat_full_data = pd.concat([final_train_data, final_train_label, val, val_label, test, test_label], axis=0)
#concat_full_data = pd.concat([train, train_label, val, val_label, test, test_label], axis=0)

In [32]:
# Fresh EASE instance for the submission run, with the same literal settings
# as the validation model. Rebinding `model` discards the validation model.
model = EASE(dataset, time_weight=50, l2=1e-1)

In [33]:
# Fit the submission model on train + validation + test data.
model.compute_similarity_matrix(concat_full_data)

In [34]:
# Recommendations for the leaderboard test sessions. Same arguments as the
# validation call except for the input frame and leaderboard=True.
# NOTE(review): what the leaderboard flag switches is internal to
# EASE.recommend and is not visible here.
recs_lead = model.recommend(
    interactions=lead_data,
    remove_seen=True,
    cutoff=100,
    leaderboard=True
)

Using Time Weight on Interaction matrix
Considering white list items...


In [35]:
# Persist the leaderboard recommendations (default index needed by to_feather).
recs_lead.reset_index(drop=True).to_feather(dataset.get_leaderboard_recs_df_folder() / "EASE_tw_full.feather")

In [36]:
# Recommendations for the final test sessions, from the same fitted model and
# with the same arguments as the leaderboard call (leaderboard=True included).
# NOTE(review): confirm leaderboard=True is also the right setting for the
# final test sessions.
recs_final = model.recommend(
    interactions=final_data,
    remove_seen=True,
    cutoff=100,
    leaderboard=True
)

Using Time Weight on Interaction matrix
Considering white list items...


In [37]:
# Persist the final-test recommendations (default index needed by to_feather).
recs_final.reset_index(drop=True).to_feather(dataset.get_final_recs_df_folder() / "EASE_tw_full.feather")

In [32]:
# Write the submission file under the name "Ease_3m".
# NOTE(review): in notebook order `recs` holds the validation/test
# recommendations produced with leaderboard=False, not recs_lead / recs_final
# computed in this section. This cell's execution count ([32]) also clashes
# with an earlier cell, so it was likely run in a different session. Confirm
# which frame create_submission is meant to receive before re-running.
dataset.create_submission(recs, sub_name="Ease_3m")

Submission with name: Ease_3m created succesfully!
