In [1]:
import pandas as pd
from hnmchallenge.data_reader import DataReader
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from hnmchallenge.dataset import Dataset
from hnmchallenge.filtered_dataset import FilterdDataset
from hnmchallenge.models.top_pop import TopPop
from hnmchallenge.evaluation.python_evaluation import map_at_k
from hnmchallenge.constant import *
from hnmchallenge.models.sgmc.sgmc import SGMC
from hnmchallenge.models.ease.ease import EASE
from hnmchallenge.models.itemknn.itemknn import ItemKNN 

In [2]:
import logging
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)

In [3]:
dataset = FilterdDataset()
dr = DataReader()

In [4]:
users_mb = dataset.get_filtered_user_multiply_buy()
normal_users = users_mb[users_mb["diff"] == 0][DEFAULT_USER_COL]
users_top_pop = users_mb[~(users_mb["diff"] == 0)][DEFAULT_USER_COL]

In [None]:
recom = SGMC(dataset, k=128, time_weight=True)

In [5]:
recom = ItemKNN(dataset, topk=1000, time_weight=True)

In [None]:
recom = EASE(dataset, l2=1e-3, time_weight=True)

In [6]:
train = dataset.get_train_df()
train_sub = dataset.get_train_df_user_subset()
fd = dr.get_filtered_full_data()

In [7]:
recom.compute_similarity_matrix(train)  

INFO:hnmchallenge.utils.pandas_utils:[1;33munique customer_id: 1128716[0m


func:interactions_to_sparse_matrix
 took: 9.625576972961426 sec


In [8]:
val_df_users = dataset.get_val_df()[DEFAULT_USER_COL].unique()
test_df_users = dataset.get_test_df()[DEFAULT_USER_COL].unique()

normal_users_val_train = train[(train[DEFAULT_USER_COL].isin(val_df_users)) & (train[DEFAULT_USER_COL].isin(normal_users))]
normal_users_test_train = train[(train[DEFAULT_USER_COL].isin(test_df_users)) & (train[DEFAULT_USER_COL].isin(normal_users))]
normal_users_train = pd.concat([normal_users_val_train, normal_users_test_train], axis=0)

mb_users_val_train = train[(train[DEFAULT_USER_COL].isin(val_df_users)) & (train[DEFAULT_USER_COL].isin(users_top_pop))]
mb_users_test_train = train[(train[DEFAULT_USER_COL].isin(test_df_users)) & (train[DEFAULT_USER_COL].isin(users_top_pop))]
mb_users_train = pd.concat([mb_users_val_train, mb_users_test_train], axis=0)

In [9]:
normal_users_recs = recom.recommend(interactions=normal_users_train, batch_size=10_000, remove_seen=True)

INFO:hnmchallenge.recommender_interface:[1;36mRecommending items MONOCORE[0m
INFO:hnmchallenge.recommender_interface:[1;36mPredicting for: 17824 users[0m
INFO:hnmchallenge.recommender_interface:[1;36mnum batches: 2[0m
  0%|          | 0/2 [00:00<?, ?it/s]INFO:hnmchallenge.recommender_interface:[1;36mgetting predictions...[0m
INFO:hnmchallenge.utils.pandas_utils:[1;33munique customer_id: 8912[0m
INFO:hnmchallenge.utils.sparse_matrix:[1;31mApplying time weight on user-item interactions[0m
  0%|          | 0/2 [00:00<?, ?it/s]


Predicting using time_weight importance...


TypeError: Addition/subtraction of integers and integer-arrays with TimedeltaArray is no longer supported.  Instead of adding/subtracting `n`, use `n * obj.freq`

In [None]:
normal_users_recs = recom.recommend_multicore(interactions=normal_users_train, batch_size=10_000, num_cpus=20,
                                 remove_seen=True)

In [None]:
map_at_k(rating_true=dataset.get_val_df(), rating_pred=normal_users_recs, adjust_user_count=False)

In [None]:
map_at_k(rating_true=dataset.get_test_df(), rating_pred=normal_users_recs, adjust_user_count=False)

In [None]:
mb_users_recs = recom.recommend_multicore(interactions=mb_users_train, batch_size=10_000, num_cpus=20,
                                 remove_seen=False)

In [None]:
map_at_k(rating_true=dataset.get_val_df(), rating_pred=mb_users_recs, adjust_user_count=False)

In [None]:
map_at_k(rating_true=dataset.get_test_df(), rating_pred=mb_users_recs, adjust_user_count=False)

In [None]:
recom.compute_similarity_matrix(fd)  

In [None]:
normal_users_fd = fd[fd[DEFAULT_USER_COL].isin(normal_users)]
mb_users_fd = fd[fd[DEFAULT_USER_COL].isin(users_top_pop)]

In [None]:
normal_users_recs_fd = recom.recommend_multicore(interactions=normal_users_fd, batch_size=10_000, num_cpus=20,
                                 remove_seen=True)

In [None]:
mb_users_recs_fd = recom.recommend_multicore(interactions=mb_users_fd, batch_size=10_000, num_cpus=20,
                                 remove_seen=False)

In [None]:
from hnmchallenge.submission_handler import SubmissionHandler
sh = SubmissionHandler()                                               

In [None]:
sh.create_submission_filtered_data([normal_users_recs_fd, mb_users_recs_fd], sub_name="clusters_ease")