In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Project imports keep their original relative order so that the wildcard
# import below shadows (and is shadowed by) exactly the same names as before.
from hnmchallenge.data_reader import DataReader
from hnmchallenge.dataset import Dataset
from hnmchallenge.filtered_dataset import FilterdDataset
from hnmchallenge.models.top_pop import TopPop
from hnmchallenge.evaluation.python_evaluation import map_at_k
from hnmchallenge.constant import *
from hnmchallenge.models.sgmc.sgmc import SGMC
from hnmchallenge.models.ease.ease import EASE
from hnmchallenge.models.itemknn.itemknn import ItemKNN
from hnmchallenge.submission_handler import SubmissionHandler

In [2]:
import logging

# Configure the root logger first so records at DEBUG level and above are
# emitted, then create the logger this notebook uses for its own messages.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

In [3]:
# Project data handles shared by the cells below:
#   dataset - source of the train/val/test frames; also handed to each model.
#   dr      - source of the filtered full data and the id mapping dicts.
dataset = FilterdDataset()
dr = DataReader()


In [4]:
# Candidate model: SGMC with k=128 and time weighting enabled.
# NOTE(review): `recom` is rebound by the ItemKNN and EASE cells that follow,
# so on a top-to-bottom run this instance is constructed and then discarded.
recom = SGMC(dataset, k=128, time_weight=True)

In [4]:
# Candidate model: ItemKNN with topk=1000 and time weighting enabled.
# NOTE(review): `recom` is rebound by the EASE cell that follows, so on a
# top-to-bottom run this instance is constructed and then discarded.
recom = ItemKNN(dataset, topk=1000, time_weight=True)

In [10]:
# Candidate model: EASE with l2=1e-3 and time weighting disabled.
# This is the last assignment to `recom`, so it is the model every later cell
# uses on a top-to-bottom run (the submission below is also named "EASE").
recom = EASE(dataset, l2=1e-3, time_weight=False)

In [11]:
# Interaction frames pulled from the project helpers.
# `train` is what the similarity matrix is computed from.
train = dataset.get_train_df()
# `train_sub` is the interactions frame passed to the recommend calls.
train_sub = dataset.get_train_df_user_subset()
# NOTE(review): `fd` is not referenced by any later cell in this notebook.
fd = dr.get_filtered_full_data()

In [12]:
# Compute the selected model's similarity matrix from the `train` frame.
recom.compute_similarity_matrix(train)

INFO:hnmchallenge.utils.pandas_utils:[1;33munique customer_id: 1128716[0m


func:interactions_to_sparse_matrix
 took: 9.908806324005127 sec


In [13]:
# Single-process recommendation over `train_sub` in batches of 10,000 rows,
# keeping already-seen items (remove_seen=False).
# NOTE(review): the recorded run of this cell ended in KeyboardInterrupt, and
# `recs` is reassigned by the multicore call in the next cell (which uses
# remove_seen=True), so this cell's result is never consumed downstream.
recs = recom.recommend(interactions=train_sub, batch_size=10_000, remove_seen=False)

INFO:hnmchallenge.recommender_interface:[1;36mRecommending items MONOCORE[0m
INFO:hnmchallenge.recommender_interface:[1;36mPredicting for: 45014 users[0m
INFO:hnmchallenge.recommender_interface:[1;36mnum batches: 5[0m
  0%|          | 0/5 [00:00<?, ?it/s]INFO:hnmchallenge.recommender_interface:[1;36mgetting predictions...[0m
INFO:hnmchallenge.utils.pandas_utils:[1;33munique customer_id: 9003[0m
INFO:hnmchallenge.recommender_interface:[1;36mDENSE Item Similarity MUL...[0m


func:interactions_to_sparse_matrix
 took: 0.32175326347351074 sec


  0%|          | 0/5 [00:02<?, ?it/s]


KeyboardInterrupt: 

In [14]:
# Multi-process recommendation over `train_sub`; seen items are removed.
# The result in `recs` is what the evaluation and submission cells consume.
recs = recom.recommend_multicore(
    interactions=train_sub,
    batch_size=10_000,
    num_cpus=20,
    remove_seen=True,
)

INFO:hnmchallenge.recommender_interface:[1;36mRecommending items MULTICORE[0m


  0%|          | 0/5 [00:00<?, ?it/s]

INFO:hnmchallenge.utils.pandas_utils:[1;33munique customer_id: 9003[0m
INFO:hnmchallenge.recommender_interface:[1;36mDENSE Item Similarity MUL...[0m


func:interactions_to_sparse_matrix
 took: 0.3506743907928467 sec


INFO:hnmchallenge.utils.pandas_utils:[1;33munique customer_id: 9003[0m


func:interactions_to_sparse_matrix
 took: 0.273942232131958 sec


INFO:hnmchallenge.recommender_interface:[1;36mDENSE Item Similarity MUL...[0m
INFO:hnmchallenge.utils.pandas_utils:[1;33munique customer_id: 9003[0m


func:interactions_to_sparse_matrix
 took: 0.22741484642028809 sec


INFO:hnmchallenge.recommender_interface:[1;36mDENSE Item Similarity MUL...[0m
INFO:hnmchallenge.recommender_interface:[1;36mRemoving seen items[0m
INFO:hnmchallenge.utils.sparse_matrix:[1;36mSort_top_k:True[0m
INFO:hnmchallenge.utils.pandas_utils:[1;33munique customer_id: 9003[0m


func:interactions_to_sparse_matrix
 took: 0.1773083209991455 sec


INFO:hnmchallenge.recommender_interface:[1;36mDENSE Item Similarity MUL...[0m
INFO:hnmchallenge.recommender_interface:[1;36mRemoving seen items[0m
INFO:hnmchallenge.utils.sparse_matrix:[1;36mSort_top_k:True[0m
INFO:hnmchallenge.utils.pandas_utils:[1;33munique customer_id: 9002[0m


func:interactions_to_sparse_matrix
 took: 0.12210536003112793 sec


INFO:hnmchallenge.recommender_interface:[1;36mDENSE Item Similarity MUL...[0m
INFO:hnmchallenge.recommender_interface:[1;36mRemoving seen items[0m
INFO:hnmchallenge.utils.sparse_matrix:[1;36mSort_top_k:True[0m
INFO:hnmchallenge.recommender_interface:[1;36mRemoving seen items[0m
INFO:hnmchallenge.utils.sparse_matrix:[1;36mSort_top_k:True[0m
INFO:hnmchallenge.recommender_interface:[1;36mRemoving seen items[0m
INFO:hnmchallenge.utils.sparse_matrix:[1;36mSort_top_k:True[0m


In [15]:
# Score the recommendations in `recs` against the validation frame.
map_at_k(rating_true=dataset.get_val_df(), rating_pred=recs)

0.007157400365645329

In [9]:
# Score the recommendations in `recs` against the test frame.
# NOTE(review): this cell's execution count (9) is lower than that of the
# cell that produced `recs` (14), so the recorded score may belong to an
# earlier `recs` - re-run top-to-bottom before trusting it.
map_at_k(rating_true=dataset.get_test_df(), rating_pred=recs)

0.008800728749777206

In [13]:
# Handler used below to create the submission from `recs`.
# SubmissionHandler is imported in the notebook's top import cell.
sh = SubmissionHandler()

In [14]:
# Create a submission named "EASE" from the recommendations in `recs`.
sh.create_submission_filtered_data(recs, sub_name="EASE")

INFO:hnmchallenge.submission_handler:[1;33mSubmission with Filtered Data: EASE created succesfully![0m


In [None]:
# Two lookup dicts, used in the next cells (via .get) to map the user and item
# columns of the recommendations back to original ids.
user_map_dict, item_map_dict = dr.get_new_raw_mapping_dict()

In [None]:
# One row per user, with that user's recommended items collected into a list.
grp_recs_df = (
    recs.groupby(DEFAULT_USER_COL)[DEFAULT_ITEM_COL]
    .apply(list)
    .to_frame()
    .reset_index()
)
# Map the user column back to original ids; ids missing from the dict become None.
grp_recs_df[DEFAULT_USER_COL] = grp_recs_df[DEFAULT_USER_COL].apply(user_map_dict.get)

In [None]:
# Map every item in each user's list back to its original id and collapse the
# list into one space-separated string.
grp_recs_df[DEFAULT_ITEM_COL] = grp_recs_df[DEFAULT_ITEM_COL].apply(
    lambda item_ids: " ".join(item_map_dict.get(item_id) for item_id in item_ids)
)

In [None]:
# Display the grouped recommendations frame for inspection.
grp_recs_df

In [None]:
# Score `recs` against `test_df_s`.
# NOTE(review): `test_df_s` is not defined in any cell of this notebook, so
# this raises NameError on a fresh kernel. Define it first, or - if the test
# frame is what was intended - use dataset.get_test_df() (TODO confirm).
map_at_k(rating_true=test_df_s, rating_pred=recs)

In [None]:
# Score `recs` against `val_df_s`.
# NOTE(review): `val_df_s` is not defined in any cell of this notebook, so
# this raises NameError on a fresh kernel. Define it first, or - if the
# validation frame is what was intended - use dataset.get_val_df() (TODO confirm).
map_at_k(rating_true=val_df_s, rating_pred=recs)

In [None]:
# NOTE(review): `dr` is already bound to a DataReader() in an earlier cell;
# this rebinds it to a fresh instance.
dr = DataReader()

In [None]:
# Keep the result instead of discarding it: the next cell reads
# `zero_int_users`, which was never assigned anywhere in the notebook.
# NOTE(review): assumes `zero_int_users` was meant to hold this call's return
# value - confirm against the intended analysis.
zero_int_users = dr.get_zero_interactions_recs()

In [None]:
# Display `zero_int_users` for inspection.
zero_int_users