In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from functools import partial
from sklearn.preprocessing import StandardScaler

from datasplit import DataSplit
from preprocess import DataPreprocessor
from candidate_model import CandidateModel
from metrics import precision_at_k, recall_at_k, ap_k, calc_mean_metric

## Load purchases & split into level-1 / level-2 train and validation sets

In [2]:
# Load the purchase log; the columns used below are 'week_no', 'user_id' and 'item_id'.
purchases = pd.read_csv('retail_train.csv')

# Partition the log on 'week_no'. The [6, 3] argument presumably gives the sizes
# (in weeks) of the two hold-out parts -- TODO confirm against DataSplit.
splitter = DataSplit(purchases, 'week_no', [6, 3])

# part1 serves both as the level-1 validation set and as the level-2 training
# set; every frame is an independent copy of its slice of `purchases`.
train_lv1, valid_lv1, train_lv2, valid_lv2 = (
    purchases[part].copy()
    for part in (splitter.part0, splitter.part1, splitter.part1, splitter.part2)
)

# Level-1 ground truth: one row per user, with the array of distinct item ids
# that user has in the level-1 validation frame stored in the 'actual' column.
true_lv1 = (
    valid_lv1
    .groupby('user_id')['item_id']
    .unique()
    .reset_index(name='actual')
)

## Prepare the data & fit the candidate model

In [3]:
# Keyword arguments forwarded to DataPreprocessor, one nested dict per config section.
mix_feat_params = dict(
    top_config=dict(
        fields=['quantity', 'sales_value'],
        beta=[1., 1.],
        k=5000,
        scaler=StandardScaler,  # the class itself is passed, not an instance
    ),
    # NOTE: an optional 'weights' entry (tfidf_weight) was left disabled here.
    uim_config=dict(aggfunc='sum'),
)

# Build the preprocessor from the level-1 train/validation frames and fit it.
pre = DataPreprocessor(train_lv1, valid_lv1, **mix_feat_params)
pre.fit()

In [10]:
# Number of candidates requested per user; also used as the cut-off k of the metric.
n_candidates = 60

# Attributes of the fitted preprocessor that CandidateModel takes as keyword arguments.
candidate_params = dict(
    train=pre.train_uim_sparse,
    weighted=pre.train_uim_weighted,
    top_items=pre.top_k_items,
    # The remaining keywords have the same name as the preprocessor attribute.
    **{
        attr: getattr(pre, attr)
        for attr in ('placeholder_id', 'idx_to_item', 'item_to_idx', 'user_to_idx')
    },
)

cm = CandidateModel('TFIDF', **candidate_params)
# K=1: per the original note this is the TF-IDF "own recommender" setup
# -- TODO confirm what K controls in CandidateModel.fit.
cm.fit(K=1)

# Candidate lists for every user present in the level-1 ground truth.
predicts = cm.predict(true_lv1['user_id'], N=n_candidates)

# calc_mean_metric applied with recall_at_k at k = n_candidates; left as the
# final bare expression so the value is shown as the cell output.
calc_mean_metric(
    recall_at_k,
    true_lv1['actual'],
    predicts,
    k=n_candidates,
)

0.11519221861973136