In [1]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cudf

from src.utils import get_data_period
from src.features import UserActionScore, ItemActionScore, UserItemActionScore, ConcatFeatureTransformer

In [2]:
# Run parameters for the first feature-building pass in this notebook.
# Cut-off date: passed to get_data_period and embedded in the input/output file names.
date_th = '2017-04-23'
# Period lengths passed to get_data_period (presumably in days — TODO confirm in src.utils).
train_period = 7
eval_period = 7
# Only appears in the pairs/features file names here (presumably the candidate-list size — TODO confirm).
top_n = 30
# Forwarded as `decay_rate` to the action-score feature transformers.
decay_rate = 0.9

In [3]:
# Interaction log; the `time_stamp` column is parsed as a date on load.
user_log = cudf.read_csv(
    '../data/processed/train.csv',
    parse_dates=['time_stamp'],
)

# Pairs file whose name encodes the current run parameters.
pairs_path = f'pairs_{date_th}_t{train_period}_e{eval_period}_n{top_n}.csv'
pairs = cudf.read_csv(pairs_path)

# Preview the first rows of the loaded pairs.
pairs.head()

Unnamed: 0,user_id,product_id,target,rated
0,0000073_D,00684669_d,0,0
1,0000073_D,00032850_d,0,0
2,0000073_D,00277807_d,0,0
3,0000073_D,00761206_d,0,0
4,0000073_D,00337874_d,0,0


In [4]:
# get_data_period returns four values; only the first two (the training window bounds) are kept,
# the remaining two are discarded.
train_start_date, train_end_date, _, _ = get_data_period(date_th, train_period, eval_period)

In [5]:
# Keyword arguments for the per-user action-score transformer:
# the training window bounds plus the decay rate from the run parameters.
config_user_action_score = dict(
    start_date=train_start_date,
    end_date=train_end_date,
    decay_rate=decay_rate,
)

# Build the transformer, then fit it on the interaction log and transform the pairs.
feature_transformer = UserActionScore(**config_user_action_score)
features = feature_transformer.fit_transform(user_log, pairs)

# Preview the resulting feature columns.
features.head()

Unnamed: 0,cv-score-r0.9_by_user,click-score-r0.9_by_user,pv-score-r0.9_by_user,other-score-r0.9_by_user
0,0.0,0.0,3.606296,0.723815
1,0.0,0.0,3.606296,0.723815
2,0.0,0.0,3.606296,0.723815
3,0.0,0.0,3.606296,0.723815
4,0.0,0.0,3.606296,0.723815


In [6]:
# Build the user-, item- and user-item-level action-score features for the
# loaded pairs, append them to the pair columns and write the result to CSV.

# Keyword arguments shared by all three action-score transformers.
# (This dict was previously defined twice verbatim; one definition is enough.)
config_user_action_score = {
    'start_date': train_start_date,
    'end_date': train_end_date,
    'decay_rate': decay_rate,
}

feature_transformers = [
    UserActionScore(**config_user_action_score),
    ItemActionScore(**config_user_action_score),
    UserItemActionScore(**config_user_action_score),
]

# The concatenating transformer receives the same window plus the list of
# transformers whose outputs it combines.
config_concat_feature_transformer = {
    'start_date': train_start_date,
    'end_date': train_end_date,
    'feature_transformers': feature_transformers,
}

feature_transformer = ConcatFeatureTransformer(**config_concat_feature_transformer)
features = feature_transformer.fit_transform(user_log, pairs)

# The column-wise concat below relies on `features` having one row per pair;
# fail loudly instead of writing a misaligned or null-padded file.
assert len(features) == len(pairs), (
    f'feature rows ({len(features)}) do not match pair rows ({len(pairs)})'
)

# Keep only the identifying/label columns of `pairs` and attach the features.
pairs = cudf.concat([pairs[['user_id', 'product_id', 'target', 'rated']], features], axis=1)

pairs.to_csv(f'features_{date_th}_t{train_period}_e{eval_period}_n{top_n}.csv', index=False)
pairs.head()

Unnamed: 0,user_id,product_id,target,rated,cv-score-r0.9_by_user,click-score-r0.9_by_user,pv-score-r0.9_by_user,other-score-r0.9_by_user,cv-score-r0.9_by_item,click-score-r0.9_by_item,pv-score-r0.9_by_item,other-score-r0.9_by_item,cv-score-r0.9_by_user-item,click-score-r0.9_by_user-item,pv-score-r0.9_by_user-item,other-score-r0.9_by_user-item
0,0000073_D,00684669_d,0,0,0.0,0.0,3.606296,0.723815,0.0,0.0,2.762622,0.0,0.0,0.0,0.0,0.0
1,0000073_D,00032850_d,0,0,0.0,0.0,3.606296,0.723815,0.0,0.0,1.916256,0.0,0.0,0.0,0.0,0.0
2,0000073_D,00277807_d,0,0,0.0,0.0,3.606296,0.723815,0.0,0.0,1.193847,0.0,0.0,0.0,0.0,0.0
3,0000073_D,00761206_d,0,0,0.0,0.0,3.606296,0.723815,0.0,0.0,0.897884,0.0,0.0,0.0,0.0,0.0
4,0000073_D,00337874_d,0,0,0.0,0.0,3.606296,0.723815,0.0,0.0,7.116273,8.810582,0.0,0.0,0.0,0.0


In [7]:
# Second feature-building pass: same pipeline as the earlier one, with a later
# cut-off date and the `test_`-prefixed pairs/features files.
date_th = '2017-04-30'
train_period = 7
eval_period = 7
top_n = 30
decay_rate = 0.9

# NOTE(review): this re-reads the same log path as the earlier load cell; the
# existing frame could be reused if the transformers do not mutate it — confirm.
user_log = cudf.read_csv('../data/processed/train.csv', parse_dates=['time_stamp'])
pairs_path = f'test_pairs_{date_th}_t{train_period}_e{eval_period}_n{top_n}.csv'
pairs = cudf.read_csv(pairs_path)

# Only the first two of the four returned values (the training window bounds) are used.
train_start_date, train_end_date, _, _ = get_data_period(date_th, train_period, eval_period)

# Keyword arguments shared by all three action-score transformers.
# (This dict was previously defined twice verbatim; one definition is enough.)
config_user_action_score = {
    'start_date': train_start_date,
    'end_date': train_end_date,
    'decay_rate': decay_rate,
}

feature_transformers = [
    UserActionScore(**config_user_action_score),
    ItemActionScore(**config_user_action_score),
    UserItemActionScore(**config_user_action_score),
]

# The concatenating transformer receives the same window plus the list of
# transformers whose outputs it combines.
config_concat_feature_transformer = {
    'start_date': train_start_date,
    'end_date': train_end_date,
    'feature_transformers': feature_transformers,
}

feature_transformer = ConcatFeatureTransformer(**config_concat_feature_transformer)
features = feature_transformer.fit_transform(user_log, pairs)

# The column-wise concat below relies on `features` having one row per pair;
# fail loudly instead of writing a misaligned or null-padded file.
assert len(features) == len(pairs), (
    f'feature rows ({len(features)}) do not match pair rows ({len(pairs)})'
)

# Keep only the identifying/label columns of `pairs` and attach the features.
pairs = cudf.concat([pairs[['user_id', 'product_id', 'target', 'rated']], features], axis=1)

pairs.to_csv(f'test_features_{date_th}_t{train_period}_e{eval_period}_n{top_n}.csv', index=False)
pairs.head()

Unnamed: 0,user_id,product_id,target,rated,cv-score-r0.9_by_user,click-score-r0.9_by_user,pv-score-r0.9_by_user,other-score-r0.9_by_user,cv-score-r0.9_by_item,click-score-r0.9_by_item,pv-score-r0.9_by_item,other-score-r0.9_by_item,cv-score-r0.9_by_user-item,click-score-r0.9_by_user-item,pv-score-r0.9_by_user-item,other-score-r0.9_by_user-item
0,0028734_A,00009492_a,0,0,0.0,0.0,0.0,0.0,0.0,0.479718,3.431413,7.523275,0.0,0.0,0.0,0.0
1,0028734_A,00009413_a,0,0,0.0,0.0,0.0,0.0,3.790171,5.950829,105.982391,101.194386,0.0,0.0,0.0,0.0
2,0028734_A,00002511_a,0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0028734_A,00010901_a,0,0,0.0,0.0,0.0,0.0,12.922887,8.820094,315.20051,206.762406,0.0,0.0,0.0,0.0
4,0028734_A,00012717_a,0,0,0.0,0.0,0.0,0.0,4.79054,9.810857,408.530459,301.210299,0.0,0.0,0.0,0.0
