In [1]:
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import cudf

from src.utils import get_data_period
from src.features import UserActionScore, ItemActionScore, UserItemActionScore, ConcatFeatureTransformer

In [2]:
# Experiment parameters for the training split.
# `date_th`, `train_period` and `eval_period` are handed to get_data_period()
# below and are also embedded in the input/output file names.
date_th = '2017-04-16'
train_period, eval_period = 7, 7  # presumably a number of days -- TODO confirm

# `top_n` only appears in the pairs/features file names in this notebook.
top_n = 30

# Passed as `decay_rate` to the action-score feature transformers.
decay_rate = 0.9

In [3]:
# The candidate-pairs file name encodes the split parameters defined above.
pairs_path = f'pairs_{date_th}_t{train_period}_e{eval_period}_n{top_n}.csv'

# Load the interaction log (with `time_stamp` parsed as datetimes) and the
# candidate pairs into GPU dataframes.
user_log = cudf.read_csv(
    '../data/processed/train.csv',
    parse_dates=['time_stamp'],
)
pairs = cudf.read_csv(pairs_path)

# Preview the first rows of the candidate pairs.
pairs.head()

Unnamed: 0,user_id,product_id,target,rated
0,0000019_B,00012052_b,0,0
1,0000019_B,00012487_b,0,0
2,0000019_B,00001760_b,0,0
3,0000019_B,00003917_b,0,0
4,0000019_B,00013563_b,0,0


In [4]:
# get_data_period() returns four values; only the first two (used below as the
# feature window's start/end dates) are kept, the remaining two are discarded.
train_start_date, train_end_date, _, _ = get_data_period(
    date_th,
    train_period,
    eval_period,
)

In [5]:
# Sanity check: build the user-level action-score features on their own and
# preview them before assembling the full feature set.
config_user_action_score = dict(
    start_date=train_start_date,
    end_date=train_end_date,
    decay_rate=decay_rate,
)

feature_transformer = UserActionScore(**config_user_action_score)
features = feature_transformer.fit_transform(user_log, pairs)

# Show the first rows of the generated feature columns.
features.head()

Unnamed: 0,cv-score-r0.9_by_user,click-score-r0.9_by_user,pv-score-r0.9_by_user,other-score-r0.9_by_user
0,0.0,0.0,4.842043,0.0
1,0.0,0.0,4.842043,0.0
2,0.0,0.0,4.842043,0.0
3,0.0,0.0,4.842043,0.0
4,0.0,0.0,4.842043,0.0


In [6]:
# Build the full training feature table: user-, item- and user-item-level
# action scores concatenated, then joined to the candidate pairs and saved.

# Constructor arguments shared by every action-score transformer.
config_user_action_score = {
    'start_date': train_start_date,
    'end_date': train_end_date,
    'decay_rate': decay_rate,
}

# One transformer per aggregation level; their output columns are suffixed
# `_by_user`, `_by_item` and `_by_user-item` respectively.
feature_transformers = [
    UserActionScore(**config_user_action_score),
    ItemActionScore(**config_user_action_score),
    UserItemActionScore(**config_user_action_score),
]

config_concat_feature_transformer = {
    'start_date': train_start_date,
    'end_date': train_end_date,
    'feature_transformers': feature_transformers,
}

feature_transformer = ConcatFeatureTransformer(**config_concat_feature_transformer)
features = feature_transformer.fit_transform(user_log, pairs)

# Keep only the identifier/label columns and append the features column-wise.
# NOTE(review): axis=1 concat assumes `features` shares the row index of
# `pairs` -- confirm against fit_transform().
pairs = cudf.concat([pairs[['user_id', 'product_id', 'target', 'rated']], features], axis=1)

# Persist the result; the file name mirrors the pairs file's parameter encoding.
pairs.to_csv(f'features_{date_th}_t{train_period}_e{eval_period}_n{top_n}.csv', index=False)
pairs.head()

Unnamed: 0,user_id,product_id,target,rated,cv-score-r0.9_by_user,click-score-r0.9_by_user,pv-score-r0.9_by_user,other-score-r0.9_by_user,cv-score-r0.9_by_item,click-score-r0.9_by_item,pv-score-r0.9_by_item,other-score-r0.9_by_item,cv-score-r0.9_by_user-item,click-score-r0.9_by_user-item,pv-score-r0.9_by_user-item,other-score-r0.9_by_user-item
0,0000019_B,00012052_b,0,0,0.0,0.0,4.842043,0.0,2.851759,0.685239,36.381503,7.122906,0.0,0.0,0.511026,0.0
1,0000019_B,00012487_b,0,0,0.0,0.0,4.842043,0.0,0.0,0.0,38.376767,0.0,0.0,0.0,1.021933,0.0
2,0000019_B,00001760_b,0,0,0.0,0.0,4.842043,0.0,0.564306,0.0,50.393612,0.564286,0.0,0.0,1.021762,0.0
3,0000019_B,00003917_b,0,0,0.0,0.0,4.842043,0.0,0.0,0.0,88.576456,0.0,0.0,0.0,0.0,0.0
4,0000019_B,00013563_b,0,0,0.0,0.0,4.842043,0.0,0.0,0.728189,73.353978,0.0,0.0,0.0,0.0,0.0


In [7]:
# Test split: rebuild the same feature table for the test candidate pairs.

# Experiment parameters for the test split (these overwrite the training values).
date_th = '2017-04-30'
train_period = 7
eval_period = 7
top_n = 30
decay_rate = 0.9

# Reload the interaction log and read the test candidate pairs.
user_log = cudf.read_csv('../data/processed/train.csv', parse_dates=['time_stamp'])
pairs_path = f'test_pairs_{date_th}_t{train_period}_e{eval_period}_n{top_n}.csv'
pairs = cudf.read_csv(pairs_path)

# Only the first two returned values (feature window start/end) are used.
train_start_date, train_end_date, _, _ = get_data_period(date_th, train_period, eval_period)

# Constructor arguments shared by every action-score transformer.
config_user_action_score = {
    'start_date': train_start_date,
    'end_date': train_end_date,
    'decay_rate': decay_rate,
}

feature_transformers = [
    UserActionScore(**config_user_action_score),
    ItemActionScore(**config_user_action_score),
    UserItemActionScore(**config_user_action_score),
]

config_concat_feature_transformer = {
    'start_date': train_start_date,
    'end_date': train_end_date,
    'feature_transformers': feature_transformers,
}

feature_transformer = ConcatFeatureTransformer(**config_concat_feature_transformer)
features = feature_transformer.fit_transform(user_log, pairs)

# Fill `target` and `rated` with 0 so the output has the same columns as the
# training feature file (presumably the test pairs file carries no labels --
# TODO confirm).
pairs[['target', 'rated']] = 0

# NOTE(review): axis=1 concat assumes `features` shares the row index of
# `pairs` -- confirm against fit_transform().
pairs = cudf.concat([pairs[['user_id', 'product_id', 'target', 'rated']], features], axis=1)

pairs.to_csv(f'test_features_{date_th}_t{train_period}_e{eval_period}_n{top_n}.csv', index=False)
pairs.head()

Unnamed: 0,user_id,product_id,target,rated,cv-score-r0.9_by_user,click-score-r0.9_by_user,pv-score-r0.9_by_user,other-score-r0.9_by_user,cv-score-r0.9_by_item,click-score-r0.9_by_item,pv-score-r0.9_by_item,other-score-r0.9_by_item,cv-score-r0.9_by_user-item,click-score-r0.9_by_user-item,pv-score-r0.9_by_user-item,other-score-r0.9_by_user-item
0,0000008_A,00008092_a,0,0,0.0,0.0,5.990876,0.0,3.4178,0.0,46.317481,62.208568,0.0,0.0,0.812076,0.0
1,0000008_A,00006560_a,0,0,0.0,0.0,5.990876,0.0,5.649521,2.035684,160.366425,60.967523,0.0,0.0,0.811893,0.0
2,0000008_A,00006259_a,0,0,0.0,0.0,5.990876,0.0,6.339871,1.550382,150.762256,57.238125,0.0,0.0,1.552863,0.0
3,0000008_A,00004596_a,0,0,0.0,0.0,5.990876,0.0,3.992705,4.691124,106.297486,107.391576,0.0,0.0,0.811814,0.0
4,0000008_A,00000269_a,0,0,0.0,0.0,5.990876,0.0,11.858996,6.753852,332.442633,244.034612,0.0,0.0,0.740898,0.0
