In [1]:
import pandas as pd
import numpy as np
import feather

from tqdm import tqdm

In [2]:
from outliers import remove_outliers

In [3]:
# Load the payments table; later cells slice it by `shop_id`.
PAYS_PATH = 'data/df_pays_na_test.feather'
df_pays = feather.read_dataframe(PAYS_PATH)

In [4]:
# Distinct shop ids in ascending order (a plain Python list).
shops = sorted(df_pays.shop_id.unique())

In [155]:
# Exploratory sample: the rows of shop 1, re-indexed from zero.
is_first_shop = df_pays.shop_id == 1
df_shop = df_pays[is_first_shop].reset_index(drop=True)
# Return value is discarded, so this presumably edits df_shop in place -- verify
# against outliers.remove_outliers.
remove_outliers(df_shop)

In [24]:
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt

In [56]:
def smooth_one(df_shop, a):
    """Simple exponential smoothing of ``df_shop.pays_count``.

    Applies ``s[i] = a * y[i] + (1 - a) * s[i - 1]``, seeded with the first
    non-missing observation.  A missing (NaN) observation after the seed is
    imputed with the previous smoothed value, which carries the smoothed
    value forward across the gap.

    Parameters
    ----------
    df_shop : object exposing a ``pays_count`` sequence (e.g. a DataFrame)
        Observations in the order they should be smoothed.
    a : float
        Weight given to the newest observation.

    Returns
    -------
    numpy.ndarray
        Float array with one smoothed value per input row.  Rows before the
        first non-missing observation are NaN; an empty input gives an empty
        array.
    """
    # Always work in float: with an integer column the output array would be
    # integer as well and every smoothed value would be silently truncated.
    y = np.array(df_shop.pays_count, dtype=float)
    s = np.full(len(y), np.nan)

    for i in range(len(y)):
        if i == 0 or np.isnan(s[i - 1]):
            # No smoothed history yet: (re)seed from the raw observation so a
            # leading NaN does not poison the rest of the series.
            s[i] = y[i]
            continue

        if np.isnan(y[i]):
            y[i] = s[i - 1]
        s[i] = a * y[i] + (1 - a) * s[i - 1]

    return s

In [107]:
def smooth_two_additive(df_shop, alpha, beta):
    """Double exponential smoothing (additive trend) of ``df_shop.pays_count``.

    Recursion, with ``forecast[i] = level[i] + trend[i]``::

        level[i] = alpha * y[i] + (1 - alpha) * forecast[i - 1]
        trend[i] = beta * (level[i] - level[i - 1]) + (1 - beta) * trend[i - 1]

    The series is seeded at the first non-missing observation: the level and
    the first forecast are that observation, and the trend is the difference
    to the next observation (0 when there is no usable next observation).
    A missing observation after the seed is imputed with the previous
    forecast.

    Parameters
    ----------
    df_shop : object exposing a ``pays_count`` sequence (e.g. a DataFrame)
    alpha : float
        Weight of the newest observation in the level update.
    beta : float
        Weight of the newest level change in the trend update.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``(level, trend)`` float arrays, one value per input row.  Rows before
        the first non-missing observation are NaN.
    """
    # Float copy: avoids integer truncation and lets gaps be imputed in place
    # without touching the caller's data.
    y = np.array(df_shop.pays_count, dtype=float)
    n = len(y)

    level = np.full(n, np.nan)
    trend = np.full(n, np.nan)
    forecast = np.full(n, np.nan)

    observed = np.flatnonzero(~np.isnan(y))
    if observed.size == 0:
        return level, trend

    start = observed[0]
    # The level must be seeded too; leaving it at 0 makes the first trend
    # update see a jump of the full series magnitude.
    level[start] = y[start]
    forecast[start] = y[start]
    trend[start] = 0.0
    if start + 1 < n and not np.isnan(y[start + 1]):
        trend[start] = y[start + 1] - y[start]

    for i in range(start + 1, n):
        if np.isnan(y[i]):
            y[i] = forecast[i - 1]

        level[i] = alpha * y[i] + (1 - alpha) * forecast[i - 1]
        trend[i] = beta * (level[i] - level[i - 1]) + (1 - beta) * trend[i - 1]
        forecast[i] = level[i] + trend[i]

    return level, trend

In [128]:
def smooth_two_mult(df_shop, alpha, beta):
    """Double exponential smoothing (multiplicative trend) of ``df_shop.pays_count``.

    The trend is a growth factor (ratio of consecutive levels) and is applied
    by multiplication, ``forecast[i] = level[i] * trend[i]``::

        level[i] = alpha * y[i] + (1 - alpha) * forecast[i - 1]
        trend[i] = beta * (level[i] / level[i - 1]) + (1 - beta) * trend[i - 1]

    The series is seeded at the first non-missing observation: the level and
    the first forecast are that observation, and the trend is the ratio of the
    next observation to it (1.0, i.e. "no growth", when that ratio cannot be
    formed).  A missing observation after the seed is imputed with the
    previous forecast.

    Parameters
    ----------
    df_shop : object exposing a ``pays_count`` sequence (e.g. a DataFrame)
    alpha : float
        Weight of the newest observation in the level update.
    beta : float
        Weight of the newest level ratio in the trend update.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        ``(level, trend)`` float arrays, one value per input row.  Rows before
        the first non-missing observation are NaN.
    """
    # Float copy: avoids integer truncation and lets gaps be imputed in place
    # without touching the caller's data.
    y = np.array(df_shop.pays_count, dtype=float)
    n = len(y)

    level = np.full(n, np.nan)
    trend = np.full(n, np.nan)
    forecast = np.full(n, np.nan)

    observed = np.flatnonzero(~np.isnan(y))
    if observed.size == 0:
        return level, trend

    start = observed[0]
    # Seed the level as well; a level left at 0 drags the first ratio to 0.
    level[start] = y[start]
    forecast[start] = y[start]
    trend[start] = 1.0
    if start + 1 < n and not np.isnan(y[start + 1]) and y[start] != 0:
        trend[start] = y[start + 1] / y[start]

    for i in range(start + 1, n):
        if np.isnan(y[i]):
            y[i] = forecast[i - 1]

        level[i] = alpha * y[i] + (1 - alpha) * forecast[i - 1]
        if level[i - 1] != 0:
            trend[i] = beta * (level[i] / level[i - 1]) + (1 - beta) * trend[i - 1]
        else:
            # Ratio undefined for a zero previous level: keep the last trend
            # instead of producing inf/NaN.
            trend[i] = trend[i - 1]
        # A growth factor is applied by multiplication, not addition.
        forecast[i] = level[i] * trend[i]

    return level, trend

In [139]:
# Scratch copy of the current df_shop's pays_count values as a NumPy array.
# NOTE(review): no later cell shown here reads this module-level `y` (the
# smoothing functions each build their own local `y`) -- looks like leftover
# exploration; confirm before removing.
y = np.array(list(df_shop.pays_count))

In [188]:
def calc_base_smooth_2(df_shop, alpha, beta, func, name, shift=14):
    """Add lagged level / trend / sum columns from a two-parameter smoother.

    Calls ``func(df_shop, alpha, beta)``, which must return a ``(level, trend)``
    pair with one value per row, and writes three columns into ``df_shop``
    in place: ``<name>_level``, ``<name>_trend`` and ``<name>_sum``
    (``level + trend``).  Each column is then shifted down by ``shift`` rows,
    so the first ``shift`` rows of every new column are NaN.

    Parameters
    ----------
    df_shop : pandas.DataFrame
        Frame to smooth and to receive the new columns (modified in place).
    alpha, beta : float
        Passed through unchanged to ``func``.
    func : callable
        Smoother with signature ``func(df_shop, alpha, beta) -> (level, trend)``.
    name : str
        Prefix of the three created column names.
    shift : int, default 14
        Number of rows each column is shifted by.

    Returns
    -------
    None
    """
    l, t = func(df_shop, alpha, beta)

    components = (('_level', l), ('_trend', t), ('_sum', l + t))
    for suffix, values in components:
        column = name + suffix
        df_shop[column] = values
        df_shop[column] = df_shop[column].shift(shift)


def add_exp_smoothing_features(df_shop):
    """Add every exponential-smoothing feature column to ``df_shop`` in place.

    For each smoothing factor ``a`` in 0.2, 0.5, 0.8:

    * ``exp_smoothing_1_0<a*10>``: ``smooth_one`` over all rows, shifted by
      14 rows.
    * ``exp_smoothing_1_0<a*10>_dow``: ``smooth_one`` run separately on the
      rows of each ``dow`` value 0..6 and written back to those rows, then
      the whole column shifted by 14 rows.
    * for each ``b`` in 0.2, 0.5, 0.8: the level / trend / sum columns made
      by ``calc_base_smooth_2`` for the additive and then the multiplicative
      smoother, under the ``exp_smoothing_2_add_...`` and
      ``exp_smoothing_2_mult_...`` prefixes.

    Returns None.
    """
    weekday_masks = [df_shop.dow == day for day in range(7)]
    factors = [0.2, 0.5, 0.8]

    for alpha in factors:
        alpha_tag = int(10 * alpha)

        # Smoothing over the whole series.
        overall_col = 'exp_smoothing_1_0%d' % alpha_tag
        df_shop[overall_col] = smooth_one(df_shop, a=alpha)
        df_shop[overall_col] = df_shop[overall_col].shift(14)

        # Smoothing within each dow value, scattered back to its own rows.
        weekday_col = 'exp_smoothing_1_0%d_dow' % alpha_tag
        df_shop[weekday_col] = np.nan
        for mask in weekday_masks:
            df_shop.loc[mask, weekday_col] = smooth_one(df_shop[mask], a=alpha)
        df_shop[weekday_col] = df_shop[weekday_col].shift(14)

        # Two-parameter smoothers, additive first and then multiplicative.
        for beta in factors:
            beta_tag = int(beta * 10)

            additive_name = 'exp_smoothing_2_add_0%d_0%d' % (alpha_tag, beta_tag)
            calc_base_smooth_2(df_shop, alpha, beta, smooth_two_additive, additive_name)

            mult_name = 'exp_smoothing_2_mult_0%d_0%d' % (alpha_tag, beta_tag)
            calc_base_smooth_2(df_shop, alpha, beta, smooth_two_mult, mult_name)



In [189]:
# Build one feature frame per shop, in the order given by `shops`.
dfs = []

for shop_id in tqdm(shops):
    shop_rows = df_pays.shop_id == shop_id
    df_shop = df_pays[shop_rows].reset_index(drop=True)
    remove_outliers(df_shop)
    add_exp_smoothing_features(df_shop)
    dfs.append(df_shop)

100%|██████████| 2000/2000 [03:44<00:00,  8.92it/s]


In [190]:
# Drop the first 7 * 2 * 3 = 42 rows of every shop frame.
# NOTE(review): presumably a warm-up window for the lagged smoothing columns -- confirm.
LEADING_ROWS_DROPPED = 7 * 2 * 3
truncated_dfs = [shop_frame.iloc[LEADING_ROWS_DROPPED:] for shop_frame in dfs]

In [191]:
# Stack all per-shop frames and renumber the rows from zero.
stacked = pd.concat(truncated_dfs)
df_features = stacked.reset_index(drop=True)

In [192]:
# Persist the combined feature table.
FEATURES_PATH = 'features/exp_smoothing_simple.feather'
feather.write_dataframe(df_features, FEATURES_PATH)