In [1]:
import configparser as cp
from datetime import datetime, timedelta
import numpy as np
import os
import pandas as pd

# Load project configuration. ConfigParser.read() silently skips files it
# cannot open and returns the list of files it actually parsed, so check that
# result here instead of failing later with a confusing KeyError on 'paths'.
config_file = '../../../config.ini'
config = cp.ConfigParser()
if not config.read(config_file):
    raise FileNotFoundError(
        f"Could not read configuration file: {os.path.abspath(config_file)}"
    )

# Experiment identifier; used below as a sub-directory of the submission path.
exp_id = 'exp_01'

# Train/test CSV locations come from the [paths] section of the config file.
data_train = pd.read_csv(config['paths']['train'])
data_test = pd.read_csv(config['paths']['test'])

# Final submission file: <submit_path>/<exp_id>/submission/submit.csv
submit_path = os.path.join(config['paths']['submit_path'], exp_id, 'submission', 'submit.csv')

In [2]:
# Display the first 10 rows of the training data as a quick look at its columns.
data_train.head(10)

Unnamed: 0,id,date,store_nbr,family,sales,onpromotion
0,0,2013-01-01,1,AUTOMOTIVE,0.0,0
1,1,2013-01-01,1,BABY CARE,0.0,0
2,2,2013-01-01,1,BEAUTY,0.0,0
3,3,2013-01-01,1,BEVERAGES,0.0,0
4,4,2013-01-01,1,BOOKS,0.0,0
5,5,2013-01-01,1,BREAD/BAKERY,0.0,0
6,6,2013-01-01,1,CELEBRATION,0.0,0
7,7,2013-01-01,1,CLEANING,0.0,0
8,8,2013-01-01,1,DAIRY,0.0,0
9,9,2013-01-01,1,DELI,0.0,0


In [3]:
def find_sale(df, ts, lag):
    """Return the ``sales`` value recorded ``lag`` days away from ``ts``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``date`` column (compared against a ``'YYYY-MM-DD'``
        string) and a ``sales`` column.
    ts : str
        Reference date formatted as ``'%Y-%m-%d'``.
    lag : int
        Offset in days added to ``ts``; negative values look into the past.

    Returns
    -------
    The ``sales`` value of the first row whose ``date`` equals the shifted date.

    Raises
    ------
    ValueError
        If ``ts`` does not match ``'%Y-%m-%d'``.
    IndexError
        If ``df`` has no row for the shifted date.
    """
    shifted = datetime.strptime(ts, '%Y-%m-%d').date() + timedelta(days=lag)
    target_date = shifted.isoformat()  # 'YYYY-MM-DD', same layout as the input

    matches = df.loc[df['date'] == target_date, 'sales']
    if matches.empty:
        # Same exception type as indexing an empty result, but the message
        # says which date is missing instead of "index 0 is out of bounds".
        raise IndexError(
            f"no sales row for date {target_date} (ts={ts}, lag={lag})"
        )

    # When several rows share the date, the first one wins.
    return matches.iloc[0]

def forecast_sale(df, ts, alpha):
    """Forecast sales for ``ts`` by extrapolating the trend between two lags.

    The forecast is ``s28 + alpha * (s28 - s56)``, where ``s28`` and ``s56``
    are the sales found 28 and 56 days before ``ts``. With ``alpha == 0`` it
    reduces to repeating the value from 28 days earlier.

    Parameters
    ----------
    df : pandas.DataFrame
        History passed through to ``find_sale``.
    ts : str
        Date to forecast, formatted as ``'%Y-%m-%d'``.
    alpha : float
        Weight applied to the change between the two lagged values.

    Returns
    -------
    The extrapolated sales value. It is not clipped, so it can be negative
    when sales dropped between the two lags.
    """
    first_lag = find_sale(df, ts, -28)
    second_lag = find_sale(df, ts, -56)
    # Return the extrapolated value; returning first_lag alone would make
    # alpha and second_lag irrelevant.
    return first_lag + alpha * (first_lag - second_lag)

In [4]:
# Build one forecast per (family, store, test date) combination and collect
# the results column-wise before assembling them into a single frame.
df = data_train

prods = data_train['family'].unique()
stores = data_train['store_nbr'].unique()
dates = data_test['date'].unique()

list_prods, list_dates, list_stores, list_forc = [], [], [], []

for prod in prods:
    # Narrow to one family first so the per-store filter scans fewer rows.
    df_prod = data_train[data_train['family'] == prod]
    for store in stores:
        df_stores = df_prod[df_prod['store_nbr'] == store]
        # One forecast per test date for this (family, store) history.
        list_forc.extend(forecast_sale(df_stores, ts, 0.5) for ts in dates)
        list_stores.extend([store] * len(dates))
        list_dates.extend(dates)
        list_prods.extend([prod] * len(dates))

aux = pd.DataFrame({
    'date': list_dates,
    'family': list_prods,
    'store_nbr': list_stores,
    'sales': list_forc,
})

In [5]:
# Attach each test row's forecast by its (date, store_nbr, family) key. The
# left join keeps every test row; a row with no matching forecast gets NaN sales.
res = pd.merge(data_test, aux, how='left', on=['date', 'store_nbr', 'family'])
res = res[['id', 'sales']]

# to_csv does not create missing parent directories, so make sure the
# experiment's submission folder exists before writing the file.
submit_dir = os.path.dirname(submit_path)
if submit_dir:
    os.makedirs(submit_dir, exist_ok=True)
res.to_csv(submit_path, index=False, header=True)