In [32]:
import pandas as pd

In [50]:
predictions = pd.read_csv(f"../data/predictions/predictions-{11}-{6}.csv")
features = pd.read_csv("../data/tsfeatures/tsfeatures.csv")
predictions.drop(predictions.columns[0], axis=1, inplace=True)
predictions.rename(columns={list(predictions)[0]:'date'}, inplace=True)
predictions['date'] = pd.to_datetime(predictions['date'], errors='coerce')


sales = pd.DataFrame(predictions.sales)

errors = predictions.drop(predictions.columns[[0, 1, 2]], axis=1)\
    .sub(sales.sales, axis=0)\
        .divide(sales.sales, axis=0) * 100

In [51]:
def create_features(df):
    df['dayofweek'] = df['date'].dt.dayofweek
    df['quarter'] = df['date'].dt.quarter
    df['month'] = df['date'].dt.month
    df['year'] = df['date'].dt.year
    df['dayofyear'] = df['date'].dt.dayofyear
    df['dayofmonth'] = df['date'].dt.day
    df['weekofyear'] = df['date'].dt.isocalendar().week
    df['horizon'] = (df['date'] - df.date.min()) / np.timedelta64(1, 'D')
    
    X = df[['dayofweek','quarter','month','year',
           'dayofyear','dayofmonth','weekofyear', 'horizon']]
    
    return X

In [57]:
errors_df = pd.concat([predictions[["store_item", "date"]], errors], axis=1)

error_melted = pd.melt(errors_df, ["store_item", "date"], 
                       value_vars=errors.columns, value_name='pct_error', var_name="model")

feats = features.drop(features.columns[[0, 3, 6, 7, 8]], axis=1)\
    .rename(columns={"unique_id":"store_item"})

errors_and_feats = pd.merge(error_melted, feats, right_on="store_item", left_on= "store_item")\
    .dropna(axis = 1, how = 'all')\
    .dropna()
    
error_melted_time = pd.concat([errors_and_feats, create_features(predictions)], axis=1)


error_melted_time

Unnamed: 0,store_item,date,model,pct_error,hurst,unitroot_pp,unitroot_kpss,stability,nperiods,seasonal_period,...,diff2_acf1,diff2_acf10,dayofweek,quarter,month,year,dayofyear,dayofmonth,weekofyear,horizon
0,1-1,2016-07-03,autoreg,-4.100212,0.841606,-991.917409,3.146563,0.436294,0.0,1.0,...,-0.631182,0.535604,6.0,3.0,7.0,2016.0,185.0,3.0,26,0.0
1,1-1,2016-07-04,autoreg,40.951577,0.841606,-991.917409,3.146563,0.436294,0.0,1.0,...,-0.631182,0.535604,0.0,3.0,7.0,2016.0,186.0,4.0,27,1.0
2,1-1,2016-07-05,autoreg,-12.255485,0.841606,-991.917409,3.146563,0.436294,0.0,1.0,...,-0.631182,0.535604,1.0,3.0,7.0,2016.0,187.0,5.0,27,2.0
3,1-1,2016-07-06,autoreg,27.276438,0.841606,-991.917409,3.146563,0.436294,0.0,1.0,...,-0.631182,0.535604,2.0,3.0,7.0,2016.0,188.0,6.0,27,3.0
4,1-1,2016-07-07,autoreg,-20.604291,0.841606,-991.917409,3.146563,0.436294,0.0,1.0,...,-0.631182,0.535604,3.0,3.0,7.0,2016.0,189.0,7.0,27,4.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1634431,10-50,2017-12-27,xgb_preds,-5.223773,0.904447,-611.977975,3.074741,0.594580,0.0,1.0,...,-0.636360,0.810785,,,,,,,,
1634432,10-50,2017-12-28,xgb_preds,12.025831,0.904447,-611.977975,3.074741,0.594580,0.0,1.0,...,-0.636360,0.810785,,,,,,,,
1634433,10-50,2017-12-29,xgb_preds,-5.312405,0.904447,-611.977975,3.074741,0.594580,0.0,1.0,...,-0.636360,0.810785,,,,,,,,
1634434,10-50,2017-12-30,xgb_preds,11.576532,0.904447,-611.977975,3.074741,0.594580,0.0,1.0,...,-0.636360,0.810785,,,,,,,,


In [60]:
y = errors_and_feats.pct_error
X = errors_and_feats.drop(errors_and_feats.columns[[0,1,3,8,9]], axis=1)

X = pd.get_dummies(X, ["model"])
X.columns = X.columns.str.replace("model_", '', regex=False)

In [62]:
X.to_csv("../data/forecasts_for_ml/X.csv")
y.to_csv("../data/forecasts_for_ml/y.csv")