In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [13]:
# Use matplotlib's built-in 'ggplot' style sheet for the figures in this notebook.
plt.style.use('ggplot')

## Load Data

In [14]:
# Read the three raw CSVs (train, test, sample submission) in that order.
df_train, df_test, df_ss = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/raw/train.csv',
        '../data/raw/test.csv',
        '../data/raw/sample_submission.csv',
    )
)

In [15]:
# External GDP table; per the filename it covers 2015 to 2019 for Finland, Norway and Sweden.
df_gdp = pd.read_csv('../data/others/GDP_data_2015_to_2019_Finland_Norway_Sweden.csv')

## Transformation Functions

In [16]:
def feature_transfrom(df):
    """Parse the ``date`` column of ``df`` into pandas datetimes.

    The column is overwritten on the frame that was passed in, and that same
    frame object is handed back to the caller.
    """
    parsed_dates = pd.to_datetime(df['date'])
    df['date'] = parsed_dates
    return df

In [17]:
def feature_engineering(df, time_origin='2015-01-01'):
    """Add calendar-derived feature columns computed from ``df['date']``.

    The frame is modified in place and also returned, so existing callers that
    write ``df = feature_engineering(df)`` keep working.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime64 ``date`` column (the ``.dt`` accessor is used,
        so the column must already be parsed).
    time_origin : str or datetime-like, default '2015-01-01'
        Reference date for ``time_step``; dates before it yield negative steps.

    Returns
    -------
    pandas.DataFrame
        The same object as ``df`` with the added columns ``year``, ``quarter``,
        ``month``, ``week`` (ISO week number), ``day``, ``dayofyear``,
        ``daysinmonth``, ``dayofweek`` (Monday=0), ``weekend`` (1 for
        Saturday/Sunday, else 0) and ``time_step`` (whole days since
        ``time_origin``).
    """
    dates = df['date']

    df['year'] = dates.dt.year
    df['quarter'] = dates.dt.quarter
    df['month'] = dates.dt.month
    # isocalendar().week is a nullable UInt32 column; cast to a plain integer dtype.
    df['week'] = dates.dt.isocalendar().week.astype(int)
    df['day'] = dates.dt.day
    df['dayofyear'] = dates.dt.dayofyear
    df['daysinmonth'] = dates.dt.days_in_month
    df['dayofweek'] = dates.dt.dayofweek
    # dayofweek is 0..6 with Monday=0, so 5 and 6 are Saturday and Sunday.
    df['weekend'] = (dates.dt.dayofweek >= 5).astype(int)

    # Whole days elapsed since the origin. ``.dt.days`` replaces the former
    # ``.astype('timedelta64[D]').astype(np.int)``: ``np.int`` was removed in
    # NumPy 1.24 and pandas 2.x refuses casts to day-resolution timedelta64.
    df['time_step'] = (dates - pd.Timestamp(time_origin)).dt.days

    return df

## Feature Analysis

In [18]:
# Preview the first rows of the training frame.
df_train.head()

Unnamed: 0,row_id,date,country,store,product,num_sold
0,0,2015-01-01,Finland,KaggleMart,Kaggle Mug,329
1,1,2015-01-01,Finland,KaggleMart,Kaggle Hat,520
2,2,2015-01-01,Finland,KaggleMart,Kaggle Sticker,146
3,3,2015-01-01,Finland,KaggleRama,Kaggle Mug,572
4,4,2015-01-01,Finland,KaggleRama,Kaggle Hat,911


In [19]:
# Run the date-parsing transform over the train split, then the test split.
df_train, df_test = (
    feature_transfrom(df=split) for split in (df_train, df_test)
)

In [20]:
# Add the calendar-derived feature columns to the training frame.
# NOTE(review): df_test is not passed through feature_engineering in the cells
# visible here; confirm it receives the same features before any modelling step.
df_train = feature_engineering(df_train)

In [21]:
# Preview the training frame again to inspect the engineered feature columns.
df_train.head()

Unnamed: 0,row_id,date,country,store,product,num_sold,year,quarter,month,week,day,dayofyear,daysinmonth,dayofweek,weekend,time_step
0,0,2015-01-01,Finland,KaggleMart,Kaggle Mug,329,2015,1,1,1,1,1,31,3,0,0
1,1,2015-01-01,Finland,KaggleMart,Kaggle Hat,520,2015,1,1,1,1,1,31,3,0,0
2,2,2015-01-01,Finland,KaggleMart,Kaggle Sticker,146,2015,1,1,1,1,1,31,3,0,0
3,3,2015-01-01,Finland,KaggleRama,Kaggle Mug,572,2015,1,1,1,1,1,31,3,0,0
4,4,2015-01-01,Finland,KaggleRama,Kaggle Hat,911,2015,1,1,1,1,1,31,3,0,0


In [32]:
# df_ts = df_train[['row_id','date','num_sold','country','store','product']]

### tsfresh

In [23]:
import tsfresh
from tsfresh import extract_features

In [33]:
# extracted_features = extract_features(df_train[['row_id','date','num_sold','country','store','product']], column_id="row_id", column_sort="date")