> **Jupyter slideshow:** This notebook can be displayed as slides. To view it as a slideshow in your browser, run the following cell:

> `> jupyter nbconvert [this_notebook.ipynb] --to slides --post serve`
 
> To toggle off the slideshow cell formatting, click the `CellToolbar` button, then `View > Cell Toolbar > None`.

<img src="../images/shell-logo.png" width="50" height="50" align="right"/>

<img src="../images/di.png" width="50" height="50" align="right"/>

# Feature Engineering in Time Series

In [3]:
# Optional: uncomment the line below to convert a notebook to slides and serve it in the browser.
# NOTE(review): 'Python_intro.ipynb' may not be this notebook's filename - confirm and update before use.
# ! jupyter nbconvert Python_intro.ipynb --to slides --post serve

## Learning Objectives
*In this lesson, we will go over the following:*

- Introduction to Feature Engineering in Time Series
- Extracting Date & Time based Features
- Extracting lag features, sliding-window features, and expanding-window features


In [4]:
# Changing dtype: read the training CSV and turn the text 'Datetime' column
# into a real datetime column, then show the resulting column dtypes.
import pandas as pd

CSV_PATH = '../data/Train_SU63ISt.csv'
DATETIME_FORMAT = '%d-%m-%Y %H:%M'  # day-month-year hour:minute, as stored in the file

raw = pd.read_csv(CSV_PATH)
data = raw.assign(
    Datetime=pd.to_datetime(raw['Datetime'], format=DATETIME_FORMAT)
)
data.dtypes

ID                   int64
Datetime    datetime64[ns]
Count                int64
dtype: object

In [5]:
# Time features: derive the hour and the minute from each parsed timestamp.
import pandas as pd

data = (
    pd.read_csv('../data/Train_SU63ISt.csv')
    # Parse the text timestamps first so the .dt accessor is available below.
    .assign(
        Datetime=lambda frame: pd.to_datetime(
            frame['Datetime'], format='%d-%m-%Y %H:%M'
        )
    )
    .assign(
        Hour=lambda frame: frame['Datetime'].dt.hour,
        minute=lambda frame: frame['Datetime'].dt.minute,
    )
)

data.head()

Unnamed: 0,ID,Datetime,Count,Hour,minute
0,0,2012-08-25 00:00:00,8,0,0
1,1,2012-08-25 01:00:00,2,1,0
2,2,2012-08-25 02:00:00,6,2,0
3,3,2012-08-25 03:00:00,2,3,0
4,4,2012-08-25 04:00:00,2,4,0


In [9]:
# Date features: split each timestamp into calendar components
# (year, month, day) plus the weekday as both a number and a name.
import pandas as pd

loaded = pd.read_csv('../data/Train_SU63ISt.csv')
loaded['Datetime'] = pd.to_datetime(loaded['Datetime'], format='%d-%m-%Y %H:%M')

stamp = loaded['Datetime'].dt  # datetime accessor shared by every feature below

data = loaded.assign(
    year=stamp.year,
    month=stamp.month,
    day=stamp.day,
    dayofweek_num=stamp.dayofweek,
    dayofweek_name=stamp.day_name(),
)

data.head()

Unnamed: 0,ID,Datetime,Count,year,month,day,dayofweek_num,dayofweek_name
0,0,2012-08-25 00:00:00,8,2012,8,25,5,Saturday
1,1,2012-08-25 01:00:00,2,2012,8,25,5,Saturday
2,2,2012-08-25 02:00:00,6,2012,8,25,5,Saturday
3,3,2012-08-25 03:00:00,2,2012,8,25,5,Saturday
4,4,2012-08-25 04:00:00,2,2012,8,25,5,Saturday


In [10]:
# Expanding window: running mean of 'Count' over all rows seen so far.
# At least two observations are required, so the first row is NaN.
import pandas as pd

data = (
    pd.read_csv('../data/Train_SU63ISt.csv')
    .assign(
        Datetime=lambda frame: pd.to_datetime(
            frame['Datetime'], format='%d-%m-%Y %H:%M'
        )
    )
    .assign(
        expanding_mean=lambda frame: frame['Count'].expanding(min_periods=2).mean()
    )
    # Keep only the columns needed to inspect the feature.
    .loc[:, ['Datetime', 'Count', 'expanding_mean']]
)
data.head(10)

Unnamed: 0,Datetime,Count,expanding_mean
0,2012-08-25 00:00:00,8,
1,2012-08-25 01:00:00,2,5.0
2,2012-08-25 02:00:00,6,5.333333
3,2012-08-25 03:00:00,2,4.5
4,2012-08-25 04:00:00,2,4.0
5,2012-08-25 05:00:00,2,3.666667
6,2012-08-25 06:00:00,2,3.428571
7,2012-08-25 07:00:00,2,3.25
8,2012-08-25 08:00:00,6,3.555556
9,2012-08-25 09:00:00,2,3.4


In [11]:
# Lag feature: pair each row's 'Count' with the value from the previous row.
import pandas as pd

frame = pd.read_csv('../data/Train_SU63ISt.csv')
frame['Datetime'] = pd.to_datetime(frame['Datetime'], format='%d-%m-%Y %H:%M')

# shift(1) moves values down one row; the first row has no predecessor (NaN).
frame['lag_1'] = frame['Count'].shift(1)

shown_columns = ['Datetime', 'lag_1', 'Count']
data = frame[shown_columns]
data.head()

Unnamed: 0,Datetime,lag_1,Count
0,2012-08-25 00:00:00,,8
1,2012-08-25 01:00:00,8.0,2
2,2012-08-25 02:00:00,2.0,6
3,2012-08-25 03:00:00,6.0,2
4,2012-08-25 04:00:00,2.0,2


In [12]:
# Seven lag features: lag_k holds the 'Count' value from k rows earlier.

import pandas as pd

frame = pd.read_csv('../data/Train_SU63ISt.csv')
frame['Datetime'] = pd.to_datetime(frame['Datetime'], format='%d-%m-%Y %H:%M')

# Column names lag_1 ... lag_7, generated once and reused for selection below.
lag_columns = [f'lag_{step}' for step in range(1, 8)]
for step, column in enumerate(lag_columns, start=1):
    # The first `step` rows have no value that far back, so they become NaN.
    frame[column] = frame['Count'].shift(step)

data = frame[['Datetime'] + lag_columns + ['Count']]
data.head(10)

Unnamed: 0,Datetime,lag_1,lag_2,lag_3,lag_4,lag_5,lag_6,lag_7,Count
0,2012-08-25 00:00:00,,,,,,,,8
1,2012-08-25 01:00:00,8.0,,,,,,,2
2,2012-08-25 02:00:00,2.0,8.0,,,,,,6
3,2012-08-25 03:00:00,6.0,2.0,8.0,,,,,2
4,2012-08-25 04:00:00,2.0,6.0,2.0,8.0,,,,2
5,2012-08-25 05:00:00,2.0,2.0,6.0,2.0,8.0,,,2
6,2012-08-25 06:00:00,2.0,2.0,2.0,6.0,2.0,8.0,,2
7,2012-08-25 07:00:00,2.0,2.0,2.0,2.0,6.0,2.0,8.0,2
8,2012-08-25 08:00:00,2.0,2.0,2.0,2.0,2.0,6.0,2.0,6
9,2012-08-25 09:00:00,6.0,2.0,2.0,2.0,2.0,2.0,6.0,2
