# Feature Engineering
* Date Time Feature
* Lag Feature
* Window Feature

In [51]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [52]:
# Load the daily minimum temperature dataset as a date-indexed Series.
# Per the original note, remove the last line of the CSV file before loading
# (presumably a non-data footer -- confirm against the file).
# `read_csv(..., squeeze=True)` was deprecated in pandas 1.4 and removed in 2.0,
# so the single-column frame is squeezed to a Series explicitly instead.
series = pd.read_csv(
    "daily-minimum-temperatures.csv",
    header=0,        # first row holds the column names
    index_col=0,     # first column (dates) becomes the index
    parse_dates=True,
).squeeze("columns")
series


Date
1981-01-01    20.7
1981-01-02    17.9
1981-01-03    18.8
1981-01-04    14.6
1981-01-05    15.8
              ... 
1990-12-27    14.0
1990-12-28    13.6
1990-12-29    13.5
1990-12-30    15.7
1990-12-31    13.0
Name: Daily minimum temperatures in Melbourne, Australia, 1981-1990, Length: 3650, dtype: float64

In [53]:
# Make sure the temperature values are stored as floating-point numbers
# (the displayed Series above already reports dtype float64).
series = series.astype(float)

## Date Time Feature
* Month - Month of the year
* Day - Day of the month
* Temperature - Recorded minimum temperature

In [56]:
# Create date-time features from the dataset.
# The calendar fields are read straight from the DatetimeIndex and the
# temperatures are taken positionally via `.values`. This avoids `series[i]`,
# which relies on the deprecated integer-as-position fallback for a Series
# with a non-integer index, and replaces three Python-level loops with
# vectorised accessors.
dataframe = pd.DataFrame({
    'Month': series.index.month,
    'Day': series.index.day,
    'Temperature': series.values,
})
dataframe.head()

Unnamed: 0,Month,Day,Temperature
0,1,1,20.7
1,1,2,17.9
2,1,3,18.8
3,1,4,14.6
4,1,5,15.8


## Lag Feature
* Shift the series by one or more time steps to create lag features

In [57]:
# Build a one-step lag table: column 't' holds the previous observation and
# column 't+1' holds the current observation (the value to be predicted).
temps = pd.DataFrame(series.values)
previous_step = temps.shift(1)
dataframe_lag = pd.concat([previous_step, temps], axis=1)
dataframe_lag.columns = ['t', 't+1']
dataframe_lag.head()

Unnamed: 0,t,t+1
0,,20.7
1,20.7,17.9
2,17.9,18.8
3,18.8,14.6
4,14.6,15.8


The first row contains a NaN value because there is no earlier observation to shift into it, so remove that row before using the data.

### Lag Features with 3 Shifts


In [58]:
# Lag features for the three preceding time steps.
# Shifting by 3, 2 and 1 rows yields the columns 't-2', 't-1' and 't';
# the unshifted values form the target column 't+1'.
lag_steps = (3, 2, 1)
lagged_frames = [temps.shift(step) for step in lag_steps]
dataframe_lag_3 = pd.concat(lagged_frames + [temps], axis=1)
dataframe_lag_3.columns = ['t-2', 't-1', 't', 't+1']
dataframe_lag_3.head()

Unnamed: 0,t-2,t-1,t,t+1
0,,,,20.7
1,,,20.7,17.9
2,,20.7,17.9,18.8
3,20.7,17.9,18.8,14.6
4,17.9,18.8,14.6,15.8


Remove the first three rows because they contain NaN values.

## Window Feature

In [60]:
# Rolling-window mean feature.
# The values are shifted down one row first so that the two-row window at a
# given row only covers the two observations preceding that row's 't+1' value.
shifted = temps.shift(periods=1)
window = shifted.rolling(2)
means = window.mean()
dataframe_roll_mean = pd.concat(objs=[means, temps], axis=1)
dataframe_roll_mean.columns = ['mean(t-1,t)', 't+1']
dataframe_roll_mean.head()

Unnamed: 0,"mean(t-1,t)",t+1
0,,20.7
1,,17.9
2,19.3,18.8
3,18.35,14.6
4,16.7,15.8


In [62]:
# Rolling-window summary statistics with a configurable window width.
# NOTE(review): the series is shifted by `width - 1` rows, so with width=3 the
# window at a given row covers the three values ending two rows earlier (row 4
# aggregates rows 0-2) and skips the observation immediately before 't+1'.
# Confirm that this gap is intended.
width = 3
shifted = temps.shift(width - 1)
window = shifted.rolling(window=width)
stat_names = ['mean', 'median', 'max', 'min']
window_stats = [getattr(window, stat)() for stat in stat_names]
dataframe_roll = pd.concat(window_stats + [temps], axis=1)
dataframe_roll.columns = stat_names + ['t+1']
dataframe_roll.head()

Unnamed: 0,mean,median,max,min,t+1
0,,,,,20.7
1,,,,,17.9
2,,,,,18.8
3,,,,,14.6
4,19.133333,18.8,20.7,17.9,15.8
