# Feature Engineering

In [1]:
import pandas as pd
import numpy as np

import joblib

import params as p

In [2]:
# Read the pickled dataset '01_data.pkl' from the configured data directory.
input_path = p.DATA_PATH + '01_data.pkl'
data = joblib.load(input_path)

In [3]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,rented_bike_count,hour,temperature,humidity,wind_speed,visibility,solar_radiation,rainfall,snowfall,seasons,holiday,date_weekday,date_month
264,133,0,-11.0,51,1.1,2000,0.0,0.0,0.0,Winter,No Holiday,1,12
265,127,1,-11.2,51,1.1,2000,0.0,0.0,0.0,Winter,No Holiday,1,12
266,95,2,-11.5,50,0.7,2000,0.0,0.0,0.0,Winter,No Holiday,1,12
267,54,3,-11.6,50,2.2,1995,0.0,0.0,0.0,Winter,No Holiday,1,12
268,46,4,-11.6,47,2.1,1982,0.0,0.0,0.0,Winter,No Holiday,1,12


### Dealing with other categorical features

In [4]:
# Frequency of each label in the raw `holiday` column.
data['holiday'].value_counts()

No Holiday    7793
Holiday        408
Name: holiday, dtype: int64

In [5]:
# Frequency of each label in the `seasons` column.
data['seasons'].value_counts()

Summer    2208
Spring    2160
Autumn    1937
Winter    1896
Name: seasons, dtype: int64

`holiday`: map to a binary feature (1 = Holiday, 0 = No Holiday).

`seasons`: Create dummy columns for each season. Remove one season for linear independence.

### Turn `holiday` into a binary feature

In [6]:
# Encode `holiday` as 1 for the label 'Holiday' and 0 for every other label.
# The dtype guard keeps this cell safe to re-run: once the column is numeric,
# comparing it with the string 'Holiday' again would overwrite every row with 0.
if not pd.api.types.is_numeric_dtype(data['holiday']):
    data['holiday'] = np.where(data['holiday'] == 'Holiday', 1, 0)

In [7]:
# Check the class balance of the encoded `holiday` column.
data['holiday'].value_counts()

0    7793
1     408
Name: holiday, dtype: int64

### Create dummy features for `seasons`

In [8]:
# One-hot encode only `seasons`, named explicitly so that no other text column
# is expanded by accident. The dtype is pinned to uint8 so the dummies are
# stored as 0/1 integers regardless of the installed pandas version's default
# (bool since pandas 2.0).
# 'seasons_Winter' is then dropped to maintain linear independence; the result
# is a new frame, so `data` is left untouched and the cell can be re-run.
data_dummies = (
    pd.get_dummies(data, columns=['seasons'], dtype=np.uint8)
    .drop(columns=['seasons_Winter'])
)

### Check data

In [9]:
# Preview the first five rows of the encoded frame.
data_dummies.head(n=5)

Unnamed: 0,rented_bike_count,hour,temperature,humidity,wind_speed,visibility,solar_radiation,rainfall,snowfall,holiday,date_weekday,date_month,seasons_Autumn,seasons_Spring,seasons_Summer
264,133,0,-11.0,51,1.1,2000,0.0,0.0,0.0,0,1,12,0,0,0
265,127,1,-11.2,51,1.1,2000,0.0,0.0,0.0,0,1,12,0,0,0
266,95,2,-11.5,50,0.7,2000,0.0,0.0,0.0,0,1,12,0,0,0
267,54,3,-11.6,50,2.2,1995,0.0,0.0,0.0,0,1,12,0,0,0
268,46,4,-11.6,47,2.1,1982,0.0,0.0,0.0,0,1,12,0,0,0


### Save as pickle

In [10]:
# Persist the encoded frame as '02_data.pkl' in the configured data directory.
output_path = p.DATA_PATH + '02_data.pkl'
joblib.dump(data_dummies, output_path)

['./data/02_data.pkl']