### Feature Engineering Techniques: Scaling

This is the sixth notebook in a hands-on series on [feature engineering techniques](https://heartbeat.fritz.ai/hands-on-with-feature-engineering-techniques-variables-types-b2120e534680).

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the wind-farm measurements; the `year_month_day` column becomes the row index.
wind_farm_data = pd.read_csv(
    "data/windfarm_data.csv",
    index_col="year_month_day",
)

# Preview the first two rows to check the columns that were loaded.
wind_farm_data.head(2)

Unnamed: 0_level_0,temperature_00,wind_direction_00,wind_speed_00,temperature_08,wind_direction_08,wind_speed_08,temperature_16,wind_direction_16,wind_speed_16,power
year_month_day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-01,4.702022,106.74259,4.743292,7.189482,100.41638,6.593833,8.172301,99.288,5.967206,1959.3535
2014-01-02,7.695733,98.036705,6.142716,9.977118,94.03181,4.383676,9.690135,204.25444,1.696528,1266.6239


### Mean Normalization

$$x' = \frac{X - \operatorname{mean}(X)}{\max(X) - \min(X)}$$

where $X$ is the variable being normalized and the mean, max and min are computed per column (along `axis=0`).




In [4]:
# Per-column mean of every variable (axis=0 aggregates down the rows);
# this is the value subtracted from each column in mean normalization.
means = wind_farm_data.mean(axis=0)

# A bare assignment displays nothing; end the cell with the name so the
# per-column means are actually rendered as the cell output.
means

temperature_00          9.613588
wind_direction_00     198.381353
wind_speed_00           5.001413
temperature_08         12.827786
wind_direction_08     191.538394
wind_speed_08           5.136628
temperature_16         13.991459
wind_direction_16     202.051406
wind_speed_16           6.072578
power                2424.978880
dtype: float64

In [6]:
# Per-column range: the largest value minus the smallest value of each
# variable. This is the denominator of the mean-normalization formula.
max_min = (
    wind_farm_data.max(axis=0)
    - wind_farm_data.min(axis=0)
)

# Display the computed ranges.
max_min

temperature_00          33.212039
wind_direction_00      338.236448
wind_speed_00           18.929976
temperature_08          34.108495
wind_direction_08      337.340519
wind_speed_08           22.875357
temperature_16          36.608897
wind_direction_16      343.929967
wind_speed_16           18.298089
power                10567.787430
dtype: float64

In [8]:
# Mean normalization: centre every column on its mean, then divide by the
# column's (max - min) range. `means` and `max_min` are aligned to the
# DataFrame by column label.
train_scaled = wind_farm_data.sub(means).div(max_min)

# Preview the first two normalized rows.
train_scaled.head(2)

Unnamed: 0_level_0,temperature_00,wind_direction_00,wind_speed_00,temperature_08,wind_direction_08,wind_speed_08,temperature_16,wind_direction_16,wind_speed_16,power
year_month_day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2014-01-01,-0.147885,-0.270931,-0.013636,-0.165305,-0.270119,0.063702,-0.158955,-0.298792,-0.005759,-0.044061
2014-01-02,-0.057746,-0.29667,0.060291,-0.083576,-0.289045,-0.032915,-0.117494,0.006405,-0.239153,-0.109612


### Standardization

$$x' = \frac{X - \operatorname{mean}(X)}{\operatorname{std}(X)}$$

In [9]:
from sklearn.preprocessing import StandardScaler

In [10]:
# Build the standardizer; it is kept in `scaler` so the fitted object
# remains available after this cell.
scaler = StandardScaler()

# Learn the per-column statistics from the data and apply the scaling in a
# single step (equivalent to calling fit() followed by transform() on the
# same data). Only one dataset is scaled here; the result is a NumPy array.
train_scaled = scaler.fit_transform(wind_farm_data)

# Display the standardized values.
train_scaled

array([[-9.47081470e-01, -1.12094415e+00, -8.78054256e-02, ...,
        -1.15815444e+00, -4.07560353e-02, -2.28759074e-01],
       [-3.69813725e-01, -1.22743633e+00,  3.88238323e-01, ...,
         2.48284233e-02, -1.69257472e+00, -5.69093146e-01],
       [-1.03215113e-03,  9.25731414e-01,  1.87532738e+00, ...,
         5.43409484e-01,  2.30746819e+00,  2.51577089e+00],
       ...,
       [-1.96712573e-01, -1.01464629e+00,  2.44257872e+00, ...,
        -1.03561351e+00,  2.02244360e+00,  3.29517744e+00],
       [-2.97831589e-01, -1.16464866e+00,  1.69475152e+00, ...,
        -1.01937255e+00, -7.67845989e-01,  6.10685547e-01],
       [-6.14799449e-01, -1.01120782e+00, -4.23829384e-01, ...,
        -1.08393955e+00, -1.70824028e-01, -7.69394599e-01]])

###   Min-Max Scaling

$$x' = \frac{X - \min(X)}{\max(X) - \min(X)}$$

In [12]:
from sklearn.preprocessing import MinMaxScaler

# Build the min-max scaler; it is kept in `min_max` so the fitted object
# remains available after this cell.
min_max = MinMaxScaler()

# Learn each column's minimum and maximum from the data and rescale it in a
# single step (equivalent to calling fit() followed by transform() on the
# same data). Only one dataset is scaled here; the result is a NumPy array.
train_scaled = min_max.fit_transform(wind_farm_data)

# Display the rescaled values.
train_scaled

array([[0.32743522, 0.27263445, 0.23830374, ..., 0.27223236, 0.29544226,
        0.16581408],
       [0.41757456, 0.2468954 , 0.31223006, ..., 0.57742949, 0.0620475 ,
        0.10026302],
       [0.47515916, 0.76731387, 0.54316477, ..., 0.71121795, 0.6272361 ,
        0.69443241],
       ...,
       [0.444604  , 0.29832654, 0.631255  , ..., 0.30384663, 0.58696337,
        0.84455232],
       [0.42881445, 0.26207111, 0.51512258, ..., 0.30803663, 0.19270763,
        0.3274978 ],
       [0.37932047, 0.29915761, 0.18612152, ..., 0.29137902, 0.27706423,
        0.06168337]])