# Data Preprocessing

In [8]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style(style='whitegrid')

## 1. Transformation using `StandardScaler()`, `MinMaxScaler()`, `MaxAbsScaler()`

### Standard Scaler
$$
\bar{x} = \frac {\textbf x - \mu} { \sigma } 
$$

In [6]:
from sklearn.preprocessing import StandardScaler

In [9]:
X = np.array([4, 3, 2, 5, 6]).reshape((5, 1))

ss = StandardScaler()
print("MEAN : ", X.mean(), "\nSTANDARD DEVIATION : ", round(X.std(), 2))
ss.fit_transform(X)

MEAN :  4.0 
STANDARD DEVIATION :  1.41


array([[ 0.        ],
       [-0.70710678],
       [-1.41421356],
       [ 0.70710678],
       [ 1.41421356]])

### Min Max Scaler ( Normalization ) 

$$
\bar{x} = \frac {\textbf x - \textbf x_{min}} {\textbf x_{max} - \textbf x_{min}}   
$$


In [10]:
from sklearn.preprocessing import MinMaxScaler

In [11]:
X = np.array([4, 3, 2, 5, 6]).reshape((5, 1))

mm = MinMaxScaler()
print("MINIMUM : ", X.min(), "\nMAXIMUM : ", X.max())
mm.fit_transform(X)

MINIMUM :  2 
MAXIMUM :  6


array([[0.5 ],
       [0.25],
       [0.  ],
       [0.75],
       [1.  ]])

### Max Absolute Scaler

$$
\bar x = \frac {\textbf x} { max(\textbf x_{max}, \left| \textbf x_{min} \right|)}
$$

In [12]:
from sklearn.preprocessing import MaxAbsScaler

In [13]:
X = np.array([4, 2, 5, -2, -100]).reshape((5, 1))

mm = MinMaxScaler()
print("MINIMUM : ", X.min(), "\nABSOLUTE VALUE OF MIN : ", abs(X.min()), "\nMAXIMUM : ", X.max())
mm.fit_transform(X)

MINIMUM :  -100 
ABSOLUTE VALUE OF MIN :  100 
MAXIMUM :  5


array([[0.99047619],
       [0.97142857],
       [1.        ],
       [0.93333333],
       [0.        ]])