## 04 Data Preprocessing dengan Scikit-Learn

### Sample Data

In [None]:
import numpy as np
from sklearn import preprocessing

# Small toy matrix (4 rows x 3 columns) with mixed positive and negative
# values; every preprocessing example below is applied to this same array.
sample_data = np.array(
    [
        [2.1, -1.9, 5.5],
        [-1.5, 2.4, 3.5],
        [0.5, -7.9, 5.6],
        [5.9, 2.3, -5.8],
    ]
)
sample_data.shape

## Cara-cara melakukan Preprocessing

### 1. Binarisation

In [None]:
# Show the untouched input so it can be compared with the binarized result.
sample_data

In [None]:
# Binarizer thresholds element-wise: values strictly greater than `threshold`
# become 1, all other values become 0.
preprocessor = preprocessing.Binarizer(threshold=0.5)

# Binarizer is stateless, so transform() can be called without a prior fit().
binarized_data = preprocessor.transform(X=sample_data)
binarized_data

### 2. Scaling
Tujuannya adalah menghasilkan data numerik yang berada pada rentang tertentu, misalnya antara 0 dan 1.

In [None]:
# Show the untouched input so it can be compared with the scaled result.
sample_data

In [None]:
# Method 1: fit, then transform, as two explicit steps.
# `feature_range` is the target (min, max) interval of the scaled output.
# fit() learns the scaling statistics from the data and returns the
# estimator itself, so it can be chained directly onto the constructor.
preprocessor = preprocessing.MinMaxScaler(feature_range=(0, 1)).fit(sample_data)

# transform() applies the learned scaling to the data.
scaled_data = preprocessor.transform(X=sample_data)
scaled_data

In [None]:
# Method 2: fit_transform() fits the scaler and transforms the data in one
# call, equivalent to fit() followed by transform() on the same array.
#
# A fresh MinMaxScaler is created here on purpose: `preprocessor` is also the
# name bound to the Binarizer earlier in this notebook, so reusing whatever
# object is left in the kernel would silently produce binarized (not scaled)
# data if the previous scaling cell had not been run.
preprocessor = preprocessing.MinMaxScaler(feature_range=(0, 1))
scaled_data = preprocessor.fit_transform(sample_data)
scaled_data

### 3. Normalisasi

In [8]:
# Show the untouched input so it can be compared with the normalized results.
sample_data

array([[ 2.1, -1.9,  5.5],
       [-1.5,  2.4,  3.5],
       [ 0.5, -7.9,  5.6],
       [ 5.9,  2.3, -5.8]])

#### L1 Normalisation: Least Absolute Deviations
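Referensi: https://en.wikipedia.org/wiki/Least_absolute_deviations

Normalisasi L1 menskalakan setiap baris sehingga jumlah nilai absolut elemen-elemennya sama dengan 1.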

In [11]:
# norm='l1' rescales each row so that the absolute values of its entries
# sum to 1 (normalize() works row-wise by default).
l1_normalized = preprocessing.normalize(X=sample_data, norm='l1')
print(l1_normalized)

[[ 0.22105263 -0.2         0.57894737]
 [-0.2027027   0.32432432  0.47297297]
 [ 0.03571429 -0.56428571  0.4       ]
 [ 0.42142857  0.16428571 -0.41428571]]


#### L2 Normalisation: Least Square
Referensi: https://en.wikipedia.org/wiki/Least_squares

Normalisasi L2 menskalakan setiap baris sehingga jumlah kuadrat elemen-elemennya sama dengan 1.

In [12]:
# norm='l2' rescales each row so that the squares of its entries sum to 1,
# i.e. every row ends up with unit Euclidean length.
l2_normalized = preprocessing.normalize(X=sample_data, norm='l2')
print(l2_normalized)

[[ 0.33946114 -0.30713151  0.88906489]
 [-0.33325106  0.53320169  0.7775858 ]
 [ 0.05156558 -0.81473612  0.57753446]
 [ 0.68706914  0.26784051 -0.6754239 ]]
