### Environment: t2.micro instance, AWS Marketplace -> Anaconda with Python 3

### 1) Binarization

In [8]:
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import Binarizer

# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()

# Binarizer maps a value to 1 only when it is strictly greater than the
# threshold; a value equal to the threshold (1.5 in the fourth row) maps to 0.
iris_binarized = Binarizer(threshold=1.5).fit_transform(iris.data)

# First five samples: raw measurements, then their binarized counterparts.
print(iris.data[:5])
print(np.around(iris_binarized[:5], 2))

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]
[[1. 1. 0. 0.]
 [1. 1. 0. 0.]
 [1. 1. 0. 0.]
 [1. 1. 0. 0.]
 [1. 1. 0. 0.]]


### 2) Data Rescaling

In [9]:
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import MinMaxScaler

# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()

# Rescale each feature (column) into the [0, 1] range. The scaler is fitted on
# the full dataset, so the minimum and maximum come from all samples and not
# only from the rows printed below.
iris_rescaled = MinMaxScaler(feature_range=(0, 1)).fit_transform(iris.data)

# First five samples: raw measurements, then the rescaled values (2 decimals).
print(iris.data[:5])
print(np.around(iris_rescaled[:5], 2))

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]
[[0.22 0.62 0.07 0.04]
 [0.17 0.42 0.07 0.04]
 [0.11 0.5  0.05 0.04]
 [0.08 0.46 0.08 0.04]
 [0.19 0.67 0.07 0.04]]


### 3) Standardization

In [12]:
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler

# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()

# Standardize each feature (column) with StandardScaler's default settings.
# The scaler is fitted on the full dataset, so the statistics it uses come
# from all samples and not only from the rows printed below.
iris_standardized = StandardScaler().fit_transform(iris.data)

# First five samples: raw measurements, then the standardized values
# (2 decimals).
print(iris.data[:5])
print(np.around(iris_standardized[:5], 2))

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]
[[-0.9   1.02 -1.34 -1.32]
 [-1.14 -0.13 -1.34 -1.32]
 [-1.39  0.33 -1.4  -1.32]
 [-1.51  0.1  -1.28 -1.32]
 [-1.02  1.25 -1.34 -1.32]]


### 4) Normalization

In [16]:
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import Normalizer

# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()

# Unlike the column-wise scalers, Normalizer works per sample: each row is
# rescaled independently of the others (default settings are used here).
iris_normalized = Normalizer().fit_transform(iris.data)

# First five samples: raw measurements, then the normalized rows (2 decimals).
print(iris.data[:5])
print(np.around(iris_normalized[:5], 2))

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]
[[0.8  0.55 0.22 0.03]
 [0.83 0.51 0.24 0.03]
 [0.81 0.55 0.22 0.03]
 [0.8  0.54 0.26 0.03]
 [0.79 0.57 0.22 0.03]]


### 5) Encoding Categorical Variables

In [39]:
from sklearn.preprocessing import LabelEncoder

# Small table of (id, word) rows. Because ints and strings are mixed, NumPy
# stores every entry as a string, which is why the ids print with quotes.
rows = [
    (1, "this"),
    (2, "is"),
    (3, "how"),
    (4, "we"),
    (5, "encode"),
    (6, "categorical"),
    (7, "variables"),
    (8, "variables"),
    (9, "Variables"),
]
data = np.array(rows)
print(data)

# Encode the word column as integer codes. Equal words share a code, while
# "Variables" and "variables" are different labels and get different codes.
word_codes = LabelEncoder().fit_transform(data[:, -1])

# Overwrite the word column in place. `data` is a string array, so the integer
# codes are converted to strings when stored (they print with quotes).
data[:, -1] = word_codes
print(data)

[['1' 'this']
 ['2' 'is']
 ['3' 'how']
 ['4' 'we']
 ['5' 'encode']
 ['6' 'categorical']
 ['7' 'variables']
 ['8' 'variables']
 ['9' 'Variables']]
[['1' '5']
 ['2' '4']
 ['3' '3']
 ['4' '7']
 ['5' '2']
 ['6' '1']
 ['7' '6']
 ['8' '6']
 ['9' '0']]


### 6) One-Hot Encoding

In [11]:
import pandas as pd

# Toy rental listings: three cities with three rents each.
df = pd.DataFrame({
    'City': ['SF', 'SF', 'SF', 'NYC', 'NYC', 'NYC', 'Seattle', 'Seattle', 'Seattle'],
    'Rent': [3999, 4000, 4001, 3499, 3500, 3501, 2499, 2500, 2501],
})

# One-hot encode the categorical 'City' column; 'Rent' is passed through as is.
# - columns=['City'] names the column to encode explicitly, so the one-element
#   `prefix` list always lines up with exactly one encoded column.
# - dtype=int keeps the indicator columns as 0/1. From pandas 2.0 the default
#   dtype is bool, which would print True/False instead.
df_encoded = pd.get_dummies(df, columns=['City'], prefix=['city'], dtype=int)

print(df_encoded)

   Rent  city_NYC  city_SF  city_Seattle
0  3999         0        1             0
1  4000         0        1             0
2  4001         0        1             0
3  3499         1        0             0
4  3500         1        0             0
5  3501         1        0             0
6  2499         0        0             1
7  2500         0        0             1
8  2501         0        0             1
