In [53]:
import numpy as np
import pandas as pd

## Min Max Scaler

![MinMaxScaler.PNG](attachment:MinMaxScaler.PNG)

In [60]:
from numpy import set_printoptions
from sklearn.preprocessing import MinMaxScaler

In [55]:
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
df = pd.read_csv('../DataSets/diabetes.csv', names=names)

In [56]:
df.head()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [58]:
df['preg'].unique()

array([ 6,  1,  8,  0,  5,  3, 10,  2,  4,  7,  9, 11, 13, 15, 17, 12, 14],
      dtype=int64)

In [59]:
target_name = 'class'

# Separate object for target feature
y = df[target_name]

# Separate Object for Input Features
X = df.drop(target_name, axis=1)

In [61]:
scaler = MinMaxScaler(feature_range=(0, 1))

In [62]:
rescaledX = scaler.fit_transform(X)

In [14]:
df['preg'].unique()

array([ 6,  1,  8,  0,  5,  3, 10,  2,  4,  7,  9, 11, 13, 15, 17, 12, 14],
      dtype=int64)

In [63]:
# summarize transformed data
print(rescaledX[0:5,:])

[[0.353 0.744 0.59  0.354 0.    0.501 0.234 0.483]
 [0.059 0.427 0.541 0.293 0.    0.396 0.117 0.167]
 [0.471 0.92  0.525 0.    0.    0.347 0.254 0.183]
 [0.059 0.447 0.541 0.232 0.111 0.419 0.038 0.   ]
 [0.    0.688 0.328 0.354 0.199 0.642 0.944 0.2  ]]


## STANDARDIZE

![Z-Score.PNG](attachment:Z-Score.PNG)

![StandardDeviation.PNG](attachment:StandardDeviation.PNG)

In [64]:
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [65]:
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
df = pd.read_csv('../DataSets/diabetes.csv', names=names)

In [66]:
df.head()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [67]:
target_name = 'class'

# Separate object for target feature
y = df[target_name]

# Separate Object for Input Features
X = df.drop(target_name, axis=1)

In [68]:
scaler = StandardScaler()

In [69]:
scaler.fit(X)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [70]:
rescaledX = scaler.transform(X)

In [71]:
# summarize transformed data
set_printoptions(precision=3)
print(rescaledX[0:5,:])

[[ 0.64   0.848  0.15   0.907 -0.693  0.204  0.468  1.426]
 [-0.845 -1.123 -0.161  0.531 -0.693 -0.684 -0.365 -0.191]
 [ 1.234  1.944 -0.264 -1.288 -0.693 -1.103  0.604 -0.106]
 [-0.845 -0.998 -0.161  0.155  0.123 -0.494 -0.921 -1.042]
 [-1.142  0.504 -1.505  0.907  0.766  1.41   5.485 -0.02 ]]


In [74]:
df['preg'].std(), df['preg'].mean()

(3.3695780626988623, 3.8450520833333335)

In [75]:
(6 - 3.8450520833333335)  / 3.3695780626988623

0.6395304921176576

## NORMALIZE

![Normalizer.PNG](attachment:Normalizer.PNG)

In [79]:
from sklearn.preprocessing import Normalizer
import pandas as pd

In [80]:
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
df = pd.read_csv('../DataSets/diabetes.csv', names=names)

In [82]:
df.head()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [83]:
target_name = 'class'

# Separate object for target feature
y = df[target_name]

# Separate Object for Input Features
X = df.drop(target_name, axis=1)

In [84]:
scaler = Normalizer()

In [85]:
scaler.fit(X)

Normalizer(copy=True, norm='l2')

In [86]:
rescaledX = scaler.transform(X)

In [87]:
# summarize transformed data
print(rescaledX[0:5,:])

[[0.034 0.828 0.403 0.196 0.    0.188 0.004 0.28 ]
 [0.008 0.716 0.556 0.244 0.    0.224 0.003 0.261]
 [0.04  0.924 0.323 0.    0.    0.118 0.003 0.162]
 [0.007 0.588 0.436 0.152 0.622 0.186 0.001 0.139]
 [0.    0.596 0.174 0.152 0.731 0.188 0.01  0.144]]


In [102]:
148 / np.sqrt(6**2 + 148**2 + 72 ** 2 + 35**2 + 0 + 33.6 ** 2 + 0.627**2 + 50 ** 2)

0.8276251286315007

## BINARIZATION

In [94]:
from sklearn.preprocessing import Binarizer
import pandas as pd

In [95]:
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
df = pd.read_csv('../DataSets/diabetes.csv', names=names)

In [96]:
df.head()

Unnamed: 0,preg,plas,pres,skin,test,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [97]:
target_name = 'class'

# Separate object for target feature
y = df[target_name]

# Separate Object for Input Features
X = df.drop(target_name, axis=1)

In [98]:
scaler = Binarizer()

In [99]:
scaler.fit(X)

Binarizer(copy=True, threshold=0.0)

In [100]:
rescaledX = scaler.transform(X)

In [101]:
# summarize transformed data
print(rescaledX[0:5,:])

[[1. 1. 1. 1. 0. 1. 1. 1.]
 [1. 1. 1. 1. 0. 1. 1. 1.]
 [1. 1. 1. 0. 0. 1. 1. 1.]
 [1. 1. 1. 1. 1. 1. 1. 1.]
 [0. 1. 1. 1. 1. 1. 1. 1.]]


In [None]:
<= Threshold ==> 0; > Threshold = 1