# Feature Scaling
### Feature scaling is a technique for bringing the independent features in the data into a common, fixed range. It is performed during data pre-processing to handle features whose magnitudes, values, or units vary widely.

In [73]:
import numpy as np
import pandas as pd

In [74]:
# Load the CSV, keeping only the eight named feature columns, and preview the first rows.
df=pd.read_csv("diabetis.csv",usecols=["col1","col2","col3","col4","col5","col6","col7","col8"])
df.head()

Unnamed: 0,col1,col2,col3,col4,col5,col6,col7,col8
0,6,148,72,35,0,33.6,0.627,50
1,1,85,66,29,0,26.6,0.351,31
2,8,183,64,0,0,23.3,0.672,32
3,1,89,66,23,94,28.1,0.167,21
4,0,137,40,35,168,43.1,2.288,33


In [75]:
# Pull the DataFrame's values out as a NumPy array for the scaling functions.
dataset=df.values
# Take an independent copy: plain assignment would only bind a second name to
# the same array, so scaling `dataset` in place would also overwrite the data
# kept aside in `dataset1`.
dataset1=dataset.copy()
dataset

array([[  6.   , 148.   ,  72.   , ...,  33.6  ,   0.627,  50.   ],
       [  1.   ,  85.   ,  66.   , ...,  26.6  ,   0.351,  31.   ],
       [  8.   , 183.   ,  64.   , ...,  23.3  ,   0.672,  32.   ],
       ...,
       [  5.   , 121.   ,  72.   , ...,  26.2  ,   0.245,  30.   ],
       [  1.   , 126.   ,  60.   , ...,  30.1  ,   0.349,  47.   ],
       [  1.   ,  93.   ,  70.   , ...,  30.4  ,   0.315,  23.   ]])

In [76]:
def GetMinMax(dataset):
    """Return the minimum and maximum of every column of a 2-D dataset.

    Parameters
    ----------
    dataset : array-like, shape (n_rows, n_cols)
        Numeric table; anything ``np.asarray`` can convert to a 2-D array
        (an ndarray or a list of equal-length rows).

    Returns
    -------
    list
        One ``[MinVal, MaxVal]`` pair per column, in column order.

    Raises
    ------
    ValueError
        If ``dataset`` is not 2-D, or if it has columns but no rows.
    """
    data = np.asarray(dataset)
    if data.ndim != 2:
        raise ValueError("dataset must be 2-D, got %d dimension(s)" % data.ndim)
    # Column-wise reductions run inside NumPy instead of a Python loop over
    # every cell. Unlike the built-in min()/max(), they propagate NaN
    # regardless of where in the column it appears.
    min_vals = data.min(axis=0)
    max_vals = data.max(axis=0)
    return [[low, high] for low, high in zip(min_vals, max_vals)]

In [77]:
# Per-column [min, max] pairs of the dataset; displayed as the cell output.
MinMax=GetMinMax(dataset)
MinMax

[[0.0, 17.0],
 [0.0, 199.0],
 [0.0, 122.0],
 [0.0, 99.0],
 [0.0, 846.0],
 [0.0, 67.1],
 [0.078, 2.42],
 [21.0, 81.0]]

In [78]:
def Normalization(dataset, MinMax):
    """Min-max scale every row of ``dataset`` in place.

    A value ``x`` in column ``i`` becomes ``(x - MinVal) / (MaxVal - MinVal)``
    where ``MinVal, MaxVal = MinMax[i][0], MinMax[i][1]``. Values inside the
    given bounds land in 0-1; values outside them land below 0 or above 1.

    Parameters
    ----------
    dataset : mutable sequence of mutable rows
        A 2-D ndarray or a list of lists. It is modified in place.
    MinMax : sequence of [min, max] pairs
        Bounds indexed by column position, e.g. the result of ``GetMinMax``.

    Returns
    -------
    None
        The scaled values are written back into ``dataset``.

    Raises
    ------
    IndexError
        If a row has more entries than ``MinMax`` has pairs.
    """
    # The bounds are the same for every row, so resolve each column's offset
    # and range once instead of once per cell.
    bounds = [(pair[0], pair[1] - pair[0]) for pair in MinMax]
    for row in dataset:
        for i, value in enumerate(row):
            low, span = bounds[i]
            # No abs(): a value below the column minimum must scale to a
            # negative number, not be mirrored onto a positive one.
            # A constant column has zero range; map it to 0.0 rather than
            # dividing by zero.
            row[i] = (value - low) / span if span != 0 else 0.0

In [79]:
# Scale dataset in place (Normalization writes into the array and returns nothing), then display it.
Normalization(dataset,MinMax)
dataset

array([[0.35294118, 0.74371859, 0.59016393, ..., 0.50074516, 0.23441503,
        0.48333333],
       [0.05882353, 0.42713568, 0.54098361, ..., 0.39642325, 0.11656704,
        0.16666667],
       [0.47058824, 0.91959799, 0.52459016, ..., 0.34724292, 0.25362938,
        0.18333333],
       ...,
       [0.29411765, 0.6080402 , 0.59016393, ..., 0.390462  , 0.07130658,
        0.15      ],
       [0.05882353, 0.63316583, 0.49180328, ..., 0.4485842 , 0.11571307,
        0.43333333],
       [0.05882353, 0.46733668, 0.57377049, ..., 0.45305514, 0.10119556,
        0.03333333]])

In [80]:
# Sanity check: print the smallest and largest value of every column.
for column in dataset.T:
    print(column.min(),column.max())

0.0 1.0
0.0 1.0
0.0 1.0
0.0 1.0
0.0 1.0
0.0 1.0
0.0 1.0
0.0 1.0


In [81]:
# All values in the dataset are now normalized to the range 0-1
# using min-max normalization.
# However, normalizing every column is not always appropriate;
# normalization can instead be applied only to the columns that need it.

In [84]:
# Applying standardization to the dataset
# Standardization is a rescaling technique that centers each feature's distribution
# on 0 and scales it to a standard deviation of 1.
def GetMeanStd(dataset):
    """Collect the mean and standard deviation of every column.

    ``dataset`` is a 2-D array indexed as ``dataset[:, i]``. The result is a
    list holding one ``[mean, std]`` pair per column, in column order, with
    both statistics computed by ``np.mean`` and ``np.std`` on that column.
    """
    column_count = dataset.shape[1]
    columns = (dataset[:, idx] for idx in range(column_count))
    return [[np.mean(values), np.std(values)] for values in columns]

In [86]:
# Per-column [mean, std] pairs of dataset1, in the form Standardization expects.
MeanStd=GetMeanStd(dataset1)

In [87]:
def Standardization(dataset, MeanStd):
    """Standardize every column of ``dataset`` in place (z-score).

    A value ``x`` in column ``c`` becomes ``(x - mean) / std`` where
    ``mean, std = MeanStd[c][0], MeanStd[c][1]``.

    Parameters
    ----------
    dataset : numpy.ndarray, shape (n_rows, n_cols)
        Array to rescale. It is modified in place.
    MeanStd : sequence of [mean, std] pairs
        Statistics indexed by column position, e.g. the result of
        ``GetMeanStd``.

    Returns
    -------
    None
        The standardized values are written back into ``dataset``.

    Raises
    ------
    IndexError
        If ``MeanStd`` has fewer pairs than ``dataset`` has columns.
    """
    for col in range(dataset.shape[1]):
        mean, std = MeanStd[col][0], MeanStd[col][1]
        # A constant column has std == 0; divide by 1 instead so it becomes
        # all zeros rather than NaN/inf.
        scale = std if std != 0 else 1.0
        # Rescale the whole column in one NumPy operation instead of cell by cell.
        dataset[:, col] = (dataset[:, col] - mean) / scale

In [91]:
# Standardize dataset1 in place with the column statistics in MeanStd, then
# display the result. Run this cell only once: a second pass would re-apply
# the same MeanStd to values that are already standardized.
Standardization(dataset1,MeanStd)
dataset1

array([[ 0.63994726,  0.84832379,  0.14964075, ...,  0.20401277,
         0.46849198,  1.4259954 ],
       [-0.84488505, -1.12339636, -0.16054575, ..., -0.68442195,
        -0.36506078, -0.19067191],
       [ 1.23388019,  1.94372388, -0.26394125, ..., -1.10325546,
         0.60439732, -0.10558415],
       ...,
       [ 0.3429808 ,  0.00330087,  0.14964075, ..., -0.73518964,
        -0.68519336, -0.27575966],
       [-0.84488505,  0.1597866 , -0.47073225, ..., -0.24020459,
        -0.37110101,  1.17073215],
       [-0.84488505, -0.8730192 ,  0.04624525, ..., -0.20212881,
        -0.47378505, -0.87137393]])

In [92]:
# Print the smallest and largest value of every column of dataset1.
for values in dataset1.T:
    print(values.min(),values.max())

-1.1418515161634992 3.906578350084603
-3.783653713779629 2.444478206307916
-3.572597239872642 2.7345282474204655
-1.288212212945236 4.921865835155968
-0.6928905722954675 6.652839378368461
-4.060473872668308 4.45580749082507
-1.1895531764897846 5.883564765877941
-1.0415494364835027 4.063715751598595


In [None]:
# Unlike normalization, which maps values into the range 0-1, standardization
# has no fixed lower or upper bound on the resulting values.