In [1]:
import pandas as pd
import numpy as np

In [3]:
# Toy dataset: two numeric features whose magnitudes differ by orders of magnitude.
d = dict(
    Age=[23, 35, 36, 47, 50],
    Salary=[75000, 85000, 95000, 90000, 78000],
)
df = pd.DataFrame(data=d)
df

Unnamed: 0,Age,Salary
0,23,75000
1,35,85000
2,36,95000
3,47,90000
4,50,78000


## Maximum absolute scaling (Decimal Scaling)

- Used to rescale a column to the range -1 to 1 by dividing every observation by the column's maximum absolute value.

In [5]:
# Manual max-abs scaling, vectorised: every column is divided by its own
# largest absolute value (the division aligns on column labels).
df_max_scale = df.div(df.abs().max(), axis='columns')

df_max_scale

Unnamed: 0,Age,Salary
0,0.46,0.789474
1,0.7,0.894737
2,0.72,1.0
3,0.94,0.947368
4,1.0,0.821053


In [6]:
from sklearn.preprocessing import MaxAbsScaler

# Same max-abs scaling as the manual version, done with scikit-learn.
df_mas = df.copy()
mas = MaxAbsScaler()
mas.fit(df_mas[['Age', 'Salary']]).transform(df_mas[['Age', 'Salary']])

array([[0.46      , 0.78947368],
       [0.7       , 0.89473684],
       [0.72      , 1.        ],
       [0.94      , 0.94736842],
       [1.        , 0.82105263]])

## Min Max Scaling

- Used to scale the data to a range of 0 to 1 by subtracting the feature's minimum value and then dividing by its range (maximum minus minimum).

In [13]:
# Manual min-max scaling, vectorised: (x - min) / (max - min), column by column.
df_min_max = (df - df.min()) / (df.max() - df.min())

df_min_max

Unnamed: 0,Age,Salary
0,0.0,0.0
1,0.444444,0.5
2,0.481481,1.0
3,0.888889,0.75
4,1.0,0.15


In [17]:
from sklearn.preprocessing import MinMaxScaler

# Same min-max scaling as the manual version, done with scikit-learn.
df_mms = df.copy()
mms = MinMaxScaler()
mms.fit(df_mms[['Age', 'Salary']]).transform(df_mms[['Age', 'Salary']])

array([[0.        , 0.        ],
       [0.44444444, 0.5       ],
       [0.48148148, 1.        ],
       [0.88888889, 0.75      ],
       [1.        , 0.15      ]])

## Standard Scaling:

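- Used to rescale a column to zero mean and unit standard deviation by subtracting the mean and dividing by the standard deviation. The result is not bounded: for roughly normal data most values fall within [-3, 3], but outliers can lie far outside that interval.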

In [18]:
# Manual standard scaling (z-score), vectorised: (x - mean) / std, column by column.
# NOTE: pandas' .std() is the sample standard deviation (ddof=1), whereas
# scikit-learn's StandardScaler divides by the population standard deviation
# (ddof=0). That is why these values differ from the StandardScaler cell
# by a factor of sqrt(n / (n - 1)).
df_stand_scaling = (df - df.mean()) / df.std()

df_stand_scaling

Unnamed: 0,Age,Salary
0,-1.413113,-1.161611
1,-0.297497,0.0484
2,-0.20453,1.258412
3,0.818118,0.653406
4,1.097022,-0.798608


In [19]:
from sklearn.preprocessing import StandardScaler

# Standard scaling (z-score) of both columns with scikit-learn.
df_ss = df.copy()
ss = StandardScaler()
ss.fit(df_ss[['Age', 'Salary']]).transform(df_ss[['Age', 'Salary']])

array([[-1.57990842, -1.29872107],
       [-0.3326123 ,  0.05411338],
       [-0.22867096,  1.40694783],
       [ 0.91468382,  0.7305306 ],
       [ 1.22650786, -0.89287074]])

## Robust Scaling:

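- Used to scale a column by subtracting its median and dividing by its interquartile range (IQR = 75th percentile - 25th percentile), which makes the scaling robust to outliers. The result is not bounded to a fixed range; outliers remain large after scaling.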

In [21]:
# Manual robust scaling, vectorised: (x - median) / IQR, column by column,
# where IQR is the 75th percentile minus the 25th percentile.
df_rob_scale = (df - df.median()) / (df.quantile(0.75) - df.quantile(0.25))

df_rob_scale

Unnamed: 0,Age,Salary
0,-1.083333,-0.833333
1,-0.083333,0.0
2,0.0,0.833333
3,0.916667,0.416667
4,1.166667,-0.583333


In [22]:
from sklearn.preprocessing import RobustScaler

# Same robust (median / IQR) scaling as the manual version, done with scikit-learn.
df_rs = df.copy()
rs = RobustScaler()
rs.fit(df_rs[['Age', 'Salary']]).transform(df_rs[['Age', 'Salary']])

array([[-1.08333333, -0.83333333],
       [-0.08333333,  0.        ],
       [ 0.        ,  0.83333333],
       [ 0.91666667,  0.41666667],
       [ 1.16666667, -0.58333333]])

## Scaling on a data set

In [23]:
# seaborn is not imported by the earlier cells shown in this notebook, so without
# this import the cell raises NameError for `sns` after Restart Kernel -> Run All.
import seaborn as sns

# Load seaborn's Titanic example dataset as a DataFrame and preview the first rows.
data = sns.load_dataset('titanic')
data.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


In [26]:
# Summary statistics of the two numeric columns that will be scaled.
data.loc[:, ['age', 'fare']].describe()

Unnamed: 0,age,fare
count,714.0,891.0
mean,29.699118,32.204208
std,14.526497,49.693429
min,0.42,0.0
25%,20.125,7.9104
50%,28.0,14.4542
75%,38.0,31.0
max,80.0,512.3292


## Why Scaling is necessary on age and fare

- Min value of age is 0.42 years (about 5 months) and the max value of age is 80 years.
- Min value of fare is 0 and the max value of fare is 512.33.

- Note: We clearly see the imbalance in the scale for age and fare columns. Hence, scaling becomes necessary

## Apply Max Absolute Scaling

In [28]:
from sklearn.preprocessing import MaxAbsScaler

# Work on a copy so the raw Titanic frame stays untouched.
data_max_abs = data.copy()

# Max-abs scale 'age' and 'fare'; fit and transform are done as two explicit steps.
mas = MaxAbsScaler()
mas.fit(data_max_abs[['age', 'fare']])
data_max_abs[['age', 'fare']] = mas.transform(data_max_abs[['age', 'fare']])
data_max_abs

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,0.2750,1,0,0.014151,S,Third,man,True,,Southampton,no,False
1,1,1,female,0.4750,1,0,0.139136,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,0.3250,0,0,0.015469,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,0.4375,1,0,0.103644,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,0.4375,0,0,0.015713,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,0.3375,0,0,0.025374,S,Second,man,True,,Southampton,no,True
887,1,1,female,0.2375,0,0,0.058556,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,0.045771,S,Third,woman,False,,Southampton,no,False
889,1,1,male,0.3250,0,0,0.058556,C,First,man,True,C,Cherbourg,yes,True


In [32]:
# Summary statistics of the max-abs scaled columns.
data_max_abs.loc[:, ['age', 'fare']].describe()

Unnamed: 0,age,fare
count,714.0,891.0
mean,0.371239,0.062858
std,0.181581,0.096995
min,0.00525,0.0
25%,0.251563,0.01544
50%,0.35,0.028213
75%,0.475,0.060508
max,1.0,1.0


## Apply min max scaling

In [30]:
from sklearn.preprocessing import MinMaxScaler

# Work on a copy so the raw Titanic frame stays untouched.
data_mms = data.copy()

# Min-max scale 'age' and 'fare'; fit and transform are done as two explicit steps.
mms = MinMaxScaler()
mms.fit(data_mms[['age', 'fare']])
data_mms[['age', 'fare']] = mms.transform(data_mms[['age', 'fare']])
data_mms.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,0.271174,1,0,0.014151,S,Third,man,True,,Southampton,no,False
1,1,1,female,0.472229,1,0,0.139136,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,0.321438,0,0,0.015469,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,0.434531,1,0,0.103644,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,0.434531,0,0,0.015713,S,Third,man,True,,Southampton,no,True


In [31]:
# Summary statistics of the min-max scaled columns.
data_mms.loc[:, ['age', 'fare']].describe()

Unnamed: 0,age,fare
count,714.0,891.0
mean,0.367921,0.062858
std,0.18254,0.096995
min,0.0,0.0
25%,0.247612,0.01544
50%,0.346569,0.028213
75%,0.472229,0.060508
max,1.0,1.0


## Apply Robust Scaling

In [33]:
from sklearn.preprocessing import RobustScaler

# Work on a copy so the raw Titanic frame stays untouched.
data_rs = data.copy()

# Robust-scale 'age' and 'fare'; fit and transform are done as two explicit steps.
rs = RobustScaler()
rs.fit(data_rs[['age', 'fare']])
data_rs[['age', 'fare']] = rs.transform(data_rs[['age', 'fare']])

data_rs.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,-0.335664,1,0,-0.312011,S,Third,man,True,,Southampton,no,False
1,1,1,female,0.559441,1,0,2.461242,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,-0.111888,0,0,-0.282777,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,0.391608,1,0,1.673732,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,0.391608,0,0,-0.277363,S,Third,man,True,,Southampton,no,True


In [34]:
# Summary statistics of the robust-scaled columns.
data_rs.loc[:, ['age', 'fare']].describe()

Unnamed: 0,age,fare
count,714.0,891.0
mean,0.095056,0.768745
std,0.812671,2.1522
min,-1.542937,-0.626005
25%,-0.440559,-0.283409
50%,0.0,0.0
75%,0.559441,0.716591
max,2.909091,21.562738


## Apply Standard Scaling

In [39]:
from sklearn.preprocessing import StandardScaler

# Work on a copy so the raw Titanic frame stays untouched.
data_ss = data.copy()

# Standard-scale 'age' and 'fare'; fit and transform are done as two explicit steps.
ss = StandardScaler()
ss.fit(data_ss[['age', 'fare']])
data_ss[['age', 'fare']] = ss.transform(data_ss[['age', 'fare']])
data_ss.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,-0.530377,1,0,-0.502445,S,Third,man,True,,Southampton,no,False
1,1,1,female,0.571831,1,0,0.786845,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,-0.254825,0,0,-0.488854,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,0.365167,1,0,0.42073,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,0.365167,0,0,-0.486337,S,Third,man,True,,Southampton,no,True


In [40]:
# Summary statistics of the standard-scaled columns.
data_ss.loc[:, ['age', 'fare']].describe()

Unnamed: 0,age,fare
count,714.0,891.0
mean,2.388379e-16,3.9873330000000004e-18
std,1.000701,1.000562
min,-2.016979,-0.6484217
25%,-0.6595416,-0.4891482
50%,-0.1170488,-0.3573909
75%,0.571831,-0.02424635
max,3.465126,9.667167
