In [2]:
import pandas as pd
import numpy as np 
import seaborn as sns
import matplotlib.pyplot as plt

![](https://miro.medium.com/max/1400/1*GAcEj37smCOCZMrqp-rjjA.png)

In [3]:
# Numeric loan features whose scaling is compared in this notebook.
cols = ['loan_amnt', 'int_rate', 'installment']
# `usecols` ignores the order of the list and returns columns in file order,
# so re-select with `[cols]` to pin the column order that the positional
# "(Loan Amount, Int rate and Installment)" labels printed below rely on.
data = pd.read_csv('loan.csv', nrows=30000, usecols=cols)[cols]

In [4]:
# Summary statistics (count, mean, std, min, quartiles, max) for each loaded column,
# shown before any scaling so the raw value ranges can be compared.
data.describe()

Unnamed: 0,loan_amnt,int_rate,installment
count,30000.0,30000.0,30000.0
mean,15941.94,12.948691,461.282355
std,10257.787699,4.880157,287.407671
min,1000.0,6.0,30.64
25%,8000.0,8.81,248.4
50%,13800.0,11.8,380.66
75%,22000.0,16.14,622.7
max,40000.0,30.84,1618.24


# Standardization (Standard Scaler):

In [6]:
from sklearn.preprocessing import StandardScaler

# Standardize every column: fit the scaler on the data first,
# then apply the fitted transformation to the same data.
scaler = StandardScaler()
scaler.fit(data)
data_scaled = scaler.transform(data)

In [7]:
# Per-column mean and standard deviation of the standardized data
# (expected to be approximately 0 and exactly 1 respectively).
print(np.mean(data_scaled, axis=0))
print(np.std(data_scaled, axis=0))

[-8.71599089e-17  1.77635684e-18  3.03164901e-17]
[1. 1. 1.]


In [8]:
# Per-column extremes of the standardized data (not confined to a fixed range).
print('Min values (Loan Amount, Int rate and Installment): ', np.min(data_scaled, axis=0))
print('Max values (Loan Amount, Int rate and Installment): ', np.max(data_scaled, axis=0))

Min values (Loan Amount, Int rate and Installment):  [-1.4566678  -1.42389012 -1.49839262]
Max values (Loan Amount, Int rate and Installment):  [2.34538496 3.66619529 4.02556036]


# Normalization (Min-Max Scaler):
![](https://miro.medium.com/max/253/1*Dl3P3Rrzto258X0Ales9Xw.png)

In [9]:
from sklearn.preprocessing import MinMaxScaler

# Min-max normalize every column: fit the scaler on the data first,
# then apply the fitted transformation to the same data.
scaler = MinMaxScaler()
scaler.fit(data)
data_scaled = scaler.transform(data)

In [10]:
# Per-column mean and standard deviation after min-max normalization.
print('means (Loan Amount, Int rate and Installment): ', np.mean(data_scaled, axis=0))
print('std (Loan Amount, Int rate and Installment): ', np.std(data_scaled, axis=0))

means (Loan Amount, Int rate and Installment):  [0.38312667 0.27973796 0.27125369]
std (Loan Amount, Int rate and Installment):  [0.26301581 0.19646036 0.18102978]


In [11]:
# Per-column extremes after min-max normalization (expected to be 0 and 1).
print('Min (Loan Amount, Int rate and Installment): ', np.min(data_scaled, axis=0))
print('Max (Loan Amount, Int rate and Installment): ', np.max(data_scaled, axis=0))

Min (Loan Amount, Int rate and Installment):  [0. 0. 0.]
Max (Loan Amount, Int rate and Installment):  [1. 1. 1.]


# Robust Scaler (scaling to median and quantiles):

$$\mathrm{IQR} = Q_{75} - Q_{25} \quad \text{(75th percentile minus 25th percentile)}$$

$$X_{\text{scaled}} = \frac{X - \operatorname{median}(X)}{\mathrm{IQR}}$$

In [15]:
from sklearn.preprocessing import RobustScaler

# Robust-scale every column (median / IQR based): fit the scaler on the data
# first, then apply the fitted transformation to the same data.
scaler = RobustScaler()
scaler.fit(data)
data_scaled = scaler.transform(data)

In [16]:
# Per-column mean and standard deviation after robust scaling.
print('means (Loan Amount, Int rate and Installment): ', np.mean(data_scaled, axis=0))
print('std (Loan Amount, Int rate and Installment): ', np.std(data_scaled, axis=0))

means (Loan Amount, Int rate and Installment):  [0.15299571 0.15671091 0.21539502]
std (Loan Amount, Int rate and Installment):  [0.73268691 0.66576743 0.76784099]


In [17]:
# Per-column extremes after robust scaling (not confined to a fixed range).
print('Min (Loan Amount, Int rate and Installment): ', np.min(data_scaled, axis=0))
print('Max (Loan Amount, Int rate and Installment): ', np.max(data_scaled, axis=0))

Min (Loan Amount, Int rate and Installment):  [-0.91428571 -0.79126876 -0.93513225]
Max (Loan Amount, Int rate and Installment):  [1.87142857 2.59754434 3.30638525]
