# 1. Import libraries

In [1]:
import io
import warnings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
from statsmodels.tools.tools import add_constant
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [3]:
warnings.filterwarnings("ignore")
%matplotlib inline
plt.style.use('seaborn-white')
plt.rcParams['figure.figsize'] = 10,5

In [4]:
def IV (data, X, Y):
    """Print the information value (IV) of column ``X`` with respect to ``Y``.

    Rows are grouped by the distinct values of ``X``. Within each group the
    sum of ``Y`` is counted as "bad" and ``count - sum`` as "good", so ``Y``
    is treated as a 0/1 flag. For every group::

        WOE = ln(pct_bad / pct_good)
        IV  = WOE * (pct_bad - pct_good)

    and the group IVs are summed.

    A group with zero bads or zero goods would make the logarithm infinite
    and turn the total into ``inf``. Such zero cells are replaced by 0.5
    before the percentages are computed; data without empty cells gives
    exactly the same result as the unadjusted formula.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing both columns.
    X : str
        Name of the grouping (predictor) column.
    Y : str
        Name of the 0/1 target column.

    Returns
    -------
    None
        The result is printed as ``"<X> :  <IV rounded to 3 decimals>"``.
    """
    counts = data.groupby(X)[Y].agg(['sum', 'count'])
    bad = counts['sum'].astype(float)
    good = (counts['count'] - counts['sum']).astype(float)

    # Continuity adjustment: only exact-zero cells are touched, so any other
    # value (including NaN) flows through unchanged.
    bad = bad.where(bad != 0, 0.5)
    good = good.where(good != 0, 0.5)

    pct_bad = bad / bad.sum()
    pct_good = good / good.sum()
    woe = np.log(pct_bad / pct_good)
    total_iv = (woe * (pct_bad - pct_good)).sum()
    print(X,': ',round(total_iv,3))

In [5]:
def VIF (data):
    """Print the variance inflation factor of each column of ``data``.

    ``add_constant`` is applied to ``data`` first, and a factor is computed
    for every column of the result, so any column it adds gets its own entry
    in the output as well.

    Parameters
    ----------
    data : pandas.DataFrame
        The predictor columns to check for collinearity.

    Returns
    -------
    None
        A Series of factors, indexed by column name, is printed.
    """
    design = add_constant(data)
    matrix = design.values
    factors = []
    for position in range(design.shape[1]):
        factors.append(variance_inflation_factor(matrix, position))
    print(pd.Series(factors, index=design.columns))

# 2. Import datasets

In [6]:
# Read the modelling dataset from a path relative to the notebook's working
# directory; the trailing expression displays its (rows, columns) shape.
df = pd.read_csv("../outputs/model.csv")
df.shape

(1000, 7)

# 3. Univariate analysis

In [7]:
# Summary statistics, transposed so that each dataset column becomes one row.
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
loan_amount,1000.0,0.29962,0.106224,0.08,0.24,0.32,0.32,0.54
months,1000.0,0.29572,0.108852,0.1,0.24,0.32,0.32,0.56
history,1000.0,0.30981,0.112338,0.18,0.18,0.33,0.33,0.63
age,1000.0,0.30384,0.072034,0.15,0.27,0.27,0.33,0.45
purpose,1000.0,0.30102,0.083109,0.12,0.21,0.36,0.36,0.42
property,1000.0,0.2949,0.079153,0.2,0.2,0.3,0.3,0.45
Y,1000.0,0.3,0.458487,0.0,0.0,0.0,1.0,1.0


# 4. Information Value (IV)

In [8]:
# Print the information value of every predictor against the target column Y,
# in the same order as before.
for feature in ('loan_amount', 'months', 'history', 'purpose', 'age', 'property'):
    IV(df, feature, 'Y')

loan_amount :  0.263
months :  0.289
history :  0.293
purpose :  0.162
age :  0.132
property :  0.113


# 5. Variance Inflation Factor (VIF)

In [9]:
# Collinearity check on the six predictor columns; the target Y is left out.
x = df.loc[:, ['loan_amount', 'months', 'history', 'property', 'age', 'purpose']]
VIF(x)

const          55.800574
loan_amount     1.097527
months          1.162643
history         1.030923
property        1.103694
age             1.020514
purpose         1.006265
dtype: float64
