# Logistic Regression Example

Simple demonstration of the Logistic Regression model

In [1]:
# Importing useful libraries
import numpy as np
#import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Load the credit-card default data, using the "ID" column as the row index,
# and prepare it in a single method chain. Nothing is mutated in place, so
# re-running this cell always rebuilds `default` from the CSV.
default = (
    pd.read_csv('datasets/credit_card_default.csv', index_col="ID")
    # Lower-case every column name so later cells use one spelling.
    .rename(columns=lambda x: x.lower())
    # Keep only the two model features and the target column.
    .loc[:, ['pay_amt1', 'age', 'default payment next month']]
    # Give the target a short name.
    .rename(columns={'default payment next month': 'default'})
    # Rescale pay_amt1 by 1/1000 (i.e. express it in thousands).
    .assign(pay_amt1=lambda frame: frame['pay_amt1'] / 1000)
)

In [3]:
# Preview the first rows of the prepared frame (columns: pay_amt1, age, default).
default.head()

Unnamed: 0_level_0,pay_amt1,age,default
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.0,24,1
2,0.0,26,1
3,1.518,34,0
4,2.0,37,0
5,2.0,57,0


### Building a Logistic Regression model with two features: `pay_amt1` (payment amount, in thousands) and Age

Positive class: Default(==1)
$$Pr(default=1 | X) = \frac{1}{1+exp(-Z)}$$
Where: 
$$Z = w_0 + w_1 PayAmt1 + w_2 Age$$

In [4]:
# Feature matrix: the two predictor columns, in the order the model will see them.
X = default.loc[:, ['pay_amt1', 'age']]
# Target vector: the default indicator column.
y = default.loc[:, 'default']

In [5]:
# 1. Import the estimator class (the model type)
from sklearn.linear_model import LogisticRegression

# 2. Create an instance of the estimator with its default settings
logreg = LogisticRegression()

# 3. Use the training data to fit the estimator; as the cell's last
#    expression, the fitted estimator is also displayed below the cell
logreg.fit(X, y)

LogisticRegression()

In [6]:
# Show the fitted intercept, then the coefficient row
# (ordered like the columns of X: pay_amt1, age), one per line.
print(logreg.intercept_, logreg.coef_, sep="\n")

[-1.26428614]
[[-0.03657552  0.0045475 ]]


$$Pr(default=1 | X) = \frac{1}{1+exp(-Z)}$$

Where: 

$$Z = -1.2643 - 0.036576 \times PayAmt1 + 0.0045475 \times Age$$

In [7]:
def Z(pay_amt1, age):
    """Return the linear predictor Z of the fitted ``logreg`` model.

    Evaluates ``intercept + coef[0] * pay_amt1 + coef[1] * age`` using the
    module-level ``logreg`` estimator, which must already be fitted.

    Parameters
    ----------
    pay_amt1 : value multiplied by the model's first coefficient.
    age : value multiplied by the model's second coefficient.

    Returns
    -------
    The value of Z that is fed to the logistic function.
    """
    bias = logreg.intercept_[0]
    weights = logreg.coef_[0]
    return bias + weights[0]*pay_amt1 + weights[1]*age

def prob_default(limit_balance, age):
    """Return the predicted probability of default, ``1 / (1 + exp(-Z))``.

    Parameters
    ----------
    limit_balance : forwarded to ``Z`` as its ``pay_amt1`` argument, so
        despite the name it is the ``pay_amt1`` feature value.
    age : forwarded to ``Z`` as its ``age`` argument.

    Returns
    -------
    The logistic (sigmoid) transform of ``Z(limit_balance, age)``.
    """
    linear_score = Z(limit_balance, age)
    denominator = 1 + np.exp(-linear_score)
    return 1/denominator

## Predicting probability of default

In [8]:
# Reference case: pay_amt1 = 2.0 (in thousands), age = 40.
prob_default(limit_balance=2.0, age=40)

0.2394791118997556

In [9]:
# Same age, pay_amt1 raised to 4.0: the negative pay_amt1 coefficient lowers the probability.
prob_default(limit_balance=4.0, age=40)

0.22641106413101234

In [10]:
# pay_amt1 = 2.0 with age lowered to 20: the positive age coefficient lowers the probability.
prob_default(limit_balance=2.0, age=20)

0.22330879904636194

In [11]:
# pay_amt1 lowered to 1.0 at age 40: a smaller pay_amt1 raises the probability.
prob_default(limit_balance=1.0, age=40)

0.24620389927596653

In [12]:
# pay_amt1 = 1.0 at age 20.
prob_default(limit_balance=1.0, age=20)

0.2297166642938805