In [1]:
import pandas as pd

In [2]:
import os
from os.path import curdir
xl_path = os.path.join(curdir,'Data','banks.csv')

In [3]:
df = pd.read_csv(xl_path,header='infer',index_col='Obs')
df.head()

Unnamed: 0_level_0,Financial Condition,TotCap/Assets,TotExp/Assets,TotLns&Lses/Assets
Obs,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1,9.7,0.12,0.65
2,1,1.0,0.11,0.62
3,1,6.9,0.09,1.02
4,1,5.8,0.1,0.67
5,1,4.3,0.11,0.69


In [4]:
df.describe()

Unnamed: 0,Financial Condition,TotCap/Assets,TotExp/Assets,TotLns&Lses/Assets
count,20.0,20.0,20.0,20.0
mean,0.5,9.32,0.1045,0.6285
std,0.512989,4.797214,0.026052,0.159779
min,0.0,1.0,0.07,0.3
25%,0.0,7.125,0.08,0.525
50%,0.5,9.2,0.1,0.64
75%,1.0,11.3,0.12,0.7225
max,1.0,20.5,0.16,1.02


In [5]:
#Check if there is null data in the dataset
df.isna().sum()

Financial Condition    0
TotCap/Assets          0
TotExp/Assets          0
TotLns&Lses/Assets     0
dtype: int64

In [6]:
df.dtypes

Financial Condition      int64
TotCap/Assets          float64
TotExp/Assets          float64
TotLns&Lses/Assets     float64
dtype: object

In [7]:
df.columns

Index(['Financial Condition', 'TotCap/Assets', 'TotExp/Assets',
       'TotLns&Lses/Assets'],
      dtype='object')

In [8]:
df['Financial Condition'].unique()

array([1, 0], dtype=int64)

In [9]:
from sklearn.model_selection import train_test_split
y=df['Financial Condition']
X=df.drop(columns=['Financial Condition','TotCap/Assets'])
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3, random_state=0)

In [10]:
from sklearn.linear_model import LogisticRegression

logit_reg = LogisticRegression(penalty="l2", C=1e42, solver='liblinear')
logit_reg.fit(X_train, y_train)

LogisticRegression(C=1e+42, solver='liblinear')

In [11]:
from dmba.metric import AIC_score
print('intercept ', logit_reg.intercept_[0])
print(pd.DataFrame({'coeff': logit_reg.coef_[0]}, index=X.columns).transpose())
print('AIC', AIC_score(y_test, logit_reg.predict(X_test), df = len(X_train.columns)
+ 1))

intercept  -12.519508398826535
       TotExp/Assets  TotLns&Lses/Assets
coeff      84.005065              6.2125
AIC 14.276705583087743


In [12]:
#linear model

import statsmodels.api as sm
from statsmodels.formula.api import ols

X_train_1 = sm.add_constant(X_train)

model = sm.OLS(y_train, X_train_1).fit()
print(model.summary())

                             OLS Regression Results                            
Dep. Variable:     Financial Condition   R-squared:                       0.500
Model:                             OLS   Adj. R-squared:                  0.409
Method:                  Least Squares   F-statistic:                     5.501
Date:                 Mon, 22 May 2023   Prob (F-statistic):             0.0221
Time:                         14:01:53   Log-Likelihood:                -5.1642
No. Observations:                   14   AIC:                             16.33
Df Residuals:                       11   BIC:                             18.25
Df Model:                            2                                         
Covariance Type:             nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                 -1.4



In [13]:
import numpy as np
from dmba import classificationSummary

y_pred = model.predict(X_train_1)
cutoff = 0.5                              
y_pred_classes = np.zeros_like(y_pred)
y_pred_classes[y_pred > cutoff] = 1


classificationSummary(y_train, y_pred_classes)

Confusion Matrix (Accuracy 0.9286)

       Prediction
Actual 0 1
     0 5 1
     1 0 8


In [14]:
classificationSummary(y_train, logit_reg.predict(X_train))
classificationSummary(y_test, logit_reg.predict(X_test))

Confusion Matrix (Accuracy 0.9286)

       Prediction
Actual 0 1
     0 5 1
     1 0 8
Confusion Matrix (Accuracy 0.8333)

       Prediction
Actual 0 1
     0 3 1
     1 0 2


In [15]:
B0 = logit_reg.intercept_[0]
B1 = logit_reg.coef_[0][0]
B2 = logit_reg.coef_[0][1]

In [16]:
print('Logit = ',B0,'+',B1,' * TotExp/Assets +',B2,'* TotLns&Lses/Assets')
print('Odds = exp(',B0,'+',B1,' * TotExp/Assets +',B2,'* TotLns&Lses/Assets')
print('Probability = 1/1 +exp(-(',B0,'+',B1,' * TotExp/Assets +',B2,'* TotLns&Lses/Assets ))')

Logit =  -12.519508398826535 + 84.00506521757427  * TotExp/Assets + 6.212500121412339 * TotLns&Lses/Assets
Odds = exp( -12.519508398826535 + 84.00506521757427  * TotExp/Assets + 6.212500121412339 * TotLns&Lses/Assets
Probability = 1/1 +exp(-( -12.519508398826535 + 84.00506521757427  * TotExp/Assets + 6.212500121412339 * TotLns&Lses/Assets ))


In [17]:
Total_loans_and_Leases_Asstes = 0.6
Total_expense_Assets = 0.11

In [18]:
Logit = B0 + B1 * Total_expense_Assets +B2 * Total_loans_and_Leases_Asstes
print(f'Logit: {Logit:.4f}')

Logit: 0.4485


In [19]:
odds = np.exp(Logit)
print(f'Odds: {odds:.4f}')

Odds: 1.5660


In [20]:
probability = 1 / (1 + np.exp(-Logit))
print(f'Probability: {probability:.4f}')

Probability: 0.6103


In [21]:
if(probability > 0.5):
    print('The bank is financially weak')
else:
    print('The bank is financially strong')

The bank is financially weak


In [22]:
#3.	The cutoff value of 0.5 is used in conjunction with the probability of being financially weak. Compute the threshold that should be used if we want to make a classification based on the odds of being financially weak, and the threshold for the corresponding logit.
odds_cutoff = cutoff/(1-cutoff)
print('The corresponding odd cutoff is', odds_cutoff)
print('The corresponding logit cutoff is', np.log(odds_cutoff))

The corresponding odd cutoff is 1.0
The corresponding logit cutoff is 0.0


In [23]:
#4.	Interpret the estimated coefficient for the total loans & leases to total assets ratio (TotLns&Lses/Assets) in terms of the odds of being financially weak.
print('the coefficient for total loans & leases to total assets is',B2)
print('the coefficient for total loans & leases to total assetes in terms of odds is', np.log(B2))

the coefficient for total loans & leases to total assets is 6.212500121412339
the coefficient for total loans & leases to total assetes in terms of odds is 1.8265634109659805
