# Importing Libraries

In [1]:
import os

import pandas as pd
import numpy as np

import plotly.express as px
import plotly.graph_objects as go

from imblearn.over_sampling import RandomOverSampler

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report, confusion_matrix
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier

import warnings
# NOTE(review): this silences every warning for the rest of the session,
# not just cosmetic ones - e.g. solver convergence warnings and deprecation
# notices are hidden too. Consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

# Load the data

In [2]:
# Location of the bankruptcy dataset. Set the BANKRUPTCY_DATA_CSV environment
# variable to point at your own copy; the fallback is the original author's
# local path, so existing setups keep working unchanged.
DATA_PATH = os.environ.get(
    "BANKRUPTCY_DATA_CSV",
    "C:/Users/amani/Downloads/financial/data.csv",
)
df = pd.read_csv(DATA_PATH)

# Data Exploration

In [3]:
# Peek at the first five rows to check that the file parsed as expected.
df.head(n=5)

Unnamed: 0,Bankrupt?,ROA(C) before interest and depreciation before interest,ROA(A) before interest and % after tax,ROA(B) before interest and depreciation after tax,Operating Gross Margin,Realized Sales Gross Margin,Operating Profit Rate,Pre-tax net Interest Rate,After-tax net Interest Rate,Non-industry income and expenditure/revenue,...,Net Income to Total Assets,Total assets to GNP price,No-credit Interval,Gross Profit to Sales,Net Income to Stockholder's Equity,Liability to Equity,Degree of Financial Leverage (DFL),Interest Coverage Ratio (Interest expense to EBIT),Net Income Flag,Equity to Liability
0,1,0.370594,0.424389,0.40575,0.601457,0.601457,0.998969,0.796887,0.808809,0.302646,...,0.716845,0.009219,0.622879,0.601453,0.82789,0.290202,0.026601,0.56405,1,0.016469
1,1,0.464291,0.538214,0.51673,0.610235,0.610235,0.998946,0.79738,0.809301,0.303556,...,0.795297,0.008323,0.623652,0.610237,0.839969,0.283846,0.264577,0.570175,1,0.020794
2,1,0.426071,0.499019,0.472295,0.60145,0.601364,0.998857,0.796403,0.808388,0.302035,...,0.77467,0.040003,0.623841,0.601449,0.836774,0.290189,0.026555,0.563706,1,0.016474
3,1,0.399844,0.451265,0.457733,0.583541,0.583541,0.9987,0.796967,0.808966,0.30335,...,0.739555,0.003252,0.622929,0.583538,0.834697,0.281721,0.026697,0.564663,1,0.023982
4,1,0.465022,0.538432,0.522298,0.598783,0.598783,0.998973,0.797366,0.809304,0.303475,...,0.795016,0.003878,0.623521,0.598782,0.839973,0.278514,0.024752,0.575617,1,0.03549


In [4]:
# Per-column count of missing values.
df.isna().sum()

Bankrupt?                                                   0
 ROA(C) before interest and depreciation before interest    0
 ROA(A) before interest and % after tax                     0
 ROA(B) before interest and depreciation after tax          0
 Operating Gross Margin                                     0
                                                           ..
 Liability to Equity                                        0
 Degree of Financial Leverage (DFL)                         0
 Interest Coverage Ratio (Interest expense to EBIT)         0
 Net Income Flag                                            0
 Equity to Liability                                        0
Length: 96, dtype: int64

In [5]:
# (number of rows, number of columns) of the loaded frame.
df.shape

(6819, 96)

In [6]:
# Target is the `Bankrupt?` label; every remaining column is a feature.
y = df['Bankrupt?']
X = df.drop(columns=['Bankrupt?'])

# Handling Class imbalance

In [7]:
from imblearn.over_sampling import RandomOverSampler
ros=RandomOverSampler()
X_resampled, y_resampled = ros.fit_resample(X, y)

df=pd.concat([X_resampled, y_resampled], axis=1)

# Feature Selection - apply PCA to select top 10 features

In [8]:
n_components = 10
pca = PCA(n_components=n_components)
X_pca = pca.fit_transform(X_resampled)

In [9]:
X_train,X_test,y_train,y_test = train_test_split(X_pca,y_resampled,test_size = 0.2, random_state = 42)

# Training Various classification models

In [10]:
X_train,X_test,y_train,y_test = train_test_split(X_pca,y_resampled,test_size = 0.2, random_state = 42)

In [12]:
# Candidate classifiers. The tree-based estimators are randomised, so they are
# seeded (42, the same seed the train/test split uses) to make the reported
# metrics repeatable from one run to the next.
models = [
    LogisticRegression(),
    RandomForestClassifier(random_state=42),
    DecisionTreeClassifier(random_state=42),
    GradientBoostingClassifier(random_state=42),
]

In [13]:
# Fit each candidate on the training split and report its confusion matrix
# and per-class precision/recall/F1 on the held-out test split.
for model in models:
    # These estimators are classifiers, so label them accordingly.
    print(f'Classifier: {model.__class__.__name__}')

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    print("Confusion Matrix:\n")
    print(confusion_matrix(y_test, y_pred))
    print()
    print("Classification Report:\n")
    print(classification_report(y_test, y_pred))
    print("------------------------------------------")

Regressor: LogisticRegression
Confusin Matrix:

[[898 436]
 [585 721]]

Classification Report:

              precision    recall  f1-score   support

           0       0.61      0.67      0.64      1334
           1       0.62      0.55      0.59      1306

    accuracy                           0.61      2640
   macro avg       0.61      0.61      0.61      2640
weighted avg       0.61      0.61      0.61      2640

------------------------------------------
Regressor: RandomForestClassifier
Confusin Matrix:

[[1327    7]
 [   0 1306]]

Classification Report:

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      1334
           1       0.99      1.00      1.00      1306

    accuracy                           1.00      2640
   macro avg       1.00      1.00      1.00      2640
weighted avg       1.00      1.00      1.00      2640

------------------------------------------
Regressor: DecisionTreeClassifier
Confusin Matrix:

[[1282  