https://www.kaggle.com/datasets/fedesoriano/company-bankruptcy-prediction



Taiwanese Bankruptcy Prediction
Donated on 6/27/2020
The data were collected from the Taiwan Economic Journal for the years 1999 to 2009. Company bankruptcy was defined based on the business regulations of the Taiwan Stock Exchange.



Source
Deron Liang and Chih-Fong Tsai, deronliang '@' gmail.com; cftsai '@' mgt.ncu.edu.tw, National Central University, Taiwan
The data were obtained from the UCI Machine Learning Repository: https://archive.ics.uci.edu/ml/datasets/Taiwanese+Bankruptcy+Prediction

Relevant Papers
Liang, D., Lu, C.-C., Tsai, C.-F., and Shih, G.-A. (2016) Financial Ratios and Corporate Governance Indicators in Bankruptcy Prediction: A Comprehensive Study. European Journal of Operational Research, vol. 252, no. 2, pp. 561-572.
https://www.sciencedirect.com/science/article/pii/S0377221716000412

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# Load the bankruptcy dataset from the local archive into a DataFrame.
df = pd.read_csv("data.zip")
# Show (n_rows, n_columns) as the cell output.
df.shape

In [None]:
# Preview the first rows of the raw data.
df.head()

In [None]:
# Print a per-column summary of the frame (dtypes and non-null counts).
df.info()

In [None]:
# Count how many rows fall into each value of the target column.
df["Bankrupt?"].value_counts()

In [None]:
# Summary statistics for every column of the raw data.
df.describe()

In [None]:
# Transpose the per-column summary so each statistic becomes a column, then
# summarise those statistics across columns -- a quick view of how much the
# scale of the columns differs. Rounded to 2 decimals for display.
df.describe().T.describe().round(2)

In [None]:
# Standardize every column of `df` to zero mean / unit standard deviation
# (z-score), then show the rounded summary of the result.
# NOTE(review): this is applied to the whole frame, so the "Bankrupt?" label
# column is standardized as well; a column with zero standard deviation
# would come out as NaN -- confirm whether that matters for the plots below.
col_mean = df.mean()
col_std = df.std()
df_scaling = df.sub(col_mean, axis="columns").div(col_std, axis="columns")
df_scaling.describe().round(2)

In [None]:
# Same "summary of the summary" as for the raw data, now on the standardized
# frame, rounded to 4 decimals.
df_scaling.describe().T.describe().round(4)

In [None]:
# Plot the distributions of the standardized data as violins.
# NOTE(review): presumably one violin per column (wide-form input) -- confirm
# against the installed seaborn version.
sns.violinplot(df_scaling)

In [None]:
# Name of the target column. The variable keeps its original (misspelled)
# identifier because other cells may refer to it.
lable_name = "Bankrupt?"

# Target series first, then the feature frame with the target column removed.
y_raw = df[lable_name]
X_raw = df.drop(columns=[lable_name])

# Show both shapes as the cell output.
X_raw.shape, y_raw.shape

In [None]:
from imblearn.over_sampling import SMOTE

# Oversample with SMOTE (fixed seed for repeatability); the resampled X / y
# are what the following cells work with.
# NOTE(review): resampling is done here on the full dataset, *before* the
# train/validation split performed later in this file. Validation rows can
# therefore be synthetic, and synthetic training rows may be derived from
# rows that end up in validation -- this presumably inflates validation
# results. Consider splitting first and resampling only the training part.
sm = SMOTE(random_state=42)
X, y = sm.fit_resample(X_raw, y_raw)
# Show the resampled shapes as the cell output.
X.shape, y.shape

In [None]:
# Display the resampled feature table.
X

In [None]:
# Class counts after resampling.
y.value_counts()

In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the resampled labels. The encoder is fitted on a one-column
# frame and its output is converted to a dense array in the same expression.
ohe = OneHotEncoder()
y_ohe = ohe.fit_transform(y.to_frame()).toarray()

# Print the generated output column names (one per class).
print(ohe.get_feature_names_out())

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the resampled rows for validation (fixed seed), then turn
# each of the four resulting arrays into a torch tensor.
# NOTE(review): the features are passed on as-is (X.values); the standardized
# frame built earlier is not used here -- confirm that feeding unscaled
# features to the network is intended.
splits = train_test_split(X.values, y_ohe, test_size=0.33, random_state=42)
X_train, X_valid, y_train, y_valid = (torch.Tensor(part) for part in splits)

# Show the four tensor shapes as the cell output.
X_train.shape, X_valid.shape, y_train.shape, y_valid.shape

In [None]:
# Number of feature columns = width of the network's input layer.
input_size = X_train.size(1)
input_size

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class NeuralNetwork(nn.Module):
    """Fully connected classifier that maps feature vectors to class logits.

    The network is four ``Linear -> ReLU`` hidden layers followed by a final
    ``Linear`` layer that produces raw logits (no softmax is applied).

    Args:
        in_features: Number of input features. When ``None`` (the default)
            the module-level ``input_size`` is used, which keeps the original
            zero-argument ``NeuralNetwork()`` call working.
        num_classes: Number of output logits. When ``None`` (the default) it
            is read from ``y_train.shape[1]`` at module level, as before.
        hidden_size: Width of every hidden layer (512 by default).
    """

    def __init__(self, in_features=None, num_classes=None, hidden_size=512):
        super().__init__()

        # Fall back to the notebook globals only when the caller did not give
        # explicit sizes, so the class can also be built without them.
        if in_features is None:
            in_features = input_size
        if num_classes is None:
            num_classes = y_train.shape[1]

        # Layer order (and therefore the state_dict keys
        # ``linear_relu_stack.<index>.*``) is unchanged, so previously saved
        # weights still load.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes),
        )

    def forward(self, x):
        """Return the raw class logits for the batch ``x``."""
        return self.linear_relu_stack(x)

# Build the network and print its layer structure.
model = NeuralNetwork()
print(model)

# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.001)

In [None]:
# Sanity check: run the model once on the training features and print the
# predicted class index of the first ten rows.
# X_train is already a float tensor, so it is passed to the model directly.
# The forward pass runs under no_grad because nothing here is differentiated,
# which avoids building an autograd graph for the whole training set.
with torch.no_grad():
    logits = model(X_train)
    pred_prob = F.softmax(logits, dim=1)  # per-row class probabilities
y_pred = pred_prob.argmax(dim=1)
print(f"Predicted class: {y_pred[:10]}")

In [None]:
# Full-batch training: every step uses the entire training set.
nb_epochs = 1000

# NOTE: the loop deliberately runs nb_epochs + 1 iterations (epoch 0 through
# nb_epochs inclusive) so that the final "Epoch 1000/1000" line is logged.
for epoch in range(nb_epochs + 1):
    # Forward pass. X_train / y_train are already float tensors, so they are
    # used directly instead of being re-wrapped on every iteration.
    hypothesis = model(X_train)

    # y_train holds one-hot rows; F.cross_entropy takes the raw logits and
    # treats float targets of the same shape as class probabilities.
    cost = F.cross_entropy(hypothesis, y_train)

    # Clear old gradients, backpropagate, then update weights and biases.
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    # Log the loss every 100 epochs.
    if epoch % 100 == 0:
        print(f'Epoch {epoch:4d}/{nb_epochs} Cost: {cost.item():.6f}')

In [None]:
# Save the model: only the learned parameters (state dict) are written.
torch.save(model.state_dict(), 'model.pth')

# Load the saved model: build a fresh network of the same architecture and
# restore the stored parameters into it.
loaded_model = NeuralNetwork()
saved_state = torch.load('model.pth')
loaded_model.load_state_dict(saved_state)

# Switch to evaluation mode; as the last expression of the cell this also
# displays the module.
loaded_model.eval()