# Importação dos Dados

In [1]:
# Manipulação de Dados
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Pré-processamento dos Dados
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# Modelos Utilizados
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier

# Otimização
from sklearn.model_selection import train_test_split, GridSearchCV

# Métricas de Avaliação: Classificação
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, roc_curve, auc, RocCurveDisplay

In [2]:
# Load the workbook: sheet_name=None reads every sheet into a dict of
# DataFrames keyed by sheet name, each indexed by the "Empresa" column.
dfs = pd.read_excel(
    "../data/dados.xlsx",
    sheet_name=None,
    index_col="Empresa",
)
# Each sheet holds the indicators for a given number of years before bankruptcy.
print("DataFrames disponíveis: ", list(dfs))

DataFrames disponíveis:  ['df_1y', 'df_2y', 'df_3y', 'df_4y', 'df_5y']


# Tratamento

In [4]:
def tratamento(n):
    """Prepare the train/test data for the ``n``-th sheet held in ``dfs``.

    Selects the ``n``-th DataFrame (1-based, in the key order of the
    module-level ``dfs`` dict), drops rows containing missing values, splits
    the data into train and test sets, standardises the features and projects
    them with PCA. The scaler and the PCA are fitted on the training split
    only and then applied to the test split.

    The frames stored in ``dfs`` are not modified.

    Parameters
    ----------
    n : int
        1-based position of the sheet to use (years before bankruptcy).

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``: the PCA-transformed feature
        matrices and the matching slices of the ``"Alvo"`` column. Typical
        use: ``X_train, X_test, y_train, y_test = tratamento(1)``.

    Raises
    ------
    IndexError
        If ``n`` is not between 1 and ``len(dfs)``.
    """
    nomes = list(dfs.keys())
    if not 1 <= n <= len(nomes):
        # Without this guard ``n - 1`` wraps around: n = 0 would silently
        # pick the last sheet while reporting "DataFrame Escolhido: 0".
        raise IndexError(f"n must be between 1 and {len(nomes)}, got {n}")

    # Select the sheet for 1 to 5 years before bankruptcy
    df = dfs[nomes[n - 1]]
    print("DataFrame Escolhido:", n)

    # Drop rows with missing values on a copy, leaving the shared frame intact
    df = df.dropna()
    print("\nQuantidade de Amostras:\n", df["Alvo"].value_counts())

    # Train / test split
    X = df.drop("Alvo", axis = 1)
    y = df["Alvo"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42)

    # Linear transformations: fit on the training split only, apply to both
    scaler = StandardScaler()
    X_train_norm = scaler.fit_transform(X_train)
    X_test_norm = scaler.transform(X_test)
    pca = PCA()
    X_train = pca.fit_transform(X_train_norm)
    X_test = pca.transform(X_test_norm)

    # Cumulative share of variance explained by the first k components
    print("\nExplicação de Cada Componente:", pca.explained_variance_ratio_.cumsum().round(2))

    # Hand the splits back to the caller instead of discarding them
    return X_train, X_test, y_train, y_test

In [20]:
# Logistic Regression
# NOTE(review): X_train, X_test, y_train and y_test are read from the kernel
# namespace; no visible cell assigns them at top level (tratamento() only
# creates them as locals) — confirm they exist before running this cell.
modelo = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)

# Class predictions for both splits
y_pred = modelo.predict(X_test)
y_pred_train = modelo.predict(X_train)

# Second predict_proba column for both splits
y_pred_proba_train = modelo.predict_proba(X_train)[:, 1]
y_pred_proba = modelo.predict_proba(X_test)[:, 1]

# Test-set report: confusion matrix, accuracy and per-class metrics,
# separated by blank lines
print(
    confusion_matrix(y_test, y_pred),
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n\n",
)

# ROC curve on the test set and the area under it (AUC)
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)
print("AUC - Teste :", roc_auc)

[[7 1]
 [2 3]]

0.7692307692307693

              precision    recall  f1-score   support

           0       0.78      0.88      0.82         8
           1       0.75      0.60      0.67         5

    accuracy                           0.77        13
   macro avg       0.76      0.74      0.75        13
weighted avg       0.77      0.77      0.76        13

AUC - Teste : 0.575


In [21]:
# Random Forest
# NOTE(review): X_train, X_test, y_train and y_test are read from the kernel
# namespace; no visible cell assigns them at top level — confirm they exist
# before running this cell.
modelo = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Class predictions for both splits, plus the second predict_proba column
# on the test split
y_pred = modelo.predict(X_test)
y_pred_train = modelo.predict(X_train)
y_pred_proba = modelo.predict_proba(X_test)[:, 1]

# Test-set report: confusion matrix, accuracy and per-class metrics,
# separated by blank lines
print(
    confusion_matrix(y_test, y_pred),
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n\n",
)

# ROC curve on the test set and the area under it (AUC)
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)
print("AUC - Teste :", roc_auc)

[[7 1]
 [2 3]]

0.7692307692307693

              precision    recall  f1-score   support

           0       0.78      0.88      0.82         8
           1       0.75      0.60      0.67         5

    accuracy                           0.77        13
   macro avg       0.76      0.74      0.75        13
weighted avg       0.77      0.77      0.76        13

AUC - Teste : 0.8875


In [22]:
# Neural Network (multi-layer perceptron)
# NOTE(review): X_train, X_test, y_train and y_test are read from the kernel
# namespace; no visible cell assigns them at top level — confirm they exist
# before running this cell.
modelo = MLPClassifier(random_state=42).fit(X_train, y_train)

# Class predictions for both splits, plus the second predict_proba column
# on the test split
y_pred = modelo.predict(X_test)
y_pred_train = modelo.predict(X_train)
y_pred_proba = modelo.predict_proba(X_test)[:, 1]

# Test-set report: confusion matrix, accuracy and per-class metrics,
# separated by blank lines
print(
    confusion_matrix(y_test, y_pred),
    accuracy_score(y_test, y_pred),
    classification_report(y_test, y_pred),
    sep="\n\n",
)

# ROC curve on the test set and the area under it (AUC)
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)
print("AUC - Teste :", roc_auc)

[[7 1]
 [3 2]]

0.6923076923076923

              precision    recall  f1-score   support

           0       0.70      0.88      0.78         8
           1       0.67      0.40      0.50         5

    accuracy                           0.69        13
   macro avg       0.68      0.64      0.64        13
weighted avg       0.69      0.69      0.67        13

AUC - Teste : 0.5


