## Imports

In [77]:
%load_ext lab_black
import pandas as pd
import numpy as np
from sdv.metadata import SingleTableMetadata
from sdv.single_table import (
    CTGANSynthesizer,
    TVAESynthesizer,
    GaussianCopulaSynthesizer,
    CopulaGANSynthesizer,
)
from sdv.lite import SingleTablePreset
from sdv.evaluation.single_table import evaluate_quality
import warnings
import time
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.subplots as sp
import plotly.graph_objects as go
import time

sns.set(style="darkgrid", font_scale=0.5)
custom_palette = ["#8b4513", "#90ee90", "#545454", "#6a287e", "#f0be00"]
sns.set_palette(custom_palette)

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.model_selection import (
    train_test_split,
    GridSearchCV,
    cross_val_score,
    StratifiedKFold,
)
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import (
    confusion_matrix,
    accuracy_score,
    classification_report,
    roc_auc_score,
    f1_score,
    precision_score,
)

warnings.filterwarnings("ignore")

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


## Funções

In [78]:
def dummy(df, columns):
    dummy_variables = []
    for column in columns:
        dummies = pd.get_dummies(df[column], prefix=column)
        dummy_variables.append(dummies)
    return dummy_variables

In [79]:
def evaluate_models(X_train, X_test, y_train, y_test, standardscaler=True):
    if standardscaler:
        scaler = StandardScaler().fit(X_train)
    else:
        scaler = MinMaxScaler().fit(X_train)

    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    models = [
        ("SVM", SVC(), {"C": [0.1, 0.5, 1, 5, 10], "kernel": ["linear", "rbf"]}),
        (
            "Decision Tree",
            DecisionTreeClassifier(),
            {"max_depth": [None, 1, 2, 5, 10], "min_samples_split": [2, 3, 4, 5, 10]},
        ),
        (
            "KNN",
            KNeighborsClassifier(),
            {"n_neighbors": [1, 2, 3, 4, 5, 7, 10], "weights": ["uniform", "distance"]},
        ),
        (
            "Random Forest",
            RandomForestClassifier(),
            {"n_estimators": [100, 200, 300], "max_depth": [None, 5, 10]},
        ),
        (
            "Logistic Regression",
            LogisticRegression(max_iter=1000),
            {"C": [0.1, 0.5, 1, 5, 10], "solver": ["liblinear", "sag", "saga"]},
        ),
        (
            "MLP",
            MLPClassifier(max_iter=1000),
            {"hidden_layer_sizes": [(100,), (100, 50)], "alpha": [0.0001, 0.001, 0.01]},
        ),
    ]

    results = []

    stratified_cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

    for name, model, param_grid in models:
        start_time = time.time()

        clf = GridSearchCV(model, param_grid, cv=stratified_cv)
        clf.fit(X_train_scaled, y_train)

        best_model = clf.best_estimator_
        best_params = clf.best_params_

        y_pred = best_model.predict(X_test_scaled)

        accuracy = accuracy_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        roc_auc = roc_auc_score(y_test, y_pred)

        execution_time = time.time() - start_time

        results.append(
            {
                "Classificador": name,
                "Acurácia": accuracy,
                "Precisão": precision,
                "F1 Score": f1,
                "ROC/AUC": roc_auc,
                "Tempo": execution_time,
                "Best Parameters": best_params,
            }
        )

    results_df = pd.DataFrame(results)
    return results_df

In [80]:
def plot_confusion_matrix(model, y_pred, y_test):
    cm = confusion_matrix(y_test, y_pred)
    classes = np.unique(y_test)

    fig = sp.make_subplots(rows=1, cols=1)

    fig.add_trace(
        go.Heatmap(
            z=cm,
            x=classes,
            y=classes,
            colorscale="Blues",
            reversescale=True,
            showscale=False,
            hoverongaps=False,
            hovertemplate="Actual: %{y}Predicted: %{x}Count: %{z}",
        )
    )

    annotations = []
    for i in range(len(classes)):
        for j in range(len(classes)):
            annotations.append(
                dict(
                    x=classes[j],
                    y=classes[i],
                    text=str(cm[i, j]),
                    showarrow=False,
                    font=dict(color="black"),
                )
            )

    fig.update_layout(
        title="Confusion Matrix", width=600, height=500, annotations=annotations
    )

    fig.update_xaxes(title_text="Predicted")
    fig.update_yaxes(title_text="Actual")

    fig.show()

## Dataset Analysis

In [81]:
df = pd.read_csv("heart_cleveland.csv")
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,condition
0,69,1,0,160,234,1,2,131,0,0.1,1,1,0,0
1,69,0,0,140,239,0,0,151,0,1.8,0,2,0,0
2,66,0,0,150,226,0,0,114,0,2.6,2,0,0,0
3,65,1,0,138,282,1,2,174,0,1.4,1,1,0,1
4,64,1,0,110,211,0,2,144,1,1.8,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,40,1,3,152,223,0,0,181,0,0.0,0,0,2,1
293,39,1,3,118,219,0,0,140,0,1.2,1,0,2,1
294,35,1,3,120,198,0,0,130,1,1.6,1,0,2,1
295,35,0,3,138,183,0,0,182,0,1.4,0,0,0,0


### Informações Gerais sobre o Dataset

| Nome do Atributo | Papel | Tipo | Descrição | Unidades |
| --- | --- | --- | --- | --- |
| age | Feature | Discreto | Idade | anos |
| sex | Feature | Categórico | Sexo | 1 = masculino, 0 = feminino |
| cp | Feature | Categórico | Tipo de Dor no Peito | 0 = angina típica; 1 = angina atípica; 2 = dor não anginosa; 3 = assintomático |
| trestbps | Feature | Discreto | Pressão Arterial de Repouso (na admissão ao hospital) | mm Hg |
| chol | Feature | Discreto | Colesterol Sérico | mg/dl |
| fbs | Feature | Categórico | Açúcar no Sangue em Jejum > 120 mg/dl | 1 = true; 0 = false |
| restecg | Feature | Categórico | Eletrocardiograma de Repouso | 0 = normal; 1 = anormalidade na onda ST-T; 2 = hipertrofia ventricular esquerda |
| thalach | Feature | Discreto | Frequência Cardíaca Máxima Atingida | Hz |
| exang | Feature | Categórico | Angina Induzida por Exercício | 1 = true; 0 = false |
| oldpeak | Feature | Discreto | Depressão de ST Induzida por Exercício em Relação ao Repouso | float |
| slope | Feature | Categórico | Inclinação do Segmento ST de Pico do Exercício | 0 = ascendente; 1 = plano; 2 = descendente |
| ca | Feature | Discreto | Número de Vasos Principais coloridos por Fluoroscopia | 0-3 |
| thal | Feature | Categórico | Classificação da Frequência Cardíaca Máxima Atingida | 0 = normal; 1 = defeito fixo; 2 = defeito reversível |
| condition | Label | Categórico | Condição | 0 = sem doença; 1 = doença |


In [82]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 297 entries, 0 to 296
Data columns (total 14 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   age        297 non-null    int64  
 1   sex        297 non-null    int64  
 2   cp         297 non-null    int64  
 3   trestbps   297 non-null    int64  
 4   chol       297 non-null    int64  
 5   fbs        297 non-null    int64  
 6   restecg    297 non-null    int64  
 7   thalach    297 non-null    int64  
 8   exang      297 non-null    int64  
 9   oldpeak    297 non-null    float64
 10  slope      297 non-null    int64  
 11  ca         297 non-null    int64  
 12  thal       297 non-null    int64  
 13  condition  297 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 32.6 KB


In [83]:
df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
age,297.0,54.542088,9.049736,29.0,48.0,56.0,61.0,77.0
sex,297.0,0.676768,0.4685,0.0,0.0,1.0,1.0,1.0
cp,297.0,2.158249,0.964859,0.0,2.0,2.0,3.0,3.0
trestbps,297.0,131.693603,17.762806,94.0,120.0,130.0,140.0,200.0
chol,297.0,247.350168,51.997583,126.0,211.0,243.0,276.0,564.0
fbs,297.0,0.144781,0.352474,0.0,0.0,0.0,0.0,1.0
restecg,297.0,0.996633,0.994914,0.0,0.0,1.0,2.0,2.0
thalach,297.0,149.599327,22.941562,71.0,133.0,153.0,166.0,202.0
exang,297.0,0.326599,0.469761,0.0,0.0,0.0,1.0,1.0
oldpeak,297.0,1.055556,1.166123,0.0,0.0,0.8,1.6,6.2


## Pre-processamento utilizando OneHotEncoder

In [84]:
cat_features = ["sex", "restecg", "slope", "cp", "exang", "thal", "ca", "fbs"]
num_features = ["age", "oldpeak", "trestbps", "thalach", "chol"]

In [85]:
dummy_variables = dummy(df, cat_features)
df = pd.concat([df] + dummy_variables, axis=1)
df = df.drop(cat_features, axis=1)

In [86]:
df

Unnamed: 0,age,trestbps,chol,thalach,oldpeak,condition,sex_0,sex_1,restecg_0,restecg_1,...,exang_1,thal_0,thal_1,thal_2,ca_0,ca_1,ca_2,ca_3,fbs_0,fbs_1
0,69,160,234,131,0.1,0,0,1,0,0,...,0,1,0,0,0,1,0,0,0,1
1,69,140,239,151,1.8,0,1,0,1,0,...,0,1,0,0,0,0,1,0,1,0
2,66,150,226,114,2.6,0,1,0,1,0,...,0,1,0,0,1,0,0,0,1,0
3,65,138,282,174,1.4,1,0,1,0,0,...,0,1,0,0,0,1,0,0,0,1
4,64,110,211,144,1.8,0,0,1,0,0,...,1,1,0,0,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,40,152,223,181,0.0,1,0,1,1,0,...,0,0,0,1,1,0,0,0,1,0
293,39,118,219,140,1.2,1,0,1,1,0,...,0,0,0,1,1,0,0,0,1,0
294,35,120,198,130,1.6,1,0,1,1,0,...,1,0,0,1,1,0,0,0,1,0
295,35,138,183,182,1.4,0,1,0,1,0,...,0,1,0,0,1,0,0,0,1,0


## Divisão Treino e Teste

In [87]:
y = df["condition"]
X = df.drop("condition", axis=1)

In [88]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

In [89]:
print(
    f"X_train: {X_train.shape} \nX_test: {X_test.shape} \ny_train: {y_train.shape} \ny_test: {y_test.shape}"
)

X_train: (207, 28) 
X_test: (90, 28) 
y_train: (207,) 
y_test: (90,)


## Avaliação dos Modelos sem Dados Incrementados

In [90]:
results_df = evaluate_models(X_train, X_test, y_train, y_test, standardscaler=False)
print(results_df)

         Classificador  Acurácia  Precisão  F1 Score   ROC/AUC      Tempo  \
0                  SVM  0.822222  0.809524  0.809524  0.821429   0.154994   
1        Decision Tree  0.744444  0.711111  0.735632  0.745536   0.203001   
2                  KNN  0.855556  0.822222  0.850575  0.857143   3.825998   
3        Random Forest  0.822222  0.782609  0.818182  0.824405  12.298000   
4  Logistic Regression  0.833333  0.787234  0.831461  0.836310   0.299032   
5                  MLP  0.844444  0.818182  0.837209  0.845238  12.358001   

                                   Best Parameters  
0                      {'C': 0.1, 'kernel': 'rbf'}  
1         {'max_depth': 1, 'min_samples_split': 2}  
2         {'n_neighbors': 5, 'weights': 'uniform'}  
3            {'max_depth': 5, 'n_estimators': 100}  
4                  {'C': 5, 'solver': 'liblinear'}  
5  {'alpha': 0.0001, 'hidden_layer_sizes': (100,)}  


### Melhor Modelo sem os Dados Incrementados: KNN

## Criando os Dados Sintetizados

In [91]:
df = pd.read_csv("heart_cleveland.csv")
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,condition
0,69,1,0,160,234,1,2,131,0,0.1,1,1,0,0
1,69,0,0,140,239,0,0,151,0,1.8,0,2,0,0
2,66,0,0,150,226,0,0,114,0,2.6,2,0,0,0
3,65,1,0,138,282,1,2,174,0,1.4,1,1,0,1
4,64,1,0,110,211,0,2,144,1,1.8,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,40,1,3,152,223,0,0,181,0,0.0,0,0,2,1
293,39,1,3,118,219,0,0,140,0,1.2,1,0,2,1
294,35,1,3,120,198,0,0,130,1,1.6,1,0,2,1
295,35,0,3,138,183,0,0,182,0,1.4,0,0,0,0


In [92]:
metadata = SingleTableMetadata()
metadata.detect_from_dataframe(data=df)
metadata

{
    "METADATA_SPEC_VERSION": "SINGLE_TABLE_V1",
    "columns": {
        "age": {
            "sdtype": "numerical"
        },
        "sex": {
            "sdtype": "numerical"
        },
        "cp": {
            "sdtype": "numerical"
        },
        "trestbps": {
            "sdtype": "numerical"
        },
        "chol": {
            "sdtype": "numerical"
        },
        "fbs": {
            "sdtype": "numerical"
        },
        "restecg": {
            "sdtype": "numerical"
        },
        "thalach": {
            "sdtype": "numerical"
        },
        "exang": {
            "sdtype": "numerical"
        },
        "oldpeak": {
            "sdtype": "numerical"
        },
        "slope": {
            "sdtype": "numerical"
        },
        "ca": {
            "sdtype": "numerical"
        },
        "thal": {
            "sdtype": "numerical"
        },
        "condition": {
            "sdtype": "numerical"
        }
    }
}

In [93]:
# check errors

metadata.validate()

- cat_features = ["sex", "restecg", "slope", "cp", "exang", "thal", "ca", "fbs"]
- num_features = ["age", "oldpeak", "trestbps", "thalach", "chol"]

In [94]:
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,condition
0,69,1,0,160,234,1,2,131,0,0.1,1,1,0,0
1,69,0,0,140,239,0,0,151,0,1.8,0,2,0,0
2,66,0,0,150,226,0,0,114,0,2.6,2,0,0,0
3,65,1,0,138,282,1,2,174,0,1.4,1,1,0,1
4,64,1,0,110,211,0,2,144,1,1.8,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,40,1,3,152,223,0,0,181,0,0.0,0,0,2,1
293,39,1,3,118,219,0,0,140,0,1.2,1,0,2,1
294,35,1,3,120,198,0,0,130,1,1.6,1,0,2,1
295,35,0,3,138,183,0,0,182,0,1.4,0,0,0,0


In [36]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 297 entries, 0 to 296
Data columns (total 14 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   age        297 non-null    int64  
 1   sex        297 non-null    int64  
 2   cp         297 non-null    int64  
 3   trestbps   297 non-null    int64  
 4   chol       297 non-null    int64  
 5   fbs        297 non-null    int64  
 6   restecg    297 non-null    int64  
 7   thalach    297 non-null    int64  
 8   exang      297 non-null    int64  
 9   oldpeak    297 non-null    float64
 10  slope      297 non-null    int64  
 11  ca         297 non-null    int64  
 12  thal       297 non-null    int64  
 13  condition  297 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 32.6 KB


In [44]:
metadata.update_column(
    column_name="age", sdtype="numerical", computer_representation="Int32"
)

metadata.update_column(
    column_name="oldpeak", sdtype="numerical", computer_representation="Float"
)

metadata.update_column(
    column_name="trestbps", sdtype="numerical", computer_representation="Int32"
)

metadata.update_column(
    column_name="thalach", sdtype="numerical", computer_representation="Int32"
)

metadata.update_column(
    column_name="chol", sdtype="numerical", computer_representation="Int32"
)

metadata.update_column(
    column_name="sex", sdtype="numerical", computer_representation="Int32"
)

metadata.update_column(
    column_name="restecg", sdtype="numerical", computer_representation="Int32"
)

metadata.update_column(
    column_name="slope", sdtype="numerical", computer_representation="Int32"
)

metadata.update_column(
    column_name="cp", sdtype="numerical", computer_representation="Int32"
)

metadata.update_column(
    column_name="exang", sdtype="numerical", computer_representation="Int32"
)

metadata.update_column(
    column_name="thal", sdtype="numerical", computer_representation="Int32"
)

metadata.update_column(
    column_name="ca", sdtype="numerical", computer_representation="Int32"
)

metadata.update_column(
    column_name="fbs", sdtype="numerical", computer_representation="Int32"
)

metadata.update_column(
    column_name="condition", sdtype="numerical", computer_representation="Int32"
)

In [45]:
metadata

{
    "METADATA_SPEC_VERSION": "SINGLE_TABLE_V1",
    "columns": {
        "age": {
            "sdtype": "numerical",
            "computer_representation": "Int32"
        },
        "sex": {
            "sdtype": "numerical",
            "computer_representation": "Int32"
        },
        "cp": {
            "sdtype": "numerical",
            "computer_representation": "Int32"
        },
        "trestbps": {
            "sdtype": "numerical",
            "computer_representation": "Int32"
        },
        "chol": {
            "sdtype": "numerical",
            "computer_representation": "Int32"
        },
        "fbs": {
            "sdtype": "numerical",
            "computer_representation": "Int32"
        },
        "restecg": {
            "sdtype": "numerical",
            "computer_representation": "Int32"
        },
        "thalach": {
            "sdtype": "numerical",
            "computer_representation": "Int32"
        },
        "exang": {
            "sdtype":

### Constraints

In [52]:
age_constraint = {
    "constraint_class": "ScalarRange",
    "constraint_parameters": {
        "column_name": "age",
        "low_value": 25.0,
        "high_value": 80.0,
        "strict_boundaries": False,
    },
}

### Synthesizer

In [53]:
synthesizer = CTGANSynthesizer(metadata)
synthesizer.add_constraints(constraints=[age_constraint])

In [54]:
%%time
synthesizer.fit(df)

CPU times: total: 16min 21s
Wall time: 2min 44s


In [62]:
df_synth = synthesizer.sample(203)

Sampling rows: 100%|███████████████████████████████████████████████████████████████████████████████████████████| 203/203 [00:00<00:00, 1046.41it/s]


In [61]:
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,condition
0,69,1,0,160,234,1,2,131,0,0.1,1,1,0,0
1,69,0,0,140,239,0,0,151,0,1.8,0,2,0,0
2,66,0,0,150,226,0,0,114,0,2.6,2,0,0,0
3,65,1,0,138,282,1,2,174,0,1.4,1,1,0,1
4,64,1,0,110,211,0,2,144,1,1.8,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,40,1,3,152,223,0,0,181,0,0.0,0,0,2,1
293,39,1,3,118,219,0,0,140,0,1.2,1,0,2,1
294,35,1,3,120,198,0,0,130,1,1.6,1,0,2,1
295,35,0,3,138,183,0,0,182,0,1.4,0,0,0,0


In [65]:
df_new = pd.concat([df, df_synth])
df_new

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,condition
0,69,1,0,160,234,1,2,131,0,0.1,1,1,0,0
1,69,0,0,140,239,0,0,151,0,1.8,0,2,0,0
2,66,0,0,150,226,0,0,114,0,2.6,2,0,0,0
3,65,1,0,138,282,1,2,174,0,1.4,1,1,0,1
4,64,1,0,110,211,0,2,144,1,1.8,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,62,1,3,110,203,0,0,163,0,0.1,1,0,0,1
199,47,0,1,130,257,0,2,192,0,0.1,0,2,0,1
200,52,1,1,133,162,1,2,175,0,0.0,2,0,2,1
201,55,1,2,150,156,0,2,155,1,2.9,1,0,0,0


In [66]:
df = df_new

### Pre-processamento utilizando OneHotEncoder

In [67]:
cat_features = ["sex", "restecg", "slope", "cp", "exang", "thal", "ca", "fbs"]
num_features = ["age", "oldpeak", "trestbps", "thalach", "chol"]

In [68]:
dummy_variables = dummy(df, cat_features)
df = pd.concat([df] + dummy_variables, axis=1)
df = df.drop(cat_features, axis=1)

In [69]:
df

Unnamed: 0,age,trestbps,chol,thalach,oldpeak,condition,sex_0,sex_1,restecg_0,restecg_1,...,exang_1,thal_0,thal_1,thal_2,ca_0,ca_1,ca_2,ca_3,fbs_0,fbs_1
0,69,160,234,131,0.1,0,0,1,0,0,...,0,1,0,0,0,1,0,0,0,1
1,69,140,239,151,1.8,0,1,0,1,0,...,0,1,0,0,0,0,1,0,1,0
2,66,150,226,114,2.6,0,1,0,1,0,...,0,1,0,0,1,0,0,0,1,0
3,65,138,282,174,1.4,1,0,1,0,0,...,0,1,0,0,0,1,0,0,0,1
4,64,110,211,144,1.8,0,0,1,0,0,...,1,1,0,0,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
198,62,110,203,163,0.1,1,0,1,1,0,...,0,1,0,0,1,0,0,0,1,0
199,47,130,257,192,0.1,1,1,0,0,0,...,0,1,0,0,0,0,1,0,1,0
200,52,133,162,175,0.0,1,0,1,0,0,...,0,0,0,1,1,0,0,0,0,1
201,55,150,156,155,2.9,0,0,1,0,0,...,1,1,0,0,1,0,0,0,1,0


### Divisão Treino e Teste

In [70]:
y = df["condition"]
X = df.drop("condition", axis=1)

In [74]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42
)

In [75]:
print(
    f"X_train: {X_train.shape} \nX_test: {X_test.shape} \ny_train: {y_train.shape} \ny_test: {y_test.shape}"
)

X_train: (400, 28) 
X_test: (100, 28) 
y_train: (400,) 
y_test: (100,)


### Avaliação dos Modelos sem Dados Incrementados

In [76]:
results_df = evaluate_models(X_train, X_test, y_train, y_test, standardscaler=False)
print(results_df)

         Classificador  Acurácia  Precisão  F1 Score   ROC/AUC      Tempo  \
0                  SVM      0.75  0.764706  0.806202  0.721101   0.352999   
1        Decision Tree      0.67  0.670732  0.769231  0.604666   0.233036   
2                  KNN      0.71  0.775862  0.756303  0.702186   3.937965   
3        Random Forest      0.67  0.741379  0.722689  0.660151  12.739032   
4  Logistic Regression      0.67  0.750000  0.717949  0.664775   0.727001   
5                  MLP      0.69  0.777778  0.730435  0.690416  36.449968   

                                 Best Parameters  
0                      {'C': 1, 'kernel': 'rbf'}  
1       {'max_depth': 2, 'min_samples_split': 2}  
2      {'n_neighbors': 7, 'weights': 'distance'}  
3         {'max_depth': 10, 'n_estimators': 100}  
4                {'C': 1, 'solver': 'liblinear'}  
5  {'alpha': 0.01, 'hidden_layer_sizes': (100,)}  


### Melhor Modelo sem os Dados Incrementados: KNN