In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np

# Load the dataset
df = pd.read_csv('Dados_Trabalho_TP2.csv')

# Integer-encode every object-typed column; the single encoder is refit for
# each column, so after the loop it only remembers the last column it saw.
le = LabelEncoder()
object_columns = [name for name in df.columns if df[name].dtype == object]
for name in object_columns:
    df[name] = le.fit_transform(df[name])

# Separate the target ('Label') from the feature columns
y = df['Label']
X = df.drop(columns='Label')

# Hold out 20% of the rows for testing (fixed seed keeps the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
#Arvores de decisao com K-Fold Cross-Validation

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
from sklearn.metrics import recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler


# Load the dataset
df = pd.read_csv('Dados_Trabalho_TP2.csv')

# Integer-encode every object-typed column (the encoder is refit per column)
le = LabelEncoder()
for column in df.columns:
    if df[column].dtype == object:
        df[column] = le.fit_transform(df[column])

# Split the frame into features (X) and labels (y)
X = df.drop('Label', axis=1)
y = df['Label']

# Hold out 20% for testing BEFORE scaling, so that the hold-out rows never
# influence the scaler statistics (fitting the scaler on all rows leaks
# information from the test set into training).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise using statistics of the training rows only; keep DataFrames
# (with the original index) so positional `.iloc` indexing works in the folds.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X.columns, index=X_test.index)

# Number of folds
k = 10
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Decision tree model
model1 = DecisionTreeClassifier(max_depth=12, min_samples_leaf=5, random_state=42)

scores1 = []          # per-fold accuracy, in percent
sensitivities = []    # per-fold macro recall
specificities = []    # per-fold macro specificity
f1_scores = []        # per-fold macro F1
epsilon = 1e-7  # avoids division by zero in the specificity ratio
prevs_folds = []      # per-fold predictions
y_folds = []          # per-fold true labels

# Cross-validate on the training portion only
for train_index, test_index in kf.split(X_train):
    X_train_fold, X_test_fold = X_train.iloc[train_index], X_train.iloc[test_index]
    y_train_fold, y_test_fold = y_train.iloc[train_index], y_train.iloc[test_index]

    # Fit on the fold's training rows and predict its validation rows
    model1.fit(X_train_fold, y_train_fold)
    y_pred = model1.predict(X_test_fold)

    # Accuracy of this fold, in percent
    fold_score = accuracy_score(y_test_fold, y_pred) * 100

    # Macro recall and macro F1 of this fold
    sensitivity = recall_score(y_test_fold, y_pred, average='macro')
    f1 = f1_score(y_test_fold, y_pred, average='macro')

    # Macro-averaged one-vs-rest specificity: for each class TN / (TN + FP).
    # The previous cm[0,0] / (cm[0,0] + cm[0,1]) is only a specificity for a
    # 2x2 matrix and is inconsistent with the macro-averaged recall / F1.
    cm = confusion_matrix(y_test_fold, y_pred)
    false_pos = cm.sum(axis=0) - np.diag(cm)
    negatives = cm.sum() - cm.sum(axis=1)      # TN + FP for each class
    specificity = np.mean((negatives - false_pos) / (negatives + epsilon))

    # Store this fold's metrics
    scores1.append(fold_score)
    sensitivities.append(sensitivity)
    specificities.append(specificity)
    f1_scores.append(f1)

    # Store predictions and true labels of the validation rows
    prevs_folds.append(y_pred)
    y_folds.append(y_test_fold)

# Mean and standard deviation of the fold accuracies
mean_accuracy = np.mean(scores1)
std_accuracy = np.std(scores1)

# Mean of the remaining fold metrics
mean_sensitivity = np.mean(sensitivities)
mean_specificity = np.mean(specificities)
mean_f1 = np.mean(f1_scores)

print(f"K-Fold Cross-Validation Scores: {scores1}")
print(f"Mean Accuracy: {mean_accuracy}")
print(f"Standard Deviation: {std_accuracy}")
print(f"Mean Sensitivity: {mean_sensitivity}")
print(f"Mean Specificity: {mean_specificity}")
print(f"Mean F1 Score: {mean_f1}")

In [None]:
# SVM

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import numpy as np
from sklearn.metrics import recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler



# Load the dataset
df = pd.read_csv('Dados_Trabalho_TP2.csv')

# Integer-encode every object-typed column (the encoder is refit per column)
le = LabelEncoder()
for column in df.columns:
    if df[column].dtype == object:
        df[column] = le.fit_transform(df[column])

# Split the frame into features (X) and labels (y)
X = df.drop('Label', axis=1)
y = df['Label']

# Hold out 20% for testing BEFORE scaling so the hold-out rows do not leak
# into the scaler statistics.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise with training statistics only; keep DataFrames for `.iloc`
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=X.columns, index=X_train.index)
X_test = pd.DataFrame(scaler.transform(X_test), columns=X.columns, index=X_test.index)

# Number of folds
k = 5
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Linear-kernel SVM
kernel = 'linear'
svm = SVC(kernel=kernel)

# Tune the regularisation strength. `gamma` is not searched: it is a kernel
# coefficient for the rbf/poly/sigmoid kernels and has no effect on a linear
# kernel, so including it only doubled the number of fits.
param_grid = {'C': [0.1, 1, 10, 100, 1000]}
grid = GridSearchCV(svm, param_grid, cv=kf, refit=True, scoring='accuracy', n_jobs=-1)
grid.fit(X_train, y_train)

# With refit=True this estimator is already trained on all of X_train
best_model = grid.best_estimator_

# Per-fold metrics of the selected configuration
scores = []           # per-fold accuracy as a fraction in [0, 1]
sensitivities = []
specificities = []
f1_scores = []
epsilon = 1e-7  # avoids division by zero in the specificity ratio

# Cross-validate on the training portion only, using fold-local names so the
# hold-out split (X_train / X_test / y_train / y_test) is not overwritten.
for train_index, val_index in kf.split(X_train):
    X_fold_train, X_fold_val = X_train.iloc[train_index], X_train.iloc[val_index]
    y_fold_train, y_fold_val = y_train.iloc[train_index], y_train.iloc[val_index]

    # Fresh estimator per fold, so `best_model` stays fitted on all of X_train
    fold_model = SVC(kernel=kernel, **grid.best_params_)
    fold_model.fit(X_fold_train, y_fold_train)
    y_pred = fold_model.predict(X_fold_val)

    scores.append(accuracy_score(y_fold_val, y_pred))

    # Macro recall and macro F1 of this fold
    sensitivity = recall_score(y_fold_val, y_pred, average='macro')
    f1 = f1_score(y_fold_val, y_pred, average='macro')

    # Macro-averaged one-vs-rest specificity: for each class TN / (TN + FP)
    cm = confusion_matrix(y_fold_val, y_pred)
    false_pos = cm.sum(axis=0) - np.diag(cm)
    negatives = cm.sum() - cm.sum(axis=1)      # TN + FP for each class
    specificity = np.mean((negatives - false_pos) / (negatives + epsilon))

    sensitivities.append(sensitivity)
    specificities.append(specificity)
    f1_scores.append(f1)

# Report mean and standard deviation in the same unit (percent)
mean_score = np.mean(scores) * 100
std_score = np.std(scores) * 100

# Mean of the remaining fold metrics
mean_sensitivity = np.mean(sensitivities)
mean_specificity = np.mean(specificities)
mean_f1 = np.mean(f1_scores)

# Accuracy on the untouched hold-out rows, in percent
test_accuracy = best_model.score(X_test, y_test) * 100

print(f"Kernel: {kernel}, Best Params: {grid.best_params_}, Mean Accuracy: {mean_score}, Std Accuracy: {std_score}")
print(f"Mean Sensitivity: {mean_sensitivity}")
print(f"Mean Specificity: {mean_specificity}")
print(f"Mean F1 Score: {mean_f1}")
print(f"Hold-out Test Accuracy: {test_accuracy}")

In [None]:
#melhor redes Nadam normal

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import recall_score, f1_score, confusion_matrix

import tensorflow as tf
from tensorflow import keras
from keras import layers

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
import numpy as np
epsilon = 1e-10  # keeps the specificity ratio finite when a denominator is zero


# Load the dataset
df = pd.read_csv('Dados_Trabalho_TP2.csv')

# Integer-encode every object-typed column (the encoder is refit per column)
le = LabelEncoder()
for column in df.columns:
    if df[column].dtype == object:
        df[column] = le.fit_transform(df[column])

# Split the frame into features (X) and labels (y)
X = df.drop('Label', axis=1)
y = df['Label']

kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
# Hold out 20% of the rows; they are not used for model selection below
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# np.random.seed(0)
tf.random.set_seed(0)

# Scale features to [0, 1] using the training rows' min/max
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# X_train is now a NumPy array, so index the labels positionally as well
y_train_values = y_train.to_numpy()

optimizers = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
initializers = ['uniform', 'lecun_uniform', 'normal', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']

acc_per_fold = []       # validation accuracy (percent) of every fold trained
loss_per_fold = []      # validation loss of every fold trained
f1score_per_fold = []   # validation macro F1 of every fold trained
fold_no = 1
epochs_list = [10, 20, 30]   # not used by the search below
batch_sizes = [16, 32, 64]   # not used by the search below
results = []            # one entry per (optimizer, initializer): fold-mean metrics

HIDDEN_UNITS = (64, 48, 32, 32, 16, 16, 16, 16, 16, 16, 16)
N_OUTPUTS = 9
# Derive the input width from the data instead of hard-coding it
n_features = X_train.shape[1]


def build_model3(initializer):
    """Return an uncompiled dense network using `initializer` for every layer.

    The stack is ReLU layers with HIDDEN_UNITS units followed by a softmax
    layer with N_OUTPUTS units; the input width is `n_features`.
    """
    layer_stack = [tf.keras.Input(shape=(n_features,))]
    for units in HIDDEN_UNITS:
        layer_stack.append(
            tf.keras.layers.Dense(units=units, activation="relu", kernel_initializer=initializer)
        )
    layer_stack.append(
        tf.keras.layers.Dense(units=N_OUTPUTS, activation="softmax", kernel_initializer=initializer)
    )
    return tf.keras.Sequential(layer_stack)


best_history = None     # training history of the best configuration (its last fold)
best_mean_acc = -np.inf

for optimizer in optimizers:
    for initializer in initializers:
        fold_acc, fold_loss, fold_sens, fold_spec, fold_f1 = [], [], [], [], []

        # Previously only the model construction was inside this loop, so the
        # folds were never used and every configuration was scored on the
        # test set. Train and validate inside each fold instead.
        for train, test in kfold.split(X_train, y_train_values):
            model3 = build_model3(initializer)
            model3.compile(optimizer=optimizer,
                           loss='sparse_categorical_crossentropy',
                           metrics=['accuracy'])

            # Train on the fold's training rows
            history4 = model3.fit(X_train[train], y_train_values[train], epochs=100, verbose=0)

            # Evaluate on the fold's validation rows: [loss, accuracy]
            scores3 = model3.evaluate(X_train[test], y_train_values[test], verbose=0)

            # Class predictions: argmax over the softmax probabilities
            y_val = y_train_values[test]
            y_pred = np.argmax(model3.predict(X_train[test], verbose=0), axis=1)

            # Macro recall, macro F1 and macro one-vs-rest specificity
            sensitivity = recall_score(y_val, y_pred, average='macro')
            f1 = f1_score(y_val, y_pred, average='macro')
            cm = confusion_matrix(y_val, y_pred)
            false_pos = cm.sum(axis=0) - np.diag(cm)
            negatives = cm.sum() - cm.sum(axis=1)      # TN + FP for each class
            specificity = np.mean((negatives - false_pos) / (negatives + epsilon))

            fold_acc.append(scores3[1] * 100)
            fold_loss.append(scores3[0])
            fold_sens.append(sensitivity)
            fold_spec.append(specificity)
            fold_f1.append(f1)

        acc_per_fold.extend(fold_acc)
        loss_per_fold.extend(fold_loss)
        f1score_per_fold.extend(fold_f1)

        # Store the fold-mean metrics of this configuration
        mean_acc = float(np.mean(fold_acc))
        results.append({
            'optimizer': optimizer,
            'initializer': initializer,
            'accuracy': mean_acc,
            'loss': float(np.mean(fold_loss)),
            'sensitivity': float(np.mean(fold_sens)),
            'specificity': float(np.mean(fold_spec)),
            'f1': float(np.mean(fold_f1))
        })

        # Strict '>' keeps the first best configuration, matching max() below
        if mean_acc > best_mean_acc:
            best_mean_acc = mean_acc
            best_history = history4

# Configuration with the highest cross-validated accuracy
best_result = max(results, key=lambda x: x['accuracy'])
print('Melhor accuracy:', best_result['accuracy'])
print('Melhor otimizador:', best_result['optimizer'])
print('Melhor inicializador:', best_result['initializer'])
print('Sensibilidade:', best_result['sensitivity'])
print('Especificidade:', best_result['specificity'])
print('F1:', best_result['f1'])

import matplotlib.pyplot as plt

# Training curves of the best configuration (last fold trained for it),
# rather than of whichever configuration happened to be trained last.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 6))

ax_acc.plot(best_history.history['accuracy'])
ax_acc.set(title='Model accuracy', xlabel='Epoch', ylabel='Accuracy')
ax_acc.legend(['Train'], loc='upper left')

ax_loss.plot(best_history.history['loss'])
ax_loss.set(title='Model loss', xlabel='Epoch', ylabel='Loss')
ax_loss.legend(['Train'], loc='upper left')

plt.show()

In [None]:
# K-vizinhos-mais-próximos iterando manualmente sobre os parâmetros

import pandas as pd
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score, recall_score, confusion_matrix, f1_score


from itertools import product

import numpy as np

data = pd.read_csv('Dados_Trabalho_TP2.csv')


# Integer-encode the categorical columns, keeping one fitted encoder per column
label_encoders = {}
for column in ['Genero', 'Historico_obesidade_familiar', 'FCCAC', 'FCV', 'CCER', 'Fumador', 'CA', 'MCC', 'FAF', 'TUDE', 'CBA', 'TRANS', 'Label']:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Check that every column is numeric
print(data.dtypes)

# Separate the features and the target
X = data.drop('Label', axis=1)
y = data['Label']

# Check that every feature column is numeric before scaling
print(X.dtypes)

# Standardise the features.
# NOTE(review): the scaler is fit on all rows before cross-validation, so the
# fold scores below are computed with statistics that include validation rows.
scaler = StandardScaler()
X = scaler.fit_transform(X)
# X is now a NumPy array; index the labels positionally too instead of
# relying on the Series index labels happening to equal the positions.
y_values = y.to_numpy()

# 10-fold cross-validation; the splits are materialised once and reused for
# every parameter combination.
kf = KFold(n_splits=10, shuffle=True, random_state=42)
folds = list(kf.split(X))

# K-nearest-neighbours model
knn = KNeighborsClassifier()

param_grid = {
    'n_neighbors': list(range(1, 51, 2)),
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan', 'chebyshev', 'minkowski'],
    'p': [1, 2],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'leaf_size': list(range(20, 51, 10))
}

best_score = 0
best_params = {}
best_sensitivity = 0
best_specificity = 0
best_f1 = 0
epsilon = 1e-7  # avoids division by zero in the specificity ratio

# Iterate the grid in the same order as six nested loops over these names
param_names = ['n_neighbors', 'weights', 'metric', 'p', 'algorithm', 'leaf_size']
for values in product(*(param_grid[name] for name in param_names)):
    params = dict(zip(param_names, values))
    knn = KNeighborsClassifier(**params)

    fold_acc, fold_sens, fold_spec, fold_f1 = [], [], [], []
    for train_index, test_index in folds:
        knn.fit(X[train_index], y_values[train_index])
        y_true = y_values[test_index]
        y_pred = knn.predict(X[test_index])

        # Accuracy is taken from the same fit, so each configuration is
        # trained once per fold instead of twice.
        fold_acc.append(accuracy_score(y_true, y_pred))
        fold_sens.append(recall_score(y_true, y_pred, average='macro', zero_division=1))
        fold_f1.append(f1_score(y_true, y_pred, average='macro'))

        # Macro-averaged one-vs-rest specificity: for each class TN / (TN + FP)
        cm = confusion_matrix(y_true, y_pred)
        false_pos = cm.sum(axis=0) - np.diag(cm)
        negatives = cm.sum() - cm.sum(axis=1)      # TN + FP for each class
        fold_spec.append(np.mean((negatives - false_pos) / (negatives + epsilon)))

    mean_score = np.mean(fold_acc)
    if mean_score > best_score:
        best_score = mean_score
        best_params = params
        # Report the metrics OF THE SELECTED configuration (fold means), not
        # the maximum seen in any single fold of any configuration.
        best_sensitivity = np.mean(fold_sens)
        best_specificity = np.mean(fold_spec)
        best_f1 = np.mean(fold_f1)

# Train the model with the best parameters
knn.set_params(**best_params)
knn.fit(X, y)

# Cross-validated accuracy of the selected configuration
accuracy_scores = cross_val_score(knn, X, y, cv=kf, scoring='accuracy')

# Express accuracy in percent
accuracy_scores = accuracy_scores * 100

print(f'Melhores parâmetros: {best_params}')
print(f'Média de accuracy: {accuracy_scores.mean()}')
print(f'Desvio padrão da accuracy: {accuracy_scores.std()}')
print(f'Melhor Sensitivity: {best_sensitivity}')
print(f'Melhor Specificity: {best_specificity}')
print(f'Melhor F1 Score: {best_f1}')

In [None]:
# KNN usando a grid search

import pandas as pd
from sklearn.model_selection import train_test_split, KFold, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score

# `cross_val_score` is used below but was not imported by this cell; import
# it here so the cell does not depend on an earlier cell's import.
from sklearn.model_selection import cross_val_score

# Load the dataset
data = pd.read_csv('Dados_Trabalho_TP2.csv')

# Integer-encode the categorical columns, keeping one fitted encoder per column
label_encoders = {}
for column in ['Genero', 'Historico_obesidade_familiar', 'FCCAC', 'FCV', 'CCER', 'Fumador', 'CA', 'MCC', 'FAF', 'TUDE', 'CBA', 'TRANS', 'Label']:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Check that every column is numeric
print(data.dtypes)

# Separate the features and the target
X = data.drop('Label', axis=1)
y = data['Label']

# Check that every feature column is numeric before scaling
print(X.dtypes)

# Standardise the features.
# NOTE(review): the scaler is fit on all rows before cross-validation, so the
# fold scores below are computed with statistics that include validation rows.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# 10-fold cross-validation
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# K-nearest-neighbours model
knn = KNeighborsClassifier()

# Hyper-parameter grid searched with cross-validation
param_grid = {
    'n_neighbors': list(range(1, 51, 2)),
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan', 'chebyshev', 'minkowski'],
    'p': [1, 2],
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'leaf_size': list(range(20, 51, 10))
}

# n_jobs=-1 runs the candidate fits in parallel; it does not change the result
grid_search = GridSearchCV(knn, param_grid, cv=kf, scoring='accuracy', n_jobs=-1)
grid_search.fit(X, y)

# Best parameters found
best_params = grid_search.best_params_

# Train the model with the best parameters
knn.set_params(**best_params)
knn.fit(X, y)

# Cross-validated accuracy of the selected configuration
accuracy_scores = cross_val_score(knn, X, y, cv=kf, scoring='accuracy')

print(f'Melhores parâmetros: {best_params}')
print(f'Média de accuracy: {accuracy_scores.mean()}')
print(f'Desvio padrão da accuracy: {accuracy_scores.std()}')


In [None]:
#4.2 a)

from scipy import stats
import numpy as np

# SVM accuracy scores.
# NOTE(review): `scores` is a generic name; make sure no cell executed between
# the SVM cell and this one has rebound it to something else.
# The SVM cell stores accuracies as fractions in [0, 1], while the network
# results are stored in percent, so bring the SVM scores to percent as well.
# Comparing the two on different scales makes the test meaningless.
scores_svm = np.asarray(scores, dtype=float) * 100

# Print the SVM accuracy scores (percent)
print(f"Pontuações de accuracy do Modelo SVM: {scores_svm}")

# Accuracy (percent) of every entry in `results`
scores_model3 = [result['accuracy'] for result in results]

# Print the model 3 accuracy scores
print(f"Pontuações de accuracy do Modelo 3: {scores_model3}")

# Two-sample t-test. Welch's variant (equal_var=False) is used because the two
# samples have different sizes and there is no reason to assume equal variances.
t_stat_svm_3, p_val_svm_3 = stats.ttest_ind(scores_svm, scores_model3, equal_var=False)

# Print the test result
print(f"Modelo SVM vs Modelo 3: t statistic: {t_stat_svm_3:.2f}, p value: {p_val_svm_3:.12f}")

# Significance at the 5% level
if p_val_svm_3 < 0.05:
    print("Há uma diferença significativa no desempenho dos dois modelos.")
else:
    print("Não há uma diferença significativa no desempenho dos dois modelos.")

# Model with the higher mean accuracy (both now in percent)
if np.mean(scores_svm) > np.mean(scores_model3):
    print("O Modelo SVM tem melhor desempenho.")
else:
    print("O Modelo 3 (Rede Neural) tem melhor desempenho.")

In [None]:
#4.2 c) Seleção de Atributos

from sklearn.ensemble import RandomForestClassifier
import pandas as pd
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import StandardScaler

# This cell uses `tf` and `train_test_split` without importing them; import
# them here so it does not depend on imports made by earlier cells.
import tensorflow as tf
from sklearn.model_selection import train_test_split

# `df` is the label-encoded DataFrame from an earlier cell; 'Label' is the target.
# `model1`, `best_model` and `knn` are also taken from earlier cells and are
# refit in place below.
X = df.drop('Label', axis=1)
y = df['Label']

# One-hot encode any remaining categorical columns
X = pd.get_dummies(X)

# Split BEFORE selecting attributes, so the F-scores are computed from the
# training rows only and the test rows do not influence which attributes are kept.
X_train_all, X_test_all, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Keep the 10 attributes with the highest ANOVA F-score (fitted once)
selector = SelectKBest(score_func=f_classif, k=10)
X_train = selector.fit_transform(X_train_all, y_train)
X_test = selector.transform(X_test_all)
# Full data restricted to the selected attributes (kept under its previous name)
X_new = selector.transform(X)

# Attribute / score table. A dedicated name is used instead of `scores`, which
# other cells use for cross-validation accuracies.
featureScores = pd.DataFrame({'Atributo': X.columns, 'Score': selector.scores_})
print(featureScores.nlargest(10,'Score'))  # prints the 10 best attributes

# Standardise using training statistics only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


# Model 1 trained on the selected attributes
model1.fit(X_train, y_train)
accuracy = model1.score(X_test, y_test)
print('accuracy do Modelo 1:', accuracy)

# Model 2
best_model.fit(X_train, y_train)
accuracy2 = best_model.score(X_test, y_test)
print('accuracy do Modelo 2:', accuracy2)

# Model 3: same dense architecture as before, input width taken from the data
hidden_units = (64, 48, 32, 32, 16, 16, 16, 16, 16, 16, 16)
model3 = tf.keras.Sequential(
    [tf.keras.Input(shape=(X_train.shape[1],))]
    + [tf.keras.layers.Dense(units=units, activation="relu", kernel_initializer='normal')
       for units in hidden_units]
    + [tf.keras.layers.Dense(units=9, activation="softmax", kernel_initializer='normal')]
)

# Compile the model
model3.compile(optimizer='Nadam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])
# Train the model. `epochs` was missing, so Keras trained for its default of a
# single epoch; use the same 100 epochs as the other network cells.
model3.fit(X_train, y_train, epochs=100, verbose=0)

# Evaluate the model
loss, accuracy3 = model3.evaluate(X_test, y_test)
print('accuracy do Modelo 3:', accuracy3)

# Model 4
knn.fit(X_train, y_train)
accuracy4 = knn.score(X_test, y_test)
print('accuracy do Modelo 4:', accuracy4)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from tensorflow import keras
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from scipy import stats
from sklearn.model_selection import KFold, GridSearchCV

from sklearn.preprocessing import StandardScaler, LabelEncoder

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import tensorflow as tf

import numpy as np

# Load the dataset
df = pd.read_csv('Dados_Trabalho_TP2.csv')

# Integer-encode every object-typed column (the encoder is refit per column)
le = LabelEncoder()
for column in df.columns:
    if df[column].dtype == object:
        df[column] = le.fit_transform(df[column])

# New predictor: weight divided by height squared
df['IMC'] = df['Peso'] / (df['Altura'] ** 2)

# Combine 'Fumador' and 'CBA' into one attribute (sum of their integer codes)
df['Fumador_CBA'] = df['Fumador'] + df['CBA']

atributos = ['Genero', 'Idade', 'Altura', 'Peso', 'Historico_obesidade_familiar', 'FCCAC', 'FCV', 'NRP', 'CCER', 'Fumador', 'CA', 'MCC', 'FAF', 'TUDE', 'CBA', 'TRANS']

atributos_com_IMC_Fumador_CBA = atributos + ['IMC', 'Fumador_CBA']

X = df[atributos_com_IMC_Fumador_CBA]  # includes the new predictors IMC and Fumador_CBA
y = df['Label']

# Hold out 20% of the rows for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise using training statistics only
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Linear-kernel SVM
kernel = 'linear'
svm = SVC(kernel=kernel)

# Single-candidate grid; GridSearchCV is used only to fit with these values
param_grid = {'C': [ 100], 'gamma': ['scale']}
kf = KFold(n_splits=5, shuffle=True, random_state=42)
grid = GridSearchCV(svm, param_grid, cv=kf, refit=True, scoring='accuracy', n_jobs=-1)
grid.fit(X_train, y_train)

best_model = grid.best_estimator_

# Evaluate the SVM on the hold-out rows (accuracy as a fraction in [0, 1])
y_pred = best_model.predict(X_test)
accuracy_svm = accuracy_score(y_test, y_pred)
print('accuracy do Modelo SVM com IMC e Fumador_CBA:', accuracy_svm)


# Model 3 on the augmented attributes; input width is taken from the data
# instead of being hard-coded.
hidden_units = (64, 48, 32, 32, 16, 16, 16, 16, 16, 16, 16)
model3 = tf.keras.Sequential(
    [tf.keras.Input(shape=(X_train.shape[1],))]
    + [tf.keras.layers.Dense(units=units, activation="relu", kernel_initializer='normal')
       for units in hidden_units]
    + [tf.keras.layers.Dense(units=9, activation="softmax", kernel_initializer='normal')]
)

# Compile the model
model3.compile(optimizer='Nadam',
                      loss='sparse_categorical_crossentropy',
                      metrics=['accuracy'])

# Train the model
model3.fit(X_train, y_train, epochs=100, verbose=0)

# Evaluate the model (accuracy as a fraction in [0, 1])
loss, accuracy3 = model3.evaluate(X_test, y_test)
print('accuracy do Modelo Redes com IMC e Fumador_CBA:', accuracy3)



# Baseline SVM fold accuracies, expected as fractions in [0, 1].
# `scores` comes from an earlier cell and is a generic name that other cells
# may rebind, so check that it still looks like a set of accuracies instead of
# silently testing against unrelated numbers.
scores_svm = np.asarray(scores, dtype=float).ravel()
if scores_svm.size < 2 or not np.all((scores_svm >= 0) & (scores_svm <= 1)):
    raise ValueError(
        "`scores` does not hold fold accuracies in [0, 1]; "
        "re-run the SVM cell before this one."
    )

# Baseline network accuracies taken from `results` (stored in percent)
scores_model3 = [result['accuracy'] for result in results]

# One-sample t-test: baseline SVM fold accuracies vs. the new SVM accuracy
t_stat_svm, p_value_svm = stats.ttest_1samp(scores_svm, accuracy_svm)

# One-sample t-test for model 3. `scores_model3` is in percent while
# `accuracy3` is a fraction, so convert it to percent before comparing.
t_stat_model3, p_value_model3 = stats.ttest_1samp(scores_model3, accuracy3 * 100)

# Print the p-value for the SVM model
print("Valor p para o modelo SVM: ", p_value_svm)

# Print the p-value for model 3
print("Valor p para o modelo 3: ", p_value_model3)

# Significance at the 5% level for the SVM model
if p_value_svm < 0.05:
    print("Existe uma diferença significativa no desempenho do modelo SVM com a utilização do IMC e Fumador_CBA.")
else:
    print("Não existe uma diferença significativa no desempenho do modelo SVM com a IMC e Fumador_CBA.")

# Significance at the 5% level for model 3
if p_value_model3 < 0.05:
    print("Existe uma diferença significativa no desempenho do modelo de redes com a utilização do IMC e Fumador_CBA.")
else:
    print("Não existe uma diferença significativa no desempenho do modelo de redes com a utilização do IMC e Fumador_CBA.")