<a href="https://colab.research.google.com/github/VR97-png/Projeto-SIN-492/blob/main/Projeto.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import keras
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, KFold
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import f_classif, SelectKBest, f_regression
from keras.models import Sequential
from keras.utils import normalize
from keras.layers import Dense, Flatten, Dropout
from scipy.stats import zscore
import tensorflow as tf
import torch
from torch import nn
from torchviz import make_dot, make_dot_from_trace



In [None]:
# Location of the project dataset (a Parquet file; the /content prefix is
# presumably the Colab runtime's working directory -- confirm when running elsewhere).
parquet_url = '/content/dataset_SIN492.parquet'

# Load the whole file into a single DataFrame.
df = pd.read_parquet(path=parquet_url)

In [None]:
# Labels: the 'target' column on its own.
Y = df['target']

# Features: every remaining column (the original frame is left untouched).
X = df.drop(columns='target')

In [None]:
 # Metodo ANOVA para achar as melhores features
# Hold out 20% of the rows so the feature scores are computed on training data only.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Keep the 6 features with the highest ANOVA F-statistic.
# `f_classif` is the ANOVA F-test for a class-label target, which is what this
# notebook predicts; `f_regression` (used here before) is the univariate
# linear-regression F-test intended for a continuous target.
k_best = SelectKBest(score_func=f_classif, k=6)
X_train_selected = k_best.fit_transform(X_train, y_train)

# One F-score per input column, in the same order as X_train.columns.
anova_scores = k_best.scores_

# Boolean support mask -> names of the columns that were kept.
selected_features = X_train.columns[k_best.get_support()]

# List the names of the selected features.
print("Features ANOVA:")
for feature in selected_features:
    print(feature)

# Full dataset restricted to the selected columns. Built once, after the loop:
# it was previously re-created on every iteration and would have been left
# undefined if no feature had been selected.
X_anova = X[selected_features]

In [None]:
# 80/20 split (fixed seed) of the ANOVA-selected feature columns.
X_train, X_test, y_train, y_test = train_test_split(X_anova, Y, test_size=0.2, random_state=42)

# Base estimator whose hyperparameters are tuned below.
clf = DecisionTreeClassifier()

# Hyperparameter grid: 39 depths x 58 split sizes x 59 leaf sizes
# = 133,458 candidate trees. random_state is pinned so every candidate is
# deterministic.
param_grid = {
    'criterion': ['gini'],
    'max_depth': range(1, 40),
    'min_samples_split': range(2, 60),
    'min_samples_leaf': range(1, 60),
    'random_state': [42]
}

# Exhaustive search with 8-fold cross-validation (over one million fits in
# total). n_jobs=-1 spreads the fits across all available CPU cores; it does
# not change which candidate wins, only how long the search takes.
grid_search = GridSearchCV(clf, param_grid, cv=8, n_jobs=-1)

# Run the search on the training rows only.
grid_search.fit(X_train, y_train)

# Report the best hyperparameter combination found.
print(f'Os melhores hiperparâmetros encontrados foram: {grid_search.best_params_}')

# Predict the held-out rows with the best estimator (refit on the whole
# training set by GridSearchCV's default refit=True).
y_pred = grid_search.predict(X_test)

# Accuracy on the held-out test rows.
accuracy = accuracy_score(y_test, y_pred)
print(f'A acurácia do modelo é: {accuracy}')

In [None]:
# 80/20 split (fixed seed) of the ANOVA-selected feature columns.
X_train, X_test, y_train, y_test = train_test_split(X_anova, Y, test_size=0.2, random_state=42)

# Decision tree with fixed hyperparameters.
# NOTE(review): these values are presumably the best parameters reported by the
# grid-search cell -- confirm they still match its output.
clf = DecisionTreeClassifier(criterion='gini', max_depth=7, min_samples_split=47, min_samples_leaf=23, random_state=42)

# Shuffled 8-fold splitter with a fixed seed so the CV score is reproducible.
kf = KFold(n_splits=8, shuffle=True, random_state=42)

# Cross-validated accuracy, computed on the training rows only.
cv_scores = cross_val_score(clf, X_train, y_train, cv=kf, scoring='accuracy')

print(f'A pontuação média de validação cruzada é: {np.mean(cv_scores)}')

# Fit the final tree on the full training set.
clf.fit(X_train, y_train)

# Predict the held-out test rows.
y_pred = clf.predict(X_test)

# Accuracy on the held-out test rows.
accuracy = accuracy_score(y_test, y_pred)

print(f'A acurácia do modelo no conjunto de teste é: {accuracy}')

# Draw the fitted tree. Names are passed as plain lists (not a pandas Index),
# because some scikit-learn releases reject non-list values for these arguments.
plt.figure(figsize=(12, 8))
plot_tree(clf, feature_names=list(X_anova.columns), class_names=list(map(str, clf.classes_)), filled=True, rounded=True)
plt.show()

# Confusion matrix: rows are true labels, columns are predicted labels.
conf_matrix = confusion_matrix(y_test, y_pred)

# Render the confusion matrix as an annotated heatmap.
plt.figure(figsize=(4, 4))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Greens", cbar=False, annot_kws={"size": 12})
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()

# The training/validation loss plot that used to follow was removed:
# `train_loss` / `val_loss` are only created by the neural-network cell, so the
# plot raised NameError on a fresh kernel (Restart & Run All), and a decision
# tree has no per-epoch loss history to plot.

In [None]:
# Split the raw features FIRST, so the scaler below never sees the test rows.
# (Fitting the scaler on the whole dataset before splitting leaks test-set
# mean/std into training.) The row partition depends only on the number of rows
# and random_state, so it is the same partition as splitting scaled data.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Z-score normalisation: statistics are learned from the training rows only and
# then applied unchanged to the test rows.
scaler = StandardScaler()
X_train = pd.DataFrame(scaler.fit_transform(X_train_raw), columns=X.columns, index=X_train_raw.index)
X_test = pd.DataFrame(scaler.transform(X_test_raw), columns=X.columns, index=X_test_raw.index)

# Whole dataset scaled with the training statistics; kept so any later code
# that reads `X_scaled` keeps working.
X_scaled = pd.DataFrame(scaler.transform(X), columns=X.columns)

# Small feed-forward network: one hidden layer of 9 ReLU units and a 2-unit
# softmax output (one unit per class).
model = Sequential()
model.add(Dense(9, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(2, activation='softmax'))

# categorical_crossentropy expects one-hot targets, hence the conversion below.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# One-hot encode the labels (2 classes).
y_train = keras.utils.to_categorical(y_train, num_classes=2)
y_test = keras.utils.to_categorical(y_test, num_classes=2)

# Train silently for 500 epochs.
# NOTE(review): the test set doubles as validation data here, so the
# "validation" curve plotted below is really the test-set loss.
history = model.fit(X_train, y_train, epochs=500, batch_size=32,
                    validation_data=(X_test, y_test), verbose=0)

# Turn one-hot targets and softmax outputs back into class indices.
y_test_classes = np.argmax(y_test, axis=-1)
y_pred = np.argmax(model.predict(X_test), axis=-1)

# Accuracy on the held-out test rows. The message says "acurácia" (accuracy),
# matching the other cells; it previously said "precisão" (precision), which
# is a different metric from the one computed.
accuracy = accuracy_score(y_test_classes, y_pred)
print(f'A acurácia do modelo é: {accuracy}')

# Confusion matrix: rows are true labels, columns are predicted labels.
conf_matrix = confusion_matrix(y_test_classes, y_pred)

# Render the confusion matrix as an annotated heatmap.
plt.figure(figsize=(5, 4))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", cbar=False, annot_kws={"size": 8})
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.title('Confusion Matrix')
plt.show()

# Per-epoch loss values recorded by Keras during training.
train_loss = history.history['loss']
val_loss = history.history['val_loss']

# Loss curves over the training epochs.
plt.figure(figsize=(5, 4))
plt.plot(train_loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
