

[Dataset](https://www.kaggle.com/datasets/rashikrahmanpritom/heart-attack-analysis-prediction-dataset)



In [1]:
import numpy as np
np.set_printoptions(suppress=True) # Suprime notação cientifica
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix,roc_curve,auc
from sklearn.decomposition import PCA
import pandas as pd 
import random
import matplotlib.pyplot as plt

In [2]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
# Removendo a primeira linha do dataset contendo o nome dos atributos
!sed 1d "/content/gdrive/MyDrive/Reconhecimento de Padrões/Trabalho/heart.csv" > dataset.csv

# Colunas dos dados
!cut dataset.csv -d"," -f1-13 > data.csv

#Coluna dos rótulos
!cut dataset.csv -d"," -f14 > labels.lab

In [4]:
# Carregamento dos dados
def loadData(f_data="data.csv",f_labels="labels.lab"):
     X = np.loadtxt(f_data,delimiter=",")

     y  = open(f_labels,"r").readlines()

     y = [1 if i =="1\n" else 0 for i in y]
     
     return X, y

In [5]:
def standardizeData(data,method="min"):
  # Transpoe a matriz de dados invertendo linhas e colunas
  data_T = data.T

  # Cria uma nova estrutura de dados para armazenar os valores normalizados
  new_data_T = []

  if method == "min": # Normaliza os valores pelo min e max
    for i in data_T:
      new_data_T.append(((i - np.min(i)) / (np.max(i) - np.min(i))))

  elif method == "std":  # Normaliza pelo desvio padrão
    for i in data_T:
      new_data = new_data_T.append((i - np.mean(i)) / ((np.std(i, ddof=1))**2))

  # Retorna a nova matriz normalizada no formato original
  return np.array(new_data_T).T 

In [6]:
# Divisão do dataset em conjuntos de treinamento e teste, proporção 2/3
def dataSplit(X,y,test_size=0.33,shuffle=True):
   X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random.randint(10,48),shuffle=shuffle) 
   return X_train, X_test, y_train, y_test

In [7]:
def printInformation(name, var, n_components, acc, cm, tpr, tnr,singular_vals):
  print("========================Informations==================================")
  print(f"Classifier: {name}")
  print(f"Accumulated Variance: {var}")
  print(f"Qtd of components: {n_components}")
  print(f"Accuracy: {acc}")
  print(f"True Positive Rating: {tpr}")
  print(f"True Negative Rating: {tnr}")
  print(f"Principal Components Values: {singular_vals}")
  print(f"\nConfusion Matrix\n {cm}")

In [8]:
def trainModels(var, model, classifier_name):
  X,y = loadData() # Carregar os dados

  # Normalização dos dados
  X = standardizeData(X)

  # PCA 
  pca = PCA(n_components=var)
  new_dataset = pca.fit_transform(np.copy(X), y) # Ajusta o modelo e aplica a redução de dimensionalidade


  # Divisão dos dados entre treinamento e teste
  X_train, X_test, y_train, y_test = dataSplit(new_dataset,y)
  model.fit(X_train,y_train) # Faz o treinamento 

  pred = model.predict(X_test) # Vetor com a classificação de cada amostra do teste
  # score = model.predict_proba(X_test) 

  # Acurácia
  # print(np.asarray(y_test).shape)
  # print((np.sum(pred == np.asarray(y_test)))/ np.asarray(y_test).shape[0])

  singular_vals = pca.singular_values_ # Valores dos componentes principais
  acc_variance = round(sum(pca.explained_variance_ratio_), 2)
  print(f"acc_variance: {acc_variance}")

  # Matriz de confusão
  # Primeiro parametro: O que realmente é
  # Segundo parametro: O que a rede retornou como verdade
  cm = confusion_matrix(y_test, pred)
  tn, fp, fn, tp = confusion_matrix(y_test, pred).ravel()

  # Avalia o resultado das predições 
  # Accuracy, True Positive, True Negative
  acc = ((tn+tp) / (tn+fp+fn+tp)) 
  tpr = (tp / (tp + fn) ) 
  tnr = (tn / (tn + fp))
  
  printInformation(classifier_name, var, pca.n_components_, acc, cm, tpr, tnr,singular_vals)

  return  acc, tpr, tnr,cm


**Documentação**


[SVM](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC)




1.  [Kernels](https://scikit-learn.org/stable/auto_examples/svm/plot_svm_kernels.html#)


[NB](https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html?highlight=gaussiannb#sklearn.naive_bayes.GaussianNB)

[DecisionTreeClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html?highlight=decisiontreeclassifier#sklearn.tree.DecisionTreeClassifier)






In [9]:
def main():
  variances = [0.75, 0.90, 0.99]
 
  for var in variances:
    resultNB = trainModels(var, GaussianNB(), "Naive Bayes");
    resultSVML = trainModels(var, SVC(kernel="linear",probability=True), "SVM Linear");
    resultSVMR = trainModels(var,  SVC(kernel="rbf",probability=True), "SVM RBF");
    resultCART = trainModels(var, DecisionTreeClassifier(splitter="random"),"CART");

if __name__ == "__main__":
  main()

acc_variance: 0.76
Classifier: Naive Bayes
Accumulated Variance: 0.75
Qtd of components: 5
Accuracy: 0.77
True Positive Rating: 0.9565217391304348
True Negative Rating: 0.6111111111111112
Principal Components Values: [9.46442617 7.88012699 6.44173723 5.45120698 5.23886846]

Confusion Matrix
 [[33 21]
 [ 2 44]]
acc_variance: 0.76
Classifier: SVM Linear
Accumulated Variance: 0.75
Qtd of components: 5
Accuracy: 0.81
True Positive Rating: 0.8461538461538461
True Negative Rating: 0.7708333333333334
Principal Components Values: [9.46442617 7.88012699 6.44173723 5.45120698 5.23886846]

Confusion Matrix
 [[37 11]
 [ 8 44]]
acc_variance: 0.76
Classifier: SVM RBF
Accumulated Variance: 0.75
Qtd of components: 5
Accuracy: 0.75
True Positive Rating: 0.8653846153846154
True Negative Rating: 0.625
Principal Components Values: [9.46442617 7.88012699 6.44173723 5.45120698 5.23886846]

Confusion Matrix
 [[30 18]
 [ 7 45]]
acc_variance: 0.76
Classifier: CART
Accumulated Variance: 0.75
Qtd of components: 