

[Dataset](https://www.kaggle.com/datasets/rashikrahmanpritom/heart-attack-analysis-prediction-dataset)



In [113]:
import numpy as np
np.set_printoptions(suppress=True) # Suprime notação cientifica
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix,roc_curve,auc
from sklearn.decomposition import PCA
import pandas as pd 
import random
import matplotlib.pyplot as plt

In [114]:
# Mount Google Drive in the Colab runtime at /content/gdrive; the next cell
# reads the raw CSV from a path under this mount point.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [115]:
# Drop the first line of the dataset, which holds the attribute names
!sed 1d "/content/gdrive/MyDrive/Reconhecimento de Padrões/Trabalho/heart.csv" > dataset.csv

# Feature columns (comma-separated fields 1-13)
!cut dataset.csv -d"," -f1-13 > data.csv

# Label column (comma-separated field 14)
!cut dataset.csv -d"," -f14 > labels.lab

In [116]:
# Data loading
def loadData(f_data="data.csv",f_labels="labels.lab"):
  """Load the feature matrix and the binary label vector from disk.

  Parameters
  ----------
  f_data : str
      Path to a comma-separated file of numeric values, parsed with
      ``np.loadtxt``.
  f_labels : str
      Path to a text file with one label per line.

  Returns
  -------
  X : numpy.ndarray
      The array parsed from ``f_data``.
  y : numpy.ndarray
      Integer array holding 1 for every line whose content is ``"1"`` and 0
      for every other line.

  Notes
  -----
  Prints the shape of the class-0 subset and then of the class-1 subset of
  ``y`` as a quick class-balance check.
  """
  X = np.loadtxt(f_data, delimiter=",")

  # The context manager closes the file handle even if parsing fails.
  with open(f_labels, "r") as labels_file:
    # Compare the stripped line so that a final line without a trailing
    # newline (or with stray whitespace) is still recognised as class 1.
    y = np.array(
      [1 if line.strip() == "1" else 0 for line in labels_file],
      dtype=int,
    )

  print((y[y == 0]).shape)
  print((y[y == 1]).shape)
  return X, y

In [117]:
def standardizeData(data,method="min"):
  """Rescale each column of a data matrix independently.

  Statistics are computed along axis 0, i.e. one offset/scale pair per
  column.

  Parameters
  ----------
  data : array-like
      Numeric data; converted to a float ndarray. Intended for a 2-D
      matrix with one column per feature.
  method : {"min", "std"}
      ``"min"`` applies min-max scaling, ``(x - min) / (max - min)``.
      ``"std"`` applies z-score scaling, ``(x - mean) / std`` using the
      sample standard deviation (``ddof=1``).

  Returns
  -------
  numpy.ndarray
      Rescaled data with the same shape as the input.

  Raises
  ------
  ValueError
      If ``method`` is not one of the supported names.
  """
  data = np.asarray(data, dtype=float)

  if method == "min":  # Min-max scaling
    offset = data.min(axis=0)
    scale = data.max(axis=0) - offset
  elif method == "std":  # Z-score: divide by the standard deviation, not the variance
    offset = data.mean(axis=0)
    scale = data.std(axis=0, ddof=1)
  else:
    raise ValueError(
      f"Unknown standardization method {method!r}; expected 'min' or 'std'"
    )

  # A constant column has zero spread; dividing by 1 instead maps it to all
  # zeros rather than producing NaN from 0/0.
  scale = np.where(scale == 0, 1.0, scale)

  return (data - offset) / scale

In [118]:
# Split the dataset into training and test sets (test_size=0.33 by default)
def dataSplit(X,y,test_size=0.33,shuffle=True,random_state=None):
  """Split features and labels into training and test subsets.

  Thin wrapper around ``train_test_split``.

  Parameters
  ----------
  X, y
      Features and labels, forwarded to ``train_test_split``.
  test_size : float
      Forwarded to ``train_test_split`` as ``test_size``.
  shuffle : bool
      Forwarded to ``train_test_split`` as ``shuffle``.
  random_state : int or None
      Seed forwarded to ``train_test_split``. Pass a fixed value to obtain a
      reproducible split. When ``None`` (the default) a seed is drawn with
      ``random.randint(10, 48)`` on every call, which is the behaviour this
      function had before the parameter existed.

  Returns
  -------
  tuple
      ``(X_train, X_test, y_train, y_test)``.
  """
  if random_state is None:
    # Legacy behaviour: a different seed on each call.
    random_state = random.randint(10, 48)

  X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_size, random_state=random_state, shuffle=shuffle
  )
  return X_train, X_test, y_train, y_test

In [119]:
def printInformation(name, n_components, acc, cm, tpr, tnr,singular_vals):
  """Print a plain-text evaluation report for one classifier run.

  The report consists of a banner line followed by the classifier name, the
  number of components, the accuracy, the true-positive and true-negative
  rates, the principal-component values and finally the confusion matrix
  under its own heading. Every argument is interpolated as-is into the text.
  """
  report_lines = (
    "========================Informations==================================",
    f"Classifier: {name}",
    f"Qtd of components: {n_components}",
    f"Accuracy: {acc}",
    f"True Positive Rating: {tpr}",
    f"True Negative Rating: {tnr}",
    f"Principal Components Values: {singular_vals}",
    f"\nConfusion Matrix\n {cm}",
  )
  # One write for the whole report; the text is identical to printing each
  # entry on its own line.
  print("\n".join(report_lines))

In [120]:
def trainModels(var, model, classifier_name):
  """Train and evaluate one classifier on the PCA-reduced dataset.

  Steps: load the data, rescale the features, project them with PCA, split
  into train/test, fit ``model`` on the training part, predict the test part
  and derive accuracy / TPR / TNR from the confusion matrix. A report is
  printed via ``printInformation``.

  Parameters
  ----------
  var
      Forwarded unchanged to ``PCA(n_components=var)``.
  model
      Estimator exposing ``fit(X, y)`` and ``predict(X)``.
  classifier_name : str
      Label used in the printed report.

  Returns
  -------
  tuple
      ``(acc, tpr, tnr, cm)``: accuracy, true-positive rate, true-negative
      rate and the 2x2 confusion matrix (rows = true class, columns =
      predicted class, class order ``[0, 1]``).
  """
  X, y = loadData()  # Load the data

  # Rescale the features (standardizeData's default method)
  X = standardizeData(X)

  # PCA: fit the projection and reduce the dimensionality in one step.
  # NOTE(review): scaling and PCA are fitted on the full dataset before the
  # train/test split, so test samples influence the projection. Consider
  # fitting both on the training split only.
  pca = PCA(n_components=var)
  new_dataset = pca.fit_transform(X)

  # Split into training and test sets, then train
  X_train, X_test, y_train, y_test = dataSplit(new_dataset, y)
  model.fit(X_train, y_train)

  # Predicted class for every test sample
  pred = model.predict(X_test)

  singular_vals = pca.singular_values_  # Singular values of the kept components
  acc_variance = round(sum(pca.explained_variance_ratio_), 6)
  print(f"acc_variance: {acc_variance}")

  # Confusion matrix: first argument is the ground truth, second is what the
  # model predicted. Pinning labels=[0, 1] guarantees a 2x2 matrix even when
  # one class is absent from the test split or from the predictions, so the
  # four-way unpacking below cannot fail.
  cm = confusion_matrix(y_test, pred, labels=[0, 1])
  tn, fp, fn, tp = cm.ravel()

  # Accuracy, true-positive rate, true-negative rate
  acc = (tn + tp) / (tn + fp + fn + tp)
  tpr = tp / (tp + fn)
  tnr = tn / (tn + fp)

  printInformation(classifier_name, pca.n_components_, acc, cm, tpr, tnr, singular_vals)

  return acc, tpr, tnr, cm


**Documentação**


[SVM](https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC)




1.  [Kernels](https://scikit-learn.org/stable/auto_examples/svm/plot_svm_kernels.html#)


[NB](https://scikit-learn.org/stable/modules/generated/sklearn.naive_bayes.GaussianNB.html?highlight=gaussiannb#sklearn.naive_bayes.GaussianNB)

[DecisionTreeClassifier](https://scikit-learn.org/stable/modules/generated/sklearn.tree.DecisionTreeClassifier.html?highlight=decisiontreeclassifier#sklearn.tree.DecisionTreeClassifier)






In [121]:
def main():
  """Evaluate every classifier for each PCA component count.

  For each value in ``components`` the four classifiers are trained and
  evaluated through ``trainModels`` in a fixed order: Naive Bayes, linear
  SVM, RBF SVM, CART.

  Returns
  -------
  dict
      Maps ``(classifier_name, n_components)`` to the ``(acc, tpr, tnr, cm)``
      tuple returned by ``trainModels``, so results can be compared or
      plotted afterwards instead of being discarded.
  """
  components = [5, 8, 13]
  results = {}

  for cp in components:
    # Fresh estimator instances for every component count so no fitted
    # state is carried over between runs.
    classifiers = [
      (GaussianNB(), "Naive Bayes"),
      (SVC(kernel="linear", probability=True), "SVM Linear"),
      (SVC(kernel="rbf", probability=True), "SVM RBF"),
      (DecisionTreeClassifier(splitter="random"), "CART"),
    ]
    for model, classifier_name in classifiers:
      results[(classifier_name, cp)] = trainModels(cp, model, classifier_name)

  return results

if __name__ == "__main__":
  main()

(138,)
(165,)
acc_variance: 0.755613
Classifier: Naive Bayes
Qtd of components: 5
Accuracy: 0.85
True Positive Rating: 0.8771929824561403
True Negative Rating: 0.813953488372093
Principal Components Values: [9.46442617 7.88012699 6.44173723 5.45120698 5.23886846]

Confusion Matrix
 [[35  8]
 [ 7 50]]
(138,)
(165,)
acc_variance: 0.755613
Classifier: SVM Linear
Qtd of components: 5
Accuracy: 0.84
True Positive Rating: 0.8421052631578947
True Negative Rating: 0.8372093023255814
Principal Components Values: [9.46442617 7.88012699 6.44173723 5.45120698 5.23886846]

Confusion Matrix
 [[36  7]
 [ 9 48]]
(138,)
(165,)
acc_variance: 0.755613
Classifier: SVM RBF
Qtd of components: 5
Accuracy: 0.75
True Positive Rating: 0.8653846153846154
True Negative Rating: 0.625
Principal Components Values: [9.46442617 7.88012699 6.44173723 5.45120698 5.23886846]

Confusion Matrix
 [[30 18]
 [ 7 45]]
(138,)
(165,)
acc_variance: 0.755613
Classifier: CART
Qtd of components: 5
Accuracy: 0.78
True Positive Rating