In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [9]:
import pandas as pd
import numpy as np
import math

In [7]:
dataset = pd.read_csv('/content/drive/MyDrive/heart attack/heart.csv')

In [8]:
dataset

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,57,0,0,140,241,0,1,123,1,0.2,1,0,3,0
299,45,1,3,110,264,0,1,132,0,1.2,1,0,3,0
300,68,1,0,144,193,1,1,141,0,3.4,1,2,3,0
301,57,1,0,130,131,0,1,115,1,1.2,1,1,3,0


In [26]:
X = np.array(dataset[['thalach', 'trestbps', 'chol']])
y = np.array(dataset['target'])

In [67]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [75]:
class gaussClf:
    
    def separate_by_classes(self, X, y):
        ''' This function separates our dataset in subdatasets by classes '''
        self.classes = np.unique(y)
        classes_index = {}
        subdatasets = {}
        cls, counts = np.unique(y, return_counts=True)
        self.class_freq = dict(zip(cls, counts))
        print(self.class_freq)
        for class_type in self.classes:
            classes_index[class_type] = np.argwhere(y==class_type)
            subdatasets[class_type] = X[classes_index[class_type], :]
            self.class_freq[class_type] = self.class_freq[class_type]/sum(list(self.class_freq.values()))
        return subdatasets

    def fit(self, X, y):
        ''' The fitting function '''
        separated_X = self.separate_by_classes(X, y)
        self.means = {}
        self.std = {}
        for class_type in self.classes:
            # Here we calculate the mean and the standart deviation from datasets
            self.means[class_type] = np.mean(separated_X[class_type], axis=0)[0]
            self.std[class_type] = np.std(separated_X[class_type], axis=0)[0]

    def calculate_probability(self, x, mean, stdev):
        ''' This function calculates the class probability using gaussian distribution '''
        exponent = math.exp(-((x - mean) ** 2 / (2 * stdev ** 2)))
        return (1 / (math.sqrt(2 * math.pi) * stdev)) * exponent

    def predict_proba(self, X):
      ''' This function predicts the probability for every class '''
      self.class_prob = {cls:math.log(self.class_freq[cls], math.e) for cls in self.classes}
      for cls in self.classes:
          for i in range(len(self.means)):
              self.class_prob[cls]+=math.log(self.calculate_probability(X[i], self.means[cls][i], self.std[cls][i]), math.e)
      self.class_prob = {cls: math.e**self.class_prob[cls] for cls in self.class_prob}
      return self.class_prob

    def predict(self, X):
      ''' This funtion predicts the class of a sample '''
      pred = []
      for x in X:
          pred_class = None
          max_prob = 0
          for cls, prob in self.predict_proba(x).items():
              if prob>max_prob:
                  max_prob = prob
                  pred_class = cls
          pred.append(pred_class)
      return pred

    def test_score(self, X_test, y_test):
      y_pred = self.predict(X_test)
      count = 0
      for i in range(len(y_pred)):
        if y_pred[i] == y_test[i]:
          count += 1
      return count / len(y_pred)

In [76]:
gn = gaussClf()
gn.fit(X_train,y_train)

{0: 109, 1: 133}


In [77]:
gn.test_score(X_test, y_test)

0.7049180327868853

In [78]:
from sklearn.metrics import classification_report

y_pred = gn.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.82      0.48      0.61        29
           1       0.66      0.91      0.76        32

    accuracy                           0.70        61
   macro avg       0.74      0.69      0.69        61
weighted avg       0.74      0.70      0.69        61



In [79]:
from sklearn.naive_bayes import GaussianNB
clf = GaussianNB()
clf.fit(X_train, y_train)

GaussianNB()

In [80]:
clf.score(X_test, y_test)

0.7540983606557377

In [81]:
from sklearn.metrics import classification_report

y_pred = clf.predict(X_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.77      0.69      0.73        29
           1       0.74      0.81      0.78        32

    accuracy                           0.75        61
   macro avg       0.76      0.75      0.75        61
weighted avg       0.76      0.75      0.75        61

