In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Reading the data

In [2]:
df = pd.read_csv('heart.csv')
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2,1
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2,1
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2,1
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2,1


In [81]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB


In [22]:
df.isna().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [3]:
x, y = df.drop('target', axis=1), df['target']

In [4]:
x.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,63,1,3,145,233,1,0,150,0,2.3,0,0,1
1,37,1,2,130,250,0,1,187,0,3.5,0,0,2
2,41,0,1,130,204,0,0,172,0,1.4,2,0,2
3,56,1,1,120,236,0,1,178,0,0.8,2,0,2
4,57,0,0,120,354,0,1,163,1,0.6,2,0,2


In [5]:
y.head()

0    1
1    1
2    1
3    1
4    1
Name: target, dtype: int64

## Data preprocessing

In [6]:
# min-max normalization
def normalize(data, min_vals=None, max_vals=None):
    if min_vals is None:
        min_vals = np.min(data, axis=0)
    if max_vals is None:
        max_vals = np.max(data, axis=0)
    normalized_data = (data - min_vals) / (max_vals - min_vals + 1e-8)
    return normalized_data, min_vals, max_vals

In [7]:
# split data into train and test sets
def train_test_split(x, y, test_size=0.2, random_state=None):
    if random_state is not None:
        np.random.seed(random_state)
    
    num_samples = len(x)
    num_test_samples = int(test_size * num_samples)
    
    indices = np.arange(num_samples)
    np.random.shuffle(indices)
    
    if isinstance(x, pd.DataFrame):
        x = x.reset_index(drop=True).to_numpy()
    if isinstance(y, pd.DataFrame):
        y = y.reset_index(drop=True).to_numpy()
    
    x_train = x[indices[num_test_samples:]]
    y_train = y[indices[num_test_samples:]]
    x_test = x[indices[:num_test_samples]]
    y_test = y[indices[:num_test_samples]]
    
    return x_train, x_test, y_train, y_test

In [8]:
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)
y_train, y_test = y_train.to_numpy(), y_test.to_numpy()

In [9]:
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

In [10]:
x_train_norm, min_vals_train, max_vals_train = normalize(x_train)

In [11]:
min_vals_train.shape

(13,)

In [12]:
x_test.shape

(60, 13)

In [13]:
x_test_norm, _, _ = normalize(x_test, min_vals_train, max_vals_train)

## MLP

In [14]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

In [15]:
def sigmoid_derivative(x):
    return x * (1 - x)

In [19]:
class MLP:
    def __init__(self, input_dim, hidden_units, output_dim, learning_rate):
        self.input_dim = input_dim
        self.hidden_units = hidden_units
        self.output_dim = output_dim
        self.learning_rate = learning_rate

        # Initialize weights with random values
        self.weights1 = np.random.randn(self.input_dim, self.hidden_units)
        self.weights2 = np.random.randn(self.hidden_units, self.output_dim)

    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        return x * (1 - x)

    def forward_propagation(self, X):
        self.hidden_layer_input = np.dot(X, self.weights1)
        self.hidden_layer_output = self.sigmoid(self.hidden_layer_input)
        self.output_layer_input = np.dot(self.hidden_layer_output, self.weights2)
        self.output = self.sigmoid(self.output_layer_input)

    def backward_propagation(self, X, y):
        self.error = y - self.output
        self.output_delta = self.error * self.sigmoid_derivative(self.output)

        self.hidden_error = self.output_delta.dot(self.weights2.T)
        self.hidden_delta = self.hidden_error * self.sigmoid_derivative(self.hidden_layer_output)

        self.weights2 += self.hidden_layer_output.T.dot(self.output_delta) * self.learning_rate
        self.weights1 += X.T.dot(self.hidden_delta) * self.learning_rate

    def train(self, X_train, y_train, X_test, y_test, epochs, batch_size, tolerance=1e-5):
        self.error_ = []
        for epoch in range(epochs):
            for i in range(0, X_train.shape[0], batch_size):
                X_batch = X_train[i:i+batch_size]
                y_batch = y_train[i:i+batch_size]
                self.forward_propagation(X_batch)
                self.backward_propagation(X_batch, y_batch)

            # Compute error for the epoch
            self.forward_propagation(X_test)
            error = np.mean(np.abs(y_test - self.output))
            self.error_.append(error)

            # Early stopping
            if len(self.error_) > 2 and np.abs(self.error_[-1] - self.error_[-2]) < tolerance:
                print(f"Early stopping on epoch {epoch}, with error {error}")
                break

    def predict(self, X):
        self.forward_propagation(X)
        return np.round(self.output)  # Round predictions to 0 or 1

    def confusion_matrix(self, X, y):
        predictions = self.predict(X)
        TP = np.sum((predictions == 1) & (y == 1))
        TN = np.sum((predictions == 0) & (y == 0))
        FP = np.sum((predictions == 1) & (y == 0))
        FN = np.sum((predictions == 0) & (y == 1))
        return TP, TN, FP, FN

    def precision(self, X, y):
        TP, _, FP, _ = self.confusion_matrix(X, y)
        return TP / (TP + FP)

    def accuracy(self, X, y):
        TP, TN, FP, FN = self.confusion_matrix(X, y)
        return (TP + TN) / (TP + TN + FP + FN)

    def sensitivity(self, X, y):  # Also known as recall
        TP, _, _, FN = self.confusion_matrix(X, y)
        return TP / (TP + FN)

    def specificity(self, X, y):
        _, TN, FP, _ = self.confusion_matrix(X, y)
        return TN / (TN + FP)

In [20]:
mlp = MLP(input_dim=x_train.shape[1], hidden_units=10, output_dim=1, learning_rate=0.01)
mlp.train(x_train_norm, y_train, x_test_norm, y_test, epochs=1000, batch_size=4)

In [21]:
precision = mlp.precision(x_test_norm, y_test)
accuracy = mlp.accuracy(x_test_norm, y_test)
sensitivity = mlp.sensitivity(x_test_norm, y_test)
specificity = mlp.specificity(x_test_norm, y_test)

print(f"Precision: {precision}")
print(f"Accuracy: {accuracy}")
print(f"Sensitivity: {sensitivity}")
print(f"Specificity: {specificity}")

Precision: 0.9032258064516129
Accuracy: 0.8833333333333333
Sensitivity: 0.875
Specificity: 0.8928571428571429


## Decision tree

In [82]:
df = pd.read_csv('heart.csv')

In [83]:
df.isna().sum()

age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal        0
target      0
dtype: int64

In [84]:
x = df.drop('target', axis=1)
y = df['target']

In [85]:
indices = np.arange(x.shape[0])
np.random.shuffle(indices)
split_idx = int(x.shape[0] * 0.8)

In [86]:
x_train, x_test = x.iloc[indices[:split_idx], :], x.iloc[indices[split_idx:], :]
y_train, y_test = y.iloc[indices[:split_idx]], y.iloc[indices[split_idx:]]

In [98]:
x_train = x_train.values
x_test = x_test.values
y_train = y_train.values
y_test = y_test.values

In [87]:
def entropy(y):
    hist = np.bincount(y)
    ps = hist / len(y)
    return -np.sum([p * np.log2(p) for p in ps if p > 0])

In [88]:
def confusion_matrix(y_true, y_pred):
    tp = sum((y_true == 1) & (y_pred == 1))
    tn = sum((y_true == 0) & (y_pred == 0))
    fp = sum((y_true == 0) & (y_pred == 1))
    fn = sum((y_true == 1) & (y_pred == 0))
    return tp, tn, fp, fn

In [89]:
def precision(y_true, y_pred):
    tp, _, fp, _ = confusion_matrix(y_true, y_pred)
    return tp / (tp + fp)

In [90]:
def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy

In [91]:
def recall(y_true, y_pred):
    tp, _, _, fn = confusion_matrix(y_true, y_pred)
    return tp / (tp + fn)

In [92]:
def specificity(y_true, y_pred):
    _, tn, fp, _ = confusion_matrix(y_true, y_pred)
    return tn / (tn + fp)

In [93]:
class Node:
    def __init__(self, predicted_class, feature_index=0, threshold=0, left=None, right=None):
        self.predicted_class = predicted_class
        self.feature_index = feature_index
        self.threshold = threshold
        self.left = left
        self.right = right

    def is_leaf_node(self):
        return self.left is None and self.right is None

In [94]:
from collections import Counter


class DecisionTree:
    def __init__(self, min_samples_split=2, max_depth=100, n_feats=None):
        self.min_samples_split = min_samples_split
        self.max_depth = max_depth
        self.n_feats = n_feats
        self.root = None

    def fit(self, X, y):
        self.n_feats = X.shape[1] if not self.n_feats else min(self.n_feats, X.shape[1])
        self.root = self._grow_tree(np.array(X), np.array(y))

    def predict(self, X):
        return np.array([self._traverse_tree(x, self.root) for x in X])

    def _grow_tree(self, X, y, depth=0):
        n_samples, n_features = X.shape
        n_labels = len(np.unique(y))

        if (depth >= self.max_depth
                or n_labels == 1
                or n_samples < self.min_samples_split):
            leaf_value = self._most_common_label(y)
            return Node(predicted_class=leaf_value)

        feat_idxs = np.random.choice(n_features, self.n_feats, replace=False)

        best_feat, best_thresh = self._best_criteria(X, y, feat_idxs)
        best_thresh = float(best_thresh)  # ensure best_thresh is a float
        
        left_idxs, right_idxs = self._split(X[:, best_feat], best_thresh)
        left = self._grow_tree(X[left_idxs, :], y[left_idxs], depth+1)
        right = self._grow_tree(X[right_idxs, :], y[right_idxs], depth+1)
        return Node(predicted_class=self._most_common_label(y), feature_index=best_feat, threshold=best_thresh, left=left, right=right)

    def _best_criteria(self, X, y, feat_idxs):
        best_gain = -1
        split_idx, split_thresh = None, None
        for feat_idx in feat_idxs:
            X_column = X[:, feat_idx]
            thresholds = np.unique(X_column)
            for threshold in thresholds:
                gain = self._information_gain(y, X_column, threshold)

                if gain > best_gain:
                    best_gain = gain
                    split_idx = feat_idx
                    split_thresh = threshold

        return split_idx, split_thresh

    def _information_gain(self, y, X_column, split_thresh):
        parent_entropy = entropy(y)

        left_idxs, right_idxs = self._split(X_column, split_thresh)

        if len(left_idxs) == 0 or len(right_idxs) == 0:
            return 0

        n = len(y)
        n_l, n_r = len(left_idxs), len(right_idxs)
        e_l, e_r = entropy(y[left_idxs]), entropy(y[right_idxs])
        child_entropy = (n_l / n) * e_l + (n_r / n) * e_r

        ig = parent_entropy - child_entropy
        return ig

    def _split(self, X_column, split_thresh):
        X_column = np.array(X_column)
        left_idxs = np.argwhere(X_column <= split_thresh).flatten()
        right_idxs = np.argwhere(X_column > split_thresh).flatten()
        return left_idxs, right_idxs

    def _traverse_tree(self, x, node):
        if node.is_leaf_node():
            return node.predicted_class

        if x[node.feature_index] <= node.threshold:
            return self._traverse_tree(x, node.left)
        return self._traverse_tree(x, node.right)

    def _most_common_label(self, y):
        counter = Counter(y)
        most_common = counter.most_common(1)[0][0]
        return most_common

In [102]:
tree = DecisionTree(max_depth=3)

In [103]:
tree.fit(x_train, y_train)

In [104]:
y_pred = tree.predict(x_test)

In [105]:
accuracy_score = accuracy(y_test, y_pred)
precision_score = precision(y_test, y_pred)
recall_score = recall(y_test, y_pred)
specificity_score = specificity(y_test, y_pred)

print(f"Accuracy: {accuracy_score}")
print(f"Precision: {precision_score}")
print(f"Recall: {recall_score}")
print(f"Specificity: {specificity_score}")

Accuracy: 0.7540983606557377
Precision: 0.7380952380952381
Recall: 0.8857142857142857
Specificity: 0.5769230769230769
