In [64]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from google.colab import drive
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from imblearn.over_sampling import SMOTE

In [65]:
# Mounting Google Drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [66]:
# Load dataset
heart_data = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Biomedical Signal Analysis/heart_disease.csv')

In [67]:
heart_data.size
heart_data.shape

(919, 14)

In [68]:
# Display initial data info to understand structure
print("Dataset Info:")
print(heart_data.info())
print("\nSample Data:")
heart_data.head()


Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 919 entries, 0 to 918
Data columns (total 14 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   age           919 non-null    int64  
 1   sex           913 non-null    float64
 2   cp            918 non-null    float64
 3   trestbps      919 non-null    float64
 4   chol          919 non-null    float64
 5   fbs           913 non-null    float64
 6   restecg       914 non-null    float64
 7   thalch        919 non-null    float64
 8   exang         915 non-null    float64
 9   oldpeak       919 non-null    float64
 10  slope         917 non-null    float64
 11  ca            915 non-null    float64
 12  thal          919 non-null    int64  
 13  heart_status  915 non-null    float64
dtypes: float64(12), int64(2)
memory usage: 100.6 KB
None

Sample Data:


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalch,exang,oldpeak,slope,ca,thal,heart_status
0,63,0.0,0.0,145.0,233.0,1.0,2.0,150.0,,2.3,2.0,0.0,1,0.0
1,67,0.0,3.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,1.0,3.0,0,2.0
2,67,0.0,3.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,1.0,2.0,2,1.0
3,37,0.0,2.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,2.0,0.0,0,0.0
4,41,1.0,1.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,0.0,0.0,0,0.0


In [69]:
# Getting rid of rows with empty cells
heart_data = heart_data.fillna(0)
heart_data.size
heart_data.shape

(919, 14)

In [70]:
# Data preprocessing according to dataset description
# Encode categorical variables and standardize numeric features
# Mapping categorical columns to ensure all are numeric
heart_data.loc[:, 'sex'] = heart_data['sex'].astype(int)
heart_data.loc[:, 'cp'] = heart_data['cp'].astype(int)
heart_data.loc[:, 'fbs'] = heart_data['fbs'].astype(int)
heart_data.loc[:, 'restecg'] = heart_data['restecg'].astype(int)
heart_data.loc[:, 'exang'] = heart_data['exang'].astype(int)
heart_data.loc[:, 'slope'] = heart_data['slope'].astype(int)
heart_data.loc[:, 'thal'] = heart_data['thal'].astype(int)

# Defining features and labels
X = heart_data.drop(columns = 'heart_status', axis=1).values  # Features
y = heart_data['heart_status']  # Target

# Train-test split with stratification
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [71]:
print(X.shape, X_train.shape, X_test.shape)

(919, 13) (735, 13) (184, 13)


In [72]:
# Defining the neural network for multi-class classification
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(5, activation='softmax'))  # 5 output nodes for classes 0-4

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.0001), loss='sparse_categorical_crossentropy', metrics=['accuracy'])


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [73]:
# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=16, validation_split=0.2)


Epoch 1/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 106ms/step - accuracy: 0.1746 - loss: 1.6068 - val_accuracy: 0.2245 - val_loss: 1.5618
Epoch 2/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.2699 - loss: 1.5604 - val_accuracy: 0.3673 - val_loss: 1.5168
Epoch 3/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.3736 - loss: 1.4951 - val_accuracy: 0.4218 - val_loss: 1.4706
Epoch 4/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4518 - loss: 1.4356 - val_accuracy: 0.4422 - val_loss: 1.4281
Epoch 5/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.4391 - loss: 1.3970 - val_accuracy: 0.4490 - val_loss: 1.3892
Epoch 6/100
[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4074 - loss: 1.3849 - val_accuracy: 0.4354 - val_loss: 1.3545
Epoch 7/100
[1m37/37[0m [32m━

<keras.src.callbacks.history.History at 0x7c880acd9b70>

In [74]:
# Evaluate the model
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 130ms/step - accuracy: 0.6813 - loss: 0.9387
Test Loss: 0.9245287179946899, Test Accuracy: 0.6630434989929199


In [75]:
# Define the new patient data
new_patients = {
    "Patient CL": [64, 1, 3, 110, 211, 0, 0, 144, 1, 1.8, 1, 0, 2],
    "Patient CK": [63, 1, 3, 140, 463, 0, 2, 104, 0, 4, 1, 3, 2],
    "Patient CT": [54, 1, 0, 98, 306, 1, 0, 128, 1, 0, 1, 1, 0],
    "Patient CJ": [39, 0, 1, 120, 260, 0, 1, 202, 0, 0.9, 2, 0, 2],
    "Patient CD": [77, 1, 1, 356, 145, 0, 1, 143, 0, 0, 1, 0, 2],
    "Patient CZ": [45, 0, 3, 102, 333, 0, 2, 117, 1, 0, 1, 3, 1],
    "Patient CA": [22, 1, 3, 108, 194, 1, 0, 136, 0, 1.9, 1, 3, 0],
    "Patient CF": [51, 0, 2, 190, 0, 0, 0, 92, 0, 2.4, 1, 2, 0],
    "Patient CV": [48, 0, 2, 140, 141.15, 0, 1, 93.34, 1, 1.5, 1, 3, 2],
    "Patient CM": [56, 0, 1, 135.82, 142, 1, 0, 226.17, 1, 2.545, 1, 2, 2]
}

# Convert to DataFrame
new_patients_df = pd.DataFrame(new_patients).T
new_patients_df.columns = ['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
                           'thalch', 'exang', 'oldpeak', 'slope', 'ca', 'thal']

# Preprocess the new patient data
new_patients_scaled = scaler.transform(new_patients_df)

# Predict heart disease status
predictions = model.predict(new_patients_scaled)
predicted_classes = np.argmax(predictions, axis=1)

# Display predictions
for patient, prediction in zip(new_patients.keys(), predicted_classes):
    print(f"{patient}: Predicted Heart Disease Status = {prediction}")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 140ms/step
Patient CL: Predicted Heart Disease Status = 1
Patient CK: Predicted Heart Disease Status = 3
Patient CT: Predicted Heart Disease Status = 0
Patient CJ: Predicted Heart Disease Status = 1
Patient CD: Predicted Heart Disease Status = 1
Patient CZ: Predicted Heart Disease Status = 3
Patient CA: Predicted Heart Disease Status = 3
Patient CF: Predicted Heart Disease Status = 2
Patient CV: Predicted Heart Disease Status = 3
Patient CM: Predicted Heart Disease Status = 3
