In [23]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Load the heart disease dataset
data_frame = pd.read_csv('/content/sample_data/heart.csv')

# Split the dataset into training (75%)c and testing (25%) sets
train_df, test_df = train_test_split(data_frame, test_size=0.25, random_state=42)

# Split the features and target variable for the training and testing sets
X_train = train_df.drop('HeartDisease', axis=1)
y_train = train_df['HeartDisease']
X_test = test_df.drop('HeartDisease', axis=1)
y_test = test_df['HeartDisease']

# Define simulation parameters for each classifier
sim_params = {
    'k_nearest_neighbhor': {'k': 5},
    'decision_tree': {'maximun_depth': 5},
    'random_forest': {'n_estimators': 90, 'maximun_depth': 4}
}

# Define an empty list to store the results of each classifier
results = []

# Loop through each classifier and evaluate its performance
for classifier_name, classifier_params in sim_params.items():
    # Instantiate the classifier with the given parameters
    if classifier_name == 'k_nearest_neighbhor':
        clf = KNeighborsClassifier(n_neighbors=classifier_params['k'])
    elif classifier_name == 'decision_tree':
        clf = DecisionTreeClassifier(max_depth=classifier_params['maximun_depth'])
    elif classifier_name == 'random_forest':
        clf = RandomForestClassifier(n_estimators=classifier_params['n_estimators'], 
                                      max_depth=classifier_params['maximun_depth'])
    else:
        raise ValueError(f'Invalid classifier name: {classifier_name}')
    
    # Train the classifier on the training set
    clf.fit(X_train, y_train)
    
    # Make predictions on the testing set
    y_pred = clf.predict(X_test)
    
    # Calculate the confusion matrix and performance metrics
    conf_matrix = confusion_matrix(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    
    # Add the results to the list
    results.append({
        'Classifier': classifier_name,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1,
        'Confusion Matrix': conf_matrix
    })

# Print the results
for res in results:
    print(f"\nResults for {res['Classifier']}:\n")
    print("Confusion Matrix:\n", res['Confusion Matrix'])
    print("Accuracy:", res['Accuracy'])
    print("Precision:", res['Precision'])
    print("Recall:", res['Recall'])
    print("F1 Score:", res['F1 Score'])



Results for k_nearest_neighbhor:

Confusion Matrix:
 [[66 32]
 [40 92]]
Accuracy: 0.6869565217391305
Precision: 0.7419354838709677
Recall: 0.696969696969697
F1 Score: 0.71875

Results for decision_tree:

Confusion Matrix:
 [[ 86  12]
 [ 21 111]]
Accuracy: 0.8565217391304348
Precision: 0.9024390243902439
Recall: 0.8409090909090909
F1 Score: 0.8705882352941177

Results for random_forest:

Confusion Matrix:
 [[ 85  13]
 [ 15 117]]
Accuracy: 0.8782608695652174
Precision: 0.9
Recall: 0.8863636363636364
F1 Score: 0.8931297709923665
