In [3]:
# Import necessary libraries
import pandas as pd
import numpy as np
import urllib.request
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# URL of the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"

# Retrieve and save the dataset
urllib.request.urlretrieve(url, "heart.csv")

# Column names as per the dataset description
column_names = [
    "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg", 
    "thalach", "exang", "oldpeak", "slope", "ca", "thal", "target"
]

# Load the dataset into a DataFrame
df = pd.read_csv("heart.csv", names=column_names)

# Replace missing values marked with '?' with NaN and drop those rows
df.replace('?', np.nan, inplace=True)
df.dropna(inplace=True)

# Convert columns to the appropriate data type
df = df.astype(float)

# Split the data into features and target
X = df.drop("target", axis=1)
y = df["target"]

# Normalize the feature data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Logistic Regression
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)
y_pred_log_reg = log_reg.predict(X_test)

print("Logistic Regression Results")
print("Accuracy:", accuracy_score(y_test, y_pred_log_reg))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_log_reg))
print("Classification Report:\n", classification_report(y_test, y_pred_log_reg))

# Decision Tree Classifier
tree_clf = DecisionTreeClassifier(random_state=42)
tree_clf.fit(X_train, y_train)
y_pred_tree = tree_clf.predict(X_test)

print("\nDecision Tree Classifier Results")
print("Accuracy:", accuracy_score(y_test, y_pred_tree))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_tree))
print("Classification Report:\n", classification_report(y_test, y_pred_tree))


Logistic Regression Results
Accuracy: 0.5666666666666667
Confusion Matrix:
 [[47  1  0  1  0]
 [ 9  2  1  4  1]
 [ 3  2  2  1  2]
 [ 1  3  4  0  3]
 [ 2  0  0  1  0]]
Classification Report:
               precision    recall  f1-score   support

         0.0       0.76      0.96      0.85        49
         1.0       0.25      0.12      0.16        17
         2.0       0.29      0.20      0.24        10
         3.0       0.00      0.00      0.00        11
         4.0       0.00      0.00      0.00         3

    accuracy                           0.57        90
   macro avg       0.26      0.26      0.25        90
weighted avg       0.49      0.57      0.52        90


Decision Tree Classifier Results
Accuracy: 0.5222222222222223
Confusion Matrix:
 [[38  8  1  1  1]
 [ 5  5  3  2  2]
 [ 1  4  2  2  1]
 [ 1  3  3  2  2]
 [ 0  1  1  1  0]]
Classification Report:
               precision    recall  f1-score   support

         0.0       0.84      0.78      0.81        49
         1.0  