In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, ConfusionMatrixDisplay, roc_curve
import matplotlib.pyplot as plt
import joblib
import requests
import io

# Train and evaluate a random-forest classifier on the Cleveland Heart Disease
# data, then persist the fitted model (and the scaler it depends on).
#
# Steps: download CSV -> parse -> drop incomplete rows -> binarise target ->
# stratified train/test split -> standardise -> fit -> report -> save.

# Load the Cleveland Heart Disease dataset
url = "https://drive.usercontent.google.com/download?id=1-6dPDup5aYIRuJGrw60cSTnL2jtm9P__&export=download&authuser=0"

# Download the CSV file from the URL. Without a timeout, requests waits
# forever on a stalled connection.
response = requests.get(url, timeout=30)
response.raise_for_status()  # Raise an exception for bad responses (4xx or 5xx)

# Show only the first few lines as a sanity check that CSV (and not, say, an
# HTML page) came back, instead of dumping the whole payload.
print("\n".join(response.text.splitlines()[:5]))

column_names = [
    "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
    "thalach", "exang", "oldpeak", "slope", "ca", "thal", "target"
]
# The file is read as header-less; "?" entries are treated as missing values.
data = pd.read_csv(io.StringIO(response.text), header=None, names=column_names, na_values="?")

# Data Preprocessing
data = data.dropna()
if data.empty:
    raise ValueError("No complete rows left after dropping missing values; check the downloaded file.")

# get_dummies only encodes object/string/category columns, so it leaves a
# fully numeric frame unchanged.
data = pd.get_dummies(data, drop_first=True)
X = data.drop("target", axis=1)
# Collapse the target to binary: any value above 0 becomes 1, everything else 0.
y = (data["target"] > 0).astype(int)

# Train-Test Split
# Split the raw features first so the scaler never sees test-set statistics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Fit the scaler on the training fold only, then apply it to both folds.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Kept so later cells that reference X_scaled still work; it now uses the
# training-fold statistics.
X_scaled = scaler.transform(X)

# Model Training
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Model Evaluation
print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("\nClassification Report:\n", classification_report(y_test, y_pred))

cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
plt.show()

# Column 1 of predict_proba is used as the score for the positive class.
y_score = model.predict_proba(X_test)[:, 1]
fpr, tpr, _ = roc_curve(y_test, y_score)
plt.plot(fpr, tpr)
plt.xlabel("False Positive Rate")
plt.ylabel("True Positive Rate")
plt.title("ROC Curve")
plt.show()

# Save Model
joblib.dump(model, "heart_disease_predictor.pkl")
# The model was trained on standardised features, so inference needs the same
# fitted scaler; persist it next to the model.
joblib.dump(scaler, "heart_disease_scaler.pkl")


FileNotFoundError: [Errno 2] No such file or directory: 'Datasets/Cleveland Dataset.csv'