In [8]:
import numpy as np
import pandas as pd
import requests
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

In [2]:
# URL of the processed Cleveland heart-disease data file on the UCI archive.
DATASET_HEART_DISEASE = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "heart-disease/processed.cleveland.data"
)


In [3]:
# Column labels for the data file, listed in file order.
COLUMN_NAMES = [
    'age',
    'sex',
    'cp',
    'trestbps',
    'chol',
    'fbs',
    'restecg',
    'thalach',
    'exang',
    'oldpeak',
    'slope',
    'ca',
    'thal',
    'target',
]

In [4]:
def download_data():
    """Download the heart-disease data set and save it as 'heart_disease.csv'.

    The file is fetched from DATASET_HEART_DISEASE and written, as text, to
    'heart_disease.csv' in the current working directory, overwriting any
    existing file.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.RequestException: on connection problems or timeout.
    """
    # A timeout keeps the cell from hanging forever on a stalled connection.
    response = requests.get(DATASET_HEART_DISEASE, timeout=30)
    # Fail loudly rather than silently saving an HTML error page as the CSV.
    response.raise_for_status()
    with open('heart_disease.csv', 'w') as f:
        f.write(response.text)

In [5]:
def load_data():
    """Load 'heart_disease.csv' and return only the complete rows.

    The file is read without a header row, so columns are labelled with
    COLUMN_NAMES. '?' entries are parsed as missing values, and every row
    containing a missing value is dropped.

    Returns:
        pandas.DataFrame: the rows that have no missing values.
    """
    raw = pd.read_csv(
        'heart_disease.csv',
        header=None,
        names=COLUMN_NAMES,
        na_values='?',
    )
    return raw.dropna()

In [11]:
def main():
    """Download the data, tune a random forest with grid search, and print test metrics.

    Steps:
      1. Download and load the data set.
      2. Hold out 20% of the rows as a test set (fixed random_state).
      3. Standardise the features, fitting the scaler on the training rows only.
      4. Run a 5-fold cross-validated grid search over random-forest
         hyperparameters (3 * 4 * 3 * 3 = 108 candidates).
      5. Print accuracy and a per-class classification report for the
         held-out rows.

    Returns:
        None. All results are printed.
    """
    download_data()
    df = load_data()

    # Split the data
    X = df.drop('target', axis=1)
    y = df['target']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Scale the data. The scaler is fitted on the training rows only, so no
    # test-set statistics leak into training.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train the Random Forest Classifier and tune hyperparameters using GridSearchCV
    params = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }
    clf = RandomForestClassifier(random_state=42)
    grid_search = GridSearchCV(clf, params, cv=5, n_jobs=-1, verbose=1)
    grid_search.fit(X_train_scaled, y_train)

    # Evaluate the model
    best_clf = grid_search.best_estimator_
    y_pred = best_clf.predict(X_test_scaled)
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print("Classification Report:")
    # Some classes may never be predicted; zero_division=0 reports their
    # precision as 0.0 without raising UndefinedMetricWarning on every run.
    print(classification_report(y_test, y_pred, zero_division=0))

In [12]:
# Run the full pipeline when this code is executed directly rather than imported.
if __name__ == '__main__':
    main()

Fitting 5 folds for each of 108 candidates, totalling 540 fits
Accuracy: 0.65
Classification Report:
              precision    recall  f1-score   support

           0       0.75      1.00      0.86        36
           1       0.00      0.00      0.00         9
           2       0.20      0.20      0.20         5
           3       0.40      0.29      0.33         7
           4       0.00      0.00      0.00         3

    accuracy                           0.65        60
   macro avg       0.27      0.30      0.28        60
weighted avg       0.51      0.65      0.57        60



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
