In [1]:
import warnings

import joblib
import numpy as np
import pandas as pd
warnings.filterwarnings('ignore')
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Remote CSV holding the Heart Disease dataset
url = "https://raw.githubusercontent.com/goradbj1/dataairevolution/refs/heads/main/datasets/heart_disease_data.csv"

# Read the CSV straight from the URL, using its first column as the row index,
# then preview the first five rows
df = pd.read_csv(filepath_or_buffer=url, index_col=0)
df.head(n=5)

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,63.0,1.0,1.0,145.0,233.0,1.0,2.0,150.0,0.0,2.3,3.0,0.0,6.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,2.0,108.0,1.0,1.5,2.0,3.0,3.0,2
2,67.0,1.0,4.0,120.0,229.0,0.0,2.0,129.0,1.0,2.6,2.0,2.0,7.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0.0,187.0,0.0,3.5,3.0,0.0,3.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,2.0,172.0,0.0,1.4,1.0,0.0,3.0,0


In [3]:
# Keep a small set of feature columns plus the label, clean them, and binarize
# the label in a single chain so the cell produces a fresh frame each time it runs.
#   1. select the columns of interest
#   2. treat '?' as a missing-value marker: convert it to NaN and drop those rows
#   3. collapse the label to binary (0 = no disease, 1 = disease)
# The label is derived with a vectorized comparison instead of a per-row lambda,
# and .assign() returns a new frame rather than writing into a derived one, which
# avoids pandas' chained-assignment warning (otherwise hidden by the global
# warnings filter). 'int64' keeps the label dtype explicit.
df = (
    df[['age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'target']]
    .replace('?', np.nan)
    .dropna()
    .assign(target=lambda frame: (frame['target'] > 0).astype('int64'))
)
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,target
0,63.0,1.0,1.0,145.0,233.0,1.0,0
1,67.0,1.0,4.0,160.0,286.0,0.0,1
2,67.0,1.0,4.0,120.0,229.0,0.0,1
3,37.0,1.0,3.0,130.0,250.0,0.0,0
4,41.0,0.0,2.0,130.0,204.0,0.0,0
...,...,...,...,...,...,...,...
298,45.0,1.0,1.0,110.0,264.0,0.0,1
299,68.0,1.0,4.0,144.0,193.0,1.0,1
300,57.0,1.0,4.0,130.0,131.0,0.0,1
301,57.0,0.0,2.0,130.0,236.0,0.0,1


In [4]:
# Separate the label column from the predictor columns
y = df['target']
X = df.drop(columns='target')

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [5]:
# Initialize AdaBoostClassifier with a depth-1 decision tree (a stump) as the
# weak learner and three boosting rounds.
#
# scikit-learn 1.2 renamed the `base_estimator` keyword to `estimator`, and 1.4
# removed the old name, so the original call raises TypeError on current
# releases. Try the current keyword first and fall back for older installs.
# random_state pins the fit so re-running the notebook reproduces the results
# (same seed as the train/test split).
stump = DecisionTreeClassifier(max_depth=1)
try:
    ada_classifier = AdaBoostClassifier(estimator=stump, n_estimators=3, random_state=42)
except TypeError:
    # scikit-learn < 1.2 only understands the old keyword name
    ada_classifier = AdaBoostClassifier(base_estimator=stump, n_estimators=3, random_state=42)

# Fit the model
ada_classifier.fit(X_train, y_train)

In [6]:
# Inspect the fitted ensemble: how many weak learners it holds
weak_learners = ada_classifier.estimators_
print("Number of weak learners:", len(weak_learners))

# Show the per-learner weights stored on the fitted model.
# NOTE(review): the recorded output shows every weight as 1.0, which looks like
# the SAMME.R variant (it keeps estimator weights fixed at 1) — confirm which
# algorithm is in use before reading these as per-learner alpha values.
learner_weights = ada_classifier.estimator_weights_
print("\nAlpha values (weights) of weak learners:")
print(learner_weights)

Number of weak learners: 3

Alpha values (weights) of weak learners:
[1. 1. 1.]


In [7]:
# Report how each individual weak learner (decision stump) does on the training data.
#
# AdaBoost converts X to a NumPy array before fitting its weak learners, so the
# stumps were trained without column names. Passing the DataFrame directly makes
# every predict() call emit a feature-names mismatch warning (silenced only by
# the global warnings filter). Convert once, outside the loop, and reuse it.
X_train_values = X_train.to_numpy()

for i, tree in enumerate(ada_classifier.estimators_):
    # Header for weak learner i
    print(f"\nWeak Learner {i + 1}:")

    # Predictions of this single stump on the training data
    train_pred = tree.predict(X_train_values)
    print(f"Weak Learner Predictions (Training Data): {train_pred}")

    # Fraction of training labels this stump gets right on its own
    accuracy = accuracy_score(y_train, train_pred)
    print(f"Weak Learner Training Accuracy: {accuracy:.2f}")


Weak Learner 1:
Weak Learner Predictions (Training Data): [0 0 0 0 1 0 0 1 1 0 0 0 0 1 0 0 1 1 1 0 0 0 0 1 0 1 0 0 1 0 0 1 0 1 1 1 1
 1 1 1 0 1 0 1 0 1 0 1 0 1 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 1 1 0 1 0
 0 1 1 1 0 1 1 0 1 0 0 1 1 0 0 1 1 1 0 1 0 1 0 1 0 0 1 0 0 1 0 0 1 1 0 1 0
 1 1 1 1 0 0 0 0 1 0 1 0 0 0 1 0 1 0 0 1 0 1 0 0 1 0 1 0 1 1 0 0 1 0 0 0 1
 0 1 0 1 1 0 0 1 1 0 0 1 1 0 1 1 1 0 0 1 0 0 1 1 1 0 0 1 1 0 0 0 1 0 1 0 1
 1 1 0 1 0 0 0 1 0 0 0 1 0 1 1 0 1 1 1 0 0 1 0 1 0 0 0 1 0 1 0 0 0 0 0 0 1
 1 0 0 0 1 0 0 0 1 1 0 1 1 0 0 0 1 1 1 1]
Weak Learner Training Accuracy: 0.75

Weak Learner 2:
Weak Learner Predictions (Training Data): [1 1 1 0 1 1 0 0 1 0 1 1 1 0 0 1 0 0 1 1 0 1 1 0 0 1 1 1 1 1 1 1 1 1 0 1 0
 1 1 1 1 0 0 0 1 1 1 1 1 1 0 1 0 0 1 1 1 1 0 1 1 1 1 0 1 1 1 1 0 0 1 1 0 1
 1 0 0 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 1 0 0 1 0 1 0 1 1 1 1 1 1 1
 1 1 1 0 0 1 0 0 1 0 1 1 0 1 1 1 1 0 0 1 1 1 0 1 1 0 1 0 1 0 0 0 0 0 1 1 0
 0 1 1 1 1 0 1 1 1 0 1 1 0 1 1 0 1 1 0 1 1 1 0 0 1 1 

In [8]:
# Score the full ensemble on the held-out test set
final_pred = ada_classifier.predict(X_test)
final_accuracy = accuracy_score(y_test, final_pred)

# Report the predicted labels, then the overall test accuracy
print("\nFinal Predictions of the AdaBoost model (Test Data):", final_pred)
print(f"Final Test Accuracy: {final_accuracy:.2f}")


Final Predictions of the AdaBoost model (Test Data): [0 1 1 1 0 1 1 1 1 1 0 0 0 1 1 0 0 1 1 1 0 0 1 0 1 0 1 1 1 1 0 1 0 0 1 1 1
 1 1 1 0 0 0 0 1 1 0 0 1 0 1 0 0 0 1 1 0 0 1 1 1]
Final Test Accuracy: 0.79


In [9]:
# Save the fitted model to disk (joblib is imported with the other
# dependencies in the first cell, so a fresh Run All resolves it up front).
# NOTE: the file is a pickle — only load it back from a location you trust.
joblib.dump(ada_classifier, 'ada_boost_model.pkl')

['ada_boost_model.pkl']