## AdaBoost Code

In [5]:
import warnings
# Suppress every warning for the rest of the kernel session.
# NOTE(review): with no category/module filter this also hides deprecation
# warnings raised by the libraries imported below -- consider narrowing it.
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
# Cross-validation utilities: stratified fold splitter and multi-metric scorer
from sklearn.model_selection import StratifiedKFold,  \
                                         cross_validate
# Boosting ensemble and the decision tree used as its weak learner
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

In [6]:
# Read the cleaned dataset from the notebook's working directory into a
# DataFrame, then show it as the cell's rich output.
data = pd.read_csv(filepath_or_buffer="dia_clean.csv")
data

Unnamed: 0.1,Unnamed: 0,preg,plas,pres,skin,mass,pedi,age,class
0,0,6,148.0,72.0,35.0,33.6,0.627,50,1
1,1,1,85.0,66.0,29.0,26.6,0.351,31,0
2,2,8,183.0,64.0,29.0,23.3,0.672,32,1
3,3,1,89.0,66.0,23.0,28.1,0.167,21,0
4,4,0,137.0,40.0,35.0,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
728,763,10,101.0,76.0,48.0,32.9,0.171,63,0
729,764,2,122.0,70.0,27.0,36.8,0.340,27,0
730,765,5,121.0,72.0,23.0,26.2,0.245,30,0
731,766,1,126.0,60.0,29.0,30.1,0.349,47,1


In [11]:
# Drop every leftover row-index column, not only "Unnamed: 0".
# The preview of the freshly loaded frame lists two such columns
# ("Unnamed: 0.1" and "Unnamed: 0"); pandas gives header-less CSV columns an
# "Unnamed: <n>" name, which is what a saved DataFrame index turns into when
# the file is read back. Removing just one of them leaves the other in `data`,
# and it would then be used as a predictive feature when X is built from all
# non-target columns.
# NOTE(review): metrics recorded further down were produced before this fix
# and should be regenerated.
index_artifact_cols = [
    col for col in data.columns if str(col).startswith("Unnamed")
]
# Dropping an empty list is a no-op, so this cell is safe to re-run.
data = data.drop(columns=index_artifact_cols)
data.head()

Unnamed: 0,preg,plas,pres,skin,mass,pedi,age,class
0,6,148.0,72.0,35.0,33.6,0.627,50,1
1,1,85.0,66.0,29.0,26.6,0.351,31,0
2,8,183.0,64.0,29.0,23.3,0.672,32,1
3,1,89.0,66.0,23.0,28.1,0.167,21,0
4,0,137.0,40.0,35.0,43.1,2.288,33,1


In [13]:
# Split the frame: the "class" column is the label vector, everything else
# forms the feature matrix.
y = data.loc[:, "class"]
X = data.drop("class", axis=1)

In [15]:
# Weak learner: a decision tree limited to a single split (a "stump"),
# seeded so repeated runs build the same tree.
base_estimator = DecisionTreeClassifier(random_state=42, max_depth=1)

# Boosted ensemble: 100 boosting rounds over the stump above, with an
# unshrunk learning rate of 1.0 and a fixed seed.
model = AdaBoostClassifier(
    random_state=42,
    learning_rate=1.0,
    n_estimators=100,
    estimator=base_estimator,
)

In [7]:
# Stratified 5-fold splitter: folds are shuffled with a fixed seed so the
# same partition is produced on every run.
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Metrics computed on each held-out fold; cross_validate returns them under
# the keys 'test_accuracy', 'test_precision', 'test_recall' and 'test_f1'.
scoring = ['accuracy', 'precision', 'recall', 'f1']

# Cross-validate the AdaBoost classifier bound to `model`.
# The earlier version passed `adaboost`, a name that is never assigned in this
# notebook, so the cell raised NameError on a fresh kernel (Restart & Run All).
results = cross_validate(
    model, X, y,
    cv=skf,
    scoring=scoring,
    return_train_score=False,  # only held-out fold scores are reported
)

In [8]:
# Report the per-fold score arrays, one line per metric, in a fixed order.
per_fold_report = (
    ("Accuracy per fold:", 'test_accuracy'),
    ("Precision per fold:", 'test_precision'),
    ("Recall per fold:", 'test_recall'),
    ("F1-score per fold:", 'test_f1'),
)
for heading, result_key in per_fold_report:
    print(heading, results[result_key])

Accuracy per fold: [0.7755102  0.76190476 0.74829932 0.7260274  0.73972603]
Precision per fold: [0.69565217 0.69047619 0.65116279 0.6        0.62      ]
Recall per fold: [0.62745098 0.56862745 0.56       0.6        0.62      ]
F1-score per fold: [0.65979381 0.62365591 0.60215054 0.6        0.62      ]


In [9]:
# Report the mean of each metric across the folds. The first heading carries
# a leading newline so the summary is separated from any preceding output.
mean_report = (
    ("\nMean Accuracy:", 'test_accuracy'),
    ("Mean Precision:", 'test_precision'),
    ("Mean Recall:", 'test_recall'),
    ("Mean F1-score:", 'test_f1'),
)
for heading, result_key in mean_report:
    fold_scores = results[result_key]
    print(heading, np.mean(fold_scores))


Mean Accuracy: 0.7502935420743639
Mean Precision: 0.6514582310173818
Mean Recall: 0.5952156862745098
Mean F1-score: 0.6211200532091786
