In [None]:
import pandas as pd
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

# Fetch scikit-learn's bundled diabetes dataset
diabetes = load_diabetes()

# Wrap the feature matrix in a labelled DataFrame and the target in a Series
X = pd.DataFrame(data=diabetes.data, columns=diabetes.feature_names)
y = pd.Series(diabetes.target)

# Discretize the continuous target into three classes using fixed cut points
# at 100 and 200 (these are absolute thresholds, not percentiles):
#   0 -> target <= 100, 1 -> 100 < target <= 200, 2 -> target > 200
bin_edges = [-np.inf, 100, 200, np.inf]
class_labels = [0, 1, 2]
y = pd.cut(y, bins=bin_edges, labels=class_labels)

# Fail fast with a clear message if any feature value is missing. The
# original null check was commented out, so nothing was actually verified.
assert not X.isnull().values.any(), "X contains null values; impute or drop them before modelling"

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
# NOTE(review): the split is not stratified on y — consider stratify=y if the
# class balance between train and test matters.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardization: the scaler is fitted on the training rows only and the
# same fitted transform is then applied to the test rows.
sc = StandardScaler()
X_train_sc = sc.fit_transform(X_train)
X_test_sc = sc.transform(X_test)

# Base learners that are combined by the voting ensembles
lr = LogisticRegression(max_iter=1000)
knn = KNeighborsClassifier(n_neighbors=5)
dt = DecisionTreeClassifier(max_depth=3, random_state=42)

# Hard voting: ensemble of the three base learners built with voting='hard'
hard_estimators = [('lr', lr), ('knn', knn), ('dt', dt)]
vc_hard = VotingClassifier(estimators=hard_estimators, voting='hard')
vc_hard.fit(X_train_sc, y_train)

# Predict on the scaled test features and score against the held-out labels
y_pred_hard = vc_hard.predict(X_test_sc)
hard_accuracy = accuracy_score(y_test, y_pred_hard)
hard_report = classification_report(y_test, y_pred_hard)

print("Hard Voting Accuracy\n", hard_accuracy)
print("Hard Voting Classification Report\n", hard_report)

# Soft voting: the same three base learners, combined with voting='soft'
soft_estimators = [('lr', lr), ('knn', knn), ('dt', dt)]
vc_soft = VotingClassifier(estimators=soft_estimators, voting='soft')
vc_soft.fit(X_train_sc, y_train)

# Predict on the scaled test features and score against the held-out labels
y_pred_soft = vc_soft.predict(X_test_sc)
soft_accuracy = accuracy_score(y_test, y_pred_soft)
soft_report = classification_report(y_test, y_pred_soft)

print("Soft Voting Accuracy\n", soft_accuracy)
print("Soft Voting Classification Report\n", soft_report)


# Bagging: random forest of 100 trees, each limited to depth 3, with a fixed
# seed for reproducibility.
# NOTE(review): this import would normally live with the other imports at the
# top of the notebook; it is kept here so this section runs on its own.
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=42)
model.fit(X_train_sc, y_train)

# accuracy_score and classification_report are already imported at the top of
# the cell, so the redundant second import was removed.
y_pred = model.predict(X_test_sc)

print("Accuracy\n", accuracy_score(y_test, y_pred))
print("Classification Score\n", classification_report(y_test, y_pred))

Hard Voting Accuracy
 0.5617977528089888
Hard Voting Classification Report
               precision    recall  f1-score   support

           0       0.65      0.71      0.68        34
           1       0.43      0.47      0.45        32
           2       0.65      0.48      0.55        23

    accuracy                           0.56        89
   macro avg       0.57      0.55      0.56        89
weighted avg       0.57      0.56      0.56        89

Soft Voting Accuracy
 0.5842696629213483
Soft Voting Classification Report
               precision    recall  f1-score   support

           0       0.70      0.68      0.69        34
           1       0.45      0.56      0.50        32
           2       0.69      0.48      0.56        23

    accuracy                           0.58        89
   macro avg       0.61      0.57      0.58        89
weighted avg       0.61      0.58      0.59        89

Accuracy
 0.5617977528089888
Classification Score
               precision    recall  