**Ensemble Learning**

In [None]:
import pandas as pd


# Read the raw dataset from the hard-coded CSV path into a DataFrame.
data = pd.read_csv(filepath_or_buffer='/content/diabetes_prediction_dataset.csv')

# Replace the 'gender' and 'smoking_history' columns with one indicator column
# per category value; every other column is carried over unchanged.
df = pd.get_dummies(data=data, columns=['gender', 'smoking_history'])

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Imported here because the scaler is only needed by this cell's preprocessing.
from sklearn.preprocessing import StandardScaler

# Features are every column except the target; the target is the 'diabetes' column.
X = df.drop(columns=['diabetes'])
y = df['diabetes']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# The original cell split a variable named `X_scaled` that is never defined in this
# notebook, so it failed with NameError on a fresh kernel. Scaling is restored here,
# fitted on the training split only so no test-set statistics leak into training.
# NOTE(review): the original scaling step is not visible; StandardScaler is an
# assumption -- confirm it matches the scaler used to produce the recorded outputs.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Bagging with Logistic Regression: 50 base learners, seeded for repeatability.
bagging = BaggingClassifier(estimator=LogisticRegression(max_iter=1000), n_estimators=50, random_state=42)
bagging.fit(X_train, y_train)
y_pred_bagging = bagging.predict(X_test)

# Score the held-out predictions against the true labels.
accuracy_bagging = accuracy_score(y_test, y_pred_bagging)
precision_bagging = precision_score(y_test, y_pred_bagging)
recall_bagging = recall_score(y_test, y_pred_bagging)
f1_bagging = f1_score(y_test, y_pred_bagging)

# Report all four metrics for the bagging model.
print("Bagging - Accuracy:", accuracy_bagging)
print("Bagging - Precision:", precision_bagging)
print("Bagging - Recall:", recall_bagging)
print("Bagging - F1 Score:", f1_bagging)


Bagging - Accuracy: 0.95895
Bagging - Precision: 0.8638228055783429
Bagging - Recall: 0.6165105386416861
Bagging - F1 Score: 0.7195080286983259


In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import precision_score, recall_score, f1_score


# AdaBoost over 50 logistic-regression learners, seeded for repeatability.
# The keyword is `estimator=` (as already used for BaggingClassifier in this
# notebook): scikit-learn renamed `base_estimator` to `estimator` in 1.2 and
# removed the old name in 1.4, where it raises TypeError.
adaboost = AdaBoostClassifier(estimator=LogisticRegression(), n_estimators=50, random_state=42)
adaboost.fit(X_train, y_train)
y_pred_adaboost = adaboost.predict(X_test)

# Score the held-out predictions against the true labels.
accuracy_adaboost = accuracy_score(y_test, y_pred_adaboost)
precision_adaboost = precision_score(y_test, y_pred_adaboost)
recall_adaboost = recall_score(y_test, y_pred_adaboost)
f1_adaboost = f1_score(y_test, y_pred_adaboost)

# Report all four metrics for the AdaBoost model.
print("AdaBoost - Accuracy:", accuracy_adaboost)
print("AdaBoost - Precision:", precision_adaboost)
print("AdaBoost - Recall:", recall_adaboost)
print("AdaBoost - F1 Score:", f1_adaboost)



AdaBoost - Accuracy: 0.95945
AdaBoost - Precision: 0.8528717545239969
AdaBoost - Recall: 0.6346604215456675
AdaBoost - F1 Score: 0.7277609936220208


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score

# Train a 100-tree random forest on the training split (seeded for repeatability);
# fit() returns the fitted estimator, so construction and training are chained.
random_forest = RandomForestClassifier(random_state=42, n_estimators=100).fit(X_train, y_train)
y_pred_rf = random_forest.predict(X_test)

# Compute each held-out metric and report it straight away.
accuracy_rf = accuracy_score(y_true=y_test, y_pred=y_pred_rf)
print("Random Forest - Accuracy:", accuracy_rf)

precision_rf = precision_score(y_true=y_test, y_pred=y_pred_rf)
print("Random Forest - Precision:", precision_rf)

recall_rf = recall_score(y_true=y_test, y_pred=y_pred_rf)
print("Random Forest - Recall:", recall_rf)

f1_rf = f1_score(y_true=y_test, y_pred=y_pred_rf)
print("Random Forest - F1 Score:", f1_rf)

# Side-by-side accuracy summary; the bagging and AdaBoost values come from the
# earlier cells, so those cells must have been run first.
print(
    "Comparison of Accuracies:\n Bagging: {}\n AdaBoost: {}\n Random Forest: {}".format(
        accuracy_bagging, accuracy_adaboost, accuracy_rf
    )
)


Random Forest - Accuracy: 0.97005
Random Forest - Precision: 0.9468170829975826
Random Forest - Recall: 0.6879391100702577
Random Forest - F1 Score: 0.7968802984062394
Comparison of Accuracies:
 Bagging: 0.9576
 AdaBoost: 0.95945
 Random Forest: 0.97005
