In [1]:
# Import necessary libraries
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import pandas as pd
import warnings

# Suppress every warning (no category or module restriction is applied)
warnings.filterwarnings("ignore")

# Read the dataset from the CSV file in the working directory
data = pd.read_csv('diabetes.csv')

# The 'Outcome' column is the target; all remaining columns are the features
y = data['Outcome']
X = data.drop(columns='Outcome')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest (fit() hands back the fitted estimator)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Predict the held-out rows and measure the share of correct labels
rf_predictions = rf_model.predict(X_test)
rf_accuracy = accuracy_score(y_true=y_test, y_pred=rf_predictions)
print(f"Random Forest Accuracy: {rf_accuracy}")


Random Forest Accuracy: 0.7207792207792207


In [2]:
# Import necessary libraries
from sklearn.ensemble import GradientBoostingClassifier

# Build a gradient boosting classifier with 100 boosting stages and fit it
# on the training split (fit() hands back the fitted estimator)
gb_model = GradientBoostingClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict the held-out rows, then score them against the true labels
gb_predictions = gb_model.predict(X_test)
gb_accuracy = accuracy_score(y_true=y_test, y_pred=gb_predictions)
print(f"Gradient Boosting Accuracy: {gb_accuracy}")


Gradient Boosting Accuracy: 0.7467532467532467


In [3]:
# Import necessary libraries
from sklearn.ensemble import AdaBoostClassifier

# Build an AdaBoost classifier capped at 100 estimators and fit it
# on the training split (fit() hands back the fitted estimator)
ada_model = AdaBoostClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predict the held-out rows, then score them against the true labels
ada_predictions = ada_model.predict(X_test)
ada_accuracy = accuracy_score(y_true=y_test, y_pred=ada_predictions)
print(f"AdaBoost Accuracy: {ada_accuracy}")


AdaBoost Accuracy: 0.7402597402597403


In [4]:
# Import necessary libraries
from sklearn.ensemble import StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Base (level-0) learners: one freshly configured instance of each ensemble,
# paired with the short label the stacking classifier knows it by
base_models = [
    (label, estimator_cls(n_estimators=100, random_state=42))
    for label, estimator_cls in (
        ('rf', RandomForestClassifier),
        ('gb', GradientBoostingClassifier),
        ('ada', AdaBoostClassifier),
    )
]

# Stack the base learners under a logistic-regression meta-learner, using
# 5-fold cross-validation, and fit the whole stack on the training split
# (fit() hands back the fitted estimator)
stacked_model = StackingClassifier(
    estimators=base_models,
    final_estimator=LogisticRegression(),
    cv=5,
).fit(X_train, y_train)

# Predict the held-out rows, then score them against the true labels
stacked_predictions = stacked_model.predict(X_test)
stacked_accuracy = accuracy_score(y_true=y_test, y_pred=stacked_predictions)
print(f"Stacked Model Accuracy: {stacked_accuracy}")


Stacked Model Accuracy: 0.7467532467532467


In [5]:
import joblib

# The stacked ensemble is taken as the chosen model here; write it to disk
# under a fixed file name
joblib.dump(value=stacked_model, filename='diabetes_ensemble_model.pkl')


['diabetes_ensemble_model.pkl']