In [1]:
!pip install seaborn

Collecting seaborn
  Downloading seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Downloading seaborn-0.13.2-py3-none-any.whl (294 kB)
Installing collected packages: seaborn
Successfully installed seaborn-0.13.2


In [None]:
import pickle # import pickle module to save and load trained models from files
import streamlit as st # import streamlit framework to build the web application interface
import pandas as pd # import pandas for data manipulation and dataframe operations
import numpy as np # import numpy for numerical computations and array operations
from sklearn.model_selection import train_test_split # import train_test_split function to divide dataset into training and testing sets
# import labelencoder to convert categorical labels into numeric values
# import standardscaler to standardize feature values
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.tree import DecisionTreeClassifier# import decisiontreeclassifier which will be used as weak learner
# import ensemble methods for building ensemble models
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier, GradientBoostingClassifier, StackingClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression# import logistic regression model used as meta learner in stacking
from sklearn.svm import SVC# import support vector classifier used in stacking and voting
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, precision_score, recall_score, f1_score# import evaluation metrics to measure model performance
import seaborn as sns # import seaborn for advanced statistical plotting
import matplotlib.pyplot as plt # import matplotlib for creating plots and visualizations
import random# import random module to randomly sample data points



def _fit_and_pickle(model, X_train, y_train, path):
    """Fit ``model`` on the training data, pickle it to ``path``, return it."""
    model.fit(X_train, y_train)
    # write-binary mode; the context manager closes the file even on error
    with open(path, "wb") as f:
        pickle.dump(model, f)
    return model


# use streamlit caching to store trained models and avoid retraining
# cache_resource ensures models are stored across reruns
@st.cache_resource
def train_and_save_models(X_train, y_train):
    """Train six classifiers on the same data and pickle each one to disk.

    Models are trained in this order: a decision stump, bagging, AdaBoost,
    gradient boosting, stacking, and soft voting. Each fitted model is
    written to a ``*.pkl`` file using a relative path, i.e. relative to the
    current working directory of the process.

    Every estimator, including those nested inside the stacking and voting
    ensembles, is seeded with the same ``random_state`` so repeated training
    on the same data gives the same models.

    Args:
        X_train: training features, passed unchanged to each ``fit`` call.
        y_train: training labels, passed unchanged to each ``fit`` call.

    Returns:
        Tuple ``(weak_model, bagging_model, adb_model, gb_model,
        stacking_model, voting_model)`` of fitted estimators.

    Note:
        Because of ``st.cache_resource``, a cache hit returns the stored
        models without executing this body, so the pickle files are only
        (re)written when training actually runs.
    """
    # single seed shared by every estimator for reproducibility
    seed = 42

    # weak learner: max_depth=1 makes the tree a decision stump
    weak_model = _fit_and_pickle(
        DecisionTreeClassifier(max_depth=1, random_state=seed),
        X_train, y_train, "weak_model.pkl",
    )

    # bagging: 50 fully grown trees (max_depth=None) on bootstrap samples
    bagging_model = _fit_and_pickle(
        BaggingClassifier(
            estimator=DecisionTreeClassifier(max_depth=None),
            n_estimators=50,
            random_state=seed,
        ),
        X_train, y_train, "bagging_model.pkl",
    )

    # adaboost: 50 boosting rounds over decision stumps
    adb_model = _fit_and_pickle(
        AdaBoostClassifier(
            estimator=DecisionTreeClassifier(max_depth=1),
            n_estimators=50,
            random_state=seed,
        ),
        X_train, y_train, "adb_model.pkl",
    )

    # gradient boosting: 100 stages of depth-1 trees, learning_rate scales
    # the contribution of each stage
    gb_model = _fit_and_pickle(
        GradientBoostingClassifier(
            n_estimators=100, learning_rate=0.1, max_depth=1, random_state=seed
        ),
        X_train, y_train, "gb_model.pkl",
    )

    # stacking: a stump and an SVC as base learners, logistic regression as
    # the meta learner trained on 5-fold cross-validated base predictions.
    # SVC(probability=True) is randomised internally, so it is seeded too.
    stacking_model = _fit_and_pickle(
        StackingClassifier(
            estimators=[
                ("dt", DecisionTreeClassifier(max_depth=1, random_state=seed)),
                ("svc", SVC(probability=True, random_state=seed)),
            ],
            final_estimator=LogisticRegression(),
            cv=5,
        ),
        X_train, y_train, "stacking_model.pkl",
    )

    # soft voting: average the predicted probabilities of the members.
    # NOTE: the member labelled "rf" is a bagging ensemble of decision trees,
    # not a RandomForestClassifier; the label is kept so existing lookups by
    # estimator name keep working.
    voting_model = _fit_and_pickle(
        VotingClassifier(
            estimators=[
                ("dt", DecisionTreeClassifier(max_depth=1, random_state=seed)),
                (
                    "rf",
                    BaggingClassifier(
                        DecisionTreeClassifier(),
                        n_estimators=10,
                        random_state=seed,
                    ),
                ),
                ("svc", SVC(probability=True, random_state=seed)),
            ],
            voting="soft",
        ),
        X_train, y_train, "voting_model.pkl",
    )

    # return all trained models
    return weak_model, bagging_model, adb_model, gb_model, stacking_model, voting_model
