In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.ensemble import AdaBoostClassifier
import joblib




In [25]:
# One seed shared by the train/test split and by every estimator that was
# previously left unseeded, so Restart & Run All reproduces the same fitted
# (and therefore the same saved) models.
SEED = 42

# Load your dataset
dataset = pd.read_csv("April26_Dataset_17172_13_Argumented.csv")

# Separate features and target variable
X = dataset.drop('dropout', axis=1)
y = dataset['dropout']

# Split the dataset into training and testing sets (33% held out)
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=SEED)

# Classifiers keyed by display name; the key is also used to build the
# file name each fitted model is saved under.
classifiers = {
    # Random Forest keeps its original seed (0) so its fitted model is unchanged.
    'Random Forest': RandomForestClassifier(n_estimators=100, random_state=0),
    'Logistic Regression': LogisticRegression(solver='liblinear', random_state=SEED),
    'K-Nearest Neighbors': KNeighborsClassifier(),
    'Decision Tree': DecisionTreeClassifier(random_state=SEED),
    'Gaussian Naive Bayes': GaussianNB(),
    'Multilayer Perceptron': MLPClassifier(random_state=SEED),
    'Support Vector Machine': SVC(),
    'Linear Discriminant Analysis': LinearDiscriminantAnalysis(),
    'AdaBoost': AdaBoostClassifier(random_state=SEED)
}

In [46]:
# Show the distinct values observed in every feature column
for col, column_values in X.items():
    print(col, column_values.unique())

location_name [0 1]
home_language [2 0 1 3]
hh_occupation [0 5 2 3 1 4 6]
hh_edu [1 0 3 2]
hh_size [ 7  8  5 11 10  6 17  2 15  4  9  1  3 12 14 18 16 13 29 30 20 19 78 34
 25 24 21 28 22 23 31 26 35 73]
school_distanceKm [  2   0   1   3   6   4  30   5  15  45   7  27   8  20  10   9  12  25
  11  14 100  60]
age [11 12 13 14 15 16]
gender [2 1]
mothers_edu [1 2 3 0]
grade [11  9 10 12]
meansToSchool [0 1 2 3]
hh_children [ 7  6  4  3  8  5 17 13  2  1  9 10 11 14 16 12 18 15]


In [26]:
# Fit every classifier on the training split, then persist it with joblib
for clf_name in classifiers:
    clf = classifiers[clf_name]
    print(f"Training {clf_name}...")
    clf.fit(x_train, y_train)

    # File name = display name with spaces turned into underscores, lower-cased
    model_filename = clf_name.replace(' ', '_').lower() + "_model.joblib"
    joblib.dump(clf, model_filename)
    print(f"{clf_name} trained and saved as {model_filename}\n")

print("All models trained and saved successfully!")

Training Random Forest...
Random Forest trained and saved as random_forest_model.joblib

Training Logistic Regression...
Logistic Regression trained and saved as logistic_regression_model.joblib

Training K-Nearest Neighbors...
K-Nearest Neighbors trained and saved as k-nearest_neighbors_model.joblib

Training Decision Tree...
Decision Tree trained and saved as decision_tree_model.joblib

Training Gaussian Naive Bayes...
Gaussian Naive Bayes trained and saved as gaussian_naive_bayes_model.joblib

Training Multilayer Perceptron...
Multilayer Perceptron trained and saved as multilayer_perceptron_model.joblib

Training Support Vector Machine...
Support Vector Machine trained and saved as support_vector_machine_model.joblib

Training Linear Discriminant Analysis...
Linear Discriminant Analysis trained and saved as linear_discriminant_analysis_model.joblib

Training AdaBoost...
AdaBoost trained and saved as adaboost_model.joblib

All models trained and saved successfully!


In [30]:
import joblib
import pandas as pd

def load_models():
    """
    Read every saved classifier back from disk.

    The display name of each model is derived from its file name: the part
    before '_model' is title-cased and its underscores become spaces.

    :return: Dictionary mapping display name -> loaded model, in the order
             the files are listed below.
    """
    saved_files = (
        "random_forest_model.joblib",
        "logistic_regression_model.joblib",
        "k-nearest_neighbors_model.joblib",
        "decision_tree_model.joblib",
        "gaussian_naive_bayes_model.joblib",
        "multilayer_perceptron_model.joblib",
        "support_vector_machine_model.joblib",
        "linear_discriminant_analysis_model.joblib",
        "adaboost_model.joblib",
    )

    def _display_name(file_name):
        # Keep only the text in front of the first '_model' marker
        stem = file_name.partition('_model')[0]
        return stem.title().replace('_', ' ')

    return {_display_name(path): joblib.load(path) for path in saved_files}

def make_inference(input_data, models=None):
    """
    Make predictions with every model in a name -> model mapping.

    :param input_data: Input rows for inference, passed unchanged to each
                       model's ``predict``. Intended to be a Pandas DataFrame
                       with the same columns as the training data.
    :param models: Optional dictionary of already-loaded models (display name
                   -> object exposing ``predict``). When omitted, the models
                   are read from disk via ``load_models()``. Pass a cached
                   mapping to avoid re-loading every file on each call.
    :return: Dictionary containing model names as keys and the corresponding
             predictions as values, in the iteration order of ``models``.
    """
    # Only hit the disk when the caller did not supply models; an explicitly
    # passed empty mapping is respected and yields an empty result.
    if models is None:
        models = load_models()

    return {
        model_name: model.predict(input_data)
        for model_name, model in models.items()
    }




In [33]:
# Take the first dataset row (target column removed) as a sample input
feature_rows = dataset.drop(columns='dropout')
input_data = feature_rows.iloc[0]

In [35]:
# Make predictions using all models.
# `input_data` is a single row (a Series); turn it into a one-row DataFrame so
# the models receive the same column names they were fitted with, instead of
# an unnamed list of values.
all_predictions = make_inference(input_data.to_frame().T)

# Print or use the predictions as needed
for model_name, model_predictions in all_predictions.items():
    print(f"Predictions from {model_name}:\n{model_predictions}")

Predictions from Random Forest:
[1]
Predictions from Logistic Regression:
[1]
Predictions from K-Nearest Neighbors:
[1]
Predictions from Decision Tree:
[1]
Predictions from Gaussian Naive Bayes:
[1]
Predictions from Multilayer Perceptron:
[1]
Predictions from Support Vector Machine:
[1]
Predictions from Linear Discriminant Analysis:
[1]
Predictions from Adaboost:
[1]


