In [5]:
# ------------- Setup and Imports -------------
import pandas as pd
import numpy as np
import os
import sys

sys.path.append("../")

from src.functions import *
from sklearn.svm import SVR
from sklearn.pipeline import Pipeline
from sklearn.impute import KNNImputer
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import BayesianRidge
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import FunctionTransformer
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score


# Seed NumPy's legacy global random state with a fixed value so that
# NumPy-driven randomness is repeatable from one run to the next.
np.random.seed(42)

In [13]:
# ------------- Load Datasets -------------
# Every input is addressed relative to the notebook, like the other paths used
# here, so the notebook does not depend on the checkout living in the home
# directory.
# NOTE(review): assumes the raw CSVs sit next to the processed ones in ../data
# (they were previously read from ~/Assignment-1/data) — confirm.
DATA_DIR = "../data"

# Processed datasets: source of the model features.
dev_df = load_dataset(f"{DATA_DIR}/development_final_data.csv")
val_df = load_dataset(f"{DATA_DIR}/evaluation_final_data.csv")

# Raw datasets: source of the BMI values used as targets below.
dev_df_raw = pd.read_csv(f"{DATA_DIR}/assignment1_dev_set.csv")
val_df_raw = pd.read_csv(f"{DATA_DIR}/assignment1_val_set.csv")

# ------------- Split target and features -------------
# Only the features are kept from the processed data. The target returned by
# the split is deliberately discarded, because the labels are taken from the
# BMI column of the raw files instead.
X_dev, _processed_bmi_dev = split_features_target(dev_df, 'BMI')
X_val, _processed_bmi_val = split_features_target(val_df, 'BMI')

y_dev = dev_df_raw['BMI']
y_val = val_df_raw['BMI']

# Features and targets come from different files and are paired purely by
# position, so fail fast with a clear message if the row counts disagree.
# NOTE(review): equal length does not prove identical row order — confirm the
# preprocessing that produced the *_final_data files keeps rows in place.
assert len(X_dev) == len(y_dev), (
    f"development features ({len(X_dev)} rows) and raw BMI targets "
    f"({len(y_dev)} rows) are not aligned"
)
assert len(X_val) == len(y_val), (
    f"validation features ({len(X_val)} rows) and raw BMI targets "
    f"({len(y_val)} rows) are not aligned"
)


# ------------- Directories for Saving Models -------------
# Create each output directory if it is missing; existing ones are left alone.
for model_dir in ("../models", "../final_models"):
    os.makedirs(model_dir, exist_ok=True)

In [14]:
# BMI cut-off between the two classes: values at or above it are labelled 1
threshold = 25

# Binarise the continuous BMI targets (1 when BMI >= threshold, otherwise 0)
y_dev_class = y_dev.ge(threshold).astype(int)
y_val_class = y_val.ge(threshold).astype(int)


In [15]:
# Candidate classifiers, keyed by the label used in file names and printed output
classifiers = dict(
    LogisticRegression=LogisticRegression(max_iter=1000),
    GaussianNB=GaussianNB(),
)

for name in classifiers:
    clf = classifiers[name]

    # Train through the project helper on the development features and the
    # binary BMI labels
    model = train_model(X_dev, y_dev_class, clf)

    # Persist the returned model under ../models, one file per classifier
    model_path = f"../models/{name}_classifier.joblib"
    save_model(model, model_path)

    # Evaluate with the development and validation sets, then print the
    # resulting metrics as a one-row table
    metrics = evaluate_classification_model(
        model, X_dev, y_dev_class, X_val, y_val_class
    )
    metrics_table = pd.DataFrame([metrics])
    print(f"Metrics for {name}:\n", metrics_table)

Metrics for LogisticRegression:
    train_accuracy  train_precision  train_recall  train_f1  test_accuracy  \
0        0.873211         0.865169      0.898833  0.881679       0.696682   

   test_precision  test_recall   test_f1  
0        0.711864     0.736842  0.724138  
Metrics for GaussianNB:
    train_accuracy  train_precision  train_recall  train_f1  test_accuracy  \
0        0.742331         0.722034      0.828794  0.771739        0.64455   

   test_precision  test_recall   test_f1  
0        0.644444     0.763158  0.698795  
