# Setup

In [64]:
from pathlib import Path
import pandas as pd
import numpy as np
import random, os

In [65]:
import sys
# Put the parent directory on the import path.
# NOTE(review): presumably needed for the later `from utils import ...` — confirm.
sys.path.append('..')

In [66]:
# Single seed shared by every source of randomness seeded in this cell.
RNG = 42

# NOTE(review): the hash seed is normally read at interpreter start-up, so this
# assignment likely only affects child processes — confirm.
os.environ["PYTHONHASHSEED"] = str(RNG)

# Seed the stdlib and the legacy global NumPy generators.
random.seed(RNG)
np.random.seed(RNG)

In [67]:
def load_datasets(data_root: str | Path = "data",
                  tasks: tuple[str, ...] = ("binary", "multiclass"),
                  splits: tuple[str, ...] = ("train", "val", "test")) -> dict:

    data_root = Path(data_root)
    datasets  = {}

    for task in tasks:
        task_dir     = data_root / task
        task_dict    = {}

        for split in splits:
            split_dict = {}
            for kind in ("X", "y"):
                file_path = task_dir / f"{kind}_{split}.pkl"
                split_dict[kind] = pd.read_pickle(file_path)
            task_dict[split] = split_dict

        datasets[task] = task_dict

    return datasets

In [68]:
from typing import Tuple, Literal
import pandas as pd

def load_split(
    preprocessing_type: Literal["cleaned_only", "full_process"],
    sampling_method: Literal["undersampled", "oversampled"],
    classification_type: Literal["binary", "multiclass"],
    data_root: str | Path = "../data",
) -> Tuple[
    Tuple[pd.DataFrame, pd.Series],  # train: (X_train, y_train)
    Tuple[pd.DataFrame, pd.Series],  # val: (X_val, y_val)
    Tuple[pd.DataFrame, pd.Series]   # test: (X_test, y_test)
]:
    """
    Load the train/val/test splits of one dataset variant.

    The data is read from
    ``<data_root>/<preprocessing_type>/<sampling_method>/<classification_type>/``.

    Args:
        preprocessing_type: must be "cleaned_only" or "full_process"
        sampling_method: must be "undersampled" or "oversampled"
        classification_type: must be "binary" or "multiclass"
        data_root: directory that contains the preprocessing sub-directories;
            defaults to the previously hard-coded "../data"

    Returns:
        Tuple of (train, val, test) splits, where each split is (X, y)
        - train: (X_train, y_train)
        - val: (X_val, y_val)
        - test: (X_test, y_test)
    """
    variant_dir = Path(data_root) / preprocessing_type / sampling_method
    # Only read the requested task: loading the other task as well doubles the
    # I/O and fails when that task's pickles are missing.
    dataset = load_datasets(variant_dir, tasks=(classification_type,))[classification_type]

    return tuple(
        (dataset[split]["X"], dataset[split]["y"])
        for split in ("train", "val", "test")
    )

# Baseline Comparisons 

The goal is to identify which models to use in the ensemble, and to see which dataset variant gives better results.

In [69]:
def combine_text(X):
    """Join resume and job-description text into one string per row.

    Args:
        X: Table with ``resume_text`` and ``job_description_text`` columns.
            It is only read, never modified, so no defensive copy is taken.

    Returns:
        Array with one ``"<resume> [SEP] <job description>"`` string per row.
        A missing value contributes an empty string rather than the literal
        text "nan"/"None", which would otherwise become a spurious token for
        the downstream vectorizer.
    """
    resume, job = (
        X[col].astype(str).where(X[col].notna(), "")
        for col in ("resume_text", "job_description_text")
    )

    return (resume + " [SEP] " + job).values

In [70]:
# Pipeline components
from sklearn.base import clone
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

## Binary

In [71]:
from utils import ExperimentManager, Experiment

# Define the task name first so the run directory is derived from it instead of
# repeating the literal "binary".
CLASSIFICATION_TYPE = "binary"

# NOTE(review): the second argument looks like the class label names — confirm
# against ExperimentManager.
manager = ExperimentManager(f"../runs/{CLASSIFICATION_TYPE}/baselines/", ["Fit", "Not Fit"])

### Linear Models

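RidgeClassifier performs the best overall (Logistic Regression is marginally ahead on the full_process/undersampled data). It is unclear whether stop word removal and lemmatization are beneficial. Undersampling performs the best. Note: the cleaned_only and full_process oversampled runs report identical scores, which suggests both paths may contain the same data and should be verified.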


In [72]:
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.svm import LinearSVC

# Untuned linear baselines. Every estimator takes its seed from the notebook-wide
# RNG constant instead of repeating the literal 42.
linear_models = {
    "Logistic Regression": LogisticRegression(
        penalty='l2',
        C=1.0,
        solver='lbfgs',
        max_iter=1000,
        class_weight='balanced',
        random_state=RNG,
        n_jobs=-1
    ),

    "Ridge": RidgeClassifier(
        alpha=1.0,
        class_weight='balanced',
        random_state=RNG
    ),

    "LinearSVC": LinearSVC(
        C=1.0,
        class_weight='balanced',
        dual=False,
        random_state=RNG,
        max_iter=2000
    )
}

# Used to build the experiment-summary export directory.
MODEL_FAMILY = "linear_models"

In [73]:
PREPROCESSING_TYPE = "cleaned_only"
SAMPLING_METHOD = "undersampled"

# (train, val, test) splits for this preprocessing/sampling variant.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE,
)

for name, model in linear_models.items():

    # `params` is unused because the baselines are not tuned. The estimator is
    # bound as a default argument so the factory keeps this iteration's model
    # even if it is called later, and it is cloned so experiments never share
    # (and refit) the same estimator instance.
    def pipeline_factory(params, estimator=model):
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # Convert text to numeric
            ("clf", clone(estimator)),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Logistic Regression classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 1,714
   Classes: 2
   Overall Accuracy: 0.6004

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.6000
   Micro F1:     0.6004
   Weighted F1:  0.6000

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.6007  R: 0.6004
   Micro    - P: 0.6004  R: 0.6004
   Weighted - P: 0.6007  R: 0.6004

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.5951     0.6278     0.6110        857
   Not Fit              0.6062     0.5729     0.5891        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.6007     0.6004     0.6000       1714
   weighted avg         0.6007     0.6004     0.6000       1714

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted â†’
   True â†“   

In [74]:
# Named summary_dir/summary_file because a global `dir` would shadow the builtin
# for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
# Empty the manager's experiment list, presumably so the next variant's
# comparison only covers its own runs.
manager.experiments = []



=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Logistic Regression classifier 0.6004       âœ… Completed
Ridge classifier               0.6009       âœ… Completed
LinearSVC classifier           0.5980       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_cleaned_only_undersampled.csv


In [75]:

PREPROCESSING_TYPE = "full_process"
SAMPLING_METHOD = "undersampled"

# (train, val, test) splits for this preprocessing/sampling variant.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE,
)

for name, model in linear_models.items():

    # `params` is unused because the baselines are not tuned. The estimator is
    # bound as a default argument so the factory keeps this iteration's model
    # even if it is called later, and it is cloned so experiments never share
    # (and refit) the same estimator instance.
    def pipeline_factory(params, estimator=model):
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # Convert text to numeric
            ("clf", clone(estimator)),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Logistic Regression classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 1,804
   Classes: 2
   Overall Accuracy: 0.5831

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.5827
   Micro F1:     0.5831
   Weighted F1:  0.5827

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.5835  R: 0.5831
   Micro    - P: 0.5831  R: 0.5831
   Weighted - P: 0.5835  R: 0.5831

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.5783     0.6142     0.5957        902
   Not Fit              0.5887     0.5521     0.5698        902
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.5835     0.5831     0.5827       1804
   weighted avg         0.5835     0.5831     0.5827       1804

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted â†’
   True â†“   

In [76]:
# Named summary_dir/summary_file because a global `dir` would shadow the builtin
# for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
# Empty the manager's experiment list, presumably so the next variant's
# comparison only covers its own runs.
manager.experiments = []



=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Logistic Regression classifier 0.5831       âœ… Completed
Ridge classifier               0.5804       âœ… Completed
LinearSVC classifier           0.5765       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_full_process_undersampled.csv


In [77]:
PREPROCESSING_TYPE = "cleaned_only"
SAMPLING_METHOD = "oversampled"

# (train, val, test) splits for this preprocessing/sampling variant.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE,
)

for name, model in linear_models.items():

    # `params` is unused because the baselines are not tuned. The estimator is
    # bound as a default argument so the factory keeps this iteration's model
    # even if it is called later, and it is cloned so experiments never share
    # (and refit) the same estimator instance.
    def pipeline_factory(params, estimator=model):
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # Convert text to numeric
            ("clf", clone(estimator)),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Logistic Regression classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 1,804
   Classes: 2
   Overall Accuracy: 0.5898

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.5896
   Micro F1:     0.5898
   Weighted F1:  0.5896

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.5900  R: 0.5898
   Micro    - P: 0.5898  R: 0.5898
   Weighted - P: 0.5900  R: 0.5898

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.5858     0.6131     0.5991        902
   Not Fit              0.5942     0.5665     0.5800        902
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.5900     0.5898     0.5896       1804
   weighted avg         0.5900     0.5898     0.5896       1804

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted â†’
   True â†“   

In [78]:
# Named summary_dir/summary_file because a global `dir` would shadow the builtin
# for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
# Empty the manager's experiment list, presumably so the next variant's
# comparison only covers its own runs.
manager.experiments = []



=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Logistic Regression classifier 0.5898       âœ… Completed
Ridge classifier               0.5931       âœ… Completed
LinearSVC classifier           0.5759       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_cleaned_only_oversampled.csv


In [79]:
PREPROCESSING_TYPE = "full_process"
SAMPLING_METHOD = "oversampled"

# (train, val, test) splits for this preprocessing/sampling variant.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE,
)

for name, model in linear_models.items():

    # `params` is unused because the baselines are not tuned. The estimator is
    # bound as a default argument so the factory keeps this iteration's model
    # even if it is called later, and it is cloned so experiments never share
    # (and refit) the same estimator instance.
    def pipeline_factory(params, estimator=model):
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # Convert text to numeric
            ("clf", clone(estimator)),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Logistic Regression classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 1,804
   Classes: 2
   Overall Accuracy: 0.5898

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.5896
   Micro F1:     0.5898
   Weighted F1:  0.5896

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.5900  R: 0.5898
   Micro    - P: 0.5898  R: 0.5898
   Weighted - P: 0.5900  R: 0.5898

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.5858     0.6131     0.5991        902
   Not Fit              0.5942     0.5665     0.5800        902
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.5900     0.5898     0.5896       1804
   weighted avg         0.5900     0.5898     0.5896       1804

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted â†’
   True â†“   

In [80]:
# Named summary_dir/summary_file because a global `dir` would shadow the builtin
# for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
# Empty the manager's experiment list, presumably so the next variant's
# comparison only covers its own runs.
manager.experiments = []


=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Logistic Regression classifier 0.5898       âœ… Completed
Ridge classifier               0.5931       âœ… Completed
LinearSVC classifier           0.5759       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_full_process_oversampled.csv


### Tree Models

Random Forest performs the best on the undersampled data (ExtraTrees is marginally ahead on the oversampled data). It is unclear whether stop word removal and lemmatization are beneficial. Undersampling performs the best.


In [81]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier

# Untuned tree-ensemble baselines. Both estimators take their seed from the
# notebook-wide RNG constant instead of repeating the literal 42.
tree_models = {
    "Random Forest": RandomForestClassifier(
        n_estimators=100,
        max_depth=None,
        min_samples_split=5,
        min_samples_leaf=2,
        max_features='sqrt',
        class_weight='balanced',
        random_state=RNG,
        n_jobs=-1
    ),

    "ExtraTrees": ExtraTreesClassifier(
        n_estimators=100,
        class_weight='balanced',
        random_state=RNG,
        n_jobs=-1,
        max_depth=None,
        min_samples_split=5,
        min_samples_leaf=2
    )
}

# Used to build the experiment-summary export directory.
MODEL_FAMILY = "tree_models"

In [82]:
PREPROCESSING_TYPE = "cleaned_only"
SAMPLING_METHOD = "undersampled"

# (train, val, test) splits for this preprocessing/sampling variant.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE,
)

for name, model in tree_models.items():

    # `params` is unused because the baselines are not tuned. The estimator is
    # bound as a default argument so the factory keeps this iteration's model
    # even if it is called later, and it is cloned so experiments never share
    # (and refit) the same estimator instance.
    def pipeline_factory(params, estimator=model):
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # Convert text to numeric
            ("clf", clone(estimator)),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Random Forest classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 1,714
   Classes: 2
   Overall Accuracy: 0.6476

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.6475
   Micro F1:     0.6476
   Weighted F1:  0.6475

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.6478  R: 0.6476
   Micro    - P: 0.6476  R: 0.6476
   Weighted - P: 0.6478  R: 0.6476

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.6420     0.6674     0.6545        857
   Not Fit              0.6537     0.6278     0.6405        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.6478     0.6476     0.6475       1714
   weighted avg         0.6478     0.6476     0.6475       1714

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted â†’
   True â†“        F

In [83]:
# Named summary_dir/summary_file because a global `dir` would shadow the builtin
# for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
# Empty the manager's experiment list, presumably so the next variant's
# comparison only covers its own runs.
manager.experiments = []



=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Random Forest classifier       0.6476       âœ… Completed
ExtraTrees classifier          0.6342       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_cleaned_only_undersampled.csv


In [84]:

PREPROCESSING_TYPE = "full_process"
SAMPLING_METHOD = "undersampled"

# (train, val, test) splits for this preprocessing/sampling variant.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE,
)

for name, model in tree_models.items():

    # `params` is unused because the baselines are not tuned. The estimator is
    # bound as a default argument so the factory keeps this iteration's model
    # even if it is called later, and it is cloned so experiments never share
    # (and refit) the same estimator instance.
    def pipeline_factory(params, estimator=model):
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # Convert text to numeric
            ("clf", clone(estimator)),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Random Forest classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 1,804
   Classes: 2
   Overall Accuracy: 0.6447

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.6446
   Micro F1:     0.6447
   Weighted F1:  0.6446

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.6448  R: 0.6447
   Micro    - P: 0.6447  R: 0.6447
   Weighted - P: 0.6448  R: 0.6447

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.6411     0.6574     0.6492        902
   Not Fit              0.6485     0.6319     0.6401        902
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.6448     0.6447     0.6446       1804
   weighted avg         0.6448     0.6447     0.6446       1804

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted â†’
   True â†“        F

In [85]:
# Named summary_dir/summary_file because a global `dir` would shadow the builtin
# for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
# Empty the manager's experiment list, presumably so the next variant's
# comparison only covers its own runs.
manager.experiments = []



=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Random Forest classifier       0.6447       âœ… Completed
ExtraTrees classifier          0.6253       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_full_process_undersampled.csv


In [86]:
PREPROCESSING_TYPE = "cleaned_only"
SAMPLING_METHOD = "oversampled"

# (train, val, test) splits for this preprocessing/sampling variant.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE,
)

for name, model in tree_models.items():

    # `params` is unused because the baselines are not tuned. The estimator is
    # bound as a default argument so the factory keeps this iteration's model
    # even if it is called later, and it is cloned so experiments never share
    # (and refit) the same estimator instance.
    def pipeline_factory(params, estimator=model):
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # Convert text to numeric
            ("clf", clone(estimator)),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Random Forest classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 1,804
   Classes: 2
   Overall Accuracy: 0.6341

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.6341
   Micro F1:     0.6341
   Weighted F1:  0.6341

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.6341  R: 0.6341
   Micro    - P: 0.6341  R: 0.6341
   Weighted - P: 0.6341  R: 0.6341

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.6344     0.6330     0.6337        902
   Not Fit              0.6338     0.6353     0.6346        902
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.6341     0.6341     0.6341       1804
   weighted avg         0.6341     0.6341     0.6341       1804

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted â†’
   True â†“        F

In [87]:
# Named summary_dir/summary_file because a global `dir` would shadow the builtin
# for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
# Empty the manager's experiment list, presumably so the next variant's
# comparison only covers its own runs.
manager.experiments = []



=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Random Forest classifier       0.6341       âœ… Completed
ExtraTrees classifier          0.6347       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_cleaned_only_oversampled.csv


In [88]:
PREPROCESSING_TYPE = "full_process"
SAMPLING_METHOD = "oversampled"

# (train, val, test) splits for this preprocessing/sampling variant.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE,
)

for name, model in tree_models.items():

    # `params` is unused because the baselines are not tuned. The estimator is
    # bound as a default argument so the factory keeps this iteration's model
    # even if it is called later, and it is cloned so experiments never share
    # (and refit) the same estimator instance.
    def pipeline_factory(params, estimator=model):
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # Convert text to numeric
            ("clf", clone(estimator)),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Random Forest classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 1,804
   Classes: 2
   Overall Accuracy: 0.6341

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.6341
   Micro F1:     0.6341
   Weighted F1:  0.6341

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.6341  R: 0.6341
   Micro    - P: 0.6341  R: 0.6341
   Weighted - P: 0.6341  R: 0.6341

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.6344     0.6330     0.6337        902
   Not Fit              0.6338     0.6353     0.6346        902
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.6341     0.6341     0.6341       1804
   weighted avg         0.6341     0.6341     0.6341       1804

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted â†’
   True â†“        F

In [89]:
# Named summary_dir/summary_file because a global `dir` would shadow the builtin
# for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
# Empty the manager's experiment list, presumably so the next variant's
# comparison only covers its own runs.
manager.experiments = []


=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Random Forest classifier       0.6341       âœ… Completed
ExtraTrees classifier          0.6347       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_full_process_oversampled.csv


### Naive Bayes Models

BernoulliNB performs the best; MultinomialNB and ComplementNB score identically to each other. It is unclear whether stop word removal and lemmatization are beneficial. Undersampling performs the best.


In [90]:
from sklearn.naive_bayes import MultinomialNB, BernoulliNB, ComplementNB

# Untuned Naive Bayes baselines, keyed by the display name used in experiment titles.
naive_bayes_models = {
    "MultinomialNB": MultinomialNB(fit_prior=True, alpha=1.0),
    "BernoulliNB": BernoulliNB(fit_prior=True, alpha=1.0, binarize=0.0),
    "ComplementNB": ComplementNB(fit_prior=True, alpha=1.0, norm=False),
}

# Used to build the experiment-summary export directory.
MODEL_FAMILY = "naive_bayes_models"

In [91]:
PREPROCESSING_TYPE = "cleaned_only"
SAMPLING_METHOD = "undersampled"

# (train, val, test) splits for this preprocessing/sampling variant.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE,
)

for name, model in naive_bayes_models.items():

    # `params` is unused because the baselines are not tuned. The estimator is
    # bound as a default argument so the factory keeps this iteration's model
    # even if it is called later, and it is cloned so experiments never share
    # (and refit) the same estimator instance.
    def pipeline_factory(params, estimator=model):
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # Convert text to numeric
            ("clf", clone(estimator)),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: MultinomialNB classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 1,714
   Classes: 2
   Overall Accuracy: 0.5951

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.5930
   Micro F1:     0.5951
   Weighted F1:  0.5930

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.5971  R: 0.5951
   Micro    - P: 0.5951  R: 0.5951
   Weighted - P: 0.5971  R: 0.5951

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.5832     0.6663     0.6220        857
   Not Fit              0.6109     0.5239     0.5641        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.5971     0.5951     0.5930       1714
   weighted avg         0.5971     0.5951     0.5930       1714

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted â†’
   True â†“        F

In [92]:
# Named summary_dir/summary_file because a global `dir` would shadow the builtin
# for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
# Empty the manager's experiment list, presumably so the next variant's
# comparison only covers its own runs.
manager.experiments = []



=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
MultinomialNB classifier       0.5951       âœ… Completed
BernoulliNB classifier         0.6050       âœ… Completed
ComplementNB classifier        0.5951       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_cleaned_only_undersampled.csv


In [93]:

PREPROCESSING_TYPE = "full_process"
SAMPLING_METHOD = "undersampled"

# (train, val, test) splits for this preprocessing/sampling variant.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE,
)

for name, model in naive_bayes_models.items():

    # `params` is unused because the baselines are not tuned. The estimator is
    # bound as a default argument so the factory keeps this iteration's model
    # even if it is called later, and it is cloned so experiments never share
    # (and refit) the same estimator instance.
    def pipeline_factory(params, estimator=model):
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # Convert text to numeric
            ("clf", clone(estimator)),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: MultinomialNB classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 1,804
   Classes: 2
   Overall Accuracy: 0.5820

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.5803
   Micro F1:     0.5820
   Weighted F1:  0.5803

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.5834  R: 0.5820
   Micro    - P: 0.5820  R: 0.5820
   Weighted - P: 0.5834  R: 0.5820

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.5727     0.6463     0.6073        902
   Not Fit              0.5941     0.5177     0.5533        902
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.5834     0.5820     0.5803       1804
   weighted avg         0.5834     0.5820     0.5803       1804

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted â†’
   True â†“        F

In [94]:
# Named summary_dir/summary_file because a global `dir` would shadow the builtin
# for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
# Empty the manager's experiment list, presumably so the next variant's
# comparison only covers its own runs.
manager.experiments = []



=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
MultinomialNB classifier       0.5820       âœ… Completed
BernoulliNB classifier         0.5831       âœ… Completed
ComplementNB classifier        0.5820       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_full_process_undersampled.csv


In [95]:
PREPROCESSING_TYPE = "cleaned_only"
SAMPLING_METHOD = "oversampled"

# (train, val, test) splits for this preprocessing/sampling variant.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE,
)

for name, model in naive_bayes_models.items():

    # `params` is unused because the baselines are not tuned. The estimator is
    # bound as a default argument so the factory keeps this iteration's model
    # even if it is called later, and it is cloned so experiments never share
    # (and refit) the same estimator instance.
    def pipeline_factory(params, estimator=model):
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # Convert text to numeric
            ("clf", clone(estimator)),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: MultinomialNB classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 1,804
   Classes: 2
   Overall Accuracy: 0.5754

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.5734
   Micro F1:     0.5754
   Weighted F1:  0.5734

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.5768  R: 0.5754
   Micro    - P: 0.5754  R: 0.5754
   Weighted - P: 0.5768  R: 0.5754

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.5664     0.6430     0.6023        902
   Not Fit              0.5872     0.5078     0.5446        902
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.5768     0.5754     0.5734       1804
   weighted avg         0.5768     0.5754     0.5734       1804

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted â†’
   True â†“        F

In [96]:
# Print the comparison table for the experiments run above, export it as CSV,
# then empty the manager so the next configuration starts from a clean list.
# The locals are named summary_dir/summary_file (not dir/file) so the builtin
# `dir` is not shadowed for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
manager.experiments = []




=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
MultinomialNB classifier       0.5754       âœ… Completed
BernoulliNB classifier         0.5859       âœ… Completed
ComplementNB classifier        0.5754       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_cleaned_only_oversampled.csv


In [97]:
PREPROCESSING_TYPE = "full_process"
SAMPLING_METHOD = "oversampled"

# (train, val, test) splits for this preprocessing/sampling combination.
# NOTE(review): the recorded output of this cell is identical to the
# cleaned_only/oversampled run (same accuracy and per-class scores) - verify
# that ../data/full_process/oversampled really holds differently processed data.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE
    )

for name, model in naive_bayes_models.items():

    def pipeline_factory(params, model=model):
        """Build the join -> TF-IDF -> classifier pipeline around `model`.

        `params` is accepted but unused because these baselines are not tuned.
        `model` is bound as a default argument so the factory keeps this
        iteration's estimator even if it is called after the loop has moved on
        (a plain closure would resolve `model` to the last dictionary entry).
        """
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # convert text to numeric features
            ("clf", model),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: MultinomialNB classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 1,804
   Classes: 2
   Overall Accuracy: 0.5754

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.5734
   Micro F1:     0.5754
   Weighted F1:  0.5734

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.5768  R: 0.5754
   Micro    - P: 0.5754  R: 0.5754
   Weighted - P: 0.5768  R: 0.5754

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.5664     0.6430     0.6023        902
   Not Fit              0.5872     0.5078     0.5446        902
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.5768     0.5754     0.5734       1804
   weighted avg         0.5768     0.5754     0.5734       1804

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted â†’
   True â†“        F

In [98]:
# Print the comparison table for the experiments run above, export it as CSV,
# then empty the manager so the next configuration starts from a clean list.
# The locals are named summary_dir/summary_file (not dir/file) so the builtin
# `dir` is not shadowed for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
manager.experiments = []


=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
MultinomialNB classifier       0.5754       âœ… Completed
BernoulliNB classifier         0.5859       âœ… Completed
ComplementNB classifier        0.5754       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_full_process_oversampled.csv


## Multiclass

In [99]:
# Multiclass section setup: switch the task constant used by load_split and
# by the output paths, and create a manager dedicated to this task.
from utils import ExperimentManager, Experiment

CLASSIFICATION_TYPE = "multiclass"
# NOTE(review): the label list is passed as "Good Fit", "Potential Fit",
# "Not Fit", while the recorded classification reports list the classes as
# Good Fit, Not Fit, Potential Fit. Confirm ExperimentManager does not map
# these names to classes by position, otherwise two labels would be swapped.
manager = ExperimentManager(f"../runs/{CLASSIFICATION_TYPE}/baselines/", ["Good Fit", "Potential Fit", "Not Fit"])


### Linear Models

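RidgeClassifier performs the best on the undersampled datasets (best overall: 0.4547 on full_process); Logistic Regression is ahead on the oversampled datasets (0.4508). Unclear if stop word removal and lemmatization are beneficial. Undersampling performs the best.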


In [100]:
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.svm import LinearSVC

# Untuned linear baselines, keyed by the display name used in experiment titles.
# Every model uses class_weight='balanced' and is seeded from the notebook-wide
# RNG constant (defined in the Setup section) instead of a repeated literal, so
# the seed is controlled from a single place.
linear_models = {
    "Logistic Regression": LogisticRegression(
        penalty='l2',
        C=1.0,
        solver='lbfgs',
        max_iter=1000,
        class_weight='balanced',
        random_state=RNG,
        n_jobs=-1
    ),

    "Ridge": RidgeClassifier(
        alpha=1.0,
        class_weight='balanced',
        random_state=RNG
    ),

    "LinearSVC": LinearSVC(
        C=1.0,
        class_weight='balanced',
        dual=False,
        random_state=RNG,
        max_iter=2000
    )
}

# Used as a path component when experiment summaries are exported.
MODEL_FAMILY = "linear_models"

In [101]:
PREPROCESSING_TYPE = "cleaned_only"
SAMPLING_METHOD = "undersampled"

# (train, val, test) splits for this preprocessing/sampling combination.
# NOTE(review): the recorded output reports 1,332 test samples here but 2,571
# for the other three multiclass configurations, so the scores are not measured
# on the same test set - confirm how the test split is built.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE
    )

for name, model in linear_models.items():

    def pipeline_factory(params, model=model):
        """Build the join -> TF-IDF -> classifier pipeline around `model`.

        `params` is accepted but unused because these baselines are not tuned.
        `model` is bound as a default argument so the factory keeps this
        iteration's estimator even if it is called after the loop has moved on
        (a plain closure would resolve `model` to the last dictionary entry).
        """
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # convert text to numeric features
            ("clf", model),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Logistic Regression classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 1,332
   Classes: 3
   Overall Accuracy: 0.4324

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.4317
   Micro F1:     0.4324
   Weighted F1:  0.4317

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.4328  R: 0.4324
   Micro    - P: 0.4324  R: 0.4324
   Weighted - P: 0.4328  R: 0.4324

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Good Fit             0.4559     0.4077     0.4304        444
   Not Fit              0.4442     0.4932     0.4674        444
   Potential Fit        0.3982     0.3964     0.3973        444
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.4328     0.4324     0.4317       1332
   weighted avg         0.4328     0.4324     0.4317       1332

ðŸ”¢ CONFUSION MATRIX
   Rows: True L

In [102]:
# Print the comparison table for the experiments run above, export it as CSV,
# then empty the manager so the next configuration starts from a clean list.
# The locals are named summary_dir/summary_file (not dir/file) so the builtin
# `dir` is not shadowed for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
manager.experiments = []




=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Logistic Regression classifier 0.4324       âœ… Completed
Ridge classifier               0.4369       âœ… Completed
LinearSVC classifier           0.4317       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_cleaned_only_undersampled.csv


In [103]:

PREPROCESSING_TYPE = "full_process"
SAMPLING_METHOD = "undersampled"

# (train, val, test) splits for this preprocessing/sampling combination.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE
    )

for name, model in linear_models.items():

    def pipeline_factory(params, model=model):
        """Build the join -> TF-IDF -> classifier pipeline around `model`.

        `params` is accepted but unused because these baselines are not tuned.
        `model` is bound as a default argument so the factory keeps this
        iteration's estimator even if it is called after the loop has moved on
        (a plain closure would resolve `model` to the last dictionary entry).
        """
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # convert text to numeric features
            ("clf", model),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Logistic Regression classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 2,571
   Classes: 3
   Overall Accuracy: 0.4539

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.4538
   Micro F1:     0.4539
   Weighted F1:  0.4538

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.4550  R: 0.4539
   Micro    - P: 0.4539  R: 0.4539
   Weighted - P: 0.4550  R: 0.4539

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Good Fit             0.4903     0.4422     0.4650        857
   Not Fit              0.4593     0.5006     0.4791        857
   Potential Fit        0.4155     0.4189     0.4172        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.4550     0.4539     0.4538       2571
   weighted avg         0.4550     0.4539     0.4538       2571

ðŸ”¢ CONFUSION MATRIX
   Rows: True L

In [104]:
# Print the comparison table for the experiments run above, export it as CSV,
# then empty the manager so the next configuration starts from a clean list.
# The locals are named summary_dir/summary_file (not dir/file) so the builtin
# `dir` is not shadowed for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
manager.experiments = []




=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Logistic Regression classifier 0.4539       âœ… Completed
Ridge classifier               0.4547       âœ… Completed
LinearSVC classifier           0.4411       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_full_process_undersampled.csv


In [105]:
PREPROCESSING_TYPE = "cleaned_only"
SAMPLING_METHOD = "oversampled"

# (train, val, test) splits for this preprocessing/sampling combination.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE
    )

for name, model in linear_models.items():

    def pipeline_factory(params, model=model):
        """Build the join -> TF-IDF -> classifier pipeline around `model`.

        `params` is accepted but unused because these baselines are not tuned.
        `model` is bound as a default argument so the factory keeps this
        iteration's estimator even if it is called after the loop has moved on
        (a plain closure would resolve `model` to the last dictionary entry).
        """
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # convert text to numeric features
            ("clf", model),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Logistic Regression classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 2,571
   Classes: 3
   Overall Accuracy: 0.4508

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.4502
   Micro F1:     0.4508
   Weighted F1:  0.4502

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.4513  R: 0.4508
   Micro    - P: 0.4508  R: 0.4508
   Weighted - P: 0.4513  R: 0.4508

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Good Fit             0.4690     0.4142     0.4399        857
   Not Fit              0.4663     0.5088     0.4866        857
   Potential Fit        0.4187     0.4294     0.4240        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.4513     0.4508     0.4502       2571
   weighted avg         0.4513     0.4508     0.4502       2571

ðŸ”¢ CONFUSION MATRIX
   Rows: True L

In [106]:
# Print the comparison table for the experiments run above, export it as CSV,
# then empty the manager so the next configuration starts from a clean list.
# The locals are named summary_dir/summary_file (not dir/file) so the builtin
# `dir` is not shadowed for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
manager.experiments = []




=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Logistic Regression classifier 0.4508       âœ… Completed
Ridge classifier               0.4422       âœ… Completed
LinearSVC classifier           0.4356       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_cleaned_only_oversampled.csv


In [107]:
PREPROCESSING_TYPE = "full_process"
SAMPLING_METHOD = "oversampled"

# (train, val, test) splits for this preprocessing/sampling combination.
# NOTE(review): the recorded output of this cell is identical to the
# cleaned_only/oversampled run (same accuracy and per-class scores) - verify
# that ../data/full_process/oversampled really holds differently processed data.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE
    )

for name, model in linear_models.items():

    def pipeline_factory(params, model=model):
        """Build the join -> TF-IDF -> classifier pipeline around `model`.

        `params` is accepted but unused because these baselines are not tuned.
        `model` is bound as a default argument so the factory keeps this
        iteration's estimator even if it is called after the loop has moved on
        (a plain closure would resolve `model` to the last dictionary entry).
        """
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # convert text to numeric features
            ("clf", model),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Logistic Regression classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 2,571
   Classes: 3
   Overall Accuracy: 0.4508

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.4502
   Micro F1:     0.4508
   Weighted F1:  0.4502

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.4513  R: 0.4508
   Micro    - P: 0.4508  R: 0.4508
   Weighted - P: 0.4513  R: 0.4508

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Good Fit             0.4690     0.4142     0.4399        857
   Not Fit              0.4663     0.5088     0.4866        857
   Potential Fit        0.4187     0.4294     0.4240        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.4513     0.4508     0.4502       2571
   weighted avg         0.4513     0.4508     0.4502       2571

ðŸ”¢ CONFUSION MATRIX
   Rows: True L

In [108]:
# Print the comparison table for the experiments run above, export it as CSV,
# then empty the manager so the next configuration starts from a clean list.
# The locals are named summary_dir/summary_file (not dir/file) so the builtin
# `dir` is not shadowed for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
manager.experiments = []


=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Logistic Regression classifier 0.4508       âœ… Completed
Ridge classifier               0.4422       âœ… Completed
LinearSVC classifier           0.4356       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_full_process_oversampled.csv


### Tree Models

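No clear winner between the two tree ensembles: ExtraTrees is slightly ahead on both undersampled datasets (0.4467 vs 0.4459 on cleaned_only, 0.4516 vs 0.4391 on full_process), while Random Forest is ahead on the oversampled datasets (0.4539 vs 0.4255), which is also the best tree score overall. Unclear if stop word removal and lemmatization are beneficial: the cleaned_only and full_process oversampled runs report identical scores, and the cleaned_only undersampled run is evaluated on a smaller test set (1,332 vs 2,571 samples), so its scores are not directly comparable.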


In [109]:
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier

# Untuned tree-ensemble baselines, keyed by the display name used in experiment
# titles. Both use 100 trees, class_weight='balanced' and the notebook-wide RNG
# constant (defined in the Setup section) instead of a repeated literal, so the
# seed is controlled from a single place.
tree_models = {
    "Random Forest": RandomForestClassifier(
        n_estimators=100,
        max_depth=None,
        min_samples_split=5,
        min_samples_leaf=2,
        max_features='sqrt',
        class_weight='balanced',
        random_state=RNG,
        n_jobs=-1
    ),

    "ExtraTrees": ExtraTreesClassifier(
        n_estimators=100,
        class_weight='balanced',
        random_state=RNG,
        n_jobs=-1,
        max_depth=None,
        min_samples_split=5,
        min_samples_leaf=2
    )
}

# Used as a path component when experiment summaries are exported.
MODEL_FAMILY = "tree_models"

In [110]:
PREPROCESSING_TYPE = "cleaned_only"
SAMPLING_METHOD = "undersampled"

# (train, val, test) splits for this preprocessing/sampling combination.
# NOTE(review): the recorded output reports 1,332 test samples here but 2,571
# for the other three multiclass configurations, so the scores are not measured
# on the same test set - confirm how the test split is built.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE
    )

for name, model in tree_models.items():

    def pipeline_factory(params, model=model):
        """Build the join -> TF-IDF -> classifier pipeline around `model`.

        `params` is accepted but unused because these baselines are not tuned.
        `model` is bound as a default argument so the factory keeps this
        iteration's estimator even if it is called after the loop has moved on
        (a plain closure would resolve `model` to the last dictionary entry).
        """
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # convert text to numeric features
            ("clf", model),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Random Forest classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 1,332
   Classes: 3
   Overall Accuracy: 0.4459

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.4453
   Micro F1:     0.4459
   Weighted F1:  0.4453

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.4511  R: 0.4459
   Micro    - P: 0.4459  R: 0.4459
   Weighted - P: 0.4511  R: 0.4459

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Good Fit             0.5029     0.3919     0.4405        444
   Not Fit              0.4497     0.5135     0.4795        444
   Potential Fit        0.4008     0.4324     0.4160        444
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.4511     0.4459     0.4453       1332
   weighted avg         0.4511     0.4459     0.4453       1332

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels,

In [111]:
# Print the comparison table for the experiments run above, export it as CSV,
# then empty the manager so the next configuration starts from a clean list.
# The locals are named summary_dir/summary_file (not dir/file) so the builtin
# `dir` is not shadowed for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
manager.experiments = []




=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Random Forest classifier       0.4459       âœ… Completed
ExtraTrees classifier          0.4467       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_cleaned_only_undersampled.csv


In [112]:

PREPROCESSING_TYPE = "full_process"
SAMPLING_METHOD = "undersampled"

# (train, val, test) splits for this preprocessing/sampling combination.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE
    )

for name, model in tree_models.items():

    def pipeline_factory(params, model=model):
        """Build the join -> TF-IDF -> classifier pipeline around `model`.

        `params` is accepted but unused because these baselines are not tuned.
        `model` is bound as a default argument so the factory keeps this
        iteration's estimator even if it is called after the loop has moved on
        (a plain closure would resolve `model` to the last dictionary entry).
        """
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # convert text to numeric features
            ("clf", model),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Random Forest classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 2,571
   Classes: 3
   Overall Accuracy: 0.4391

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.4260
   Micro F1:     0.4391
   Weighted F1:  0.4260

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.4487  R: 0.4391
   Micro    - P: 0.4391  R: 0.4391
   Weighted - P: 0.4487  R: 0.4391

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Good Fit             0.4971     0.2964     0.3713        857
   Not Fit              0.4254     0.6616     0.5178        857
   Potential Fit        0.4237     0.3594     0.3889        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.4487     0.4391     0.4260       2571
   weighted avg         0.4487     0.4391     0.4260       2571

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels,

In [113]:
# Print the comparison table for the experiments run above, export it as CSV,
# then empty the manager so the next configuration starts from a clean list.
# The locals are named summary_dir/summary_file (not dir/file) so the builtin
# `dir` is not shadowed for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
manager.experiments = []




=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Random Forest classifier       0.4391       âœ… Completed
ExtraTrees classifier          0.4516       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_full_process_undersampled.csv


In [114]:
PREPROCESSING_TYPE = "cleaned_only"
SAMPLING_METHOD = "oversampled"

# (train, val, test) splits for this preprocessing/sampling combination.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE
    )

for name, model in tree_models.items():

    def pipeline_factory(params, model=model):
        """Build the join -> TF-IDF -> classifier pipeline around `model`.

        `params` is accepted but unused because these baselines are not tuned.
        `model` is bound as a default argument so the factory keeps this
        iteration's estimator even if it is called after the loop has moved on
        (a plain closure would resolve `model` to the last dictionary entry).
        """
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # convert text to numeric features
            ("clf", model),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Random Forest classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 2,571
   Classes: 3
   Overall Accuracy: 0.4539

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.4455
   Micro F1:     0.4539
   Weighted F1:  0.4455

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.4729  R: 0.4539
   Micro    - P: 0.4539  R: 0.4539
   Weighted - P: 0.4729  R: 0.4539

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Good Fit             0.5614     0.3256     0.4121        857
   Not Fit              0.4267     0.6418     0.5126        857
   Potential Fit        0.4306     0.3944     0.4117        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.4729     0.4539     0.4455       2571
   weighted avg         0.4729     0.4539     0.4455       2571

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels,

In [115]:
# Print the comparison table for the experiments run above, export it as CSV,
# then empty the manager so the next configuration starts from a clean list.
# The locals are named summary_dir/summary_file (not dir/file) so the builtin
# `dir` is not shadowed for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
manager.experiments = []




=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Random Forest classifier       0.4539       âœ… Completed
ExtraTrees classifier          0.4255       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_cleaned_only_oversampled.csv


In [116]:
PREPROCESSING_TYPE = "full_process"
SAMPLING_METHOD = "oversampled"

# (train, val, test) splits for this preprocessing/sampling combination.
# NOTE(review): the recorded output of this cell is identical to the
# cleaned_only/oversampled run (same accuracy and per-class scores) - verify
# that ../data/full_process/oversampled really holds differently processed data.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE
    )

for name, model in tree_models.items():

    def pipeline_factory(params, model=model):
        """Build the join -> TF-IDF -> classifier pipeline around `model`.

        `params` is accepted but unused because these baselines are not tuned.
        `model` is bound as a default argument so the factory keeps this
        iteration's estimator even if it is called after the loop has moved on
        (a plain closure would resolve `model` to the last dictionary entry).
        """
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # convert text to numeric features
            ("clf", model),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: Random Forest classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 2,571
   Classes: 3
   Overall Accuracy: 0.4539

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.4455
   Micro F1:     0.4539
   Weighted F1:  0.4455

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.4729  R: 0.4539
   Micro    - P: 0.4539  R: 0.4539
   Weighted - P: 0.4729  R: 0.4539

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Good Fit             0.5614     0.3256     0.4121        857
   Not Fit              0.4267     0.6418     0.5126        857
   Potential Fit        0.4306     0.3944     0.4117        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.4729     0.4539     0.4455       2571
   weighted avg         0.4729     0.4539     0.4455       2571

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels,

In [117]:
# Print the comparison table for the experiments run above, export it as CSV,
# then empty the manager so the next configuration starts from a clean list.
# The locals are named summary_dir/summary_file (not dir/file) so the builtin
# `dir` is not shadowed for the rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
manager.experiments = []


=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
Random Forest classifier       0.4539       âœ… Completed
ExtraTrees classifier          0.4255       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_full_process_oversampled.csv


### Naive Bayes Models

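TODO: summarize the Naive Bayes results from the comparison tables below. The text previously here ("RidgeClassifier performs the best. Unclear if stop word removal and lemmatization are beneficial. Undersampling performs the best.") was carried over from the Linear Models section and does not describe these models.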


In [118]:
from sklearn.naive_bayes import MultinomialNB, BernoulliNB, ComplementNB

# Untuned Naive Bayes baselines, keyed by the display name that is later used to
# build each experiment's title. All arguments are spelled out explicitly so the
# baseline configuration is visible at a glance.
naive_bayes_models = dict(
    MultinomialNB=MultinomialNB(alpha=1.0, fit_prior=True),
    BernoulliNB=BernoulliNB(alpha=1.0, binarize=0.0, fit_prior=True),
    ComplementNB=ComplementNB(alpha=1.0, fit_prior=True, norm=False),
)

# Sub-folder name under which this family's experiment summaries are exported.
MODEL_FAMILY = "naive_bayes_models"

In [119]:
# Dataset variant evaluated by this cell; the export cell that follows reads
# these two constants to name the summary file.
PREPROCESSING_TYPE = "cleaned_only"
SAMPLING_METHOD = "undersampled"

# (train, val, test) splits, each an (X, y) pair, for the chosen variant.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE
    )

for name, model in naive_bayes_models.items():

    # Bind the current estimator as a default argument. A plain closure looks
    # `model` up only when the factory is *called*, so any call made after the
    # loop has advanced would silently build the pipeline around a later model.
    def pipeline_factory(params, estimator=model):
        """Return the static join -> TF-IDF -> classifier pipeline.

        `params` is accepted for interface compatibility but unused, because
        no hyperparameter tuning is performed for these baselines.
        """
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # Convert text to numeric
            ("clf", estimator),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: MultinomialNB classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 1,332
   Classes: 3
   Overall Accuracy: 0.4287

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.4248
   Micro F1:     0.4287
   Weighted F1:  0.4248

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.4283  R: 0.4287
   Micro    - P: 0.4287  R: 0.4287
   Weighted - P: 0.4283  R: 0.4287

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Good Fit             0.4291     0.5450     0.4802        444
   Not Fit              0.4665     0.4077     0.4351        444
   Potential Fit        0.3895     0.3333     0.3592        444
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.4283     0.4287     0.4248       1332
   weighted avg         0.4283     0.4287     0.4248       1332

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels,

In [120]:
# Export location for this run's summary: one folder per model family and
# classification type, one CSV per preprocessing/sampling variant.
# Named summary_dir/summary_file so the builtin `dir()` is not shadowed for the
# rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
# Drop the recorded experiments so the next comparison only lists the next batch.
manager.experiments = []



=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
MultinomialNB classifier       0.4287       âœ… Completed
BernoulliNB classifier         0.4497       âœ… Completed
ComplementNB classifier        0.4384       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_cleaned_only_undersampled.csv


In [121]:

# Dataset variant evaluated by this cell; the export cell that follows reads
# these two constants to name the summary file.
PREPROCESSING_TYPE = "full_process"
SAMPLING_METHOD = "undersampled"

# (train, val, test) splits, each an (X, y) pair, for the chosen variant.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE
    )

for name, model in naive_bayes_models.items():

    # Bind the current estimator as a default argument. A plain closure looks
    # `model` up only when the factory is *called*, so any call made after the
    # loop has advanced would silently build the pipeline around a later model.
    def pipeline_factory(params, estimator=model):
        """Return the static join -> TF-IDF -> classifier pipeline.

        `params` is accepted for interface compatibility but unused, because
        no hyperparameter tuning is performed for these baselines.
        """
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # Convert text to numeric
            ("clf", estimator),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: MultinomialNB classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 2,571
   Classes: 3
   Overall Accuracy: 0.4465

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.4459
   Micro F1:     0.4465
   Weighted F1:  0.4459

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.4498  R: 0.4465
   Micro    - P: 0.4465  R: 0.4465
   Weighted - P: 0.4498  R: 0.4465

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Good Fit             0.4613     0.4936     0.4769        857
   Not Fit              0.4791     0.3874     0.4284        857
   Potential Fit        0.4089     0.4586     0.4323        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.4498     0.4465     0.4459       2571
   weighted avg         0.4498     0.4465     0.4459       2571

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels,

In [122]:
# Export location for this run's summary: one folder per model family and
# classification type, one CSV per preprocessing/sampling variant.
# Named summary_dir/summary_file so the builtin `dir()` is not shadowed for the
# rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
# Drop the recorded experiments so the next comparison only lists the next batch.
manager.experiments = []



=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
MultinomialNB classifier       0.4465       âœ… Completed
BernoulliNB classifier         0.4364       âœ… Completed
ComplementNB classifier        0.4489       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_full_process_undersampled.csv


In [123]:
# Dataset variant evaluated by this cell; the export cell that follows reads
# these two constants to name the summary file.
PREPROCESSING_TYPE = "cleaned_only"
SAMPLING_METHOD = "oversampled"

# (train, val, test) splits, each an (X, y) pair, for the chosen variant.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE
    )

for name, model in naive_bayes_models.items():

    # Bind the current estimator as a default argument. A plain closure looks
    # `model` up only when the factory is *called*, so any call made after the
    # loop has advanced would silently build the pipeline around a later model.
    def pipeline_factory(params, estimator=model):
        """Return the static join -> TF-IDF -> classifier pipeline.

        `params` is accepted for interface compatibility but unused, because
        no hyperparameter tuning is performed for these baselines.
        """
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # Convert text to numeric
            ("clf", estimator),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: MultinomialNB classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 2,571
   Classes: 3
   Overall Accuracy: 0.4411

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.4401
   Micro F1:     0.4411
   Weighted F1:  0.4401

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.4439  R: 0.4411
   Micro    - P: 0.4411  R: 0.4411
   Weighted - P: 0.4439  R: 0.4411

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Good Fit             0.4613     0.4936     0.4769        857
   Not Fit              0.4666     0.3746     0.4155        857
   Potential Fit        0.4037     0.4551     0.4279        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.4439     0.4411     0.4401       2571
   weighted avg         0.4439     0.4411     0.4401       2571

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels,

In [124]:
# Export location for this run's summary: one folder per model family and
# classification type, one CSV per preprocessing/sampling variant.
# Named summary_dir/summary_file so the builtin `dir()` is not shadowed for the
# rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
# Drop the recorded experiments so the next comparison only lists the next batch.
manager.experiments = []



=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
MultinomialNB classifier       0.4411       âœ… Completed
BernoulliNB classifier         0.4321       âœ… Completed
ComplementNB classifier        0.4430       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_cleaned_only_oversampled.csv


In [125]:
# Dataset variant evaluated by this cell; the export cell that follows reads
# these two constants to name the summary file.
PREPROCESSING_TYPE = "full_process"
SAMPLING_METHOD = "oversampled"

# (train, val, test) splits, each an (X, y) pair, for the chosen variant.
splits = load_split(
    preprocessing_type=PREPROCESSING_TYPE,
    sampling_method=SAMPLING_METHOD,
    classification_type=CLASSIFICATION_TYPE
    )

for name, model in naive_bayes_models.items():

    # Bind the current estimator as a default argument. A plain closure looks
    # `model` up only when the factory is *called*, so any call made after the
    # loop has advanced would silently build the pipeline around a later model.
    def pipeline_factory(params, estimator=model):
        """Return the static join -> TF-IDF -> classifier pipeline.

        `params` is accepted for interface compatibility but unused, because
        no hyperparameter tuning is performed for these baselines.
        """
        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ("tfidf", TfidfVectorizer()),  # Convert text to numeric
            ("clf", estimator),
        ])

    experiment = Experiment(
        name=f"{name} classifier",
        description=f"""
{CLASSIFICATION_TYPE} {name} with TF-IDF and no hyperparameter tuning. 
Dataset has {PREPROCESSING_TYPE} preprocessing and is {SAMPLING_METHOD}
""",
        pipeline_factory=pipeline_factory
    )

    print("========= Training Baseline Models =========")
    manager.run_experiment(experiment, splits=splits)


=== Running Experiment: MultinomialNB classifier ===

ðŸŽ¯ TEST SET EVALUATION RESULTS

ðŸ“Š OVERVIEW
   Test Samples: 2,571
   Classes: 3
   Overall Accuracy: 0.4411

ðŸŽ¯ MAIN PERFORMANCE METRICS
   Macro F1:     0.4401
   Micro F1:     0.4411
   Weighted F1:  0.4401

ðŸ“ˆ PRECISION/RECALL SUMMARY
   Macro    - P: 0.4439  R: 0.4411
   Micro    - P: 0.4411  R: 0.4411
   Weighted - P: 0.4439  R: 0.4411

ðŸ“‹ DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Good Fit             0.4613     0.4936     0.4769        857
   Not Fit              0.4666     0.3746     0.4155        857
   Potential Fit        0.4037     0.4551     0.4279        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.4439     0.4411     0.4401       2571
   weighted avg         0.4439     0.4411     0.4401       2571

ðŸ”¢ CONFUSION MATRIX
   Rows: True Labels,

In [None]:
# Export location for this run's summary: one folder per model family and
# classification type, one CSV per preprocessing/sampling variant.
# Named summary_dir/summary_file so the builtin `dir()` is not shadowed for the
# rest of the kernel session.
summary_dir = f"../experiment_summaries/{MODEL_FAMILY}/{CLASSIFICATION_TYPE}"
summary_file = f"summary_{PREPROCESSING_TYPE}_{SAMPLING_METHOD}.csv"

manager.compare_experiments()
manager.export_experiment_summary(dir=summary_dir, filename=summary_file)
# Drop the recorded experiments so the next comparison only lists the next batch.
manager.experiments = []


=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
MultinomialNB classifier       0.4411       âœ… Completed
BernoulliNB classifier         0.4321       âœ… Completed
ComplementNB classifier        0.4430       âœ… Completed
ðŸ“Š Experiment summary exported to: summary_full_process_oversampled.csv


Exception ignored in: <function ResourceTracker.__del__ at 0x7fa68eba8540>
Traceback (most recent call last):
  File "/home/maveron/.conda/envs/304/lib/python3.12/multiprocessing/resource_tracker.py", line 77, in __del__
  File "/home/maveron/.conda/envs/304/lib/python3.12/multiprocessing/resource_tracker.py", line 86, in _stop
  File "/home/maveron/.conda/envs/304/lib/python3.12/multiprocessing/resource_tracker.py", line 111, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x7fbbf54ac540>
Traceback (most recent call last):
  File "/home/maveron/.conda/envs/304/lib/python3.12/multiprocessing/resource_tracker.py", line 77, in __del__
  File "/home/maveron/.conda/envs/304/lib/python3.12/multiprocessing/resource_tracker.py", line 86, in _stop
  File "/home/maveron/.conda/envs/304/lib/python3.12/multiprocessing/resource_tracker.py", line 111, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exceptio