In [4]:
import sys
# Put the parent directory on the module search path.
# NOTE(review): presumably this is what lets the project-local `utils` module
# (imported further down) resolve — confirm against the repository layout.
sys.path.append('..')

In [5]:
import warnings
import multiprocessing
# Silence ResourceWarning inside this kernel process.
warnings.filterwarnings("ignore", category=ResourceWarning)

# Also suppress multiprocessing warnings
# NOTE(review): PYTHONWARNINGS is read when an interpreter starts, so setting it
# here should only influence child processes spawned afterwards, not this
# kernel — confirm that this is the intent.
import sys
import os
os.environ['PYTHONWARNINGS'] = 'ignore::ResourceWarning'

# Data Loading

In [6]:
from pathlib import Path
import pandas as pd

def load_datasets(data_root: str | Path = "data",
                  tasks: tuple[str, ...] = ("binary", "multiclass"),
                  splits: tuple[str, ...] = ("train", "val", "test")) -> dict:

    data_root = Path(data_root)
    datasets  = {}

    for task in tasks:
        task_dir     = data_root / task
        task_dict    = {}

        for split in splits:
            split_dict = {}
            for kind in ("X", "y"):
                file_path = task_dir / f"{kind}_{split}.pkl"
                split_dict[kind] = pd.read_pickle(file_path)
            task_dict[split] = split_dict

        datasets[task] = task_dict

    return datasets

In [7]:
from typing import Tuple, Literal
import pandas as pd

def load_split(
    preprocessing_type: Literal["cleaned_only", "full_process"],
    sampling_method: Literal["undersampled", "oversampled"],
    classification_type: Literal["binary", "multiclass"]
) -> Tuple[
    Tuple[pd.DataFrame, pd.Series],  # train: (X_train, y_train)
    Tuple[pd.DataFrame, pd.Series],  # val: (X_val, y_val)
    Tuple[pd.DataFrame, pd.Series]   # test: (X_test, y_test)
]:
    """
    Load different types of splits from the data
    
    Args:
        preprocessing_type: must be "cleaned_only" or "full_process"
        sampling_method: must be "undersampled" or "oversampled"
        classification_type: must be "binary" or "multiclass"
    
    Returns:
        Tuple of (train, val, test) splits, where each split is (X, y)
        - train: (X_train, y_train)
        - val: (X_val, y_val)  
        - test: (X_test, y_test)
    """
    dataset = load_datasets(
        f"../data/{preprocessing_type}/{sampling_method}")[classification_type]
    split_names = ["train", "val", "test"]

    return tuple([(lambda split: (dataset[split]["X"], dataset[split]["y"]))(split) for split in split_names])

# Experiments

In [8]:
def combine_text(X):
    """Concatenate resume and job-description text into one string per row.

    Args:
        X: Table-like object exposing ``"resume_text"`` and
            ``"job_description_text"`` columns (a pandas DataFrame in this
            notebook).

    Returns:
        The underlying array of the combined Series, one
        ``"<resume> [SEP] <job description>"`` string per input row.
    """
    # No defensive copy of X: the two columns are only read, and astype / +
    # build new Series, so the caller's frame is never modified. Skipping the
    # copy avoids duplicating the whole frame on every transform call.
    resume = X["resume_text"].astype(str)
    job_description = X["job_description_text"].astype(str)

    return (resume + " [SEP] " + job_description).values

In [9]:
# (train, val, test) splits, each an (X, y) pair, for the binary task on the
# cleaned-only, undersampled data variant.
splits = load_split(
    preprocessing_type="cleaned_only",
    sampling_method="undersampled",
    classification_type="binary",
)

In [10]:
# Seed passed as `random_state` to the estimators built in the experiment cells.
SEED = 42

## Experiment 3: Feature Selection methods

### F-Test Configurations

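Best configuration per meta-learner (number of selected features k @ accuracy):
- EBM F-test: 1000 @ 65.05%
- Ridge F-test: 1000 @ 64.94%
- LogReg F-test: 1000 @ 65.11%

Note: these figures do not match the test-score comparison table printed at the end of this section (e.g. EBM at k = 1000 is listed there as 0.6406); they are presumably from a different run or data split — confirm before citing them.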

In [30]:
from utils import ExperimentManager, Experiment

# Manager for every F-test experiment in this section.
# NOTE(review): the second argument looks like the class labels used in the
# printed reports ("Fit" / "Not Fit") — confirm against ExperimentManager.
f_test_manager = ExperimentManager(
    "../runs/ensembles/f-test/",
    ["Fit", "Not Fit"],
)

In [31]:
# Values of k swept below: each one is used as SelectKBest's `k` (number of
# features kept after the F-test).
k_sizes = [10, 50, 100, 500, 1000, 5000]

In [32]:
from sklearn.feature_selection import f_classif

def make_ebm_f_test_factory(k):
    """Return a pipeline factory whose F-test selector keeps ``k`` features.

    ``k`` is bound here as an argument instead of being read from the loop
    variable. A function defined inside the loop would look ``size`` up when it
    is *called*, so any deferred call would silently use the last value swept.
    """

    def pipeline_factory(params):
        """Build join -> TF-IDF -> SelectKBest(f_classif) -> stacking (EBM meta-learner).

        ``params`` is part of the factory signature but is not used here.
        """
        base_estimators = [
            ('lr', LogisticRegression(random_state=SEED)),
            ('nb', BernoulliNB()),
            ('rf', RandomForestClassifier(random_state=SEED)),
        ]

        stacking_clf = StackingClassifier(
            estimators=base_estimators,
            final_estimator=ExplainableBoostingClassifier(random_state=SEED),
            cv=5,      # 5-fold CV predictions of the base models feed the meta-learner
            n_jobs=1,  # fit base estimators sequentially
        )

        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ('tfidf', TfidfVectorizer()),  # Convert text to numeric
            ('selector', SelectKBest(f_classif, k=k)),
            ('clf', stacking_clf)
        ])

    return pipeline_factory


for size in k_sizes:
    pipeline_factory = make_ebm_f_test_factory(size)

    experiment = Experiment(
        name=f"EBM meta f-test {size} stack",
        description=f"Stack ensemble classifier with EBM classifier and f-test {size}",
        pipeline_factory=pipeline_factory
    )

    f_test_manager.run_experiment(experiment, splits=splits)



=== Running Experiment: EBM meta f-test 10 stack ===

🎯 TEST SET EVALUATION RESULTS

📊 OVERVIEW
   Test Samples: 1,714
   Classes: 2
   Overall Accuracy: 0.5210

🎯 MAIN PERFORMANCE METRICS
   Macro F1:     0.5013
   Micro F1:     0.5210
   Weighted F1:  0.5013

📈 PRECISION/RECALL SUMMARY
   Macro    - P: 0.5250  R: 0.5210
   Micro    - P: 0.5210  R: 0.5210
   Weighted - P: 0.5250  R: 0.5210

📋 DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.5150     0.7200     0.6005        857
   Not Fit              0.5349     0.3221     0.4020        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.5250     0.5210     0.5013       1714
   weighted avg         0.5250     0.5210     0.5013       1714

🔢 CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted →
   True ↓        Fit  Not Fit 
   Fit   

In [33]:
def make_ridge_f_test_factory(k):
    """Return a pipeline factory whose F-test selector keeps ``k`` features.

    ``k`` is bound here as an argument instead of being read from the loop
    variable. A function defined inside the loop would look ``size`` up when it
    is *called*, so any deferred call would silently use the last value swept.
    """

    def pipeline_factory(params):
        """Build join -> TF-IDF -> SelectKBest(f_classif) -> stacking (ridge meta-learner).

        ``params`` is part of the factory signature but is not used here.
        """
        base_estimators = [
            ('lr', LogisticRegression(random_state=SEED)),
            ('nb', BernoulliNB()),
            ('rf', RandomForestClassifier(random_state=SEED)),
        ]

        stacking_clf = StackingClassifier(
            estimators=base_estimators,
            final_estimator=RidgeClassifier(random_state=SEED),
            cv=5,      # 5-fold CV predictions of the base models feed the meta-learner
            n_jobs=1,  # fit base estimators sequentially
        )

        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ('tfidf', TfidfVectorizer()),  # Convert text to numeric
            ('selector', SelectKBest(f_classif, k=k)),
            ('clf', stacking_clf)
        ])

    return pipeline_factory


for size in k_sizes:
    pipeline_factory = make_ridge_f_test_factory(size)

    experiment = Experiment(
        name=f"ridge meta f-test {size} stack",
        description=f"Stack ensemble classifier with ridge classifier and f-test {size}",
        pipeline_factory=pipeline_factory
    )

    f_test_manager.run_experiment(experiment, splits=splits)


=== Running Experiment: ridge meta f-test 10 stack ===

🎯 TEST SET EVALUATION RESULTS

📊 OVERVIEW
   Test Samples: 1,714
   Classes: 2
   Overall Accuracy: 0.5152

🎯 MAIN PERFORMANCE METRICS
   Macro F1:     0.4964
   Micro F1:     0.5152
   Weighted F1:  0.4964

📈 PRECISION/RECALL SUMMARY
   Macro    - P: 0.5178  R: 0.5152
   Micro    - P: 0.5152  R: 0.5152
   Weighted - P: 0.5178  R: 0.5152

📋 DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.5109     0.7083     0.5936        857
   Not Fit              0.5247     0.3221     0.3991        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.5178     0.5152     0.4964       1714
   weighted avg         0.5178     0.5152     0.4964       1714

🔢 CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted →
   True ↓        Fit  Not Fit 
   Fit 

In [34]:
def make_logreg_f_test_factory(k):
    """Return a pipeline factory whose F-test selector keeps ``k`` features.

    ``k`` is bound here as an argument instead of being read from the loop
    variable. A function defined inside the loop would look ``size`` up when it
    is *called*, so any deferred call would silently use the last value swept.
    """

    def pipeline_factory(params):
        """Build join -> TF-IDF -> SelectKBest(f_classif) -> stacking (LogReg meta-learner).

        ``params`` is part of the factory signature but is not used here.
        """
        base_estimators = [
            ('lr', LogisticRegression(random_state=SEED)),
            ('nb', BernoulliNB()),
            ('rf', RandomForestClassifier(random_state=SEED)),
        ]

        stacking_clf = StackingClassifier(
            estimators=base_estimators,
            final_estimator=LogisticRegression(random_state=SEED),
            cv=5,      # 5-fold CV predictions of the base models feed the meta-learner
            n_jobs=1,  # fit base estimators sequentially
        )

        return Pipeline([
            ("join", FunctionTransformer(combine_text, validate=False)),
            ('tfidf', TfidfVectorizer()),  # Convert text to numeric
            ('selector', SelectKBest(f_classif, k=k)),
            ('clf', stacking_clf)
        ])

    return pipeline_factory


for size in k_sizes:
    pipeline_factory = make_logreg_f_test_factory(size)

    # NOTE(review): unlike the EBM and ridge experiments, the name below has no
    # space between "f-test" and the size. Left unchanged because the name may
    # identify already-stored runs; align it if that is not a concern.
    experiment = Experiment(
        name=f"LogReg meta f-test{size} stack",
        description=f"Stack ensemble classifier with LogReg classifier and f-test{size}",
        pipeline_factory=pipeline_factory
    )

    f_test_manager.run_experiment(experiment, splits=splits)


=== Running Experiment: LogReg meta f-test10 stack ===

🎯 TEST SET EVALUATION RESULTS

📊 OVERVIEW
   Test Samples: 1,714
   Classes: 2
   Overall Accuracy: 0.5152

🎯 MAIN PERFORMANCE METRICS
   Macro F1:     0.4954
   Micro F1:     0.5152
   Weighted F1:  0.4954

📈 PRECISION/RECALL SUMMARY
   Macro    - P: 0.5180  R: 0.5152
   Micro    - P: 0.5152  R: 0.5152
   Weighted - P: 0.5180  R: 0.5152

📋 DETAILED CLASSIFICATION REPORT
   Class             Precision     Recall   F1-Score    Support
   ---------------- ---------- ---------- ---------- ----------
   Fit                  0.5109     0.7130     0.5952        857
   Not Fit              0.5251     0.3174     0.3956        857
   ---------------- ---------- ---------- ---------- ----------
   macro avg            0.5180     0.5152     0.4954       1714
   weighted avg         0.5180     0.5152     0.4954       1714

🔢 CONFUSION MATRIX
   Rows: True Labels, Columns: Predicted Labels
   Predicted →
   True ↓        Fit  Not Fit 
   Fit 

In [35]:
# Destination of the exported comparison table.
# NOTE(review): `dir` shadows the built-in of the same name; the variable name
# is kept because other cells may read it — consider renaming notebook-wide.
filename = "f-test_selector_comparisons.csv"
dir = "../experiment_summaries/ensemble"

# Show the side-by-side comparison, write the summary file, then close the manager.
f_test_manager.compare_experiments()
f_test_manager.export_experiment_summary(dir, filename)
f_test_manager.close()


=== Experiment Comparison (accuracy) ===
Experiment                     Test Score   Status    
-------------------------------------------------------
EBM meta f-test 10 stack       0.5210       ✅ Completed
EBM meta f-test 50 stack       0.5403       ✅ Completed
EBM meta f-test 100 stack      0.5898       ✅ Completed
EBM meta f-test 500 stack      0.6354       ✅ Completed
EBM meta f-test 1000 stack     0.6406       ✅ Completed
EBM meta f-test 5000 stack     0.6459       ✅ Completed
ridge meta f-test 10 stack     0.5152       ✅ Completed
ridge meta f-test 50 stack     0.5333       ✅ Completed
ridge meta f-test 100 stack    0.5881       ✅ Completed
ridge meta f-test 500 stack    0.6301       ✅ Completed
ridge meta f-test 1000 stack   0.6429       ✅ Completed
ridge meta f-test 5000 stack   0.6546       ✅ Completed
LogReg meta f-test10 stack     0.5152       ✅ Completed
LogReg meta f-test50 stack     0.5274       ✅ Completed
LogReg meta f-test100 stack    0.5852       ✅ Completed
LogReg 

Exception ignored in: <function ResourceTracker.__del__ at 0x7fa50d399da0>
Traceback (most recent call last):
  File "/home/maveron/.conda/envs/304/lib/python3.12/multiprocessing/resource_tracker.py", line 77, in __del__
  File "/home/maveron/.conda/envs/304/lib/python3.12/multiprocessing/resource_tracker.py", line 86, in _stop
  File "/home/maveron/.conda/envs/304/lib/python3.12/multiprocessing/resource_tracker.py", line 111, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x7faf9ba91da0>
Traceback (most recent call last):
  File "/home/maveron/.conda/envs/304/lib/python3.12/multiprocessing/resource_tracker.py", line 77, in __del__
  File "/home/maveron/.conda/envs/304/lib/python3.12/multiprocessing/resource_tracker.py", line 86, in _stop
  File "/home/maveron/.conda/envs/304/lib/python3.12/multiprocessing/resource_tracker.py", line 111, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exceptio