In [37]:
import pandas as pd
import numpy as np

from sklearn.dummy import DummyClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


In [38]:
# Read the merged feature table from Parquet (path is relative to the working directory).
DATA_PATH = "en_merged_df.parquet"
df = pd.read_parquet(DATA_PATH)

In [39]:
# Speaker id = the part of the file path that precedes the first "/wav/"
# (the whole path when "/wav/" does not occur).
df["speaker_id"] = df["filename"].map(lambda path: path.partition("/wav/")[0])

In [40]:
# Distinct speaker ids, in order of first appearance; the train/val/test split
# is drawn over these rather than over individual rows.
unique_speakers = pd.unique(df["speaker_id"])

In [41]:
# Split at the speaker level: 70% of speakers go to train, and the held-out 30%
# is halved into validation and test. Both calls share one seed.
SPLIT_SEED = 42

train_speakers, temp_speakers = train_test_split(
    unique_speakers,
    test_size=0.3,
    random_state=SPLIT_SEED,
)
val_speakers, test_speakers = train_test_split(
    temp_speakers,
    test_size=0.5,
    random_state=SPLIT_SEED,
)

In [42]:
# Build the speaker -> split lookup once. `x in ndarray` is a linear scan, so
# testing every row against the speaker arrays costs O(rows x speakers); a dict
# lookup is O(1) per row. Later entries win, so listing train last keeps the
# original precedence (train, then val, then test) should an id ever appear in
# more than one array.
_SPLIT_BY_SPEAKER = {
    **dict.fromkeys(test_speakers, "test"),
    **dict.fromkeys(val_speakers, "val"),
    **dict.fromkeys(train_speakers, "train"),
}


def assign_split(speaker_id):
    """Return the name of the split a speaker belongs to.

    Args:
        speaker_id: A value from the "speaker_id" column.

    Returns:
        "train", "val" or "test" according to which speaker array contains
        the id, or "unknown" when none of them does.
    """
    return _SPLIT_BY_SPEAKER.get(speaker_id, "unknown")  # "unknown" is the safety net


df["split"] = df["speaker_id"].apply(assign_split)

In [43]:
# Number of rows that landed in each split. The split was assigned per speaker,
# so these are row counts, not speaker counts.
df["split"].value_counts()

split
train    31464
val       7076
test      6755
Name: count, dtype: int64

In [44]:
# Turn the gender labels into integer codes for the classifiers.
le = LabelEncoder()
gender_codes = le.fit_transform(df["gender"])
df["gender_encoded"] = gender_codes

# Print which integer each class label was mapped to.
label_to_code = dict(zip(le.classes_, le.transform(le.classes_)))
print("Label encoding map:", label_to_code)

Label encoding map: {'female': 0, 'male': 1}


In [45]:
# Columns used as model inputs.
feature_cols = [
    "mean_freq_kHz",     # Central tendency
    "std_freq_kHz",      # Spread of frequencies
    "skewness",          # Shape of spectrum
    "kurtosis",          # Shape of spectrum
    "mode_freq_kHz",     # Dominant frequency
    "sp_entropy",        # Spectral entropy
    "flatness",          # Spectral flatness (tonal vs. noise-like)
    "centroid_kHz",      # Spectral centroid (perceived brightness)
    # The comma after "modindx" is required: adjacent string literals are
    # concatenated, which silently produced the single, non-existent column
    # name "modindxage_range" instead of two features.
    "modindx",           # Modulation index (amplitude variation)
    # NOTE(review): the samplers and StandardScaler need numeric input; confirm
    # that age_range is numeric (or encode it) before training.
    "age_range",
]


In [58]:
# Row masks for the three splits, computed once and reused for X and y.
is_train = df["split"] == "train"
is_val = df["split"] == "val"
is_test = df["split"] == "test"

# Feature matrix and encoded target for each split.
X_train, y_train = df.loc[is_train, feature_cols], df.loc[is_train, "gender_encoded"]
X_val, y_val = df.loc[is_val, feature_cols], df.loc[is_val, "gender_encoded"]
X_test, y_test = df.loc[is_test, feature_cols], df.loc[is_test, "gender_encoded"]

# ML

In [59]:
# Candidate classifiers, keyed by the display name used in the printed reports.
# Every estimator whose fit involves randomness gets the same seed, so re-running
# the notebook reproduces the same numbers (the resamplers in the cells below are
# already seeded with 42).
RANDOM_STATE = 42

models = {
    "Dummy (Most Frequent)": DummyClassifier(strategy="most_frequent"),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    # The saga solver shuffles the data, so it needs a seed to be repeatable.
    "Elastic Net (Logistic)": LogisticRegression(
        penalty="elasticnet", l1_ratio=0.5, solver="saga", max_iter=1000, random_state=RANDOM_STATE
    ),
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5),
    "Naive Bayes": GaussianNB(),
    "Decision Tree": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=RANDOM_STATE),
    "Gradient Boosting": GradientBoostingClassifier(random_state=RANDOM_STATE),
    "AdaBoost": AdaBoostClassifier(random_state=RANDOM_STATE),
    "Support Vector Machine": SVC(),
    "MLP Classifier": MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=RANDOM_STATE),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric="logloss", random_state=RANDOM_STATE)
}

# SMOTE

In [48]:
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

# Training, validation, and test evaluation loop with SMOTE.
# The per-model accuracies are collected here so that the summary table is built
# from this run. Previously this cell never created or filled `results`, so the
# table either raised NameError on a fresh kernel or showed rows left over from
# a different cell.
results = []

for name, model in models.items():
    print(f"\n=== {name} ===")

    # imblearn's Pipeline applies the sampler only while fitting; predict()
    # sees the data as-is.
    pipeline = Pipeline([
        ("smote", SMOTE(random_state=42)),
        ("scaler", StandardScaler()),
        ("classifier", model)
    ])

    # Fit on training data
    pipeline.fit(X_train, y_train)

    # Train evaluation
    train_preds = pipeline.predict(X_train)
    train_acc = accuracy_score(y_train, train_preds)
    print(f"Train Accuracy: {train_acc:.4f}")
    print("Train Classification Report:")
    print(classification_report(y_train, train_preds, target_names=le.classes_))

    # Validation evaluation
    val_preds = pipeline.predict(X_val)
    val_acc = accuracy_score(y_val, val_preds)
    print(f"Validation Accuracy: {val_acc:.4f}")
    print("Validation Classification Report:")
    print(classification_report(y_val, val_preds, target_names=le.classes_))

    # Test evaluation
    test_preds = pipeline.predict(X_test)
    test_acc = accuracy_score(y_test, test_preds)
    print(f"Test Accuracy: {test_acc:.4f}")
    print("Test Classification Report:")
    print(classification_report(y_test, test_preds, target_names=le.classes_))

    # Save results for the summary table
    results.append({
        "Model": name,
        "Train Accuracy": train_acc,
        "Validation Accuracy": val_acc,
        "Test Accuracy": test_acc
    })

# Show summary table
results_df = pd.DataFrame(results)
print("\n=== Summary Table ===")
print(results_df.sort_values(by="Test Accuracy", ascending=False).reset_index(drop=True))


=== Dummy (Most Frequent) ===
Train Accuracy: 0.0698
Train Classification Report:
              precision    recall  f1-score   support

      female       0.07      1.00      0.13      2195
        male       0.00      0.00      0.00     29269

    accuracy                           0.07     31464
   macro avg       0.03      0.50      0.07     31464
weighted avg       0.00      0.07      0.01     31464

Validation Accuracy: 0.0725
Validation Classification Report:
              precision    recall  f1-score   support

      female       0.07      1.00      0.14       513
        male       0.00      0.00      0.00      6563

    accuracy                           0.07      7076
   macro avg       0.04      0.50      0.07      7076
weighted avg       0.01      0.07      0.01      7076

Test Accuracy: 0.0487
Test Classification Report:
              precision    recall  f1-score   support

      female       0.05      1.00      0.09       329
        male       0.00      0.00      0.0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

      female       0.09      0.61      0.16      2195
        male       0.95      0.53      0.68     29269

    accuracy                           0.54     31464
   macro avg       0.52      0.57      0.42     31464
weighted avg       0.89      0.54      0.64     31464

Validation Accuracy: 0.5760
Validation Classification Report:
              precision    recall  f1-score   support

      female       0.09      0.56      0.16       513
        male       0.94      0.58      0.72      6563

    accuracy                           0.58      7076
   macro avg       0.52      0.57      0.44      7076
weighted avg       0.88      0.58      0.68      7076

Test Accuracy: 0.5504
Test Classification Report:
              precision    recall  f1-score   support

      female       0.06      0.57      0.11       329
        male       0.96      0.55      0.70      6426

    accuracy                           0.55      6755
   macro avg    



Train Accuracy: 0.6817
Train Classification Report:
              precision    recall  f1-score   support

      female       0.14      0.68      0.23      2195
        male       0.97      0.68      0.80     29269

    accuracy                           0.68     31464
   macro avg       0.55      0.68      0.52     31464
weighted avg       0.91      0.68      0.76     31464

Validation Accuracy: 0.6844
Validation Classification Report:
              precision    recall  f1-score   support

      female       0.13      0.61      0.22       513
        male       0.96      0.69      0.80      6563

    accuracy                           0.68      7076
   macro avg       0.55      0.65      0.51      7076
weighted avg       0.90      0.68      0.76      7076

Test Accuracy: 0.6789
Test Classification Report:
              precision    recall  f1-score   support

      female       0.08      0.55      0.14       329
        male       0.97      0.69      0.80      6426

    accuracy      

# ADASYN

In [51]:
from imblearn.over_sampling import ADASYN
from imblearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler

# Training, validation, and test evaluation loop with ADASYN.
# The per-model accuracies are collected here so that the summary table is built
# from this run. Previously this cell never created or filled `results`, so the
# table either raised NameError on a fresh kernel or showed rows left over from
# a different cell.
results = []

for name, model in models.items():
    print(f"\n=== {name} ===")

    # imblearn's Pipeline applies the sampler only while fitting; predict()
    # sees the data as-is.
    pipeline = Pipeline([
        ("adasyn", ADASYN(random_state=42)),
        ("scaler", StandardScaler()),
        ("classifier", model)
    ])

    # Fit on training data
    pipeline.fit(X_train, y_train)

    # Train evaluation
    train_preds = pipeline.predict(X_train)
    train_acc = accuracy_score(y_train, train_preds)
    print(f"Train Accuracy: {train_acc:.4f}")
    print("Train Classification Report:")
    print(classification_report(y_train, train_preds, target_names=le.classes_))

    # Validation evaluation
    val_preds = pipeline.predict(X_val)
    val_acc = accuracy_score(y_val, val_preds)
    print(f"Validation Accuracy: {val_acc:.4f}")
    print("Validation Classification Report:")
    print(classification_report(y_val, val_preds, target_names=le.classes_))

    # Test evaluation
    test_preds = pipeline.predict(X_test)
    test_acc = accuracy_score(y_test, test_preds)
    print(f"Test Accuracy: {test_acc:.4f}")
    print("Test Classification Report:")
    print(classification_report(y_test, test_preds, target_names=le.classes_))

    # Save results for the summary table
    results.append({
        "Model": name,
        "Train Accuracy": train_acc,
        "Validation Accuracy": val_acc,
        "Test Accuracy": test_acc
    })

# Show summary table
results_df = pd.DataFrame(results)
print("\n=== Summary Table ===")
print(results_df.sort_values(by="Test Accuracy", ascending=False).reset_index(drop=True))



=== Dummy (Most Frequent) ===
Train Accuracy: 0.0698
Train Classification Report:
              precision    recall  f1-score   support

      female       0.07      1.00      0.13      2195
        male       0.00      0.00      0.00     29269

    accuracy                           0.07     31464
   macro avg       0.03      0.50      0.07     31464
weighted avg       0.00      0.07      0.01     31464

Validation Accuracy: 0.0725
Validation Classification Report:
              precision    recall  f1-score   support

      female       0.07      1.00      0.14       513
        male       0.00      0.00      0.00      6563

    accuracy                           0.07      7076
   macro avg       0.04      0.50      0.07      7076
weighted avg       0.01      0.07      0.01      7076

Test Accuracy: 0.0487
Test Classification Report:
              precision    recall  f1-score   support

      female       0.05      1.00      0.09       329
        male       0.00      0.00      0.0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train Accuracy: 0.5297
Train Classification Report:
              precision    recall  f1-score   support

      female       0.09      0.62      0.15      2195
        male       0.95      0.52      0.67     29269

    accuracy                           0.53     31464
   macro avg       0.52      0.57      0.41     31464
weighted avg       0.89      0.53      0.64     31464

Validation Accuracy: 0.5715
Validation Classification Report:
              precision    recall  f1-score   support

      female       0.09      0.57      0.16       513
        male       0.94      0.57      0.71      6563

    accuracy                           0.57      7076
   macro avg       0.52      0.57      0.44      7076
weighted avg       0.88      0.57      0.67      7076

Test Accuracy: 0.5436
Test Classification Report:
              precision    recall  f1-score   support

      female       0.06      0.56      0.11       329
        male       0.96      0.54      0.69      6426

    accuracy      



Train Accuracy: 0.6785
Train Classification Report:
              precision    recall  f1-score   support

      female       0.14      0.68      0.23      2195
        male       0.97      0.68      0.80     29269

    accuracy                           0.68     31464
   macro avg       0.55      0.68      0.51     31464
weighted avg       0.91      0.68      0.76     31464

Validation Accuracy: 0.6830
Validation Classification Report:
              precision    recall  f1-score   support

      female       0.13      0.59      0.21       513
        male       0.96      0.69      0.80      6563

    accuracy                           0.68      7076
   macro avg       0.54      0.64      0.51      7076
weighted avg       0.90      0.68      0.76      7076

Test Accuracy: 0.6759
Test Classification Report:
              precision    recall  f1-score   support

      female       0.09      0.60      0.15       329
        male       0.97      0.68      0.80      6426

    accuracy      

# SMOTETomek

In [54]:
from imblearn.combine import SMOTETomek
from imblearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import pandas as pd

# One accuracy record per model, turned into the summary table at the end.
results = []

# Fit every candidate model behind SMOTETomek resampling + scaling, then report
# accuracy and a classification report on the train, validation and test splits.
for name, model in models.items():
    print(f"\n=== {name} ===")

    steps = [
        ("smotetomek", SMOTETomek(random_state=42)),
        ("scaler", StandardScaler()),
        ("classifier", model),
    ]
    pipeline = Pipeline(steps)

    # Fit on the training split only
    pipeline.fit(X_train, y_train)

    # Predictions for all three splits
    train_preds = pipeline.predict(X_train)
    val_preds = pipeline.predict(X_val)
    test_preds = pipeline.predict(X_test)

    # Accuracy for all three splits
    train_acc = accuracy_score(y_true=y_train, y_pred=train_preds)
    val_acc = accuracy_score(y_true=y_val, y_pred=val_preds)
    test_acc = accuracy_score(y_true=y_test, y_pred=test_preds)

    # Train report
    print(f"Train Accuracy: {train_acc:.4f}")
    print("Train Classification Report:")
    print(classification_report(y_train, train_preds, target_names=le.classes_))

    # Validation report
    print(f"Validation Accuracy: {val_acc:.4f}")
    print("Validation Classification Report:")
    print(classification_report(y_val, val_preds, target_names=le.classes_))

    # Test report
    print(f"Test Accuracy: {test_acc:.4f}")
    print("Test Classification Report:")
    print(classification_report(y_test, test_preds, target_names=le.classes_))

    # Record this model's accuracies
    record = {
        "Model": name,
        "Train Accuracy": train_acc,
        "Validation Accuracy": val_acc,
        "Test Accuracy": test_acc,
    }
    results.append(record)

# Summary table, best test accuracy first
results_df = pd.DataFrame(results)
print("\n=== Summary Table ===")
ranked = results_df.sort_values(by="Test Accuracy", ascending=False)
print(ranked.reset_index(drop=True))



=== Dummy (Most Frequent) ===
Train Accuracy: 0.0698
Train Classification Report:
              precision    recall  f1-score   support

      female       0.07      1.00      0.13      2195
        male       0.00      0.00      0.00     29269

    accuracy                           0.07     31464
   macro avg       0.03      0.50      0.07     31464
weighted avg       0.00      0.07      0.01     31464

Validation Accuracy: 0.0725
Validation Classification Report:
              precision    recall  f1-score   support

      female       0.07      1.00      0.14       513
        male       0.00      0.00      0.00      6563

    accuracy                           0.07      7076
   macro avg       0.04      0.50      0.07      7076
weighted avg       0.01      0.07      0.01      7076

Test Accuracy: 0.0487
Test Classification Report:
              precision    recall  f1-score   support

      female       0.05      1.00      0.09       329
        male       0.00      0.00      0.0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Train Accuracy: 0.5356
Train Classification Report:
              precision    recall  f1-score   support

      female       0.09      0.62      0.16      2195
        male       0.95      0.53      0.68     29269

    accuracy                           0.54     31464
   macro avg       0.52      0.58      0.42     31464
weighted avg       0.89      0.54      0.64     31464

Validation Accuracy: 0.5733
Validation Classification Report:
              precision    recall  f1-score   support

      female       0.09      0.55      0.16       513
        male       0.94      0.57      0.71      6563

    accuracy                           0.57      7076
   macro avg       0.52      0.56      0.44      7076
weighted avg       0.88      0.57      0.67      7076

Test Accuracy: 0.5483
Test Classification Report:
              precision    recall  f1-score   support

      female       0.06      0.57      0.11       329
        male       0.96      0.55      0.70      6426

    accuracy      



Train Accuracy: 0.6880
Train Classification Report:
              precision    recall  f1-score   support

      female       0.14      0.67      0.23      2195
        male       0.97      0.69      0.80     29269

    accuracy                           0.69     31464
   macro avg       0.55      0.68      0.52     31464
weighted avg       0.91      0.69      0.76     31464

Validation Accuracy: 0.6943
Validation Classification Report:
              precision    recall  f1-score   support

      female       0.13      0.59      0.22       513
        male       0.96      0.70      0.81      6563

    accuracy                           0.69      7076
   macro avg       0.55      0.65      0.51      7076
weighted avg       0.90      0.69      0.77      7076

Test Accuracy: 0.6885
Test Classification Report:
              precision    recall  f1-score   support

      female       0.09      0.58      0.15       329
        male       0.97      0.69      0.81      6426

    accuracy      

# Oversample

In [57]:
from imblearn.over_sampling import RandomOverSampler
from imblearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import pandas as pd

# One accuracy record per model for the RandomOverSampler run.
results_over = []

# Fit every candidate model behind random oversampling + scaling and report
# accuracy plus a classification report for each split.
for name, model in models.items():
    print(f"\n=== {name} (RandomOverSampler) ===")

    steps = [
        ("oversample", RandomOverSampler(random_state=42)),
        ("scaler", StandardScaler()),
        ("classifier", model),
    ]
    pipeline = Pipeline(steps)
    pipeline.fit(X_train, y_train)

    # Predict on all three splits before scoring any of them
    train_preds, val_preds, test_preds = (
        pipeline.predict(X_train),
        pipeline.predict(X_val),
        pipeline.predict(X_test),
    )

    train_acc = accuracy_score(y_true=y_train, y_pred=train_preds)
    val_acc = accuracy_score(y_true=y_val, y_pred=val_preds)
    test_acc = accuracy_score(y_true=y_test, y_pred=test_preds)

    # Train report
    print(f"Train Accuracy: {train_acc:.4f}")
    print("Train Classification Report:")
    print(classification_report(y_train, train_preds, target_names=le.classes_))

    # Validation report
    print(f"Validation Accuracy: {val_acc:.4f}")
    print("Validation Classification Report:")
    print(classification_report(y_val, val_preds, target_names=le.classes_))

    # Test report
    print(f"Test Accuracy: {test_acc:.4f}")
    print("Test Classification Report:")
    print(classification_report(y_test, test_preds, target_names=le.classes_))

    record = {
        "Model": name,
        "Train Accuracy": train_acc,
        "Validation Accuracy": val_acc,
        "Test Accuracy": test_acc,
    }
    results_over.append(record)

# Summary table, best test accuracy first
results_over_df = pd.DataFrame(results_over)
print("\n=== RandomOverSampler Summary Table ===")
ranked_over = results_over_df.sort_values(by="Test Accuracy", ascending=False)
print(ranked_over.reset_index(drop=True))



=== Dummy (Most Frequent) (RandomOverSampler) ===
Train Accuracy: 0.0698
Train Classification Report:
              precision    recall  f1-score   support

      female       0.07      1.00      0.13      2195
        male       0.00      0.00      0.00     29269

    accuracy                           0.07     31464
   macro avg       0.03      0.50      0.07     31464
weighted avg       0.00      0.07      0.01     31464

Validation Accuracy: 0.0725
Validation Classification Report:
              precision    recall  f1-score   support

      female       0.07      1.00      0.14       513
        male       0.00      0.00      0.00      6563

    accuracy                           0.07      7076
   macro avg       0.04      0.50      0.07      7076
weighted avg       0.01      0.07      0.01      7076

Test Accuracy: 0.0487
Test Classification Report:
              precision    recall  f1-score   support

      female       0.05      1.00      0.09       329
        male       0.0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

      female       0.09      0.55      0.16       513
        male       0.94      0.58      0.72      6563

    accuracy                           0.58      7076
   macro avg       0.52      0.57      0.44      7076
weighted avg       0.88      0.58      0.68      7076

Test Accuracy: 0.5563
Test Classification Report:
              precision    recall  f1-score   support

      female       0.06      0.56      0.11       329
        male       0.96      0.56      0.70      6426

    accuracy                           0.56      6755
   macro avg       0.51      0.56      0.41      6755
weighted avg       0.92      0.56      0.68      6755


=== Elastic Net (Logistic) (RandomOverSampler) ===
Train Accuracy: 0.5429
Train Classification Report:
              precision    recall  f1-score   support

      female       0.09      0.60      0.16      2195
        male       0.95      0.54      0.69     29269

    accuracy                



Train Accuracy: 0.6706
Train Classification Report:
              precision    recall  f1-score   support

      female       0.15      0.76      0.24      2195
        male       0.97      0.66      0.79     29269

    accuracy                           0.67     31464
   macro avg       0.56      0.71      0.52     31464
weighted avg       0.92      0.67      0.75     31464

Validation Accuracy: 0.6629
Validation Classification Report:
              precision    recall  f1-score   support

      female       0.13      0.67      0.22       513
        male       0.96      0.66      0.78      6563

    accuracy                           0.66      7076
   macro avg       0.55      0.67      0.50      7076
weighted avg       0.90      0.66      0.74      7076

Test Accuracy: 0.6604
Test Classification Report:
              precision    recall  f1-score   support

      female       0.10      0.72      0.17       329
        male       0.98      0.66      0.79      6426

    accuracy      

# Undersample

In [60]:
from imblearn.under_sampling import RandomUnderSampler
from imblearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
import pandas as pd

# One accuracy record per model for the RandomUnderSampler run.
results_under = []

# Fit every candidate model behind random undersampling + scaling and report
# accuracy plus a classification report for each split.
for name, model in models.items():
    print(f"\n=== {name} (RandomUnderSampler) ===")

    steps = [
        ("undersample", RandomUnderSampler(random_state=42)),
        ("scaler", StandardScaler()),
        ("classifier", model),
    ]
    pipeline = Pipeline(steps)
    pipeline.fit(X_train, y_train)

    # Predict on all three splits before scoring any of them
    train_preds, val_preds, test_preds = (
        pipeline.predict(X_train),
        pipeline.predict(X_val),
        pipeline.predict(X_test),
    )

    train_acc = accuracy_score(y_true=y_train, y_pred=train_preds)
    val_acc = accuracy_score(y_true=y_val, y_pred=val_preds)
    test_acc = accuracy_score(y_true=y_test, y_pred=test_preds)

    # Train report
    print(f"Train Accuracy: {train_acc:.4f}")
    print("Train Classification Report:")
    print(classification_report(y_train, train_preds, target_names=le.classes_))

    # Validation report
    print(f"Validation Accuracy: {val_acc:.4f}")
    print("Validation Classification Report:")
    print(classification_report(y_val, val_preds, target_names=le.classes_))

    # Test report
    print(f"Test Accuracy: {test_acc:.4f}")
    print("Test Classification Report:")
    print(classification_report(y_test, test_preds, target_names=le.classes_))

    record = {
        "Model": name,
        "Train Accuracy": train_acc,
        "Validation Accuracy": val_acc,
        "Test Accuracy": test_acc,
    }
    results_under.append(record)

# Summary table, best test accuracy first
results_under_df = pd.DataFrame(results_under)
print("\n=== RandomUnderSampler Summary Table ===")
ranked_under = results_under_df.sort_values(by="Test Accuracy", ascending=False)
print(ranked_under.reset_index(drop=True))



=== Dummy (Most Frequent) (RandomUnderSampler) ===
Train Accuracy: 0.0698
Train Classification Report:
              precision    recall  f1-score   support

      female       0.07      1.00      0.13      2195
        male       0.00      0.00      0.00     29269

    accuracy                           0.07     31464
   macro avg       0.03      0.50      0.07     31464
weighted avg       0.00      0.07      0.01     31464

Validation Accuracy: 0.0725
Validation Classification Report:
              precision    recall  f1-score   support

      female       0.07      1.00      0.14       513
        male       0.00      0.00      0.00      6563

    accuracy                           0.07      7076
   macro avg       0.04      0.50      0.07      7076
weighted avg       0.01      0.07      0.01      7076

Test Accuracy: 0.0487
Test Classification Report:
              precision    recall  f1-score   support

      female       0.05      1.00      0.09       329
        male       0.

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

      female       0.09      0.62      0.16      2195
        male       0.95      0.53      0.68     29269

    accuracy                           0.53     31464
   macro avg       0.52      0.57      0.42     31464
weighted avg       0.89      0.53      0.64     31464

Validation Accuracy: 0.5718
Validation Classification Report:
              precision    recall  f1-score   support

      female       0.09      0.55      0.16       513
        male       0.94      0.57      0.71      6563

    accuracy                           0.57      7076
   macro avg       0.52      0.56      0.43      7076
weighted avg       0.88      0.57      0.67      7076

Test Accuracy: 0.5433
Test Classification Report:
              precision    recall  f1-score   support

      female       0.06      0.56      0.11       329
        male       0.96      0.54      0.69      6426

    accuracy                           0.54      6755
   macro avg    



Train Accuracy: 0.6738
Train Classification Report:
              precision    recall  f1-score   support

      female       0.15      0.75      0.24      2195
        male       0.97      0.67      0.79     29269

    accuracy                           0.67     31464
   macro avg       0.56      0.71      0.52     31464
weighted avg       0.92      0.67      0.75     31464

Validation Accuracy: 0.6759
Validation Classification Report:
              precision    recall  f1-score   support

      female       0.14      0.69      0.24       513
        male       0.97      0.67      0.79      6563

    accuracy                           0.68      7076
   macro avg       0.55      0.68      0.52      7076
weighted avg       0.91      0.68      0.75      7076

Test Accuracy: 0.6691
Test Classification Report:
              precision    recall  f1-score   support

      female       0.09      0.66      0.16       329
        male       0.97      0.67      0.79      6426

    accuracy      