In [47]:
import pandas as pd
import numpy as np
import random
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.pipeline import make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.metrics import hamming_loss
from sklearn.metrics import average_precision_score
from sklearn.metrics import f1_score
from sklearn.metrics import classification_report
from sklearn.metrics import jaccard_score
from sklearn.metrics import make_scorer
from sklearn.naive_bayes import GaussianNB
from sklearn.utils.class_weight import compute_sample_weight
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_validate
from sklearn.model_selection import KFold
from sklearn.multiclass import OneVsRestClassifier
from skmultilearn.problem_transform import BinaryRelevance
from skmultilearn.problem_transform import ClassifierChain
from skmultilearn.ensemble import MajorityVotingClassifier
from skmultilearn.cluster import FixedLabelSpaceClusterer
from skmultilearn.ensemble import RakelO
from skmultilearn.model_selection import IterativeStratification
from sklearn.multioutput import MultiOutputClassifier
from xgboost import XGBClassifier

from mlxtend.evaluate.time_series import GroupTimeSeriesSplit
from mlxtend.evaluate.time_series import plot_splits
from mlxtend.evaluate.time_series import print_cv_info
from mlxtend.evaluate.time_series import print_split_info
import optuna
from functools import partial
from collected_functions import *

Set base variable values

In [28]:
random_state = 42
np.random.seed(random_state)


In [57]:
def display_label_cardinality_error(
        y_true: pd.DataFrame,
        y_pred: np.ndarray,
    ) ->None:
    '''
    Create a scatterplot from y_true and y_pred to show accuracy of number of labels predicted.
    Created with the help of Google Gemini
    '''
    if not isinstance(y_pred, np.ndarray):
        y_pred = y_pred.toarray()

    # 1. Calculate Cardinalities (Sum across the label axis, axis=1)
    true_cardinality = np.sum(y_true, axis=1)
    pred_cardinality = np.sum(y_pred, axis=1)

    # 2. Calculate Hamming Loss per sample
    # Hamming loss for sample i = (number of mismatches) / N_LABELS
    mismatches = np.sum(y_true != y_pred, axis=1)
    hamming_loss_Score_ = mismatches / y_true.shape[1]

    # 3. Create the Visualization DataFrame
    df_error_vis = pd.DataFrame({
        'Sample_ID': [f'Sample_{i}' for i in range(y_true.shape[0])],
        'True_Cardinality': true_cardinality,
        'Predicted_Cardinality': pred_cardinality,
        'Hamming_Loss_Score_': hamming_loss_Score_,
        'Total_Errors': mismatches
    })

    fig = px.scatter(
        df_error_vis,
        x='True_Cardinality',
        y='Predicted_Cardinality',
        # Color the points by the error metric
        color='Hamming_Loss_Score_',
        # Scale the size of the points by the number of errors
        # size='Total_Errors',
        hover_data=['Sample_ID', 'Total_Errors'],
        title='Multilabel Error Analysis: Predicted vs. True Label Cardinality',
        color_continuous_scale=px.colors.sequential.Plasma, # Use a good sequential color scale
    )

    # Set axis limits and labels for a clear square comparison
    max_cardinality = y_true.shape[1]
    fig.update_xaxes(
        range=[-0.5, max_cardinality + 0.5],
        tickvals=np.arange(0, max_cardinality + 1),
        title="True Label Cardinality (Complexity)"
    )
    fig.update_yaxes(
        range=[-0.5, max_cardinality + 0.5],
        tickvals=np.arange(0, max_cardinality + 1),
        title="Predicted Label Cardinality (Model Output)"
    )

    # Add a diagonal line for perfect cardinality prediction (where X=Y)
    fig.add_shape(
        type="line", line=dict(dash='dash', color='gray'),
        x0=0, y0=0, x1=max_cardinality, y1=max_cardinality
    )

    fig.show()

Adjust Pandas settings to display more information before truncation.

In [14]:
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)

Load combined MA and DDS training data

In [15]:
df = pd.read_parquet('combined_training_set.parquet')
print(df.shape)
df.head()

(6566, 21)


Unnamed: 0,incidentId,state,incidentTypeMA,region,maType,maPriority,supportFunction,agencyId,maId,declarationType,assistanceRequested,statementOfWork,femaDeclarationString,incidentType,incidentBeginDate,fipsStateCode,designatedIncidentTypes,declarationTitle,year,month,day
45,2018072401,CA,Fire,9,FOS,High,13.0,DHS-FPS,4382DRCAFPS01,DR,Activate FPS to DR-4382 to Contract security o...,"As directed by an in coordination with FEMA, F...",DR-4382-CA,Fire,2018-07-23 00:00:00+00:00,6.0,Fire,WILDFIRES AND HIGH WINDS,2018.0,7.0,23.0
66,2018072401,CA,Fire,9,FOS,High,10.0,EPA,4382DRCAEPA01,DR,"Activate ESF # 10 EPA to DR-4382 JFO, or other...","As directed by and in coordination with FEMA, ...",DR-4382-CA,Fire,2018-07-23 00:00:00+00:00,6.0,Fire,WILDFIRES AND HIGH WINDS,2018.0,7.0,23.0
136,2018072401,CA,Fire,9,FOS,Lifesaving,1.0,DOT,4382DRCADOT01,DR,"Activate ESF #1, DOT to the California Governo...","As directed by and in coordination with FEMA, ...",DR-4382-CA,Fire,2018-07-23 00:00:00+00:00,6.0,Fire,WILDFIRES AND HIGH WINDS,2018.0,7.0,23.0
152,2023121901,ME,Severe Storm,1,FOS,Normal,7.0,GSA,4754DRMEGSA03,DR,GSA to assist with leasing of space needed to...,"As directed by and in coordination with FEMA, ...",DR-4754-ME,Severe Storm,2023-12-17 00:00:00+00:00,23.0,"Flood,Severe Storm",SEVERE STORM AND FLOODING,2023.0,12.0,17.0
153,2023121901,ME,Severe Storm,1,FOS,High,7.0,GSA,4754DRMEGSA02,DR,Activate GSA to assist with leasing of space n...,"As directed by and in coordination with FEMA, ...",DR-4754-ME,Severe Storm,2023-12-17 00:00:00+00:00,23.0,"Flood,Severe Storm",SEVERE STORM AND FLOODING,2023.0,12.0,17.0


Transform the training data set using approach that is suitable for multiple categorical feature columns and a multilabel target. We return the transformers so that the test data set can be transformed later.

In [19]:
feature_columns = [
    'incidentId',
    'state',
    'designatedIncidentTypes',
    'declarationType',
    'region',
    'year',
]

target_column = 'supportFunction'

X_train, y_train = create_set(df, feature_columns, target_column)

print(X_train.shape, len(y_train))
print(y_train[:5])
X_train.head(5)

(485, 6) 485
0    [1.0, 3.0, 4.0, 7.0, 8.0, 10.0, 11.0, 12.0, 15.0]
1                                                [4.0]
2                                                [7.0]
3                                                [2.0]
4                                                [7.0]
Name: supportFunction, dtype: object


Unnamed: 0,incidentId,state,designatedIncidentTypes,declarationType,region,year
0,2012102505,CT,Hurricane,DR,1,2012.0
1,2012102505,DC,Hurricane,EM,3,2012.0
2,2012102505,DE,Hurricane,DR,3,2012.0
3,2012102505,DE,Hurricane,EM,3,2012.0
4,2012102505,MD,Hurricane,DR,3,2012.0


In [20]:
# Split the string in designatedIncidentTypes to a list of strings.
# This cannot be done earlier due to groupby issues.
X_train['designatedIncidentTypes'] = X_train['designatedIncidentTypes'].str.split(',')


In [21]:
# Since designatedIncidentTypes is a list it must be transformed differently than the other
# feature columns.
feature_columns_transform = [
    'state',
    'declarationType',
    'region',
]

X_train_transformed, y_train_transformed, preprocessor_X, preprocessor_y = transform_x_y_train(
    X_train,
    y_train,
    feature_columns_transform,
    ['designatedIncidentTypes'],
    target_column
)

In [22]:
# Assign the year values to a variable, then drop year and incidentId from X_train_transformed.
groups_by_year = X_train_transformed['year'].values
X_train_transformed.drop(columns=['incidentId', 'year'], inplace=True)
print(X_train_transformed.shape)
X_train_transformed.head()

(485, 88)


Unnamed: 0,state_AK,state_AL,state_AR,state_AS,state_AZ,state_CA,state_CO,state_CT,state_DC,state_DE,state_FL,state_GA,state_GU,state_HI,state_IA,state_ID,state_IL,state_IN,state_KS,state_KY,state_LA,state_MA,state_MD,state_ME,state_MI,state_MN,state_MO,state_MP,state_MS,state_MT,state_NC,state_ND,state_NE,state_NH,state_NJ,state_NM,state_NV,state_NY,state_OH,state_OK,state_OR,state_PA,state_PR,state_RI,state_SC,state_SD,state_TN,state_TX,state_UT,state_VA,state_VI,state_VT,state_WA,state_WI,state_WV,state_WY,declarationType_DR,declarationType_EM,region_1,region_2,region_3,region_4,region_5,region_6,region_7,region_8,region_9,region_10,designatedIncidentTypes_Biological,designatedIncidentTypes_Chemical,designatedIncidentTypes_Coastal Storm,designatedIncidentTypes_Dam/Levee Break,designatedIncidentTypes_Earthquake,designatedIncidentTypes_Fire,designatedIncidentTypes_Flood,designatedIncidentTypes_Hurricane,designatedIncidentTypes_Mud/Landslide,designatedIncidentTypes_Other,designatedIncidentTypes_Severe Ice Storm,designatedIncidentTypes_Severe Storm,designatedIncidentTypes_Snowstorm,designatedIncidentTypes_Straight-Line Winds,designatedIncidentTypes_Terrorist,designatedIncidentTypes_Tornado,designatedIncidentTypes_Tropical Storm,designatedIncidentTypes_Typhoon,designatedIncidentTypes_Volcanic Eruption,designatedIncidentTypes_Winter Storm
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [23]:
print(y_train_transformed.shape)
y_train_transformed.head()

(485, 16)


Unnamed: 0,ESF_0,ESF_1,ESF_2,ESF_3,ESF_4,ESF_5,ESF_6,ESF_7,ESF_8,ESF_9,ESF_10,ESF_11,ESF_12,ESF_13,ESF_14,ESF_15
0,0,1,0,1,1,0,0,1,1,0,1,1,1,0,0,1
1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
3,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0


Run a variety of machine learning models and base classifiers using cross validation for model selection purposes.

In [45]:
base_svc = SVC(
        gamma='auto',
        decision_function_shape='ovo',
        class_weight='balanced',
        random_state=random_state,
        )
base_xgb = XGBClassifier(
        max_depth=12,
        random_state=0,
        n_estimators=100,
        base_score=0.5,
        )
base_gnb = GaussianNB()

br_classifier_svc = BinaryRelevance(
    classifier = base_svc,
    require_dense = [True, True]
)
br_classifier_xgb = BinaryRelevance(
    classifier = base_xgb,
    require_dense = [True, True]
)
br_classifier_gnb = BinaryRelevance(
    classifier = base_gnb,
    require_dense = [True, True]
)

cc_classifier_svc = ClassifierChain(
    classifier=base_svc, 
    require_dense=[True, True],
    order = list(range(0,16)),
)
cc_classifier_xgb = ClassifierChain(
    classifier=base_xgb,
    require_dense=[True, True],
    order = list(range(0,16)),
)
cc_classifier_gnb = ClassifierChain(
    classifier=base_gnb,
    require_dense=[True, True],
    order = list(range(0,16)),
)

classifier_rake_svc = RakelO(
   base_classifier=base_svc,
    base_classifier_require_dense=[True, True],
    labelset_size=3,
    model_count=32
)
classifier_rake_xgb = RakelO(
   base_classifier=base_xgb,
    base_classifier_require_dense=[True, True],
    labelset_size=3,
    model_count=32
)
classifier_rake_gnb = RakelO(
   base_classifier=base_gnb,
    base_classifier_require_dense=[True, True],
    labelset_size=3,
    model_count=32
)

moc_svc = MultiOutputClassifier(estimator=base_svc)
moc_xgb = MultiOutputClassifier(estimator=base_xgb)
moc_gnb = MultiOutputClassifier(estimator=base_gnb)

ovr_svc = OneVsRestClassifier(estimator=base_svc)
ovr_xgb = OneVsRestClassifier(estimator=base_xgb)
ovr_gnb = OneVsRestClassifier(estimator=base_gnb)

clf_list = [
    br_classifier_svc,
    br_classifier_xgb,
    br_classifier_gnb,
    cc_classifier_svc,
    cc_classifier_xgb,
    cc_classifier_gnb,
    classifier_rake_svc,
    classifier_rake_xgb,
    classifier_rake_gnb,
    moc_svc,
    moc_xgb,
    moc_gnb,
    ovr_svc,
    ovr_xgb,
    ovr_gnb,
]

hamming_scorer = make_scorer(
    hamming_loss,
    greater_is_better=False, 
)

F1_Macro_Avg = make_scorer(
    f1_score,
    average='macro', 
    zero_division=1,
)

F1_Samples_Avg = make_scorer(
    f1_score,
    average='macro', 
    zero_division=1,
)

scoring = {
    'Hamming':hamming_scorer,
    'F1_Macro_Avg':F1_Macro_Avg,
    'F1_Samples_Avg':F1_Samples_Avg,
    'jaccard_samples':'jaccard_samples',
}
import warnings
warnings.filterwarnings(
    "ignore", 
    category=UserWarning, 
    module="sklearn.utils.validation" 
)

for clf in clf_list:
    cross_val_results = cross_validate(clf,
                                       X_train_transformed,
                                       y_train_transformed,
                                       cv=10,
                                       scoring=scoring)

    print(f'{clf} Mean Hamming: {cross_val_results['test_Hamming'].mean()}, {cross_val_results['test_Hamming'].std() }')
    print(f'\nMean F1 Macro Avg: {cross_val_results['test_F1_Macro_Avg'].mean()}, {cross_val_results['test_F1_Macro_Avg'].std() }')
    print(f'\nMean F1 Samples Avg: {cross_val_results['test_F1_Samples_Avg'].mean()}, {cross_val_results['test_F1_Samples_Avg'].std() }')
    print(f'\nMean Jaccard Samples: {cross_val_results['test_jaccard_samples'].mean()}, {cross_val_results['test_jaccard_samples'].std() }')

BinaryRelevance(classifier=SVC(class_weight='balanced',
                               decision_function_shape='ovo', gamma='auto',
                               random_state=42),
                require_dense=[True, True]) Mean Hamming: -0.2947013180272109, 0.07151390028165508

Mean F1 Macro Avg: 0.46502319482358123, 0.07878793842017423

Mean F1 Samples Avg: 0.46502319482358123, 0.07878793842017423

Mean Jaccard Samples: 0.419884754205545, 0.04546304942241786
BinaryRelevance(classifier=XGBClassifier(base_score=0.5, booster=None,
                                         callbacks=None, colsample_bylevel=None,
                                         colsample_bynode=None,
                                         colsample_bytree=None, device=None,
                                         early_stopping_rounds=None,
                                         enable_categorical=False,
                                         eval_metric=None, feature_types=None,
                          

XGBoost appears to be our best base model. The most promising models appear to be BinaryRelevance, ClassifierChain, and OneVsRestClassifier (MultiOutputClassification had similar results and OneVsRestClassifier is more suited to this sort of task per documentation).

We can start with hyperparameter tuning of BinaryRelevance and the XGBoost base classifier.

In [None]:
def objective_brxgb(trial, X, y, groups, cv_args):

    xgb_params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200, step=25),
        'max_depth': trial.suggest_int('max_depth', 7, 15),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.1, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 0.95),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.95),
        'scale_pos_weight' : trial.suggest_float('scale_pos_weight', 3, 10, log=True),
        'base_score': 0.5, 
        'random_state': 42,
        'n_jobs': -1,
        'verbosity': 0,
        'eval_metric': 'logloss',
        'use_label_encoder': False
    }

    
    base_XGB = XGBClassifier(**xgb_params)
    
    # The RakelO multilabel classifier (the main estimator)
    clf = BinaryRelevance(
        classifier = base_XGB,
        require_dense = [True, True]
    )

    scores = []    
    # cv = GroupTimeSeriesSplit(**cv_args)
    cv = KFold(**cv_args)

    # for train_index, val_index in cv.split(X, y, groups=groups):
    for train_index, val_index in cv.split(X, y):
    
        X_tr, X_val = X.iloc[train_index], X.iloc[val_index]
        y_tr, y_val = y.iloc[train_index], y.iloc[val_index]
        
        clf.fit(X_tr, y_tr)
        y_pred = clf.predict(X_val)
        
        # hl = hamming_loss(y_val, y_pred)
        # hl = f1_score(y_val, y_pred, average='samples', zero_division=1)
        eval_scorer = jaccard_score(y_val, y_pred, average='samples')

        scores.append(eval_scorer)

    return np.mean(scores)

cv_args = {'n_splits': 5, 'shuffle': True, 'random_state': 42}

objective_brxgb = partial(objective_brxgb,
                        X=X_train_transformed,
                        y=y_train_transformed,
                        groups=groups_by_year,
                        cv_args=cv_args,
                        )

# 3. Create and Run the Optuna Study
study_brxgb = optuna.create_study(direction="maximize") # Maximize the F1 score

study_brxgb.optimize(objective_brxgb, n_trials=30, show_progress_bar=True)

# --- Final Results ---
print("Optuna Hyperparameter Tuning Complete")
print(f"Best Score (Validation): {study_brxgb.best_value:.4f}")
print("Best Parameters:")
for key, value in study_brxgb.best_params.items():
    print(f"  {key}: {value}")

[I 2025-11-15 09:27:56,865] A new study created in memory with name: no-name-c93afa28-1b27-4625-b005-efe3ff505eb6
Best trial: 0. Best value: 0.427795:   3%|▎         | 1/30 [00:14<07:10, 14.86s/it]

[I 2025-11-15 09:28:11,725] Trial 0 finished with value: 0.42779457369663554 and parameters: {'n_estimators': 125, 'max_depth': 13, 'learning_rate': 0.003338318161433101, 'subsample': 0.8755838464745813, 'colsample_bytree': 0.8469636023590528, 'scale_pos_weight': 7.048015533411313}. Best is trial 0 with value: 0.42779457369663554.


Best trial: 1. Best value: 0.443922:   7%|▋         | 2/30 [00:31<07:31, 16.11s/it]

[I 2025-11-15 09:28:28,711] Trial 1 finished with value: 0.44392182273625574 and parameters: {'n_estimators': 150, 'max_depth': 13, 'learning_rate': 0.0815720542117103, 'subsample': 0.9357231531084991, 'colsample_bytree': 0.9250184631603144, 'scale_pos_weight': 6.453125247668684}. Best is trial 1 with value: 0.44392182273625574.


Best trial: 1. Best value: 0.443922:  10%|█         | 3/30 [00:45<06:48, 15.15s/it]

[I 2025-11-15 09:28:42,709] Trial 2 finished with value: 0.4396355935015729 and parameters: {'n_estimators': 100, 'max_depth': 12, 'learning_rate': 0.003233379569166068, 'subsample': 0.8143233595951601, 'colsample_bytree': 0.6029986397679086, 'scale_pos_weight': 4.291016599571224}. Best is trial 1 with value: 0.44392182273625574.


Best trial: 1. Best value: 0.443922:  13%|█▎        | 4/30 [00:57<05:54, 13.62s/it]

[I 2025-11-15 09:28:53,985] Trial 3 finished with value: 0.43494615922450974 and parameters: {'n_estimators': 125, 'max_depth': 9, 'learning_rate': 0.035341867866513485, 'subsample': 0.874345037872162, 'colsample_bytree': 0.7882019071578988, 'scale_pos_weight': 5.318312131780491}. Best is trial 1 with value: 0.44392182273625574.


Best trial: 1. Best value: 0.443922:  17%|█▋        | 5/30 [01:03<04:38, 11.13s/it]

[I 2025-11-15 09:29:00,702] Trial 4 finished with value: 0.4317029162132255 and parameters: {'n_estimators': 50, 'max_depth': 10, 'learning_rate': 0.08222954660235382, 'subsample': 0.8626760146576546, 'colsample_bytree': 0.6435951452451303, 'scale_pos_weight': 7.050823674782725}. Best is trial 1 with value: 0.44392182273625574.


Best trial: 5. Best value: 0.447289:  20%|██        | 6/30 [01:10<03:46,  9.45s/it]

[I 2025-11-15 09:29:06,906] Trial 5 finished with value: 0.4472886620824765 and parameters: {'n_estimators': 50, 'max_depth': 14, 'learning_rate': 0.027469411381923405, 'subsample': 0.6288171453008852, 'colsample_bytree': 0.8627939346542088, 'scale_pos_weight': 3.8368824873960117}. Best is trial 5 with value: 0.4472886620824765.


Best trial: 5. Best value: 0.447289:  23%|██▎       | 7/30 [01:17<03:22,  8.79s/it]

[I 2025-11-15 09:29:14,314] Trial 6 finished with value: 0.4413919814435278 and parameters: {'n_estimators': 75, 'max_depth': 10, 'learning_rate': 0.007014091429059219, 'subsample': 0.6750067462702265, 'colsample_bytree': 0.7046550413867396, 'scale_pos_weight': 4.395210116572784}. Best is trial 5 with value: 0.4472886620824765.


Best trial: 5. Best value: 0.447289:  27%|██▋       | 8/30 [01:27<03:19,  9.07s/it]

[I 2025-11-15 09:29:24,003] Trial 7 finished with value: 0.4279432222988923 and parameters: {'n_estimators': 75, 'max_depth': 7, 'learning_rate': 0.033215992395590796, 'subsample': 0.8537534598340147, 'colsample_bytree': 0.7301208833511772, 'scale_pos_weight': 6.349272850845399}. Best is trial 5 with value: 0.4472886620824765.


Best trial: 5. Best value: 0.447289:  30%|███       | 9/30 [01:34<03:02,  8.69s/it]

[I 2025-11-15 09:29:31,846] Trial 8 finished with value: 0.4115567508093282 and parameters: {'n_estimators': 125, 'max_depth': 7, 'learning_rate': 0.015173724074090114, 'subsample': 0.9470133367886433, 'colsample_bytree': 0.6002317828946155, 'scale_pos_weight': 9.241309082324134}. Best is trial 5 with value: 0.4472886620824765.


Best trial: 5. Best value: 0.447289:  33%|███▎      | 10/30 [01:38<02:23,  7.18s/it]

[I 2025-11-15 09:29:35,638] Trial 9 finished with value: 0.4066026699531854 and parameters: {'n_estimators': 50, 'max_depth': 7, 'learning_rate': 0.009872641676752138, 'subsample': 0.7777234978686013, 'colsample_bytree': 0.6684117229029768, 'scale_pos_weight': 9.633305252104615}. Best is trial 5 with value: 0.4472886620824765.


Best trial: 10. Best value: 0.450711:  37%|███▋      | 11/30 [02:06<04:13, 13.36s/it]

[I 2025-11-15 09:30:03,009] Trial 10 finished with value: 0.4507106982364714 and parameters: {'n_estimators': 200, 'max_depth': 15, 'learning_rate': 0.0010674096210635526, 'subsample': 0.6054019679418682, 'colsample_bytree': 0.9416879807441519, 'scale_pos_weight': 3.013890631964849}. Best is trial 10 with value: 0.4507106982364714.


Best trial: 11. Best value: 0.452198:  40%|████      | 12/30 [02:31<05:03, 16.88s/it]

[I 2025-11-15 09:30:27,957] Trial 11 finished with value: 0.45219849451808214 and parameters: {'n_estimators': 200, 'max_depth': 15, 'learning_rate': 0.0013205635968295897, 'subsample': 0.607416617684148, 'colsample_bytree': 0.9450606159139114, 'scale_pos_weight': 3.182060702044205}. Best is trial 11 with value: 0.45219849451808214.


Best trial: 11. Best value: 0.452198:  43%|████▎     | 13/30 [02:51<05:04, 17.93s/it]

[I 2025-11-15 09:30:48,301] Trial 12 finished with value: 0.45112873666997383 and parameters: {'n_estimators': 175, 'max_depth': 15, 'learning_rate': 0.001051265005656335, 'subsample': 0.608294153793394, 'colsample_bytree': 0.942904217229992, 'scale_pos_weight': 3.1208382514265063}. Best is trial 11 with value: 0.45219849451808214.


Best trial: 11. Best value: 0.452198:  47%|████▋     | 14/30 [03:18<05:32, 20.78s/it]

[I 2025-11-15 09:31:15,671] Trial 13 finished with value: 0.4508875145473084 and parameters: {'n_estimators': 200, 'max_depth': 15, 'learning_rate': 0.001002141883231294, 'subsample': 0.6967853792040895, 'colsample_bytree': 0.8824086813291059, 'scale_pos_weight': 3.0154939665515896}. Best is trial 11 with value: 0.45219849451808214.


Best trial: 11. Best value: 0.452198:  50%|█████     | 15/30 [03:38<05:06, 20.42s/it]

[I 2025-11-15 09:31:35,253] Trial 14 finished with value: 0.4441456624704047 and parameters: {'n_estimators': 175, 'max_depth': 15, 'learning_rate': 0.0020791779597166393, 'subsample': 0.7015044879067549, 'colsample_bytree': 0.810421977760781, 'scale_pos_weight': 3.7619247927769397}. Best is trial 11 with value: 0.45219849451808214.


Best trial: 11. Best value: 0.452198:  53%|█████▎    | 16/30 [04:01<04:56, 21.20s/it]

[I 2025-11-15 09:31:58,255] Trial 15 finished with value: 0.44447619677001127 and parameters: {'n_estimators': 175, 'max_depth': 13, 'learning_rate': 0.0021979936504472057, 'subsample': 0.6592618282106582, 'colsample_bytree': 0.9059700032047301, 'scale_pos_weight': 3.4606247653860085}. Best is trial 11 with value: 0.45219849451808214.


Best trial: 11. Best value: 0.452198:  57%|█████▋    | 17/30 [04:28<04:56, 22.84s/it]

[I 2025-11-15 09:32:24,914] Trial 16 finished with value: 0.43528904085605113 and parameters: {'n_estimators': 175, 'max_depth': 14, 'learning_rate': 0.0016859682530439217, 'subsample': 0.7394679862001029, 'colsample_bytree': 0.8317948303971723, 'scale_pos_weight': 4.91275016801466}. Best is trial 11 with value: 0.45219849451808214.


Best trial: 11. Best value: 0.452198:  60%|██████    | 18/30 [04:41<04:00, 20.00s/it]

[I 2025-11-15 09:32:38,316] Trial 17 finished with value: 0.44984421649370104 and parameters: {'n_estimators': 200, 'max_depth': 12, 'learning_rate': 0.004579381962677701, 'subsample': 0.6017129734131103, 'colsample_bytree': 0.9414561999086245, 'scale_pos_weight': 3.366314295973659}. Best is trial 11 with value: 0.45219849451808214.


Best trial: 11. Best value: 0.452198:  63%|██████▎   | 19/30 [04:54<03:15, 17.77s/it]

[I 2025-11-15 09:32:50,869] Trial 18 finished with value: 0.4384023851291892 and parameters: {'n_estimators': 150, 'max_depth': 14, 'learning_rate': 0.0014807340584688446, 'subsample': 0.6395788811238505, 'colsample_bytree': 0.8869472906194138, 'scale_pos_weight': 4.301235236840817}. Best is trial 11 with value: 0.45219849451808214.


Best trial: 11. Best value: 0.452198:  67%|██████▋   | 20/30 [05:07<02:43, 16.38s/it]

[I 2025-11-15 09:33:04,020] Trial 19 finished with value: 0.44372155622155623 and parameters: {'n_estimators': 150, 'max_depth': 11, 'learning_rate': 0.005480445749716214, 'subsample': 0.7405780196350582, 'colsample_bytree': 0.754323237152126, 'scale_pos_weight': 3.4715241145147946}. Best is trial 11 with value: 0.45219849451808214.


Best trial: 11. Best value: 0.452198:  70%|███████   | 21/30 [05:20<02:19, 15.54s/it]

[I 2025-11-15 09:33:17,599] Trial 20 finished with value: 0.44094072307989834 and parameters: {'n_estimators': 175, 'max_depth': 15, 'learning_rate': 0.0030072259069498465, 'subsample': 0.7251055224623362, 'colsample_bytree': 0.9056119879029648, 'scale_pos_weight': 3.868248402633478}. Best is trial 11 with value: 0.45219849451808214.


Best trial: 21. Best value: 0.452798:  73%|███████▎  | 22/30 [05:37<02:08, 16.02s/it]

[I 2025-11-15 09:33:34,728] Trial 21 finished with value: 0.4527981325661738 and parameters: {'n_estimators': 200, 'max_depth': 15, 'learning_rate': 0.0011504858200806019, 'subsample': 0.682584731746067, 'colsample_bytree': 0.8792930692950166, 'scale_pos_weight': 3.0296984508908382}. Best is trial 21 with value: 0.4527981325661738.


Best trial: 22. Best value: 0.453808:  77%|███████▋  | 23/30 [05:55<01:55, 16.50s/it]

[I 2025-11-15 09:33:52,371] Trial 22 finished with value: 0.45380828793199923 and parameters: {'n_estimators': 200, 'max_depth': 14, 'learning_rate': 0.0012605477994259533, 'subsample': 0.6426730736617045, 'colsample_bytree': 0.9488151543720982, 'scale_pos_weight': 3.015500783322584}. Best is trial 22 with value: 0.45380828793199923.


Best trial: 22. Best value: 0.453808:  80%|████████  | 24/30 [06:18<01:51, 18.56s/it]

[I 2025-11-15 09:34:15,718] Trial 23 finished with value: 0.4486793447102725 and parameters: {'n_estimators': 200, 'max_depth': 14, 'learning_rate': 0.00165577849938726, 'subsample': 0.6509728999745582, 'colsample_bytree': 0.9082754068105436, 'scale_pos_weight': 3.328281773566841}. Best is trial 22 with value: 0.45380828793199923.


Best trial: 22. Best value: 0.453808:  83%|████████▎ | 25/30 [06:35<01:30, 18.05s/it]

[I 2025-11-15 09:34:32,588] Trial 24 finished with value: 0.44463208612693145 and parameters: {'n_estimators': 200, 'max_depth': 14, 'learning_rate': 0.0022857433434141546, 'subsample': 0.6831115309594687, 'colsample_bytree': 0.8662684295934896, 'scale_pos_weight': 3.638208523821397}. Best is trial 22 with value: 0.45380828793199923.


Best trial: 22. Best value: 0.453808:  87%|████████▋ | 26/30 [06:49<01:06, 16.65s/it]

[I 2025-11-15 09:34:45,981] Trial 25 finished with value: 0.4460194387513975 and parameters: {'n_estimators': 200, 'max_depth': 12, 'learning_rate': 0.001395411797683638, 'subsample': 0.6325051409737074, 'colsample_bytree': 0.91410332955879, 'scale_pos_weight': 4.024076997432766}. Best is trial 22 with value: 0.45380828793199923.


Best trial: 22. Best value: 0.453808:  90%|█████████ | 27/30 [07:05<00:49, 16.60s/it]

[I 2025-11-15 09:35:02,462] Trial 26 finished with value: 0.449256041781815 and parameters: {'n_estimators': 150, 'max_depth': 15, 'learning_rate': 0.0039994703871209306, 'subsample': 0.7710046420137538, 'colsample_bytree': 0.824159471780068, 'scale_pos_weight': 3.2149880753745945}. Best is trial 22 with value: 0.45380828793199923.


Best trial: 22. Best value: 0.453808:  93%|█████████▎| 28/30 [07:26<00:35, 17.92s/it]

[I 2025-11-15 09:35:23,446] Trial 27 finished with value: 0.4411254982131271 and parameters: {'n_estimators': 175, 'max_depth': 13, 'learning_rate': 0.007886555482516848, 'subsample': 0.7131740083680101, 'colsample_bytree': 0.8803905891653233, 'scale_pos_weight': 4.720402451359302}. Best is trial 22 with value: 0.45380828793199923.


Best trial: 22. Best value: 0.453808:  97%|█████████▋| 29/30 [07:40<00:16, 16.72s/it]

[I 2025-11-15 09:35:37,372] Trial 28 finished with value: 0.45250454013340613 and parameters: {'n_estimators': 200, 'max_depth': 14, 'learning_rate': 0.015021823067354723, 'subsample': 0.670845352354594, 'colsample_bytree': 0.9472069596239205, 'scale_pos_weight': 3.559633241458858}. Best is trial 22 with value: 0.45380828793199923.


Best trial: 22. Best value: 0.453808: 100%|██████████| 30/30 [08:02<00:00, 16.07s/it]

[I 2025-11-15 09:35:58,961] Trial 29 finished with value: 0.4358010631979704 and parameters: {'n_estimators': 175, 'max_depth': 13, 'learning_rate': 0.016210966611190477, 'subsample': 0.6692953387792109, 'colsample_bytree': 0.8510348399125045, 'scale_pos_weight': 8.530394916520034}. Best is trial 22 with value: 0.45380828793199923.
Optuna Hyperparameter Tuning Complete
Best Hamming Score (Validation): 0.4538
Best Parameters:
  n_estimators: 200
  max_depth: 14
  learning_rate: 0.0012605477994259533
  subsample: 0.6426730736617045
  colsample_bytree: 0.9488151543720982
  scale_pos_weight: 3.015500783322584





In [58]:
study_brxgb_best_params = study_brxgb.best_params

study_brxgb_best_params_xgb = {
    'n_estimators': study_brxgb_best_params['n_estimators'],
    'max_depth': study_brxgb_best_params['max_depth'],
    'learning_rate': study_brxgb_best_params['learning_rate'],
    'subsample': study_brxgb_best_params['subsample'],
    'colsample_bytree' : study_brxgb_best_params['colsample_bytree'],
    'scale_pos_weight': study_brxgb_best_params['scale_pos_weight'],
    'base_score': 0.5,
    'random_state': 42,
    'verbosity': 0,
    'use_label_encoder': False,
    'eval_metric': 'logloss',
    'use_label_encoder': False
}

study_brxgb_base_xgb = XGBClassifier(**study_brxgb_best_params_xgb)
study_brxgb_tuned = BinaryRelevance(
    classifier = study_brxgb_base_xgb,
    require_dense = [True, True]
)

study_brxgb_tuned.fit(X_train_transformed, y_train_transformed)

y_pred_training_br = study_brxgb_tuned.predict(X_train_transformed)

display_results(y_train_transformed, y_pred_training_br.toarray())

display_error_heatmap(y_train_transformed, y_pred_training_br)

display_label_cardinality_error(y_train_transformed, y_pred_training_br.toarray())

True Multi-Label Targets:
   ESF_0  ESF_1  ESF_2  ESF_3  ESF_4  ESF_5  ESF_6  ESF_7  ESF_8  ESF_9  \
0      0      1      0      1      1      0      0      1      1      0   
1      0      0      0      0      1      0      0      0      0      0   
2      0      0      0      0      0      0      0      1      0      0   
3      0      0      1      0      0      0      0      0      0      0   
4      0      0      0      0      0      0      0      1      0      0   

   ESF_10  ESF_11  ESF_12  ESF_13  ESF_14  ESF_15  
0       1       1       1       0       0       1  
1       0       0       0       0       0       0  
2       0       0       0       0       0       0  
3       0       0       0       0       0       0  
4       0       0       0       0       0       0  

Predicted Multi-Label Targets:
[[0 1 0 1 1 1 0 1 1 0 1 1 1 1 0 1]
 [0 0 0 1 1 0 0 1 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0]
 [0 0 0 1 1 0 0 1 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0]]

Su


The behavior of DataFrame.sum with axis=None is deprecated, in a future version this will reduce over both axes and return a scalar. To retain the old behavior, pass axis=0 (or do not pass axis)



We see some rows with many false positives in the error heatmap, and we see that many of our predictions include too many labels and others too few.

The ClassifierChain we started with used a base order for the labels. We can try to improve the outcome using correlation-based ordering.

In [62]:
# 1. Ensure the correlation matrix has integer indices/columns
if isinstance(y_train_transformed, np.ndarray):
    # If starting from NumPy, force integer columns
    Y_df = pd.DataFrame(y_train_transformed, columns=list(range(y_train_transformed.shape[1])))
else:
    # If starting from a DataFrame, reset the columns to integers
    Y_df = y_train_transformed.copy()
    Y_df.columns = list(range(Y_df.shape[1]))
    
correlation_matrix = Y_df.corr() 

num_labels = correlation_matrix.shape[0]
label_indices = list(range(num_labels))

In [63]:
total_correlations = correlation_matrix.abs().sum(axis=1)

start_label = total_correlations.idxmax()

ordered_labels = [start_label]
remaining_labels = set(label_indices) - {start_label}

In [64]:
while remaining_labels:
    best_next_label = -1
    max_avg_corr = -1

    for candidate_label in remaining_labels:
        # Calculate the absolute correlation between the candidate and all chosen labels
        corrs_with_chosen = correlation_matrix.loc[candidate_label, ordered_labels].abs()
        
        # Calculate the average (or sum) of these correlations
        avg_corr = corrs_with_chosen.mean() 

        if avg_corr > max_avg_corr:
            max_avg_corr = avg_corr
            best_next_label = candidate_label

    # Add the best label to the order and remove it from the remaining set
    if best_next_label != -1:
        ordered_labels.append(best_next_label)
        remaining_labels.remove(best_next_label)
    else:
        # Should not happen if the correlation matrix is correctly defined
        break

# The final result is the desired order for the ClassifierChain
final_chain_order = ordered_labels
print(f"Optimal ClassifierChain Order: {final_chain_order}")

Optimal ClassifierChain Order: [np.int64(1), 10, 12, 11, 2, 4, 9, 6, 8, 5, 13, 3, 15, 0, 14, 7]


In [65]:
def objective_ccxgb(trial, X, y, groups, cv_args, final_chain_order):

    xgb_params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200, step=25),
        'max_depth': trial.suggest_int('max_depth', 7, 15),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.1, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 0.95),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.95),
        'scale_pos_weight' : trial.suggest_float('scale_pos_weight', 3, 10, log=True),
        'base_score': 0.5, 
        'random_state': 42,
        'n_jobs': -1,
        'verbosity': 0,
        'eval_metric': 'logloss',
        'use_label_encoder': False
    }
        
    base_XGB = XGBClassifier(**xgb_params)
    
    # The RakelO multilabel classifier (the main estimator)
    clf = ClassifierChain(
        classifier = base_XGB,
        require_dense = [True, True],
        order = final_chain_order,
    )

    scores = []    
    # cv = GroupTimeSeriesSplit(**cv_args)
    cv = KFold(**cv_args)

    # for train_index, val_index in cv.split(X, y, groups=groups):
    for train_index, val_index in cv.split(X, y):
    
        X_tr, X_val = X.iloc[train_index], X.iloc[val_index]
        y_tr, y_val = y.iloc[train_index], y.iloc[val_index]
        
        clf.fit(X_tr, y_tr)
        y_pred = clf.predict(X_val)
        
        # hl = hamming_loss(y_val, y_pred)
        # hl = f1_score(y_val, y_pred, average='samples', zero_division=1)
        eval_scorer = jaccard_score(y_val, y_pred, average='samples')

        scores.append(eval_scorer)

    return np.mean(scores)

cv_args = {'n_splits': 5, 'shuffle': True, 'random_state': 42}

objective_ccxgb = partial(objective_ccxgb,
                        X=X_train_transformed,
                        y=y_train_transformed,
                        groups=groups_by_year,
                        cv_args=cv_args,
                        final_chain_order=final_chain_order
                        )

# 3. Create and Run the Optuna Study
study_ccxgb = optuna.create_study(direction="maximize") # Maximize the F1 score

study_ccxgb.optimize(objective_ccxgb, n_trials=30, show_progress_bar=True)

# --- Final Results ---
print("Optuna Hyperparameter Tuning Complete")
print(f"Best Score (Validation): {study_ccxgb.best_value:.4f}")
print("Best Parameters:")
for key, value in study_ccxgb.best_params.items():
    print(f"  {key}: {value}")

[I 2025-11-15 10:01:54,659] A new study created in memory with name: no-name-e9cabcac-d8b2-4774-92e7-b01023052ffd
  0%|          | 0/30 [00:00<?, ?it/s]

Best trial: 0. Best value: 0.200225:   3%|▎         | 1/30 [00:18<09:05, 18.81s/it]

[I 2025-11-15 10:02:13,505] Trial 0 finished with value: 0.20022472659070595 and parameters: {'n_estimators': 150, 'max_depth': 12, 'learning_rate': 0.0012050526887609034, 'subsample': 0.6308573788745474, 'colsample_bytree': 0.8229864024057549, 'scale_pos_weight': 8.37541927571129}. Best is trial 0 with value: 0.20022472659070595.


Best trial: 1. Best value: 0.207042:   7%|▋         | 2/30 [00:27<06:03, 12.99s/it]

[I 2025-11-15 10:02:22,419] Trial 1 finished with value: 0.2070415609590867 and parameters: {'n_estimators': 100, 'max_depth': 9, 'learning_rate': 0.012050444548013784, 'subsample': 0.7170248764116215, 'colsample_bytree': 0.66199422861704, 'scale_pos_weight': 7.880421543062396}. Best is trial 1 with value: 0.2070415609590867.


Best trial: 1. Best value: 0.207042:  10%|█         | 3/30 [00:39<05:35, 12.44s/it]

[I 2025-11-15 10:02:34,210] Trial 2 finished with value: 0.20212820398387404 and parameters: {'n_estimators': 125, 'max_depth': 15, 'learning_rate': 0.005106102648310954, 'subsample': 0.9272547288520461, 'colsample_bytree': 0.6119256255020815, 'scale_pos_weight': 6.504074143742361}. Best is trial 1 with value: 0.2070415609590867.


Best trial: 1. Best value: 0.207042:  13%|█▎        | 4/30 [00:44<04:07,  9.51s/it]

[I 2025-11-15 10:02:39,224] Trial 3 finished with value: 0.173073910625457 and parameters: {'n_estimators': 50, 'max_depth': 9, 'learning_rate': 0.03380553334440534, 'subsample': 0.8156130209312422, 'colsample_bytree': 0.79305338598671, 'scale_pos_weight': 3.020495444519633}. Best is trial 1 with value: 0.2070415609590867.


Best trial: 1. Best value: 0.207042:  17%|█▋        | 5/30 [00:50<03:22,  8.09s/it]

[I 2025-11-15 10:02:44,808] Trial 4 finished with value: 0.1797186290227527 and parameters: {'n_estimators': 50, 'max_depth': 15, 'learning_rate': 0.00431314690230864, 'subsample': 0.6486479496112153, 'colsample_bytree': 0.6751412115953086, 'scale_pos_weight': 3.882234470948048}. Best is trial 1 with value: 0.2070415609590867.


Best trial: 1. Best value: 0.207042:  20%|██        | 6/30 [01:02<03:48,  9.51s/it]

[I 2025-11-15 10:02:57,066] Trial 5 finished with value: 0.1854199323013756 and parameters: {'n_estimators': 125, 'max_depth': 15, 'learning_rate': 0.00589254066959443, 'subsample': 0.7814040127631641, 'colsample_bytree': 0.9018465115075721, 'scale_pos_weight': 5.262917717058013}. Best is trial 1 with value: 0.2070415609590867.


Best trial: 6. Best value: 0.223421:  23%|██▎       | 7/30 [01:08<03:11,  8.34s/it]

[I 2025-11-15 10:03:02,994] Trial 6 finished with value: 0.22342068012171107 and parameters: {'n_estimators': 75, 'max_depth': 7, 'learning_rate': 0.0032147012500595446, 'subsample': 0.9111185216446693, 'colsample_bytree': 0.8708706820861241, 'scale_pos_weight': 9.426670132864865}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  27%|██▋       | 8/30 [01:22<03:45, 10.25s/it]

[I 2025-11-15 10:03:17,337] Trial 7 finished with value: 0.20350106651653044 and parameters: {'n_estimators': 175, 'max_depth': 14, 'learning_rate': 0.0031629423057274687, 'subsample': 0.9203450442819415, 'colsample_bytree': 0.9153466119232156, 'scale_pos_weight': 6.037553471510055}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  30%|███       | 9/30 [01:35<03:51, 11.01s/it]

[I 2025-11-15 10:03:30,031] Trial 8 finished with value: 0.1736703416858056 and parameters: {'n_estimators': 150, 'max_depth': 14, 'learning_rate': 0.017038498632144513, 'subsample': 0.6911893424825792, 'colsample_bytree': 0.6705280545590453, 'scale_pos_weight': 3.859855135211906}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  33%|███▎      | 10/30 [01:48<03:53, 11.67s/it]

[I 2025-11-15 10:03:43,164] Trial 9 finished with value: 0.2028017600955745 and parameters: {'n_estimators': 150, 'max_depth': 14, 'learning_rate': 0.0012164109406669088, 'subsample': 0.8907887916030013, 'colsample_bytree': 0.6000989609362176, 'scale_pos_weight': 6.53316553905122}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  37%|███▋      | 11/30 [01:54<03:08,  9.90s/it]

[I 2025-11-15 10:03:49,044] Trial 10 finished with value: 0.20922410350245402 and parameters: {'n_estimators': 75, 'max_depth': 7, 'learning_rate': 0.09928341156017838, 'subsample': 0.8480456828339535, 'colsample_bytree': 0.8533580369403613, 'scale_pos_weight': 9.672722524787362}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  40%|████      | 12/30 [02:00<02:35,  8.63s/it]

[I 2025-11-15 10:03:54,784] Trial 11 finished with value: 0.21458638154514445 and parameters: {'n_estimators': 75, 'max_depth': 7, 'learning_rate': 0.04950095429905595, 'subsample': 0.8519089299087618, 'colsample_bytree': 0.8540678742111979, 'scale_pos_weight': 9.619302732588228}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  43%|████▎     | 13/30 [02:04<02:06,  7.46s/it]

[I 2025-11-15 10:03:59,534] Trial 12 finished with value: 0.20850844459091883 and parameters: {'n_estimators': 75, 'max_depth': 7, 'learning_rate': 0.09569811314693255, 'subsample': 0.8626894334847507, 'colsample_bytree': 0.7476272458846728, 'scale_pos_weight': 9.62662930181476}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  47%|████▋     | 14/30 [02:10<01:49,  6.84s/it]

[I 2025-11-15 10:04:04,956] Trial 13 finished with value: 0.20923258871712477 and parameters: {'n_estimators': 75, 'max_depth': 9, 'learning_rate': 0.028785443672895504, 'subsample': 0.9475895732852992, 'colsample_bytree': 0.876767609919436, 'scale_pos_weight': 7.788325343065842}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  50%|█████     | 15/30 [02:16<01:39,  6.66s/it]

[I 2025-11-15 10:04:11,202] Trial 14 finished with value: 0.22118853654936133 and parameters: {'n_estimators': 100, 'max_depth': 8, 'learning_rate': 0.0020847711396413233, 'subsample': 0.8159107018497491, 'colsample_bytree': 0.7617224647715986, 'scale_pos_weight': 9.968337988695119}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  53%|█████▎    | 16/30 [02:24<01:40,  7.21s/it]

[I 2025-11-15 10:04:19,675] Trial 15 finished with value: 0.20421109245851513 and parameters: {'n_estimators': 100, 'max_depth': 11, 'learning_rate': 0.0024830348484816624, 'subsample': 0.7565083660721712, 'colsample_bytree': 0.7522395151656812, 'scale_pos_weight': 7.652277052805363}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  57%|█████▋    | 17/30 [02:40<02:04,  9.59s/it]

[I 2025-11-15 10:04:34,808] Trial 16 finished with value: 0.19971919260579057 and parameters: {'n_estimators': 200, 'max_depth': 8, 'learning_rate': 0.0020642293479849138, 'subsample': 0.803381583555183, 'colsample_bytree': 0.9407018584043753, 'scale_pos_weight': 5.001858977109259}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  60%|██████    | 18/30 [02:48<01:49,  9.11s/it]

[I 2025-11-15 10:04:42,814] Trial 17 finished with value: 0.21176770823678037 and parameters: {'n_estimators': 100, 'max_depth': 11, 'learning_rate': 0.008027197283934223, 'subsample': 0.8903408103516965, 'colsample_bytree': 0.7202125697296712, 'scale_pos_weight': 8.682926667154014}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  63%|██████▎   | 19/30 [02:56<01:37,  8.87s/it]

[I 2025-11-15 10:04:51,121] Trial 18 finished with value: 0.20626732374155052 and parameters: {'n_estimators': 100, 'max_depth': 10, 'learning_rate': 0.002084783130548367, 'subsample': 0.7598963523626759, 'colsample_bytree': 0.8030574663364369, 'scale_pos_weight': 7.057746931328016}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  67%|██████▋   | 20/30 [03:01<01:16,  7.63s/it]

[I 2025-11-15 10:04:55,860] Trial 19 finished with value: 0.1976635163233101 and parameters: {'n_estimators': 50, 'max_depth': 8, 'learning_rate': 0.0014683240727071617, 'subsample': 0.8294111298677198, 'colsample_bytree': 0.830251447818767, 'scale_pos_weight': 4.616378972631753}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  70%|███████   | 21/30 [03:11<01:16,  8.48s/it]

[I 2025-11-15 10:05:06,319] Trial 20 finished with value: 0.22124707194294818 and parameters: {'n_estimators': 125, 'max_depth': 8, 'learning_rate': 0.003230610022133203, 'subsample': 0.8876810726657041, 'colsample_bytree': 0.711054305083486, 'scale_pos_weight': 8.806746472105711}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  73%|███████▎  | 22/30 [03:23<01:15,  9.38s/it]

[I 2025-11-15 10:05:17,790] Trial 21 finished with value: 0.2220014492952637 and parameters: {'n_estimators': 125, 'max_depth': 8, 'learning_rate': 0.003688636939255393, 'subsample': 0.8835597425341851, 'colsample_bytree': 0.7148475051914999, 'scale_pos_weight': 8.914241310703558}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  77%|███████▋  | 23/30 [03:32<01:05,  9.39s/it]

[I 2025-11-15 10:05:27,202] Trial 22 finished with value: 0.2208387917924 and parameters: {'n_estimators': 125, 'max_depth': 8, 'learning_rate': 0.0037513609234855077, 'subsample': 0.8881771601283185, 'colsample_bytree': 0.7119569659039631, 'scale_pos_weight': 8.761484033973764}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  80%|████████  | 24/30 [03:45<01:03, 10.51s/it]

[I 2025-11-15 10:05:40,319] Trial 23 finished with value: 0.21116813312689597 and parameters: {'n_estimators': 175, 'max_depth': 10, 'learning_rate': 0.008868739621303356, 'subsample': 0.910780509803532, 'colsample_bytree': 0.7142369109592559, 'scale_pos_weight': 7.319770233905928}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  83%|████████▎ | 25/30 [03:57<00:54, 10.94s/it]

[I 2025-11-15 10:05:52,282] Trial 24 finished with value: 0.2206616207389403 and parameters: {'n_estimators': 150, 'max_depth': 7, 'learning_rate': 0.005971035146402655, 'subsample': 0.9451720370287441, 'colsample_bytree': 0.6388990582218206, 'scale_pos_weight': 8.498767548118908}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  87%|████████▋ | 26/30 [04:16<00:53, 13.36s/it]

[I 2025-11-15 10:06:11,291] Trial 25 finished with value: 0.21728756924633214 and parameters: {'n_estimators': 175, 'max_depth': 10, 'learning_rate': 0.0029674448314350147, 'subsample': 0.8731407271419598, 'colsample_bytree': 0.6961892596263647, 'scale_pos_weight': 8.841951961700095}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  90%|█████████ | 27/30 [04:34<00:44, 14.70s/it]

[I 2025-11-15 10:06:29,108] Trial 26 finished with value: 0.2037053439115295 and parameters: {'n_estimators': 125, 'max_depth': 9, 'learning_rate': 0.014895482442230908, 'subsample': 0.9117636526515988, 'colsample_bytree': 0.7341552541241281, 'scale_pos_weight': 5.757010387823765}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  93%|█████████▎| 28/30 [04:58<00:35, 17.66s/it]

[I 2025-11-15 10:06:53,684] Trial 27 finished with value: 0.21293990373887278 and parameters: {'n_estimators': 200, 'max_depth': 8, 'learning_rate': 0.0015915628255517048, 'subsample': 0.8506304537737802, 'colsample_bytree': 0.6924521263483908, 'scale_pos_weight': 6.772087137933656}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421:  97%|█████████▋| 29/30 [05:09<00:15, 15.61s/it]

[I 2025-11-15 10:07:04,506] Trial 28 finished with value: 0.21576533146120774 and parameters: {'n_estimators': 125, 'max_depth': 7, 'learning_rate': 0.007186022618031249, 'subsample': 0.7883999759131618, 'colsample_bytree': 0.7876527486882147, 'scale_pos_weight': 8.065262184424816}. Best is trial 6 with value: 0.22342068012171107.


Best trial: 6. Best value: 0.223421: 100%|██████████| 30/30 [05:26<00:00, 10.89s/it]

[I 2025-11-15 10:07:21,319] Trial 29 finished with value: 0.21945472625884999 and parameters: {'n_estimators': 150, 'max_depth': 11, 'learning_rate': 0.0038449906880223157, 'subsample': 0.8871651816171002, 'colsample_bytree': 0.8199727897457674, 'scale_pos_weight': 9.08486916268178}. Best is trial 6 with value: 0.22342068012171107.
Optuna Hyperparameter Tuning Complete
Best Score (Validation): 0.2234
Best Parameters:
  n_estimators: 75
  max_depth: 7
  learning_rate: 0.0032147012500595446
  subsample: 0.9111185216446693
  colsample_bytree: 0.8708706820861241
  scale_pos_weight: 9.426670132864865





In [67]:
study_ccxgb_best_params = study_ccxgb.best_params

study_ccxgb_best_params_xgb = {
    'n_estimators': study_ccxgb_best_params['n_estimators'],
    'max_depth': study_ccxgb_best_params['max_depth'],
    'learning_rate': study_ccxgb_best_params['learning_rate'],
    'subsample': study_ccxgb_best_params['subsample'],
    'colsample_bytree' : study_ccxgb_best_params['colsample_bytree'],
    'scale_pos_weight': study_ccxgb_best_params['scale_pos_weight'],
    'base_score': 0.5,
    'random_state': 42,
    'verbosity': 0,
    'use_label_encoder': False,
    'eval_metric': 'logloss',
    'use_label_encoder': False
}

study_ccxgb_bbase_xgb = XGBClassifier(**study_ccxgb_best_params_xgb)
study_ccxgb_tuned = ClassifierChain(
    classifier = study_ccxgb_bbase_xgb,
    require_dense = [True, True],
    order=final_chain_order,
)

study_ccxgb_tuned.fit(X_train_transformed, y_train_transformed)

y_pred_training_cc = study_ccxgb_tuned.predict(X_train_transformed)

display_results(y_train_transformed, y_pred_training_cc.toarray())

display_error_heatmap(y_train_transformed, y_pred_training_cc.toarray())

display_label_cardinality_error(y_train_transformed, y_pred_training_cc.toarray())

True Multi-Label Targets:
   ESF_0  ESF_1  ESF_2  ESF_3  ESF_4  ESF_5  ESF_6  ESF_7  ESF_8  ESF_9  \
0      0      1      0      1      1      0      0      1      1      0   
1      0      0      0      0      1      0      0      0      0      0   
2      0      0      0      0      0      0      0      1      0      0   
3      0      0      1      0      0      0      0      0      0      0   
4      0      0      0      0      0      0      0      1      0      0   

   ESF_10  ESF_11  ESF_12  ESF_13  ESF_14  ESF_15  
0       1       1       1       0       0       1  
1       0       0       0       0       0       0  
2       0       0       0       0       0       0  
3       0       0       0       0       0       0  
4       0       0       0       0       0       0  

Predicted Multi-Label Targets:
[[1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1.]
 [0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1. 1. 0. 1. 0. 1.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 1.]
 [0. 0. 0. 0. 1. 1. 1. 0. 


The behavior of DataFrame.sum with axis=None is deprecated, in a future version this will reduce over both axes and return a scalar. To retain the old behavior, pass axis=0 (or do not pass axis)



It's certainly more aggressive in the number of labels it predicts than the first model, whic explains the high number of false positives in the error heatmap. We can try one instance using the standard order for comparison.

In [68]:
def objective_ccxgb2(trial, X, y, groups, cv_args, final_chain_order):

    xgb_params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200, step=25),
        'max_depth': trial.suggest_int('max_depth', 7, 15),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.1, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 0.95),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.95),
        'scale_pos_weight' : trial.suggest_float('scale_pos_weight', 3, 10, log=True),
        'base_score': 0.5, 
        'random_state': 42,
        'n_jobs': -1,
        'verbosity': 0,
        'eval_metric': 'logloss',
        'use_label_encoder': False
    }
        
    base_XGB = XGBClassifier(**xgb_params)
    
    # The RakelO multilabel classifier (the main estimator)
    clf = ClassifierChain(
        classifier = base_XGB,
        require_dense = [True, True],
        order = final_chain_order,
    )

    scores = []    
    # cv = GroupTimeSeriesSplit(**cv_args)
    cv = KFold(**cv_args)

    # for train_index, val_index in cv.split(X, y, groups=groups):
    for train_index, val_index in cv.split(X, y):
    
        X_tr, X_val = X.iloc[train_index], X.iloc[val_index]
        y_tr, y_val = y.iloc[train_index], y.iloc[val_index]
        
        clf.fit(X_tr, y_tr)
        y_pred = clf.predict(X_val)
        
        # hl = hamming_loss(y_val, y_pred)
        # hl = f1_score(y_val, y_pred, average='samples', zero_division=1)
        eval_scorer = jaccard_score(y_val, y_pred, average='samples')

        scores.append(eval_scorer)

    return np.mean(scores)

cv_args = {'n_splits': 5, 'shuffle': True, 'random_state': 42}

objective_ccxgb2 = partial(objective_ccxgb2,
                        X=X_train_transformed,
                        y=y_train_transformed,
                        groups=groups_by_year,
                        cv_args=cv_args,
                        final_chain_order=list(range(0,16)),
                        )

# 3. Create and Run the Optuna Study
study_ccxgb2 = optuna.create_study(direction="maximize") # Maximize the F1 score

study_ccxgb2.optimize(objective_ccxgb2, n_trials=30, show_progress_bar=True)

# --- Final Results ---
print("Optuna Hyperparameter Tuning Complete")
print(f"Best Score (Validation): {study_ccxgb2.best_value:.4f}")
print("Best Parameters:")
for key, value in study_ccxgb2.best_params.items():
    print(f"  {key}: {value}")

[I 2025-11-15 10:15:15,551] A new study created in memory with name: no-name-8e2c0b0e-7589-484f-b6b3-2e329ddf0457
Best trial: 0. Best value: 0.438911:   3%|▎         | 1/30 [00:10<05:10, 10.71s/it]

[I 2025-11-15 10:15:26,272] Trial 0 finished with value: 0.4389106483436381 and parameters: {'n_estimators': 75, 'max_depth': 12, 'learning_rate': 0.011768689623371508, 'subsample': 0.779234477793001, 'colsample_bytree': 0.6228869427061535, 'scale_pos_weight': 5.366408611478881}. Best is trial 0 with value: 0.4389106483436381.


Best trial: 0. Best value: 0.438911:   7%|▋         | 2/30 [00:19<04:35,  9.84s/it]

[I 2025-11-15 10:15:35,492] Trial 1 finished with value: 0.4137056471592554 and parameters: {'n_estimators': 100, 'max_depth': 11, 'learning_rate': 0.007656667312091308, 'subsample': 0.624955744881925, 'colsample_bytree': 0.8770597154950144, 'scale_pos_weight': 9.707442656538566}. Best is trial 0 with value: 0.4389106483436381.


Best trial: 2. Best value: 0.440484:  10%|█         | 3/30 [00:29<04:28,  9.94s/it]

[I 2025-11-15 10:15:45,557] Trial 2 finished with value: 0.4404843981905837 and parameters: {'n_estimators': 100, 'max_depth': 13, 'learning_rate': 0.02862880372491373, 'subsample': 0.7385584371213998, 'colsample_bytree': 0.8977730779006831, 'scale_pos_weight': 4.389909530080833}. Best is trial 2 with value: 0.4404843981905837.


Best trial: 2. Best value: 0.440484:  13%|█▎        | 4/30 [00:36<03:41,  8.51s/it]

[I 2025-11-15 10:15:51,868] Trial 3 finished with value: 0.43913419913419915 and parameters: {'n_estimators': 75, 'max_depth': 8, 'learning_rate': 0.0022546409437996786, 'subsample': 0.8021889289916728, 'colsample_bytree': 0.7339511448614447, 'scale_pos_weight': 4.25604593268818}. Best is trial 2 with value: 0.4404843981905837.


Best trial: 2. Best value: 0.440484:  17%|█▋        | 5/30 [00:49<04:17, 10.31s/it]

[I 2025-11-15 10:16:05,383] Trial 4 finished with value: 0.4390883257635835 and parameters: {'n_estimators': 200, 'max_depth': 10, 'learning_rate': 0.003577619273192808, 'subsample': 0.7917016441551555, 'colsample_bytree': 0.6533146062696477, 'scale_pos_weight': 5.444602315992159}. Best is trial 2 with value: 0.4404843981905837.


Best trial: 5. Best value: 0.441863:  20%|██        | 6/30 [01:01<04:20, 10.87s/it]

[I 2025-11-15 10:16:17,324] Trial 5 finished with value: 0.4418626905740308 and parameters: {'n_estimators': 125, 'max_depth': 14, 'learning_rate': 0.006703654784257146, 'subsample': 0.8559969538719139, 'colsample_bytree': 0.6392985443255764, 'scale_pos_weight': 3.316492748955909}. Best is trial 5 with value: 0.4418626905740308.


Best trial: 6. Best value: 0.445847:  23%|██▎       | 7/30 [01:08<03:40,  9.59s/it]

[I 2025-11-15 10:16:24,298] Trial 6 finished with value: 0.44584709391410426 and parameters: {'n_estimators': 75, 'max_depth': 11, 'learning_rate': 0.0018468480115001276, 'subsample': 0.7204105920607267, 'colsample_bytree': 0.7519256038317472, 'scale_pos_weight': 4.156405111488158}. Best is trial 6 with value: 0.44584709391410426.


Best trial: 6. Best value: 0.445847:  27%|██▋       | 8/30 [01:17<03:22,  9.20s/it]

[I 2025-11-15 10:16:32,638] Trial 7 finished with value: 0.445102200205293 and parameters: {'n_estimators': 100, 'max_depth': 13, 'learning_rate': 0.00528880692168167, 'subsample': 0.601831644651123, 'colsample_bytree': 0.9386060505385176, 'scale_pos_weight': 3.6112437923383105}. Best is trial 6 with value: 0.44584709391410426.


Best trial: 6. Best value: 0.445847:  30%|███       | 9/30 [01:21<02:44,  7.85s/it]

[I 2025-11-15 10:16:37,517] Trial 8 finished with value: 0.40338616995833493 and parameters: {'n_estimators': 50, 'max_depth': 10, 'learning_rate': 0.0038377327148840664, 'subsample': 0.9135213302085089, 'colsample_bytree': 0.9127414418944684, 'scale_pos_weight': 7.774579969633722}. Best is trial 6 with value: 0.44584709391410426.


Best trial: 6. Best value: 0.445847:  33%|███▎      | 10/30 [01:27<02:20,  7.03s/it]

[I 2025-11-15 10:16:42,724] Trial 9 finished with value: 0.4434164403752033 and parameters: {'n_estimators': 75, 'max_depth': 9, 'learning_rate': 0.022108474062346877, 'subsample': 0.7725407531561221, 'colsample_bytree': 0.8168504036123125, 'scale_pos_weight': 4.676244314317607}. Best is trial 6 with value: 0.44584709391410426.


Best trial: 6. Best value: 0.445847:  37%|███▋      | 11/30 [01:36<02:28,  7.80s/it]

[I 2025-11-15 10:16:52,257] Trial 10 finished with value: 0.41363217252392503 and parameters: {'n_estimators': 175, 'max_depth': 7, 'learning_rate': 0.0010937432999168382, 'subsample': 0.6950635052208609, 'colsample_bytree': 0.7341016060605068, 'scale_pos_weight': 6.862123587849422}. Best is trial 6 with value: 0.44584709391410426.


Best trial: 6. Best value: 0.445847:  40%|████      | 12/30 [01:45<02:27,  8.17s/it]

[I 2025-11-15 10:17:01,272] Trial 11 finished with value: 0.43963190303396493 and parameters: {'n_estimators': 125, 'max_depth': 15, 'learning_rate': 0.07501223619997097, 'subsample': 0.6090296300768191, 'colsample_bytree': 0.8089759182529664, 'scale_pos_weight': 3.1504774845141665}. Best is trial 6 with value: 0.44584709391410426.


Best trial: 6. Best value: 0.445847:  43%|████▎     | 13/30 [01:50<01:59,  7.05s/it]

[I 2025-11-15 10:17:05,756] Trial 12 finished with value: 0.4435477752488062 and parameters: {'n_estimators': 50, 'max_depth': 12, 'learning_rate': 0.0011110179109443334, 'subsample': 0.6760517503216222, 'colsample_bytree': 0.7255359202354623, 'scale_pos_weight': 3.720862146665895}. Best is trial 6 with value: 0.44584709391410426.


Best trial: 6. Best value: 0.445847:  47%|████▋     | 14/30 [02:01<02:15,  8.44s/it]

[I 2025-11-15 10:17:17,414] Trial 13 finished with value: 0.44152791938874414 and parameters: {'n_estimators': 150, 'max_depth': 13, 'learning_rate': 0.0025441256315467207, 'subsample': 0.6653721403606413, 'colsample_bytree': 0.9465191785801015, 'scale_pos_weight': 3.717398302302059}. Best is trial 6 with value: 0.44584709391410426.


Best trial: 6. Best value: 0.445847:  50%|█████     | 15/30 [02:09<02:04,  8.30s/it]

[I 2025-11-15 10:17:25,389] Trial 14 finished with value: 0.44046949613959924 and parameters: {'n_estimators': 100, 'max_depth': 15, 'learning_rate': 0.0055262580681437095, 'subsample': 0.7179563361192027, 'colsample_bytree': 0.838035815775364, 'scale_pos_weight': 3.8370958863774156}. Best is trial 6 with value: 0.44584709391410426.


Best trial: 15. Best value: 0.452121:  53%|█████▎    | 16/30 [02:20<02:07,  9.11s/it]

[I 2025-11-15 10:17:36,387] Trial 15 finished with value: 0.4521210461932112 and parameters: {'n_estimators': 150, 'max_depth': 11, 'learning_rate': 0.002066833925524604, 'subsample': 0.63534384488379, 'colsample_bytree': 0.7733226597180934, 'scale_pos_weight': 3.085892228599734}. Best is trial 15 with value: 0.4521210461932112.


Best trial: 15. Best value: 0.452121:  57%|█████▋    | 17/30 [02:33<02:10, 10.06s/it]

[I 2025-11-15 10:17:48,643] Trial 16 finished with value: 0.4489974000798743 and parameters: {'n_estimators': 150, 'max_depth': 11, 'learning_rate': 0.0017808837241314029, 'subsample': 0.6504181598408209, 'colsample_bytree': 0.7740070678961912, 'scale_pos_weight': 3.0083596013608487}. Best is trial 15 with value: 0.4521210461932112.


Best trial: 15. Best value: 0.452121:  60%|██████    | 18/30 [02:49<02:23, 11.95s/it]

[I 2025-11-15 10:18:05,002] Trial 17 finished with value: 0.4477851959037527 and parameters: {'n_estimators': 150, 'max_depth': 9, 'learning_rate': 0.014171436974805893, 'subsample': 0.6499538985249418, 'colsample_bytree': 0.7841441546612994, 'scale_pos_weight': 3.0934965264460184}. Best is trial 15 with value: 0.4521210461932112.


Best trial: 15. Best value: 0.452121:  63%|██████▎   | 19/30 [03:03<02:17, 12.53s/it]

[I 2025-11-15 10:18:18,875] Trial 18 finished with value: 0.4512454555753525 and parameters: {'n_estimators': 175, 'max_depth': 10, 'learning_rate': 0.0016243219132814387, 'subsample': 0.649312247055197, 'colsample_bytree': 0.6951399394561896, 'scale_pos_weight': 3.006730364742347}. Best is trial 15 with value: 0.4521210461932112.


Best trial: 15. Best value: 0.452121:  67%|██████▋   | 20/30 [03:19<02:15, 13.54s/it]

[I 2025-11-15 10:18:34,784] Trial 19 finished with value: 0.42400270977590565 and parameters: {'n_estimators': 200, 'max_depth': 9, 'learning_rate': 0.0010464606984240568, 'subsample': 0.8368530533318566, 'colsample_bytree': 0.6892358522069807, 'scale_pos_weight': 6.356167092295108}. Best is trial 15 with value: 0.4521210461932112.


Best trial: 15. Best value: 0.452121:  70%|███████   | 21/30 [03:29<01:52, 12.53s/it]

[I 2025-11-15 10:18:44,944] Trial 20 finished with value: 0.4318721799391903 and parameters: {'n_estimators': 175, 'max_depth': 7, 'learning_rate': 0.003255699832352505, 'subsample': 0.7469145837454991, 'colsample_bytree': 0.6892268290351837, 'scale_pos_weight': 4.826677535797174}. Best is trial 15 with value: 0.4521210461932112.


Best trial: 15. Best value: 0.452121:  73%|███████▎  | 22/30 [03:38<01:32, 11.62s/it]

[I 2025-11-15 10:18:54,453] Trial 21 finished with value: 0.4496474888227466 and parameters: {'n_estimators': 150, 'max_depth': 11, 'learning_rate': 0.0016296872242166662, 'subsample': 0.6435625790950659, 'colsample_bytree': 0.7759940706512181, 'scale_pos_weight': 3.317212585790354}. Best is trial 15 with value: 0.4521210461932112.


Best trial: 15. Best value: 0.452121:  77%|███████▋  | 23/30 [03:50<01:20, 11.56s/it]

[I 2025-11-15 10:19:05,863] Trial 22 finished with value: 0.45183146750157055 and parameters: {'n_estimators': 175, 'max_depth': 10, 'learning_rate': 0.0014198708649247535, 'subsample': 0.634973602941255, 'colsample_bytree': 0.6953692848787476, 'scale_pos_weight': 3.490164353256947}. Best is trial 15 with value: 0.4521210461932112.


Best trial: 15. Best value: 0.452121:  80%|████████  | 24/30 [04:03<01:12, 12.06s/it]

[I 2025-11-15 10:19:19,083] Trial 23 finished with value: 0.4476850526335062 and parameters: {'n_estimators': 175, 'max_depth': 10, 'learning_rate': 0.0015815071889117975, 'subsample': 0.6909254530827874, 'colsample_bytree': 0.6902396974556301, 'scale_pos_weight': 3.4392935188314695}. Best is trial 15 with value: 0.4521210461932112.


Best trial: 15. Best value: 0.452121:  83%|████████▎ | 25/30 [04:14<00:58, 11.67s/it]

[I 2025-11-15 10:19:29,864] Trial 24 finished with value: 0.44430862093233225 and parameters: {'n_estimators': 175, 'max_depth': 8, 'learning_rate': 0.0026382177911257756, 'subsample': 0.6263736107269907, 'colsample_bytree': 0.6068017046608286, 'scale_pos_weight': 3.8809679860557367}. Best is trial 15 with value: 0.4521210461932112.


Best trial: 15. Best value: 0.452121:  87%|████████▋ | 26/30 [04:28<00:49, 12.44s/it]

[I 2025-11-15 10:19:44,088] Trial 25 finished with value: 0.44622510650861164 and parameters: {'n_estimators': 200, 'max_depth': 12, 'learning_rate': 0.0013177056654170491, 'subsample': 0.942057608749904, 'colsample_bytree': 0.6710917577586119, 'scale_pos_weight': 3.370355073721827}. Best is trial 15 with value: 0.4521210461932112.


Best trial: 15. Best value: 0.452121:  90%|█████████ | 27/30 [04:39<00:35, 11.94s/it]

[I 2025-11-15 10:19:54,860] Trial 26 finished with value: 0.4405178870385056 and parameters: {'n_estimators': 175, 'max_depth': 10, 'learning_rate': 0.09624186447118577, 'subsample': 0.6941091604041161, 'colsample_bytree': 0.716769502047306, 'scale_pos_weight': 3.107300814416734}. Best is trial 15 with value: 0.4521210461932112.


Best trial: 15. Best value: 0.452121:  93%|█████████▎| 28/30 [04:48<00:22, 11.12s/it]

[I 2025-11-15 10:20:04,064] Trial 27 finished with value: 0.4438775800373739 and parameters: {'n_estimators': 150, 'max_depth': 8, 'learning_rate': 0.004419624899407971, 'subsample': 0.627879564419932, 'colsample_bytree': 0.7115452291095897, 'scale_pos_weight': 4.046423633965064}. Best is trial 15 with value: 0.4521210461932112.


Best trial: 15. Best value: 0.452121:  97%|█████████▋| 29/30 [05:02<00:11, 11.86s/it]

[I 2025-11-15 10:20:17,637] Trial 28 finished with value: 0.4453088120356162 and parameters: {'n_estimators': 200, 'max_depth': 9, 'learning_rate': 0.0022922094336784503, 'subsample': 0.6715664975688206, 'colsample_bytree': 0.8475804064089858, 'scale_pos_weight': 4.952510777231797}. Best is trial 15 with value: 0.4521210461932112.


Best trial: 15. Best value: 0.452121: 100%|██████████| 30/30 [05:17<00:00, 10.59s/it]

[I 2025-11-15 10:20:33,346] Trial 29 finished with value: 0.43898083016639716 and parameters: {'n_estimators': 125, 'max_depth': 12, 'learning_rate': 0.011923506954362896, 'subsample': 0.604075605676646, 'colsample_bytree': 0.622436291350757, 'scale_pos_weight': 5.835805661616327}. Best is trial 15 with value: 0.4521210461932112.
Optuna Hyperparameter Tuning Complete
Best Score (Validation): 0.4521
Best Parameters:
  n_estimators: 150
  max_depth: 11
  learning_rate: 0.002066833925524604
  subsample: 0.63534384488379
  colsample_bytree: 0.7733226597180934
  scale_pos_weight: 3.085892228599734





In [69]:
study_ccxgb2_best_params = study_ccxgb2.best_params

study_ccxgb2_best_params_xgb = {
    'n_estimators': study_ccxgb2_best_params['n_estimators'],
    'max_depth': study_ccxgb2_best_params['max_depth'],
    'learning_rate': study_ccxgb2_best_params['learning_rate'],
    'subsample': study_ccxgb2_best_params['subsample'],
    'colsample_bytree' : study_ccxgb2_best_params['colsample_bytree'],
    'scale_pos_weight': study_ccxgb2_best_params['scale_pos_weight'],
    'base_score': 0.5,
    'random_state': 42,
    'verbosity': 0,
    'use_label_encoder': False,
    'eval_metric': 'logloss',
    'use_label_encoder': False
}

study_ccxgb2_bbase_xgb = XGBClassifier(**study_ccxgb2_best_params_xgb)
study_ccxgb2_tuned = ClassifierChain(
    classifier = study_ccxgb2_bbase_xgb,
    require_dense = [True, True],
    order=list(range(0,16)),
)

study_ccxgb2_tuned.fit(X_train_transformed, y_train_transformed)

y_pred_training_cc2 = study_ccxgb2_tuned.predict(X_train_transformed)

display_results(y_train_transformed, y_pred_training_cc2.toarray())

display_error_heatmap(y_train_transformed, y_pred_training_cc2.toarray())

display_label_cardinality_error(y_train_transformed, y_pred_training_cc2.toarray())

True Multi-Label Targets:
   ESF_0  ESF_1  ESF_2  ESF_3  ESF_4  ESF_5  ESF_6  ESF_7  ESF_8  ESF_9  \
0      0      1      0      1      1      0      0      1      1      0   
1      0      0      0      0      1      0      0      0      0      0   
2      0      0      0      0      0      0      0      1      0      0   
3      0      0      1      0      0      0      0      0      0      0   
4      0      0      0      0      0      0      0      1      0      0   

   ESF_10  ESF_11  ESF_12  ESF_13  ESF_14  ESF_15  
0       1       1       1       0       0       1  
1       0       0       0       0       0       0  
2       0       0       0       0       0       0  
3       0       0       0       0       0       0  
4       0       0       0       0       0       0  

Predicted Multi-Label Targets:
[[0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1.]
 [0. 0. 0. 1. 1. 0. 0. 1. 1. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 1. 0. 0. 1. 


The behavior of DataFrame.sum with axis=None is deprecated, in a future version this will reduce over both axes and return a scalar. To retain the old behavior, pass axis=0 (or do not pass axis)



The metrics are improved but the model is still aggressive in predicting false positives. Our final hypertuning instance will be for OneVsRest.

Binary Relevance is a decent baseline. The next model up is Classifier Chain, which should perform better if set up correctly.

In [70]:
def objective_ovrxgb(trial, X, y, groups, cv_args):

    xgb_params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200, step=25),
        'max_depth': trial.suggest_int('max_depth', 7, 15),
        'learning_rate': trial.suggest_float('learning_rate', 1e-3, 0.1, log=True),
        'subsample': trial.suggest_float('subsample', 0.6, 0.95),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 0.95),
        'scale_pos_weight' : trial.suggest_float('scale_pos_weight', 3, 10, log=True),
        'base_score': 0.5, 
        'random_state': 42,
        'n_jobs': -1,
        'verbosity': 0,
        'eval_metric': 'logloss',
        'use_label_encoder': False
    }
        
    base_XGB = XGBClassifier(**xgb_params)
    
    # The RakelO multilabel classifier (the main estimator)
    clf = OneVsRestClassifier(
        estimator = base_XGB,
    )

    scores = []    
    # cv = GroupTimeSeriesSplit(**cv_args)
    cv = KFold(**cv_args)

    # for train_index, val_index in cv.split(X, y, groups=groups):
    for train_index, val_index in cv.split(X, y):
    
        X_tr, X_val = X.iloc[train_index], X.iloc[val_index]
        y_tr, y_val = y.iloc[train_index], y.iloc[val_index]
        
        clf.fit(X_tr, y_tr)
        y_pred = clf.predict(X_val)
        
        # hl = hamming_loss(y_val, y_pred)
        # hl = f1_score(y_val, y_pred, average='samples', zero_division=1)
        eval_scorer = jaccard_score(y_val, y_pred, average='samples')

        scores.append(eval_scorer)

    return np.mean(scores)

cv_args = {'n_splits': 5, 'shuffle': True, 'random_state': 42}

objective_ovrxgb = partial(objective_ovrxgb,
                        X=X_train_transformed,
                        y=y_train_transformed,
                        groups=groups_by_year,
                        cv_args=cv_args,
                        )

# 3. Create and Run the Optuna Study
study_ovrxgb = optuna.create_study(direction="maximize") # Maximize the F1 score

study_ovrxgb.optimize(objective_ovrxgb, n_trials=30, show_progress_bar=True)

# --- Final Results ---
print("Optuna Hyperparameter Tuning Complete")
print(f"Best Score (Validation): {study_ovrxgb.best_value:.4f}")
print("Best Parameters:")
for key, value in study_ovrxgb.best_params.items():
    print(f"  {key}: {value}")

[I 2025-11-15 10:25:42,334] A new study created in memory with name: no-name-f38abe6d-b8e1-4fbf-a334-7f457b198fa0
Best trial: 0. Best value: 0.44415:   3%|▎         | 1/30 [00:06<03:09,  6.52s/it]

[I 2025-11-15 10:25:48,861] Trial 0 finished with value: 0.4441500481191203 and parameters: {'n_estimators': 50, 'max_depth': 7, 'learning_rate': 0.0015269653185780267, 'subsample': 0.7953481884092196, 'colsample_bytree': 0.7340171428424385, 'scale_pos_weight': 3.9684516805460275}. Best is trial 0 with value: 0.4441500481191203.


Best trial: 0. Best value: 0.44415:   7%|▋         | 2/30 [00:15<03:35,  7.70s/it]

[I 2025-11-15 10:25:57,386] Trial 1 finished with value: 0.43533792095647766 and parameters: {'n_estimators': 50, 'max_depth': 14, 'learning_rate': 0.09507213218913584, 'subsample': 0.6257613667477866, 'colsample_bytree': 0.6267882317303097, 'scale_pos_weight': 8.819236418063925}. Best is trial 0 with value: 0.4441500481191203.


Best trial: 0. Best value: 0.44415:  10%|█         | 3/30 [00:35<06:03, 13.46s/it]

[I 2025-11-15 10:26:17,690] Trial 2 finished with value: 0.4338025777201035 and parameters: {'n_estimators': 125, 'max_depth': 7, 'learning_rate': 0.04792099575251483, 'subsample': 0.7855771217114984, 'colsample_bytree': 0.7923430398941271, 'scale_pos_weight': 6.152685433523176}. Best is trial 0 with value: 0.4441500481191203.


Best trial: 0. Best value: 0.44415:  13%|█▎        | 4/30 [01:05<08:37, 19.91s/it]

[I 2025-11-15 10:26:47,500] Trial 3 finished with value: 0.4285839160839161 and parameters: {'n_estimators': 150, 'max_depth': 9, 'learning_rate': 0.0022611356632084526, 'subsample': 0.9246301691916837, 'colsample_bytree': 0.9146065443934118, 'scale_pos_weight': 4.922698586077244}. Best is trial 0 with value: 0.4441500481191203.


Best trial: 4. Best value: 0.445777:  17%|█▋        | 5/30 [01:18<07:19, 17.60s/it]

[I 2025-11-15 10:27:00,993] Trial 4 finished with value: 0.4457773898495548 and parameters: {'n_estimators': 75, 'max_depth': 12, 'learning_rate': 0.0019423203989580404, 'subsample': 0.9208348943542042, 'colsample_bytree': 0.7518894219631196, 'scale_pos_weight': 3.3107796076710105}. Best is trial 4 with value: 0.4457773898495548.


Best trial: 4. Best value: 0.445777:  20%|██        | 6/30 [01:54<09:34, 23.95s/it]

[I 2025-11-15 10:27:37,280] Trial 5 finished with value: 0.4389408243531955 and parameters: {'n_estimators': 200, 'max_depth': 14, 'learning_rate': 0.06938443485214801, 'subsample': 0.7498901146697097, 'colsample_bytree': 0.9245414989685563, 'scale_pos_weight': 5.884438936787585}. Best is trial 4 with value: 0.4457773898495548.


Best trial: 4. Best value: 0.445777:  23%|██▎       | 7/30 [02:16<08:53, 23.18s/it]

[I 2025-11-15 10:27:58,858] Trial 6 finished with value: 0.4427640829960418 and parameters: {'n_estimators': 200, 'max_depth': 13, 'learning_rate': 0.004694450580255662, 'subsample': 0.796158101043879, 'colsample_bytree': 0.8763210974624493, 'scale_pos_weight': 3.8553944604150012}. Best is trial 4 with value: 0.4457773898495548.


Best trial: 4. Best value: 0.445777:  27%|██▋       | 8/30 [02:36<08:07, 22.15s/it]

[I 2025-11-15 10:28:18,823] Trial 7 finished with value: 0.42092235370585884 and parameters: {'n_estimators': 200, 'max_depth': 10, 'learning_rate': 0.0019579785085323464, 'subsample': 0.6734882023585019, 'colsample_bytree': 0.7303369219367828, 'scale_pos_weight': 9.649971468837096}. Best is trial 4 with value: 0.4457773898495548.


Best trial: 8. Best value: 0.446205:  30%|███       | 9/30 [02:53<07:08, 20.40s/it]

[I 2025-11-15 10:28:35,356] Trial 8 finished with value: 0.44620511504532123 and parameters: {'n_estimators': 125, 'max_depth': 15, 'learning_rate': 0.01806073918854276, 'subsample': 0.8505700254865463, 'colsample_bytree': 0.8734524725114146, 'scale_pos_weight': 3.563949226896886}. Best is trial 8 with value: 0.44620511504532123.


Best trial: 8. Best value: 0.446205:  33%|███▎      | 10/30 [03:05<05:56, 17.84s/it]

[I 2025-11-15 10:28:47,479] Trial 9 finished with value: 0.4217713100187327 and parameters: {'n_estimators': 125, 'max_depth': 11, 'learning_rate': 0.005951842250169088, 'subsample': 0.7364343424244468, 'colsample_bytree': 0.9118101868844317, 'scale_pos_weight': 8.878589936165282}. Best is trial 8 with value: 0.44620511504532123.


Best trial: 10. Best value: 0.449017:  37%|███▋      | 11/30 [03:18<05:14, 16.55s/it]

[I 2025-11-15 10:29:01,097] Trial 10 finished with value: 0.4490170396613696 and parameters: {'n_estimators': 100, 'max_depth': 15, 'learning_rate': 0.019863452941013496, 'subsample': 0.8558624712713075, 'colsample_bytree': 0.8200520171510071, 'scale_pos_weight': 3.1877510927492527}. Best is trial 10 with value: 0.4490170396613696.


Best trial: 11. Best value: 0.45011:  40%|████      | 12/30 [03:31<04:38, 15.48s/it] 

[I 2025-11-15 10:29:14,144] Trial 11 finished with value: 0.45011010467196033 and parameters: {'n_estimators': 100, 'max_depth': 15, 'learning_rate': 0.023545427019819067, 'subsample': 0.8628363603679048, 'colsample_bytree': 0.8333122439293678, 'scale_pos_weight': 3.0983994407745503}. Best is trial 11 with value: 0.45011010467196033.


Best trial: 12. Best value: 0.451612:  43%|████▎     | 13/30 [03:43<04:05, 14.44s/it]

[I 2025-11-15 10:29:26,198] Trial 12 finished with value: 0.4516118732355846 and parameters: {'n_estimators': 100, 'max_depth': 15, 'learning_rate': 0.020579550824690554, 'subsample': 0.8624393481826119, 'colsample_bytree': 0.8158333028563421, 'scale_pos_weight': 3.037295334647881}. Best is trial 12 with value: 0.4516118732355846.


Best trial: 12. Best value: 0.451612:  47%|████▋     | 14/30 [03:56<03:40, 13.77s/it]

[I 2025-11-15 10:29:38,410] Trial 13 finished with value: 0.441869147461931 and parameters: {'n_estimators': 100, 'max_depth': 13, 'learning_rate': 0.02870553337280828, 'subsample': 0.8676228446306298, 'colsample_bytree': 0.8351963515846874, 'scale_pos_weight': 4.440524271514301}. Best is trial 12 with value: 0.4516118732355846.


Best trial: 12. Best value: 0.451612:  50%|█████     | 15/30 [04:05<03:08, 12.54s/it]

[I 2025-11-15 10:29:48,083] Trial 14 finished with value: 0.45103844322916486 and parameters: {'n_estimators': 75, 'max_depth': 15, 'learning_rate': 0.010146662457877693, 'subsample': 0.8861720573316293, 'colsample_bytree': 0.6841442585386901, 'scale_pos_weight': 3.1745035520542175}. Best is trial 12 with value: 0.4516118732355846.


Best trial: 12. Best value: 0.451612:  53%|█████▎    | 16/30 [04:16<02:46, 11.93s/it]

[I 2025-11-15 10:29:58,592] Trial 15 finished with value: 0.4245044302776261 and parameters: {'n_estimators': 75, 'max_depth': 13, 'learning_rate': 0.012091991941247502, 'subsample': 0.9491457734081001, 'colsample_bytree': 0.6705602029138548, 'scale_pos_weight': 7.2708939898848435}. Best is trial 12 with value: 0.4516118732355846.


Best trial: 12. Best value: 0.451612:  57%|█████▋    | 17/30 [04:34<02:58, 13.70s/it]

[I 2025-11-15 10:30:16,411] Trial 16 finished with value: 0.4421636296120832 and parameters: {'n_estimators': 150, 'max_depth': 14, 'learning_rate': 0.007654094669986272, 'subsample': 0.8950425473437684, 'colsample_bytree': 0.6875163255173238, 'scale_pos_weight': 4.460865343177161}. Best is trial 12 with value: 0.4516118732355846.


Best trial: 12. Best value: 0.451612:  60%|██████    | 18/30 [04:43<02:28, 12.34s/it]

[I 2025-11-15 10:30:25,578] Trial 17 finished with value: 0.44241030161133266 and parameters: {'n_estimators': 75, 'max_depth': 12, 'learning_rate': 0.004384730723048287, 'subsample': 0.8277432765258106, 'colsample_bytree': 0.6103702845688606, 'scale_pos_weight': 3.8804602433261053}. Best is trial 12 with value: 0.4516118732355846.


Best trial: 12. Best value: 0.451612:  63%|██████▎   | 19/30 [04:57<02:22, 13.00s/it]

[I 2025-11-15 10:30:40,114] Trial 18 finished with value: 0.4418806451280678 and parameters: {'n_estimators': 150, 'max_depth': 10, 'learning_rate': 0.04033339239035642, 'subsample': 0.8249338325320527, 'colsample_bytree': 0.6912876942105651, 'scale_pos_weight': 4.845230753369581}. Best is trial 12 with value: 0.4516118732355846.


Best trial: 12. Best value: 0.451612:  67%|██████▋   | 20/30 [05:04<01:51, 11.11s/it]

[I 2025-11-15 10:30:46,823] Trial 19 finished with value: 0.4475663111230121 and parameters: {'n_estimators': 50, 'max_depth': 15, 'learning_rate': 0.011879949535211109, 'subsample': 0.8955946839265949, 'colsample_bytree': 0.7756010678184594, 'scale_pos_weight': 3.068051927620948}. Best is trial 12 with value: 0.4516118732355846.


Best trial: 12. Best value: 0.451612:  70%|███████   | 21/30 [05:13<01:33, 10.37s/it]

[I 2025-11-15 10:30:55,467] Trial 20 finished with value: 0.44330695936366044 and parameters: {'n_estimators': 75, 'max_depth': 12, 'learning_rate': 0.009859047485747858, 'subsample': 0.7023975884965762, 'colsample_bytree': 0.638689337414726, 'scale_pos_weight': 3.609776059596801}. Best is trial 12 with value: 0.4516118732355846.


Best trial: 21. Best value: 0.451614:  73%|███████▎  | 22/30 [05:26<01:29, 11.18s/it]

[I 2025-11-15 10:31:08,546] Trial 21 finished with value: 0.45161377854676826 and parameters: {'n_estimators': 100, 'max_depth': 15, 'learning_rate': 0.024557355062843067, 'subsample': 0.8851200388940734, 'colsample_bytree': 0.823340316266019, 'scale_pos_weight': 3.0040380698639892}. Best is trial 21 with value: 0.45161377854676826.


Best trial: 21. Best value: 0.451614:  77%|███████▋  | 23/30 [05:39<01:21, 11.68s/it]

[I 2025-11-15 10:31:21,378] Trial 22 finished with value: 0.44268807080662753 and parameters: {'n_estimators': 100, 'max_depth': 14, 'learning_rate': 0.015198991173025924, 'subsample': 0.8946444989486839, 'colsample_bytree': 0.7978534586056658, 'scale_pos_weight': 3.47504796210099}. Best is trial 21 with value: 0.45161377854676826.


Best trial: 23. Best value: 0.453468:  80%|████████  | 24/30 [05:50<01:09, 11.56s/it]

[I 2025-11-15 10:31:32,653] Trial 23 finished with value: 0.45346835009206143 and parameters: {'n_estimators': 100, 'max_depth': 15, 'learning_rate': 0.03356860978167583, 'subsample': 0.8243841736622988, 'colsample_bytree': 0.7587250324606077, 'scale_pos_weight': 3.0505474642881847}. Best is trial 23 with value: 0.45346835009206143.


Best trial: 23. Best value: 0.453468:  83%|████████▎ | 25/30 [06:01<00:57, 11.56s/it]

[I 2025-11-15 10:31:44,215] Trial 24 finished with value: 0.44279673648230344 and parameters: {'n_estimators': 100, 'max_depth': 14, 'learning_rate': 0.033941631594258975, 'subsample': 0.8283645388188876, 'colsample_bytree': 0.8635643553759058, 'scale_pos_weight': 4.103544780606492}. Best is trial 23 with value: 0.45346835009206143.


Best trial: 23. Best value: 0.453468:  87%|████████▋ | 26/30 [06:15<00:48, 12.15s/it]

[I 2025-11-15 10:31:57,757] Trial 25 finished with value: 0.44265602953747285 and parameters: {'n_estimators': 125, 'max_depth': 13, 'learning_rate': 0.056237583459986605, 'subsample': 0.7584907161203633, 'colsample_bytree': 0.7567539739013714, 'scale_pos_weight': 3.048212325025412}. Best is trial 23 with value: 0.45346835009206143.


Best trial: 23. Best value: 0.453468:  90%|█████████ | 27/30 [06:35<00:43, 14.56s/it]

[I 2025-11-15 10:32:17,936] Trial 26 finished with value: 0.445820282237808 and parameters: {'n_estimators': 175, 'max_depth': 15, 'learning_rate': 0.02732128855625259, 'subsample': 0.8216618957878916, 'colsample_bytree': 0.8016935088357647, 'scale_pos_weight': 3.5077653539621165}. Best is trial 23 with value: 0.45346835009206143.


Best trial: 23. Best value: 0.453468:  93%|█████████▎| 28/30 [06:47<00:27, 13.80s/it]

[I 2025-11-15 10:32:29,947] Trial 27 finished with value: 0.43193349033555223 and parameters: {'n_estimators': 125, 'max_depth': 8, 'learning_rate': 0.06911676536656539, 'subsample': 0.9234464134783458, 'colsample_bytree': 0.8533362653687142, 'scale_pos_weight': 7.303933791919177}. Best is trial 23 with value: 0.45346835009206143.


Best trial: 23. Best value: 0.453468:  97%|█████████▋| 29/30 [07:00<00:13, 13.40s/it]

[I 2025-11-15 10:32:42,432] Trial 28 finished with value: 0.4412472893658461 and parameters: {'n_estimators': 100, 'max_depth': 14, 'learning_rate': 0.03371881678866989, 'subsample': 0.8429285233555964, 'colsample_bytree': 0.7180061891985303, 'scale_pos_weight': 4.347664908151131}. Best is trial 23 with value: 0.45346835009206143.


Best trial: 23. Best value: 0.453468: 100%|██████████| 30/30 [07:10<00:00, 14.34s/it]

[I 2025-11-15 10:32:52,630] Trial 29 finished with value: 0.44066064233590013 and parameters: {'n_estimators': 75, 'max_depth': 11, 'learning_rate': 0.0011604337792121117, 'subsample': 0.7962969097720464, 'colsample_bytree': 0.9457395287192616, 'scale_pos_weight': 3.7473647918937045}. Best is trial 23 with value: 0.45346835009206143.
Optuna Hyperparameter Tuning Complete
Best Score (Validation): 0.4535
Best Parameters:
  n_estimators: 100
  max_depth: 15
  learning_rate: 0.03356860978167583
  subsample: 0.8243841736622988
  colsample_bytree: 0.7587250324606077
  scale_pos_weight: 3.0505474642881847





In [72]:
study_ovrxgb_best_params = study_ovrxgb.best_params

study_ovrxgb_best_params_xgb = {
    'n_estimators': study_ovrxgb_best_params['n_estimators'],
    'max_depth': study_ovrxgb_best_params['max_depth'],
    'learning_rate': study_ovrxgb_best_params['learning_rate'],
    'subsample': study_ovrxgb_best_params['subsample'],
    'colsample_bytree' : study_ovrxgb_best_params['colsample_bytree'],
    'scale_pos_weight': study_ovrxgb_best_params['scale_pos_weight'],
    'base_score': 0.5,
    'random_state': 42,
    'verbosity': 0,
    'use_label_encoder': False,
    'eval_metric': 'logloss',
    'use_label_encoder': False
}

study_ovrxgb_bbase_xgb = XGBClassifier(**study_ovrxgb_best_params_xgb)
study_ovrxgb_tuned = OneVsRestClassifier(
    estimator = study_ovrxgb_bbase_xgb,
)

study_ovrxgb_tuned.fit(X_train_transformed, y_train_transformed)

y_pred_training_ovr = study_ovrxgb_tuned.predict(X_train_transformed)

display_results(y_train_transformed, y_pred_training_ovr)

display_error_heatmap(y_train_transformed, y_pred_training_ovr)

display_label_cardinality_error(y_train_transformed, y_pred_training_ovr)

True Multi-Label Targets:
   ESF_0  ESF_1  ESF_2  ESF_3  ESF_4  ESF_5  ESF_6  ESF_7  ESF_8  ESF_9  \
0      0      1      0      1      1      0      0      1      1      0   
1      0      0      0      0      1      0      0      0      0      0   
2      0      0      0      0      0      0      0      1      0      0   
3      0      0      1      0      0      0      0      0      0      0   
4      0      0      0      0      0      0      0      1      0      0   

   ESF_10  ESF_11  ESF_12  ESF_13  ESF_14  ESF_15  
0       1       1       1       0       0       1  
1       0       0       0       0       0       0  
2       0       0       0       0       0       0  
3       0       0       0       0       0       0  
4       0       0       0       0       0       0  

Predicted Multi-Label Targets:
[[0 1 0 1 1 0 0 1 1 0 1 1 1 1 0 1]
 [0 0 1 1 1 0 0 0 1 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0]
 [0 0 0 1 1 0 0 0 1 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0]]

Su


The behavior of DataFrame.sum with axis=None is deprecated, in a future version this will reduce over both axes and return a scalar. To retain the old behavior, pass axis=0 (or do not pass axis)



OneVsRest is still relatively aggressive in terms of false positive but does have the best overall metrics score and an impressive f1_macro score.

We will run one quick check of the untuned classifier for comparison.

In [73]:
ovr_xgb.fit(X_train_transformed, y_train_transformed)

y_pred_training_ovr_base = ovr_xgb.predict(X_train_transformed)

display_results(y_train_transformed, y_pred_training_ovr_base)

display_error_heatmap(y_train_transformed, y_pred_training_ovr_base)

display_label_cardinality_error(y_train_transformed, y_pred_training_ovr_base)

True Multi-Label Targets:
   ESF_0  ESF_1  ESF_2  ESF_3  ESF_4  ESF_5  ESF_6  ESF_7  ESF_8  ESF_9  \
0      0      1      0      1      1      0      0      1      1      0   
1      0      0      0      0      1      0      0      0      0      0   
2      0      0      0      0      0      0      0      1      0      0   
3      0      0      1      0      0      0      0      0      0      0   
4      0      0      0      0      0      0      0      1      0      0   

   ESF_10  ESF_11  ESF_12  ESF_13  ESF_14  ESF_15  
0       1       1       1       0       0       1  
1       0       0       0       0       0       0  
2       0       0       0       0       0       0  
3       0       0       0       0       0       0  
4       0       0       0       0       0       0  

Predicted Multi-Label Targets:
[[0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]]

Su


The behavior of DataFrame.sum with axis=None is deprecated, in a future version this will reduce over both axes and return a scalar. To retain the old behavior, pass axis=0 (or do not pass axis)



This has fewer false positive but seems more prone to underestimate the number of labels than our tuned model. Since we hope to provide more options for our end user to select, we will move forward with our tuned model even though this has a higher combined metrics score.

We can now load in the test set to measure performance of our tuned model.

In [89]:
df_test = pd.read_parquet('combined_test_set.parquet')
X_test, y_test = create_set(df_test, feature_columns, target_column)

In [90]:
# Split the string in designatedIncidentTypes to a list of strings.
# This cannot be done earlier due to groupby issues.
X_test['designatedIncidentTypes'] = X_test['designatedIncidentTypes'].str.split(',')
X_test.head()

Unnamed: 0,incidentId,state,designatedIncidentTypes,declarationType,region,year
0,2024010901,RI,[Severe Storm],DR,1,2024.0
1,2024020701,OR,"[Mud/Landslide, Straight-Line Winds, Winter St...",DR,10,2024.0
2,2024020801,CA,"[Flood, Severe Storm]",DR,9,2024.0
3,2024032101,OH,[Tornado],DR,5,2024.0
4,2024032102,WA,"[Mud/Landslide, Straight-Line Winds, Winter St...",DR,10,2024.0


In [91]:
y_test.head()

0               [7.0]
1          [0.0, 7.0]
2    [13.0, 3.0, 5.0]
3         [13.0, 7.0]
4               [7.0]
Name: supportFunction, dtype: object

In [None]:
X_test_transformed, y_test_transformed = transform_x_y_test(
    X_test,
    y_test,
    preprocessor_X,
    preprocessor_y
)
X_test_transformed.drop(columns=['incidentId', 'year'], inplace=True)



unknown class(es) ['Tropical Depression'] will be ignored



In [93]:
print(X_test_transformed.shape)
X_test_transformed.head()

(80, 88)


Unnamed: 0,state_AK,state_AL,state_AR,state_AS,state_AZ,state_CA,state_CO,state_CT,state_DC,state_DE,state_FL,state_GA,state_GU,state_HI,state_IA,state_ID,state_IL,state_IN,state_KS,state_KY,state_LA,state_MA,state_MD,state_ME,state_MI,state_MN,state_MO,state_MP,state_MS,state_MT,state_NC,state_ND,state_NE,state_NH,state_NJ,state_NM,state_NV,state_NY,state_OH,state_OK,state_OR,state_PA,state_PR,state_RI,state_SC,state_SD,state_TN,state_TX,state_UT,state_VA,state_VI,state_VT,state_WA,state_WI,state_WV,state_WY,declarationType_DR,declarationType_EM,region_1,region_2,region_3,region_4,region_5,region_6,region_7,region_8,region_9,region_10,designatedIncidentTypes_Biological,designatedIncidentTypes_Chemical,designatedIncidentTypes_Coastal Storm,designatedIncidentTypes_Dam/Levee Break,designatedIncidentTypes_Earthquake,designatedIncidentTypes_Fire,designatedIncidentTypes_Flood,designatedIncidentTypes_Hurricane,designatedIncidentTypes_Mud/Landslide,designatedIncidentTypes_Other,designatedIncidentTypes_Severe Ice Storm,designatedIncidentTypes_Severe Storm,designatedIncidentTypes_Snowstorm,designatedIncidentTypes_Straight-Line Winds,designatedIncidentTypes_Terrorist,designatedIncidentTypes_Tornado,designatedIncidentTypes_Tropical Storm,designatedIncidentTypes_Typhoon,designatedIncidentTypes_Volcanic Eruption,designatedIncidentTypes_Winter Storm
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0
2,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0


In [94]:
print(y_test_transformed.shape)
y_test_transformed.head()

(80, 16)


Unnamed: 0,ESF_0,ESF_1,ESF_2,ESF_3,ESF_4,ESF_5,ESF_6,ESF_7,ESF_8,ESF_9,ESF_10,ESF_11,ESF_12,ESF_13,ESF_14,ESF_15
0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0
2,0,0,0,1,0,1,0,0,0,0,0,0,0,1,0,0
3,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0
4,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0


In [95]:
y_pred_test_over = study_ovrxgb_tuned.predict(X_test_transformed)

display_results(y_test_transformed, y_pred_test_over)

display_error_heatmap(y_test_transformed, y_pred_test_over)

display_label_cardinality_error(y_test_transformed, y_pred_test_over)



True Multi-Label Targets:
   ESF_0  ESF_1  ESF_2  ESF_3  ESF_4  ESF_5  ESF_6  ESF_7  ESF_8  ESF_9  \
0      0      0      0      0      0      0      0      1      0      0   
1      1      0      0      0      0      0      0      1      0      0   
2      0      0      0      1      0      1      0      0      0      0   
3      0      0      0      0      0      0      0      1      0      0   
4      0      0      0      0      0      0      0      1      0      0   

   ESF_10  ESF_11  ESF_12  ESF_13  ESF_14  ESF_15  
0       0       0       0       0       0       0  
1       0       0       0       0       0       0  
2       0       0       0       1       0       0  
3       0       0       0       1       0       0  
4       0       0       0       0       0       0  

Predicted Multi-Label Targets:
[[0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [1 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0]
 [1 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0]
 [1 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0]]

Su


The behavior of DataFrame.sum with axis=None is deprecated, in a future version this will reduce over both axes and return a scalar. To retain the old behavior, pass axis=0 (or do not pass axis)



In [98]:
y_proba = study_ovrxgb_tuned.predict_proba(X_test_transformed)

y_proba_adjusted = (y_proba >= 0.33).astype(int)

In [99]:
y_proba_adjusted

array([[0, 0, 0, ..., 1, 0, 0],
       [1, 0, 0, ..., 0, 0, 0],
       [1, 0, 0, ..., 1, 0, 0],
       ...,
       [1, 0, 0, ..., 1, 0, 0],
       [1, 1, 0, ..., 1, 0, 0],
       [0, 0, 0, ..., 1, 0, 0]], shape=(80, 16))

In [101]:

display_results(y_test_transformed, y_proba_adjusted)

display_error_heatmap(y_test_transformed, y_proba_adjusted)

display_label_cardinality_error(y_test_transformed, y_proba_adjusted)

True Multi-Label Targets:
   ESF_0  ESF_1  ESF_2  ESF_3  ESF_4  ESF_5  ESF_6  ESF_7  ESF_8  ESF_9  \
0      0      0      0      0      0      0      0      1      0      0   
1      1      0      0      0      0      0      0      1      0      0   
2      0      0      0      1      0      1      0      0      0      0   
3      0      0      0      0      0      0      0      1      0      0   
4      0      0      0      0      0      0      0      1      0      0   

   ESF_10  ESF_11  ESF_12  ESF_13  ESF_14  ESF_15  
0       0       0       0       0       0       0  
1       0       0       0       0       0       0  
2       0       0       0       1       0       0  
3       0       0       0       1       0       0  
4       0       0       0       0       0       0  

Predicted Multi-Label Targets:
[[0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0]
 [1 0 0 1 1 0 0 1 0 0 0 0 0 0 0 0]
 [1 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0]
 [1 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0]
 [1 0 0 1 1 0 1 1 0 0 1 0 0 0 0 0]]

Su


The behavior of DataFrame.sum with axis=None is deprecated, in a future version this will reduce over both axes and return a scalar. To retain the old behavior, pass axis=0 (or do not pass axis)

