In [None]:
from google.colab import drive
drive.mount('/content/drive')

!pip install catboost -q

# Standard libraries
import glob
import multiprocessing
import os
import pickle
import shutil
import time
from concurrent.futures import ProcessPoolExecutor, as_completed

# Data handling and manipulation
import numpy as np
import pandas as pd

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm

# Machine learning and preprocessing
import sklearn
from sklearn.utils.validation import _check_n_features
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix

# Machine learning models
from catboost import CatBoostClassifier
from xgboost import XGBClassifier

# Model saving/loading
import joblib

# Get the number of CPU threads available
num_threads = multiprocessing.cpu_count()
print(f"Number of CPU threads available: {num_threads}")

# Filepaths (all located on the mounted Google Drive)
TRAIN_X = '/content/drive/MyDrive/building_classification/X_train_processed.csv'
TEST_X = '/content/drive/MyDrive/building_classification/X_test_processed.csv'
TRAIN_Y_PATH = '/content/drive/MyDrive/building_classification/Y_train_processed.csv'

SAMPLE_SUBMISSION_PATH = '/content/drive/MyDrive/building_classification/sample_submission_v0.1.0.csv.gz'


# Load the processed training features and the label matrix.
# NOTE(review): the recorded X_train.head() output lists an 'Unnamed: 0'
# column, i.e. the row index saved in the CSV is loaded as a regular feature.
# Confirm this is intended; otherwise read with index_col=0 (this changes the
# feature count and every downstream score).
X_train = pd.read_csv(TRAIN_X)
Y_train = pd.read_csv(TRAIN_Y_PATH)

# Directory to save models
model_save_path = "/content/drive/MyDrive/building_classification/models_tuningscalefactor_5/"
# Start every run from an empty directory. Removal is done in-process instead
# of shelling out to `rm -r`, so failures raise instead of being ignored and
# the path is never interpreted by a shell.
if os.path.isdir(model_save_path):
    shutil.rmtree(model_save_path)
elif os.path.exists(model_save_path):
    # A plain file is squatting on the directory name; remove it as `rm -r` would
    os.remove(model_save_path)
os.makedirs(model_save_path, exist_ok=True)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Number of CPU threads available: 8


In [None]:
# Remove pandas' column cap so the preview shows every column of X_train
pd.options.display.max_columns = None
X_train.head()

Unnamed: 0,ABS_p05_value,ABS_95th_percentile_of_0_1_changes,RATIO_mode_DIV_total_len,RATIO_percentile_95_DIV_percentile_05,RATIO_num_of_unique_DIV_mean,RATIO_unique_DIV_sd,RATIO_sum_reoccur_DIV_unique,RATIO_first_min_to_last_min_DIV_len,RATIO_longest_one_run_ratio,peak_height_min,peak_width_median,peak_width_min,missing_values_ratio,missing_runs_ratio,fraction_of_values_that_are_zero,mean_interval_between_0s,abs_energy_DIV_len,benford_corr,change_quantiles_mean,permutation_entropy_3,seasonal_mean_DIV_seasonal_std,rolling_max_2,rolling_max_144,fft_phase_mean,wavelet_min_haar_1,wavelet_max_haar_2,wavelet_min_haar_2,autocorr_lag_1,autocorr_lag_36,is_Air_Flow_Setpoint,is_Cooling_Supply_Air_Temp_deadband_Setpoint,is_Cooling_Temperature_Setpoint,is_differential_pressure_Setpoint,is_Discharge_Air_Temperature_Setpoint,is_Flow_Setpoint,is_Heating_Demand_Setpoint,is_Heating_Supply_Air_Temperature_Deadband_Setpoint,is_Heating_Temperature_Setpoint,is_Humidity_Setpoint,is_Low_Outside_Air_Temperature_Enable_Setpoint,is_Max_Air_Temperature_Setpoint,is_Min_Air_Temperature_Setpoint,is_Outside_Air_Lockout_Temperature_Setpoint,is_Outside_Air_Temperature_Setpoint,is_Room_Air_Temperature_Setpoint,is_Speed_Setpoint,is_Static_Pressure_Setpoint,is_Temperature_Setpoint,is_Time_Setpoint,is_Water_Temperature_Setpoint,is_Zone_Air_Humidity_Setpoint,RATIO_peak_height_median_DIV_peak_width_median
0,30.0,0.0,0.99975,1.0,0.066683,4.218478,60030.0,0.0,0.0,0.0,0.0,0.0,0.011991,0.00025,0.00025,0.0,899.7752,0.062915,0.0,0.006144,0.0006122196,30.0,30.0,-5.224122e-16,3.93284e-16,21.213203,6.661338e-16,-0.00025,-0.000252,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0.0
1,22.5,0.0,0.999875,1.0,0.0889,7.964365,90292.5,0.0,0.0,0.0,0.0,0.0,0.009717,0.000623,0.000125,0.0,506.1869,0.295657,0.0,0.003324,-0.000101761,22.5,22.5,5.809065e-16,1.183141e-15,15.909903,4.996004e-16,-0.000125,-0.000125,0,0,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,1,0,0,0,0.0
2,1.76,0.0,0.081988,171.781818,18.981519,18.998546,212.0484,0.269565,0.0,100.24,1.529241,0.500536,0.003727,0.001242,0.0,0.0,17473.32,0.924611,5.491232,1.682359,-2.1508670000000003e-17,488.96,488.96,-1.941856e-17,6.567036e-17,258.291965,3.2479390000000005e-17,0.970319,0.27247,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,116.685368
3,3834.0,0.0,0.999379,1.0,0.000522,0.020947,15435680.0,0.134666,0.0,0.0,0.0,0.0,0.00211,0.000869,0.000621,163129.75225,14690430.0,0.062915,0.0,0.009422,-3.3744710000000003e-17,3834.0,3834.0,5.432473e-16,2.223495e-13,2711.047399,1.081357e-13,-0.000621,-0.000624,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0
4,234.0,0.0,0.999379,1.0,0.008552,0.343054,941265.0,0.773665,0.0,0.0,0.0,0.0,0.001118,0.000497,0.000621,935935.79025,54721.99,0.295657,0.0,0.009945,-2.539742e-18,234.0,234.0,0.00039026,1.181703e-14,165.462987,0.0,0.199503,-0.000624,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0.0


In [None]:
# Report how many columns X_train has (every column is counted)
print("Number of features used: ", X_train.shape[1])

Number of features used:  52


# Training Function

In [None]:
# Maps each target label to the key of the model family used for it.
# Every label currently uses "cat", so the mapping is built from the ordered
# label list with a shared value.
best_model_dict = dict.fromkeys(
    [
        "Active_Power_Sensor",
        "Air_Flow_Sensor",
        "Air_Flow_Setpoint",
        "Air_Temperature_Sensor",
        "Air_Temperature_Setpoint",
        "Alarm",
        "Angle_Sensor",
        "Average_Zone_Air_Temperature_Sensor",
        "Chilled_Water_Differential_Temperature_Sensor",
        "Chilled_Water_Return_Temperature_Sensor",
        "Chilled_Water_Supply_Flow_Sensor",
        "Chilled_Water_Supply_Temperature_Sensor",
        "Command",
        "Cooling_Demand_Sensor",
        "Cooling_Demand_Setpoint",
        "Cooling_Supply_Air_Temperature_Deadband_Setpoint",
        "Cooling_Temperature_Setpoint",
        "Current_Sensor",
        "Damper_Position_Sensor",
        "Damper_Position_Setpoint",
        "Demand_Sensor",
        "Dew_Point_Setpoint",
        "Differential_Pressure_Sensor",
        "Differential_Pressure_Setpoint",
        "Differential_Supply_Return_Water_Temperature_Sensor",
        "Discharge_Air_Dewpoint_Sensor",
        "Discharge_Air_Temperature_Sensor",
        "Discharge_Air_Temperature_Setpoint",
        "Discharge_Water_Temperature_Sensor",
        "Duration_Sensor",
        "Electrical_Power_Sensor",
        "Energy_Usage_Sensor",
        "Filter_Differential_Pressure_Sensor",
        "Flow_Sensor",
        "Flow_Setpoint",
        "Frequency_Sensor",
        "Heating_Demand_Sensor",
        "Heating_Demand_Setpoint",
        "Heating_Supply_Air_Temperature_Deadband_Setpoint",
        "Heating_Temperature_Setpoint",
        "Hot_Water_Flow_Sensor",
        "Hot_Water_Return_Temperature_Sensor",
        "Hot_Water_Supply_Temperature_Sensor",
        "Humidity_Setpoint",
        "Load_Current_Sensor",
        "Low_Outside_Air_Temperature_Enable_Setpoint",
        "Max_Air_Temperature_Setpoint",
        "Min_Air_Temperature_Setpoint",
        "Outside_Air_CO2_Sensor",
        "Outside_Air_Enthalpy_Sensor",
        "Outside_Air_Humidity_Sensor",
        "Outside_Air_Lockout_Temperature_Setpoint",
        "Outside_Air_Temperature_Sensor",
        "Outside_Air_Temperature_Setpoint",
        "Parameter",
        "Peak_Power_Demand_Sensor",
        "Position_Sensor",
        "Power_Sensor",
        "Pressure_Sensor",
        "Rain_Sensor",
        "Reactive_Power_Sensor",
        "Reset_Setpoint",
        "Return_Air_Temperature_Sensor",
        "Return_Water_Temperature_Sensor",
        "Room_Air_Temperature_Setpoint",
        "Sensor",
        "Setpoint",
        "Solar_Radiance_Sensor",
        "Speed_Setpoint",
        "Static_Pressure_Sensor",
        "Static_Pressure_Setpoint",
        "Status",
        "Supply_Air_Humidity_Sensor",
        "Supply_Air_Static_Pressure_Sensor",
        "Supply_Air_Static_Pressure_Setpoint",
        "Supply_Air_Temperature_Sensor",
        "Supply_Air_Temperature_Setpoint",
        "Temperature_Sensor",
        "Temperature_Setpoint",
        "Thermal_Power_Sensor",
        "Time_Setpoint",
        "Usage_Sensor",
        "Valve_Position_Sensor",
        "Voltage_Sensor",
        "Warmest_Zone_Air_Temperature_Sensor",
        "Water_Flow_Sensor",
        "Water_Temperature_Sensor",
        "Water_Temperature_Setpoint",
        "Wind_Direction_Sensor",
        "Wind_Speed_Sensor",
        "Zone_Air_Dewpoint_Sensor",
        "Zone_Air_Humidity_Sensor",
        "Zone_Air_Humidity_Setpoint",
        "Zone_Air_Temperature_Sensor",
    ],
    "cat",
)

In [None]:
# ====================================================
#                 Evaluation Function
# ====================================================

def evaluate_models(y_true, y_pred):
    """Score predictions against the ground truth.

    Returns a dict with the keys 'f1', 'precision' and 'recall'. Each scorer
    is called with zero_division=0.
    """
    scorers = (
        ('f1', f1_score),
        ('precision', precision_score),
        ('recall', recall_score),
    )
    return {
        metric_name: scorer(y_true, y_pred, zero_division=0)
        for metric_name, scorer in scorers
    }

# ====================================================
#              Model Parameter Factory
# ====================================================

def get_model_params(model_type, seed, pos_weight, class_weights):
    """
    Return a freshly instantiated, unfitted model for ``model_type``.

    Args:
        model_type: Key of the model family to build. Only 'cat' is supported.
        seed: Passed to the model as ``random_state``.
        pos_weight: Positive-class weight. Accepted for interface
            compatibility; the 'cat' model does not use it (it receives
            ``class_weights`` instead).
        class_weights: Passed to CatBoostClassifier as ``class_weights``.

    Returns:
        The instantiated model.

    Raises:
        ValueError: If ``model_type`` is not a supported key.

    Extend ``model_builders`` to add or modify models.
    """
    # Builders are lazy so that only the requested model is constructed, and
    # an unsupported model_type fails before any estimator is instantiated.
    model_builders = {
        'cat': lambda: CatBoostClassifier(
            iterations=100,
            random_state=seed,
            verbose=False,
            thread_count=1,  # one thread per model
            class_weights=class_weights,
            eval_metric='AUC',
        ),
    }

    builder = model_builders.get(model_type)
    if builder is None:
        raise ValueError(f"Model type {model_type} is not supported.")

    return builder()


# ====================================================
#               Training Function
# ====================================================

def train_models_for_label(label,
                           X_train,
                           Y_train,
                           model_save_path,
                           best_model_dict,
                           seed,
                           scaling_factor,
                           n_splits=2):
    """
    Train and cross-validate a model for ``label`` using a given scaling_factor.

    Rows whose label value is 0 are dropped; of the remaining rows, -1 is
    mapped to class 0 and 1 stays class 1. The positive class weight is
    ``max(1.0, (n_negative / n_positive) * scaling_factor)``.

    Args:
        label: Column of ``Y_train`` to model.
        X_train: Feature DataFrame, row-aligned with ``Y_train``.
        Y_train: Label DataFrame containing ``label``.
        model_save_path: Unused by this function; kept for interface compatibility.
        best_model_dict: Mapping from label to model type key.
        seed: Random seed for both the model and the fold shuffling.
        scaling_factor: Multiplier applied to the negative/positive ratio.
        n_splits: Number of stratified CV folds.

    Returns:
        (label, seed, scaling_factor, final_metrics, model, confusion_matrices)
        where ``final_metrics`` holds the fold-averaged 'f1', 'precision' and
        'recall', ``model`` is the estimator as fitted on the last fold and
        ``confusion_matrices`` has one 2x2 matrix per fold,
        or
        (label, seed, scaling_factor, None, None, None) if no model type is
        configured, one of the two classes is absent, or an error occurs.
    """
    failure = (label, seed, scaling_factor, None, None, None)
    try:
        # ------------------------------------------------
        # Prepare the data
        # ------------------------------------------------
        y = Y_train[label]
        mask = y != 0
        # Convert -1 to 0, and keep 1 as 1
        y_sub = y[mask].replace({-1: 0, 1: 1})
        X_sub = X_train.loc[mask]

        # Get the specific model type for this label
        model_type = best_model_dict.get(label)
        if not model_type:
            print(f"No model type specified for label '{label}'")
            return failure

        # ------------------------------------------------
        # Calculate class weights or pos_weight from scaling_factor
        # ------------------------------------------------
        n_pos = int((y_sub == 1).sum())
        n_neg = int((y_sub == 0).sum())
        if n_pos == 0 or n_neg == 0:
            # Without both classes the ratio is undefined (division by zero)
            # and a binary classifier cannot be trained or scored.
            print(f"Skipping label '{label}': need both classes "
                  f"(negatives={n_neg}, positives={n_pos})")
            return failure

        ratio = n_neg / n_pos
        pos_weight = max(1.0, ratio * scaling_factor)
        class_weights = [1, pos_weight]

        model = get_model_params(
            model_type=model_type,
            seed=seed,
            pos_weight=pos_weight,
            class_weights=class_weights
        )

        # ------------------------------------------------
        # Cross-validation training
        # ------------------------------------------------
        skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=seed)
        eval_metrics = {'f1': [], 'precision': [], 'recall': []}
        confusion_matrices = []

        for train_idx, val_idx in skf.split(X_sub, y_sub):
            X_fold_train, X_fold_val = X_sub.iloc[train_idx], X_sub.iloc[val_idx]
            y_fold_train, y_fold_val = y_sub.iloc[train_idx], y_sub.iloc[val_idx]

            model.fit(X_fold_train, y_fold_train)

            # Probability of the positive class, thresholded at 0.5
            y_proba = model.predict_proba(X_fold_val)[:, 1]
            y_pred = (y_proba > 0.5).astype(int)

            # Evaluate
            metrics = evaluate_models(y_fold_val, y_pred)
            for metric_name, value in metrics.items():
                eval_metrics[metric_name].append(value)

            # Confusion matrix for each fold; explicit labels keep the shape
            # 2x2 even when a fold happens to contain a single class.
            cm = confusion_matrix(y_fold_val, y_pred, labels=[0, 1])
            confusion_matrices.append(cm)

        final_metrics = {m: np.mean(vals) for m, vals in eval_metrics.items()}

        return (label, seed, scaling_factor, final_metrics, model, confusion_matrices)

    except Exception as e:
        # Best effort: one failing (label, seed, scaling_factor) combination
        # must not abort the whole search, so report it and signal failure.
        print(f"Error processing label '{label}' with seed {seed} & scaling_factor {scaling_factor}: {e}")
        return failure

# Run Training

In [None]:
from itertools import product
from concurrent.futures import ProcessPoolExecutor, as_completed
from collections import defaultdict
import numpy as np
from tqdm import tqdm

# ------------------------------------------------
# Random seeds: 50 values, each (label, scaling factor) pair is run once per seed
# ------------------------------------------------
seeds = [8, 88, 888, 1, 175, 907, 354, 917, 182, 695, 744, 665, 981, 876, 286, 241, 159, 219, 868, 277,
         521, 278, 755, 387, 113, 115, 576, 527, 270, 663, 495, 214, 620, 376, 912, 50, 733, 168, 101, 689,
         947, 382, 649, 456, 544, 993, 127, 730, 760, 855]

# ------------------------------------------------
# Labels tuned in this run
# ------------------------------------------------
label_cols = [
    "Supply_Air_Temperature_Sensor",
    "Supply_Air_Temperature_Setpoint",
    "Temperature_Sensor",
    "Temperature_Setpoint",
    "Thermal_Power_Sensor",
    "Time_Setpoint",
    "Usage_Sensor",
    "Valve_Position_Sensor",
    "Voltage_Sensor",
    "Warmest_Zone_Air_Temperature_Sensor",
    "Water_Flow_Sensor",
    "Water_Temperature_Sensor",
    "Water_Temperature_Setpoint",
    "Wind_Direction_Sensor",
    "Wind_Speed_Sensor",
    "Zone_Air_Dewpoint_Sensor",
    "Zone_Air_Humidity_Sensor",
    "Zone_Air_Humidity_Setpoint",
    "Zone_Air_Temperature_Sensor",
]

# Only labels that have a configured model type are tuned
valid_labels = [label for label in label_cols if label in best_model_dict]

# ------------------------------------------------
# COARSE SEARCH
# ------------------------------------------------
# range(0, 1) contains the single value 0, so this pass evaluates factor 0 only
coarse_scaling_factors = list(range(0, 1))

# metrics_results_coarse[label][sf] -> list of F1 scores, one per successful seed
metrics_results_coarse = defaultdict(lambda: defaultdict(list))

# One task per (label, scaling factor, seed); the tuple order matches the
# positional parameters of train_models_for_label
coarse_tasks = [
    (label, X_train, Y_train, model_save_path, best_model_dict, seed, sf)
    for label, sf, seed in product(valid_labels, coarse_scaling_factors, seeds)
]

print(f"Starting coarse search with {len(coarse_tasks)} total tasks...")

with ProcessPoolExecutor(max_workers=8) as executor:
    # Remember which task each future belongs to so failures can be reported
    future_to_task = {}
    for task in coarse_tasks:
        submitted = executor.submit(train_models_for_label, *task, n_splits=3)
        future_to_task[submitted] = task

    # tqdm over as_completed gives a progress bar that advances as tasks finish
    completed = tqdm(as_completed(future_to_task),
                     total=len(future_to_task),
                     desc="Coarse Search Progress")
    for future in completed:
        label, X_t, Y_t, msp, bmd, seed, sf = future_to_task[future]
        try:
            r_label, r_seed, r_sf, r_metrics, r_model, r_cm_list = future.result()
            # Failed runs come back with r_metrics=None and are skipped
            if r_metrics is not None:
                f1_val = r_metrics['f1']
                metrics_results_coarse[r_label][r_sf].append(f1_val)
        except Exception as e:
            print(f"Exception for (label={label}, seed={seed}, sf={sf}): {e}")


Starting coarse search with 950 total tasks...


Coarse Search Progress: 100%|██████████| 950/950 [12:30<00:00,  1.27it/s]


In [None]:
# For every label, keep the coarse scaling factor with the highest mean F1 across seeds
best_coarse_sf_per_label = {}
for label in valid_labels:
    scores_by_sf = metrics_results_coarse[label]
    # Mean F1 per factor, in ascending factor order; factors without any
    # successful run are left out
    mean_f1_by_sf = {
        candidate: np.mean(scores_by_sf[candidate])
        for candidate in sorted(scores_by_sf)
        if scores_by_sf[candidate]
    }

    best_sf, best_f1 = None, -1
    # Strict '>' over ascending factors: ties go to the smallest factor
    for sf, avg_f1 in mean_f1_by_sf.items():
        if avg_f1 > best_f1:
            best_sf, best_f1 = sf, avg_f1

    best_coarse_sf_per_label[label] = best_sf
    print(f"[COARSE] Label={label}, Best Scaling Factor={best_sf}, Avg F1={best_f1:.4f}")

[COARSE] Label=Supply_Air_Temperature_Sensor, Best Scaling Factor=0, Avg F1=0.9549
[COARSE] Label=Supply_Air_Temperature_Setpoint, Best Scaling Factor=0, Avg F1=0.8995
[COARSE] Label=Temperature_Sensor, Best Scaling Factor=0, Avg F1=0.9018
[COARSE] Label=Temperature_Setpoint, Best Scaling Factor=0, Avg F1=0.8078
[COARSE] Label=Thermal_Power_Sensor, Best Scaling Factor=0, Avg F1=0.6721
[COARSE] Label=Time_Setpoint, Best Scaling Factor=0, Avg F1=0.5419
[COARSE] Label=Usage_Sensor, Best Scaling Factor=0, Avg F1=0.8373
[COARSE] Label=Valve_Position_Sensor, Best Scaling Factor=0, Avg F1=0.6430
[COARSE] Label=Voltage_Sensor, Best Scaling Factor=0, Avg F1=0.9514
[COARSE] Label=Warmest_Zone_Air_Temperature_Sensor, Best Scaling Factor=0, Avg F1=0.6423
[COARSE] Label=Water_Flow_Sensor, Best Scaling Factor=0, Avg F1=0.4375
[COARSE] Label=Water_Temperature_Sensor, Best Scaling Factor=0, Avg F1=0.7645
[COARSE] Label=Water_Temperature_Setpoint, Best Scaling Factor=0, Avg F1=0.6074
[COARSE] Label=Win

In [None]:
# ------------------------------------------------
# FINE SEARCH AROUND THE BEST COARSE FACTOR
# ------------------------------------------------
fine_search_radius = 0.3   # explore +/- 0.3 around the best coarse factor
fine_search_step = 0.1

# metrics_results_fine[label][sf] -> list of F1 scores, one per successful seed
metrics_results_fine = defaultdict(lambda: defaultdict(list))
fine_tasks = []

for label in valid_labels:
    coarse_best = best_coarse_sf_per_label[label]
    if coarse_best is None:
        # If for some reason we didn't get a valid coarse_best, skip
        continue

    start = max(0, coarse_best - fine_search_radius)   # never go below 0
    end = coarse_best + fine_search_radius             # inclusive upper bound
    # Build the grid from an integer point count. np.arange with a float step
    # and a padded stop can emit one point beyond the radius because of
    # rounding; the small epsilon keeps the intended last point.
    n_points = int(np.floor((end - start) / fine_search_step + 1e-9)) + 1
    sf_values = start + fine_search_step * np.arange(n_points)

    for sf in sf_values:
        # Plain Python float, rounded to remove float noise such as 0.30000000000000004
        sf_rounded = round(float(sf), 3)
        for seed in seeds:
            fine_tasks.append((label, X_train, Y_train, model_save_path, best_model_dict, seed, sf_rounded))

print(f"\nStarting fine search with {len(fine_tasks)} total tasks...")

with ProcessPoolExecutor(max_workers=8) as executor:
    # Remember which task each future belongs to so failures can be reported
    future_to_task = {
        executor.submit(train_models_for_label, *task, n_splits=3): task
        for task in fine_tasks
    }

    for future in tqdm(as_completed(future_to_task),
                       total=len(future_to_task),
                       desc="Fine Search Progress"):
        (label, X_t, Y_t, msp, bmd, seed, sf) = future_to_task[future]
        try:
            r_label, r_seed, r_sf, r_metrics, r_model, r_cm_list = future.result()
            # Failed runs come back with r_metrics=None and are skipped
            if r_metrics is not None:
                f1_val = r_metrics['f1']
                metrics_results_fine[r_label][r_sf].append(f1_val)
        except Exception as e:
            print(f"Exception for (label={label}, seed={seed}, sf={sf}): {e}")


Starting fine search with 3800 total tasks...


Fine Search Progress: 100%|██████████| 3800/3800 [49:19<00:00,  1.28it/s]


In [None]:
# For every label, keep the fine-grid scaling factor with the highest mean F1 across seeds
best_fine_sf_per_label = {}
for label in valid_labels:
    scores_by_sf = metrics_results_fine[label]
    tried_sfs = sorted(scores_by_sf)
    # Mean F1 per tried factor; factors without any successful run are left out
    mean_f1_by_sf = {
        candidate: np.mean(scores_by_sf[candidate])
        for candidate in tried_sfs
        if scores_by_sf[candidate]
    }

    best_sf, best_f1 = None, -1
    # Strict '>' over ascending factors: ties go to the smallest factor
    for sf, avg_f1 in mean_f1_by_sf.items():
        if avg_f1 > best_f1:
            best_sf, best_f1 = sf, avg_f1

    best_fine_sf_per_label[label] = best_sf
    print(f"{label}, Best Scaling Factor={best_sf}, Avg F1={best_f1:.4f}")

Supply_Air_Temperature_Sensor, Best Scaling Factor=0.1, Avg F1=0.9657
Supply_Air_Temperature_Setpoint, Best Scaling Factor=0.1, Avg F1=0.9468
Temperature_Sensor, Best Scaling Factor=0.0, Avg F1=0.9018
Temperature_Setpoint, Best Scaling Factor=0.3, Avg F1=0.8358
Thermal_Power_Sensor, Best Scaling Factor=0.0, Avg F1=0.6721
Time_Setpoint, Best Scaling Factor=0.0, Avg F1=0.5419
Usage_Sensor, Best Scaling Factor=0.1, Avg F1=0.8523
Valve_Position_Sensor, Best Scaling Factor=0.1, Avg F1=0.6461
Voltage_Sensor, Best Scaling Factor=0.0, Avg F1=0.9514
Warmest_Zone_Air_Temperature_Sensor, Best Scaling Factor=0.1, Avg F1=0.7415
Water_Flow_Sensor, Best Scaling Factor=0.0, Avg F1=0.4375
Water_Temperature_Sensor, Best Scaling Factor=0.1, Avg F1=0.7973
Water_Temperature_Setpoint, Best Scaling Factor=0.2, Avg F1=0.6391
Wind_Direction_Sensor, Best Scaling Factor=0.1, Avg F1=0.9730
Wind_Speed_Sensor, Best Scaling Factor=0.1, Avg F1=0.8766
Zone_Air_Dewpoint_Sensor, Best Scaling Factor=0.0, Avg F1=0.9377
Zo

In [None]:
# Recompute the best fine scaling factor per label and print a compact
# summary (label and factor only, without the F1 value)
best_fine_sf_per_label = {}
for label in valid_labels:
    scores_by_sf = metrics_results_fine[label]
    tried_sfs = sorted(scores_by_sf)
    # Mean F1 per tried factor; factors without any successful run are left out
    mean_f1_by_sf = {
        candidate: np.mean(scores_by_sf[candidate])
        for candidate in tried_sfs
        if scores_by_sf[candidate]
    }

    best_sf, best_f1 = None, -1
    # Strict '>' over ascending factors: ties go to the smallest factor
    for sf, avg_f1 in mean_f1_by_sf.items():
        if avg_f1 > best_f1:
            best_sf, best_f1 = sf, avg_f1

    best_fine_sf_per_label[label] = best_sf
    print(f"{label}, Best Scaling Factor={best_sf}")

Supply_Air_Temperature_Sensor, Best Scaling Factor=0.1
Supply_Air_Temperature_Setpoint, Best Scaling Factor=0.1
Temperature_Sensor, Best Scaling Factor=0.0
Temperature_Setpoint, Best Scaling Factor=0.3
Thermal_Power_Sensor, Best Scaling Factor=0.0
Time_Setpoint, Best Scaling Factor=0.0
Usage_Sensor, Best Scaling Factor=0.1
Valve_Position_Sensor, Best Scaling Factor=0.1
Voltage_Sensor, Best Scaling Factor=0.0
Warmest_Zone_Air_Temperature_Sensor, Best Scaling Factor=0.1
Water_Flow_Sensor, Best Scaling Factor=0.0
Water_Temperature_Sensor, Best Scaling Factor=0.1
Water_Temperature_Setpoint, Best Scaling Factor=0.2
Wind_Direction_Sensor, Best Scaling Factor=0.1
Wind_Speed_Sensor, Best Scaling Factor=0.1
Zone_Air_Dewpoint_Sensor, Best Scaling Factor=0.0
Zone_Air_Humidity_Sensor, Best Scaling Factor=0.1
Zone_Air_Humidity_Setpoint, Best Scaling Factor=0.3
Zone_Air_Temperature_Sensor, Best Scaling Factor=0.0
