In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import f1_score
import re
import warnings
import math
import copy
import time
import os # Aggiunto

# Suppress every Python warning for the remainder of the run.
warnings.filterwarnings('ignore')

# --- HARDWARE CONFIGURATION ---
# Seed the NumPy and PyTorch global random generators with one fixed value.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)
# Pick the GPU when CUDA is available, otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Device in uso: {device}")

# =============================================================================
# === CONFIGURATION SECTION (V-FINAL: 0.9425 + TTA + Fix) ===
# =============================================================================
# Single dictionary of hyper-parameters read by the dataset, the model,
# the training loop and the inference code below.
CONFIG = {
    # --- Architecture (the original 0.9425 setup) ---
    "LSTM_HIDDEN": 128,
    "LSTM_LAYERS": 2,
    "CNN_DROPOUT": 0.2,
    "LSTM_DROPOUT": 0.3,
    "CLASSIFIER_DROPOUT": 0.3,

    # --- Features & windowing (the original setup) ---
    "WINDOW_SIZE": 40,  # rows per sliding window
    "STRIDE": 10,       # step between consecutive window starts

    # --- Training (the original setup) ---
    "LEARNING_RATE": 1e-3,
    "WEIGHT_DECAY": 1e-4,
    "EPOCHS": 50,                # original value
    "BATCH_SIZE": 64,
    "GRADIENT_CLIP_VALUE": 1.0,  # max gradient norm passed to clip_grad_norm_
    "EARLY_STOPPING_PATIENCE": 15,  # a little extra patience does not hurt
    "LABEL_SMOOTHING": 0.1,

    # --- Advice 16: test-time augmentation (TTA) ---
    "TTA_STEPS": 10,          # inference passes per fold model
    "TTA_NOISE_LEVEL": 0.02,  # std of the Gaussian noise added on noisy passes

    # --- Fixed ---
    "K_FOLDS": 5,
}

# Echo the full configuration so every run log records its settings.
print("--- Esecuzione Run con Configurazione (V-FINALE: 0.9425 + TTA + NaN/Time Fix) ---")
for k, v in CONFIG.items():
    print(f"{k}: {v}")
print("-----------------------------------------")

# =============================================================================

# --- 1. LOADING AND CLEANING ---
print("--- 1. Caricamento e Pulizia Iniziale ---")
INPUT_DIR = "/kaggle/input/pirate"
if not os.path.exists(INPUT_DIR):
    print("Percorso Kaggle non trovato, uso i file locali...")
    INPUT_DIR = "."  # local fallback

# The four CSV files, in the order they are unpacked below.
_CSV_NAMES = (
    'pirate_pain_train.csv',
    'pirate_pain_train_labels.csv',
    'pirate_pain_test.csv',
    'sample_submission.csv',
)

# Read everything from INPUT_DIR; if any single read fails, report the error
# and re-read all four files from the current working directory instead.
try:
    df_features_raw, df_labels_raw, df_test_raw, sample_sub_df = (
        pd.read_csv(os.path.join(INPUT_DIR, csv_name)) for csv_name in _CSV_NAMES
    )
except Exception as e:
    print(f"Errore caricamento file: {e}")
    df_features_raw, df_labels_raw, df_test_raw, sample_sub_df = (
        pd.read_csv(csv_name) for csv_name in _CSV_NAMES
    )


# --- FIX FOR THE 0.6494 CRASH (complete) ---
# 1. Numeric columns: all 31 joints plus the four pain surveys.
cols_to_fill_numeric = [f'joint_{i:02d}' for i in range(31)]
cols_to_fill_numeric += ['pain_survey_1', 'pain_survey_2', 'pain_survey_3', 'pain_survey_4']
print(f"Pre-fillna (Continui): NaN nel test set: {df_test_raw[cols_to_fill_numeric].isnull().sum().sum()}")

# Forward-fill each column inside its own sample so values never leak
# from one sample_index into the next.
_present_cols = [name for name in cols_to_fill_numeric if name in df_test_raw.columns]
for col in _present_cols:
    df_test_raw[col] = df_test_raw.groupby('sample_index')[col].ffill()

# Whatever is still missing (e.g. leading NaNs of a sample) becomes 0.
df_test_raw = df_test_raw.fillna(0)
print(f"Post-fillna (Continui): NaN nel test set: {df_test_raw[cols_to_fill_numeric].isnull().sum().sum()}")
# --- END FIX 1 ---

# --- TEXT FEATURE LOGIC (FROM THE 0.9425 VERSION) ---
# Collect the object-dtype columns of the training frame, ignoring the id/label columns.
exclude_cols = ['label', 'sample_index']
string_cols = df_features_raw.select_dtypes(include=['object']).columns.tolist()
string_cols = [c for c in string_cols if c not in exclude_cols]
# TEXT_COL stays None and TEXT_VOCAB_SIZE stays 0 when no such column exists.
TEXT_COL = None
TEXT_VOCAB_SIZE = 0
if len(string_cols) > 0:
    # Only the FIRST object column is used; the log message calls it
    # 'Team Name' whatever the column is actually named.
    TEXT_COL = string_cols[0] 
    print(f"Trovata colonna 'Team Name': {TEXT_COL}")
    def clean_team_name(text):
        """Normalise a raw value: NaN -> "unknown", otherwise lowercase and keep only [a-z0-9]."""
        if pd.isna(text): return "unknown" # handles NaN
        return re.sub(r'[^a-z0-9]', '', str(text).lower())
    
    df_features_raw[TEXT_COL] = df_features_raw[TEXT_COL].apply(clean_team_name)
    df_test_raw[TEXT_COL] = df_test_raw[TEXT_COL].apply(clean_team_name)
    
    le_text = LabelEncoder()
    # Fit on the union of train and test values so that transform() below never
    # meets an unseen category. 'unknown' is part of the vocabulary only if a
    # NaN was actually cleaned above; it is not added explicitly.
    all_text = pd.concat([df_features_raw[TEXT_COL], df_test_raw[TEXT_COL]], axis=0).unique()
    le_text.fit(all_text)
    
    # Replace the cleaned strings with their integer codes in both frames.
    df_features_raw[TEXT_COL] = le_text.transform(df_features_raw[TEXT_COL])
    df_test_raw[TEXT_COL] = le_text.transform(df_test_raw[TEXT_COL])
    
    TEXT_VOCAB_SIZE = len(le_text.classes_)
    print(f"Vocabolario Text Size: {TEXT_VOCAB_SIZE}")
else:
    print("Nessuna colonna 'Team Name' trovata.")

# --- 2. FEATURE ENGINEERING (same as 0.9425, dynamic) ---
JOINT_COLS = [f'joint_{i:02d}' for i in range(30)]
SURVEY_COLS = ['pain_survey_1', 'pain_survey_2', 'pain_survey_3', 'pain_survey_4']
TIME_COL = 'time'


def engineer_features(df, is_test=False):
    """Return a copy of *df* with per-joint deltas and a cyclic time encoding.

    Added columns:
      * ``d_<joint>`` for every name in JOINT_COLS: first difference of the
        joint within each ``sample_index`` group, with the first row of each
        group set to 0.
      * ``sin_time`` / ``cos_time``: sine and cosine of
        ``2 * pi * time / max_time_val``, where ``max_time_val`` is the largest
        time value in *df* plus one (160 when that result is <= 1).

    The ``joint_30`` column is removed when present. *is_test* only selects
    which log line is printed. The input frame is not modified.
    """
    df_eng = df.copy()

    # One grouped diff over all joint columns, then attach each result
    # under its 'd_' name in JOINT_COLS order.
    joint_deltas = df_eng.groupby('sample_index')[JOINT_COLS].diff().fillna(0)
    for joint_name in JOINT_COLS:
        df_eng['d_' + joint_name] = joint_deltas[joint_name]

    # --- THE KEY CORRECTION (from the original version) ---
    # The period of the cyclic encoding is derived from the frame itself.
    max_time_val = df_eng[TIME_COL].max() + 1
    if max_time_val <= 1:
        max_time_val = 160  # fallback

    if not is_test:
        print(f"  Calcolato max_time_val (Train): {max_time_val}")
    else:
        print(f"  Calcolato max_time_val (Test): {max_time_val}")
    # --- END CORRECTION ---

    phase = 2 * np.pi * df_eng[TIME_COL] / max_time_val
    df_eng['sin_time'] = np.sin(phase)
    df_eng['cos_time'] = np.cos(phase)

    return df_eng.drop(columns=['joint_30'], errors='ignore')

# Build the engineered train and test frames; each call logs the
# max_time_val it derived from its own frame.
print("Calcolo Feature Engineering (Train)...")
df_features_engineered = engineer_features(df_features_raw, is_test=False)
print("Calcolo Feature Engineering (Test)...")
df_test_engineered = engineer_features(df_test_raw, is_test=True) 

# Names of the per-joint delta columns created by engineer_features.
DELTA_COLS = [f'd_{col}' for col in JOINT_COLS]
# Every real-valued model input: raw joints, their deltas, and the cyclic time encoding.
CONTINUOUS_COLS = JOINT_COLS + DELTA_COLS + ['sin_time', 'cos_time']
print(f"Numero colonne continue: {len(CONTINUOUS_COLS)}")
# Vocabulary sizes come from the TRAINING frame only (largest value + 1).
# NOTE(review): a larger survey/time value in the test frame is not accounted
# for here — confirm the test ranges never exceed the training ranges.
survey_vocab_sizes = [int(df_features_engineered[c].max() + 1) for c in SURVEY_COLS]
time_vocab_size = int(df_features_engineered[TIME_COL].max() + 1)
# Integer codes of the three target classes; labels outside this mapping become NaN.
label_mapping = {'no_pain': 0, 'low_pain': 1, 'high_pain': 2}
df_labels_raw['label_encoded'] = df_labels_raw['label'].map(label_mapping)

# --- 3. DATASET & SAMPLER ---
class PiratePainDataset(Dataset):
    """Sliding-window view over the rows of each ``sample_index``.

    Every item is one window of ``CONFIG["WINDOW_SIZE"]`` consecutive rows of a
    single sample; window starts advance by ``CONFIG["STRIDE"]``. Samples with
    fewer rows than the window size produce no items.

    Args:
        features_df: frame holding ``sample_index``, CONTINUOUS_COLS,
            SURVEY_COLS, TIME_COL and (optionally) *text_col*.
        labels_df: frame with ``sample_index`` and ``label_encoded``, or
            ``None`` for unlabeled data (items then carry label ``-1``).
        sample_indices: the sample ids to build windows for; ids that do not
            occur in *features_df* are skipped.
        text_col: name of the integer-coded categorical column, or ``None``.
        augment: when true, Gaussian noise (std 0.02) is added to the
            continuous values of every fetched window.
    """

    def __init__(self, features_df, labels_df, sample_indices, text_col=None, augment=False):
        self.features_df = features_df
        if labels_df is None:
            self.labels_df = None
        else:
            self.labels_df = labels_df.set_index('sample_index')
        self.sample_indices = sample_indices
        self.window_size = CONFIG["WINDOW_SIZE"]
        self.stride = CONFIG["STRIDE"]
        self.text_col = text_col
        self.augment = augment
        # One sub-frame per sample id, for direct lookup in __getitem__.
        self.grouped_features = {
            key: frame for key, frame in features_df.groupby('sample_index')
        }
        self.indices = self._create_indices()

    def _create_indices(self):
        """List every ``(sample_idx, start, end)`` window, sample by sample."""
        windows = []
        for sample_idx in self.sample_indices:
            if sample_idx in self.grouped_features:
                n_rows = len(self.grouped_features[sample_idx])
                last_start = n_rows - self.window_size
                windows.extend(
                    (sample_idx, begin, begin + self.window_size)
                    for begin in range(0, last_start + 1, self.stride)
                )
        return windows

    def __len__(self):
        """Number of windows across all samples."""
        return len(self.indices)

    def __getitem__(self, idx):
        """Return ``(x_cont, x_survey, x_time, x_text, label)`` for window *idx*."""
        sample_idx, start, end = self.indices[idx]
        window = self.grouped_features[sample_idx].iloc[start:end]

        cont = window[CONTINUOUS_COLS].values
        if np.isnan(cont).any():
            cont = np.nan_to_num(cont)
        if self.augment:
            cont = cont + np.random.normal(0, 0.02, cont.shape)
        x_cont = torch.tensor(cont, dtype=torch.float)

        # Survey and time codes are shifted up by one before being used as
        # embedding indices (presumably to keep index 0 free — confirm).
        x_survey = torch.tensor(window[SURVEY_COLS].values + 1, dtype=torch.long)
        x_time = torch.tensor(window[TIME_COL].values + 1, dtype=torch.long)

        # A single categorical code per window, taken from its first row
        # (as in the original version).
        if self.text_col:
            x_text = torch.tensor(window[self.text_col].iloc[0], dtype=torch.long)
        else:
            x_text = torch.tensor(0, dtype=torch.long)

        if self.labels_df is None:
            label = torch.tensor(-1, dtype=torch.long)
        else:
            label = torch.tensor(self.labels_df.loc[sample_idx, 'label_encoded'], dtype=torch.long)
        return x_cont, x_survey, x_time, x_text, label
def get_weighted_sampler(dataset, labels_df):
    """Build a class-balancing ``WeightedRandomSampler`` over the dataset's windows.

    Each window gets the weight ``1 / count(label)`` of its sample's label,
    where the counts are taken over all rows of *labels_df* (one row per
    sample, not per window).

    The returned sampler holds exactly one weight per entry of
    ``dataset.indices``, in the same order, so the positions it yields address
    the right windows. A window whose sample has no row in *labels_df* gets
    weight 0.0 and is therefore never drawn. Dropping such windows from the
    weight list instead would shift every later position onto the wrong window.

    Args:
        dataset: object exposing ``indices``, a sequence of
            ``(sample_idx, start, end)`` tuples.
        labels_df: frame with ``sample_index`` and ``label_encoded`` columns.

    Returns:
        A ``WeightedRandomSampler`` drawing ``len(dataset.indices)`` positions
        with replacement.
    """
    sample_to_label = labels_df.set_index('sample_index')['label_encoded'].to_dict()
    class_weights = (1.0 / labels_df['label_encoded'].value_counts()).to_dict()
    weights = [
        class_weights[sample_to_label[s_idx]] if s_idx in sample_to_label else 0.0
        for s_idx, _, _ in dataset.indices
    ]
    return WeightedRandomSampler(weights, num_samples=len(weights), replacement=True)

# --- 4. LOSS FUNCTION ---
class FocalLoss(nn.Module):
    """Focal loss built on top of (optionally smoothed, weighted) cross-entropy.

    For each element the loss is ``(1 - pt) ** gamma * ce`` with
    ``ce = cross_entropy(inputs, targets)`` and ``pt = exp(-ce)``.

    Args:
        alpha: optional per-class weight tensor forwarded to
            ``F.cross_entropy`` as ``weight``.
        gamma: focusing exponent applied to ``(1 - pt)``.
        reduction: ``'mean'`` (default), ``'sum'`` or ``'none'``.
        label_smoothing: forwarded to ``F.cross_entropy``.

    Raises:
        ValueError: if *reduction* is not one of the three supported values.
    """

    _REDUCTIONS = ('none', 'mean', 'sum')

    def __init__(self, alpha=None, gamma=2.0, reduction='mean', label_smoothing=0.0):
        super().__init__()
        if reduction not in self._REDUCTIONS:
            raise ValueError(
                f"reduction must be one of {self._REDUCTIONS}, got {reduction!r}"
            )
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction
        self.label_smoothing = label_smoothing

    def forward(self, inputs, targets):
        """Return the focal loss of *inputs* (logits) against *targets*, reduced as configured."""
        ce_loss = F.cross_entropy(
            inputs,
            targets,
            reduction='none',
            weight=self.alpha,
            label_smoothing=self.label_smoothing,
        )
        pt = torch.exp(-ce_loss)
        focal_loss = ((1 - pt) ** self.gamma) * ce_loss
        # Honour the configured reduction; previously the mean was always returned.
        if self.reduction == 'none':
            return focal_loss
        if self.reduction == 'sum':
            return focal_loss.sum()
        return focal_loss.mean()

# --- 5. MODEL (THE ORIGINAL 0.9425) ---
class PiratePainModel_Original(nn.Module):
    """Embeddings -> two Conv1d blocks -> LSTM -> linear head with 3 outputs.

    Args:
        n_continuous: number of continuous features per time step.
        survey_vocab_sizes: one vocabulary size per survey column; each gets
            its own 4-dimensional embedding with ``size + 2`` rows.
        time_vocab_size: vocabulary size of the time index (8-dimensional
            embedding with ``size + 2`` rows).
        text_vocab_size: vocabulary size of the categorical column; when it is
            0 no text embedding is created and ``x_text`` is ignored.
    """

    def __init__(self, n_continuous, survey_vocab_sizes, time_vocab_size, text_vocab_size):
        super().__init__()

        survey_dim, time_dim, text_dim = 4, 8, 8

        self.emb_surveys = nn.ModuleList(
            [nn.Embedding(vocab + 2, survey_dim) for vocab in survey_vocab_sizes]
        )
        self.emb_time = nn.Embedding(time_vocab_size + 2, time_dim)

        self.use_text = (text_vocab_size > 0)
        if self.use_text:
            self.emb_text = nn.Embedding(text_vocab_size + 2, text_dim)
        else:
            text_dim = 0

        # Width of the concatenated per-step feature vector fed to the CNN.
        input_dim = n_continuous + survey_dim * len(survey_vocab_sizes) + time_dim + text_dim

        # Two identical Conv -> BatchNorm -> ReLU -> Dropout stages (64 then 128 channels).
        cnn_drop = CONFIG["CNN_DROPOUT"]
        stages = []
        channels_in = input_dim
        for channels_out in (64, 128):
            stages.append(nn.Conv1d(in_channels=channels_in, out_channels=channels_out, kernel_size=3, padding=1))
            stages.append(nn.BatchNorm1d(channels_out))
            stages.append(nn.ReLU())
            stages.append(nn.Dropout(cnn_drop))
            channels_in = channels_out
        self.cnn = nn.Sequential(*stages)

        self.lstm = nn.LSTM(
            input_size=128,
            hidden_size=CONFIG["LSTM_HIDDEN"],
            num_layers=CONFIG["LSTM_LAYERS"],
            batch_first=True,
            dropout=CONFIG["LSTM_DROPOUT"],
            bidirectional=False,
        )

        self.dropout = nn.Dropout(CONFIG["CLASSIFIER_DROPOUT"])
        self.classifier = nn.Linear(CONFIG["LSTM_HIDDEN"], 3)

    def forward(self, x_cont, x_survey, x_time, x_text):
        """Return logits of shape (batch, 3) for a batch of windows.

        ``x_cont`` must be 3-D (unpacked as batch, sequence, feature); survey
        column ``i`` is read from ``x_survey[:, :, i]``; ``x_text`` holds one
        code per window and is repeated along the sequence axis.
        """
        _, seq_len, _ = x_cont.shape

        pieces = [x_cont]
        for col, embedding in enumerate(self.emb_surveys):
            pieces.append(embedding(x_survey[:, :, col]))
        pieces.append(self.emb_time(x_time))
        if self.use_text:
            text_vec = self.emb_text(x_text).unsqueeze(1)
            pieces.append(text_vec.expand(-1, seq_len, -1))
        fused = torch.cat(pieces, dim=2)

        # Conv1d expects channels before the sequence axis; swap back for the LSTM.
        conv_out = self.cnn(fused.transpose(1, 2)).transpose(1, 2)
        lstm_out, _ = self.lstm(conv_out)

        # Classify from the LSTM output at the final time step.
        final_state = lstm_out[:, -1, :]
        return self.classifier(self.dropout(final_state))

# --- 6. TRAINING & VALIDATION UTILS ---
def train_epoch(model, loader, optimizer, criterion):
    """Run one optimisation pass over *loader* and return the mean batch loss.

    Each batch is a 5-tuple ``(x_cont, x_survey, x_time, x_text, y)``; all five
    tensors are moved to the global ``device``. Gradients are clipped to a
    total norm of ``CONFIG["GRADIENT_CLIP_VALUE"]`` before every step.
    """
    model.train()
    running_loss = 0
    for batch in loader:
        xc, xs, xt, xtxt, y = (tensor.to(device) for tensor in batch)
        optimizer.zero_grad()
        loss = criterion(model(xc, xs, xt, xtxt), y)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), CONFIG["GRADIENT_CLIP_VALUE"])
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(loader)

# --- 6.5. EARLY STOPPING CLASS (from the labs) ---
class EarlyStopping:
    """Stop training when the validation F1 has not improved for *patience* calls.

    A call counts as an improvement when it is the first one or when
    ``val_f1`` is not below ``best_score + delta``. On every improvement the
    model's ``state_dict`` is saved to *path* and, if OOF data was supplied,
    the out-of-fold arrays are updated in place.

    Args:
        patience: consecutive non-improving calls tolerated before
            ``early_stop`` is set.
        verbose: when true, log a line for each checkpoint / OOF save.
        delta: minimum margin over ``best_score`` required to count as an
            improvement.
        path: file the best ``state_dict`` is written to.
        trace_func: callable used for logging (defaults to ``print``).
    """

    def __init__(self, patience=7, verbose=False, delta=0, path='checkpoint.pt', trace_func=print):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        # Best score before the most recent improvement; used only for logging.
        self.previous_best = None
        self.early_stop = False
        self.delta = delta
        self.path = path
        self.trace_func = trace_func

    def __call__(self, val_f1, model, oof_data=None):
        """Record *val_f1*; checkpoint on improvement, otherwise advance the patience counter."""
        score = val_f1
        if self.best_score is not None and score < self.best_score + self.delta:
            self.counter += 1
            self.trace_func(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
            return

        # Keep the old best so the log line can show "old --> new"; previously
        # best_score was overwritten first and the message printed the new
        # value on both sides of the arrow.
        self.previous_best = self.best_score
        self.best_score = score
        self.save_checkpoint(val_f1, model)
        if oof_data:
            self.save_oof(oof_data)
        self.counter = 0

    def save_checkpoint(self, val_f1, model):
        """Write ``model.state_dict()`` to ``self.path`` (logging first when verbose)."""
        if self.verbose:
            # No earlier checkpoint yet -> show -inf as the starting point.
            shown_previous = float('-inf') if self.previous_best is None else self.previous_best
            self.trace_func(f'Validation F1 increased ({shown_previous:.6f} --> {val_f1:.6f}).  Saving model to {self.path} ...')
        torch.save(model.state_dict(), self.path)

    def save_oof(self, oof_data):
        """Copy the current fold's per-sample probabilities and labels into the global OOF arrays.

        *oof_data* is a dict with keys ``oof_probs_all``, ``oof_targets_all``
        (arrays written in place), ``sample_to_idx`` (sample id -> row in those
        arrays), and ``current_probs``, ``indices``, ``labels`` (parallel
        sequences for the samples of the current validation fold).
        """
        if self.verbose:
            self.trace_func(f'Saving OOF probabilities for {len(oof_data["indices"])} samples...')
        oof_probs_all = oof_data['oof_probs_all']
        oof_targets_all = oof_data['oof_targets_all']
        sample_to_idx = oof_data['sample_to_idx']
        current_val_probs = oof_data['current_probs']
        current_val_indices = oof_data['indices']
        current_val_labels = oof_data['labels']
        for idx, s_idx in enumerate(current_val_indices):
            global_idx = sample_to_idx[s_idx]
            oof_probs_all[global_idx] = current_val_probs[idx]
            oof_targets_all[global_idx] = current_val_labels[idx]

# --- 7. MAIN LOOP (NO SCHEDULER, AS IN 0.9425) ---
# Stratified K-fold cross-validation at the SAMPLE level: each fold trains a
# fresh model, keeps its best checkpoint (by validation F1) and fills the
# out-of-fold (OOF) arrays used later for threshold tuning.
print("\n--- Avvio K-Fold (V-FINALE: 0.9425 + TTA + NaN/Time Fix) ---")
start_time_cv = time.time()
all_sample_indices = df_labels_raw['sample_index'].unique()
# Labels aligned with all_sample_indices, used only for stratification.
all_labels_strat = df_labels_raw.set_index('sample_index').loc[all_sample_indices]['label_encoded'].values
# OOF buffers: one row per sample, written in place by EarlyStopping.save_oof.
oof_probs = np.zeros((len(all_sample_indices), 3))
oof_targets = np.zeros(len(all_sample_indices))
sample_to_idx = {s: i for i, s in enumerate(all_sample_indices)}
models_list = [] 
skf = StratifiedKFold(n_splits=CONFIG["K_FOLDS"], shuffle=True, random_state=SEED)

for fold, (train_idx, val_idx) in enumerate(skf.split(all_sample_indices, all_labels_strat)):
    fold_start_time = time.time()
    print(f"\n--- Fold {fold+1}/{CONFIG['K_FOLDS']} ---")
    train_samples = all_sample_indices[train_idx]
    val_samples = all_sample_indices[val_idx]
    
    # Fit the scaler on the training-fold rows only, then apply it to a copy
    # of the whole frame (both training and validation rows).
    scaler = StandardScaler()
    train_subset = df_features_engineered[df_features_engineered['sample_index'].isin(train_samples)]
    scaler.fit(train_subset[CONTINUOUS_COLS])
    
    df_fold = df_features_engineered.copy()
    df_fold[CONTINUOUS_COLS] = scaler.transform(df_fold[CONTINUOUS_COLS])
    
    # Noise augmentation is enabled for training windows only.
    train_ds = PiratePainDataset(df_fold, df_labels_raw, train_samples, TEXT_COL, augment=True)
    val_ds = PiratePainDataset(df_fold, df_labels_raw, val_samples, TEXT_COL, augment=False)
    
    # The sampler supplies the (class-weighted) ordering, hence shuffle=False.
    sampler = get_weighted_sampler(train_ds, df_labels_raw)
    train_loader = DataLoader(train_ds, batch_size=CONFIG["BATCH_SIZE"], sampler=sampler, shuffle=False, drop_last=True)
    val_loader = DataLoader(val_ds, batch_size=CONFIG["BATCH_SIZE"], shuffle=False)
    
    model = PiratePainModel_Original(
        n_continuous=len(CONTINUOUS_COLS), 
        survey_vocab_sizes=survey_vocab_sizes, 
        time_vocab_size=time_vocab_size,
        text_vocab_size=TEXT_VOCAB_SIZE,
    ).to(device)

    optimizer = optim.AdamW(model.parameters(), lr=CONFIG["LEARNING_RATE"], weight_decay=CONFIG["WEIGHT_DECAY"])
    criterion = FocalLoss(alpha=None, gamma=2.0, label_smoothing=CONFIG["LABEL_SMOOTHING"])
    
    # --- NO SCHEDULER (as in 0.9425) ---
    
    # One checkpoint file per fold; EarlyStopping overwrites it on each improvement.
    model_path = f'model_fold_{fold+1}.pth'
    early_stopping = EarlyStopping(patience=CONFIG["EARLY_STOPPING_PATIENCE"], verbose=True, path=model_path)
    
    for ep in range(CONFIG["EPOCHS"]):
        t_loss = train_epoch(model, train_loader, optimizer, criterion)
        # Validation pass: collect one logit row per validation window.
        model.eval()
        val_logits_list = []
        # Sample id of every validation window, in loader order (val_loader is not shuffled).
        window_sample_map_val = [x[0] for x in val_ds.indices]
        with torch.no_grad():
            for xc, xs, xt, xtxt, y in val_loader:
                xc, xs, xt, xtxt = xc.to(device), xs.to(device), xt.to(device), xtxt.to(device)
                logits = model(xc, xs, xt, xtxt)
                val_logits_list.extend(logits.cpu().numpy())
        
        # Average the window logits per sample, THEN apply softmax
        # (despite its name, df_val_probs holds mean logits).
        df_val_logits = pd.DataFrame(val_logits_list, columns=[0, 1, 2])
        df_val_logits['sample_index'] = window_sample_map_val
        df_val_probs = df_val_logits.groupby('sample_index').mean()
        current_val_probs = torch.softmax(torch.tensor(df_val_probs.values), dim=1).numpy()
        current_val_preds = np.argmax(current_val_probs, axis=1)
        current_val_indices = df_val_probs.index
        current_val_labels = df_labels_raw.set_index('sample_index').loc[current_val_indices]['label_encoded'].values
        # Sample-level weighted F1 is the model-selection metric.
        v_f1 = f1_score(current_val_labels, current_val_preds, average='weighted')
        
        # --- NO SCHEDULER.STEP() ---

        # Log the first epoch and then every fifth one.
        if (ep+1) % 5 == 0 or ep == 0:
            print(f"Epoch {ep+1}/{CONFIG['EPOCHS']}, Train Loss: {t_loss:.4f}, Val F1: {v_f1:.4f}, LR: {optimizer.param_groups[0]['lr']:.1e}")
        
        # Everything EarlyStopping.save_oof needs to write this fold's
        # probabilities into the global OOF arrays when the score improves.
        oof_data_dict = {
            'oof_probs_all': oof_probs,
            'oof_targets_all': oof_targets,
            'sample_to_idx': sample_to_idx,
            'current_probs': current_val_probs,
            'indices': current_val_indices,
            'labels': current_val_labels
        }
        early_stopping(v_f1, model, oof_data_dict)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    fold_time = time.time() - fold_start_time
    print(f"Fold {fold+1} Best Val F1: {early_stopping.best_score:.4f} (Tempo: {fold_time:.1f}s)")
    # Restore the best checkpoint of this fold before keeping the model for the ensemble.
    model.load_state_dict(torch.load(model_path))
    models_list.append(model)

cv_time = time.time() - start_time_cv
print(f"\n--- CV Completato in {cv_time:.1f} secondi ---")

# --- 8. THRESHOLD OPTIMISATION ---
# Grid-search a two-threshold cascade on the OOF probabilities:
# predict 2 if p[2] > t_high, else 1 if p[1] > t_low, else 0.
# The plain-argmax score is the baseline a threshold pair has to beat.
print("\n--- Ricerca Soglie Ottimali su OOF ---")
oof_preds_argmax = np.argmax(oof_probs, axis=1)
f1_oof_base = f1_score(oof_targets, oof_preds_argmax, average='weighted')
print(f"OOF F1 (base, argmax): {f1_oof_base:.4f}")
best_score = f1_oof_base
# These defaults remain in place when no pair improves on the baseline.
thr_l_best, thr_h_best = 0.5, 0.5
_low_probs = oof_probs[:, 1]
_high_probs = oof_probs[:, 2]
for t_high in np.arange(0.15, 0.50, 0.01):
    _is_high = _high_probs > t_high
    for t_low in np.arange(0.20, 0.55, 0.01):
        # Only pairs with t_low strictly below t_high are evaluated.
        if t_low < t_high:
            preds = np.where(_is_high, 2, np.where(_low_probs > t_low, 1, 0))
            s = f1_score(oof_targets, preds, average='weighted')
            if s > best_score:
                best_score = s
                thr_l_best, thr_h_best = t_low, t_high
print(f"Soglie Trovate: Low>{thr_l_best:.2f}, High>{thr_h_best:.2f} -> OOF F1 (ottimizzato): {best_score:.4f}")

# --- 9. FINAL INFERENCE (TTA) ---
print("\n--- Generazione Submission (Ensemble + TTA) ---")
# Scale the test features with a scaler fit on ALL training rows.
# NOTE(review): each fold model was trained on data scaled by its own
# per-fold scaler (fit on that fold's training rows only), so the scaling
# used here differs slightly from what every model saw — confirm this is intended.
final_scaler = StandardScaler()
final_scaler.fit(df_features_engineered[CONTINUOUS_COLS])
df_test_scaled = df_test_engineered.copy()
df_test_scaled[CONTINUOUS_COLS] = final_scaler.transform(df_test_scaled[CONTINUOUS_COLS])
sub_indices = sample_sub_df['sample_index'].unique()
test_ds_final = PiratePainDataset(df_test_scaled, None, sub_indices, TEXT_COL, augment=False)
test_loader_final = DataLoader(test_ds_final, batch_size=CONFIG["BATCH_SIZE"], shuffle=False)
# Sample id of every test window, in loader order (the loader is not shuffled).
window_sample_map_test = [x[0] for x in test_ds_final.indices]

ensemble_logits = None
TTA_STEPS = CONFIG["TTA_STEPS"]
TTA_NOISE = CONFIG["TTA_NOISE_LEVEL"]
for i, model in enumerate(models_list):
    print(f"Eseguo TTA per Fold {i+1}/{len(models_list)}...")
    model.eval()
    fold_avg_logits = None
    for tta_run in range(TTA_STEPS):
        fold_logits = []
        with torch.no_grad():
            for xc, xs, xt, xtxt, _ in test_loader_final:
                # Run 0 uses the clean inputs; every later run adds Gaussian
                # noise (std TTA_NOISE) to the continuous features only.
                if tta_run > 0: 
                    noise = torch.normal(0.0, TTA_NOISE, size=xc.shape).to(device)
                    xc = xc.to(device) + noise
                else:
                    xc = xc.to(device)
                xs, xt, xtxt = xs.to(device), xt.to(device), xtxt.to(device)
                logits = model(xc, xs, xt, xtxt)
                fold_logits.extend(logits.cpu().numpy())
        # Mean window logits per sample for this TTA run.
        df_tmp = pd.DataFrame(fold_logits, columns=[0, 1, 2])
        df_tmp['sample_index'] = window_sample_map_test
        df_avg_tta_run = df_tmp.groupby('sample_index').mean()
        # Align to the submission ids; a sample with no windows gets all-zero
        # logits (i.e. a uniform softmax later on).
        if fold_avg_logits is None:
            fold_avg_logits = df_avg_tta_run.reindex(sub_indices, fill_value=0)
        else:
            fold_avg_logits = fold_avg_logits.add(df_avg_tta_run.reindex(sub_indices, fill_value=0), fill_value=0)
    # Average over the TTA runs of this fold model.
    fold_avg_logits /= TTA_STEPS
    if ensemble_logits is None:
        ensemble_logits = fold_avg_logits
    else:
        ensemble_logits = ensemble_logits.add(fold_avg_logits, fill_value=0)

# Average over the fold models (one model is appended to models_list per fold).
ensemble_logits /= CONFIG["K_FOLDS"]
final_probs = torch.softmax(torch.tensor(ensemble_logits.values), dim=1).numpy()
final_preds_list = []
# Apply the two-threshold cascade with the values from the OOF threshold search:
# class 2 is checked first, then class 1, otherwise class 0.
thr_l, thr_h = thr_l_best, thr_h_best
for p in final_probs:
    if p[2] > thr_h: final_preds_list.append(2)
    elif p[1] > thr_l: final_preds_list.append(1)
    else: final_preds_list.append(0)
final_series = pd.Series(final_preds_list, index=ensemble_logits.index)
# Map the integer classes back to their string labels.
inv_map = {v: k for k, v in label_mapping.items()}
submission = final_series.map(inv_map).reset_index()
submission.columns = ['sample_index', 'label']
# Restore the exact row order of the sample submission file.
submission = submission.set_index('sample_index').reindex(sample_sub_df['sample_index']).reset_index()
submission.to_csv('submission.csv', index=False)
print("Fatto! Submission creata.")


Device in uso: cuda
--- Esecuzione Run con Configurazione (V-FINALE: 0.9425 + TTA + NaN/Time Fix) ---
LSTM_HIDDEN: 128
LSTM_LAYERS: 2
CNN_DROPOUT: 0.2
LSTM_DROPOUT: 0.3
CLASSIFIER_DROPOUT: 0.3
WINDOW_SIZE: 40
STRIDE: 10
LEARNING_RATE: 0.001
WEIGHT_DECAY: 0.0001
EPOCHS: 50
BATCH_SIZE: 64
GRADIENT_CLIP_VALUE: 1.0
EARLY_STOPPING_PATIENCE: 15
LABEL_SMOOTHING: 0.1
TTA_STEPS: 10
TTA_NOISE_LEVEL: 0.02
K_FOLDS: 5
-----------------------------------------
--- 1. Caricamento e Pulizia Iniziale ---
Pre-fillna (Continui): NaN nel test set: 0
Post-fillna (Continui): NaN nel test set: 0
Trovata colonna 'Team Name': n_legs
Vocabolario Text Size: 2
Calcolo Feature Engineering (Train)...
  Calcolato max_time_val (Train): 160
Calcolo Feature Engineering (Test)...
  Calcolato max_time_val (Test): 160
Numero colonne continue: 62

--- Avvio K-Fold (V-FINALE: 0.9425 + TTA + NaN/Time Fix) ---

--- Fold 1/5 ---
Epoch 1/50, Train Loss: 0.2523, Val F1: 0.7898, LR: 1.0e-03
Validation F1 increased (0.789821 --> 0