In [1]:
import os, json, joblib, numpy as np, pandas as pd
import random, math
from pathlib import Path
import warnings 
warnings.filterwarnings("ignore")

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam, AdamW
from torch.cuda.amp import GradScaler, autocast
from torch.optim.lr_scheduler import CosineAnnealingWarmRestarts

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import StratifiedKFold, StratifiedGroupKFold
from timm.scheduler import CosineLRScheduler
from scipy.signal import firwin

import torchvision

import polars as pl
from tqdm import tqdm
from copy import deepcopy

from IPython.display import clear_output
from functools import partial
from scipy.spatial.transform import Rotation as R
from types import SimpleNamespace
from scipy.optimize import linear_sum_assignment

In [2]:
# Configuration
# This cell defines every tunable constant plus the torch device used by the later cells.
# NOTE(review): no training loop is visible in this notebook, so the optimisation-related
# constants (BATCH_SIZE, LR_INIT, WD, MIXUP_ALPHA, MASKING_PROB, PATIENCE, warm-up/LR values)
# appear to be leftovers from the training notebook - confirm before removing them.
TRAIN = False                     # set to True when you want to train (the visible cells only run inference)
RAW_DIR = Path("/kaggle/input/cmi-detect-behavior-with-sensor-data")  # competition data root
PRETRAINED_DIR = Path("/kaggle/input/cmi-1d-cnn-v2") # used when TRAIN=False: scaler, feature list, fold checkpoints
EXPORT_DIR = Path("./")                                    # artefacts will be saved here
BATCH_SIZE = 64
PAD_PERCENTILE = 100
# NOTE(review): `maxlen` simply aliases PAD_PERCENTILE; at inference the padding length is
# read from `sequence_maxlen.npy` instead, so this value looks unused here - TODO confirm.
maxlen = PAD_PERCENTILE
LR_INIT = 1e-3
WD = 3e-3
MIXUP_ALPHA = 0.4
MASKING_PROB = 0.25
PATIENCE = 40
FOLDS = 5                         # number of cross-validation folds / checkpoints in the ensemble
random_state = 42
epochs_warmup = 20
warmup_lr_init = 1.822126131809773e-05
lr_min = 3.810323058740104e-09

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"▶ imports ready · pytorch {torch.__version__} · device: {device}")

# ================================
# Model Components
# ================================

class ImuFeatureExtractor(nn.Module):
    """Expand a 14-channel IMU tensor into 45 hand-crafted + learned channels.

    Input layout along the channel axis (as indexed by ``forward``):
    0-2 acceleration, 3 the quaternion ``w`` component, 4-6 the remaining
    rotation channels (called "gyro" throughout), 7-9 linear acceleration,
    10-12 angular velocity, 13 angular distance.

    The output concatenates 21 acceleration-derived channels followed by 24
    rotation-derived channels, in that order.

    Args:
        fs: Stored on the instance; not used by the computation.
        add_quaternion: Stored on the instance; not used by the computation.
    """

    def __init__(self, fs=10., add_quaternion=False):
        super().__init__()
        self.fs = fs
        self.add_quaternion = add_quaternion

        # Depthwise (groups=3) learnable smoothing filters, one per axis.
        kernel = 15
        half = kernel // 2
        self.lpf_acc = nn.Conv1d(3, 3, kernel, padding=half, groups=3, bias=False)
        nn.init.kaiming_normal_(self.lpf_acc.weight, mode='fan_out')
        self.lpf_gyro = nn.Conv1d(3, 3, kernel, padding=half, groups=3, bias=False)
        nn.init.kaiming_normal_(self.lpf_gyro.weight, mode='fan_out')

    @staticmethod
    def _first_difference(signal, mode='constant'):
        """Difference along time, left-padded by one step to keep length T."""
        step = signal[:, :, 1:] - signal[:, :, :-1]
        return F.pad(step, (1, 0), mode)

    def forward(self, imu):
        """Return the (B, 45, T) feature tensor for a (B, 14, T) input."""
        batch, channels, steps = imu.shape  # also enforces a rank-3 input

        # Slice the raw channel groups.
        acc = imu[:, 0:3, :]
        rot_w = imu[:, 3, :]
        gyro = imu[:, 4:7, :]
        linear_acc = imu[:, 7:10, :]
        angular_vel = imu[:, 10:13, :]
        angular_distance = imu[:, 13:14, :]  # slice keeps shape (B, 1, T)

        diff = self._first_difference

        # Magnitudes of the pre-computed linear acceleration / angular velocity.
        linear_acc_mag = torch.norm(linear_acc, dim=1, keepdim=True)
        angular_vel_mag = torch.norm(angular_vel, dim=1, keepdim=True)

        # Rotation angle from the w component, clamped into acos' domain.
        # NOTE(review): the caller appears to standardise inputs before the
        # model, so this may not be a physical angle - confirm.
        rot_angle = 2 * torch.acos(rot_w.clamp(-1.0, 1.0)).unsqueeze(1)

        # Learned low-pass output; the residual acts as the high-pass part.
        acc_lpf = self.lpf_acc(acc)
        gyro_lpf = self.lpf_gyro(gyro)

        acc_group = (
            acc,
            torch.norm(acc, dim=1, keepdim=True),   # magnitude
            diff(acc),                              # jerk, zero-padded
            acc ** 2,                               # energy
            acc_lpf,
            acc - acc_lpf,
            linear_acc,
            linear_acc_mag,
            diff(linear_acc_mag, 'replicate'),
        )
        rot_group = (
            gyro,
            torch.norm(gyro, dim=1, keepdim=True),
            diff(gyro),                             # zero-padded
            gyro ** 2,
            gyro_lpf,
            gyro - gyro_lpf,
            angular_vel,
            angular_vel_mag,
            diff(angular_vel_mag, 'replicate'),
            angular_distance,
            rot_angle,
            diff(rot_angle, 'replicate'),
        )
        return torch.cat(acc_group + rot_group, dim=1)


class SEBlock(nn.Module):
    """Squeeze-and-excitation gate for (batch, channels, length) tensors.

    Each channel is averaged over the length axis, passed through a
    bias-free bottleneck MLP (Linear -> SiLU -> Linear -> Sigmoid) and the
    resulting per-channel weight in (0, 1) rescales the input.

    Args:
        channels: Number of input (and output) channels.
        reduction: Bottleneck ratio; the hidden width is ``channels // reduction``.
    """

    def __init__(self, channels, reduction=8):
        super().__init__()
        bottleneck = channels // reduction
        self.squeeze = nn.AdaptiveAvgPool1d(1)
        self.excitation = nn.Sequential(
            nn.Linear(channels, bottleneck, bias=False),
            nn.SiLU(inplace=True),
            nn.Linear(bottleneck, channels, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Return ``x`` scaled channel-wise by the learned gate (same shape as ``x``)."""
        batch, channels, _ = x.size()
        pooled = self.squeeze(x).view(batch, channels)
        scale = self.excitation(pooled).view(batch, channels, 1)
        # Broadcasting over the length axis is equivalent to expanding the gate.
        return x * scale

class ResidualSECNNBlock(nn.Module):
    """Residual block: two Conv1d+BatchNorm stages, SE gating, skip connection.

    The skip path is the identity when the channel count is unchanged and a
    1x1 Conv1d + BatchNorm projection otherwise. After the residual sum the
    block applies SiLU, max-pooling and dropout.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels produced by both convolutions.
        kernel_size: Convolution kernel width; padding is ``kernel_size // 2``.
        pool_size: Window of the trailing ``MaxPool1d``.
        dropout: Dropout probability applied after pooling.
    """

    def __init__(self, in_channels, out_channels, kernel_size, pool_size=2, dropout=0.3):
        super().__init__()
        same_pad = kernel_size // 2

        # Main path: conv -> bn -> SiLU -> conv -> bn -> SE.
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size, padding=same_pad, bias=False)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size, padding=same_pad, bias=False)
        self.bn2 = nn.BatchNorm1d(out_channels)
        self.se = SEBlock(out_channels)

        # Skip path: project only when the channel count changes.
        if in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_channels, out_channels, 1, bias=False),
                nn.BatchNorm1d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

        self.pool = nn.MaxPool1d(pool_size)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply the block to a (batch, in_channels, length) tensor."""
        residual = self.shortcut(x)

        hidden = F.silu(self.bn1(self.conv1(x)))
        hidden = self.se(self.bn2(self.conv2(hidden)))

        # Residual sum followed by activation, then down-sampling and dropout.
        activated = F.silu(hidden + residual)
        return self.dropout(self.pool(activated))

class AttentionLayer(nn.Module):
    """Additive attention pooling over the sequence axis.

    A single linear unit scores every time step; the tanh-squashed scores are
    soft-maxed across the sequence and used as weights for a weighted sum.

    Args:
        hidden_dim: Size of the last dimension of the input.
    """

    def __init__(self, hidden_dim):
        super().__init__()
        self.attention = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Pool ``x`` of shape (batch, seq_len, hidden_dim) to (batch, hidden_dim)."""
        step_scores = torch.tanh(self.attention(x))        # (batch, seq_len, 1)
        step_weights = torch.softmax(step_scores, dim=1)   # normalised over seq_len
        return (x * step_weights).sum(dim=1)

▶ imports ready · pytorch 2.6.0+cu124 · device: cuda


In [3]:
class TwoBranchModel(nn.Module):
    """Two-branch 1D-CNN gesture classifier over IMU and ToF/thermal channels.

    The input is a (B, T, D) tensor whose first ``imu_dim_raw`` features are
    IMU channels and whose remaining features are ToF/thermal channels. The
    IMU part is expanded by ``ImuFeatureExtractor`` and processed by two
    parallel residual stacks (acceleration-derived and rotation-derived
    channels); the ToF part goes through its own residual stack and is scaled
    by a learned scalar gate. The three 128-channel outputs are concatenated
    (384 channels), passed through two Conv1d stages, globally average-pooled
    and classified by a small MLP.

    Args:
        pad_len: Sequence length T; sizes the gate's first Linear layer, so
            inputs must have exactly this many time steps.
        imu_dim_raw: Number of raw IMU features at the start of the feature axis.
        tof_dim: Number of ToF/thermal features after the IMU features.
        n_classes: Number of output logits.
        dropouts: Seven dropout rates; indices 0-1 are shared by both IMU
            stacks, 2-3 are used by the ToF stack, 4-5 by the CNN backbone,
            5 again by the first dense layer and 6 by the second.
        feature_engineering: When True the IMU channels are expanded to 45
            channels by ``ImuFeatureExtractor``.
        **kwargs: Forwarded to ``ImuFeatureExtractor``.

    NOTE(review): ``acc_dim``/``rot_dim`` are hard-coded to 21/24 and
    ``forward`` always slices with them, so ``feature_engineering=False``
    looks unsupported unless the raw IMU input has exactly 45 channels -
    confirm before relying on that flag.
    """
    def __init__(self, pad_len, imu_dim_raw, tof_dim, n_classes, dropouts=[0.3, 0.3, 0.3, 0.3, 0.4, 0.5, 0.3], 
                 feature_engineering=True, **kwargs):
        super().__init__()
        self.feature_engineering = feature_engineering
        if feature_engineering:
            self.imu_fe = ImuFeatureExtractor(**kwargs)
            imu_dim = 45  # 21 acceleration-derived + 24 rotation-derived channels
        else:
            self.imu_fe = nn.Identity()
            imu_dim = imu_dim_raw
            
        self.imu_dim = imu_dim
        self.tof_dim = tof_dim
        self.fir_nchan = imu_dim_raw  # split point between IMU and ToF features in forward()
        
        # --- 1. Input branches ---
        # IMU deep branch
        # (earlier single-stack variant, kept for reference)
        # self.imu_block1 = ResidualSECNNBlock(imu_dim, 64, 3, 1, dropout=dropouts[0])
        # self.imu_block2 = ResidualSECNNBlock(64, 128, 5, 1, dropout=dropouts[1])
        # Channel counts of the two feature groups emitted by ImuFeatureExtractor.
        self.acc_dim, self.rot_dim = 21, 24
        # pool_size=1 in every block below, so the time length is preserved.
        self.imu_block11 = ResidualSECNNBlock(self.acc_dim, 64, 3, 1, dropout=dropouts[0])
        self.imu_block12 = ResidualSECNNBlock(64, 128, 5, 1, dropout=dropouts[1])
        self.imu_block21 = ResidualSECNNBlock(self.rot_dim, 64, 3, 1, dropout=dropouts[0])
        self.imu_block22 = ResidualSECNNBlock(64, 128, 5, 1, dropout=dropouts[1])
        
        # TOF/Thermal lighter branch
        # v1 (plain conv stack, kept for reference)
        # self.tof_conv1 = nn.Conv1d(tof_dim, 64, 3, padding=1, bias=False)
        # self.tof_bn1 = nn.BatchNorm1d(64)
        # self.tof_drop1 = nn.Dropout(dropouts[2])
        
        # self.tof_conv2 = nn.Conv1d(64, 128, 3, padding=1, bias=False)
        # self.tof_bn2 = nn.BatchNorm1d(128)
        # self.tof_drop2 = nn.Dropout(dropouts[3])

        # v2 (residual SE blocks; this is the active variant)
        self.tof_conv1 = ResidualSECNNBlock(tof_dim, 64, 3, 1, dropout=dropouts[2])
        self.tof_conv2 = ResidualSECNNBlock(64, 128, 3, 1, dropout=dropouts[3])

    

        # Gate: averages the ToF features per time step, then maps the
        # length-pad_len profile to a single sigmoid weight per sample.
        self.pool = nn.AdaptiveAvgPool1d(1)
        self.dense1_gate = nn.Linear(pad_len, 16)
        self.dense2_gate = nn.Linear(16, 1)

        

        # --- 2. CNN backbone over the merged branches ---
        # 128 (acc stack) + 128 (rot stack) + 128 (ToF stack) = 384 channels.
        merged_channels = 256 + 128
        self.cnn_backbone1 = nn.Sequential(
            nn.Conv1d(merged_channels, 256, kernel_size=7, padding=3, bias=False),
            nn.BatchNorm1d(256),
            nn.SiLU(),
            nn.Dropout(dropouts[4])
        )
        self.cnn_backbone2 = nn.Sequential(
            nn.Conv1d(256, 512, kernel_size=5, padding=2, bias=False),
            nn.BatchNorm1d(512),
            nn.SiLU(),
            nn.Dropout(dropouts[5])
        )

        
        # --- 3. Global pooling layer ---
        # Adaptive average pooling collapses the time dimension to 1.
        # self.global_pool = AttentionLayer(512)
        self.global_pool = nn.AdaptiveAvgPool1d(1)  

        # --- 4. Fully connected head ---
        # The input width equals the channel count of the last CNN layer (512).
        cnn_out_dim = 512
        self.dense1 = nn.Linear(cnn_out_dim, 256, bias=False)
        self.bn_dense1 = nn.BatchNorm1d(256)
        self.drop1 = nn.Dropout(dropouts[5]) # reuses the dropout rate of cnn_backbone2
        
        self.dense2 = nn.Linear(256, 128, bias=False)
        self.bn_dense2 = nn.BatchNorm1d(128)
        self.drop2 = nn.Dropout(dropouts[6])
        
        self.classifier = nn.Linear(128, n_classes)
        
    def forward(self, x):
        """Classify a batch of padded sequences.

        Args:
            x: Tensor of shape (B, T, imu_dim_raw + tof_dim) with T == pad_len.

        Returns:
            Tuple ``(logits, x, gate)``: class logits (B, n_classes), the
            128-dimensional penultimate features (B, 128) and the ToF gate
            value (B, 1).
        """
        # Split the feature axis and move channels in front of time.
        imu = x[:, :, :self.fir_nchan].transpose(1, 2)  # (B, D_imu_raw, T)
        tof = x[:, :, self.fir_nchan:].transpose(1, 2)  # (B, D_tof, T)

        imu = self.imu_fe(imu)  # (B, D_imu, T)
        
        
        # IMU branch
        # x1 = self.imu_block1(imu) # (B, 64, T)
        # x1 = self.imu_block2(x1) # (B, 128, T)
        acc = imu[:, :self.acc_dim, :]  # first 21 channels: acceleration-derived features
        rot = imu[:, self.acc_dim:, :]  # remaining channels: rotation-derived features
        x11 = self.imu_block11(acc) # (B, 64, T)
        x11 = self.imu_block12(x11) # (B, 128, T)
        
        x12 = self.imu_block21(rot) # (B, 64, T)
        x12 = self.imu_block22(x12) # (B, 128, T)
        x1 = torch.cat([x11, x12], dim=1) # (B, 256, T)

        
        # TOF branch
        # v1
        # x2 = F.silu(self.tof_bn1(self.tof_conv1(tof)))
        # x2 = self.tof_drop1(x2) # (B, 64, T)
        # x2 = F.silu(self.tof_bn2(self.tof_conv2(x2)))
        # x2 = self.tof_drop2(x2) # (B, 128, T)
        
        # v2
        x2 = self.tof_conv1(tof)
        x2 = self.tof_conv2(x2)
        

        # Gate x2: pool over the ToF feature axis -> (B, T), then MLP -> (B, 1).
        gate_input = self.pool(tof.transpose(1, 2)).squeeze(-1)
        gate_input = F.silu(self.dense1_gate(gate_input))
    
        gate = torch.sigmoid(self.dense2_gate(gate_input)) # -> (B, 1)
        x2 = x2 * gate.unsqueeze(-1)
        
        # Merge the branches. Conv1d expects (Batch, Channels, Length),
        # so no transpose is needed here (unlike an RNN pipeline).
        merged = torch.cat([x1, x2], dim=1) # (B, 384, T)
        
        # --- CNN backbone ---
        cnn_out = self.cnn_backbone1(merged) # (B, 256, T)
        cnn_out = self.cnn_backbone2(cnn_out) # (B, 512, T)

        
        # Global pooling
        pooled = self.global_pool(cnn_out) # (B, 512, 1)
        pooled_flat = torch.flatten(pooled, 1) # (B, 512)
        # pooled_flat = self.global_pool(cnn_out.transpose(1, 2))
        
        # --- Fully connected classification head ---
        x = F.silu(self.bn_dense1(self.dense1(pooled_flat)))
        x = self.drop1(x)
        x = F.silu(self.bn_dense2(self.dense2(x)))
        x = self.drop2(x)
        
        logits = self.classifier(x)
        return logits, x, gate

In [4]:
def remove_gravity_from_acc(acc_data, rot_data):
    """Subtract the gravity vector from raw accelerometer samples.

    For every sample the world-frame gravity vector ``[0, 0, 9.81]`` is
    rotated into the sensor frame with the inverse of that sample's
    orientation quaternion and subtracted from the measured acceleration.

    Args:
        acc_data: DataFrame with ``acc_x``/``acc_y``/``acc_z`` columns, or an
            array-like of shape (N, 3).
        rot_data: DataFrame with ``rot_x``/``rot_y``/``rot_z``/``rot_w``
            columns, or an array-like of shape (N, 4) in scalar-last order.

    Returns:
        Float ndarray of shape (N, 3). Samples whose quaternion is all-NaN,
        all-(close-to-)zero or rejected by SciPy keep the raw acceleration.
    """
    if isinstance(acc_data, pd.DataFrame):
        acc_values = acc_data[['acc_x', 'acc_y', 'acc_z']].values
    else:
        acc_values = np.asarray(acc_data)  # accept lists as well as ndarrays

    if isinstance(rot_data, pd.DataFrame):
        quat_values = rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
    else:
        quat_values = np.asarray(rot_data)

    # Always compute in floating point: with integer input the previous
    # zeros_like() buffer silently truncated the result. Float inputs keep
    # their original precision (float32 stays float32, float64 stays float64).
    out_dtype = np.result_type(acc_values.dtype, np.float32)
    # Start from a copy of the raw signal, so skipped samples are passthrough.
    linear_accel = acc_values.astype(out_dtype, copy=True)

    gravity_world = np.array([0, 0, 9.81])

    for i in range(acc_values.shape[0]):
        quat = quat_values[i]
        if np.all(np.isnan(quat)) or np.all(np.isclose(quat, 0)):
            continue  # no usable orientation: keep the raw acceleration

        try:
            rotation = R.from_quat(quat)
        except ValueError:
            continue  # SciPy rejected the quaternion: keep the raw acceleration

        gravity_sensor_frame = rotation.apply(gravity_world, inverse=True)
        linear_accel[i, :] = acc_values[i, :] - gravity_sensor_frame

    return linear_accel

def calculate_angular_velocity_from_quat(rot_data, time_delta=1/200): # Assuming 200Hz sampling rate
    """Estimate angular velocity from consecutive orientation quaternions.

    For each pair of neighbouring samples the relative rotation
    ``q[i]^-1 * q[i+1]`` is converted to a rotation vector and divided by
    ``time_delta``.

    Args:
        rot_data: DataFrame with ``rot_x``/``rot_y``/``rot_z``/``rot_w``
            columns, or an ndarray of shape (N, 4) in scalar-last order.
        time_delta: Time between two samples, in seconds.

    Returns:
        ndarray of shape (N, 3). The last row, and every row whose pair
        contains an all-NaN, all-zero or otherwise invalid quaternion, is zero.
    """
    quat_values = (
        rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
        if isinstance(rot_data, pd.DataFrame)
        else rot_data
    )

    def _unusable(quat):
        # A quaternion carries no orientation if it is entirely NaN or ~zero.
        return np.all(np.isnan(quat)) or np.all(np.isclose(quat, 0))

    n_samples = quat_values.shape[0]
    angular_vel = np.zeros((n_samples, 3))

    for idx in range(n_samples - 1):
        q_now, q_next = quat_values[idx], quat_values[idx + 1]
        if _unusable(q_now) or _unusable(q_next):
            continue

        try:
            # Relative rotation between the two samples; for small steps its
            # rotation vector divided by dt approximates the angular velocity.
            step = R.from_quat(q_now).inv() * R.from_quat(q_next)
            angular_vel[idx, :] = step.as_rotvec() / time_delta
        except ValueError:
            # Invalid quaternion: leave this row at zero.
            continue

    return angular_vel

def calculate_angular_distance(rot_data):
    """Angle (radians) of the rotation between each pair of consecutive samples.

    Args:
        rot_data: DataFrame with ``rot_x``/``rot_y``/``rot_z``/``rot_w``
            columns, or an ndarray of shape (N, 4) in scalar-last order.

    Returns:
        ndarray of shape (N,). Entry ``i`` is the rotation angle from sample
        ``i`` to sample ``i + 1``; the last entry, and entries whose pair
        contains an all-NaN, all-zero or otherwise invalid quaternion, are 0.
    """
    quat_values = (
        rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
        if isinstance(rot_data, pd.DataFrame)
        else rot_data
    )

    def _unusable(quat):
        # A quaternion carries no orientation if it is entirely NaN or ~zero.
        return np.all(np.isnan(quat)) or np.all(np.isclose(quat, 0))

    n_samples = quat_values.shape[0]
    angular_dist = np.zeros(n_samples)

    for idx in range(n_samples - 1):
        first, second = quat_values[idx], quat_values[idx + 1]
        if _unusable(first) or _unusable(second):
            angular_dist[idx] = 0
            continue

        try:
            # r1^-1 * r2 is the relative rotation; the norm of its rotation
            # vector is the rotation angle, i.e. the angular distance.
            relative = R.from_quat(first).inv() * R.from_quat(second)
            angular_dist[idx] = np.linalg.norm(relative.as_rotvec())
        except ValueError:
            # Invalid quaternion: report zero distance for this step.
            angular_dist[idx] = 0

    return angular_dist

def pad_sequences_torch(sequences, maxlen, padding='post', truncating='post', value=0.0):
    """Pad or truncate 2-D sequences to a common length (Keras ``pad_sequences`` style).

    Args:
        sequences: Iterable of arrays shaped (length_i, n_features).
        maxlen: Target number of rows for every sequence.
        padding: ``'post'`` appends filler rows; any other value prepends them.
        truncating: ``'post'`` keeps the first ``maxlen`` rows; any other
            value keeps the last ``maxlen`` rows.
        value: Fill value for the padded rows.

    Returns:
        float32 ndarray of shape (n_sequences, maxlen, n_features).
    """
    def _fit(seq):
        current = len(seq)
        if current >= maxlen:
            return seq[:maxlen] if truncating == 'post' else seq[-maxlen:]

        filler = np.full((maxlen - current, seq.shape[1]), value)
        pieces = [seq, filler] if padding == 'post' else [filler, seq]
        return np.concatenate(pieces)

    return np.array([_fit(seq) for seq in sequences], dtype=np.float32)



In [5]:
# Load everything inference needs from PRETRAINED_DIR: the feature schema, the
# padding length, the fitted scaler, the label vocabulary and one checkpoint per fold.
# SECURITY NOTE: `allow_pickle=True`, `joblib.load` and `torch.load` all unpickle
# data and can execute arbitrary code - only point PRETRAINED_DIR at trusted artefacts.
print("▶ INFERENCE MODE – loading artefacts from", PRETRAINED_DIR)
feature_cols = np.load(PRETRAINED_DIR / "feature_cols.npy", allow_pickle=True).tolist()
pad_len = int(np.load(PRETRAINED_DIR / "sequence_maxlen.npy"))
scaler = joblib.load(PRETRAINED_DIR / "scaler.pkl")
gesture_classes = np.load(PRETRAINED_DIR / "gesture_classes.npy", allow_pickle=True)

# Split the feature list: thermopile/ToF columns versus everything else (IMU).
_TOF_PREFIXES = ('thm_', 'tof_')
imu_cols = [c for c in feature_cols if not c.startswith(_TOF_PREFIXES)]
tof_cols = [c for c in feature_cols if c.startswith(_TOF_PREFIXES)]


# Load model: one checkpoint per cross-validation fold (FOLDS comes from the config cell).
MODELS = [f'gesture_two_branch_fold{i}.pth' for i in range(FOLDS)]

models = []
for path in MODELS:
    checkpoint = torch.load(PRETRAINED_DIR / path, map_location=device)

    # Architecture hyper-parameters are stored next to the weights.
    model = TwoBranchModel(
        checkpoint['pad_len'],
        checkpoint['imu_dim'],
        checkpoint['tof_dim'],
        checkpoint['n_classes']
        ).to(device)

    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()  # inference only: freeze BatchNorm statistics and disable dropout
    models.append(model)

print("  model, scaler, pads loaded – ready for evaluation")

# Make sure gesture_classes exists in both modes.
# The label encoder `le` is only defined by a training run; this notebook does not
# create it, so fall back to the classes loaded from disk instead of raising NameError.
if TRAIN and 'le' in globals():
    gesture_classes = le.classes_

# The earlier stateless `predict` implementation that used to live here as commented-out
# code was removed; the active, stateful version is defined in the next cell.

▶ INFERENCE MODE – loading artefacts from /kaggle/input/cmi-1d-cnn-v2
  model, scaler, pads loaded – ready for evaluation


In [6]:
# Cross-call state used by the stateful post-processing in `predict`.
# SUBJECT_HISTORY: subject id -> list of {'seq_id', 'log_probs'} dicts, one per sequence seen so far.
SUBJECT_HISTORY = {}

# FINAL_PREDICTIONS: sequence id -> gesture name chosen by the most recent assignment.
FINAL_PREDICTIONS = {}
# None marks "not initialised": the first `predict` call reloads the labels and clears the state.
gesture_classes = None


def _mirror_left_handed(df_seq):
    """Mirror a left-handed recording in place: swap sensors 3 and 5, negate three IMU axes."""
    # Find all columns related to thermopile and time-of-flight sensors 3 and 5;
    # sorting both lists gives a one-to-one mapping for the swap.
    cols_3 = sorted(c for c in df_seq.columns if any(p in c for p in ['tof_3', 'thm_3']))
    cols_5 = sorted(c for c in df_seq.columns if any(p in c for p in ['tof_5', 'thm_5']))

    # Sanity check: only swap when both sensors expose the same number of columns.
    if len(cols_3) == len(cols_5):
        temp_cols_3_data = df_seq[cols_3].copy()
        df_seq[cols_3] = df_seq[cols_5]
        df_seq[cols_5] = temp_cols_3_data

    negate_cols = ['acc_x', 'rot_y', 'rot_z']
    df_seq[negate_cols] *= -1
    return df_seq


def _add_engineered_features(df_seq):
    """Add gravity-free acceleration, angular velocity/distance and per-sensor ToF statistics in place."""
    linear_accel = remove_gravity_from_acc(df_seq, df_seq)
    df_seq['linear_acc_x'] = linear_accel[:, 0]
    df_seq['linear_acc_y'] = linear_accel[:, 1]
    df_seq['linear_acc_z'] = linear_accel[:, 2]

    angular_vel = calculate_angular_velocity_from_quat(df_seq)
    df_seq['angular_vel_x'] = angular_vel[:, 0]
    df_seq['angular_vel_y'] = angular_vel[:, 1]
    df_seq['angular_vel_z'] = angular_vel[:, 2]

    df_seq['angular_distance'] = calculate_angular_distance(df_seq)

    for i in range(1, 6):
        pixel_cols = [f"tof_{i}_v{p}" for p in range(64)]
        # -1 is treated as "missing" so it does not distort the statistics.
        tof_data = df_seq[pixel_cols].replace(-1, np.nan)
        df_seq[f'tof_{i}_mean'] = tof_data.mean(axis=1)
        df_seq[f'tof_{i}_std'] = tof_data.std(axis=1)
        df_seq[f'tof_{i}_min'] = tof_data.min(axis=1)
        df_seq[f'tof_{i}_max'] = tof_data.max(axis=1)
    return df_seq


def _ensemble_log_probs(pad):
    """Average the fold models' logits for one padded sequence and return its log-softmax vector."""
    with torch.no_grad():
        x = torch.FloatTensor(pad).to(device)
        all_logits = []
        for model in models:
            model.eval()
            all_logits.append(model(x)[0])  # the model returns (logits, features, gate)

        avg_logits = torch.stack(all_logits).mean(dim=0)
        return F.log_softmax(avg_logits, dim=1).cpu().numpy().flatten()


def _assign_labels(cost_matrix):
    """Pick one label index per row of an (n_sequences, n_labels) cost matrix.

    Rows matched by the Hungarian algorithm receive their assigned (unique)
    label. When there are more sequences than labels the solver leaves some
    rows unmatched; those fall back to their individually cheapest label
    instead of raising IndexError.
    """
    row_ind, col_ind = linear_sum_assignment(cost_matrix)
    labels = cost_matrix.argmin(axis=1)
    labels[row_ind] = col_ind
    return labels


def _gesture_from_composite(label):
    """Extract the gesture part of a composite class name.

    Presumably composite labels look like '<prefix>_<gesture>' - TODO confirm
    against the training notebook. A label without '_' is returned unchanged
    rather than raising IndexError.
    """
    parts = str(label).split('_')
    return parts[1] if len(parts) > 1 else parts[0]


def predict(sequence: pl.DataFrame, demographics: pl.DataFrame) -> str:
    """Stateful prediction function with per-subject optimal-assignment post-processing.

    Every call scores one sequence with the fold ensemble, stores its
    log-probabilities under the subject, and re-solves a linear assignment
    between all of that subject's sequences seen so far and the class labels,
    so that (while sequences <= labels) each label is used at most once per
    subject.

    Args:
        sequence: Sensor rows of a single sequence (must contain ``subject``
            and ``sequence_id`` plus the raw sensor columns).
        demographics: Demographics rows containing ``subject`` and ``handedness``.

    Returns:
        The gesture name currently assigned to this sequence.

    Note:
        Labels already returned for earlier sequences cannot be revised, even
        though FINAL_PREDICTIONS is refreshed for them on each call.
    """
    global gesture_classes, SUBJECT_HISTORY, FINAL_PREDICTIONS

    # --- Initialise / reset ---
    # An unloaded gesture_classes marks the start of a new submission run.
    if gesture_classes is None:
        print("First call of this submission run. Initializing...")
        gesture_classes = np.load(PRETRAINED_DIR / "gesture_classes.npy", allow_pickle=True)
        # Forget anything remembered from a previous run.
        SUBJECT_HISTORY = {}
        FINAL_PREDICTIONS = {}

    # --- 1. Feature engineering ---
    df_seq = sequence.to_pandas()
    subject_id = df_seq['subject'].iloc[0]
    sequence_id = df_seq['sequence_id'].iloc[0]

    df_demo = demographics.to_pandas()
    df_seq = df_seq.merge(df_demo[['subject', 'handedness']], on='subject', how='left')

    # handedness == 0 triggers mirroring (presumably left-handed subjects - confirm).
    if df_seq['handedness'].iloc[0] == 0:
        df_seq = _mirror_left_handed(df_seq)
    df_seq = _add_engineered_features(df_seq)

    mat_unscaled = df_seq[feature_cols].ffill().bfill().fillna(0).values.astype('float32')
    mat = scaler.transform(mat_unscaled)
    pad = pad_sequences_torch([mat], maxlen=pad_len, padding='pre', truncating='pre')

    # --- 2. Ensemble prediction -> log-softmax probabilities ---
    log_probs = _ensemble_log_probs(pad)

    # --- 3. Remember this sequence (ignore repeats of an already-seen sequence id) ---
    subject_history = SUBJECT_HISTORY.setdefault(subject_id, [])
    if not any(entry['seq_id'] == sequence_id for entry in subject_history):
        subject_history.append({'seq_id': sequence_id, 'log_probs': log_probs})

    # --- 4. Re-solve the assignment for all of this subject's sequences ---
    # Cost = negative log-probability, so minimising cost maximises the joint log-likelihood.
    cost_matrix = np.stack([-entry['log_probs'] for entry in subject_history]).astype(np.float64)
    label_indices = _assign_labels(cost_matrix)

    for entry, label_index in zip(subject_history, label_indices):
        FINAL_PREDICTIONS[entry['seq_id']] = _gesture_from_composite(gesture_classes[label_index])

    # --- 5. Return the label of the sequence that was just submitted ---
    return FINAL_PREDICTIONS[sequence_id]

In [7]:
# Kaggle competition interface
# `kaggle_evaluation` is only importable inside the competition environment,
# which is why it is imported here rather than in the notebook's import cell.
import kaggle_evaluation.cmi_inference_server
inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # Hidden-test rerun: serve predictions to the competition gateway.
    inference_server.serve()
else:
    # Interactive / commit run: replay the public test files through `predict`.
    # Paths are derived from RAW_DIR so the data location is configured in one place.
    inference_server.run_local_gateway(
        data_paths=(
            str(RAW_DIR / 'test.csv'),
            str(RAW_DIR / 'test_demographics.csv'),
        )
    )

First call of this submission run. Initializing...


In [8]:
# Sanity check: show the first rows of the submission file written by the local gateway run.
submission_path = "/kaggle/working/submission.parquet"
pd.read_parquet(submission_path).head(5)

Unnamed: 0,sequence_id,gesture
0,SEQ_000001,Forehead - pull hairline
1,SEQ_000011,Eyelash - pull hair
