In [1]:
import os

import pandas as pd
import polars as pl

import kaggle_evaluation.cmi_inference_server
import os
import pandas as pd
import numpy as np
from scipy.spatial.transform import Rotation as R
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedGroupKFold
from sklearn.metrics import f1_score
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import random
import joblib

In [2]:
def remove_gravity_from_acc(acc_data, rot_data):
    """Subtract the gravity vector, expressed in the sensor frame, from each acceleration sample.

    Args:
        acc_data: DataFrame with ``acc_x/acc_y/acc_z`` columns, or an array
            whose rows are acceleration samples.
        rot_data: DataFrame with ``rot_x/rot_y/rot_z/rot_w`` columns, or an
            array whose rows are quaternions in (x, y, z, w) order.

    Returns:
        Array shaped like the acceleration input. Rows whose quaternion is
        entirely NaN, entirely ~0, or rejected by SciPy keep the raw
        acceleration unchanged.
    """
    acc = (acc_data[['acc_x', 'acc_y', 'acc_z']].values
           if isinstance(acc_data, pd.DataFrame) else acc_data)
    quats = (rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
             if isinstance(rot_data, pd.DataFrame) else rot_data)

    world_gravity = np.array([0, 0, 9.802])
    out = np.zeros_like(acc)

    def _unusable(q):
        # A quaternion that is all-NaN or all-zero carries no orientation.
        return np.all(np.isnan(q)) or np.all(np.isclose(q, 0))

    for row in range(acc.shape[0]):
        q = quats[row]
        if _unusable(q):
            out[row, :] = acc[row, :]
            continue

        try:
            # Rotate world gravity into the sensor frame, then remove it.
            gravity_in_sensor = R.from_quat(q).apply(world_gravity, inverse=True)
            out[row, :] = acc[row, :] - gravity_in_sensor
        except ValueError:
            out[row, :] = acc[row, :]

    return out

def calculate_angular_velocity_from_quat(rot_data, time_delta=1/200):
    """Estimate angular velocity from the rotation between consecutive quaternions.

    Args:
        rot_data: DataFrame with ``rot_x/rot_y/rot_z/rot_w`` columns, or an
            array whose rows are quaternions in (x, y, z, w) order.
        time_delta: Time between consecutive samples; the relative rotation
            vector is divided by this value.

    Returns:
        ``(num_samples, 3)`` array. Row ``i`` describes the step from sample
        ``i`` to ``i + 1``; the last row, and any row where either quaternion
        is all-NaN, all-~0 or rejected by SciPy, stays zero.
    """
    quats = (rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
             if isinstance(rot_data, pd.DataFrame) else rot_data)

    n_samples = quats.shape[0]
    omega = np.zeros((n_samples, 3))

    def _unusable(q):
        return np.all(np.isnan(q)) or np.all(np.isclose(q, 0))

    for idx in range(n_samples - 1):
        q_now, q_next = quats[idx], quats[idx + 1]
        if _unusable(q_now) or _unusable(q_next):
            continue

        try:
            step = R.from_quat(q_now).inv() * R.from_quat(q_next)
            omega[idx, :] = step.as_rotvec() / time_delta
        except ValueError:
            # Leave the row at zero when SciPy rejects a quaternion.
            continue

    return omega

def calculate_angular_distance(rot_data):
    """Compute the rotation angle between each quaternion and its successor.

    Args:
        rot_data: DataFrame with ``rot_x/rot_y/rot_z/rot_w`` columns, or an
            array whose rows are quaternions in (x, y, z, w) order.

    Returns:
        1-D array with one entry per sample. Entry ``i`` is the norm of the
        rotation vector taking sample ``i`` to ``i + 1``; the last entry, and
        any entry where either quaternion is all-NaN, all-~0 or rejected by
        SciPy, is 0.
    """
    quats = (rot_data[['rot_x', 'rot_y', 'rot_z', 'rot_w']].values
             if isinstance(rot_data, pd.DataFrame) else rot_data)

    n_samples = quats.shape[0]
    distances = np.zeros(n_samples)

    def _unusable(q):
        return np.all(np.isnan(q)) or np.all(np.isclose(q, 0))

    for idx in range(n_samples - 1):
        first, second = quats[idx], quats[idx + 1]
        if _unusable(first) or _unusable(second):
            distances[idx] = 0
            continue

        try:
            relative = R.from_quat(first).inv() * R.from_quat(second)
            distances[idx] = np.linalg.norm(relative.as_rotvec())
        except ValueError:
            distances[idx] = 0

    return distances

In [3]:
class IMUModel(nn.Module):
    """IMU-only sequence classifier: per-timestep MLP, 1D-CNN stack, pyramid pooling.

    Pipeline (see ``forward``):
      1. ``feature_mlp`` lifts every timestep independently to 256 features.
      2. ``temporal_conv`` applies two kernel-5 convolutions along time.
      3. ``multi_scale`` runs four parallel convolutions (kernels 3/5/7/11,
         64 channels each) whose outputs are concatenated back to 256 channels.
      4. ``bottleneck`` widens to 512 channels.
      5. Padded timesteps are zeroed, then adaptive average pooling with
         1, 2 and 4 bins gives ``512 * 7`` features for ``classifier``.

    The submodule names and ``nn.Sequential`` positions define the state-dict
    keys of saved checkpoints; do not rename or reorder them.

    Args:
        input_dim: Number of features per timestep.
        num_classes: Number of output logits.
        dropout_rate: Dropout probability for the convolutional stages and the
            classifier; the MLP uses half of it.
    """

    def __init__(self, input_dim, num_classes, dropout_rate=0.2):
        super().__init__()

        # MLP for initial feature transformation (applied at each time step)
        self.feature_mlp = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate * 0.5),

            nn.Linear(128, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate * 0.5),

            nn.Linear(256, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True)
        )

        # Temporal feature extraction with 1D CNNs
        self.temporal_conv = nn.Sequential(
            nn.Conv1d(256, 256, kernel_size=5, padding=2),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),

            nn.Conv1d(256, 256, kernel_size=5, padding=2),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate)
        )

        # Multi-scale temporal feature extractor; padding = k // 2 keeps the
        # sequence length unchanged for every (odd) kernel size.
        self.multi_scale = nn.ModuleList([
            nn.Sequential(
                nn.Conv1d(256, 64, kernel_size=k, padding=k // 2),
                nn.BatchNorm1d(64),
                nn.ReLU(inplace=True)
            )
            for k in (3, 5, 7, 11)
        ])

        # Bottleneck (input is the 4 x 64 concatenated multi-scale channels)
        self.bottleneck = nn.Sequential(
            nn.Conv1d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm1d(512),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate)
        )

        # Temporal pyramid pooling
        self.pyramid_pool = nn.ModuleList([
            nn.AdaptiveAvgPool1d(1),
            nn.AdaptiveAvgPool1d(2),
            nn.AdaptiveAvgPool1d(4)
        ])

        # Final classifier
        self.classifier = nn.Sequential(
            nn.Linear(512*7, 256),  # 7 = 1+2+4 from pyramid pooling
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(256, num_classes)
        )

    def forward(self, x, lengths):
        """Classify a batch of padded sequences.

        Args:
            x: Tensor of shape ``(B, T, C)``; it may be non-contiguous.
            lengths: Valid length of each sequence, shape ``(B,)``. Accepts a
                tensor on any device or any value ``torch.as_tensor`` accepts.

        Returns:
            Logits of shape ``(B, num_classes)``.
        """
        batch_size, seq_len, feat_dim = x.shape

        # Flatten to (B*T, C) so the MLP sees each timestep as one sample.
        # `reshape` (not `view`) so transposed/sliced inputs are accepted too.
        x_flat = x.reshape(-1, feat_dim)
        x_transformed = self.feature_mlp(x_flat)

        # Back to (B, T, hidden) and then (B, hidden, T) for Conv1d
        x = x_transformed.view(batch_size, seq_len, -1).transpose(1, 2)

        # Temporal feature extraction
        x = self.temporal_conv(x)

        # Multi-scale temporal feature extraction, concatenated on channels
        x = torch.cat([conv(x) for conv in self.multi_scale], dim=1)

        # Bottleneck
        x = self.bottleneck(x)

        # Zero out padded timesteps. `lengths` is moved to x's device so a CPU
        # lengths tensor does not break the comparison on GPU.
        lengths = torch.as_tensor(lengths, device=x.device)
        max_time = x.size(2)
        mask = torch.arange(max_time, device=x.device)[None, :] < lengths[:, None]
        x = x * mask.unsqueeze(1).float()

        # Temporal pyramid pooling. The pools average over the full padded
        # length, so zeroed positions still contribute to each bin's mean.
        pyramid_features = [pool(x).flatten(start_dim=1) for pool in self.pyramid_pool]

        # Combine features
        features = torch.cat(pyramid_features, dim=1)

        # Classification
        return self.classifier(features)

In [4]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [5]:
import torch
import torch.nn as nn
import os


# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# IMU-only ensemble: one IMUModel (28 input features, 18 classes) per fold checkpoint.
model_ensemble = []
for i in range(10):
    model = IMUModel(28, 18)
    model = model.to(device)

    # Fold checkpoints are stored as bst_model_fold0.pth ... bst_model_fold9.pth
    model_path = f"/kaggle/input/trained-model/bst_model_fold{i}.pth"
    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)

    # Inference only: switch BatchNorm/Dropout to evaluation behaviour
    model.eval()
    model_ensemble.append(model)

print(f"✅ Successfully loaded {len(model_ensemble)} models")

# Example inference function using ensemble
def ensemble_predict(x, lengths=None, models=None):
    """
    Predict using a model ensemble with logit averaging

    Args:
        x: Input tensor (batch_size, seq_len, features)
        lengths: Valid length of every sequence in the batch, shape
            (batch_size,). Defaults to ``seq_len`` for all sequences, i.e. the
            batch is treated as unpadded.
        models: Iterable of models called as ``model(x, lengths)``. Defaults to
            the module-level ``model_ensemble``.

    Returns:
        logits: Averaged logits from all models
        probs: Class probabilities
        preds: Class predictions

    Raises:
        ValueError: If the ensemble is empty.
    """
    if models is None:
        models = model_ensemble
    models = list(models)
    if not models:
        raise ValueError("ensemble_predict needs at least one model")

    if lengths is None:
        # No padding information given: every timestep counts as valid.
        lengths = torch.full((x.size(0),), x.size(1), dtype=torch.long, device=x.device)
    else:
        lengths = torch.as_tensor(lengths, device=x.device)

    with torch.no_grad():
        # The models' forward pass takes the sequence lengths as a second
        # argument; calling model(x) alone raises TypeError.
        all_logits = [model(x, lengths) for model in models]

        # Average logits across models
        avg_logits = torch.stack(all_logits).mean(dim=0)
        probs = torch.softmax(avg_logits, dim=-1)
        preds = torch.argmax(probs, dim=-1)

    return avg_logits, probs, preds

# Usage example (the IMU-only models are built with 28 input features):
# x = torch.randn(32, 128, 28).to(device)
# lengths = torch.full((32,), 128)
# logits, probs, preds = ensemble_predict(x, lengths)





✅ Successfully loaded 10 models


In [6]:
# Raw IMU channels: 3-axis accelerometer plus orientation quaternion.
initial_imu_cols = ['acc_x', 'acc_y', 'acc_z', 'rot_x', 'rot_y', 'rot_z', 'rot_w']

imu_cols = initial_imu_cols + [
    # Engineered features
    'acc_mag', 'rot_angle',
    'acc_mag_jerk', 'rot_angle_vel',
    'linear_acc_x', 'linear_acc_y', 'linear_acc_z',
    'linear_acc_mag', 'linear_acc_mag_jerk',
    'angular_vel_x', 'angular_vel_y', 'angular_vel_z',
    'angular_distance'
]

# Only the label column is needed to rebuild the class-index -> gesture-name
# map, so skip parsing every sensor column of the training file.
# NOTE: `df` therefore holds just the 'gesture' column.
df = pd.read_csv(
    '/kaggle/input/cmi-detect-behavior-with-sensor-data/train.csv',
    usecols=['gesture'],
)
# Sorted unique labels define the class order; it is assumed to match the
# order used when the checkpoints were trained - TODO confirm.
gestures = sorted(df['gesture'].unique())
idx_to_gesture = dict(enumerate(gestures))

In [7]:
class ResidualBlock1D(nn.Module):
    """Two kernel-3 Conv1d + BatchNorm layers with an identity shortcut.

    Channel count and sequence length are unchanged (padding=1), so the input
    can be added straight onto the second BatchNorm's output.
    """

    def __init__(self, channels):
        super().__init__()
        self.conv1 = nn.Conv1d(channels, channels, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(channels)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv1d(channels, channels, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(channels)

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + x)``."""
        hidden = self.relu(self.bn1(self.conv1(x)))
        hidden = self.bn2(self.conv2(hidden))
        hidden += x  # identity shortcut
        return self.relu(hidden)

class FPN1D(nn.Module):
    """Three-level 1-D feature pyramid that returns the finest level only.

    The input is max-pooled twice (stride 2) to form coarser levels. Each level
    gets a 1x1 lateral convolution, and coarse features are linearly upsampled
    and added top-down. ``smooth_conv2`` is created but never called in
    ``forward``; it is kept so its parameters stay in the state dict.
    """

    def __init__(self, in_channels=256, out_channels=256):
        super().__init__()
        # Creation order (lateral 0..2, then smooth 0..2) matches attribute order.
        for level in range(3):
            setattr(self, f'lateral_conv{level}',
                    nn.Conv1d(in_channels, out_channels, kernel_size=1))
        for level in range(3):
            setattr(self, f'smooth_conv{level}',
                    nn.Conv1d(out_channels, out_channels, kernel_size=3, padding=1))

    def forward(self, c0):
        """Fuse the pyramid top-down and return features at ``c0``'s temporal length."""
        # Bottom-up pathway: halve the temporal resolution twice.
        mid = F.max_pool1d(c0, kernel_size=2, stride=2, ceil_mode=True)
        coarse = F.max_pool1d(mid, kernel_size=2, stride=2, ceil_mode=True)

        lat_fine = self.lateral_conv0(c0)
        lat_mid = self.lateral_conv1(mid)
        lat_coarse = self.lateral_conv2(coarse)

        # Top-down pathway: upsample, add, smooth.
        mid_fused = self.smooth_conv1(
            lat_mid + F.interpolate(lat_coarse, size=lat_mid.size(2),
                                    mode='linear', align_corners=True)
        )
        fine_fused = self.smooth_conv0(
            lat_fine + F.interpolate(mid_fused, size=lat_fine.size(2),
                                     mode='linear', align_corners=True)
        )
        return fine_fused

class SensorBranch(nn.Module):
    """Encoder for one sensor stream: 1x1 projection, residual CNN, FPN, stacked GRUs.

    ``forward`` returns one feature vector of size ``hidden_dim * 2`` per
    sequence, taken at that sequence's last valid timestep.

    Submodule names define the state-dict keys of saved checkpoints; do not
    rename them.

    Args:
        input_dim: Number of channels per timestep of this sensor.
        hidden_dim: Hidden size of each (bidirectional) GRU.
    """

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        # 1×1 projection
        self.input_proj = nn.Conv1d(input_dim, 256, kernel_size=1)

        # Residual blocks
        self.res_cnn = nn.Sequential(
            ResidualBlock1D(256),
            ResidualBlock1D(256),
            ResidualBlock1D(256),
        )

        # Feature Pyramid Network
        self.fpn = FPN1D(256, 256)

        # GRU layers
        self.gru1 = nn.GRU(256, hidden_dim, num_layers=3, batch_first=True, bidirectional=True)
        self.gru2 = nn.GRU(hidden_dim*2, hidden_dim, num_layers=2, batch_first=True, bidirectional=True)
        self.gru3 = nn.GRU(hidden_dim*2, hidden_dim, num_layers=1, batch_first=True, bidirectional=True)

        # Skip connection
        self.skip_proj = nn.Linear(256, hidden_dim*2)

        # Initialize weights
        self._init_weights()

    def _init_weights(self):
        """Xavier-initialise GRU and Linear weights and zero their biases.

        Only affects freshly constructed models; loading a checkpoint
        afterwards overwrites these values.
        """
        for name, param in self.named_parameters():
            if 'gru' not in name:
                continue
            if 'weight' in name:
                nn.init.xavier_uniform_(param)
            elif 'bias' in name:
                nn.init.constant_(param, 0.0)

        # Linear layers must be found via modules(): named_parameters() yields
        # Parameters, so an isinstance(param, nn.Linear) test can never match.
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.xavier_uniform_(module.weight)
                if module.bias is not None:
                    nn.init.zeros_(module.bias)

    def forward(self, x, lengths):
        """Encode a batch of padded sequences.

        Args:
            x: Tensor of shape ``(B, T, input_dim)``.
            lengths: Tensor of shape ``(B,)`` with each sequence's valid length
                (values in ``1..T``).

        Returns:
            Tensor of shape ``(B, hidden_dim * 2)``.
        """
        # Input transformation
        x = x.transpose(1, 2)  # (B, C, T)
        x = self.input_proj(x)
        x = self.res_cnn(x)
        x = self.fpn(x)
        x = x.transpose(1, 2)  # (B, T, C)

        total_steps = x.size(1)
        cpu_lengths = lengths.cpu()  # pack_padded_sequence needs CPU lengths

        def run_gru(gru, inputs):
            packed = pack_padded_sequence(inputs, cpu_lengths, batch_first=True,
                                          enforce_sorted=False)
            output, _ = gru(packed)
            # total_length pads back to T so the residual additions line up
            # even when the longest sequence in the batch is shorter than T.
            padded, _ = pad_packed_sequence(output, batch_first=True,
                                            total_length=total_steps)
            return padded

        # GRU processing with residual connections
        prev = run_gru(self.gru1, x) + self.skip_proj(x)
        prev = run_gru(self.gru2, prev) + prev
        prev = run_gru(self.gru3, prev) + prev

        # Last valid timestep of every sequence
        idx = (lengths.to(prev.device) - 1).unsqueeze(1).unsqueeze(2).expand(-1, 1, prev.size(2))
        last = prev.gather(1, idx).squeeze(1)

        return last

class MultiSensorClassifier(nn.Module):
    """Fuse IMU, thermopile and five ToF branches with self-attention and classify.

    Each sensor stream is encoded by its own ``SensorBranch`` into one token of
    size ``hidden_dim * 2``. The seven tokens pass through three
    attention + LayerNorm layers, are averaged, and go through a feed-forward
    block, LayerNorm and a linear classifier.

    Submodule names define the state-dict keys of saved checkpoints; do not
    rename them.

    Args:
        imu_input_dim: Channels per timestep of the IMU stream.
        thm_input_dim: Channels per timestep of the thermopile stream.
        tof_input_dim: Channels per timestep of each ToF stream.
        hidden_dim: GRU hidden size inside every branch.
        num_heads: Attention heads in each fusion layer.
        ffn_hidden: Hidden width of the feed-forward block.
        num_classes: Number of output logits.
        p_branch_mask: Training-only probability of hiding each branch token
            from attention (sampled once per forward pass for the whole batch).
        p_feat_dropout: Dropout applied to the normalised tokens.
    """

    def __init__(self,
                 imu_input_dim=20,  # Updated for engineered features
                 thm_input_dim=5,
                 tof_input_dim=64,
                 hidden_dim=256,
                 num_heads=8,
                 ffn_hidden=256,
                 num_classes=18,
                 p_branch_mask=0.1,
                 p_feat_dropout=0.2):
        super().__init__()
        # Sensor branches
        self.imu_branch = SensorBranch(imu_input_dim, hidden_dim)
        self.thm_branch = SensorBranch(thm_input_dim, hidden_dim)
        self.tof_branches = nn.ModuleList([
            SensorBranch(tof_input_dim, hidden_dim) for _ in range(5)
        ])

        # Feature fusion
        self.pre_norm = nn.LayerNorm(hidden_dim*2)
        self.attention_layers = nn.ModuleList([
            nn.ModuleDict({
                'attention': nn.MultiheadAttention(
                    embed_dim=hidden_dim*2,
                    num_heads=num_heads,
                    batch_first=True
                ),
                'norm': nn.LayerNorm(hidden_dim*2)
            }) for _ in range(3)
        ])
        self.ffn = nn.Sequential(
            nn.Linear(hidden_dim*2, ffn_hidden),
            nn.ReLU(),
            nn.Linear(ffn_hidden, hidden_dim*2)
        )
        self.post_norm = nn.LayerNorm(hidden_dim*2)

        # Classifier
        self.classifier = nn.Linear(hidden_dim*2, num_classes)
        nn.init.xavier_uniform_(self.classifier.weight)
        nn.init.zeros_(self.classifier.bias)

        # Regularization
        self.p_branch_mask = p_branch_mask
        self.feat_dropout = nn.Dropout(p_feat_dropout)

    def forward(self,
                imu_seq, imu_len,
                thm_seq, thm_len,
                tof_inputs, tof_len,
                tof_attention_masks,
                return_features=False):
        """Compute class logits for a batch.

        Args:
            imu_seq, imu_len: IMU sequences and their lengths.
            thm_seq, thm_len: Thermopile sequences and their lengths.
            tof_inputs: One tensor per ToF branch.
            tof_len: Sequence lengths shared by all ToF branches.
            tof_attention_masks: One boolean tensor per ToF branch; positions
                where it is False are zeroed before encoding.
            return_features: Also return the post-norm fused feature vector.

        Returns:
            ``logits``, or ``(logits, features)`` when ``return_features``.
        """
        # Process IMU
        imu_feat = self.imu_branch(imu_seq, imu_len)

        # Process THM
        thm_feat = self.thm_branch(thm_seq, thm_len)

        # Process ToF sensors
        tof_feats = []
        for i, branch in enumerate(self.tof_branches):
            masked_tof = tof_inputs[i].clone()  # do not modify the caller's tensor
            masked_tof[~tof_attention_masks[i]] = 0
            tof_feats.append(branch(masked_tof, tof_len))

        # Combine features: one token per branch -> (B, num_tokens, D)
        tokens = [imu_feat, thm_feat] + tof_feats
        x = torch.stack(tokens, dim=1)

        # Branch masking (training only)
        attention_mask = None
        if self.training and self.p_branch_mask > 0:
            keep = torch.bernoulli(
                torch.full((len(tokens),), 1 - self.p_branch_mask, device=x.device)
            ).bool()
            if not keep.any():
                # Dropping every branch would give attention an all-masked row
                # and a 0/0 in the masked mean below (NaN logits); always keep
                # one randomly chosen branch.
                keep[torch.randint(len(tokens), (1,), device=keep.device)] = True
            attention_mask = (~keep).expand(x.size(0), -1)

        # Feature fusion
        x = self.pre_norm(x)
        x = self.feat_dropout(x)

        for attn_layer in self.attention_layers:
            attn_out, _ = attn_layer['attention'](
                x, x, x,
                key_padding_mask=attention_mask
            )
            x = x + attn_out
            x = attn_layer['norm'](x)

        if attention_mask is not None:
            # Average over the kept tokens only
            mask_expanded = (~attention_mask).float().unsqueeze(-1)
            fused = (x * mask_expanded).sum(dim=1) / mask_expanded.sum(dim=1)
        else:
            fused = x.mean(dim=1)

        out = self.ffn(fused)
        out = self.post_norm(out)
        logits = self.classifier(out)

        if return_features:
            return logits, out
        return logits

In [8]:
import torch
import torch.nn as nn
import os


# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Full-sensor ensemble: one MultiSensorClassifier per checkpoint (folds 1-3).
model_ensemble1 = []
for i in range(1, 4):
    model = MultiSensorClassifier()
    model = model.to(device)

    # The fold-1 checkpoint is stored under a different file name than the others.
    model_path = (
        '/kaggle/input/trained-model/best_model_fold1-9.pth'
        if i == 1
        else f"/kaggle/input/trained-model/best_model_fold{i}.pth"
    )

    state_dict = torch.load(model_path, map_location=device)
    model.load_state_dict(state_dict)

    # Inference only: switch Dropout and branch masking off
    model.eval()
    model_ensemble1.append(model)

print(f"✅ Successfully loaded {len(model_ensemble1)} models")







✅ Successfully loaded 3 models


The evaluation API requires that you set up a server which will respond to inference requests. We have already defined the server; you just need to write the predict function. When we evaluate your submission on the hidden test set, the client defined in the gateway will run in a different container with direct access to the hidden test set and hand off one sequence at a time.

Your code will always have access to the published copies of the files.

In [9]:
# Raw IMU channels shared by both feature sets.
_BASE_IMU_COLS = ['acc_x', 'acc_y', 'acc_z', 'rot_x', 'rot_y', 'rot_z', 'rot_w']

# Full (all-sensor) model: 7 base + 13 engineered = 20 features.
# Column order is the models' input channel order - do not reorder.
_IMU_COLS_FULL = _BASE_IMU_COLS + [
    # Basic engineered features
    'acc_mag', 'rot_angle', 'acc_mag_jerk', 'rot_angle_vel',
    # Linear acceleration
    'linear_acc_x', 'linear_acc_y', 'linear_acc_z', 'linear_acc_mag', 'linear_acc_mag_jerk',
    # Angular features (only basic ones)
    'angular_vel_x', 'angular_vel_y', 'angular_vel_z',
    'angular_distance'
]

# IMU-only model: 7 base + 21 engineered = 28 features.
_IMU_COLS_IMU_ONLY = _BASE_IMU_COLS + [
    # Basic engineered features (4)
    'acc_mag', 'rot_angle', 'acc_mag_jerk', 'rot_angle_vel',
    # Jerk features (6)
    'acc_x_jerk', 'acc_y_jerk', 'acc_z_jerk',
    'acc_x_jerk_jerk', 'acc_y_jerk_jerk', 'acc_z_jerk_jerk',
    # Linear acceleration (5)
    'linear_acc_x', 'linear_acc_y', 'linear_acc_z', 'linear_acc_mag', 'linear_acc_mag_jerk',
    # Angular features (6)
    'angular_vel_x', 'angular_vel_y', 'angular_vel_z', 'angular_vel_mag', 'angular_vel_mag_jerk',
    'angular_distance'
]

_THM_COLS = [f'thm_{i}' for i in range(1, 6)]


def _tof_columns(sensor_idx):
    """Return the 64 pixel column names of ToF sensor ``sensor_idx`` (1-5)."""
    return [f'tof_{sensor_idx}_v{j}' for j in range(64)]


def _has_any_value(df_seq, cols):
    """True when every column in ``cols`` exists and at least one cell is not NaN."""
    if not all(col in df_seq.columns for col in cols):
        return False
    values = df_seq[cols].values.astype(np.float32)
    return not np.all(np.isnan(values))


def _diff0(series):
    """First difference with the leading NaN replaced by 0, as float32."""
    return series.diff().fillna(0).astype(np.float32)


def _add_imu_features(df_seq, include_imu_only_extras):
    """Add the engineered IMU columns to ``df_seq`` in place.

    ``include_imu_only_extras`` additionally adds the per-axis jerk features
    and the angular-velocity magnitude features used by the IMU-only model.
    """
    # Missing quaternion components become 0; the quaternion helpers treat an
    # all-zero quaternion as "no orientation available".
    rotation_cols = ['rot_x', 'rot_y', 'rot_z', 'rot_w']
    for col in rotation_cols:
        if col in df_seq.columns:
            df_seq[col] = df_seq[col].fillna(0)

    # Magnitudes and their first differences
    df_seq['acc_mag'] = np.sqrt(df_seq['acc_x']**2 + df_seq['acc_y']**2 + df_seq['acc_z']**2).astype(np.float32)
    df_seq['rot_angle'] = (2 * np.arccos(df_seq['rot_w'].clip(-1, 1))).astype(np.float32)
    df_seq['acc_mag_jerk'] = _diff0(df_seq['acc_mag'])
    df_seq['rot_angle_vel'] = _diff0(df_seq['rot_angle'])

    # Gravity-compensated acceleration
    acc_data = df_seq[['acc_x', 'acc_y', 'acc_z']]
    rot_data = df_seq[rotation_cols]
    df_seq[['linear_acc_x', 'linear_acc_y', 'linear_acc_z']] = remove_gravity_from_acc(acc_data, rot_data)
    df_seq['linear_acc_mag'] = np.sqrt(df_seq['linear_acc_x']**2 +
                                       df_seq['linear_acc_y']**2 +
                                       df_seq['linear_acc_z']**2).astype(np.float32)
    df_seq['linear_acc_mag_jerk'] = _diff0(df_seq['linear_acc_mag'])

    # Quaternion-derived angular features
    df_seq[['angular_vel_x', 'angular_vel_y', 'angular_vel_z']] = calculate_angular_velocity_from_quat(rot_data)
    df_seq['angular_distance'] = calculate_angular_distance(rot_data).astype(np.float32)

    if include_imu_only_extras:
        # Per-axis first and second differences of the raw acceleration
        for axis in ('x', 'y', 'z'):
            df_seq[f'acc_{axis}_jerk'] = _diff0(df_seq[f'acc_{axis}'])
        for axis in ('x', 'y', 'z'):
            df_seq[f'acc_{axis}_jerk_jerk'] = _diff0(df_seq[f'acc_{axis}_jerk'])

        # Angular velocity magnitude and its first difference
        df_seq['angular_vel_mag'] = np.sqrt(df_seq['angular_vel_x']**2 +
                                            df_seq['angular_vel_y']**2 +
                                            df_seq['angular_vel_z']**2).astype(np.float32)
        df_seq['angular_vel_mag_jerk'] = _diff0(df_seq['angular_vel_mag'])

    return df_seq


def _build_tof_inputs(df_seq, n_steps):
    """Return ``(arrays, masks)`` for the five ToF sensors.

    Each array is ``(n_steps, 64)`` float32 and each mask is ``(n_steps,)``
    bool. A row is valid unless it is entirely NaN or entirely -1. For a sensor
    with at least one valid row, -1 readings become 512 and NaNs become 1000;
    otherwise the array stays zero and the mask all-False.
    """
    arrays, masks = [], []
    for sensor_idx in range(1, 6):
        cols = _tof_columns(sensor_idx)
        arr = np.zeros((n_steps, 64), dtype=np.float32)
        mask = np.zeros(n_steps, dtype=bool)

        if all(col in df_seq.columns for col in cols):
            tof_vals = df_seq[cols].values.astype(np.float32)
            valid_mask = ~(np.isnan(tof_vals).all(axis=1) | (tof_vals == -1).all(axis=1))

            if np.any(valid_mask):
                arr = tof_vals.copy()
                arr[arr == -1] = 512
                arr = np.nan_to_num(arr, nan=1000)
                mask = valid_mask

        arrays.append(arr)
        masks.append(mask)
    return arrays, masks


def predict(sequence: pl.DataFrame, demographics: pl.DataFrame) -> str:
    """Predict the gesture label for one sensor sequence.

    The full multi-sensor ensemble (``model_ensemble1``) is used when both the
    thermopile columns and at least one ToF sensor contain data; otherwise the
    IMU-only ensemble (``model_ensemble``) is used. Logits are averaged across
    the chosen ensemble and the arg-max class is mapped through
    ``idx_to_gesture``.

    Args:
        sequence: All rows of one sequence; sorted by ``sequence_counter`` here.
        demographics: Unused; accepted because the inference server passes it.

    Returns:
        The predicted gesture name.
    """
    # Convert to pandas and put rows in temporal order
    df_seq = sequence.to_pandas().sort_values('sequence_counter')

    # --- Sensor availability decides which ensemble runs ---
    thm_available = _has_any_value(df_seq, _THM_COLS)
    tof_available = any(_has_any_value(df_seq, _tof_columns(i)) for i in range(1, 6))
    use_full_model = thm_available and tof_available

    # --- Feature engineering ---
    _add_imu_features(df_seq, include_imu_only_extras=not use_full_model)

    # Ensure all numeric columns are float32
    for col in df_seq.select_dtypes(include=[np.number]).columns:
        df_seq[col] = df_seq[col].astype(np.float32)

    # A local feature list is used instead of rebinding the module-level
    # `imu_cols`, so calls do not leak state into the rest of the notebook.
    feature_cols = _IMU_COLS_FULL if use_full_model else _IMU_COLS_IMU_ONLY
    for col in feature_cols:
        if col not in df_seq:
            df_seq[col] = 0.0

    X = df_seq[feature_cols].values.astype(np.float32)
    X = np.nan_to_num(X, nan=0.0)
    T = X.shape[0]

    # --- Tensors ---
    models = model_ensemble1 if use_full_model else model_ensemble
    device = next(models[0].parameters()).device

    imu_tensor = torch.tensor(X, dtype=torch.float32).unsqueeze(0).to(device)  # (1, T, C)
    seq_len = torch.tensor([T]).to(device)

    # --- Inference ---
    all_logits = []
    if use_full_model:
        # Thermopile readings, missing values replaced by 0
        thm_data = np.zeros((T, 5), dtype=np.float32)
        for i, col in enumerate(_THM_COLS):
            if col in df_seq:
                thm_data[:, i] = df_seq[col].fillna(0).values
        thm_tensor = torch.tensor(thm_data, dtype=torch.float32).unsqueeze(0).to(device)

        tof_inputs, tof_masks = _build_tof_inputs(df_seq, T)
        tof_tensors = [torch.tensor(arr, dtype=torch.float32).unsqueeze(0).to(device)
                       for arr in tof_inputs]
        tof_mask_tensors = [torch.tensor(mask, dtype=torch.bool).unsqueeze(0).to(device)
                            for mask in tof_masks]

        for model in models:
            model.eval()
            with torch.no_grad():
                all_logits.append(model(
                    imu_tensor, seq_len,
                    thm_tensor, seq_len,
                    tof_tensors, seq_len,
                    tof_mask_tensors
                ))
    else:
        for model in models:
            model.eval()
            with torch.no_grad():
                # IMUModel expects (B, T, C) and handles the transpose internally
                all_logits.append(model(imu_tensor, seq_len))  # shape (1, n_classes)

    # --- Ensemble averaging ---
    # Uniform mean works for any ensemble size. The previous fixed weights were
    # uniform too (0.1 x 10 and 0.33 x 3), so the arg-max is unchanged.
    avg_logits = torch.stack(all_logits).mean(dim=0)  # (batch, n_classes)
    pred_idx = avg_logits.argmax(1).item()

    print(f"Prediction: {idx_to_gesture[pred_idx]}")

    return idx_to_gesture[pred_idx]

In [10]:
# Wrap `predict` in the competition's inference server.
inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # Not a competition rerun: run the local gateway against the published test files.
    inference_server.run_local_gateway(
        data_paths=(
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv',
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv',
        )
    )
else:
    # Competition rerun: serve requests from the hidden-test gateway.
    inference_server.serve()

Prediction: Neck - scratch
Prediction: Eyelash - pull hair
