In [None]:
import os
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, GlobalMaxPooling1D, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
import polars as pl
import kaggle_evaluation.cmi_inference_server

# Seed every random-number source used in this notebook so repeated runs match.
SEED = 42
# NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
# setting it here presumably only affects child processes — confirm.
os.environ['PYTHONHASHSEED'] = str(SEED)
# Python's `random`, NumPy's legacy global RNG, then TensorFlow — same order as before.
for seed_fn in (random.seed, np.random.seed, tf.random.set_seed):
    seed_fn(SEED)

print("Imports loaded")

# Load Train Data

In [None]:
# Load the dataset
# Reads the competition's training CSV into the module-level DataFrame `df`,
# which the following cells read (and, for the `gesture` column, overwrite).
print("Loading dataset...")
df = pd.read_csv('/kaggle/input/cmi-detect-behavior-with-sensor-data/train.csv')
print(f"Loaded {len(df)} rows.")

# Encode gesture (our target)
* Save encodings to file (in case we want to use model in another notebook)

In [None]:
# Swap the gesture names in `df` for integer class ids (overwrites the column).
# NOTE: because the column is overwritten, re-running this cell would cast the
# integer ids to strings and encode those instead of the original names.
label_encoder = LabelEncoder()
df['gesture'] = label_encoder.fit_transform(df['gesture'].astype(str))

# Persist the id -> name lookup so inference can turn predictions back into names.
np.save('gesture_classes.npy', label_encoder.classes_)

# Show which integer id was assigned to each gesture name.
print("Gesture label mapping:")
for idx in range(len(label_encoder.classes_)):
    label = label_encoder.classes_[idx]
    print(f"  {idx}: {label}")

# 50% of Hidden Test Set is IMU-only!
* "Half of the hidden-test sequences are recorded with IMU only; the thermopile (thm_) and time-of-flight (tof__v*) columns are still present but contain null values for those sequences."
* https://www.kaggle.com/competitions/cmi-detect-behavior-with-sensor-data/data
* Is our train data like that?  **NO!**

In [None]:
print("Checking for IMU-only sequences...")

def check_for_imu_only_seqs():
    """Print how many sequences in ``df`` have no thermopile / ToF readings at all.

    Reads the module-level ``df``. A sequence is counted as IMU-only when every
    ``thm_*`` and ``tof_*`` column is null on every one of its rows.
    Returns nothing; the totals are printed.
    """
    # Columns whose name marks them as thermopile or time-of-flight data.
    sensor_cols = [name for name in df.columns if name.startswith(('thm_', 'tof_'))]

    # For each sequence and each such column: are all of its values null?
    null_mask = df[sensor_cols].isna()
    all_null_per_column = null_mask.groupby(df['sequence_id']).all()
    # IMU-only means that holds for every one of those columns.
    imu_only_flags = all_null_per_column.all(axis=1)

    # Summary numbers for the report below.
    total_sequences = df['sequence_id'].nunique()
    imu_only_count = imu_only_flags.sum()
    imu_only_pct = (imu_only_count / total_sequences) * 100

    print(f"Total sequences: {total_sequences}")
    print(f"IMU-only sequences (all thm_/tof_ null): {imu_only_count} ({imu_only_pct:.1f}%)")

check_for_imu_only_seqs()

# Define Feature Columns (IMU-only)
* Exclude train-only data

In [None]:
# Columns that are never model inputs.
excluded_cols = {
    'gesture', 'sequence_type', 'behavior', 'orientation',  # train-only
    'row_id', 'subject', 'phase',  # metadata
    'sequence_id', 'sequence_counter'  # identifiers
}

# Setting this true removes thermal and tof columns from the feature lists
drop_thermal_and_tof = False

# Apply the flag BEFORE the feature lists are derived. Previously the exclusion
# set was updated only after the lists had been built, so the flag did nothing.
if drop_thermal_and_tof:
    thermal_tof_cols = [col for col in df.columns if col.startswith('thm_') or col.startswith('tof_')]
    excluded_cols.update(thermal_tof_cols)
    print(f"Ignoring {len(thermal_tof_cols)} thermopile / time-of-flight columns.")

# Everything that survives the exclusions, split by sensor family.
feature_cols = [c for c in df.columns if c not in excluded_cols]
imu_cols = [c for c in feature_cols if not (c.startswith("thm_") or c.startswith("tof_"))]
# Thermopile + time-of-flight columns (empty when drop_thermal_and_tof is True).
tof_cols = [c for c in feature_cols if c.startswith("thm_") or c.startswith("tof_")]

# Only the IMU columns are passed to preprocessing / the model in this notebook.
print(f"Using {len(imu_cols)} numeric feature columns for training:")
print(imu_cols)

# Check for Missing Values
* Yup - there are a bunch - we'll take care of these...

In [None]:
# Count missing entries per selected feature column, then report them.
nan_counts = df[imu_cols].isna().sum()
total_nans = nan_counts.sum()
print(f"\nTotal missing values in feature columns: {total_nans}")
if total_nans == 0:
    print("No missing values found in feature columns.")
else:
    # List only the columns that actually contain gaps.
    print("Columns with missing values:")
    print(nan_counts.loc[nan_counts.gt(0)])

# Pre-process data: scale and simple impute
* Fill missing values: forward-fill, then back-fill, then fill remaining with 0
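* Scale features to zero mean and unit variance (the scaler is fit separately on each sequence), then append per-time-step statistics, first differences, and a 3-step moving average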
* We call this function as part of building input sequences and doing inference

In [None]:
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

def preprocess_sequence(df_sequence: pd.DataFrame, imu_cols: list) -> np.ndarray:
    """Turn the rows of one sequence into a 2-D feature matrix.

    Args:
        df_sequence: Rows belonging to a single sequence, one row per time step.
        imu_cols: Names of the columns to use as raw features.

    Returns:
        Array with one row per time step. With D = len(imu_cols) the columns are,
        in order: D scaled features, 4 per-row statistics (mean, std, max, min),
        D first differences, D moving averages and 1 range/std ratio.
    """
    def _trailing_mean(values, window=3):
        # Windowed sums from cumulative-sum differences. The first `window - 1`
        # rows have no full window and are passed through unchanged.
        running = np.cumsum(values, axis=0)
        running[window:] = running[window:] - running[:-window]
        head = values[:window - 1]
        tail = running[window - 1:] / window
        return np.vstack([head, tail])

    # 1. Select the raw features and fill gaps: forward, then backward, then 0.
    filled = df_sequence[imu_cols].ffill().bfill().fillna(0)

    # 2. Standardise; the scaler is fit on this sequence only.
    scaled = StandardScaler().fit_transform(filled)

    # 3. Statistics across the feature axis, one value per time step.
    row_mean = scaled.mean(axis=1, keepdims=True)
    row_std = scaled.std(axis=1, keepdims=True)
    row_max = scaled.max(axis=1, keepdims=True)
    row_min = scaled.min(axis=1, keepdims=True)

    # 4. Step-to-step change; prepending the first row makes the first delta 0.
    step_change = np.diff(scaled, axis=0, prepend=scaled[:1])

    # 5. Moving average over 3 time steps.
    smoothed = _trailing_mean(scaled, window=3)

    # 6. Range relative to spread; the small constant avoids division by zero.
    spread_ratio = (row_max - row_min) / (row_std + 1e-6)

    # 7. Stack every feature group side by side.
    return np.hstack([
        scaled,
        row_mean, row_std, row_max, row_min,
        step_change,
        smoothed,
        spread_ratio,
    ])




# Build input sequences
* All sequences must be the same length to train the model
* Pad or truncate to a length that covers 90% of sequence lengths
* This defines the fixed input size for our 1D CNN

In [None]:
# Group rows by sequence so each one can be preprocessed on its own.
sequence_ids = df['sequence_id'].unique()
sequences = df.groupby('sequence_id')

X = []
seq_lengths = []

print("Building sequences...")
for i, (seq_id, seq) in enumerate(sequences):
    # Progress line every 500 sequences.
    if not i % 500:
        print(f"Processing sequence {i}...")
    processed = preprocess_sequence(seq, imu_cols)
    X.append(processed)
    seq_lengths.append(len(processed))  # number of time steps in this sequence

max_len_perentile = 90

# Length statistics; the chosen percentile becomes the fixed model input length.
minlen = min(seq_lengths)
avglen = int(np.mean(seq_lengths))
pad_len_to_use = int(np.percentile(seq_lengths, max_len_perentile))
print(f"Sequence length stats - Min: {minlen}, Avg: {avglen}, {max_len_perentile}th percentile: {pad_len_to_use}")
print(f"Padding / truncating all sequences to fixed length {pad_len_to_use}...")

# Stored on disk so inference pads to exactly the same length.
np.save("sequence_maxlen.npy", pad_len_to_use)

# Zero-pad short sequences at the end and cut long ones at the end.
X = pad_sequences(
    X,
    maxlen=pad_len_to_use,
    dtype='float32',
    padding='post',
    truncating='post',
)

# Prepare target labels as one-hot vectors
* Use the first gesture label from each sequence as its target class (already converted to integer earlier)
* Convert integer class labels to one-hot vectors for training (required for categorical cross-entropy)
* The number of unique labels defines the model's output layer size

In [None]:
# One label per sequence: the first (already integer-encoded) gesture value.
# Grouping by 'sequence_id' here uses the same grouping as the cell that built X.
y = df.groupby('sequence_id')['gesture'].first().to_numpy()

print("Integer labels:", y[:4])

# The number of distinct labels sets the width of the one-hot vectors.
num_classes = np.unique(y).size
y = to_categorical(y, num_classes=num_classes)

print("After one-hot encoding:", y[:4])

# Train / validation split

In [None]:
# Hold out 20% of the sequences for validation.
# Stratify on the integer class id (argmax of the one-hot rows) so every gesture
# keeps roughly the same share in both splits; an unstratified draw can leave
# rare classes under-represented in validation.
# NOTE(review): sequences from the same subject can still land in both splits,
# which may make the validation score optimistic — consider a grouped split.
X_train, X_val, y_train, y_val = train_test_split(
    X, y,
    test_size=0.2,
    random_state=SEED,
    stratify=np.argmax(y, axis=1),
)

# Build, compile, and train the Transformer + multi-window 1D CNN model
* Use Conv1D layers to extract temporal patterns from sensor sequences
* Each Conv1D layer uses:
  - `kernel_size` to define the number of time steps it looks at (temporal window)
  - `filters` to define how many distinct patterns it tries to learn at each layer
* Apply max pooling and dropout for regularization and dimensionality reduction
* Apply global max pooling over the time axis and pass the result through dense layers for final classification
* Save trained model to disk for inference

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import (
    Input, Conv1D, MaxPooling1D, Dropout, Dense,
    GlobalMaxPooling1D, BatchNormalization, LayerNormalization,
    MultiHeadAttention, Add, Concatenate
)
from tensorflow.keras.models import Model

def build_transformer_multiwindow_cnn_model(
    input_shape,
    num_classes,
    kernel_sizes=(2, 4, 6, 8, 10),
    num_heads=4,
    ff_dim=256,
    conv_filters=256,
):
    """Build (but do not compile) the Transformer + multi-window CNN classifier.

    The input passes through one self-attention block with residual connections,
    then through one Conv1D branch per entry of ``kernel_sizes`` in parallel.
    The branch outputs are concatenated, refined by a 1x1 convolution, max-pooled
    over the time axis and classified by a dense softmax head.

    Args:
        input_shape: ``(sequence_len, features)`` of one sample.
        num_classes: Size of the softmax output.
        kernel_sizes: Temporal window of each parallel Conv1D branch.
        num_heads: Number of attention heads.
        ff_dim: Hidden width of the feed-forward part of the attention block.
        conv_filters: Number of filters in every parallel Conv1D branch.

    Returns:
        An uncompiled ``Model``.

    Raises:
        ValueError: If ``kernel_sizes`` is empty.
    """
    kernel_sizes = tuple(kernel_sizes)
    if not kernel_sizes:
        raise ValueError("kernel_sizes must contain at least one window size")

    inputs = Input(shape=input_shape)  # (sequence_len, features)

    # --- Transformer Block ---
    attn_output = MultiHeadAttention(num_heads=num_heads, key_dim=input_shape[-1])(inputs, inputs)
    attn_output = Dropout(0.1)(attn_output)
    out1 = LayerNormalization(epsilon=1e-6)(Add()([inputs, attn_output]))

    ffn = tf.keras.Sequential([
        Dense(ff_dim, activation="relu"),
        Dense(input_shape[-1])  # project back to the input width for the residual add
    ])
    ffn_output = ffn(out1)
    ffn_output = Dropout(0.1)(ffn_output)
    x = LayerNormalization(epsilon=1e-6)(Add()([out1, ffn_output]))

    # --- Multi-Window CNN Block ---
    # One parallel branch per kernel size (five with the defaults).
    convs = []
    for k in kernel_sizes:
        conv = Conv1D(filters=conv_filters, kernel_size=k, padding='same', activation='relu')(x)
        conv = BatchNormalization()(conv)
        conv = MaxPooling1D(pool_size=2)(conv)
        conv = Dropout(0.3)(conv)
        convs.append(conv)

    # Concatenate multi-window outputs (a single branch needs no concatenation).
    x = convs[0] if len(convs) == 1 else Concatenate()(convs)

    # Refinement Conv layer (1x1 conv)
    x = Conv1D(512, kernel_size=1, padding='same', activation='relu')(x)
    x = BatchNormalization()(x)
    x = Dropout(0.4)(x)

    # --- Dense Layers ---
    x = GlobalMaxPooling1D()(x)  # collapse the time axis
    x = Dense(256, activation='relu')(x)
    x = Dropout(0.4)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs, outputs)
    return model

# Usage
# Derive the model's input/output sizes from the training arrays: everything
# after the sample axis of X_train, and the one-hot width of y_train.
input_shape = X_train.shape[1:]
num_classes = y_train.shape[-1]
model = build_transformer_multiwindow_cnn_model(input_shape=input_shape, num_classes=num_classes)
# Labels are one-hot vectors, hence categorical cross-entropy.
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Callbacks
# Both callbacks watch the validation loss: one stops training (patience 10,
# restoring the best weights), the other halves the learning rate (patience 4).
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=4, min_lr=1e-6, verbose=1)

# Training
# Up to 100 epochs; early stopping may end the run sooner.
model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=64,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, reduce_lr]
)

# Written to disk so the inference cell can reload it with load_model().
model.save("transformer_multiwindow_cnn_model.h5")



# Use Competition Metric for LB Estimate
* We import: https://www.kaggle.com/code/richolson/cmi-2025-metric-copy-for-import
* Which is a copy of: https://www.kaggle.com/code/metric/cmi-2025

In [None]:
from cmi_2025_metric_copy_for_import import CompetitionMetric

# Class ids for the validation set: predicted (argmax of the probabilities) and true.
print("Predicting on validation set...")
y_val_pred_probs = model.predict(X_val, verbose=0)
y_val_pred = y_val_pred_probs.argmax(axis=1)
y_val_true = y_val.argmax(axis=1)

# Translate the integer ids back into gesture names with the saved lookup array.
gesture_classes = np.load("gesture_classes.npy", allow_pickle=True)
val_pred_labels = pd.Series(gesture_classes[y_val_pred])
val_true_labels = pd.Series(gesture_classes[y_val_true])

# The metric expects two frames that each carry a 'gesture' column.
val_submission = pd.DataFrame({'gesture': val_pred_labels})
val_solution = pd.DataFrame({'gesture': val_true_labels})

# Score the validation predictions with the competition metric.
metric = CompetitionMetric()
score = metric.calculate_hierarchical_f1(val_solution, val_submission)
print(f"Estimated leaderboard (val) score: {score:.4f}")

# Predict function for inference server
* Runs same preprocess_sequence function on inference data as we did for training
* Pads / truncates sequences to the same length as used for training
* Loads trained model and predicts gesture class
* Maps predicted index back to original gesture label

In [None]:
model = load_model("transformer_multiwindow_cnn_model.h5")

# Inference artefacts written by the training cells. They are loaded once here
# rather than inside predict(), which previously re-read both files per call.
_INFERENCE_PAD_LEN = int(np.load("sequence_maxlen.npy"))
# allow_pickle is needed because the class names are stored as an object array;
# the file is produced by this notebook, not by an external source.
_INFERENCE_GESTURE_CLASSES = np.load("gesture_classes.npy", allow_pickle=True)

def predict(sequence: pl.DataFrame, demographics: pl.DataFrame) -> str:
    """Predict the gesture name for one sequence.

    Args:
        sequence: All rows of a single sequence as a polars DataFrame.
        demographics: Passed by the caller; not used by this function.

    Returns:
        The gesture name of the highest-probability class.
    """
    df_seq = sequence.to_pandas()
    # Same feature pipeline and fixed length as used for training.
    processed = preprocess_sequence(df_seq, imu_cols)
    padded = pad_sequences(
        [processed],
        maxlen=_INFERENCE_PAD_LEN,
        dtype='float32',
        padding='post',
        truncating='post',
    )
    prediction = model.predict(padded, verbose=0)
    predicted_index = int(np.argmax(prediction, axis=1)[0])
    # Convert to a plain built-in str to match the declared return type.
    return str(_INFERENCE_GESTURE_CLASSES[predicted_index])

# Wrap predict() in the competition's inference server.
inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # Rerun variable not set: run the local gateway on the public test files.
    inference_server.run_local_gateway(
        data_paths=(
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv',
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv',
        )
    )
else:
    # Rerun variable set: serve requests.
    inference_server.serve()

# Verify prediction function

In [None]:
# Sanity check of predict() on one public test sequence; skipped when the
# KAGGLE_IS_COMPETITION_RERUN environment variable is set.
if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    print("\nRunning manual test...")
    test_df = pd.read_csv('/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv')
    # The first sequence id that appears in the file.
    sample_seq_id = test_df['sequence_id'].iloc[0]
    test_seq = test_df.loc[test_df['sequence_id'].eq(sample_seq_id)]
    # predict() takes a polars frame; demographics are passed as None here.
    prediction = predict(pl.DataFrame(test_seq), None)
    print(f"Manual prediction result for sequence_id {sample_seq_id}: {prediction}")