# Let's try using a 1D CNN!

A friend told me they are good for dealing with time-series data like this.

### 50% of Hidden Test Set is IMU-only!
* "Half of the hidden-test sequences are recorded with IMU only; the thermopile (thm_) and time-of-flight (tof__v*) columns are still present but contain null values for those sequences."
* https://www.kaggle.com/competitions/cmi-detect-behavior-with-sensor-data/data

Easy approach: **Just use the IMU data.**

Also ignoring demographics data for now.

*Friendly Reminder: If re-using large parts of this work in a public notebook - **please credit where you found the code**.*

In [1]:
import os
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, GlobalMaxPooling1D, BatchNormalization
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

from tensorflow.keras.preprocessing.sequence import pad_sequences
import tensorflow as tf
import polars as pl
import kaggle_evaluation.cmi_inference_server

# Set global seed for reproducibility
SEED = 42
os.environ['PYTHONHASHSEED'] = str(SEED)
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

print("Imports loaded")

2025-06-09 15:53:29.405088: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749484409.603996      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749484409.662094      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


Imports loaded


# Load Train Data

In [2]:
# Load the dataset
print("Loading dataset...")
df = pd.read_csv('/kaggle/input/cmi-detect-behavior-with-sensor-data/train.csv')
print(f"Loaded {len(df)} rows.")

Loading dataset...
Loaded 574945 rows.


# Encode gesture (our target)
* Save encodings to file (in case we want to use model in another notebook)

In [3]:
label_encoder = LabelEncoder()
df['gesture'] = label_encoder.fit_transform(df['gesture'].astype(str))

# Save class names for inference
np.save('gesture_classes.npy', label_encoder.classes_)

# Print class label mapping
print("Gesture label mapping:")
for idx, label in enumerate(label_encoder.classes_):
    print(f"  {idx}: {label}")

Gesture label mapping:
  0: Above ear - pull hair
  1: Cheek - pinch skin
  2: Drink from bottle/cup
  3: Eyebrow - pull hair
  4: Eyelash - pull hair
  5: Feel around in tray and pull out an object
  6: Forehead - pull hairline
  7: Forehead - scratch
  8: Glasses on/off
  9: Neck - pinch skin
  10: Neck - scratch
  11: Pinch knee/leg skin
  12: Pull air toward your face
  13: Scratch knee/leg skin
  14: Text on phone
  15: Wave hello
  16: Write name in air
  17: Write name on leg


# 50% of Hidden Test Set is IMU-only!
* "Half of the hidden-test sequences are recorded with IMU only; the thermopile (thm_) and time-of-flight (tof__v*) columns are still present but contain null values for those sequences."
* https://www.kaggle.com/competitions/cmi-detect-behavior-with-sensor-data/data
* Is our train data like that?  **NO!**

In [4]:
print("Checking for IMU-only sequences...")

def check_for_imu_only_seqs():
    # Identify thermopile and TOF columns
    thermal_tof_cols = [col for col in df.columns if col.startswith('thm_') or col.startswith('tof_')]
    
    # Group by sequence and check if all thm_/tof_ values are null
    imu_only_flags = df[thermal_tof_cols].isna().groupby(df['sequence_id']).all().all(axis=1)
    
    # Report statistics
    total_sequences = df['sequence_id'].nunique()
    imu_only_count = imu_only_flags.sum()
    imu_only_pct = (imu_only_count / total_sequences) * 100
    
    print(f"Total sequences: {total_sequences}")
    print(f"IMU-only sequences (all thm_/tof_ null): {imu_only_count} ({imu_only_pct:.1f}%)")

check_for_imu_only_seqs()

Checking for IMU-only sequences...
Total sequences: 8151
IMU-only sequences (all thm_/tof_ null): 96 (1.2%)


# Define Feature Columns (IMU-only)
* Exclude train-only data

In [5]:
excluded_cols = {
    'gesture', 'sequence_type', 'behavior', 'orientation',  # train-only
    'row_id', 'subject', 'phase',  # metadata
    'sequence_id', 'sequence_counter'  # identifiers
}

# Setting this true makes model ignore thermal and tof data
drop_thermal_and_tof = True

if drop_thermal_and_tof:
    thermal_tof_cols = [col for col in df.columns if col.startswith('thm_') or col.startswith('tof_')]
    excluded_cols.update(thermal_tof_cols)
    print(f"Ignoring {len(thermal_tof_cols)} thermopile / time-of-flight columns.")

# Select numeric feature columns
feature_cols = [col for col in df.columns if col not in excluded_cols]
print(f"Using {len(feature_cols)} numeric feature columns for training:")
print(feature_cols)

Ignoring 325 thermopile / time-of-flight columns.
Using 7 numeric feature columns for training:
['acc_x', 'acc_y', 'acc_z', 'rot_w', 'rot_x', 'rot_y', 'rot_z']


# Check for Missing Values
* Yup - there are a bunch - we'll take care of these...

In [6]:
# Check for NaNs in selected feature columns
nan_counts = df[feature_cols].isna().sum()
total_nans = nan_counts.sum()
print(f"\nTotal missing values in feature columns: {total_nans}")
if total_nans > 0:
    print("Columns with missing values:")
    print(nan_counts[nan_counts > 0])
else:
    print("No missing values found in feature columns.")


Total missing values in feature columns: 14768
Columns with missing values:
rot_w    3692
rot_x    3692
rot_y    3692
rot_z    3692
dtype: int64


# Pre-process data: scale and simple impute
* Fill missing values: forward-fill, then back-fill, then fill remaining with 0
* Scale features to zero mean and unit variance
* We call this function as part of building input sequences and doing inference

In [7]:
def preprocess_sequence(df_sequence: pd.DataFrame, feature_cols: list) -> np.ndarray:
    data = df_sequence[feature_cols].copy()
    data = data.ffill().bfill().fillna(0)
    scaled = StandardScaler().fit_transform(data)
    return scaled

# Build input sequences
* All sequences must be the same length to train the model
* Pad or truncate to a length that covers 90% of sequence lengths
* This defines the fixed input size for our 1D CNN

In [8]:
# Build sequences
sequence_ids = df['sequence_id'].unique()
sequences = df.groupby('sequence_id')

X = []
seq_lengths = []

print("Building sequences...")
for i, (seq_id, seq) in enumerate(sequences):
    if i % 500 == 0:
        print(f"Processing sequence {i}...")
    processed = preprocess_sequence(seq, feature_cols)
    X.append(processed)
    seq_lengths.append(processed.shape[0])

max_len_perentile = 90

# Report sequence length stats
minlen = min(seq_lengths)
avglen = int(np.mean(seq_lengths))
pad_len_to_use = int(np.percentile(seq_lengths, max_len_perentile))  
print(f"Sequence length stats - Min: {minlen}, Avg: {avglen}, {max_len_perentile}th percentile: {pad_len_to_use}")
print(f"Padding / truncating all sequences to fixed length {pad_len_to_use}...")

np.save("sequence_maxlen.npy", pad_len_to_use)  # Save for inference

# Pad/truncate to fixed length
X = pad_sequences(X, maxlen=pad_len_to_use, dtype='float32', padding='post', truncating='post')

Building sequences...
Processing sequence 0...
Processing sequence 500...
Processing sequence 1000...
Processing sequence 1500...
Processing sequence 2000...
Processing sequence 2500...
Processing sequence 3000...
Processing sequence 3500...
Processing sequence 4000...
Processing sequence 4500...
Processing sequence 5000...
Processing sequence 5500...
Processing sequence 6000...
Processing sequence 6500...
Processing sequence 7000...
Processing sequence 7500...
Processing sequence 8000...
Sequence length stats - Min: 29, Avg: 70, 90th percentile: 103
Padding / truncating all sequences to fixed length 103...


# Prepare target labels as one-hot vectors
* Use the first gesture label from each sequence as its target class (already converted to integer earlier)
* Convert integer class labels to one-hot vectors for training (required for categorical cross-entropy)
* The number of unique labels defines the model's output layer size

In [9]:
# Use groupby to get the first gesture per sequence (already integer-encoded)
y = df.groupby('sequence_id')['gesture'].first().values

print("Integer labels:", y[:4])

# Convert to one-hot vectors
num_classes = len(np.unique(y))
y = to_categorical(y, num_classes=num_classes)

print("After one-hot encoding:", y[:4])

Integer labels: [ 1  6  1 17]
After one-hot encoding: [[0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]


# Train / test split

In [10]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Build, compile, and train 1D CNN model
* Use Conv1D layers to extract temporal patterns from sensor sequences
* Each Conv1D layer uses:
  - `kernel_size` to define the number of time steps it looks at (temporal window)
  - `filters` to define how many distinct patterns it tries to learn at each layer
* Apply max pooling and dropout for regularization and dimensionality reduction
* Flatten and pass through dense layers for final classification
* Save trained model to disk for inference

In [11]:
%%time
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.layers import BatchNormalization

# Build 1D CNN model

model = Sequential([
    # Block 1 
    Conv1D(filters=512, kernel_size=7, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    
    # Block 2 
    Conv1D(filters=768, kernel_size=5, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    
    # Block 3
    Conv1D(filters=1024, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.4),
    
    # Block 4
    Conv1D(filters=1536, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.4),
    
    # Block 5
    Conv1D(filters=2048, kernel_size=3, activation='relu'),
    BatchNormalization(),
    GlobalMaxPooling1D(),  # Alternative to MaxPooling1D + Flatten
    Dropout(0.5),
    
    # Larger Dense layers
    Dense(2048, activation='relu'),
    Dropout(0.5),
    Dense(1024, activation='relu'),
    Dropout(0.4),
    Dense(512, activation='relu'),
    Dropout(0.3),
    
    # Output
    Dense(num_classes, activation='softmax')
])

# Compile model with categorical crossentropy loss (for one-hot labels)
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Define early stopping to prevent overfitting and restore best model
early_stopping = EarlyStopping(
    monitor='val_accuracy',           
    patience=10,
    restore_best_weights=True,
    verbose=1
)

reduce_lr = ReduceLROnPlateau(
    monitor='val_accuracy',      
    factor=0.7,                 
    patience=3,                  
    min_lr=1e-7,                 
    verbose=1
)

# Train model using explicitly split validation set (80/20 held out)
print("Training model...")
model.fit(
    X_train, y_train,
    epochs=150,                  
    batch_size=64,              
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, reduce_lr]
)

model.save("gesture_cnn_model.h5")
print("Training complete.")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1749484471.296249      19 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 15513 MB memory:  -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0


Training model...
Epoch 1/150


I0000 00:00:1749484480.060656      58 service.cc:148] XLA service 0x7ed784020460 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1749484480.061256      58 service.cc:156]   StreamExecutor device (0): Tesla P100-PCIE-16GB, Compute Capability 6.0
I0000 00:00:1749484480.817288      58 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-06-09 15:54:45.220523: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng4{k11=2} for conv (f32[64,512,1,48]{3,2,1,0}, u8[0]{0}) custom-call(f32[64,768,1,44]{3,2,1,0}, f32[768,512,1,5]{3,2,1,0}), window={size=1x5}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBackwardInput", backend_config={"cudnn_conv_backend_config":{"activation_mode":"kNone","conv_result_scale":1,"leakyrelu_alpha":0,"side_input_scale":0},"force_earliest_schedule":false,"operation_queue_id":"0","wait_on_operation_queues":[]} is taking a while...
2025-06-09 15:54:45.558259: E external/local_xla/x

[1m  5/102[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2s[0m 30ms/step - accuracy: 0.0630 - loss: 5.0687

I0000 00:00:1749484492.160399      58 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 150ms/step - accuracy: 0.1038 - loss: 3.4446 - val_accuracy: 0.0822 - val_loss: 3.0649 - learning_rate: 0.0010
Epoch 2/150
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - accuracy: 0.1772 - loss: 2.5173 - val_accuracy: 0.0822 - val_loss: 2.8166 - learning_rate: 0.0010
Epoch 3/150
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - accuracy: 0.2230 - loss: 2.3197 - val_accuracy: 0.1140 - val_loss: 2.6389 - learning_rate: 0.0010
Epoch 4/150
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - accuracy: 0.2387 - loss: 2.2127 - val_accuracy: 0.2029 - val_loss: 2.3881 - learning_rate: 0.0010
Epoch 5/150
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 30ms/step - accuracy: 0.2603 - loss: 2.1397 - val_accuracy: 0.2869 - val_loss: 2.0161 - learning_rate: 0.0010
Epoch 6/150
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1

# Use Competition Metric for LB Estimate
* We import: https://www.kaggle.com/code/richolson/cmi-2025-metric-copy-for-import
* Which is a copy of: https://www.kaggle.com/code/metric/cmi-2025

In [12]:
from cmi_2025_metric_copy_for_import import CompetitionMetric

# Get predicted labels for the validation set
print("Predicting on validation set...")
y_val_pred_probs = model.predict(X_val, verbose=0)
y_val_pred = np.argmax(y_val_pred_probs, axis=1)
y_val_true = np.argmax(y_val, axis=1)

# Map integer labels back to gesture strings
gesture_classes = np.load("gesture_classes.npy", allow_pickle=True)
val_pred_labels = pd.Series(y_val_pred).map(lambda i: gesture_classes[i])
val_true_labels = pd.Series(y_val_true).map(lambda i: gesture_classes[i])

# Build DataFrames for the metric
val_submission = pd.DataFrame({'gesture': val_pred_labels})
val_solution = pd.DataFrame({'gesture': val_true_labels})

# Run competition metric
metric = CompetitionMetric()
score = metric.calculate_hierarchical_f1(val_solution, val_submission)
print(f"Estimated leaderboard (val) score: {score:.4f}")

Predicting on validation set...
Estimated leaderboard (val) score: 0.6754


# Predict function for inference server
* Runs same preprocess_sequence function on inference data as we did for training
* Pads / truncates to sequences to same length as for training
* Loads trained model and predicts gesture class
* Maps predicted index back to original gesture label

In [13]:
# loading model and setup outside of predict function to reduce overhead
model = load_model("gesture_cnn_model.h5")
maxlen = int(np.load("sequence_maxlen.npy"))  # ensure consistent shape
gesture_classes = np.load("gesture_classes.npy", allow_pickle=True)

def predict(sequence: pl.DataFrame, demographics: pl.DataFrame) -> str:
    df_seq = sequence.to_pandas()
    processed = preprocess_sequence(df_seq, feature_cols)
    padded = pad_sequences([processed], maxlen=maxlen, dtype='float32', padding='post', truncating='post')
    prediction = model.predict(padded, verbose=0)
    predicted_index = np.argmax(prediction, axis=1)[0]
    return gesture_classes[predicted_index]

# Launch inference server
inference_server = kaggle_evaluation.cmi_inference_server.CMIInferenceServer(predict)

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    inference_server.run_local_gateway(
        data_paths=(
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv',
            '/kaggle/input/cmi-detect-behavior-with-sensor-data/test_demographics.csv',
        )
    )

# Verify prediction function

In [14]:
# Manual test (only runs outside Kaggle gateway)
if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    print("\nRunning manual test...")
    test_df = pd.read_csv('/kaggle/input/cmi-detect-behavior-with-sensor-data/test.csv')
    sample_seq_id = test_df['sequence_id'].unique()[0]
    test_seq = test_df[test_df['sequence_id'] == sample_seq_id]
    prediction = predict(pl.DataFrame(test_seq), None)
    print(f"Manual prediction result for sequence_id {sample_seq_id}: {prediction}")


Running manual test...
Manual prediction result for sequence_id SEQ_000001: Eyebrow - pull hair
