In [5]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score

from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Conv1D, Conv2D, MaxPooling1D, MaxPooling2D,
    Flatten, Dense, Dropout, TimeDistributed, GlobalMaxPooling1D,
    GlobalMaxPooling2D, concatenate, BatchNormalization
)
from tensorflow.keras.optimizers import Adam

# For reproducibility
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)


2025-07-03 21:11:24.101503: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-03 21:11:24.110628: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751559084.121766   11326 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751559084.125188   11326 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1751559084.133479   11326 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

In [6]:
# Load
df = pd.read_csv("train.csv")

# Replace -1 with NaN and fill
df.replace(-1, np.nan, inplace=True)
df.fillna(method='ffill', inplace=True)
df.fillna(method='bfill', inplace=True)

# Config
time_steps = 100
imu_channels = ['acc_x', 'acc_y', 'acc_z', 'rot_x', 'rot_y', 'rot_z', 'thm_1', 'thm_2', 'thm_3']
tof_sensors = [f'tof_{i}_v{v}' for i in range(1, 6) for v in [0, 7, 56, 63]]

X_imu, X_tof, y_binary, y_gesture = [], [], [], []

# Binary target: Target vs Non-Target
binary_map = df.groupby('sequence_id')['sequence_type'].first().map(lambda x: 1 if x == 'Target' else 0)
gesture_labels = df[df['sequence_type'] == 'Target'].groupby('sequence_id')['gesture'].first()
gesture_encoder = LabelEncoder()
gesture_encoded = pd.Series(gesture_encoder.fit_transform(gesture_labels), index=gesture_labels.index)

for seq_id in tqdm(df['sequence_id'].unique()):
    group = df[df['sequence_id'] == seq_id]

    imu_data = group[imu_channels].values
    tof_data = group[tof_sensors].values.reshape(-1, 5, 4)

    # Pad or truncate
    if len(imu_data) >= time_steps:
        imu_data = imu_data[:time_steps]
        tof_data = tof_data[:time_steps]
    else:
        pad_len = time_steps - len(imu_data)
        imu_data = np.pad(imu_data, ((0, pad_len), (0, 0)), mode='constant')
        tof_data = np.pad(tof_data, ((0, pad_len), (0, 0), (0, 0)), mode='constant')

    yb = binary_map.get(seq_id, 0)
    yg = gesture_encoded.get(seq_id, -1)

    X_imu.append(imu_data)
    X_tof.append(tof_data[..., np.newaxis])
    y_binary.append(yb)
    y_gesture.append(yg)

# Convert to arrays
X_imu = np.array(X_imu)
X_tof = np.array(X_tof)
y_binary = np.array(y_binary)
y_gesture = np.array(y_gesture)

# Keep only valid gesture entries
mask = y_gesture >= 0
X_imu = X_imu[mask]
X_tof = X_tof[mask]
y_binary = y_binary[mask]
y_gesture = y_gesture[mask]


  df.fillna(method='ffill', inplace=True)
  df.fillna(method='bfill', inplace=True)
100%|███████████████████████████████████████| 8151/8151 [02:41<00:00, 50.45it/s]


In [9]:
# Normalize IMU
imu_flat = X_imu.reshape(-1, len(imu_channels))
scaler_imu = StandardScaler()
imu_scaled = scaler_imu.fit_transform(imu_flat).reshape(X_imu.shape)

# Normalize TOF
tof_flat = X_tof.reshape(-1, 5 * 4)
scaler_tof = StandardScaler()
tof_scaled = scaler_tof.fit_transform(tof_flat).reshape(X_tof.shape)

X_imu = imu_scaled
X_tof = tof_scaled


In [10]:
# IMU Input (1D CNN)
imu_input = Input(shape=(time_steps, len(imu_channels)), name="imu_input")
x1 = Conv1D(64, 5, activation='relu')(imu_input)
x1 = BatchNormalization()(x1)
x1 = MaxPooling1D(2)(x1)
x1 = Conv1D(128, 3, activation='relu')(x1)
x1 = BatchNormalization()(x1)
x1 = GlobalMaxPooling1D()(x1)

# TOF Input (2D CNN via TimeDistributed)
tof_input = Input(shape=(time_steps, 5, 4, 1), name="tof_input")
x2 = TimeDistributed(Conv2D(32, (3, 3), activation='relu', padding='same'))(tof_input)
x2 = TimeDistributed(BatchNormalization())(x2)
x2 = TimeDistributed(MaxPooling2D((2, 2)))(x2)
x2 = TimeDistributed(Flatten())(x2)
x2 = GlobalMaxPooling1D()(x2)

# Merge
merged = concatenate([x1, x2])
x = Dense(128, activation='relu')(merged)
x = Dropout(0.3)(x)

# Outputs
binary_output = Dense(1, activation='sigmoid', name="binary_output")(x)
gesture_output = Dense(len(gesture_encoder.classes_), activation='softmax', name="gesture_output")(x)

# Compile
model = Model(inputs=[imu_input, tof_input], outputs=[binary_output, gesture_output])
optimizer = Adam(learning_rate=1e-4, clipvalue=1.0)
model.compile(
    optimizer=optimizer,
    loss={
        'binary_output': 'binary_crossentropy',
        'gesture_output': 'sparse_categorical_crossentropy'
    },
    metrics={
        'binary_output': 'accuracy',
        'gesture_output': 'accuracy'
    }
)
model.summary()


In [11]:
# Split data
X_imu_train, X_imu_val, X_tof_train, X_tof_val, yb_train, yb_val, yg_train, yg_val = train_test_split(
    X_imu, X_tof, y_binary, y_gesture, stratify=y_binary, test_size=0.2, random_state=42
)

# Train
history = model.fit(
    x={'imu_input': X_imu_train, 'tof_input': X_tof_train},
    y={'binary_output': yb_train, 'gesture_output': yg_train},
    validation_data=(
        {'imu_input': X_imu_val, 'tof_input': X_tof_val},
        {'binary_output': yb_val, 'gesture_output': yg_val}
    ),
    epochs=50,
    batch_size=32
)


Epoch 1/50


I0000 00:00:1751559285.118742   12006 service.cc:152] XLA service 0x79f9bc006670 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1751559285.118758   12006 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce GTX 1650, Compute Capability 7.5
2025-07-03 21:14:45.775245: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1751559287.977838   12006 cuda_dnn.cc:529] Loaded cuDNN version 90300
2025-07-03 21:14:50.531884: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:549] Omitted potentially buggy algorithm eng14{k25=0} for conv %cudnn-conv-bias-activation.308 = (f32[32,32,5,4]{3,2,1,0}, u8[0]{0}) custom-call(f32[32,1,5,4]{3,2,1,0} %bitcast.71516, f32[32,1,3,3]{3,2,1,0} %bitcast.71523, f32[32]{0} %bitcast.101332), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, custom_call_target="_

[1m127/128[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 161ms/step - binary_output_accuracy: 0.8941 - binary_output_loss: 0.2909 - gesture_output_accuracy: 0.1324 - gesture_output_loss: 4.0294 - loss: 4.3203

2025-07-03 21:16:07.899444: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:549] Omitted potentially buggy algorithm eng14{k25=0} for conv %cudnn-conv-bias-activation.308 = (f32[26,32,5,4]{3,2,1,0}, u8[0]{0}) custom-call(f32[26,1,5,4]{3,2,1,0} %bitcast.71516, f32[32,1,3,3]{3,2,1,0} %bitcast.71523, f32[32]{0} %bitcast.101332), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", metadata={op_type="Conv2D" op_name="functional_1_1/time_distributed_4_1/convolution" source_file="/home/daniyal/miniconda3/envs/tf_gpu/lib/python3.10/site-packages/tensorflow/python/framework/ops.py" source_line=1200}, backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"conv_result_scale":1,"activation_mode":"kNone","side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false}



[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 518ms/step - binary_output_accuracy: 0.8946 - binary_output_loss: 0.2894 - gesture_output_accuracy: 0.1325 - gesture_output_loss: 4.0236 - loss: 4.3131

2025-07-03 21:16:51.013238: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_picker.cc:549] Omitted potentially buggy algorithm eng14{k25=0} for conv %cudnn-conv-bias-activation.308 = (f32[32,32,5,4]{3,2,1,0}, u8[0]{0}) custom-call(f32[32,1,5,4]{3,2,1,0} %bitcast.8805, f32[32,1,3,3]{3,2,1,0} %bitcast.8812, f32[32]{0} %bitcast.12161), window={size=3x3 pad=1_1x1_1}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convBiasActivationForward", metadata={op_type="Conv2D" op_name="functional_1_1/time_distributed_4_1/convolution" source_file="/home/daniyal/miniconda3/envs/tf_gpu/lib/python3.10/site-packages/tensorflow/python/framework/ops.py" source_line=1200}, backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_backend_config":{"conv_result_scale":1,"activation_mode":"kRelu","side_input_scale":0,"leakyrelu_alpha":0},"force_earliest_schedule":false}
2025-07-03 21:16:53.207350: I external/local_xla/xla/service/gpu/autotuning/conv_algorithm_pi

[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m139s[0m 567ms/step - binary_output_accuracy: 0.8952 - binary_output_loss: 0.2881 - gesture_output_accuracy: 0.1325 - gesture_output_loss: 4.0179 - loss: 4.3060 - val_binary_output_accuracy: 1.0000 - val_binary_output_loss: 0.0890 - val_gesture_output_accuracy: 0.1398 - val_gesture_output_loss: 2.4484 - val_loss: 2.5372
Epoch 2/50
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 163ms/step - binary_output_accuracy: 0.9990 - binary_output_loss: 0.0393 - gesture_output_accuracy: 0.1598 - gesture_output_loss: 2.4629 - loss: 2.5022 - val_binary_output_accuracy: 1.0000 - val_binary_output_loss: 0.0469 - val_gesture_output_accuracy: 0.1535 - val_gesture_output_loss: 2.1533 - val_loss: 2.2002
Epoch 3/50
[1m128/128[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 163ms/step - binary_output_accuracy: 0.9997 - binary_output_loss: 0.0400 - gesture_output_accuracy: 0.2083 - gesture_output_loss: 2.1434 - loss: 2.183

In [12]:
# Predict
binary_preds = model.predict({'imu_input': X_imu_val, 'tof_input': X_tof_val})[0]
gesture_preds = model.predict({'imu_input': X_imu_val, 'tof_input': X_tof_val})[1]

# Binary classification scores
binary_preds_class = (binary_preds > 0.5).astype(int)
print("✅ Binary Accuracy:", accuracy_score(yb_val, binary_preds_class))
print("✅ Binary F1 Score:", f1_score(yb_val, binary_preds_class))

# Multiclass scores
gesture_preds_class = np.argmax(gesture_preds, axis=1)
print("🎯 Gesture Accuracy:", accuracy_score(yg_val, gesture_preds_class))
print("🎯 Gesture Macro F1 Score:", f1_score(yg_val, gesture_preds_class, average='macro'))


[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 110ms/step
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
✅ Binary Accuracy: 1.0
✅ Binary F1 Score: 1.0
🎯 Gesture Accuracy: 0.3626588465298143
🎯 Gesture Macro F1 Score: 0.3499648957759224
