# TPU approach

I'm learning to use TPUs (see https://www.kaggle.com/code/ryanholbrook/getting-started-with-tpus), so this is my attempt for this competition.

**Note:** The TPU is not currently working. I've asked about it in the forums: https://www.kaggle.com/discussions/questions-and-answers/582296

In [6]:
import math, re, os
import tensorflow as tf
import numpy as np
from matplotlib import pyplot as plt
#from kaggle_datasets import KaggleDatasets
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix

import pandas as pd
#import polars as pl
from sklearn.preprocessing import LabelEncoder
#from kaggle_evaluation.api import GesturePredictor

In [7]:
# Based on https://www.kaggle.com/code/ryanholbrook/getting-started-with-tpus
print("Tensorflow version " + tf.__version__)
# `tf.data.AUTOTUNE` is the stable name of `tf.data.experimental.AUTOTUNE`.
AUTO = tf.data.AUTOTUNE

# Detect TPU, return appropriate distribution strategy.
# NOTE(review): some Kaggle TPU runtimes reportedly need
# `TPUClusterResolver(tpu="local")` for detection to succeed - confirm
# against the current Kaggle TPU documentation before changing this call.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    # The resolver raises ValueError when it cannot find a TPU to connect to.
    tpu = None
    print("not running on TPU")

if tpu:
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    # `tf.distribute.experimental.TPUStrategy` is deprecated; use the stable
    # `tf.distribute.TPUStrategy` instead.
    strategy = tf.distribute.TPUStrategy(tpu)
else:
    # Default strategy: runs on whatever CPU/GPU is available.
    strategy = tf.distribute.get_strategy()

print("REPLICAS: ", strategy.num_replicas_in_sync)

Tensorflow version 2.18.0
not running on TPU
REPLICAS:  1


## Read Data

In [None]:
# Read the raw training table, then keep the distinct
# (sequence_id, gesture) pairs to serve as the label table.
train_df = pd.read_csv("/kaggle/input/cmi-detect-behavior-with-sensor-data/train.csv")
targets = train_df.loc[:, ['sequence_id', 'gesture']].drop_duplicates()


In [None]:
# Preview the first rows of the raw training table.
train_df.head()

In [None]:
# Load the demographics table (joined to the sensor rows on `subject`
# further down) and preview its first rows.
train_demographics = pd.read_csv("/kaggle/input/cmi-detect-behavior-with-sensor-data/train_demographics.csv")
train_demographics.head()

### Merge all in one dataset

There may be a smarter way to do this merge, but for now, let's put it all together.

In [None]:
# Attach the demographics columns to every sensor row; the left join keeps
# all sensor rows even when a subject has no demographics entry.
train_df = train_df.merge(train_demographics, on='subject', how='left')
# `fillna(method='ffill')` is deprecated in pandas 2.x; `ffill()` is the
# supported equivalent.
# NOTE(review): the fill runs over the whole frame, so a gap at the start of
# one sequence inherits values from the previous sequence - confirm this is
# intended.
train_df = train_df.ffill()

In [None]:
# Preview the first rows of the merged, forward-filled table.
train_df.head()

In [None]:
# Summary statistics (pandas `describe`) for the merged table.
train_df.describe()


In [None]:
# Show every column name of the merged table as one comma-separated string.
', '.join(train_df.columns.to_list())

## Prepare Targets and Feature Selection

We only use a few features for now, to simplify the model.

In [None]:

# Turn gesture names into integer class ids and keep a name -> id lookup.
label_encoder = LabelEncoder().fit(targets['gesture'])
targets['gesture_enc'] = label_encoder.transform(targets['gesture'])
gesture2id = dict(
    zip(
        label_encoder.classes_,
        label_encoder.transform(label_encoder.classes_),
    )
)

# Features to use (IMU only).
# NOTE: this list is reassigned by the feature-selection cell that follows.
FEATURES = ['acc_x', 'acc_y', 'acc_z', 'rot_w', 'rot_x', 'rot_y', 'rot_z']



In [None]:

# Candidate input columns, grouped by name prefix / origin.
IMU_FEATURES = [
    'acc_x', 'acc_y', 'acc_z',
    'rot_w', 'rot_x', 'rot_y', 'rot_z',
]
THERMO_FEATURES = ['thm_' + str(idx) for idx in range(1, 6)]
TOF_FEATURES = [
    'tof_' + str(tof_idx) + '_v' + str(v_idx)
    for tof_idx in range(1, 6)
    for v_idx in range(64)
]
DEMO_FEATURES = [
    'adult_child',
    'age',
    'sex',
    'handedness',
    'height_cm',
    'shoulder_to_wrist_cm',
    'elbow_to_wrist_cm',
]

# Model inputs: everything except the TOF columns, which are left out for
# now because of their sparsity.
FEATURES = [*IMU_FEATURES, *THERMO_FEATURES, *DEMO_FEATURES]

In [None]:
# Display the list of model input columns.
FEATURES

## Prepare Sequences

In [None]:
# Prepare fixed-length sequences: one (SEQ_LEN, n_features) array per sequence.
SEQ_LEN = 64  # time steps kept per sequence; must match the model's input length

# Label lookup by sequence id. If a sequence id appears more than once the
# first entry wins, matching the previous `.values[0]` behaviour.
label_by_seq = (
    targets.drop_duplicates('sequence_id')
    .set_index('sequence_id')['gesture_enc']
)

X, y = [], []
# A single groupby pass replaces re-filtering the whole frame once per
# sequence; sort=False keeps sequences in order of first appearance, the same
# order `unique()` produced.
for seq_id, df in train_df.groupby('sequence_id', sort=False):
    feats = df[FEATURES]
    if feats.isnull().values.any():
        continue  # skip incomplete sequences
    x = feats.values.astype(np.float32)
    if x.shape[0] < SEQ_LEN:
        # Short sequence: repeat the last row until SEQ_LEN steps are reached.
        pad_width = SEQ_LEN - x.shape[0]
        x = np.pad(x, ((0, pad_width), (0, 0)), mode='edge')
    else:
        # Long sequence: keep only the first SEQ_LEN steps.
        x = x[:SEQ_LEN]
    X.append(x)
    y.append(label_by_seq[seq_id])

if not X:
    raise ValueError("No complete sequences left after filtering on FEATURES")

X = np.stack(X)
y = np.array(y)

## Build TPU model

Apparently Keras is easier for developing TPU models.

In [None]:
# Build and compile the model inside the distribution strategy's scope.
with strategy.scope():
    model = tf.keras.Sequential([
        # Input: 64 time steps, one channel per selected feature.
        tf.keras.layers.Input(shape=(64, len(FEATURES))),
        tf.keras.layers.Conv1D(64, 5, padding='same', activation='relu'),
        tf.keras.layers.Conv1D(128, 5, padding='same', activation='relu'),
        # Collapse the time axis so the dense head sees one vector per sequence.
        tf.keras.layers.GlobalAveragePooling1D(),
        tf.keras.layers.Dense(128, activation='relu'),
        # One softmax output per gesture class known to the label encoder.
        tf.keras.layers.Dense(len(label_encoder.classes_), activation='softmax')
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(1e-3),
        # Labels are integer class ids, hence the sparse loss.
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy']
    )

# A bare `model` expression only shows an opaque object repr; print the
# layer-by-layer architecture and parameter counts instead.
model.summary()

## Train!

In [None]:
# `train_ds` / `val_ds` were used here without ever being defined, so build
# them from the prepared arrays: a seeded random hold-out split wrapped in
# tf.data pipelines.
# NOTE(review): the split is by sequence, so the same subject can appear on
# both sides - consider a subject-level split for a stricter validation.
VAL_FRACTION = 0.2
BATCH_SIZE = 32 * strategy.num_replicas_in_sync  # global batch grows with replica count

split_rng = np.random.default_rng(42)  # fixed seed keeps the split reproducible
order = split_rng.permutation(len(X))
n_val = max(1, int(len(X) * VAL_FRACTION))
val_idx, train_idx = order[:n_val], order[n_val:]

train_ds = (
    tf.data.Dataset.from_tensor_slices((X[train_idx], y[train_idx]))
    .shuffle(len(train_idx), seed=42)  # reshuffled every epoch
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)
val_ds = (
    tf.data.Dataset.from_tensor_slices((X[val_idx], y[val_idx]))
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)

model.fit(train_ds, validation_data=val_ds, epochs=10)


## Create Submission Interface

In [None]:
# Create submission interface
# NOTE(review): `GesturePredictor` is not in scope - its import at the top of
# the notebook (`from kaggle_evaluation.api import GesturePredictor`) is
# commented out, so this line raises NameError as written. Confirm the
# correct import for the competition's evaluation API.
predictor = GesturePredictor()

def preprocess_sequence(df, demographics=None, features=None, seq_len=64):
    """Turn one raw sequence into a fixed-size float32 feature matrix.

    Args:
        df: Rows of a single sequence; must contain a ``subject`` column and
            the sensor columns listed in ``features``.
        demographics: Table joined onto ``df`` by ``subject``. Defaults to the
            notebook-level ``train_demographics``.
        features: Ordered column names to extract. Defaults to the
            notebook-level ``FEATURES``.
        seq_len: Number of time steps in the output.

    Returns:
        ``np.ndarray`` of shape ``(seq_len, len(features))`` and dtype
        float32, guaranteed to contain no NaN.
    """
    if demographics is None:
        demographics = train_demographics
    if features is None:
        features = FEATURES

    merged = df.merge(demographics, on='subject', how='left')
    # Forward-fill, then back-fill gaps at the start of the sequence. Columns
    # that are entirely missing (e.g. a subject absent from `demographics`)
    # fall back to 0 so the model never receives NaN.
    # `fillna(method='ffill')` is deprecated in pandas 2.x, hence `ffill()`.
    filled = merged[features].ffill().bfill().fillna(0.0)
    x = filled.to_numpy(dtype=np.float32)

    n_steps = x.shape[0]
    if n_steps == 0:
        # Edge-padding needs at least one row; return an all-zero sequence.
        return np.zeros((seq_len, len(features)), dtype=np.float32)
    if n_steps < seq_len:
        # Short sequence: repeat the last row until seq_len steps are reached.
        x = np.pad(x, ((0, seq_len - n_steps), (0, 0)), mode='edge')
    else:
        # Long sequence: keep only the first seq_len steps.
        x = x[:seq_len]
    return x

@predictor.predict_function
def predict_fn(df: pd.DataFrame) -> str:
    """Predict the gesture label for the sequence held in ``df``.

    The sequence is converted with ``preprocess_sequence``, scored by the
    trained ``model`` as a batch of one, and the highest-probability class is
    mapped back to its gesture name through ``label_encoder``.
    """
    batch = preprocess_sequence(df)[np.newaxis, ...]  # add the batch axis
    class_probs = model.predict(batch, verbose=0)[0]
    best_class = np.argmax(class_probs)
    decoded = label_encoder.inverse_transform([best_class])
    return decoded[0]

# Presumably packages the registered predict function into submission.zip -
# TODO confirm against the GesturePredictor API.
predictor.save("submission.zip")
