In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
import os
import re

# ===================================
# Load Data
# ===================================

# Read the full training table from the Kaggle input directory and report
# how many rows were loaded.
print("Loading dataset...")
train_csv_path = '/kaggle/input/cmi-detect-behavior-with-sensor-data/train.csv'
df = pd.read_csv(train_csv_path)
print(f"Loaded {len(df)} rows.")

# Columns plotted and averaged by the rest of this script, in plotting order.
# NOTE(review): presumably acc_* are accelerometer axes and rot_* are rotation
# quaternion components - confirm against the dataset description.
train_column = [
    'acc_x', 'acc_y', 'acc_z',
    'rot_w', 'rot_x', 'rot_y', 'rot_z',
]


# ===================================
# sensor trends over time for the first sequence_id
# ===================================

# Select every row belonging to the sequence_id of the first row of the table.
first_seq_id = df['sequence_id'].iloc[0]
example_seq = df[df['sequence_id'] == first_seq_id]
# The gesture label is read from the first row of the table, i.e. a row of
# the sequence selected above.
first_gesture = df['gesture'].iloc[0]
plot_title = f"Sensor signal for {first_seq_id} (gesture:{first_gesture})"

# Draw one line per sensor column; the x axis is the DataFrame index.
plt.figure(figsize=(12, 6))
for col in train_column:
    plt.plot(example_seq[col], label=col)
plt.grid()
plt.legend()
plt.title(plot_title)

# The title cannot be used verbatim as a file name: it contains ':' and the
# gesture label may contain path separators. Strip the same character set
# that this script strips for the per-gesture plot files, and spell out the
# extension so the image format does not depend on what the label contains.
safe_title = re.sub(r'[\\/:"*?<>|]+', '_', plot_title)
# The figure is intentionally left open so notebook inline display still
# shows it at the end of the cell.
plt.savefig(f"{safe_title}.png")


# ===================================
# calculate the average sensor trajectories and plot
# ===================================

# Every sequence is resampled to a common length: the 90th percentile of the
# per-sequence row counts.
seq_lengths = df.groupby('sequence_id').size().values
TARGET_LENGTH = int(np.percentile(seq_lengths, 90))
print(f"set uniform length：{TARGET_LENGTH}")

# Sequences with fewer rows than this are left out of the averages.
MIN_SEQ_LENGTH = 10

# Slice the table once per sequence_id up front. Filtering the whole
# DataFrame again for every sequence costs a full scan of the table per
# sequence; a single groupby pass yields the same per-sequence arrays
# (rows stay in their original order within each group).
seq_arrays = {
    seq_id: seq_rows.values
    for seq_id, seq_rows in df.groupby('sequence_id', sort=False)[train_column]
}

# Normalised time axis shared by every resampled sequence.
target_grid = np.linspace(0, 1, TARGET_LENGTH)

gesture_avg = {}
gestures = df['gesture'].unique()

print("calculating...")
for gesture in gestures:
    all_sequences = []
    seq_ids = df.loc[df['gesture'] == gesture, 'sequence_id'].unique()

    for seq_id in seq_ids:
        # .get(): groupby drops missing keys, so an id without a group is
        # skipped just like an empty selection would be.
        seq_data = seq_arrays.get(seq_id)
        if seq_data is None or seq_data.shape[0] < MIN_SEQ_LENGTH:
            continue    # do not take if too short

        # Linearly interpolate each column from its own normalised time
        # axis onto the shared target grid.
        source_grid = np.linspace(0, 1, seq_data.shape[0])
        interp_seq = np.stack(
            [
                interp1d(source_grid, seq_data[:, col_idx])(target_grid)
                for col_idx in range(seq_data.shape[1])
            ],
            axis=1,
        )  # shape: (TARGET_LENGTH, number of columns in train_column)

        all_sequences.append(interp_seq)

    if all_sequences:
        # NOTE(review): np.mean propagates NaN, so a missing sensor value in
        # any one sequence makes the averaged curve NaN at that time step -
        # confirm whether np.nanmean is wanted here.
        gesture_avg[gesture] = np.mean(np.stack(all_sequences), axis=0)
print("calculate completed.")

# Save one figure per gesture showing its averaged sensor curves; all files
# go into the gesture_plots/ directory, which is created if missing.
os.makedirs("gesture_plots", exist_ok=True)

for gesture, avg_seq in gesture_avg.items():
    fig, ax = plt.subplots(figsize=(12, 6))
    # avg_seq is indexed [time step, column]; transposing walks it one
    # column at a time, in the same order as train_column.
    for col, trace in zip(train_column, avg_seq.T):
        ax.plot(trace, label=col)
    ax.set_title(f"Average sensor pattern for gesture_{gesture}")
    ax.set_xlabel("Normalized Time Step")
    ax.set_ylabel("Sensor Value")
    ax.legend()
    ax.grid(True)
    fig.tight_layout()

    # Collapse each run of characters that are unsafe in file names into a
    # single underscore before building the output path.
    safe_gesture = re.sub(r'[\\/:"*?<>|]+', '_', str(gesture))
    filename = f"gesture_plots/gesture_{safe_gesture}.png"

    # Write the image, then release the figure so open figures do not
    # accumulate across gestures.
    fig.savefig(filename)
    plt.close(fig)


Loading dataset...
Loaded 574945 rows.
set uniform length：103
calculating...
