In [3]:
# Step 1: Imports and configuration
import numpy as np
import pandas as pd
import os
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import tensorflow as tf

# Configurable parameters
data_dir = 'ML/data/kinect_good_preprocessed'
window_size = 10  # number of frames in each sequence
n_features = 26   # 13 joints × 2 (x and y)
n_outputs = 13    # 13 z-values to predict
model_type = 'conv'  

# Collect all relevant CSVs
csv_files = [f for f in os.listdir(data_dir) if f.endswith('kinect.csv')]
all_data = pd.DataFrame()

for file in csv_files:
    df = pd.read_csv(os.path.join(data_dir, file))
    all_data = pd.concat([all_data, df], ignore_index=True)

# Drop FrameNo column
if 'FrameNo' in all_data.columns:
    all_data.drop(columns='FrameNo', inplace=True)

# Separate x, y, z columns
x_columns = [col for col in all_data.columns if col.endswith('_x')]
y_columns = [col for col in all_data.columns if col.endswith('_y')]
z_columns = [col for col in all_data.columns if col.endswith('_z')]

# Create inputs and targets
X = all_data[x_columns + y_columns].values  # shape: (N, 26)
y = all_data[z_columns].values              # shape: (N, 13)

print("Raw data shapes:")
print("X:", X.shape)
print("y:", y.shape)


Raw data shapes:
X: (24005, 26)
y: (24005, 13)


In [4]:
# Step 2: Create overlapping sequences
def create_sequences(X, y, seq_len):
    X_seq, y_seq = [], []
    for i in range(len(X) - seq_len):
        X_seq.append(X[i:i+seq_len])        # 10-frame sequence
        y_seq.append(y[i + seq_len - 1])    # z-value at the last frame
    return np.array(X_seq), np.array(y_seq)

# Generate the sequences
X_seq, y_seq = create_sequences(X, y, window_size)

print("Sequence data shapes:")
print("X_seq:", X_seq.shape)
print("y_seq:", y_seq.shape)


Sequence data shapes:
X_seq: (23995, 10, 26)
y_seq: (23995, 13)


Run 10 separate train/val splits

Normalize inputs & outputs properly per fold

Use early stopping to avoid overfitting

Print per-fold and average metrics

In [None]:
# Step 3: Conv1D model with cross-validation
from sklearn.model_selection import KFold
from tensorflow.keras.layers import Conv1D, Flatten, Dense, Dropout, LayerNormalization, Input
from tensorflow.keras.models import Model

# Define LSTM model
def create_lstm_model(window_size, n_features, n_outputs):
    inputs = tf.keras.Input(shape=(window_size, n_features))
    
    # LSTM layer (can also try return_sequences=True for stacking)
    x = layers.LSTM(64, return_sequences=False)(inputs)
    x = layers.Dropout(0.3)(x)
    
    # Fully connected layers after temporal encoding
    x = layers.Dense(64, activation='relu')(x)
    outputs = layers.Dense(n_outputs, activation='linear')(x)
    
    model = models.Model(inputs, outputs)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model


# conv model
def create_conv_model(window_size, n_features, n_outputs):
    inputs = Input(shape=(window_size, n_features))
    x = Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(inputs)
    x = LayerNormalization()(x)
    x = Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(x)
    x = Flatten()(x)
    x = Dense(64, activation='relu')(x)
    outputs = Dense(n_outputs)(x)
    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# Cross-validation setup
kf = KFold(n_splits=10, shuffle=True, random_state=42)
val_scores = []

fold = 1
for train_idx, val_idx in kf.split(X_seq):
    print(f"\nTraining fold {fold} of {kf.get_n_splits()}...")
    
    # Split
    X_train_fold, X_val_fold = X_seq[train_idx], X_seq[val_idx]
    y_train_fold, y_val_fold = y_seq[train_idx], y_seq[val_idx]
    
    # Scale inputs (frame-wise)
    X_scaler = StandardScaler()
    X_train_flat = X_train_fold.reshape(-1, n_features)
    X_val_flat = X_val_fold.reshape(-1, n_features)
    X_train_scaled = X_scaler.fit_transform(X_train_flat).reshape(X_train_fold.shape)
    X_val_scaled = X_scaler.transform(X_val_flat).reshape(X_val_fold.shape)
    
    # Scale targets
    y_scaler = StandardScaler()
    y_train_scaled = y_scaler.fit_transform(y_train_fold)
    y_val_scaled = y_scaler.transform(y_val_fold)
    
    # Create model
    model = create_conv_model(window_size, n_features, n_outputs)
    
    # Train
    history = model.fit(
        X_train_scaled, y_train_scaled,
        validation_data=(X_val_scaled, y_val_scaled),
        epochs=100,
        batch_size=32,
        callbacks=[EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)],
        verbose=0
    )
    
    # Evaluate
    val_loss, val_mae = model.evaluate(X_val_scaled, y_val_scaled, verbose=0)
    print(f" Fold {fold} - Validation MSE: {val_loss:.4f}, MAE: {val_mae:.4f}")
    val_scores.append((val_loss, val_mae))
    fold += 1

# Average results
avg_mse = np.mean([score[0] for score in val_scores])
avg_mae = np.mean([score[1] for score in val_scores])
print("\nFinal 10-Fold Results:")
print(f"Average Validation MSE: {avg_mse:.4f}")
print(f"Average Validation MAE: {avg_mae:.4f}")



Training fold 1 of 10...
 Fold 1 - Validation MSE: 0.0252, MAE: 0.1183

Training fold 2 of 10...
 Fold 2 - Validation MSE: 0.0237, MAE: 0.1160

Training fold 3 of 10...
 Fold 3 - Validation MSE: 0.0249, MAE: 0.1187

Training fold 4 of 10...
 Fold 4 - Validation MSE: 0.0241, MAE: 0.1169

Training fold 5 of 10...
 Fold 5 - Validation MSE: 0.0294, MAE: 0.1287

Training fold 6 of 10...
 Fold 6 - Validation MSE: 0.0266, MAE: 0.1227

Training fold 7 of 10...
 Fold 7 - Validation MSE: 0.0241, MAE: 0.1163

Training fold 8 of 10...
 Fold 8 - Validation MSE: 0.0256, MAE: 0.1198

Training fold 9 of 10...
 Fold 9 - Validation MSE: 0.0248, MAE: 0.1182

Training fold 10 of 10...
 Fold 10 - Validation MSE: 0.0275, MAE: 0.1258

Final 10-Fold Results:
Average Validation MSE: 0.0256
Average Validation MAE: 0.1201
