In [1]:
# --- Step 1: Dependencies ---
import json
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import warnings

warnings.filterwarnings('ignore')
sns.set_style('whitegrid')



In [2]:
# --- Step 2: Load and Parse Data ---
# (This cell contains your `parse_data_file` function)
# (It loads `human_data.txt` and `animal_data.txt` into `all_data`)

def parse_data_file(filename):
    """Reads a data file, skipping headers and parsing JSON lines."""
    if not os.path.exists(filename):
        print(f"Warning: File not found - {filename}. Skipping.")
        return []
    
    data = []
    in_data_stream = False
    with open(filename, 'r') as f:
        for line in f:
            line = line.strip().replace('\\\\', '\\')
            if line == '=== BEGIN DATA STREAM ===':
                in_data_stream = True; continue
            if line == '=== END DATA STREAM ===':
                in_data_stream = False; continue
            
            if in_data_stream and line.startswith('{'):
                try:
                    data.append(json.loads(line))
                except json.JSONDecodeError:
                    print(f"Warning: Could not parse line in {filename}: {line}")
    return data

In [3]:
# NOTE: Update your data files to match your new format
human_data = parse_data_file('../data/data_human.txt')
animal_data = parse_data_file('../data/data_animal.txt')
all_data = human_data + animal_data

print(f"Loaded {len(human_data)} human data samples.")
print(f"Loaded {len(animal_data)} animal data samples.")
print(f"Total samples: {len(all_data)}")

Loaded 250 human data samples.
Loaded 250 animal data samples.
Total samples: 500


## Step 3: Prepare Data for Mixed-Input Model

This step is new. We won't create a `DataFrame`. Instead, we will create two separate input arrays for our model:

1.  `X_thermal`: A 4D NumPy array `(num_samples, 8, 8, 1)` for the CNN.
2.  `X_tabular`: A 2D NumPy array `(num_samples, 5)` for the Doppler/Mic data.

In [5]:
# Based on your data files:
# Doppler: 3 features (speed, range, energy)
# Mic: 2 features (rms_mean, peak_mean)
# Total tabular features = 5
TABULAR_FEATURES = 5

thermal_images = []
tabular_data = []
labels = []

# --- 1. Parse all data into lists ---
for sample in all_data:
    try:
        # Input 1: Thermal Image
        thermal = np.array(sample['thermal'], dtype=np.float32)
        if thermal.shape[0] != 64:
            print(f"Skipping sample, thermal is not 64 pixels: {thermal.shape[0]}")
            continue
        thermal_images.append(thermal.reshape(8, 8, 1)) # Reshape for CNN
        
        # Input 2: Tabular Data
        # --- FIX ---
        # Using the keys from your original .txt files
        tabular_data.append([
            sample['doppler']['speed'],
            sample['doppler']['range'],
            sample['doppler']['energy'],
            sample['mic']['rms_mean'],  # <-- FIXED (was 'rms')
            sample['mic']['peak_mean'] # <-- FIXED (was 'peak')
        ])
        
        # Target: Label
        labels.append(sample['label'])
        
    except KeyError as e:
        print(f"SkiPping sample, missing key: {e}")

# --- 2. Convert lists to NumPy arrays ---
X_thermal = np.array(thermal_images)
X_tabular = np.array(tabular_data, dtype=np.float32)
y_labels = np.array(labels)

# --- This is the check that was failing ---
if X_tabular.shape[0] == 0:
    print("\n" + "="*50)
    print("ERROR: No data was loaded into the tabular array.")
    print("This is likely a KEY MISMATCH.")
    print("Please check that the keys in the code (e.g., 'rms_mean')")
    print("exactly match the keys in your .txt data files.")
    print("="*50 + "\n")
else:
    print(f"Thermal input shape: {X_thermal.shape}")
    print(f"Tabular input shape: {X_tabular.shape}")
    print(f"Labels shape: {y_labels.shape}")

    # --- 3. Scale ONLY the tabular data ---
    scaler = StandardScaler()
    X_tabular_scaled = scaler.fit_transform(X_tabular)

    # --- IMPORTANT: Save these scaling values for the ESP32! ---
    print("\n--- ESP32 SCALING CONSTANTS ---")
    print(f"MEANS = {scaler.mean_.tolist()}")
    print(f"STDS = {scaler.scale_.tolist()}")
    print("---------------------------------")

    # --- 4. Create Train/Test Split ---
    X_thermal_train, X_thermal_test, \
    X_tabular_train, X_tabular_test, \
    y_train, y_test = train_test_split(
        X_thermal,
        X_tabular_scaled, # Use the scaled data
        y_labels,
        test_size=0.2,
        stratify=y_labels,
        random_state=42
    )

    print(f"\nTraining Thermal shape: {X_thermal_train.shape}")
    print(f"Training Tabular shape: {X_tabular_train.shape}")
    print(f"Training Labels shape: {y_train.shape}")

Thermal input shape: (500, 8, 8, 1)
Tabular input shape: (500, 5)
Labels shape: (500,)

--- ESP32 SCALING CONSTANTS ---
MEANS = [-1.695539999909699, 3.00282000374794, 11138.166, 0.0007140000037034042, 11863.846]
STDS = [1.6808685565195818, 1.792998449711218, 42628.66762002359, 0.0004173296001566178, 7253.8063380741005]
---------------------------------

Training Thermal shape: (400, 8, 8, 1)
Training Tabular shape: (400, 5)
Training Labels shape: (400,)


## Step 4: Build the Mixed-Input Keras Model

Here is the core of the new design. We use the Keras "Functional API" to build the two branches and merge them.

In [6]:
def create_mixed_model(tabular_shape=5, img_shape=(8, 8, 1)):
    # --- 1. CNN Branch (for Thermal Image) ---
    img_input = layers.Input(shape=img_shape, name="thermal_input")
    
    # A small, ESP32-friendly CNN
    x = layers.Conv2D(8, kernel_size=(3, 3), activation='relu', padding='same')(img_input)
    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
    x = layers.Flatten()(x)
    cnn_output = layers.Dense(16, activation='relu')(x) # Output 16 features
    
    # --- 2. Vector Branch (for Doppler/Mic) ---
    vec_input = layers.Input(shape=(tabular_shape,), name="vector_input")
    
    # A small dense layer to process the tabular data
    vec_output = layers.Dense(8, activation='relu')(vec_input) # Output 8 features

    # --- 3. Merge Branches ---
    combined = layers.Concatenate()([cnn_output, vec_output])
    
    # --- 4. Classification Head ---
    z = layers.Dense(16, activation='relu')(combined)
    z = layers.Dropout(0.5)(z) # Add dropout for regularization
    output = layers.Dense(1, activation='sigmoid')(z) # Sigmoid for 0/1 probability
    
    # --- 5. Create and return the model ---
    model = Model(inputs=[img_input, vec_input], outputs=output)
    return model

In [7]:
model = create_mixed_model(tabular_shape=TABULAR_FEATURES, img_shape=(8, 8, 1))

# Compile the model
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()

## Step 5: Train the Mixed-Input Model

When we call `model.fit()`, we have to pass the input data as a list, matching the order of our `Model` inputs: `[X_thermal_train, X_tabular_train]`

In [8]:
print("Training the mixed-input model...")

# Create the list of inputs for training
X_train_list = [X_thermal_train, X_tabular_train]
X_test_list = [X_thermal_test, X_tabular_test]

history = model.fit(
    X_train_list,
    y_train,
    epochs=100, # This model may need more epochs to train
    validation_data=(X_test_list, y_test),
    batch_size=16,
    verbose=1
)

print("Model training complete.")

Training the mixed-input model...
Epoch 1/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.4701 - loss: 1.1216 - val_accuracy: 0.5800 - val_loss: 0.6856
Epoch 2/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5290 - loss: 0.7057 - val_accuracy: 0.4600 - val_loss: 0.6870
Epoch 3/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5230 - loss: 0.7074 - val_accuracy: 0.6000 - val_loss: 0.6829
Epoch 4/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.4771 - loss: 0.7015 - val_accuracy: 0.5900 - val_loss: 0.6835
Epoch 5/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5071 - loss: 0.7163 - val_accuracy: 0.6100 - val_loss: 0.6817
Epoch 6/100
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5678 - loss: 0.6803 - val_accuracy: 0.6700 - val_loss: 0.6713


In [9]:
# --- Evaluate the model on the test set ---`

test_loss, test_accuracy = model.evaluate(X_test_list, y_test, verbose=0)
print(f"\nTest Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


Test Loss: 0.4038
Test Accuracy: 0.8600


## Step 6: Convert to TFLite and Quantize

This is now more complex. The `representative_dataset` generator must provide data for *both* inputs.

In [13]:
print("Converting Keras model to TensorFlow Lite...")

# 1. Define the representative dataset generator
def representative_dataset_gen():
    # Use 100 samples from the training data
    for i in range(100):
        # Get one sample for each input
        thermal_sample = X_thermal_train[i].reshape(1, 8, 8, 1)
        tabular_sample = X_tabular_train[i].reshape(1, TABULAR_FEATURES)
        
        # Yield a list of inputs
        yield [thermal_sample.astype(np.float32), tabular_sample.astype(np.float32)]

# 2. Create a converter
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# 3. Set the converter flags for INT8 quantization
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
# Note: Input/Output types are now mixed, so let TFLite handle it.
# We will still quantize our inputs on the ESP32 manually.

# 4. Convert the model!
tflite_quant_model = converter.convert()

# 5. Save the quantized model to a file
model_filename = 'movement_model_cnn.tflite'
with open(model_filename, 'wb') as f:
    f.write(tflite_quant_model)

print(f"Quantized TFLite model saved as: {model_filename}")
print(f"Quantized model size: {len(tflite_quant_model)} bytes")

Converting Keras model to TensorFlow Lite...
INFO:tensorflow:Assets written to: /var/folders/r5/m5977dy13hd_9klxgsmylm740000gn/T/tmp3xly6li4/assets


INFO:tensorflow:Assets written to: /var/folders/r5/m5977dy13hd_9klxgsmylm740000gn/T/tmp3xly6li4/assets


Saved artifact at '/var/folders/r5/m5977dy13hd_9klxgsmylm740000gn/T/tmp3xly6li4'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): List[TensorSpec(shape=(None, 8, 8, 1), dtype=tf.float32, name='thermal_input'), TensorSpec(shape=(None, 5), dtype=tf.float32, name='vector_input')]
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  5415061808: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5419416208: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5419701168: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5419700992: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5419702752: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5419702400: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5419719488: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5419718432: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5419740320: TensorSpec(shape=(), dtype=tf.resource, name=None)
  5419751552: Te

W0000 00:00:1761849455.229785 3678178 tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
W0000 00:00:1761849455.229801 3678178 tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2025-10-30 14:37:35.229918: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /var/folders/r5/m5977dy13hd_9klxgsmylm740000gn/T/tmp3xly6li4
2025-10-30 14:37:35.230402: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2025-10-30 14:37:35.230409: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /var/folders/r5/m5977dy13hd_9klxgsmylm740000gn/T/tmp3xly6li4
2025-10-30 14:37:35.235333: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2025-10-30 14:37:35.249362: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /var/folders/r5/m5977dy13hd_9klxgsmylm740000gn/T/tmp3xly6li4
2025-10-30 14:37:35.254578: I tensorflow/cc/saved_model/loader.cc:

## Step 7: Convert TFLite Model to C Array

Same as before. This creates the `.h` file you'll copy into your ESP32 project.

In [14]:
# Use the 'xxd' tool to convert the file to a C array
# This creates a file named 'model_data_cnn.h'
!echo "const unsigned char model[] = {" > model_data_cnn.h
!xxd -i movement_model_cnn.tflite >> model_data_cnn.h
!echo "};" >> model_data_cnn.h

print("Created 'model_data_cnn.h' file.")
print("\n--- First 10 lines of 'model_data_cnn.h' ---")
!head -n 10 model_data_cnn.h
print("------------------------------------------")

Created 'model_data_cnn.h' file.

--- First 10 lines of 'model_data_cnn.h' ---
const unsigned char model[] = {
unsigned char movement_model_cnn_tflite[] = {
  0x20, 0x00, 0x00, 0x00, 0x54, 0x46, 0x4c, 0x33, 0x00, 0x00, 0x00, 0x00,
  0x14, 0x00, 0x20, 0x00, 0x1c, 0x00, 0x18, 0x00, 0x14, 0x00, 0x10, 0x00,
  0x0c, 0x00, 0x00, 0x00, 0x08, 0x00, 0x04, 0x00, 0x14, 0x00, 0x00, 0x00,
  0x1c, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x00, 0x00, 0x08, 0x01, 0x00, 0x00,
  0x8c, 0x0d, 0x00, 0x00, 0x9c, 0x0d, 0x00, 0x00, 0x4c, 0x21, 0x00, 0x00,
  0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00,
  0xa6, 0xf0, 0xff, 0xff, 0x0c, 0x00, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00,
  0x3c, 0x00, 0x00, 0x00, 0x0f, 0x00, 0x00, 0x00, 0x73, 0x65, 0x72, 0x76,
------------------------------------------
