## Deploying and Benchmarking a Shallow Neural Network for Ship Detection on Clustergate

In this demo, we train a quantization-aware model to detect ships in satellite imagery and then deploy it on the flight computer of Clustergate.
The goal is to provide a simple demonstration of how to deploy machine learning on Clustergate and to benchmark the performance of the Phoenix computer.

With this project, we aim to explore techniques to increase inference speed and efficiency on the flight computer.

### Preparing the Dataset
Download `shipsnet.json` from https://www.kaggle.com/datasets/rhammell/ships-in-satellite-imagery and place it in the working directory (the training code loads it by relative path).

### Training the model
Run the block below to train the model. The model is first trained in float32 and then fine-tuned with quantization-aware training (QAT) for INT8 to improve the accuracy of the quantized model.

In [20]:
import tensorflow as tf
import numpy as np
import json
from sklearn.model_selection import train_test_split
import tensorflow_model_optimization as tfmot

# Load ShipsNet dataset from JSON
def load_shipsnet(json_path):
    """Read a ShipsNet JSON file and return ``(images, labels)`` arrays.

    The ``data`` entries are reshaped to ``(N, 80, 80, 3)`` and divided by
    255.0 (which maps 0-255 pixel values into [0, 1]); the ``labels``
    entries are returned as an array, unchanged.
    """
    with open(json_path, 'r') as handle:
        payload = json.load(handle)

    # NOTE(review): the Kaggle dataset description appears to say each sample
    # is stored channel-planar (all red values, then green, then blue). If so,
    # this reshape does not yield true RGB pixels. The inference cell uses the
    # same reshape, so any change must be made in both places -- confirm.
    pixels = np.array(payload['data'])
    images = pixels.reshape((-1, 80, 80, 3)) / 255.0
    labels = np.array(payload['labels'])
    return images, labels

# Build shallow CNN
def create_shallow_cnn():
    """Build the (uncompiled) shallow CNN used for ship classification.

    Architecture: two Conv2D -> ReLU -> MaxPooling2D stages (16 then 32
    filters, 3x3 kernels, 'same' padding, 2x2 pooling), followed by
    Flatten, a 64-unit Dense layer with ReLU, and a 2-unit softmax output.
    The input is an 80x80 image with 3 channels.
    """
    layers = tf.keras.layers

    stack = [
        layers.Input(shape=(80, 80, 3)),
        # Convolutional stage 1
        layers.Conv2D(16, (3, 3), padding='same', name='conv1'),
        layers.ReLU(name='relu1'),
        layers.MaxPooling2D((2, 2), name='pool1'),
        # Convolutional stage 2
        layers.Conv2D(32, (3, 3), padding='same', name='conv2'),
        layers.ReLU(name='relu2'),
        layers.MaxPooling2D((2, 2), name='pool2'),
        # Classifier head
        layers.Flatten(name='flatten'),
        layers.Dense(64, name='dense1'),
        layers.ReLU(name='relu3'),
        layers.Dense(2, activation='softmax', name='output'),
    ]
    return tf.keras.Sequential(stack)

# Main training function
def main():
    """Train the float32 baseline, fine-tune it with QAT, and export to ONNX.

    Steps, in order: load ``shipsnet.json``; hold out 20% of the samples;
    train (20 epochs) and save the float32 model; print its accuracy on the
    held-out samples; wrap the trained model with ``quantize_model`` and
    fine-tune it (10 epochs); save the quantized Keras model; convert it to
    ``shipsnet_new.onnx``.

    The same held-out split serves as ``validation_data`` during both
    training runs and as the set the float32 accuracy is computed on.
    """

    def compile_classifier(model):
        # Both the float and the QAT model use identical compile settings.
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

    # Load dataset
    dataset_file = 'shipsnet.json'  # Adjust path if needed
    images, labels = load_shipsnet(dataset_file)

    # 80/20 split with a fixed seed, then one-hot encode the two classes
    X_train, X_test, train_targets, test_targets = train_test_split(
        images, labels, test_size=0.2, random_state=42
    )
    y_train = tf.keras.utils.to_categorical(train_targets, 2)
    y_test = tf.keras.utils.to_categorical(test_targets, 2)

    # Float32 baseline
    float_model = create_shallow_cnn()
    compile_classifier(float_model)
    print("Training float32 model...")
    float_model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_test, y_test))
    float_model.save("shipsnet_float_new.h5")
    print("Float32 model saved as shipsnet_float_new.h5")

    # Baseline accuracy on the held-out samples
    probabilities = float_model.predict(X_test)
    predicted_classes = np.argmax(probabilities, axis=1)
    true_classes = np.argmax(y_test, axis=1)
    from sklearn.metrics import accuracy_score
    float_accuracy = accuracy_score(true_classes, predicted_classes)
    print(f"Float32 Accuracy: {float_accuracy:.4f}")

    # Wrap the trained float model for quantization-aware training
    qat_model = tfmot.quantization.keras.quantize_model(float_model)
    compile_classifier(qat_model)

    # Fine-tune with quantization
    print("Fine-tuning with quantization-aware training...")
    qat_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_test, y_test))

    # Save quantized Keras model
    qat_model.save("shipsnet_quantized_new.h5")
    print("Quantized Keras model saved as shipsnet_quantized_new.h5")

    # Export to ONNX (tf2onnx is imported lazily, only once export is reached)
    import tf2onnx
    onnx_path = "shipsnet_new.onnx"
    input_spec = tf.TensorSpec([None, 80, 80, 3], tf.float32, name='input')
    model_proto, _ = tf2onnx.convert.from_keras(
        qat_model,
        input_signature=[input_spec],
        opset=18,
        output_path=onnx_path,
    )
    print(f"Converted to ONNX: {onnx_path}")

# Entry point: run the full train / fine-tune / export pipeline when this
# code is executed as the main module.
if __name__ == "__main__":
    main()

Training float32 model...
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Float32 model saved as shipsnet_float_new.h5
Float32 Accuracy: 0.9712
Fine-tuning with quantization-aware training...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Quantized Keras model saved as shipsnet_quantized_new.h5


  saving_api.save_model(
I0000 00:00:1740345866.283539  314698 devices.cc:76] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 0 (Note: TensorFlow was not compiled with CUDA or ROCm support)
I0000 00:00:1740345866.284634  314698 single_machine.cc:361] Starting new session
I0000 00:00:1740345866.286403  314698 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1740345866.286424  314698 pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)
I0000 00:00:1740345866.415053  314698 pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
I0000 00:00:1740345866.415096  314698 pluggable_device_factory.cc:271] Created 

Converted to ONNX: shipsnet_new.onnx


### Inference test

In [22]:
import numpy as np
import pandas as pd
import json
import time
import os
import onnxruntime as ort
from sklearn.metrics import accuracy_score

# Benchmark the exported ONNX model: run single-image inference over a small
# labelled sample set, record the latency of every call, write the timings to
# CSV, and report accuracy and mean latency.

# NOTE(review): the status messages below print "/output", while the files
# are written to the relative directory "./output" (resolved against the
# current working directory) -- confirm which is intended.
os.makedirs('./output', exist_ok=True)
print("Output directory created: /output")

# NOTE(review): this applies the same reshape/normalisation as the training
# loader; the two must stay in sync or accuracy will silently degrade.
with open('shipsnet_20.json', 'r') as f:
    data = json.load(f)
X_test = np.array(data['data']).reshape(-1, 80, 80, 3) / 255.0  # Preprocess: Normalize to [0, 1]
y_test = np.array(data['labels'])
print(f"Loaded {X_test.shape[0]} test samples")

session = ort.InferenceSession('shipsnet_new.onnx')
input_name = session.get_inputs()[0].name
output_name = session.get_outputs()[0].name
print("ONNX model loaded")

timings = []
# Created before the loop so it exists even when there are no samples.
preds_labels = []
for i in range(X_test.shape[0]):
    timing = {'image_id': i}
    # Cast outside the timed region so only session.run() is measured.
    input_data = X_test[i:i+1].astype(np.float32)  # Float32 input
    # perf_counter() is a monotonic, high-resolution clock meant for short
    # intervals; time.time() can be too coarse for sub-millisecond calls
    # and may jump if the system clock is adjusted.
    start = time.perf_counter()
    outputs = session.run([output_name], {input_name: input_data})
    timing['total_inference'] = time.perf_counter() - start
    timings.append(timing)
    pred_label = np.argmax(outputs[0][0])
    preds_labels.append(pred_label)
print(f"Ran inference on {len(timings)} images")

quantized_accuracy = accuracy_score(y_test, preds_labels)
# Passing the column list keeps the column order fixed and also yields a
# well-formed (empty) frame when no timings were recorded.
df = pd.DataFrame(timings, columns=['image_id', 'total_inference'])
df.to_csv('./output/inference_timings.csv', index=False)
print("CSV saved to /output/inference_timings.csv")

print(f"Quantized Accuracy: {quantized_accuracy:.4f}")
print(f"Average total inference time per image: {df['total_inference'].mean():.6f} seconds")
print("Total inference timings saved to /output/inference_timings.csv")

Output directory created: /output
Loaded 20 test samples
ONNX model loaded
Ran inference on 20 images
CSV saved to /output/inference_timings.csv
Quantized Accuracy: 0.9000
Average total inference time per image: 0.000709 seconds
Total inference timings saved to /output/inference_timings.csv
