# Deployment Benchmarking Script

This script evaluates inference time, model size, and memory usage for a trained PyTorch model under deployment-like CPU conditions.

## What it measures
- Inference time (average over 100 CPU runs)
- Model size (`.pt` file saved via `torch.save`)
- RAM usage during execution (`psutil`)

## Usage
1. Set `MODEL_PATH` to your saved model.
2. Define `INPUT_SHAPE` as used during training (e.g., `(1, 800, 7)`).
3. Run the script. Results are printed to the console.

All benchmarks are performed on CPU to reflect edge deployment scenarios.


In [13]:
import torch
import torch.nn as nn
import time
import os
import numpy as np
import psutil
import torch.nn.functional as F

class CNNClassifier(nn.Module):
    def __init__(self, input_size, num_filters=32, kernel_sizes=[3, 5, 7], dropout=0.1, pooling="avg"):
        super(CNNClassifier, self).__init__()
        self.pooling = pooling
        self.conv_layers = nn.ModuleList([
            nn.Conv1d(input_size, num_filters, kernel_size=k, padding='same') for k in kernel_sizes
        ])
        self.batch_norms = nn.ModuleList([nn.BatchNorm1d(num_filters) for _ in kernel_sizes])
        self.fc = nn.Sequential(
            nn.Linear(num_filters * len(kernel_sizes), 32),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        x = x.transpose(1, 2)  # [batch, seq_len, features] -> [batch, features, seq_len]
        conv_outputs = []
        for conv, bn in zip(self.conv_layers, self.batch_norms):
            out = F.relu(bn(conv(x)))
            if self.pooling == "max":
                pooled = F.adaptive_max_pool1d(out, 1).squeeze(2)
            else:
                pooled = F.adaptive_avg_pool1d(out, 1).squeeze(2)
            conv_outputs.append(pooled)
        x = torch.cat(conv_outputs, dim=1)
        return self.fc(x)



In [14]:
# %%
# --- Load the model ---
MODEL_PATH = "results/1D-CNN_DATA_AUGMENTATION/20250528_193914/model_20250528_201856.pt"
INPUT_SHAPE = (1, 800, 7)
device = torch.device("cpu")

model = CNNClassifier(input_size=INPUT_SHAPE[2])
state_dict = torch.load(MODEL_PATH, map_location=device)
model.load_state_dict(state_dict)
model.to(device)
model.eval()

print("✅ CNN model loaded on CPU")

✅ CNN model loaded on CPU


In [15]:
# -------------------------------
# Dummy preprocessing function
# -------------------------------
def mock_preprocessing():
    # Simulate any windowing or normalization before inference
    x = np.random.randn(*INPUT_SHAPE).astype(np.float32)
    return torch.tensor(x)

In [16]:
# --- Inference Benchmark ---
from timeit import default_timer as timer
import gc

sample_input = mock_preprocessing().to(device)
for _ in range(10):  # Warm-up
    _ = model(sample_input)

times, latencies = [], []
for _ in range(100):
    gc.collect()
    input_tensor = mock_preprocessing().to(device)

    start_all = time.perf_counter()
    _ = model(input_tensor)
    end_all = time.perf_counter()

    start_infer = time.perf_counter()
    _ = model(sample_input)
    end_infer = time.perf_counter()

    latencies.append((end_all - start_all) * 1000)
    times.append((end_infer - start_infer) * 1000)

avg_time = np.mean(times)
std_time = np.std(times)
avg_latency = np.mean(latencies)

print(f"Inference Time (avg ± std): {avg_time:.2f} ± {std_time:.2f} ms")
print(f"Total Window-to-decision Latency: {avg_latency:.2f} ms")

Inference Time (avg ± std): 0.26 ± 0.07 ms
Total Window-to-decision Latency: 0.37 ms


In [17]:
# -------------------------------
# Model Size
# -------------------------------
model_size_mb = os.path.getsize(MODEL_PATH) / (1024 * 1024)
print(f"Model Size: {model_size_mb:.2f} MB")

Model Size: 0.03 MB


In [18]:
# --- Memory Usage ---
sample_input = torch.randn(INPUT_SHAPE).to(device)
process = psutil.Process(os.getpid())
mem_before = process.memory_info().rss

with torch.no_grad():
    _ = model(sample_input)

mem_after = process.memory_info().rss
delta_mem_mb = (mem_after - mem_before) / (1024 ** 2)
total_mem_mb = mem_after / (1024 ** 2)

print(f"Δ Memory Used During Inference: {delta_mem_mb:.2f} MB")
print(f"Total Memory Usage After Inference: {total_mem_mb:.2f} MB")

Δ Memory Used During Inference: 0.00 MB
Total Memory Usage After Inference: 282.16 MB


In [19]:
# %%
# --- Energy Estimate ---
ENERGY_ESTIMATED_WATT = 12
inference_energy = (avg_time / 1000) * ENERGY_ESTIMATED_WATT
print(f"Estimated Energy per Inference: {inference_energy:.3f} J")

Estimated Energy per Inference: 0.003 J


In [20]:
import subprocess
import sys

result = subprocess.run(
    [sys.executable, "memory_benchmark_CNN.py"],
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
    text=True  # decode bytes to string
)

print("STDOUT:\n", result.stdout)
print("STDERR:\n", result.stderr)


STDOUT:
 Δ Memory Used During Inference: 3.56 MB
Total Memory Usage After Inference: 180.03 MB

STDERR:
 
