# Deployment Benchmarking Script

This script evaluates inference time, model size, and memory usage for a trained PyTorch model under deployment-like CPU conditions.

## What it measures
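- Inference time (mean ± std over 100 CPU runs, after 10 warm-up runs)
- Window-to-decision latency (mean over the same 100 runs)
- Model size (size on disk of the `.pt` file saved via `torch.save`)
- RAM usage during execution (process RSS read with `psutil`)
- Estimated energy per inference (mean inference time × an assumed constant power draw)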

## Usage
1. Set `MODEL_PATH` to your saved model.
2. Define `INPUT_SHAPE` as used during training (e.g., `(1, 800, 7)`).
3. Run all cells from top to bottom. Results are printed below each cell.

All benchmarks are performed on CPU to reflect edge deployment scenarios.


In [1]:
import sys
import pandas as pd
import numpy as np
from sklearn.model_selection import GroupKFold, StratifiedGroupKFold
from tqdm import tqdm
from scipy import stats
from scipy.special import softmax
import random
import builtins
import torch
import torch.nn as nn
import torch.nn.functional as F
# Use the local, ExecuTorch-compatible copy of TorchHD instead of any installed
# release: both directories are pushed to the front of sys.path so that the
# imports below resolve to the local checkout first.
# NOTE(review): the paths are relative, so os.path.abspath resolves them against
# the current working directory -- the notebook must be started from its own
# folder for these imports to pick up the intended copy.
import os
sys.path.insert(0, os.path.abspath("../../../torchhd"))
sys.path.insert(0, os.path.abspath("../../../torchhd/torchhd"))
import torchhd
from torchhd import embeddings
from torchhd import models
# Sanity check: print the file each module was actually loaded from, to confirm
# that the local copy (and not a site-packages install) is in use.
print(torchhd.__file__)
print(embeddings.__file__)
print(models.__file__)
from typing import Union, Literal
import json 
import pickle
# import torchmetrics
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import (
    f1_score,
    roc_auc_score,
    accuracy_score,
    precision_score,
    recall_score,
)
from sklearn.metrics import (
    mean_squared_error,
    median_absolute_error,
    r2_score,
    average_precision_score,
)
import warnings
from sklearn.preprocessing import StandardScaler
import gc
import time
from datetime import datetime
import logging
from tqdm import tqdm
from IPython.display import display
import os
from glob import glob
import polars as pl 
# Suppress every warning category for the rest of the session.
warnings.filterwarnings("ignore")


# Seed the three random number generators used in this notebook (Python's
# `random`, PyTorch and NumPy) so randomly generated inputs are repeatable.
random.seed(0)
torch.manual_seed(0)
np.random.seed(0)

/Users/jofremosegui/Desktop/TFG/wearbac_experiments/torchhd/torchhd/__init__.py
/Users/jofremosegui/Desktop/TFG/wearbac_experiments/torchhd/torchhd/embeddings.py
/Users/jofremosegui/Desktop/TFG/wearbac_experiments/torchhd/torchhd/models.py


In [2]:
from torchhd import models, embeddings

class HdcGenericEncoder(nn.Module):
    """Encode a batch of multi-channel windows into quantized hypervectors.

    The channel at index ``input_size - 1`` is embedded with ``hr_embed`` and all
    preceding channels with ``motion_embed``; both are TorchHD ``Level``
    embeddings using the HRR VSA model. ``keys`` is a TorchHD ``Random``
    embedding created with ``input_size`` entries.

    NOTE(review): judging by the attribute names, the last channel is presumably
    heart rate and the others motion signals -- confirm against the training code.
    """
    def __init__(self, input_size, out_dimension, ngrams=7, dtype=torch.float32, device="cpu"):
        """Create the key and level embedding tables.

        Args:
            input_size: Number of input channels; the last one is routed to
                ``hr_embed``, the remaining ``input_size - 1`` to ``motion_embed``.
            out_dimension: Dimensionality passed to every embedding table.
            ngrams: N-gram length later forwarded to ``batch_generic``.
            dtype: Dtype passed to every embedding table.
            device: Device passed to every embedding table.
        """
        super().__init__()
        self.input_size = input_size
        # Random key hypervectors, one table entry per input channel.
        self.keys = embeddings.Random(input_size, out_dimension, dtype=dtype, device=device, vsa="HRR")
        # Level table with 3000 entries spanning the value range [-3.0, 3.0].
        self.motion_embed = embeddings.Level(3000, out_dimension, dtype=dtype, low=-3.0, high=3.0, device=device, vsa="HRR")
        # Level table with 200 entries spanning the value range [50, 200].
        self.hr_embed = embeddings.Level(200, out_dimension, dtype=dtype, low=50, high=200, device=device, vsa="HRR")
        self.ngrams = ngrams
        # Stored for reference only; not read by any method of this class.
        self.device = device

    def batch_generic(self, id, levels, ngram):
        """Bind key hypervectors to each sample's level hypervectors and bundle them.

        Args:
            id: Key hypervectors; ``forward`` passes ``self.keys.weight``.
            levels: Batched level hypervectors, iterated sample by sample along
                dim 0.
            ngram: N-gram window length.

        Returns:
            ``torch.stack`` of one bundled (``torchhd.multiset``) result per sample.

        NOTE(review):
            * N-gram windows are sliced from ``level[0]`` only (the first entry
              along the sample's leading axis) and the window start index runs
              from 1, not 0 -- confirm both are intended.
            * No window is produced when ``id.shape[0] - ngram < 1``; the
              fallback branch then binds ``id`` with the whole ``level`` tensor.
              Assuming ``keys.weight`` has ``input_size`` rows, the configuration
              used in this notebook (``input_size=7``, ``ngrams=7``) always
              takes that fallback branch.
            * Only the n-gram branch applies ``unsqueeze(0)``, so the shape of
              the stacked result depends on which branch ran.
        """
        batch_size = levels.shape[0]
        multiset_list = []
        for b in range(batch_size):
            level = levels[b]
            # One n-gram hypervector per sliding window over level[0].
            b_levels = [
                torchhd.ngrams(level[0][i : i + ngram], ngram)
                for i in range(1, id.shape[0] - ngram + 1)
            ]
            if len(b_levels) > 0:
                b_levels = torch.stack(b_levels)
                # id[:-ngram] keeps id.shape[0] - ngram keys, matching the number of windows.
                multiset_list.append(torchhd.multiset(torchhd.bind(id[:-ngram], b_levels)).unsqueeze(0))
            else:
                # Fallback: bind every key with the sample's level hypervectors directly.
                multiset_list.append(torchhd.multiset(torchhd.bind(id, level)))
        return torch.stack(multiset_list)

    def forward(self, channels):
        """Encode ``channels`` into hard-quantized hypervectors.

        Args:
            channels: Tensor with at least three dims, sliced as
                ``[:, :, channel]``. Presumably laid out as
                ``(batch, time, input_size)`` -- TODO confirm.

        Returns:
            ``torchhd.hard_quantize`` applied to the bundled output of
            ``batch_generic``.
        """
        # Split the channel axis: everything but the last channel vs. the last channel.
        motion = channels[:, :, :self.input_size - 1]
        # unsqueeze(-1) restores the channel axis dropped by integer indexing.
        hr = channels[:, :, self.input_size - 1].unsqueeze(-1)
        enc_motion = self.motion_embed(motion)
        enc_hr = self.hr_embed(hr)
        # Re-join both embeddings along dim 2.
        enc = torch.cat([enc_motion, enc_hr], dim=2)
        hvs = self.batch_generic(self.keys.weight, enc, self.ngrams)
        return torchhd.hard_quantize(torchhd.multiset(hvs))

class HdcModel(nn.Module):
    """HDC classifier: an ``HdcGenericEncoder`` followed by a 2-class centroid model.

    ``forward`` returns, for every sample, the softmax score of class index 1
    computed from the dot similarity between the encoded hypervector and the
    L2-normalized centroid weights.
    """

    def __init__(self, input_size, out_dimension=5000, ngrams=7, device="cpu"):
        """Build the encoder and the centroid classifier.

        Args:
            input_size: Number of input channels handed to the encoder.
            out_dimension: Hypervector dimensionality shared by both parts.
            ngrams: N-gram length handed to the encoder.
            device: Device both sub-modules are created on.
        """
        super().__init__()
        self.encoder = HdcGenericEncoder(input_size, out_dimension, ngrams, device=device)
        self.centroid = models.Centroid(out_dimension, 2, device=device)

    def vector_norm(self, x, p=2, dim=None, keepdim=False):
        """Return the p-norm of ``x``: ``(sum(|x| ** p)) ** (1 / p)``.

        Built from elementary tensor ops only (abs, power, sum).

        Args:
            x: Input tensor.
            p: Order of the norm.
            dim: Dimension(s) to reduce, forwarded to ``torch.sum``.
            keepdim: Whether the reduced dimension is kept, forwarded to ``torch.sum``.
        """
        magnitudes = torch.abs(x) ** p
        reduced = torch.sum(magnitudes, dim=dim, keepdim=keepdim)
        return torch.pow(reduced, 1 / p)

    def normalized_inference(self, input, dot=False):
        """Compare ``input`` against the L2-normalized centroid weights.

        Args:
            input: Encoded hypervectors to classify.
            dot: Use dot similarity when true, cosine similarity otherwise.

        Returns:
            The similarity tensor produced by the chosen TorchHD function.
        """
        # Normalize a detached copy so the stored centroid weights stay untouched.
        prototypes = self.centroid.weight.detach().clone()
        lengths = self.vector_norm(prototypes, p=2, dim=1, keepdim=True)
        # The clamp guards against division by zero for all-zero rows.
        prototypes.div_(lengths.clamp(min=1e-12))
        if dot:
            return torchhd.functional.dot_similarity(input, prototypes)
        return torchhd.functional.cosine_similarity(input, prototypes)

    def binary_hdc_output(self, outputs):
        """Softmax ``outputs`` over dim 1 and return the column of class index 1."""
        class_probs = torch.nn.functional.softmax(outputs, dim=1)
        return class_probs[:, 1]

    def forward(self, x):
        """Encode ``x``, score it against the centroids and return the class-1 score."""
        encoded = self.encoder(x)
        scores = self.normalized_inference(encoded, dot=True)
        return self.binary_hdc_output(scores)



In [10]:
# %%
# --- Load the model ---
# Alternative configuration (1D-CNN baseline), kept for reference. It relies on
# a `CNNClassifier` class that is not defined in the cells shown here.
# MODEL_PATH = "results/1D-CNN_DATA_AUGMENTATION/20250528_193914/model_20250528_201856.pt"
# INPUT_SHAPE = (1, 800, 7)
# device = torch.device("cpu")

# model = CNNClassifier(input_size=INPUT_SHAPE[2])
# state_dict = torch.load(MODEL_PATH, map_location=device)
# model.load_state_dict(state_dict)
# model.to(device)
# model.eval()
MODEL_PATH = "results/HDC_HRR_UNDERSAMPLING/20250430_144301/model_20250430_182301.pt"

device = 'cpu'
# Shape of one benchmark input; the last entry is the channel count.
INPUT_SHAPE = (1, 800, 7)
# Take the channel count from INPUT_SHAPE so the model and the generated
# benchmark inputs cannot drift apart when INPUT_SHAPE is edited.
model = HdcModel(input_size=INPUT_SHAPE[2], out_dimension=5000, ngrams=7, device=device)
# NOTE(security): torch.load unpickles the file -- only load checkpoints from a
# trusted source.
state_dict = torch.load(MODEL_PATH, map_location=device)
model.load_state_dict(state_dict)
model.to(device)
# Evaluation mode: benchmarks must run the model as it would run in deployment.
model.eval()

print("✅ HRR model loaded on CPU")

✅ HRR model loaded on CPU


In [5]:
# -------------------------------
# Dummy preprocessing function
# -------------------------------
def mock_preprocessing():
    """Return one random float32 tensor shaped like ``INPUT_SHAPE``.

    Stand-in for the real preprocessing step: values are drawn from NumPy's
    global standard-normal generator, cast to float32 and copied into a tensor.
    """
    window = np.random.randn(*INPUT_SHAPE)
    return torch.tensor(window.astype(np.float32))


In [6]:
# --- Inference Benchmark ---
# Reports two figures over N_RUNS repetitions:
#   * inference time: forward pass only, on an already prepared input;
#   * window-to-decision latency: preprocessing + device transfer + forward pass.
from timeit import default_timer as timer
import gc

N_WARMUP = 10   # untimed runs so one-off initialisation costs are not measured
N_RUNS = 100    # timed runs

sample_input = mock_preprocessing().to(device)

times, latencies = [], []
# Gradients are never needed for inference; disabling autograd matches the
# memory benchmark cell and keeps autograd bookkeeping out of the timings.
with torch.no_grad():
    for _ in range(N_WARMUP):  # Warm-up
        _ = model(sample_input)

    for _ in range(N_RUNS):
        # Collect garbage outside both timed regions so GC pauses do not leak
        # into either measurement.
        gc.collect()

        # Latency must include the preprocessing of a fresh window; timing only
        # the forward pass would just duplicate the inference measurement.
        start_all = timer()
        input_tensor = mock_preprocessing().to(device)
        _ = model(input_tensor)
        end_all = timer()

        start_infer = timer()
        _ = model(sample_input)
        end_infer = timer()

        # Seconds -> milliseconds.
        latencies.append((end_all - start_all) * 1000)
        times.append((end_infer - start_infer) * 1000)

avg_time = np.mean(times)
std_time = np.std(times)
avg_latency = np.mean(latencies)

print(f"Inference Time (avg ± std): {avg_time:.2f} ± {std_time:.2f} ms")
print(f"Total Window-to-decision Latency: {avg_latency:.2f} ms")

Inference Time (avg ± std): 10.87 ± 0.63 ms
Total Window-to-decision Latency: 10.90 ms


In [7]:
# -------------------------------
# Model Size
# -------------------------------
# Size of the saved checkpoint on disk, converted from bytes to MiB.
model_size_mb = os.stat(MODEL_PATH).st_size / (1024 * 1024)
print(f"Model Size: {model_size_mb:.2f} MB")

Model Size: 61.21 MB


In [7]:
import psutil
# --- Memory Usage ---
# Reads the resident set size (RSS) of this Python process immediately before
# and immediately after a single forward pass and reports the difference and
# the final total.
# NOTE(review): when the notebook is run top to bottom, the model has already
# been executed many times by the inference benchmark cell, so the process is
# warm when `mem_before` is read and the delta can come out close to zero. The
# subprocess-based benchmark in the last cell reports a much larger delta.
# This cell also overwrites the `sample_input` created by the benchmark cell.
sample_input = torch.randn(INPUT_SHAPE).to(device)
process = psutil.Process(os.getpid())
mem_before = process.memory_info().rss  # bytes

with torch.no_grad():
    _ = model(sample_input)

mem_after = process.memory_info().rss  # bytes
# Bytes -> MiB.
delta_mem_mb = (mem_after - mem_before) / (1024 ** 2)
total_mem_mb = mem_after / (1024 ** 2)

print(f"Δ Memory Used During Inference: {delta_mem_mb:.2f} MB")
print(f"Total Memory Usage After Inference: {total_mem_mb:.2f} MB")

Δ Memory Used During Inference: 0.02 MB
Total Memory Usage After Inference: 942.41 MB


In [8]:
# %%
# --- Energy Estimate ---
# Assumed constant power draw in watts (despite the name, this is power, not energy).
ENERGY_ESTIMATED_WATT = 12
# avg_time is in milliseconds: convert to seconds first, then
# energy [J] = time [s] * power [W].
avg_time_s = avg_time / 1000
inference_energy = avg_time_s * ENERGY_ESTIMATED_WATT
print(f"Estimated Energy per Inference: {inference_energy:.3f} J")

Estimated Energy per Inference: 0.130 J


In [11]:
import subprocess
import sys

# Run the stand-alone benchmark script in a fresh interpreter (the same Python
# executable as this kernel) and capture both output streams as text.
# Presumably the script repeats the memory measurement in a cold process --
# confirm against memory_benchmark_HDC_HRR.py.
result = subprocess.run(
    [sys.executable, "memory_benchmark_HDC_HRR.py"],
    capture_output=True,  # same as stdout=PIPE, stderr=PIPE
    text=True,  # decode bytes to str
)

print("STDOUT:\n", result.stdout)
print("STDERR:\n", result.stderr)


STDOUT:
 Δ Memory Used During Inference: 352.77 MB
Total Memory Usage After Inference: 882.94 MB

STDERR:
 
