# Deployment Benchmarking Script

This notebook evaluates inference time, on-disk model size, memory usage, and estimated energy per inference for a trained HDC encoder (a PyTorch module) together with its saved classifier, under deployment-like CPU conditions.

## What it measures
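- Inference time (mean ± std over 100 timed CPU runs after 10 warm-up runs, reported separately for encoding and classification)
- Model size (combined on-disk size of the saved encoder `.pt` file and classifier `.pkl` file)
- RAM usage during execution (process RSS via `psutil`)
- Estimated energy per inference (mean inference time multiplied by an assumed constant power draw)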

## Usage
1. Set `ENCODER_PATH` and `CLASSIFIER_PATH` to your saved encoder and classifier.
2. Define `INPUT_SHAPE` as used during training (e.g., `(1, 800, 7)`).
3. Run the script. Results are printed to the console.

All benchmarks are performed on CPU to reflect edge deployment scenarios.


In [1]:
import sys
import pandas as pd
import numpy as np
from sklearn.model_selection import GroupKFold, StratifiedGroupKFold
from tqdm import tqdm
from scipy import stats
from scipy.special import softmax
import random
import builtins
import torch
import torch.nn as nn
import torch.nn.functional as F
#Use local Executorch compatible copy of TorchHD
import os
sys.path.insert(0, os.path.abspath("../../../torchhd"))
sys.path.insert(0, os.path.abspath("../../../torchhd/torchhd"))
import torchhd
from torchhd import embeddings
from torchhd import models
print(torchhd.__file__) #Check
print(embeddings.__file__) #Check
print(models.__file__) #Check
from typing import Union, Literal
import json 
import pickle
# import torchmetrics
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import (
    f1_score,
    roc_auc_score,
    accuracy_score,
    precision_score,
    recall_score,
)
from sklearn.metrics import (
    mean_squared_error,
    median_absolute_error,
    r2_score,
    average_precision_score,
)
import warnings
from sklearn.preprocessing import StandardScaler
import gc
import time
from datetime import datetime
import logging
from tqdm import tqdm
from IPython.display import display
import os
from glob import glob
import polars as pl 
warnings.filterwarnings("ignore")
from torchhd.embeddings import Random, Level


# Pin the stdlib, PyTorch and NumPy random generators (in that order) to the
# same seed so the randomly generated benchmark inputs are reproducible.
for _seed_fn in (random.seed, torch.manual_seed, np.random.seed):
    _seed_fn(0)

/Users/jofremosegui/Desktop/TFG/wearbac_experiments/torchhd/torchhd/__init__.py
/Users/jofremosegui/Desktop/TFG/wearbac_experiments/torchhd/torchhd/embeddings.py
/Users/jofremosegui/Desktop/TFG/wearbac_experiments/torchhd/torchhd/models.py


In [2]:
# === Encoder definition ===
# (The trained weights are loaded into an instance of this class in a later cell.)
class HdcGenericEncoder(torch.nn.Module):
    """Hyperdimensional encoder built from torchhd `Random` and `Level` embeddings.

    The input's last axis holds `input_size` channels. `forward` embeds the
    first `input_size - 1` channels with `motion_embed` and the final channel
    with `hr_embed` (presumably heart rate -- confirm against the data
    pipeline), combines them with the per-channel `keys`, bundles the result
    and hard-quantizes it.
    """

    def __init__(self, input_size, out_dimension, ngrams=7, dtype=torch.float32, device="cpu"):
        """Create the key and level embeddings.

        Args:
            input_size: Number of channels on the last axis of the input;
                also the first argument given to the `Random` key embedding.
            out_dimension: Second argument given to every torchhd embedding
                (presumably the hypervector dimensionality -- confirm).
            ngrams: Window size forwarded to `batch_generic` by `forward`.
            dtype: dtype passed to every embedding.
            device: Device passed to every embedding; also stored on the
                instance as `self.device`.
        """
        super().__init__()
        self.input_size = input_size
        self.ngrams = ngrams
        self.device = device

        # One random key per input channel (MAP vector-symbolic architecture).
        self.keys = Random(input_size, out_dimension, dtype=dtype, device=device, vsa="MAP")
        # Level embeddings for the two value ranges used by `forward`:
        # 3000 entries over [-3.0, 3.0] for the leading channels, and
        # 200 entries over [50, 200] for the last channel.
        self.motion_embed = Level(3000, out_dimension, dtype=dtype, low=-3.0, high=3.0, device=device, vsa="MAP")
        self.hr_embed = Level(200, out_dimension, dtype=dtype, low=50, high=200, device=device, vsa="MAP")

    def batch_generic(self, id, levels, ngram):
        """Bind keys to embedded levels and bundle them, one sample at a time.

        Args:
            id: Key hypervectors; `forward` passes `self.keys.weight`.
            levels: Embedded inputs; iterated over along axis 0.
            ngram: Window length used for the n-gram branch.

        Returns:
            `torch.stack` of the per-sample bundled results.
        """
        batch_size = levels.shape[0]
        multiset_list = []
        for b in range(batch_size):
            level = levels[b]
            # NOTE(review): windows start at index 1 (index 0 is skipped) and
            # are taken from `level[0]` only -- confirm this is intended.
            # The range is empty whenever id.shape[0] <= ngram.
            b_levels = [
                torchhd.ngrams(level[0][i : i + ngram], ngram)
                for i in range(1, id.shape[0] - ngram + 1)
            ]
            if len(b_levels) > 0:
                b_levels = torch.stack(b_levels)
                multiset_list.append(torchhd.multiset(torchhd.bind(id[:-ngram], b_levels)).unsqueeze(0))
            else:
                # Fallback when no n-gram window exists: bind the keys to the
                # whole sample and bundle. Assuming `keys.weight` has
                # `input_size` rows, this is the branch taken when
                # ngram == input_size (e.g. input_size=7, ngrams=7).
                # NOTE(review): unlike the branch above, no `unsqueeze(0)` is
                # applied here, so the two branches may yield different shapes.
                multiset_list.append(torchhd.multiset(torchhd.bind(id, level)))
        return torch.stack(multiset_list)

    def forward(self, channels):
        """Encode `channels` into a hard-quantized hypervector.

        Args:
            channels: Tensor indexed as `channels[:, :, c]`; assumed to be
                (batch, time, input_size) -- TODO confirm.

        Returns:
            Result of `torchhd.hard_quantize` applied to the bundled encodings.
        """
        # Split the last axis: every channel but the last, then the last one
        # (kept as a trailing axis of size 1 so both parts can be concatenated).
        motion = channels[:, :, : self.input_size - 1]
        hr = channels[:, :, self.input_size - 1].unsqueeze(-1)

        enc_motion = self.motion_embed(motion)
        enc_hr = self.hr_embed(hr)
        # Re-join the embedded channels along the channel axis.
        enc = torch.cat([enc_motion, enc_hr], dim=2)
        hvs = self.batch_generic(self.keys.weight, enc, self.ngrams)
        return torchhd.hard_quantize(torchhd.multiset(hvs))

In [3]:
import joblib

# --- Benchmark configuration ---
ENCODER_PATH = "results/HDC_MLP/20250517_010909/encoder.pt"
CLASSIFIER_PATH = "results/HDC_MLP/20250517_010909/MLP_model.pkl"
# The encoder indexes the last axis as channels; the first two axes are
# presumably (batch, time steps) -- confirm against the training pipeline.
INPUT_SHAPE = (1, 800, 7)
device = torch.device("cpu")

# === Load Encoder and Classifier ===
# SECURITY: joblib.load (and, depending on the installed PyTorch version,
# torch.load) unpickles the file and can execute arbitrary code. Only point
# the two paths above at artifacts you produced or otherwise trust.
#
# The channel count is taken from INPUT_SHAPE so the encoder and the benchmark
# inputs cannot silently drift apart; out_dimension and ngrams must match the
# values used when the checkpoint was trained.
encoder = HdcGenericEncoder(
    input_size=INPUT_SHAPE[-1],
    out_dimension=5000,
    ngrams=7,
    device=device,
)
encoder.load_state_dict(torch.load(ENCODER_PATH, map_location=device))
encoder.eval()

classifier = joblib.load(CLASSIFIER_PATH)
print("Loaded encoder and classifier successfully")

Loaded encoder and classifier successfully


In [4]:
# -------------------------------
# Dummy input generator
# -------------------------------
def generate_input(shape=None):
    """Return a random float32 tensor standing in for one model input.

    Args:
        shape: Shape of the tensor to generate. When omitted, the
            notebook-level ``INPUT_SHAPE`` is used, which keeps existing
            zero-argument calls working unchanged.

    Returns:
        A ``torch.float32`` tensor of standard-normal samples drawn from
        NumPy's global random generator.
    """
    if shape is None:
        shape = INPUT_SHAPE
    return torch.tensor(np.random.randn(*shape).astype(np.float32))


In [5]:
import time

# Number of untimed passes (to absorb one-off start-up costs) and of timed passes.
N_WARMUP_RUNS = 10
N_TIMED_RUNS = 100

# Per-run latencies in milliseconds; the energy-estimate cell reads these lists.
encode_times = []
classify_times = []

# Pure inference: disable autograd bookkeeping so it can never be part of the
# measured time and so `hv.numpy()` is always legal.
with torch.no_grad():
    # Warm-up
    for _ in range(N_WARMUP_RUNS):
        sample_input = torch.randn(INPUT_SHAPE).to(device)
        hv = encoder(sample_input)
        _ = classifier.predict(hv.numpy())

    for _ in range(N_TIMED_RUNS):
        # Fresh random input every run, with the same shape and device as the
        # warm-up passes (input generation itself is not timed).
        input_tensor = torch.randn(INPUT_SHAPE).to(device)

        # --- Encode ---
        start_enc = time.perf_counter()
        hv = encoder(input_tensor)
        end_enc = time.perf_counter()
        encode_times.append((end_enc - start_enc) * 1000)

        # --- Classify ---
        start_clf = time.perf_counter()
        _ = classifier.predict(hv.numpy())
        end_clf = time.perf_counter()
        classify_times.append((end_clf - start_clf) * 1000)

# --- Results ---
print(f"Encoding Time (avg ± std): {np.mean(encode_times):.2f} ± {np.std(encode_times):.2f} ms")
print(f"Classifier Time (avg ± std): {np.mean(classify_times):.2f} ± {np.std(classify_times):.2f} ms")
print(f"Total Inference Time (avg): {np.mean(encode_times) + np.mean(classify_times):.2f} ms")


Encoding Time (avg ± std): 70.87 ± 40.60 ms
Classifier Time (avg ± std): 7.68 ± 5.37 ms
Total Inference Time (avg): 78.55 ms


In [6]:
# -------------------------------
# On-disk model size
# -------------------------------
# File sizes are read from the filesystem in bytes and converted to
# units of 2**20 bytes (reported below as "MB").
encoder_size = os.stat(ENCODER_PATH).st_size / 2 ** 20
classifier_size = os.stat(CLASSIFIER_PATH).st_size / 2 ** 20
print("Classifier Size:", classifier_size)
print("Encoder Size:", encoder_size)
print(f"Model Size: {(classifier_size + encoder_size):.2f} MB")

Classifier Size: 19.796714782714844
Encoder Size: 61.17036819458008
Model Size: 80.97 MB


In [7]:
import psutil

# --- Memory Usage ---
# The resident set size (RSS) of this kernel process is sampled right before
# and right after a single encode + classify pass. Nothing else may run between
# the two samples, otherwise it would be attributed to inference.
process = psutil.Process(os.getpid())

mem_before = process.memory_info().rss
_ = classifier.predict(encoder(generate_input()).numpy())
mem_after = process.memory_info().rss

# Convert bytes to units of 2**20 bytes (printed as "MB").
delta_mem, total_mem = (mem_after - mem_before) / 2 ** 20, mem_after / 2 ** 20

print(f"Δ Memory Used During Inference: {delta_mem:.2f} MB")
print(f"Total Memory Usage After Inference: {total_mem:.2f} MB")

Δ Memory Used During Inference: 0.09 MB
Total Memory Usage After Inference: 963.67 MB


In [8]:
# --- Energy Estimate ---
# Energy (J) = mean inference latency (s) x assumed constant power draw (W).
# The latency lists hold milliseconds, hence the division by 1000.
ENERGY_ESTIMATED_WATT = 12
avg_time = sum(np.mean(stage_ms) for stage_ms in (encode_times, classify_times))
inference_energy = (avg_time / 1000) * ENERGY_ESTIMATED_WATT
print(f"Estimated Energy per Inference: {inference_energy:.3f} J")


Estimated Energy per Inference: 0.943 J


In [9]:
import subprocess
import sys

# Run the standalone memory benchmark with the same Python interpreter as this
# kernel, in a separate process (presumably so its readings are not influenced
# by what this notebook has already loaded -- confirm).
BENCHMARK_SCRIPT = "memory_benchmark_HYBRID_MLP.py"

result = subprocess.run(
    [sys.executable, BENCHMARK_SCRIPT],
    capture_output=True,  # collect both stdout and stderr
    text=True,  # decode bytes to string
)

print("STDOUT:\n", result.stdout)
print("STDERR:\n", result.stderr)

# Report a failed run explicitly instead of leaving it to be inferred from
# STDERR; do not raise, so the remaining notebook output stays visible.
if result.returncode != 0:
    print(f"{BENCHMARK_SCRIPT} exited with return code {result.returncode}")


STDOUT:
 Δ Memory Used During Inference: 0.03 MB
Total Memory Usage After Inference: 900.41 MB

STDERR:
 
