<a href="https://colab.research.google.com/github/itsPronay/HSIC/blob/main/ResNet18_qai_hub_benchmark.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [63]:
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import transforms, models, datasets
from torch.utils.data import DataLoader
import torch.optim as optim
from torch.utils.data import Subset

In [13]:
import argparse

parser = argparse.ArgumentParser('ResNet')

parser.add_argument('--model', choices=['resnet18', 'resnet34', 'resnet50', 'all'], default='resnet18')
parser.add_argument('--epoch', type=int, default=1)
parser.add_argument('--learning_rate', type=float, default=0.0005)
parser.add_argument('--batch', type=int, default=32)
parser.add_argument('--imagesize', type=int, nargs='+', default=[112, 224, 336, 448, 560, 672, 784, 896, 1008, 1120])
# parser.add_argument('--imagesize', type=int, nargs='+', default=[112, 224, 336])
parser.add_argument('--seed', type=int, default=42)
parser.add_argument('--num_class', type=int, default=2)

args, unknown = parser.parse_known_args()

In [3]:
!curl -L -o dataset.zip\
  https://www.kaggle.com/api/v1/datasets/download/chetankv/dogs-cats-images

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  434M  100  434M    0     0  26.3M      0  0:00:16  0:00:16 --:--:-- 29.5M


In [64]:
#!unzip /content/dataset.zip

Helper function to control batch size based on image size

In [14]:
def getbatchSize(imagesize):
  if imagesize < 500:
    batch = 32
  elif imagesize < 800:
    batch = 12
  else:
    batch = 4
  return batch

In [15]:
train_dir = '/content/dataset/training_set'

loaders = []

for res in args.imagesize:
  train_transform = transforms.Compose([
      transforms.Resize((res, res)),
      transforms.RandomHorizontalFlip(),
      transforms.ToTensor()
  ])

  train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transform)
  train_subset = Subset(train_dataset, range(50))

  batchsize = getbatchSize(res)
  train_loader = DataLoader(dataset = train_subset, batch_size=batchsize, shuffle=True)

  loaders.append(train_loader)

Paths where the model's would be saved

In [16]:
resnet_18_savedpath = []
resnet_34_savedpath = []
resnet_50_savedpath = []

In [17]:
def trainResnet18(size, train_loader):
  resnet18 = models.resnet18(weights=True)
  resnet18.fc = nn.Linear(resnet18.fc.in_features, args.num_class)
  resnet18.to(device)

  criterion = nn.CrossEntropyLoss()
  resnet18optimizer = optim.Adam(resnet18.parameters(), lr=args.learning_rate)

  print('Training Resnet 18 Model on size ' + str(size) + " pixel")
  for epoch in range(args.epoch):
      resnet18.train()
      running_loss = 0.0
      correct = 0
      total = 0

      for images, labels in train_loader:
          images, labels = images.to(device), labels.to(device)

          resnet18optimizer.zero_grad()
          outputs = resnet18(images)
          loss = criterion(outputs, labels)
          loss.backward()
          resnet18optimizer.step()

          running_loss += loss.item()
          _, preds = torch.max(outputs, 1)
          correct += (preds == labels).sum().item()
          total += labels.size(0)

      train_acc = 100 * correct / total

      print(f"Epoch [{epoch+1}/{args.epoch}] "
            f"Loss: {running_loss:.4f} "
            f"Train Acc: {train_acc:.2f}%")

      if epoch + 1 == args.epoch:
          save_dir = "Resnet18_" + str(size) + ".pth"
          torch.save(resnet18.state_dict(), save_dir)
          resnet_18_savedpath.append(save_dir)

### Train Function for Resnet34


In [18]:
def trainResnet34(size, train_loader):
  resnet34 = models.resnet34(weights=True)
  resnet34.fc = nn.Linear(resnet34.fc.in_features, args.num_class)
  resnet34.to(device)

  criterion = nn.CrossEntropyLoss()
  resnet34optimizer = optim.Adam(resnet34.parameters(), lr=args.learning_rate)

  print('Training Resnet 34 Model on size ' + str(size) + " pixel")
  for epoch in range(args.epoch):
      resnet18.train()
      running_loss = 0.0
      correct = 0
      total = 0

      for images, labels in train_loader:
          images, labels = images.to(device), labels.to(device)

          resnet34optimizer.zero_grad()
          outputs = resnet34(images)
          loss = criterion(outputs, labels)
          loss.backward()
          resnet34optimizer.step()

          running_loss += loss.item()
          _, preds = torch.max(outputs, 1)
          correct += (preds == labels).sum().item()
          total += labels.size(0)

      train_acc = 100 * correct / total

      print(f"Epoch [{epoch+1}/{args.epoch}] "
            f"Loss: {running_loss:.4f} "
            f"Train Acc: {train_acc:.2f}%")

      if epoch + 1 == args.epoch:
          save_dir = "Resnet34_" + str(size) + ".pth"
          torch.save(resnet34.state_dict(), save_dir)
          resnet_34_savedpath.append(save_dir)

### Training function for Resnet 50

In [19]:
def trainResnet50(size, train_loader):
  resnet50 = models.resnet50(weights=True)
  resnet50.fc = nn.Linear(resnet50.fc.in_features, args.num_class)
  resnet50.to(device)

  criterion = nn.CrossEntropyLoss()
  resnet50optimizer = optim.Adam(resnet50.parameters(), lr=args.learning_rate)

  print('Training Resnet 50 Model on size ' + str(size) + " pixel")
  for epoch in range(args.epoch):
      resnet50.train()
      running_loss = 0.0
      correct = 0
      total = 0

      for images, labels in train_loader:
          images, labels = images.to(device), labels.to(device)

          resnet50optimizer.zero_grad()
          outputs = resnet50(images)
          loss = criterion(outputs, labels)
          loss.backward()
          resnet50optimizer.step()

          running_loss += loss.item()
          _, preds = torch.max(outputs, 1)
          correct += (preds == labels).sum().item()
          total += labels.size(0)

      train_acc = 100 * correct / total

      print(f"Epoch [{epoch+1}/{args.epoch}] "
            f"Loss: {running_loss:.4f} "
            f"Train Acc: {train_acc:.2f}%")

      if epoch + 1 == args.epoch:
          save_dir = "Resnet50_" + str(size) + ".pth"
          torch.save(resnet50.state_dict(), save_dir)
          resnet_50_savedpath.append(save_dir)

In [20]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for train_loader in loaders:
  image_size = train_loader.dataset[0][0].shape[1]
  if args.model == 'resnet18':
    trainResnet18(image_size, train_loader)
  elif args.model == 'resnet34':
    trainResnet34(image_size, train_loader)
  elif args.model == 'resnet50':
    trainResnet50(image_size, train_loader)
  elif args.model == 'all':
    trainResnet18(image_size, train_loader)
    trainResnet34(image_size, train_loader)
    trainResnet50(image_size, train_loader)
  else:
    raise ValueError('Invalid model choice')


Training Resnet 18 Model on size 112 pixel
Epoch [1/1] Loss: 59.6501 Train Acc: 89.92%
Training Resnet 18 Model on size 224 pixel
Epoch [1/1] Loss: 34.3784 Train Acc: 94.72%
Training Resnet 18 Model on size 336 pixel
Epoch [1/1] Loss: 30.5674 Train Acc: 94.86%
Training Resnet 18 Model on size 448 pixel
Epoch [1/1] Loss: 32.2565 Train Acc: 94.86%
Training Resnet 18 Model on size 560 pixel
Epoch [1/1] Loss: 146.2335 Train Acc: 91.09%
Training Resnet 18 Model on size 672 pixel
Epoch [1/1] Loss: 175.6554 Train Acc: 89.22%
Training Resnet 18 Model on size 784 pixel
Epoch [1/1] Loss: 204.3293 Train Acc: 87.22%
Training Resnet 18 Model on size 896 pixel
Epoch [1/1] Loss: 1257.6325 Train Acc: 65.54%
Training Resnet 18 Model on size 1008 pixel
Epoch [1/1] Loss: 1306.3796 Train Acc: 61.76%
Training Resnet 18 Model on size 1120 pixel
Epoch [1/1] Loss: 1346.6319 Train Acc: 58.73%


In [21]:
!pip install "qai-hub[torch]"
!qai-hub configure --api_token vqh9wt98ef7yptfydrf1tiuf6i5klo3q74gu52kv

Collecting qai-hub[torch]
  Downloading qai_hub-0.44.0-py3-none-any.whl.metadata (2.6 kB)
Collecting backoff>=2.2 (from qai-hub[torch])
  Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)
Collecting s3transfer<0.14,>=0.10.3 (from qai-hub[torch])
  Downloading s3transfer-0.13.1-py3-none-any.whl.metadata (1.7 kB)
Collecting semver>=3.0 (from qai-hub[torch])
  Downloading semver-3.0.4-py3-none-any.whl.metadata (6.8 kB)
Collecting botocore<2.0a.0,>=1.37.4 (from s3transfer<0.14,>=0.10.3->qai-hub[torch])
  Downloading botocore-1.42.46-py3-none-any.whl.metadata (5.9 kB)
Collecting jmespath<2.0.0,>=0.7.1 (from botocore<2.0a.0,>=1.37.4->s3transfer<0.14,>=0.10.3->qai-hub[torch])
  Downloading jmespath-1.1.0-py3-none-any.whl.metadata (7.6 kB)
Downloading backoff-2.2.1-py3-none-any.whl (15 kB)
Downloading s3transfer-0.13.1-py3-none-any.whl (85 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.3/85.3 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading semver

## Moving models to CPU as well, as ai hub expects CPU tracing, Tracing it on gpu leads to failure of compile job

Error - Unable to load torch model via torch.jit.load().  We recommend using at least torch 1.11 to trace a pytorch model. You can install the latest recommended torch via: `pip install "qai-hub[torch]"`.


In [38]:
loaded_models = []

for path in resnet_18_savedpath:
  if args.model == 'resnet18' or args.model == 'all':
    model = models.resnet18(weights=None)
    model.fc = nn.Linear(model.fc.in_features, args.num_class)

    checkpoint = torch.load(path, map_location=device)
    model.load_state_dict(checkpoint)

    model = model.to("cpu").eval()
    loaded_models.append((path[:-4], model))

for path in resnet_34_savedpath:
  if args.model == 'resnet34' or args.model == 'all':
    model = models.resnet34(weights=None)
    model.fc = nn.Linear(model.fc.in_features, args.num_class)

    checkpoint = torch.load(path, map_location=device)
    model.load_state_dict(checkpoint)

    model = model.to("cpu").eval()
    loaded_models.append((path[:-4], model))

for path in resnet_50_savedpath:
  if args.model != 'resnet50' or args.model != 'all':
    model = models.resnet50(weights=None)
    model.fc = nn.Linear(model.fc.in_features, args.num_class)

    checkpoint = torch.load(path, map_location=device)
    model.load_state_dict(checkpoint)

    model = model.to("cpu").eval()
    loaded_models.append((path[:-4], model))

In [39]:
print("******************************************")
print('Loaded Models: ')
print("******************************************")
for name, _ in loaded_models:
  print(name)

******************************************
Loaded Models: 
******************************************
Resnet18_112
Resnet18_224
Resnet18_336
Resnet18_448
Resnet18_560
Resnet18_672
Resnet18_784
Resnet18_896
Resnet18_1008
Resnet18_1120


In [40]:
import qai_hub as hub

devices = [
    hub.Device('Dragonwing IQ-9075 EVK'),
    # hub.Device('QCS8550 (Proxy)'),
    # hub.Device('Google Pixel 10 Pro XL'),
    # hub.Device('Samsung Galaxy S24 (Family)'),
    hub.Device('Samsung Galaxy S24 Ultra')
]

In [44]:
def get_image_res(s: str) -> int:
    """
    Returns the integer after the last underscore in a string.
    Example: "res_334" -> 334
    """
    try:
        return int(s.split('_')[-1])
    except ValueError:
        raise ValueError(f"No valid integer found after underscore in '{s}'")


In [55]:
traced_models = []

for name, model in loaded_models:
  res = get_image_res(name)
  input_shape: tuple[int, ...] = (1, 3, res, res)
  example_input = torch.rand(input_shape)

  model_name = 'traced_' + name
  traced_model = torch.jit.trace(model, example_input)

  traced_models.append((res, name, traced_model, input_shape))


## Compile job count


In [54]:
device_count = len(devices)
model_count = len(traced_models)

print('Devices: ' + str(device_count))
print('Model count: ' + str(model_count))
print('Compile jobs count ' + str(device_count * model_count))

Devices: 5
Model count: 10
Compile jobs count 50


In [58]:

compile_jobs = []

for device in devices:
  for res, name, traced_model, input_shape in traced_models:
    name_formatted = name + "_" + device.name
    print("Submitting compile job for: " + name_formatted)

    job = hub.submit_compile_job(
        model=traced_model,
        name=name_formatted,
        device=device,
        input_specs=dict(image=input_shape),
    )
    assert isinstance(job, hub.CompileJob)
    compile_jobs.append((res, name, job))

Submitting compile job for: Resnet18_112_Dragonwing IQ-9075 EVK
Uploading tmpe6bli7mg.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 25.3MB/s]


Scheduled compile job (jgkydww2p) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jgkydww2p/

Submitting compile job for: Resnet18_224_Dragonwing IQ-9075 EVK
Uploading tmpcfc0zhr3.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.7MB/s]


Scheduled compile job (j5q2wxx45) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/j5q2wxx45/

Submitting compile job for: Resnet18_336_Dragonwing IQ-9075 EVK
Uploading tmpus6sp8s4.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.3MB/s]


Scheduled compile job (jglk7998p) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jglk7998p/

Submitting compile job for: Resnet18_448_Dragonwing IQ-9075 EVK
Uploading tmpuq1xu6n_.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.1MB/s]


Scheduled compile job (j561v990p) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/j561v990p/

Submitting compile job for: Resnet18_560_Dragonwing IQ-9075 EVK
Uploading tmpsbov0nu3.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.9MB/s]


Scheduled compile job (jp3m8lllg) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jp3m8lllg/

Submitting compile job for: Resnet18_672_Dragonwing IQ-9075 EVK
Uploading tmpveheqzlq.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.8MB/s]


Scheduled compile job (jgovm77x5) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jgovm77x5/

Submitting compile job for: Resnet18_784_Dragonwing IQ-9075 EVK
Uploading tmpug8de7qw.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.3MB/s]


Scheduled compile job (jpvw4yyjg) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jpvw4yyjg/

Submitting compile job for: Resnet18_896_Dragonwing IQ-9075 EVK
Uploading tmppjlr0qco.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.2MB/s]


Scheduled compile job (jgjl166xp) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jgjl166xp/

Submitting compile job for: Resnet18_1008_Dragonwing IQ-9075 EVK
Uploading tmpjbpbesx7.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.8MB/s]


Scheduled compile job (jpev20015) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jpev20015/

Submitting compile job for: Resnet18_1120_Dragonwing IQ-9075 EVK
Uploading tmpfrkvspnm.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.9MB/s]


Scheduled compile job (jgz7wqqkp) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jgz7wqqkp/

Submitting compile job for: Resnet18_112_QCS8550 (Proxy)
Uploading tmpf745u5ke.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.2MB/s]


Scheduled compile job (j5w9x0k6p) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/j5w9x0k6p/

Submitting compile job for: Resnet18_224_QCS8550 (Proxy)
Uploading tmp4r9t_sb3.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.1MB/s]


Scheduled compile job (jg9487rl5) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jg9487rl5/

Submitting compile job for: Resnet18_336_QCS8550 (Proxy)
Uploading tmp38co4ewv.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.0MB/s]


Scheduled compile job (jp183k92g) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jp183k92g/

Submitting compile job for: Resnet18_448_QCS8550 (Proxy)
Uploading tmpmuucu86m.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.3MB/s]


Scheduled compile job (jgdv0ykeg) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jgdv0ykeg/

Submitting compile job for: Resnet18_560_QCS8550 (Proxy)
Uploading tmpgqahrf8d.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:02<00:00, 22.3MB/s]


Scheduled compile job (j5w9x0k3p) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/j5w9x0k3p/

Submitting compile job for: Resnet18_672_QCS8550 (Proxy)
Uploading tmpfxhevfy8.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.4MB/s]


Scheduled compile job (jg9487rw5) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jg9487rw5/

Submitting compile job for: Resnet18_784_QCS8550 (Proxy)
Uploading tmprwiooers.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:02<00:00, 18.5MB/s]


Scheduled compile job (jp183k98g) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jp183k98g/

Submitting compile job for: Resnet18_896_QCS8550 (Proxy)
Uploading tmpcq2tske0.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.0MB/s]


Scheduled compile job (jgdv0ykrg) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jgdv0ykrg/

Submitting compile job for: Resnet18_1008_QCS8550 (Proxy)
Uploading tmps3e0_fir.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.2MB/s]


Scheduled compile job (j57d61mv5) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/j57d61mv5/

Submitting compile job for: Resnet18_1120_QCS8550 (Proxy)
Uploading tmpqf30vxzq.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 28.4MB/s]


Scheduled compile job (jp4w8678g) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jp4w8678g/

Submitting compile job for: Resnet18_112_Google Pixel 10 Pro XL
Uploading tmpdnul4d_6.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 25.0MB/s]


Scheduled compile job (jpx1m8q3g) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jpx1m8q3g/

Submitting compile job for: Resnet18_224_Google Pixel 10 Pro XL
Uploading tmpbtwijird.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 28.4MB/s]


Scheduled compile job (j5mz417dp) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/j5mz417dp/

Submitting compile job for: Resnet18_336_Google Pixel 10 Pro XL
Uploading tmpqoy02cdm.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.3MB/s]


Scheduled compile job (jgnexd4kg) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jgnexd4kg/

Submitting compile job for: Resnet18_448_Google Pixel 10 Pro XL
Uploading tmpnyzisgp6.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 22.8MB/s]


Scheduled compile job (jpry9mr0g) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jpry9mr0g/

Submitting compile job for: Resnet18_560_Google Pixel 10 Pro XL
Uploading tmp3cpgryp_.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.5MB/s]


Scheduled compile job (jp2mjq1r5) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jp2mjq1r5/

Submitting compile job for: Resnet18_672_Google Pixel 10 Pro XL
Uploading tmplo9iag1o.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.0MB/s]


Scheduled compile job (jpydnkl8p) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jpydnkl8p/

Submitting compile job for: Resnet18_784_Google Pixel 10 Pro XL
Uploading tmpzwzxptmw.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.5MB/s]


Scheduled compile job (jp0rk8w9p) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jp0rk8w9p/

Submitting compile job for: Resnet18_896_Google Pixel 10 Pro XL
Uploading tmpdriwkg0x.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.2MB/s]


Scheduled compile job (jp878dnk5) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jp878dnk5/

Submitting compile job for: Resnet18_1008_Google Pixel 10 Pro XL
Uploading tmp49rhzgkj.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.3MB/s]


Scheduled compile job (jgkydw1wp) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jgkydw1wp/

Submitting compile job for: Resnet18_1120_Google Pixel 10 Pro XL
Uploading tmpd_0m9ha7.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.1MB/s]


Scheduled compile job (j5q2wxnn5) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/j5q2wxnn5/

Submitting compile job for: Resnet18_112_Samsung Galaxy S24 (Family)
Uploading tmpe9wf7adr.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.0MB/s]


Scheduled compile job (jglk79djp) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jglk79djp/

Submitting compile job for: Resnet18_224_Samsung Galaxy S24 (Family)
Uploading tmpl9vx_e98.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:02<00:00, 22.2MB/s]


Scheduled compile job (j561v9x6p) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/j561v9x6p/

Submitting compile job for: Resnet18_336_Samsung Galaxy S24 (Family)
Uploading tmpro7ngw02.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:02<00:00, 22.4MB/s]


Scheduled compile job (jp3m8ld3g) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jp3m8ld3g/

Submitting compile job for: Resnet18_448_Samsung Galaxy S24 (Family)
Uploading tmpsmn34ud3.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.8MB/s]


Scheduled compile job (jgovm7xq5) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jgovm7xq5/

Submitting compile job for: Resnet18_560_Samsung Galaxy S24 (Family)
Uploading tmpfsjj2x1t.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.8MB/s]


Scheduled compile job (jpvw4yjkg) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jpvw4yjkg/

Submitting compile job for: Resnet18_672_Samsung Galaxy S24 (Family)
Uploading tmp4owvavfi.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 27.6MB/s]


Scheduled compile job (jgjl16jvp) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jgjl16jvp/

Submitting compile job for: Resnet18_784_Samsung Galaxy S24 (Family)
Uploading tmp6n7vjwc2.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.1MB/s]


Scheduled compile job (jpev20jo5) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jpev20jo5/

Submitting compile job for: Resnet18_896_Samsung Galaxy S24 (Family)
Uploading tmp9u6tbla0.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.0MB/s]


Scheduled compile job (jgz7wq1op) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jgz7wq1op/

Submitting compile job for: Resnet18_1008_Samsung Galaxy S24 (Family)
Uploading tmpda9rqnc1.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.8MB/s]


Scheduled compile job (j5w9x0j3p) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/j5w9x0j3p/

Submitting compile job for: Resnet18_1120_Samsung Galaxy S24 (Family)
Uploading tmp5ih6h8f1.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.4MB/s]


Scheduled compile job (jg94876w5) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jg94876w5/

Submitting compile job for: Resnet18_112_Samsung Galaxy S24 Ultra
Uploading tmp1iadaqlb.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.7MB/s]


Scheduled compile job (jp183kr8g) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jp183kr8g/

Submitting compile job for: Resnet18_224_Samsung Galaxy S24 Ultra
Uploading tmppz5bbkzx.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.8MB/s]


Scheduled compile job (jgdv0yjrg) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jgdv0yjrg/

Submitting compile job for: Resnet18_336_Samsung Galaxy S24 Ultra
Uploading tmpg49rdlzd.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.8MB/s]


Scheduled compile job (j57d61qv5) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/j57d61qv5/

Submitting compile job for: Resnet18_448_Samsung Galaxy S24 Ultra
Uploading tmp3rulq52q.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.9MB/s]


Scheduled compile job (jp4w86z8g) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jp4w86z8g/

Submitting compile job for: Resnet18_560_Samsung Galaxy S24 Ultra
Uploading tmpt62tl7l3.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 24.2MB/s]


Scheduled compile job (jpx1m8w3g) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jpx1m8w3g/

Submitting compile job for: Resnet18_672_Samsung Galaxy S24 Ultra
Uploading tmp4f1islfq.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.8MB/s]


Scheduled compile job (j5mz41jdp) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/j5mz41jdp/

Submitting compile job for: Resnet18_784_Samsung Galaxy S24 Ultra
Uploading tmpnczkljcv.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.9MB/s]


Scheduled compile job (jgnexdjkg) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jgnexdjkg/

Submitting compile job for: Resnet18_896_Samsung Galaxy S24 Ultra
Uploading tmpihvmtvci.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 26.1MB/s]


Scheduled compile job (jpry9mz0g) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jpry9mz0g/

Submitting compile job for: Resnet18_1008_Samsung Galaxy S24 Ultra
Uploading tmp7uwahioc.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:01<00:00, 23.1MB/s]


Scheduled compile job (jpydnk98p) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jpydnk98p/

Submitting compile job for: Resnet18_1120_Samsung Galaxy S24 Ultra
Uploading tmp0sj4liei.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:02<00:00, 22.4MB/s]


Scheduled compile job (jp0rk8n9p) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jp0rk8n9p/



In [59]:
profile_jobs = []

for res, name, job in compile_jobs:
    device = job.device
    formattedName = job.name + device.name

    print("Submitting profiling job for:" + formattedName)

    pf_job = hub.submit_profile_job(
        model=job.get_target_model(),
        device=device,
        name=job.name + "_profiling"
    )

    assert isinstance(pf_job, hub.ProfileJob)
    profile_jobs.append((res, name, pf_job))


Submitting profiling job for:Resnet18_112_Dragonwing IQ-9075 EVKDragonwing IQ-9075 EVK
Scheduled profile job (j561v9k6p) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/j561v9k6p/

Submitting profiling job for:Resnet18_224_Dragonwing IQ-9075 EVKDragonwing IQ-9075 EVK
Scheduled profile job (jp3m8ly3g) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jp3m8ly3g/

Submitting profiling job for:Resnet18_336_Dragonwing IQ-9075 EVKDragonwing IQ-9075 EVK
Scheduled profile job (jgovm7jq5) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jgovm7jq5/

Submitting profiling job for:Resnet18_448_Dragonwing IQ-9075 EVKDragonwing IQ-9075 EVK
Scheduled profile job (jpvw4y3kg) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jpvw4y3kg/

Submitting profiling job for:Resnet18_560_Dragonwing IQ-9075 EVKDragonwing IQ-9075 EVK
Scheduled profile

In [62]:
import numpy as np
import pandas as pd

def us_to_ms(x):
    return x / 1e3

def bytes_to_mb(x):
    return x / (1024 ** 2)

def extract_resolution(model_name):
    """Extract resolution from model name (e.g., 'Resnet18_112' -> 112)"""
    parts = model_name.split("_")
    if len(parts) >= 2:
        try:
            return int(parts[-1])
        except ValueError:
            # If last part is not a number, try second-to-last
            if len(parts) >= 3:
                try:
                    return int(parts[-2])
                except ValueError:
                    return None
    return None

def extract_architecture(model_name):
    """Extract architecture from model name (e.g., 'Resnet18_112' -> 'Resnet18')"""
    parts = model_name.split("_")
    if len(parts) >= 2:
        return parts[0]
    return model_name

summary_rows = []
util_rows = []
memory_rows = []
bottleneck_rows = []

for res, name, pf_job in profile_jobs:
    result = pf_job.download_profile()
    s = result["execution_summary"]
    d = pd.DataFrame(result["execution_detail"])
    times = np.array(s["all_inference_times"])

    # Use the 'name' from the tuple (e.g., "Resnet18_112")
    # and 'res' which is the resolution value
    model_name = name
    device_name = pf_job.device.name
    resolution = res  # Use the resolution from the tuple directly
    architecture = extract_architecture(model_name)

    # -------------------------------
    # Table 1: End-to-End Performance
    # -------------------------------
    summary_rows.append({
        "Architecture": architecture,
        "Resolution": resolution,
        "Device": device_name,
        "Mean Latency (ms)": round(us_to_ms(times.mean()), 4),
        "Median Latency (ms)": round(us_to_ms(np.median(times)), 4),
        "P50 Latency (ms)": round(us_to_ms(np.percentile(times, 50)), 4),
        "P95 Latency (ms)": round(us_to_ms(np.percentile(times, 95)), 4),
        "P99 Latency (ms)": round(us_to_ms(np.percentile(times, 99)), 4),
        "Std Dev (ms)": round(us_to_ms(times.std()), 4),
        "Cold Start (ms)": round(us_to_ms(s["first_load_time"]), 4),
        "Warm Start (ms)": round(us_to_ms(s["warm_load_time"]), 4),
        "Speedup (Cold→Warm)": round(s["first_load_time"] / s["warm_load_time"], 2),
    })

    # -------------------------------
    # Table 2: Memory Footprint
    # -------------------------------
    memory_rows.append({
        "Architecture": architecture,
        "Resolution": resolution,
        "Device": device_name,
        "Inference Peak (MB)": round(bytes_to_mb(s["estimated_inference_peak_memory"]), 2),
        "Cold Start Peak (MB)": round(bytes_to_mb(s["first_load_peak_memory"]), 2),
        "Warm Start Peak (MB)": round(bytes_to_mb(s["warm_load_peak_memory"]), 2),
        "Memory Reduction Cold→Warm (%)": round(
            (1 - s["warm_load_peak_memory"] / s["first_load_peak_memory"]) * 100, 2
        ),
        "Memory Reduction Warm→Inference (%)": round(
            (1 - s["estimated_inference_peak_memory"] / s["warm_load_peak_memory"]) * 100, 2
        ),
    })

    # -------------------------------
    # Table 3: Accelerator Utilization
    # -------------------------------
    total_time = d["execution_time"].sum()
    util = d.groupby("compute_unit")["execution_time"].sum() / total_time * 100

    util_rows.append({
        "Architecture": architecture,
        "Resolution": resolution,
        "Device": device_name,
        "CPU (%)": round(util.get("CPU", 0.0), 2),
        "GPU (%)": round(util.get("GPU", 0.0), 2),
        "NPU (%)": round(util.get("NPU", 0.0), 2),
        "Total Time (ms)": round(us_to_ms(total_time), 2),
        "Dominant Unit": util.idxmax() if len(util) > 0 else "N/A",
    })

    # -------------------------------
    # Table 4: Performance Bottlenecks
    # -------------------------------
    # Find top 5 slowest operations
    top_ops = d.nlargest(5, "execution_time")[["name", "type", "compute_unit", "execution_time"]]

    bottleneck_rows.append({
        "Architecture": architecture,
        "Resolution": resolution,
        "Device": device_name,
        "Slowest Op": top_ops.iloc[0]["name"].split("/")[-1],
        "Op Type": top_ops.iloc[0]["type"],
        "Op Time (ms)": round(us_to_ms(top_ops.iloc[0]["execution_time"]), 4),
        "Op Unit": top_ops.iloc[0]["compute_unit"],
        "Top 5 Ops Time (ms)": round(us_to_ms(top_ops["execution_time"].sum()), 2),
        "% of Total": round(top_ops["execution_time"].sum() / total_time * 100, 2),
    })

# Create tables (rows will be in the same order as profile_jobs)
table_perf = pd.DataFrame(summary_rows)
table_mem = pd.DataFrame(memory_rows)
table_util = pd.DataFrame(util_rows)
table_bottleneck = pd.DataFrame(bottleneck_rows)

# Display tables (Markdown format)
print("\n" + "="*140)
print("TABLE 1: End-to-End Performance")
print("="*140)
print(table_perf.to_markdown(index=False))

print("\n" + "="*140)
print("TABLE 2: Memory Footprint")
print("="*140)
print(table_mem.to_markdown(index=False))

print("\n" + "="*140)
print("TABLE 3: Accelerator Utilization")
print("="*140)
print(table_util.to_markdown(index=False))

print("\n" + "="*140)
print("TABLE 4: Performance Bottlenecks")
print("="*140)
print(table_bottleneck.to_markdown(index=False))

print(f"\n✓ Total profile jobs: {len(profile_jobs)}")
print(f"✓ Rows in each table: {len(table_perf)}")
print(f"✓ Architectures: {table_perf['Architecture'].unique().tolist()}")
print(f"✓ Resolutions: {sorted(table_perf['Resolution'].unique().tolist())}")

Waiting for profile job (jgovm7jq5) completion. Type Ctrl+C to stop waiting at any time.
    ✅ SUCCESS                          
Waiting for profile job (jpvw4y3kg) completion. Type Ctrl+C to stop waiting at any time.
    ✅ SUCCESS                          
Waiting for profile job (jgjl16xvp) completion. Type Ctrl+C to stop waiting at any time.
    ✅ SUCCESS                          
Waiting for profile job (jpev209o5) completion. Type Ctrl+C to stop waiting at any time.
    ✅ SUCCESS                          
Waiting for profile job (jgz7wqeop) completion. Type Ctrl+C to stop waiting at any time.
    ✅ SUCCESS                          
Waiting for profile job (j5w9x0o3p) completion. Type Ctrl+C to stop waiting at any time.
    ✅ SUCCESS                          
Waiting for profile job (jg9487vw5) completion. Type Ctrl+C to stop waiting at any time.
    ✅ SUCCESS                          
Waiting for profile job (jp183k08g) completion. Type Ctrl+C to stop waiting at any time.
 