In [2]:
import torch
import torch.nn as nn
import torchvision.models as models
from torchvision import transforms, models, datasets
from torch.utils.data import DataLoader
import torch.optim as optim

In [3]:
import argparse

parser = argparse.ArgumentParser('ResNet')

parser.add_argument('--epoch', type=int, default=2)
parser.add_argument('--learning_rate', type=float, default=0.0005)
parser.add_argument('--batch', type=int, default=64)
parser.add_argument('--imagesize', type=int, nargs=2, default=[256, 256])
parser.add_argument('--seed', type=int, default=42)
parser.add_argument('--num_class', type=int, default=3)

args, unknown = parser.parse_known_args()

In [5]:
!curl -L -o dataset.zip\
  https://www.kaggle.com/api/v1/datasets/download/chetankv/dogs-cats-images

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  434M  100  434M    0     0  21.0M      0  0:00:20  0:00:20 --:--:-- 23.4M


In [6]:
train_dir = '/content/dataset/training_set'
test_dir = '/content/dataset/test_set'

print(args.imagesize)

train_transform = transforms.Compose([
    transforms.Resize(args.imagesize),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor()
])

test_transform = transforms.Compose([
    transforms.Resize(args.imagesize),
    transforms.ToTensor()
])

train_dataset = datasets.ImageFolder(root=train_dir, transform=train_transform)
test_dataset = datasets.ImageFolder(root=test_dir, transform=test_transform)

train_loader = DataLoader(dataset = train_dataset, batch_size=args.batch, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=args.batch, shuffle=False)


[256, 256]


In [7]:
resnet18 = models.resnet18(weights=True)
resnet34 = models.resnet34(weights=None)
resnet50 = models.resnet50(weights=None)

In [8]:
resnet18.fc = nn.Linear(resnet18.fc.in_features, 2)
resnet34.fc = nn.Linear(resnet34.fc.in_features, 2)
resnet50.fc = nn.Linear(resnet50.fc.in_features, 2)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

resnet18.to(device)
resnet34.to(device)
resnet50.to(device)


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [9]:
criterion = nn.CrossEntropyLoss()

resnet18optimizer = optim.Adam(resnet18.parameters(), lr=args.learning_rate)
resnet34optimizer = optim.Adam(resnet34.parameters(), lr=args.learning_rate)
resnet50optimizer = optim.Adam(resnet50.parameters(), lr=args.learning_rate)

## Training ResNet18 Model

In [13]:
print('Training Resnet 18 Model')

for epoch in range(args.epoch):
    resnet18.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        resnet18optimizer.zero_grad()
        outputs = resnet18(images)
        loss = criterion(outputs, labels)
        loss.backward()
        resnet18optimizer.step()

        running_loss += loss.item()
        _, preds = torch.max(outputs, 1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

    train_acc = 100 * correct / total

    print(f"Epoch [{epoch+1}/{args.epoch}] "
          f"Loss: {running_loss:.4f} "
          f"Train Acc: {train_acc:.2f}%")

    if epoch + 1 == args.epoch:
        torch.save(resnet18.state_dict(), "Resnet18.pth")


Training Resnet 18 Model
Epoch [1/2] Loss: 14.1309 Train Acc: 95.47%
Epoch [2/2] Loss: 7.4600 Train Acc: 97.67%


## Training Resnet 34

In [11]:
# print('Training Resnet 34 Model')

# for epoch in range(args.epoch):
#     resnet34.train()
#     running_loss = 0.0
#     correct = 0
#     total = 0

#     for images, labels in train_loader:
#         images, labels = images.to(device), labels.to(device)

#         resnet34optimizer.zero_grad()
#         outputs = resnet34(images)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         resnet34optimizer.step()

#         running_loss += loss.item()
#         _, preds = torch.max(outputs, 1)
#         correct += (preds == labels).sum().item()
#         total += labels.size(0)

#     train_acc = 100 * correct / total

#     print(f"Epoch [{epoch+1}/{args.epoch}] "
#           f"Loss: {running_loss:.4f} "
#           f"Train Acc: {train_acc:.2f}%")

#     if epoch + 1 == args.epoch:
#         torch.save(resnet34.state_dict(), "Resnet34.pth")


## Training Resnet 50

In [12]:
# print('Training Resnet 50 Model')

# for epoch in range(args.epoch):
#     resnet50.train()
#     running_loss = 0.0
#     correct = 0
#     total = 0

#     for images, labels in train_loader:
#         images, labels = images.to(device), labels.to(device)

#         resnet50optimizer.zero_grad()
#         outputs = resnet50(images)
#         loss = criterion(outputs, labels)
#         loss.backward()
#         resnet50optimizer.step()

#         running_loss += loss.item()
#         _, preds = torch.max(outputs, 1)
#         correct += (preds == labels).sum().item()
#         total += labels.size(0)

#     train_acc = 100 * correct / total

#     print(f"Epoch [{epoch+1}/{args.epoch}] "
#           f"Loss: {running_loss:.4f} "
#           f"Train Acc: {train_acc:.2f}%")

#     if epoch + 1 == args.epoch:
#         torch.save(resnet50.state_dict(), "Resnet50.pth")


In [None]:
!pip install --upgrade "qai-hub[torch]"
!qai-hub configure --api_token mglpng0fw56ohxnp58sx3smm1nw6k5jjl0ojhswr

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

checkpoint = torch.load('/content/Resnet18.pth', map_location=device)
resnet18.load_state_dict(checkpoint)
resnet18.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [20]:
import qai_hub as hub

devices = [
    # hub.Device('Dragonwing IQ-9075 EVK'),
    # hub.Device('QCS8550 (Proxy)'),
    # hub.Device('Snapdragon X Elite CRD'),
    # hub.Device('Samsung Galaxy S24 (Family)'),
    hub.Device('Samsung Galaxy S24 Ultra')
]

models = [
    'Resnet18',
    # 'Resnet34',
    # 'Resnet50'
]

print(hub.get_devices())

[Device(name='Google Pixel 3 (Family)', os='10', attributes=['os:android', 'framework:tflite', 'framework:onnx', 'abi:aarch64-android', 'vendor:google', 'format:phone', 'chipset:qualcomm-snapdragon-845', 'chipset:sdm845', 'hexagon:v65', 'soc-model:1']), Device(name='Google Pixel 3', os='10', attributes=['os:android', 'framework:tflite', 'framework:onnx', 'abi:aarch64-android', 'vendor:google', 'format:phone', 'chipset:qualcomm-snapdragon-845', 'chipset:sdm845', 'hexagon:v65', 'soc-model:1']), Device(name='Google Pixel 3a', os='10', attributes=['os:android', 'framework:tflite', 'framework:onnx', 'abi:aarch64-android', 'vendor:google', 'format:phone', 'chipset:qualcomm-snapdragon-670', 'chipset:sdm670', 'hexagon:v65', 'soc-model:6']), Device(name='Google Pixel 3 XL', os='10', attributes=['os:android', 'framework:tflite', 'framework:onnx', 'abi:aarch64-android', 'vendor:google', 'format:phone', 'chipset:qualcomm-snapdragon-845', 'chipset:sdm845', 'hexagon:v65', 'soc-model:1']), Device(nam

### This is to fix the following error

Unable to load torch model via torch.jit.load().  We recommend using at least torch 1.11 to trace a pytorch model. You can install the latest recommended torch via: `pip install "qai-hub[torch]"`.


In [21]:
print(torch.__version__)

2.9.0+cu126


Tytorch model is > 1.11, still just to double check running this command as said by ai_hub

In [22]:
!pip install "qai-hub[torch]"



## Compile job

3 models, 5 Devices for each model

15 jobs in total

In [23]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

h, w = args.imagesize
print("Height:{0}, Width:{1}".format(h, w))

input_shape: tuple[int, ...] = (1, 3, h, w)
example_input = torch.rand(input_shape).to(device)

pt_model = torch.jit.trace(resnet18, example_input)

jobs = []

for device in devices:
  for model in models:
    name = model + "_" + device.name
    print("Submitting compile job for " + name)

    job = hub.submit_compile_job(
        pt_model,
        name=name,
        device=device,
        input_specs=dict(image=input_shape),
    )
    assert isinstance(job, hub.CompileJob)
    jobs.append(job)


Height:256, Width:256
Submitting compile job for Resnet18_Samsung Galaxy S24 Ultra
Uploading tmpjnnqn9d4.pt


100%|[34m██████████[0m| 42.8M/42.8M [00:02<00:00, 18.8MB/s]


Scheduled compile job (jpry9xneg) successfully. To see the status and results:
    https://workbench.aihub.qualcomm.com/jobs/jpry9xneg/



In [24]:
profile_jobs = []

for device in devices:
  for job in jobs:
    name = model + "_" + device.name
    print("Submitting profiling job for : " + name)

    pf_job = hub.submit_profile_job(
        model=job.get_target_model(),
        device=device,
        name=name
    )

    assert isinstance(pf_job, hub.ProfileJob)
    profile_jobs.append(pf_job)


Submitting profiling job for : Resnet18_Samsung Galaxy S24 Ultra
Waiting for compile job (jpry9xneg) completion. Type Ctrl+C to stop waiting at any time.
    ❌ FAILED               Unable to load torch model via torch.jit.load().  We recommend using at least torch 1.11 to trace a pytorch model. You can install the latest recommended torch via: `pip install "qai-hub[torch]"`.            


UserError: Model passed in was 'None' (make sure this is not the target of a failed compile job)

In [18]:
print(torch.__version__)

2.9.0+cu126
