# Offline Step: Generation of Training Artifacts and Dataset Download

## Part 1: Training Artifact Generation

In [None]:
# Install everything this notebook needs into the kernel's environment.
# Upgrade pip itself first.
%pip install --upgrade pip
# Packages for the model export below (torch, torchvision, onnxruntime) plus
# medmnist and tqdm — presumably used by the dataset step; TODO confirm.
%pip install onnxruntime torch torchvision medmnist tqdm
# Additional packages; presumably supporting dependencies of the training
# build installed on the next line — TODO confirm.
%pip install cerberus flatbuffers h5py numpy onnx packaging protobuf sympy setuptools
# onnxruntime-training-cpu is pulled from the package index given with -i
# instead of the default index.
%pip install -i https://aiinfra.pkgs.visualstudio.com/PublicPackages/_packaging/ORT/pypi/simple/ onnxruntime-training-cpu
# netron is used later in this notebook to open the exported ONNX model.
%pip install netron

In [None]:
# largely borrowed from https://www.youtube.com/watch?v=u7YCaiHOC9o

import torch
import os
import torchvision
import netron

# Create the output folders (the parent first, then the folder the exported
# model is written into); folders that already exist are left untouched.
for out_dir in ("public", "public/training_artifacts"):
    os.makedirs(out_dir, exist_ok=True)

# Start from a ResNet-50 initialised with torchvision's default pretrained weights.
pretrained_weights = torchvision.models.ResNet50_Weights.DEFAULT
model = torchvision.models.resnet50(weights=pretrained_weights)


# bloodMNIST only has 8 classes, so replace the final fully connected layer
# with one that keeps the same input width but produces 8 outputs.
num_fc_feats = model.fc.in_features
model.fc = torch.nn.Linear(num_fc_feats, 8)

# Switch the module to training mode before exporting it to ONNX.
# NOTE(review): torch.onnx.export also takes its own `training` argument —
# confirm that model.train() alone yields the training-mode graph intended here.
model.train()
model_name = "resnet50"
onnx_path = f"public/training_artifacts/{model_name}.onnx"
dummy_input = torch.randn(1, 3, 224, 224)  # example input handed to the exporter
# Mark dimension 0 of both the input and the output as a dynamic "batch" axis.
batch_axes = {tensor_name: {0: "batch"} for tensor_name in ("input", "output")}
torch.onnx.export(
    model,
    dummy_input,
    onnx_path,
    export_params=True,
    do_constant_folding=False,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes=batch_axes,
)

# Open the exported file in the Netron viewer.
netron.start(onnx_path)

Now that we have a model converted to ONNX format, we need to generate the training artifacts that we will use later:

In [None]:
import onnx
from onnxruntime.training import artifacts

# Load the ONNX model exported by the previous cell. The folder must be the
# same one that cell created and wrote to ("public/training_artifacts"); the
# earlier "..public/..." spelling pointed at a folder that is never created.
artifact_dir = "public/training_artifacts"
onnx_model = onnx.load(f"{artifact_dir}/{model_name}.onnx")

# Distinguish which weights we want to train and which we want to leave
# untouched (in the demo case, only the last layer is trained).
requires_grad = ["fc.weight", "fc.bias"]
# Every other initializer in the graph is treated as frozen.
frozen_params = [
    param.name
    for param in onnx_model.graph.initializer
    if param.name not in requires_grad
]

# Generate the training artifacts next to the exported model, using
# cross-entropy as the loss and AdamW as the optimizer.
artifacts.generate_artifacts(
    onnx_model,
    requires_grad=requires_grad,
    frozen_params=frozen_params,
    loss=artifacts.LossType.CrossEntropyLoss,
    optimizer=artifacts.OptimType.AdamW,
    artifact_directory=artifact_dir
)

## Part 2: Dataset Download and Install

For the purposes of this demo, we install the bloodMNIST dataset and test using that.

In [None]:
# Run the convert_dataset.py script for the "bloodmnist" data flag, with
# "public/data" as its output directory.
# NOTE(review): the script is not part of this notebook — presumably it downloads
# the dataset and writes it in the format the demo expects; confirm against the script.
!python convert_dataset.py --data_flag "bloodmnist" --output_dir "public/data"