In [1]:
import yaml
import pathlib
import time

import torch
import torch.nn

import mnn.vision.models.vision_transformer.encoder.config as mnn_encoder_config
import mnn.vision.config as mnn_config
from mnn.vision.models.vision_transformer.e2e import (
    MyVisionTransformer
)
from mnn.vision.models.vision_transformer.tasks.object_detection import ObjectDetectionOrdinalHead 

## UTILITIES

In [2]:
def inference_test(image: torch.Tensor, model: torch.nn.Module):
    """Time one forward pass of ``model`` on ``image`` and return a traced model.

    Prints the elapsed time of a single forward pass and the shape of its
    output, then traces ``model.forward`` with ``image`` as the example input.

    Args:
        image: Example input passed to ``model``.
        model: Module to time and trace; its output must expose ``.shape``.

    Returns:
        The object produced by ``torch.jit.trace``.
    """
    # Gradients are not needed to time inference: disabling autograd keeps
    # graph-building overhead out of the measurement.
    with torch.no_grad():
        # perf_counter is the monotonic, high-resolution clock meant for
        # measuring short intervals (time.time() can jump with clock changes).
        t0 = time.perf_counter()
        output = model(image)
        t1 = time.perf_counter()
    print("Time taken:", t1 - t0, "seconds")
    print("Model's output shape:", output.shape)
    # Tracing is done outside no_grad, exactly as before; check_trace=True asks
    # torch to verify the traced result against the original on the same input.
    traced_model = torch.jit.trace(model.forward, image, check_trace=True, strict=True)
    return traced_model

def count_parameters(model):
    """Return the total element count of the parameters of ``model`` that require gradients."""
    trainable_total = 0
    for param in model.parameters():
        # Frozen parameters (requires_grad == False) are not counted.
        if not param.requires_grad:
            continue
        trainable_total += param.numel()
    return trainable_total

def read_yaml_file(file_path: pathlib.Path) -> dict:
    """Parse the YAML file at ``file_path`` and return its content.

    Args:
        file_path: Location of the YAML file to read.

    Returns:
        The parsed document (expected to be a mapping); an empty dict when the
        file contains no document at all.
    """
    # Read as UTF-8 explicitly instead of depending on the platform's locale
    # encoding, so the same file parses identically on every machine.
    with file_path.open(mode="r", encoding="utf-8") as f:
        # PyYAML 6+ requires an explicit Loader argument.
        # NOTE(review): FullLoader resolves more than the plain YAML tag set;
        # prefer yaml.safe_load if these files can come from an untrusted source.
        loaded = yaml.load(f, Loader=yaml.FullLoader)
    # An empty file parses to None, which would violate the declared dict
    # return type and only fail later with an opaque subscript error.
    return {} if loaded is None else loaded

## CHOOSE EXPERIMENT

In [3]:
# Ask which experiment to load; the answer must parse as an integer.
# NOTE(review): input() blocks an unattended "Restart & Run All".
raw_choice = input("Choose experiment Number and press Enter:")
experiment_number = int(raw_choice)
# The experiment's directory name is "experiment" followed by the chosen number.
experiment_name = "experiment" + str(experiment_number)

## INITIALIZATION

In [6]:
""" CONFIGURATION """
n = 1  # number of images in the random input batch built at the end of this cell

# Both YAML files live in the directory named after the chosen experiment.
experiment_dir = pathlib.Path(experiment_name)

model_config_as_dict = read_yaml_file(experiment_dir / "model.yaml")
transformer_section = model_config_as_dict["MyVisionTransformer"]
model_config = mnn_encoder_config.MyVisionTransformerConfiguration.from_dict(transformer_section)
encoder_config = model_config.encoder_config

hyperparameters_config_as_dict = read_yaml_file(experiment_dir / "hyperparameters.yaml")
hyperparameters_config = mnn_config.HyperparametersConfiguration.from_dict(
    hyperparameters_config_as_dict
)

# --- MODEL ---
image_size = hyperparameters_config.image_size
# The image height is used as the sequence length and the width as the
# embedding (and hidden) size.
sequence_length = image_size.height
embedding_size = image_size.width
hidden_dim = embedding_size
# Random stand-in image of shape (n, 3, height, width) with values in [0, 255).
# NOTE(review): confirm this value range matches what the model expects.
image_RGB = torch.rand(n, 3, sequence_length, embedding_size) * 255

In [None]:
# dtype handed to the model constructor and used to cast the input tensor.
d_type = torch.float16
# NOTE(review): the model is built with is_input_normalized=True, yet image_RGB
# was multiplied by 255 when it was created — confirm this is intended.
my_transformer = MyVisionTransformer(
    encoder_config,
    image_size,
    n_high_level_layers=1,
    is_input_normalized=True,
    dtype=d_type,
)
# The head configuration is read from the "VisionTransformerHead" entry of the model YAML.
head_section = model_config_as_dict["MyVisionTransformer"]["VisionTransformerHead"]
head_config = mnn_encoder_config.VisionTransformerEncoderConfiguration.from_dict(head_section)
# Cast the input to the same dtype before it is fed to the model.
image_RGB = image_RGB.to(d_type)
my_transformer.set_batch_size(n)


### Visualize the model

In [None]:
# mnn.visualize supplies the make_dot helper used in the following cell.
import mnn.visualize
# Forward pass whose result is handed to make_dot in the next cell.
# NOTE(review): autograd is left enabled here — presumably make_dot needs the
# autograd graph attached to `output`; confirm before wrapping in no_grad.
output = my_transformer(image_RGB)

In [None]:
# Map parameter names to tensors for make_dot's `params` argument.
named_params = dict(my_transformer.named_parameters())
graph = mnn.visualize.make_dot(output, params=named_params)
# Render the graph to a PNG named "my_transformer"; the call stays the last
# expression so the cell still displays its return value.
graph.render("my_transformer", format="png")

In [None]:
# Time one forward pass and keep the traced model for later use.
traced_model = inference_test(image=image_RGB, model=my_transformer)
# count_parameters only counts parameters that require gradients.
n_trainable = count_parameters(my_transformer)
print("Number of parameters:", n_trainable)