In [1]:
import os
# Configure GPU visibility before torch is imported below:
#   CUDA_DEVICE_ORDER=PCI_BUS_ID  -> GPU ids line up with what nvidia-smi/nvtop display
#   CUDA_VISIBLE_DEVICES=3        -> restrict this process to that single GPU
os.environ.update(
    CUDA_DEVICE_ORDER='PCI_BUS_ID',
    CUDA_VISIBLE_DEVICES='3',
)

import torch

from models import TUMViTG, TUMViTG_modified

torch.set_float32_matmul_precision("medium") # make use of Tensor Cores
torch._dynamo.config.suppress_errors = True # makes torch.compile work, regardless of xFormers

In [None]:
# Build TUMViTG from the prepared /mnt/data/nfs03-R6/TUMViTG.pth file, then call
# .cuda() and .eval() on it (presumably standard torch.nn.Module semantics:
# move to the GPU and switch to inference mode -- confirm in models.py).
model = (
    TUMViTG("/mnt/data/nfs03-R6/TUMViTG.pth", output_mode="class+mean")
    .cuda()
    .eval()
)

In [3]:
# xFormers makes torch.compile throw errors
# So either install xFormers and don't use torch.compile
# or use torch.compile but don't install xFormers

# Setting 'torch._dynamo.config.suppress_errors = True' (done in the first cell) lets torch.compile run even with xFormers installed.
# It is unclear whether this actually delivers the performance benefits of both.

# model = torch.compile(model, fullgraph=True, mode="reduce-overhead")

In [None]:
# Smoke test: push one random batch through `model` and report the output shape.
# The batch is named `dummy_input` rather than `input` so that Python's builtin
# `input()` is not shadowed for the rest of the kernel session.
# Shape is presumably (batch, channels, height, width) -- confirm against TUMViTG.
dummy_input = torch.randn(4, 3, 224, 224, device="cuda")

# Gradients are not needed for a forward-only check.
with torch.no_grad():
    output = model(dummy_input)

print(output.shape)

In [None]:
# Build the modified variant from the same prepared file, configured for
# img_size=(448, 896), then call .cuda() and .eval() on it (presumably standard
# torch.nn.Module semantics -- confirm in models.py).
modified_model = (
    TUMViTG_modified(
        "/mnt/data/nfs03-R6/TUMViTG.pth",
        output_mode="class+mean",
        img_size=(448, 896),
    )
    .cuda()
    .eval()
)

In [None]:
# Smoke test for the modified model: one random batch whose last two dimensions
# match the img_size=(448, 896) that `modified_model` was constructed with.
# Shape is presumably (batch, channels, height, width) -- confirm against TUMViTG_modified.
modified_input = torch.randn((4, 3, 448, 896), device="cuda")

# Forward pass only; no gradients are tracked.
with torch.no_grad():
    modified_output = modified_model(modified_input)

print(modified_output.shape)