## Update existing WandB run manually

In [None]:
import wandb

# Client for the W&B public API, used to edit a run after it has finished.
api = wandb.Api()

# Find the run path at wandb.ai -> Project -> Run -> Overview -> Run path
run = api.run("dritter/STL-topex/jzymzseu")

# Metrics that are written into the run summary by hand.
for summary_key, summary_value in (("kid_mean", 0.24695), ("kid_std", 0.07817)):
    run.summary[summary_key] = summary_value

# Persist the modified run on the server.
run.update()

## RGBD Dataset Preprocessing
Merge the RGB and depth data into RGBD images by storing the depth map in the alpha channel, then save the results to disk.

In [1]:
import os

import numpy as np
from PIL import Image
from torchvision.datasets import ImageFolder

from datamodules.datasets import ImageFolderWithPaths

# Two folder datasets over the same samples: RGB images (which also yield the
# file path) and the matching depth maps.
rgb_dataset = ImageFolderWithPaths("../data/topex-printer-32-depth/train/rgb")
depth_dataset = ImageFolder("../data/topex-printer-32-depth/train/depth_zoe")

# NOTE(review): the pairing relies on both folders enumerating their samples in
# the same order (identical class folders and file names) -- confirm. `zip`
# silently stops at the shorter dataset.
for rgb_item, depth_item in zip(rgb_dataset, depth_dataset):
    # presumably ImageFolderWithPaths yields (image, label, path) -- index 2 is
    # used as the source file path here.
    path = rgb_item[2]
    rgb = np.asarray(rgb_item[0])
    # Only the first channel of the loaded depth image is kept.
    depth = np.asarray(depth_item[0])[:, :, 0]
    # Depth is appended as the last channel, which mode "RGBA" treats as alpha.
    sample = Image.fromarray(np.dstack((rgb, depth)), mode="RGBA")

    # <root>/<modality>/<class>/<file>  ->  <root>/rgbd-zoedepth/<class>/<file>
    # os.path keeps this separator-agnostic and, unlike joining split("/")
    # pieces, never yields an absolute "/..." target for a short relative path.
    cls_dir, entity = os.path.split(path)
    cls = os.path.basename(cls_dir)
    out_root = os.path.dirname(os.path.dirname(cls_dir))
    out_dir = os.path.join(out_root, "rgbd-zoedepth", cls)
    out = os.path.join(out_dir, entity)
    os.makedirs(out_dir, exist_ok=True)
    sample.save(out)

# depth = np.array(depth_dataset[123][0])[:, :, 0]
# sample = np.array(rgb_dataset[123][0])
# sample = np.dstack((sample, depth))
# # TODO
# print(sample.shape)
# sample = Image.fromarray(sample, mode="RGBA")
# print(sample)
# display(sample)

In [None]:
import os

import numpy as np
from PIL import Image
from torchvision.datasets import ImageFolder

from src.datamodules.datasets import ImageFolderWithPaths

rgb_dataset = ImageFolderWithPaths("data/topex-printer-32-depth/test/rgb")
depth_dataset = ImageFolder("data/topex-printer-32-depth/test/depth_zoedepth")

for i, (rgb, depth) in enumerate(zip(rgb_dataset, depth_dataset)):
    path = rgb[2]
    rgb = np.asarray(rgb[0])
    depth = np.asarray(depth[0])[:, :, 0]
    sample = np.dstack((rgb, depth))
    sample = Image.fromarray(sample, mode="RGBA")
    out_root = "/".join(path.split("/")[:-3])
    cls = path.split("/")[-2]
    entity = path.split("/")[-1]
    out = f"{out_root}/rgbd-normalized/{cls}/{entity}"
    # display(Image.fromarray(rgb))
    # display(Image.fromarray(depth))
    # display(sample)
    os.makedirs(f"{out_root}/rgbd-normalized/{cls}", exist_ok=True)
    sample.save(out)

# depth = np.array(depth_dataset[123][0])[:, :, 0]
# sample = np.array(rgb_dataset[123][0])
# sample = np.dstack((sample, depth))
# # TODO
# print(sample.shape)
# sample = Image.fromarray(sample, mode="RGBA")
# print(sample)
# display(sample)

Load the RGBD dataset

In [None]:
from PIL import Image
from torchvision.transforms import ToPILImage

from src.datamodules.finetune_rgbd_dm import FinetuneRGBD

# Build the fine-tuning datamodule on the merged RGBD folders and grab its loaders.
rgbd_dm = FinetuneRGBD(
    **{
        "train_dir": "data/topex-printer-32-depth/train/rgbd-normalized",
        "test_dir": "data/topex-printer-32-depth/test/rgbd-normalized",
    }
)
rgbd_dm.setup()
td, vd, testd = (
    rgbd_dm.train_dataloader(),
    rgbd_dm.val_dataloader(),
    rgbd_dm.test_dataloader(),
)

# One tensor -> PIL converter per view of the sample.
to_rgb, to_rgba, to_g = (ToPILImage(mode=pil_mode) for pil_mode in ("RGB", "RGBA", "L"))

# Only the first training batch is inspected.
batch = next(iter(td), None)
if batch is not None:
    x, y, paths = batch
    first = x[0]
    sample_rgb = to_rgb(first[:3])  # channels 0-2 of the first sample
    sample_g = to_g(first[3])  # channel 3 of the first sample
    sample_rgba = to_rgba(first)  # all four channels together
    # display(sample_rgb)
    # display(sample_g)
    # display(sample_rgba)

RGBD datamodule transforms

In [None]:
import pytorch_lightning as pl
import torch
import torchvision.transforms as T
from PIL import Image
from transformers import AutoFeatureExtractor, AutoModelForImageClassification

from datamodules.adaptation_rgbd_dm import AdaptationRGBD
from models.components.swinv2_twin_rgbd import SwinV2TwinRGBD
from models.swinv2_rgbd_module import SwinV2RGBDModule

# Tensor -> PIL converters for the colour part and the single depth channel.
torgb, togs = T.ToPILImage(mode="RGB"), T.ToPILImage(mode="L")

rgbd_dm = AdaptationRGBD(
    **{
        "train_src_dirs": ["../data/topex-printer-32-depth/train/rgbd-normalized"],
        "train_target_dirs": ["../data/topex-printer-32-depth/test/rgbd-zoedepth"],
        "val_dirs": ["../data/topex-printer-32-depth/test/rgbd-zoedepth"],
        "test_dirs": ["../data/topex-printer-32-depth/test/rgbd-zoedepth"],
        "batch_size": 2,
        "augmix": True,
    }
)
rgbd_dm.setup()
test_dl = rgbd_dm.test_dataloader()
train_dl = rgbd_dm.train_dataloader()

# Look at the first training batch only; it unpacks into a source and a target part.
batch = next(iter(train_dl), None)
if batch is not None:
    src_batch, target_batch = batch
    # presumably element 0 of each part is the image tensor -- TODO confirm
    src_sample, target_sample = src_batch[0], target_batch[0]
    first_src = src_sample[0]
    src_rgb = torgb(first_src[:3])
    src_depth = togs(first_src[3])

# display(src_rgb)
# display(src_depth)

## Test RGBD LightningModule functions

In [None]:
import pytorch_lightning as pl
import torch
from transformers import AutoFeatureExtractor, AutoModelForImageClassification

from src.datamodules.finetune_rgbd_dm import FinetuneRGBD
from src.models.components.swinv2_twin_rgbd import SwinV2TwinRGBD
from src.models.swinv2_rgbd_module import SwinV2RGBDModule

# Datamodule: train on the "rgbd-normalized" folder, test on the "rgbd-zoedepth" folder.
rgbd_dm = FinetuneRGBD(
    **{
        "train_dir": "data/topex-printer-32-depth/train/rgbd-normalized",
        "test_dir": "data/topex-printer-32-depth/test/rgbd-zoedepth",
    }
)
rgbd_dm.setup()

# model = VitRGBD(model_name="microsoft/swinv2-base-patch4-window12-192-22k", num_classes=102, optimizer=torch.optim.SGD)
# The optimizer is handed over as a class, not as an instance.
optimizer = torch.optim.SGD
# optimizer = torch.optim.SGD(lr=0.01, momentum=0.9, weight_decay=0.0)
net = SwinV2RGBDModule(
    model_name="swinv2_twin_rgbd",
    num_classes=rgbd_dm.num_classes,
    optimizer=optimizer,
)

# fast_dev_run=True turns this into a quick smoke test of the training loop.
trainer = pl.Trainer(fast_dev_run=True, devices=1)
trainer.fit(net, rgbd_dm)

In [None]:
import data_modules
import torch
from data_modules.baseline_finetune_dm import BaselineFinetuneDM
from data_modules.generic_finetune_dm import GenericFinetuneDM

# Smoke test for GenericFinetuneDM on the topex-printer dataset.
dm = GenericFinetuneDM(train_dir="data/topex-printer/train/", test_dir="data/topex-printer/test/")
# gftdm = GenericFinetuneDM()
dm.prepare_data()
dm.setup()
train_dataloader = dm.train_dataloader()
val_dataloader = dm.val_dataloader()
test_dataloader = dm.test_dataloader()

# Pull a single batch to make sure the training loader actually yields data.
inputs, labels = next(iter(train_dataloader))
print(dm.num_classes)
print(len(dm.test.classes))
# Compare the *number* of test classes with num_classes. The previous check
# compared the class collection itself with the count and so always printed False.
print(len(dm.test.classes) == dm.num_classes)

In [None]:
import os
import shutil

import pandas as pd

data = "/home/dennis/projects/evaluation-pipeline/data/datasets/visda2017_meshes/train"
out = "/home/dennis/projects/evaluation-pipeline/data/datasets/visda2017_meshgrid"


def group_renders_by_mesh_and_light(root):
    """Group render file names found below ``root`` by mesh and light angle.

    File names are expected to look like ``<mesh>__<a0>_<a1>...``: the part
    before the first ``__`` is the mesh name and the second ``_``-separated
    token of the following part is used as the light angle. Files that do not
    follow this pattern (e.g. stray metadata files) are skipped instead of
    raising ``IndexError``.

    Args:
        root: Directory that is walked recursively.

    Returns:
        dict mapping ``"<mesh>_<light_angle>"`` to the list of matching file
        names (names only, without their directory).
    """
    groups = {}
    for _dirpath, _dirnames, filenames in os.walk(root):
        for fn in filenames:
            parts = fn.split("__")
            if len(parts) < 2:
                continue  # no "__" separator -> not a render file
            angles = parts[1].split("_")
            if len(angles) < 2:
                continue  # no second angle token -> cannot determine the light angle
            mesh, light_angle = parts[0], angles[1]
            groups.setdefault(f"{mesh}_{light_angle}", []).append(fn)
    return groups


d = group_renders_by_mesh_and_light(data)

In [None]:
import os
import random

import torchvision
from PIL import Image
from torchvision import transforms as transforms
from torchvision.io import read_image
from torchvision.utils import make_grid


def add_margin(pil_img, top, right, bottom, left, color):
    """Return a new image with ``pil_img`` pasted onto a larger, solid canvas.

    Args:
        pil_img: Source PIL image; its mode is reused for the canvas.
        top, right, bottom, left: Margin added on each side, in pixels.
        color: Fill color of the canvas.

    Returns:
        The enlarged PIL image; ``pil_img`` itself is not modified.
    """
    src_w, src_h = pil_img.size
    canvas_size = (src_w + right + left, src_h + top + bottom)
    canvas = Image.new(pil_img.mode, canvas_size, color)
    # The top-left corner of the source sits at (left, top) on the canvas.
    canvas.paste(pil_img, (left, top))
    return canvas


# Directory whose files are sampled for the grid.
img_dir = "../data/modelnet10_toy/train/chair/chair_0001"

grid_size = (4, 4)
padding = 0
margin = 16
n_rows, n_cols = grid_size
n_img = n_rows * n_cols
renders = os.listdir(img_dir)
# rnd_model = random.sample(renders, k=1)[0]
# rnd_model = rnd_model.split("_")[3]
# renders = [render for render in renders if render.split("_")[3] == rnd_model]

# Draw n_img distinct files at random and load each one as a tensor.
rnd_imgs = random.sample(renders, k=n_img)
imgs = []
for img_fn in rnd_imgs:
    imgs.append(read_image(f"{img_dir}/{img_fn}"))

# Tile the tensors, convert to PIL, scale to 224x224 and add the black frame.
grid = make_grid(imgs, nrow=n_cols, padding=padding)
img_grid = transforms.ToPILImage()(grid).resize((224, 224))
img_grid = add_margin(img_grid, top=margin, right=margin, bottom=margin, left=margin, color=(0, 0, 0))
img_grid

In [None]:
import os

import numpy as np
import torch
import torchvision.transforms as transforms
from PIL import Image

# Define the mean and standard deviation values from the ImageNet dataset
# Per-channel normalization statistics. These are NOT the ImageNet values: with
# mean 0.5 and std 0.5, Normalize maps a [0, 1] tensor onto [-1, 1].
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]
# ImageNet statistics, kept for reference:
# mean = [0.485, 0.456, 0.406]
# std = [0.229, 0.224, 0.225]

# Resize, take a random 224x224 crop, convert to a tensor and normalize.
transform = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.RandomCrop((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=mean, std=std),
    ]
)

# Walk the dataset and report the value range of every image before and after
# the transform. `data_dir` replaces the former `dir` variable, which shadowed
# the builtin and was additionally rebound by the loop itself.
data_dir = "../data/visda2017/train"
for path, _subdirs, filenames in os.walk(data_dir):
    for f in filenames:
        # The context manager closes the file handle once the image is processed;
        # otherwise handles pile up while iterating over the whole dataset.
        with Image.open(f"{path}/{f}") as image:
            pixels = np.array(image)
            print(f"minmax original: {pixels.min()}, {pixels.max()}")
            # Apply the transform pipeline to the image
            transformed_image = transform(image)
        # Print the minimum and maximum pixel values in the transformed image tensor
        print(f"minmax transformed: {transformed_image.min()}, {transformed_image.max()}")

### DataModule testing

In [None]:
import torch
from torchvision import transforms

from src.datamodules.generic_finetune_dm import GenericFinetuneDM

# VisDA-2017 fine-tuning datamodule with AugMix on and colour jitter off.
dm = GenericFinetuneDM(
    **{
        "train_dir": "data/visda2017/train/",
        "test_dir": "data/visda2017/test/",
        "augmix": True,
        "random_color_jitter": False,
        "toy": False,
    }
)
# gftdm = GenericFinetuneDM()
dm.prepare_data()
dm.setup()
train_dataloader = dm.train_dataloader()

# print(train_dataloader.dataset.transform)

# Convert the first training batch to PIL images and show the second one.
inputs, labels = next(iter(train_dataloader))
topil = transforms.ToPILImage()
imgs = list(map(topil, inputs))
imgs[1]

### generic_adaptation_dm Datamodule


In [None]:
import torch
from IPython.display import display
from torchvision import transforms

from src.datamodules.generic_adaptation_dm import GenericAdaptationDM

dm = GenericAdaptationDM(
    **{
        "train_src_dirs": ["data/visda2017/train/"],
        "train_target_dirs": ["data/visda2017/test/"],
        "val_dirs": ["data/visda2017/val/"],
        "test_dirs": ["data/visda2017/test/"],
        "augmix": True,
        "random_color_jitter": False,
        "toy": True,
        "num_workers": 1,
    }
)
dm.prepare_data()
dm.setup()


def batch_to_pil(image_batch):
    """Convert every image tensor of ``image_batch`` into a PIL image."""
    return [transforms.ToPILImage()(img) for img in image_batch]


def show_first(pil_images, label_batch, tag):
    """Display the first image and print ``tag`` followed by its label name."""
    display(pil_images[0])
    print(tag, dm.idx2label[label_batch[0].item()])


# Training batches unpack into a (source, target) pair of (images, labels).
train_dataloader = dm.train_dataloader()
src, target = next(iter(train_dataloader))
src_img, src_label = src
target_img, target_label = target
src_img = batch_to_pil(src_img)
target_img = batch_to_pil(target_img)
show_first(src_img, src_label, "train_src:")
show_first(target_img, target_label, "train_target:")

# Validation batches unpack directly into (images, labels).
val_dataloader = dm.val_dataloader()
val_img, val_label = next(iter(val_dataloader))
val_img = batch_to_pil(val_img)
show_first(val_img, val_label, "val:")

# Test batches unpack directly into (images, labels) as well.
test_dataloader = dm.test_dataloader()
test_img, test_label = next(iter(test_dataloader))
test_img = batch_to_pil(test_img)
show_first(test_img, test_label, "test:")

## Load a .ckpt file and extract only the classifier network weights (keys prefixed with `net.`)

In [None]:
from collections import OrderedDict

import torch
from transformers import AutoFeatureExtractor, AutoModelForImageClassification

# NOTE: torch.load unpickles the file, which can execute arbitrary code -- only
# load checkpoints from a trusted source.
# map_location="cpu" lets a checkpoint saved on a GPU be opened on a machine
# without one; load_state_dict below copies the values into the model anyway.
ckpt = torch.load(
    "out/synthnet-transfer-learning-outputs/train/multiruns/STL-visda2017/vitb16_ft_adamw1e-5_warmupcalr_augmix/2023-04-17_14-42-16/0/checkpoints/epoch_007.ckpt",
    map_location="cpu",
)
vit_state = ckpt["state_dict"]

# Keep only the entries stored under the "net." prefix and strip that prefix so
# the keys can be loaded into a bare model.
prefix = "net."
od = OrderedDict((k[len(prefix):], v) for k, v in vit_state.items() if k.startswith(prefix))

# Summarize each tensor by shape and dtype instead of dumping all weight values.
for k, v in od.items():
    print(f"{k}: {tuple(v.shape)} {v.dtype}")
# print(vit_state)
net = AutoModelForImageClassification.from_pretrained(
    "google/vit-base-patch16-224-in21k",
    num_labels=12,
    ignore_mismatched_sizes=True,
    output_hidden_states=True,
    output_attentions=True,
)
# Strict load: raises if the extracted keys do not match the model's keys.
net.load_state_dict(od)