In [1]:
import os
import shutil
import sys
sys.path.extend(['../../IndoorPathlossRadioMapPrediction/'])

import numpy as np
import pandas as pd
from tqdm import tqdm
import torch
import imageio.v3 as iio
from matplotlib import pyplot as plt
from skimage.transform import resize
from skimage.io import imread
from PIL import Image

from utils import pad_to_square
from algorithm import ICASSP
from networks.vit_pp_upernet import ViTPlusPlusUPerNet

%load_ext autoreload
%autoreload 2

In [2]:
# Build the segmentation network with explicit keyword arguments, then wrap it
# in the ICASSP algorithm and restore its state from the task-1 checkpoint.
network = ViTPlusPlusUPerNet(
    image_size=518,
    min_mlp_tokens=0,
    mixer_out=None,
    mlp_input_dim=19,
    neck_input_dim=256,
    neck_scales=[14, 14, 14, 8, 8, 8, 4, 4, 4, 2, 2, 2, 1, 1],
    neck_size=[16, 16, 16, 32, 32, 32, 64, 64, 64, 128, 128, 128, 256, 256],
    num_channels=3,
    num_classes=1,
    pre_out_channels=None,
    pretrained="facebook/dinov2-base",
    res_hidden_states=None,
    up_pool_scales=[1, 2, 4, 7],
    use_upernet=True,
    v_hidden_size=768,
    v_num_attention_heads=12,
    v_num_channels=3,
    v_num_hidden_layers=12,
    v_patch_size=14,
)

# Extra constructor arguments forwarded to ICASSP when loading the checkpoint.
alg_conf = dict(fixed_scale=False, out_norm=160, network=network)
alg = ICASSP.load_from_checkpoint("./task1.ckpt", **alg_conf)

# Put the network in evaluation mode; as the cell's last expression this also
# displays the module's repr.
alg.network.eval()

ViTPlusPlusUPerNet(
  (conv): Conv2d(3, 3, kernel_size=(3, 3), stride=(1, 1), padding=same)
  (vit_pp): ViTPlusPlus(
    (vit): Dinov2Model(
      (embeddings): Dinov2Embeddings(
        (patch_embeddings): Dinov2PatchEmbeddings(
          (projection): Conv2d(3, 768, kernel_size=(14, 14), stride=(14, 14))
        )
        (dropout): Dropout(p=0.0, inplace=False)
      )
      (encoder): Dinov2Encoder(
        (layer): ModuleList(
          (0-11): 12 x Dinov2Layer(
            (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True)
            (attention): Dinov2Attention(
              (attention): Dinov2SelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.0, inplace=False)
              )
              (output): Dinov2SelfOutput(
                

In [3]:
def get_input(img, size=518, norm=(25, 20, 200)):
    """Turn a raw input image into a batched network input tensor.

    Steps: divide each channel by its entry in ``norm``, pad to a square with
    ``pad_to_square``, resize to ``size`` x ``size``, then reorder the axes
    from (H, W, C) to (C, H, W) and add a leading batch axis.

    Args:
        img: Input image array with channels on the last axis. The number of
            channels must be broadcast-compatible with ``norm`` (three by
            default).
        size: Side length of the square image fed to the network.
        norm: Per-channel divisors applied before padding and resizing.
            NOTE(review): the physical meaning of the defaults is not visible
            in this notebook - confirm against the training pipeline.

    Returns:
        A ``torch.float32`` tensor of shape ``(1, C, size, size)``.
    """
    scaled = img / np.array(norm)
    squared = pad_to_square(scaled)
    resized = resize(squared, (size, size))
    # HWC -> CHW, then add the batch dimension expected by the network.
    tensor = torch.from_numpy(resized.astype(np.float32)).permute((2, 0, 1))
    return tensor.unsqueeze(0)

def get_pred(pred_image, input_img, out_norm=160):
    """Map a raw network output back onto the original image grid.

    The network output is resized to the padded-square shape of the input,
    the padding is cropped away using a mask built with the same
    ``pad_to_square`` helper, and the result is passed through a sigmoid and
    scaled by ``out_norm``.

    Args:
        pred_image: Network output tensor. Its two leading axes are squeezed
            away, so it is assumed to be shaped ``(1, 1, H, W)`` - TODO confirm.
        input_img: The original (unpadded) input image array with channels on
            the last axis; only its spatial shape is used.
        out_norm: Scale applied after the sigmoid. Defaults to 160, the value
            previously hard-coded here.

    Returns:
        A NumPy array with the same spatial shape as ``input_img[..., 0]``
        holding values in the open interval ``(0, out_norm)``.
    """
    # Mask is True where the padded square holds original pixels, False on padding.
    mask = np.ones_like(input_img[..., 0])
    mask = pad_to_square(mask, fill_value=0).astype(bool)

    logits = pred_image.squeeze(0).squeeze(0).detach().cpu().numpy()
    logits = resize(logits, mask.shape)
    # Drop the padded pixels and restore the original height x width layout.
    logits = logits[mask].reshape(input_img[..., 0].shape)

    # The sigmoid is applied after resizing, matching the original order of operations.
    return torch.sigmoid(torch.from_numpy(logits)).numpy() * out_norm

In [5]:
# Run inference over every test sample and save each prediction as a PNG.
Buildings = range(1, 7)
ant_ids = [1]
freq = [1]

solution = pd.DataFrame()

INPUT_DIR = "../ICASSP_TEST_DATA//Inputs/Task_1/"
OUTPUT_DIR = "../preds/task1"
# Create the destination up front so saving does not fail on a fresh checkout.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Send inputs to whichever device the model weights live on instead of
# hard-coding a GPU index.
device = next(alg.network.parameters()).device

# Inference only: disabling autograd avoids building a graph for every image.
with torch.no_grad():
    for Antenna_ID in ant_ids:
        for f_i in freq:
            for b in Buildings:
                for sp in tqdm(range(0, 50), total=50):
                    image_name = f"B{b}_Ant{Antenna_ID}_f{f_i}_S{sp}"
                    input_image = iio.imread(INPUT_DIR + image_name + ".png")
                    your_input_tensor = get_input(input_image)
                    out = alg.network(your_input_tensor.to(device))
                    # Note that y_PL should have the same dimensions, W x H, as the input image
                    y_PL = get_pred(out, input_image)
                    # Quantize explicitly (round to nearest) instead of relying on
                    # PIL's implicit float -> 8-bit conversion.
                    # NOTE(review): that implicit conversion appears to truncate - confirm.
                    y = Image.fromarray(np.rint(y_PL).astype(np.uint8)).convert("RGB")
                    y.save(f"{OUTPUT_DIR}/{image_name}.png")

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:06<00:00,  8.04it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:07<00:00,  6.33it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:07<00:00,  6.25it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:06<00:00,  7.49it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [00:06<00: