# Data Augmentation

In [None]:
pip install -U albumentations

In [None]:
import albumentations as A

In [None]:
import os
from PIL import Image
from torch.utils.data import Dataset

class ImageSegmentationDataset(Dataset):
    """Paired image / segmentation-mask dataset read from two directories.

    Every non-hidden entry of ``images_dir`` contributes one sample, identified
    by its file name without extension. The image is read from
    ``<images_dir>/<stem>.jpg`` and the mask from ``<masks_dir>/<stem>.png``.

    Args:
        images_dir: Directory containing the ``.jpg`` images.
        masks_dir: Directory containing the ``.png`` masks.
        transform: Optional callable. It receives the sample dict
            ``{"pixel_values": <PIL RGB image>, "labels": <PIL "L" image>}``
            and must return a mapping whose values support in-place
            ``squeeze_()`` and boolean-mask assignment (e.g. torch tensors).
    """

    def __init__(self, images_dir, masks_dir, transform=None):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.transform = transform
        # os.listdir() returns entries in arbitrary order; sorting makes an
        # index refer to the same sample on every run and every machine.
        self.filenames = sorted(
            os.path.splitext(entry)[0]
            for entry in os.listdir(images_dir)
            if not entry.startswith('.')
        )

    def __len__(self):
        """Return the number of samples found in ``images_dir``."""
        return len(self.filenames)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            A dict with keys ``"pixel_values"`` and ``"labels"``. Without a
            transform the values are PIL images (RGB image, single-channel
            mask). With a transform the values are whatever the transform
            returned, squeezed in place, and label value 255 is replaced by 1.
        """
        stem = self.filenames[idx]
        img_path = os.path.join(self.images_dir, stem + '.jpg')
        mask_path = os.path.join(self.masks_dir, stem + '.png')
        image = Image.open(img_path).convert("RGB")
        mask = Image.open(mask_path).convert("L")
        batch = {"pixel_values": image, "labels": mask}

        if self.transform:
            batch = self.transform(batch)
            for key, value in batch.items():
                value.squeeze_()  # remove batch dimension
                # Only the label map is remapped; applying 255 -> 1 to the
                # image as well would corrupt saturated pixels.
                if key == "labels":
                    value[value == 255] = 1

        return batch

In [None]:
# Training split of the rs19 data: images and label masks live in parallel folders.
images_dir = "/home/erik/Documents/Finetune-Mask2Former/data/rs19/images/train"
masks_dir = "/home/erik/Documents/Finetune-Mask2Former/data/rs19/labels/train"
# No transform is passed, so samples come back as raw PIL images.
train_ds = ImageSegmentationDataset(
    images_dir=images_dir,
    masks_dir=masks_dir,
)

In [None]:
# Fetch sample 5 once and reuse it: every train_ds[...] access re-opens and
# decodes both files, so indexing twice did the disk work twice.
sample = train_ds[5]
image = sample['pixel_values']
label = sample['labels']

In [None]:
import matplotlib.pyplot as plt

# Show the sample image next to its segmentation label
plt.figure(figsize=(10, 5))

panels = (('Image', image), ('Label', label))
for slot, (caption, picture) in enumerate(panels, start=1):
    plt.subplot(1, 2, slot)
    plt.title(caption)
    plt.imshow(picture)

plt.show()

In [None]:
import numpy as np

# Convert the image and its label to numpy arrays for the augmentation demos below.
np_image, np_label = (np.array(pil_obj) for pil_obj in (image, label))

In [None]:
# One single-step pipeline per augmentation. Each step is created with p=1.

# Horizontal flip
HFlip = A.Compose([A.HorizontalFlip(p=1)])

# Vertical flip
VFlip = A.Compose([A.VerticalFlip(p=1)])

# Colour jitter (brightness / contrast / saturation / hue)
CJ = A.Compose(
    [A.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.7, hue=0.2, p=1)]
)

## Horizontal Flip

In [None]:
import matplotlib.pyplot as plt

# Run the horizontal-flip pipeline on the numpy image
transformed = HFlip(image=np_image)
transformed_image = transformed["image"]

# Plot the input and the flipped result side by side
plt.figure(figsize=(10, 5))

panels = (('Original Image', np_image), ('Transformed Image', transformed_image))
for slot, (caption, picture) in enumerate(panels, start=1):
    plt.subplot(1, 2, slot)
    plt.title(caption)
    plt.imshow(picture)

plt.show()

## Vertical Flip

In [None]:
import matplotlib.pyplot as plt

# Run the vertical-flip pipeline on the numpy image
transformed = VFlip(image=np_image)
transformed_image = transformed["image"]

# Plot the input and the flipped result side by side
plt.figure(figsize=(10, 5))

panels = (('Original Image', np_image), ('Transformed Image', transformed_image))
for slot, (caption, picture) in enumerate(panels, start=1):
    plt.subplot(1, 2, slot)
    plt.title(caption)
    plt.imshow(picture)

plt.show()

## ColorJitter 

In [None]:
import matplotlib.pyplot as plt

# Run the colour-jitter pipeline on the numpy image
transformed = CJ(image=np_image)
transformed_image = transformed["image"]

# Plot the input and the jittered result side by side
plt.figure(figsize=(10, 5))

panels = (('Original Image', np_image), ('Transformed Image', transformed_image))
for slot, (caption, picture) in enumerate(panels, start=1):
    plt.subplot(1, 2, slot)
    plt.title(caption)
    plt.imshow(picture)

plt.show()

# Load Dataset

### Create dataset dict with the images and labels

### The dataset is created by passing in the image directory and the label directory. Images are in JPG format, while the labels are in PNG format. Entries are accessed by indexing, e.g. dataset[idx]. The images and labels are converted to arrays. Label pixel values of 255 are also converted to 1 in order to match the output of argmax on the model's prediction later. Each entry contains two elements.
### To access image, dataset[idx][0]. 
### To access label, dataset[idx][1].

In [None]:
import os
from PIL import Image
from torch.utils.data import Dataset
import numpy as np
import pytorch_lightning as pl
from torch.utils.data import DataLoader 
import torch

class ImageSegmentationDataset(Dataset):
    """Paired image / mask dataset that yields numpy arrays.

    Every non-hidden entry of ``images_dir`` contributes one sample, identified
    by its file name without extension. The image is read from
    ``<images_dir>/<stem>.jpg`` and the mask from ``<masks_dir>/<stem>.png``.

    Args:
        images_dir: Directory containing the ``.jpg`` images.
        masks_dir: Directory containing the ``.png`` masks.
        transform: Stored on the instance but not applied by ``__getitem__``.
    """

    def __init__(self, images_dir, masks_dir, transform=None):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.transform = transform
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # index -> sample mapping stable across runs (relevant for the
        # unshuffled validation / test loaders).
        self.filenames = sorted(
            os.path.splitext(entry)[0]
            for entry in os.listdir(images_dir)
            if not entry.startswith('.')
        )

    def __len__(self):
        """Return the number of samples found in ``images_dir``."""
        return len(self.filenames)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            ``(np_image, np_mask)``: the RGB image as an array with axes
            reordered by ``transpose(2, 0, 1)`` (channels first), and the mask
            as an array in which every value 255 has been replaced by 1.
        """
        stem = self.filenames[idx]
        img_path = os.path.join(self.images_dir, stem + '.jpg')
        mask_path = os.path.join(self.masks_dir, stem + '.png')

        image = Image.open(img_path).convert("RGB")
        # H, W, C -> C, H, W
        np_image = np.array(image).transpose(2, 0, 1)

        # The mask is used in whatever mode the PNG was stored in.
        np_mask = np.array(Image.open(mask_path))
        np_mask[np_mask == 255] = 1

        return np_image, np_mask

# Defining Data Module from pytorch lightning

### A Data Module is defined because Mask2Former takes class_labels and mask_labels as lists of tensors, and PyTorch Lightning cannot load them directly since it expects tensors rather than lists from the dataloader.
### collate_fn is used to group the images and the labels of a batch together, instead of the batch being a sequence of individual (image, label) pairs.
### While loading the dataloaders, Mask2FormerImageProcessor is used to process the images and labels of each batch. Mask2FormerImageProcessor normalizes the input using the ImageNet mean = (0.485, 0.456, 0.406) and std = (0.229, 0.224, 0.225) and then converts it to tensors.
### The output of the Mask2FormerImageProcessor is a batch of images of size (2, 3, 640, 640), a batch of pixel_masks of size (2, 640, 640), which signifies that the images are not masked, mask_labels of size (2, 640, 640) and class_labels of size (2). Each label map is converted to a list of binary masks and their respective class labels, which are mask_labels and class_labels respectively.

In [None]:
def collate_fn(batch):
    """Turn a list of ``(image, segmentation_map)`` samples into model inputs.

    The samples are regrouped into one sequence of images and one sequence of
    segmentation maps, which are passed to the module-level ``processor`` with
    a target size of 640x640 and PyTorch tensors as output. The unprocessed
    images and maps are attached to the result under ``"original_images"`` and
    ``"original_segmentation_maps"``.
    """
    columns = list(zip(*batch))
    raw_images, raw_maps = columns[0], columns[1]

    encoded = processor(
        raw_images,
        segmentation_maps=raw_maps,
        size=(640,640),
        return_tensors="pt",
    )

    # Keep the untouched inputs alongside the processed tensors.
    encoded["original_images"] = raw_images
    encoded["original_segmentation_maps"] = raw_maps
    return encoded

In [None]:
class SegmentationDataModule(pl.LightningDataModule):
    """Lightning data module for a dataset laid out as
    ``<dataset_dir>/images/<split>`` and ``<dataset_dir>/labels/<split>``
    with the splits ``train``, ``val`` and ``test``.

    Args:
        dataset_dir: Root directory of the dataset.
        batch_size: Batch size used by every dataloader.
        num_workers: Number of worker processes used by every dataloader.
        processor: Stored on the instance; batching itself goes through the
            module-level ``collate_fn``.
    """

    def __init__(self, dataset_dir, batch_size, num_workers, processor=None):
        super().__init__()
        self.dataset_dir = dataset_dir
        self.batch_size = batch_size
        self.num_workers = num_workers
        self.processor = processor

    def _build_dataset(self, split):
        """Create the dataset for one split directory name."""
        return ImageSegmentationDataset(
            images_dir=os.path.join(self.dataset_dir, 'images', split),
            masks_dir=os.path.join(self.dataset_dir, 'labels', split),
            transform=None,  # Add your transforms here
        )

    def _build_loader(self, dataset, shuffle):
        """Create a dataloader with the shared batch / worker / collate settings."""
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=shuffle,
            num_workers=self.num_workers,
            collate_fn=collate_fn,
        )

    def setup(self, stage=None):
        """Create the datasets needed for ``stage``.

        ``'fit'`` creates the train and val datasets, ``'validate'`` creates
        the val dataset, ``'test'`` creates the test dataset and ``None``
        creates all of them.
        """
        if stage in ('fit', None):
            self.train_dataset = self._build_dataset('train')
        # 'validate' is the stage Lightning passes for a standalone validation
        # run; without it val_dataloader() had no dataset to wrap.
        if stage in ('fit', 'validate', None):
            self.val_dataset = self._build_dataset('val')
        if stage in ('test', None):
            self.test_dataset = self._build_dataset('test')

    def train_dataloader(self):
        """Return the shuffled training dataloader."""
        return self._build_loader(self.train_dataset, shuffle=True)

    def val_dataloader(self):
        """Return the validation dataloader (fixed order)."""
        return self._build_loader(self.val_dataset, shuffle=False)

    def test_dataloader(self):
        """Return the test dataloader (fixed order)."""
        return self._build_loader(self.test_dataset, shuffle=False)

### Initialise Mask2Former processor

In [None]:
from transformers import AutoImageProcessor

# Image processor matching the pretrained checkpoint
checkpoint_name = "facebook/mask2former-swin-small-ade-semantic"
processor = AutoImageProcessor.from_pretrained(checkpoint_name)

In [5]:
# Loader settings shared by the train / val / test dataloaders
batch_size, num_workers = 1, 4

data_module = SegmentationDataModule(
    dataset_dir='/home/erik/Documents/Finetune-Mask2Former/data/rs19',
    batch_size=batch_size,
    num_workers=num_workers,
    processor=processor,
)

### Check dataloader has the proper config for Mask2Former model

In [6]:
# setup() must run first: it creates the datasets that train_dataloader() wraps.
data_module.setup(stage='fit')
train_dataloader = data_module.train_dataloader()

In [7]:
# Pull a single batch and print the shape of every entry.
batch = next(iter(train_dataloader))

for key, value in batch.items():
    # Non-tensor entries are sequences; report the shape of their first element.
    first = value if isinstance(value, torch.Tensor) else value[0]
    print(key, first.shape)

pixel_values torch.Size([1, 3, 640, 640])
pixel_mask torch.Size([1, 640, 640])
mask_labels torch.Size([13, 640, 640])
class_labels torch.Size([13])
original_images (3, 1080, 1920)
original_segmentation_maps (1080, 1920)


### Creating id for the class

In [8]:
# Class names listed in id order; the position in the list is the class id.
newid2label = dict(enumerate([
    "Background",
    "road",
    "sidewalk",
    "construction",
    "tram-track",
    "fence",
    "pole",
    "traffic-light",
    "traffic-sign",
    "vegetation",
    "terrain",
    "sky",
    "human",
    "rail-track",
    "car",
    "truck",
    "trackbed",
    "on-rails",
    "rail-raised",
    "rail-embedded",
]))
newid2label

{0: 'Background',
 1: 'road',
 2: 'sidewalk',
 3: 'construction',
 4: 'tram-track',
 5: 'fence',
 6: 'pole',
 7: 'traffic-light',
 8: 'traffic-sign',
 9: 'vegetation',
 10: 'terrain',
 11: 'sky',
 12: 'human',
 13: 'rail-track',
 14: 'car',
 15: 'truck',
 16: 'trackbed',
 17: 'on-rails',
 18: 'rail-raised',
 19: 'rail-embedded'}

In [9]:
# Reverse lookup: class name -> class id
newlabel2id = dict(zip(newid2label.values(), newid2label.keys()))
newlabel2id

{'Background': 0,
 'road': 1,
 'sidewalk': 2,
 'construction': 3,
 'tram-track': 4,
 'fence': 5,
 'pole': 6,
 'traffic-light': 7,
 'traffic-sign': 8,
 'vegetation': 9,
 'terrain': 10,
 'sky': 11,
 'human': 12,
 'rail-track': 13,
 'car': 14,
 'truck': 15,
 'trackbed': 16,
 'on-rails': 17,
 'rail-raised': 18,
 'rail-embedded': 19}

In [10]:
from transformers import Mask2FormerForUniversalSegmentation
import pytorch_lightning as pl
from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
import torchmetrics
from torchmetrics import Metric
#from datasets import load_metric
from torch import nn
import numpy as np
import random
import torch
# Process-wide setting: selects the "medium" float32 matmul precision mode.
# NOTE(review): per the PyTorch docs this trades precision for speed on
# supporting hardware — confirm that is acceptable for this training run.
torch.set_float32_matmul_precision("medium")
import evaluate

## Defining Pytorch Lightning Module

In [11]:
class Mask2FormerFinetuner(pl.LightningModule):
    """Lightning module that fine-tunes a pretrained Mask2Former model.

    Args:
        id2label: Mapping from class id to class name. Its length is used as
            the number of classes and its inverse is passed as ``label2id``.
        model_name: Name or path of the pretrained checkpoint to start from.
            When restoring a Lightning checkpoint this must name the same
            backbone the checkpoint was trained with, otherwise the state
            dict shapes will not match.
    """

    def __init__(self, id2label, model_name="facebook/mask2former-swin-small-ade-semantic"):
        super().__init__()
        self.id2label = id2label
        self.num_classes = len(id2label)
        self.label2id = {name: idx for idx, name in self.id2label.items()}
        self.model_name = model_name
        self.model = Mask2FormerForUniversalSegmentation.from_pretrained(
            model_name,
            id2label=self.id2label,
            label2id=self.label2id,
            # The classification head is sized for the original label set, so
            # mismatching head weights have to be re-initialised.
            ignore_mismatched_sizes=True,
        )
        self.train_mean_iou = evaluate.load("mean_iou")
        self.val_mean_iou = evaluate.load("mean_iou")
        self.test_mean_iou = evaluate.load("mean_iou")

    def forward(self, pixel_values, mask_labels=None, class_labels=None):
        """Run the wrapped model and return its output object."""
        return self.model(pixel_values=pixel_values, mask_labels=mask_labels, class_labels=class_labels)

    def transfer_batch_to_device(self, batch, device, dataloader_idx=0):
        """Move the entries consumed by the model to ``device``.

        ``mask_labels`` and ``class_labels`` are sequences of tensors, so they
        are moved element by element. All other entries are left untouched.
        """
        batch['pixel_values'] = batch['pixel_values'].to(device)
        batch['mask_labels'] = [label.to(device) for label in batch['mask_labels']]
        batch['class_labels'] = [label.to(device) for label in batch['class_labels']]
        return batch

    def training_step(self, batch, batch_idx):
        """Compute, log and return the training loss for one batch."""
        outputs = self(
            pixel_values=batch["pixel_values"],
            mask_labels=batch["mask_labels"],
            class_labels=batch["class_labels"],
        )
        loss = outputs.loss
        self.log("loss", loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute, log and return the validation loss for one batch."""
        outputs = self(
            pixel_values=batch["pixel_values"],
            mask_labels=list(batch["mask_labels"]),
            class_labels=list(batch["class_labels"]),
        )
        loss = outputs.loss
        # NOTE(review): this reuses the key "loss" from training_step, so the
        # two curves share one name. Kept as is because callbacks outside this
        # block may monitor "loss".
        self.log("loss", loss)
        return loss

    def test_step(self, batch, batch_idx):
        """Compute the loss and mean-IoU metrics for one test batch.

        Predictions are resized to the original image size and compared with
        the original segmentation map. Only the first sample of the batch is
        scored.

        Returns:
            Dict with ``loss``, ``mean_iou``, ``mean_accuracy`` and one
            ``accuracy_<class>`` / ``iou_<class>`` entry per class.
        """
        outputs = self(
            pixel_values=batch["pixel_values"],
            mask_labels=list(batch["mask_labels"]),
            class_labels=list(batch["class_labels"]),
        )
        loss = outputs.loss
        original_images = batch["original_images"]
        ground_truth = batch["original_segmentation_maps"]
        # Original images are indexed as (C, H, W); take (H, W) as target size.
        target_sizes = [(image.shape[1], image.shape[2]) for image in original_images]
        # Uses the module-level image processor to build per-pixel class maps.
        predicted_segmentation_maps = processor.post_process_semantic_segmentation(
            outputs, target_sizes=target_sizes
        )
        # NOTE(review): ignore_index is 254 here while the dataset remaps label
        # value 255 to 1 — confirm 254 is the intended ignore value.
        metrics = self.test_mean_iou._compute(
            predictions=predicted_segmentation_maps[0].cpu().numpy(),
            references=ground_truth[0],
            num_labels=self.num_classes,
            ignore_index=254,
            reduce_labels=False,
        )
        # Per-category arrays are pulled out first so they can be flattened
        # into individually named scalar entries.
        per_category_accuracy = metrics.pop("per_category_accuracy").tolist()
        per_category_iou = metrics.pop("per_category_iou").tolist()

        metrics = {
            'loss': loss,
            "mean_iou": metrics["mean_iou"],
            "mean_accuracy": metrics["mean_accuracy"],
            **{f"accuracy_{self.id2label[i]}": v for i, v in enumerate(per_category_accuracy)},
            **{f"iou_{self.id2label[i]}": v for i, v in enumerate(per_category_iou)}
        }
        # Use the size of this batch rather than a notebook-level global.
        current_batch_size = len(original_images)
        for name, value in metrics.items():
            self.log(name, value, sync_dist=True, batch_size=current_batch_size)
        return metrics

    def configure_optimizers(self):
        """Return an Adam optimizer (lr 1e-4) over the trainable parameters."""
        return torch.optim.Adam([p for p in self.parameters() if p.requires_grad], lr=0.0001)

# Initialising Model

### Load previously trained model

In [None]:
# NOTE(review): as the traceback below shows, this load fails with size
# mismatches (e.g. 128-channel tensors in the checkpoint vs 96 in the model).
# The checkpoint appears to come from a different backbone than the
# "facebook/mask2former-swin-small-ade-semantic" one built in
# Mask2FormerFinetuner.__init__ — confirm which backbone produced it.
# ignore_mismatched_sizes is not a parameter of Mask2FormerFinetuner.__init__,
# so it presumably has no effect on this load — verify.
path="/home/erik/Documents/Finetune-Mask2Former/outputs/lightning_logs_csv/version_4/checkpoints/epoch=6-step=6699.ckpt"
model = Mask2FormerFinetuner.load_from_checkpoint(path,id2label=newid2label,ignore_mismatched_sizes=True)

Some weights of Mask2FormerForUniversalSegmentation were not initialized from the model checkpoint at facebook/mask2former-swin-small-ade-semantic and are newly initialized because the shapes did not match:
- class_predictor.bias: found shape torch.Size([151]) in the checkpoint and torch.Size([21]) in the model instantiated
- class_predictor.weight: found shape torch.Size([151, 256]) in the checkpoint and torch.Size([21, 256]) in the model instantiated
- criterion.empty_weight: found shape torch.Size([151]) in the checkpoint and torch.Size([21]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


RuntimeError: Error(s) in loading state_dict for Mask2FormerFinetuner:
	size mismatch for model.model.pixel_level_module.encoder.embeddings.patch_embeddings.projection.weight: copying a param with shape torch.Size([128, 3, 4, 4]) from checkpoint, the shape in current model is torch.Size([96, 3, 4, 4]).
	size mismatch for model.model.pixel_level_module.encoder.embeddings.patch_embeddings.projection.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.embeddings.norm.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.embeddings.norm.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.layernorm_before.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.layernorm_before.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 4]) from checkpoint, the shape in current model is torch.Size([169, 3]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.attention.self.query.weight: copying a param with shape torch.Size([128, 128]) from checkpoint, the shape in current model is torch.Size([96, 96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.attention.self.query.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.attention.self.key.weight: copying a param with shape torch.Size([128, 128]) from checkpoint, the shape in current model is torch.Size([96, 96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.attention.self.key.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.attention.self.value.weight: copying a param with shape torch.Size([128, 128]) from checkpoint, the shape in current model is torch.Size([96, 96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.attention.self.value.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.attention.output.dense.weight: copying a param with shape torch.Size([128, 128]) from checkpoint, the shape in current model is torch.Size([96, 96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.attention.output.dense.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.layernorm_after.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.layernorm_after.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.intermediate.dense.weight: copying a param with shape torch.Size([512, 128]) from checkpoint, the shape in current model is torch.Size([384, 96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.intermediate.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.output.dense.weight: copying a param with shape torch.Size([128, 512]) from checkpoint, the shape in current model is torch.Size([96, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.0.output.dense.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.layernorm_before.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.layernorm_before.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 4]) from checkpoint, the shape in current model is torch.Size([169, 3]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.attention.self.query.weight: copying a param with shape torch.Size([128, 128]) from checkpoint, the shape in current model is torch.Size([96, 96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.attention.self.query.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.attention.self.key.weight: copying a param with shape torch.Size([128, 128]) from checkpoint, the shape in current model is torch.Size([96, 96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.attention.self.key.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.attention.self.value.weight: copying a param with shape torch.Size([128, 128]) from checkpoint, the shape in current model is torch.Size([96, 96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.attention.self.value.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.attention.output.dense.weight: copying a param with shape torch.Size([128, 128]) from checkpoint, the shape in current model is torch.Size([96, 96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.attention.output.dense.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.layernorm_after.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.layernorm_after.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.intermediate.dense.weight: copying a param with shape torch.Size([512, 128]) from checkpoint, the shape in current model is torch.Size([384, 96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.intermediate.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.output.dense.weight: copying a param with shape torch.Size([128, 512]) from checkpoint, the shape in current model is torch.Size([96, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.blocks.1.output.dense.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.downsample.reduction.weight: copying a param with shape torch.Size([256, 512]) from checkpoint, the shape in current model is torch.Size([192, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.downsample.norm.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.0.downsample.norm.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.layernorm_before.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.layernorm_before.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 8]) from checkpoint, the shape in current model is torch.Size([169, 6]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.attention.self.query.weight: copying a param with shape torch.Size([256, 256]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.attention.self.query.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.attention.self.key.weight: copying a param with shape torch.Size([256, 256]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.attention.self.key.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.attention.self.value.weight: copying a param with shape torch.Size([256, 256]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.attention.self.value.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.attention.output.dense.weight: copying a param with shape torch.Size([256, 256]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.attention.output.dense.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.layernorm_after.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.layernorm_after.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.intermediate.dense.weight: copying a param with shape torch.Size([1024, 256]) from checkpoint, the shape in current model is torch.Size([768, 192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.intermediate.dense.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.output.dense.weight: copying a param with shape torch.Size([256, 1024]) from checkpoint, the shape in current model is torch.Size([192, 768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.0.output.dense.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.layernorm_before.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.layernorm_before.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 8]) from checkpoint, the shape in current model is torch.Size([169, 6]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.attention.self.query.weight: copying a param with shape torch.Size([256, 256]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.attention.self.query.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.attention.self.key.weight: copying a param with shape torch.Size([256, 256]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.attention.self.key.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.attention.self.value.weight: copying a param with shape torch.Size([256, 256]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.attention.self.value.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.attention.output.dense.weight: copying a param with shape torch.Size([256, 256]) from checkpoint, the shape in current model is torch.Size([192, 192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.attention.output.dense.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.layernorm_after.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.layernorm_after.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.intermediate.dense.weight: copying a param with shape torch.Size([1024, 256]) from checkpoint, the shape in current model is torch.Size([768, 192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.intermediate.dense.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.output.dense.weight: copying a param with shape torch.Size([256, 1024]) from checkpoint, the shape in current model is torch.Size([192, 768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.blocks.1.output.dense.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.downsample.reduction.weight: copying a param with shape torch.Size([512, 1024]) from checkpoint, the shape in current model is torch.Size([384, 768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.downsample.norm.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.1.downsample.norm.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.0.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.1.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.2.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.3.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.4.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.5.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.6.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.7.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.8.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.9.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.10.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.11.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.12.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.13.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.14.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.15.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.16.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.layernorm_before.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.layernorm_before.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 16]) from checkpoint, the shape in current model is torch.Size([169, 12]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.attention.self.query.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.attention.self.query.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.attention.self.key.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.attention.self.key.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.attention.self.value.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.attention.self.value.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.attention.output.dense.weight: copying a param with shape torch.Size([512, 512]) from checkpoint, the shape in current model is torch.Size([384, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.attention.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.layernorm_after.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.layernorm_after.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.intermediate.dense.weight: copying a param with shape torch.Size([2048, 512]) from checkpoint, the shape in current model is torch.Size([1536, 384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.intermediate.dense.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.output.dense.weight: copying a param with shape torch.Size([512, 2048]) from checkpoint, the shape in current model is torch.Size([384, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.blocks.17.output.dense.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.downsample.reduction.weight: copying a param with shape torch.Size([1024, 2048]) from checkpoint, the shape in current model is torch.Size([768, 1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.downsample.norm.weight: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.2.downsample.norm.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([1536]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.layernorm_before.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.layernorm_before.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 32]) from checkpoint, the shape in current model is torch.Size([169, 24]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.attention.self.query.weight: copying a param with shape torch.Size([1024, 1024]) from checkpoint, the shape in current model is torch.Size([768, 768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.attention.self.query.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.attention.self.key.weight: copying a param with shape torch.Size([1024, 1024]) from checkpoint, the shape in current model is torch.Size([768, 768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.attention.self.key.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.attention.self.value.weight: copying a param with shape torch.Size([1024, 1024]) from checkpoint, the shape in current model is torch.Size([768, 768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.attention.self.value.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.attention.output.dense.weight: copying a param with shape torch.Size([1024, 1024]) from checkpoint, the shape in current model is torch.Size([768, 768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.attention.output.dense.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.layernorm_after.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.layernorm_after.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.intermediate.dense.weight: copying a param with shape torch.Size([4096, 1024]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.intermediate.dense.bias: copying a param with shape torch.Size([4096]) from checkpoint, the shape in current model is torch.Size([3072]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.output.dense.weight: copying a param with shape torch.Size([1024, 4096]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.0.output.dense.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.layernorm_before.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.layernorm_before.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.attention.self.relative_position_bias_table: copying a param with shape torch.Size([529, 32]) from checkpoint, the shape in current model is torch.Size([169, 24]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.attention.self.relative_position_index: copying a param with shape torch.Size([144, 144]) from checkpoint, the shape in current model is torch.Size([49, 49]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.attention.self.query.weight: copying a param with shape torch.Size([1024, 1024]) from checkpoint, the shape in current model is torch.Size([768, 768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.attention.self.query.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.attention.self.key.weight: copying a param with shape torch.Size([1024, 1024]) from checkpoint, the shape in current model is torch.Size([768, 768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.attention.self.key.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.attention.self.value.weight: copying a param with shape torch.Size([1024, 1024]) from checkpoint, the shape in current model is torch.Size([768, 768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.attention.self.value.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.attention.output.dense.weight: copying a param with shape torch.Size([1024, 1024]) from checkpoint, the shape in current model is torch.Size([768, 768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.attention.output.dense.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.layernorm_after.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.layernorm_after.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.intermediate.dense.weight: copying a param with shape torch.Size([4096, 1024]) from checkpoint, the shape in current model is torch.Size([3072, 768]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.intermediate.dense.bias: copying a param with shape torch.Size([4096]) from checkpoint, the shape in current model is torch.Size([3072]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.output.dense.weight: copying a param with shape torch.Size([1024, 4096]) from checkpoint, the shape in current model is torch.Size([768, 3072]).
	size mismatch for model.model.pixel_level_module.encoder.encoder.layers.3.blocks.1.output.dense.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.hidden_states_norms.stage1.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.hidden_states_norms.stage1.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([96]).
	size mismatch for model.model.pixel_level_module.encoder.hidden_states_norms.stage2.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.hidden_states_norms.stage2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([192]).
	size mismatch for model.model.pixel_level_module.encoder.hidden_states_norms.stage3.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.hidden_states_norms.stage3.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([384]).
	size mismatch for model.model.pixel_level_module.encoder.hidden_states_norms.stage4.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.encoder.hidden_states_norms.stage4.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([768]).
	size mismatch for model.model.pixel_level_module.decoder.input_projections.0.0.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 768, 1, 1]).
	size mismatch for model.model.pixel_level_module.decoder.input_projections.1.0.weight: copying a param with shape torch.Size([256, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 384, 1, 1]).
	size mismatch for model.model.pixel_level_module.decoder.input_projections.2.0.weight: copying a param with shape torch.Size([256, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 192, 1, 1]).
	size mismatch for model.model.pixel_level_module.decoder.adapter_1.0.weight: copying a param with shape torch.Size([256, 128, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 96, 1, 1]).

## Training the model

### Training, validation, and testing are done using Python instead

# Test model performance

In [None]:
# Put the model in evaluation mode for the test-time cells that follow.
model.eval()

### Function to get the respective batch by index

In [None]:
# Prefer the second GPU (the original choice). Fall back to the first GPU when
# only one is present, so "cuda:1" is never requested on a single-GPU machine,
# and to the CPU when CUDA is unavailable.
if torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
model.to(device)

In [None]:
# Set the data module up for the 'test' stage and build its test dataloader.
data_module.setup(stage='test')
test_dataloader = data_module.test_dataloader()

In [None]:
# Length of the test dataloader (its number of batches, assuming a standard DataLoader).
len(test_dataloader)

In [None]:
def getBatch(index):
    """Return the ``index``-th batch of the test dataloader, counting from 1.

    A fresh test dataloader is built on every call, so the lookup always
    starts from the first batch.

    Args:
        index: 1-based position of the wanted batch (``getBatch(1)`` is the
            first batch the dataloader yields).

    Returns:
        The batch produced by the dataloader at that position.

    Raises:
        IndexError: If ``index`` is smaller than 1 or larger than the number
            of batches the dataloader yields.
    """
    if index < 1:
        # Previously the loop body never ran for index <= 0 and the function
        # failed on an unbound local; report the bad index explicitly instead.
        raise IndexError(f"batch index must be >= 1, got {index}")

    data_module.setup(stage='test')
    test_dataloader = data_module.test_dataloader()
    for position, batch in enumerate(test_dataloader, start=1):
        if position == index:
            return batch
    # The dataloader ran out before reaching the requested position.
    raise IndexError(f"batch index {index} is past the end of the test dataloader")

In [None]:
# Fetch one test batch to inspect (getBatch counts batches from 1).
batch=getBatch(177)

### Function to view mask

### Function to do predictions for a batch

In [None]:
import numpy as np
import matplotlib.pyplot as plt

def viewMask(batch):
    """Plot the first image of ``batch`` next to its ground-truth map.

    Reads ``batch["original_images"][0]`` and
    ``batch["original_segmentation_maps"][0]`` and draws them side by side.
    """
    first_image = batch["original_images"][0]
    ground_truth = batch["original_segmentation_maps"][0]
    # imshow expects (height, width, channels), so reorder the image axes.
    hwc_image = np.transpose(first_image, (1, 2, 0))

    fig, axes = plt.subplots(1, 2)
    fig.set_figheight(30)
    fig.set_figwidth(50)
    panels = (("Image", hwc_image), ("Ground truth", ground_truth))
    for ax, (title, data) in zip(axes, panels):
        ax.set_title(title, {'fontsize': 40})
        ax.imshow(data)

In [None]:
# Show the fetched batch's first image beside its ground-truth map.
viewMask(batch)

In [None]:
# Assumes a batch size of 1: only the first prediction and label are returned.
def batch_prediction(batch):
    """Predict the semantic map for the first image of ``batch``.

    Args:
        batch: mapping with ``"original_images"``, ``"pixel_values"``,
            ``"mask_labels"``, ``"class_labels"`` and
            ``"original_segmentation_maps"`` entries.

    Returns:
        A ``(prediction, ground_truth)`` pair: the post-processed semantic map
        of the first image as a numpy array, and the first entry of
        ``batch["original_segmentation_maps"]``.
    """
    original_images = batch["original_images"]
    # Predictions are resized to each image's own size, taken from shape[1] and shape[2].
    target_sizes = [(image.shape[1], image.shape[2]) for image in original_images]
    # Inference only: no_grad keeps the forward pass from recording an autograd
    # graph, which would otherwise hold on to activation memory for nothing.
    with torch.no_grad():
        outputs = model(
            pixel_values=batch["pixel_values"].to(device),
            mask_labels=[labels.to(device) for labels in batch["mask_labels"]],
            class_labels=[labels.to(device) for labels in batch["class_labels"]],
        )
    result = processor.post_process_semantic_segmentation(outputs, target_sizes=target_sizes)[0].cpu().numpy()
    original_segmentation_maps = batch["original_segmentation_maps"][0]
    return result, original_segmentation_maps

In [None]:
# Predict the semantic map for the fetched batch and keep its ground-truth map.
pred_semantic_map,label=batch_prediction(batch)

In [None]:
def viewPrediction(prediction,label):
    """Plot a predicted map and its ground truth side by side.

    Both arguments are handed straight to ``imshow``, so they should already
    be plottable arrays (e.g. converted to numpy by the caller).
    """
    fig, (prediction_ax, truth_ax) = plt.subplots(1, 2)
    fig.set_figheight(30)
    fig.set_figwidth(50)

    prediction_ax.set_title("Prediction", {'fontsize': 40})
    prediction_ax.imshow(prediction)

    truth_ax.set_title("Ground truth", {'fontsize': 40})
    truth_ax.imshow(label)

In [None]:
# Compare the prediction with the ground truth visually.
viewPrediction(pred_semantic_map,label)

### Define a function to display a prediction overlay in which blue-green (teal) represents correct predictions while red represents incorrect predictions

In [None]:
def wrongPredictionOverlay(image,label):
    """Show a colour overlay of the prediction next to the ground truth.

    Pixels are first coloured by predicted class id (0 black, 1 teal, 2 red);
    every pixel whose prediction differs from ``label`` is then painted red.

    Args:
        image: 2-D array of predicted class ids.
        label: ground-truth class ids with the same shape as ``image``.
    """
    red = (255, 0, 0)
    color_map = {
        0: (0, 0, 0),
        1: (0, 128, 128),
        2: red,
    }
    mismatches = image != label

    array_3d_colored = np.zeros((image.shape[0], image.shape[1], 3), dtype=np.uint8)
    for class_id, color in color_map.items():
        array_3d_colored[image == class_id] = color
    # Mark every mismatch, not only wrong class-1 predictions: previously a
    # pixel predicted as class 0 over a non-zero label stayed black and was
    # indistinguishable from correctly predicted background.
    # NOTE(review): pixels carrying an ignore label (the metric cells pass
    # ignore_index=254) also count as mismatches here; confirm whether they
    # should be masked out.
    array_3d_colored[mismatches] = red

    # Display the overlay next to the ground truth.
    f, axs = plt.subplots(1, 2)
    f.set_figheight(30)
    f.set_figwidth(50)
    axs[0].set_title("Prediction overlay", {'fontsize': 40})
    axs[0].imshow(array_3d_colored)
    axs[1].set_title("Ground truth", {'fontsize': 40})
    axs[1].imshow(label)

In [None]:
# Draw the overlay for the prediction computed above.
wrongPredictionOverlay(pred_semantic_map,label)

### Function to compute mIoU for a single image

In [None]:
def miou(pred,label):
    """Compute mean-IoU metrics for ``pred`` against ``label``.

    The ``mean_iou`` metric is loaded through ``evaluate`` and called with
    ``num_labels=len(newid2label)`` and ``ignore_index=254``.

    Returns:
        A dict with ``mean_iou`` and ``mean_accuracy`` followed by one
        ``accuracy_<name>`` and one ``iou_<name>`` entry per category, where
        ``<name>`` is looked up in ``newid2label`` by category position.
    """
    scores = evaluate.load("mean_iou")._compute(
        predictions=pred,
        references=label,
        num_labels=len(newid2label),
        ignore_index=254,
    )
    # Per-category results are turned into plain lists before being flattened
    # into individual key/value pairs.
    accuracy_by_class = scores.pop("per_category_accuracy").tolist()
    iou_by_class = scores.pop("per_category_iou").tolist()

    summary = {
        "mean_iou": scores["mean_iou"],
        "mean_accuracy": scores["mean_accuracy"],
    }
    for prefix, values in (("accuracy", accuracy_by_class), ("iou", iou_by_class)):
        for class_id, value in enumerate(values):
            summary[f"{prefix}_{newid2label[class_id]}"] = value

    return summary

In [None]:
# Metrics for the single prediction computed above.
miou(pred_semantic_map,label)

### Function to collect the predictions for the whole dataset into lists

In [None]:
def dataset_prediction(test_dataloader):
    """Run the model over every batch and collect predictions and ground truths.

    Args:
        test_dataloader: iterable of batches with ``"original_images"``,
            ``"pixel_values"``, ``"mask_labels"``, ``"class_labels"`` and
            ``"original_segmentation_maps"`` entries.

    Returns:
        A ``(result_list, original_seg_maps_list)`` pair: the post-processed
        semantic maps as numpy arrays, and the matching original segmentation
        maps, one entry per image.
    """
    result_list = []
    original_seg_maps_list = []
    for batch in test_dataloader:
        original_images = batch["original_images"]
        target_sizes = [(image.shape[1], image.shape[2]) for image in original_images]
        # Inference only: no_grad keeps the forward pass from recording an
        # autograd graph for every batch of the dataset.
        with torch.no_grad():
            outputs = model(
                pixel_values=batch["pixel_values"].to(device),
                mask_labels=[labels.to(device) for labels in batch["mask_labels"]],
                class_labels=[labels.to(device) for labels in batch["class_labels"]],
            )
        predictions = processor.post_process_semantic_segmentation(outputs, target_sizes=target_sizes)
        # Keep every image of the batch, not just the first one, so nothing is
        # silently dropped when the batch size is larger than 1.
        result_list.extend(prediction.cpu().numpy() for prediction in predictions)
        original_seg_maps_list.extend(batch['original_segmentation_maps'])
    return result_list, original_seg_maps_list

In [None]:
# Run the model over the whole test dataloader and keep predictions and ground truths.
result_list, original_seg_maps_list=dataset_prediction(test_dataloader)

### Function to compute mean IoU for the test dataset

In [None]:
def dateset_miou(result_list, original_seg_maps_list):
    """Compute mean-IoU metrics over lists of predictions and references.

    The ``mean_iou`` metric is loaded through ``evaluate`` and called with
    ``num_labels=len(newid2label)`` and ``ignore_index=254``.

    Returns:
        A dict with ``mean_iou`` and ``mean_accuracy`` followed by one
        ``accuracy_<name>`` and one ``iou_<name>`` entry per category, where
        ``<name>`` is looked up in ``newid2label`` by category position.
    """
    mean_iou_metric = evaluate.load("mean_iou")
    raw = mean_iou_metric._compute(
        predictions=result_list,
        references=original_seg_maps_list,
        num_labels=len(newid2label),
        ignore_index=254,
    )
    # Convert the per-category results to lists so they can be emitted as
    # individual key/value pairs.
    class_accuracy = raw.pop("per_category_accuracy").tolist()
    class_iou = raw.pop("per_category_iou").tolist()

    report = {}
    report["mean_iou"] = raw["mean_iou"]
    report["mean_accuracy"] = raw["mean_accuracy"]
    report.update((f"accuracy_{newid2label[i]}", v) for i, v in enumerate(class_accuracy))
    report.update((f"iou_{newid2label[i]}", v) for i, v in enumerate(class_iou))
    return report

In [None]:
# Mean IoU and per-category metrics over the collected test predictions.
dateset_miou(result_list, original_seg_maps_list)

In [None]:
    # In the cells shown here image_transpose exists only as a local inside
    # viewMask, so rebuild it from the current batch before plotting it next to
    # the module-level label.
    image_transpose = np.transpose(batch["original_images"][0], (1, 2, 0))
    f, axs = plt.subplots(1, 2)
    f.set_figheight(30)
    f.set_figwidth(50)
    axs[0].set_title("Image", {'fontsize': 40})
    axs[0].imshow(image_transpose)
    axs[1].set_title("Ground truth", {'fontsize': 40})
    axs[1].imshow(label)

### Function to save all the predictions 

In [None]:
from matplotlib import pyplot as plt
def savePredictions(result_list, original_seg_maps_list, save_path):
    """Save one side-by-side "Prediction" / "Ground truth" figure per result.

    Figures are written to ``save_path`` as ``result_<i>.png``, where ``i`` is
    the position of the prediction in ``result_list``.

    Args:
        result_list: predicted maps, one per image.
        original_seg_maps_list: ground-truth maps, indexed like ``result_list``.
        save_path: output directory; it is created when it does not exist yet.
    """
    if save_path:
        # savefig does not create missing directories, so make sure the target
        # exists. An empty path means "current directory" and needs no creation.
        os.makedirs(save_path, exist_ok=True)

    for i, prediction in enumerate(result_list):
        file_name = f"result_{i}"
        # Set up the plot
        f, axs = plt.subplots(1, 2)
        f.set_figheight(30)
        f.set_figwidth(50)

        axs[0].set_title("Prediction", {'fontsize': 40})
        axs[0].imshow(prediction)
        axs[1].set_title("Ground truth", {'fontsize': 40})
        axs[1].imshow(original_seg_maps_list[i])

        # Construct the full path where the image will be saved
        file_path = os.path.join(save_path, f"{file_name}.png")

        # Save the figure
        plt.savefig(file_path, bbox_inches='tight')
        plt.close(f)  # Close the figure to free memory
    print("Predictions saved")

In [None]:
# Write the prediction / ground-truth figures for the collected test results to disk.
save_path='data_augmentation_dataset/outputs-mask2former/'
savePredictions(result_list, original_seg_maps_list,save_path)