In [1]:
import os
from glob import glob

import numpy as np
import torch
import torchvision
from PIL import Image
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from torchvision import models
from torchvision import transforms
from tqdm import tqdm

In [2]:
def get_transform(input_size=224):
    """Build the image preprocessing pipeline applied to every sample.

    Args:
        input_size: Side length, in pixels, of the square the image is
            resized to (both height and width).

    Returns:
        A ``transforms.Compose`` that resizes the image, converts it to a
        tensor and normalizes each channel with the mean/std values below
        (the standard ImageNet statistics used with torchvision models).
    """
    target_size = (input_size, input_size)
    steps = [
        transforms.Resize(target_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
    return transforms.Compose(steps)

class Dataloader_PACS(Dataset):
    """Dataset over an image tree laid out as ``<source>/<domain>/<label>/<image>``.

    Every ``.jpg`` and ``.png`` file found recursively under ``source`` becomes
    one sample. The class label is the name of the directory containing the
    image and the domain is the name of that directory's parent.

    Attributes:
        domains: Fixed mapping from domain name to an integer id. It is only
            stored here; nothing in this class reads it.
        transform: Callable applied to the RGB ``PIL.Image`` in ``__getitem__``.
        data: ``numpy`` string array with one row per image:
            ``[full_path, label_name, domain_name]``.
        class_to_idx: Mapping from label name to integer index, assigned in
            the sorted order returned by ``np.unique``.
    """

    def __init__(self, source='/hadatasets/andreza/datasets_oodbench/PACS', transform=None):
        """Scan ``source`` for images and build the label index.

        Args:
            source: Root directory of the dataset. Defaults to the path the
                notebook was originally written against.
            transform: Optional callable applied to each loaded image. When
                ``None`` the pipeline from ``get_transform()`` is used.

        Raises:
            FileNotFoundError: If no ``.jpg``/``.png`` file exists under
                ``source`` (previously this surfaced as an opaque IndexError
                when slicing the empty array).
        """
        self.domains = {
            'art_painting': 0,
            'cartoon': 1,
            'photo': 2,
            'sketch': 3}

        image_paths = []
        for pattern in ('*.jpg', '*.png'):
            image_paths.extend(glob(os.path.join(source, '**', pattern), recursive=True))
        # glob order depends on the filesystem; sort so that sample indices
        # (and anything saved in iteration order) are reproducible.
        image_paths.sort()

        if not image_paths:
            raise FileNotFoundError(
                f"No .jpg or .png images found under '{source}'")

        records = []
        for data_path in image_paths:
            label_dir = os.path.dirname(data_path)
            label = os.path.basename(label_dir)
            domain = os.path.basename(os.path.dirname(label_dir))
            records.append([data_path, label, domain])

        self.transform = get_transform() if transform is None else transform
        self.data = np.array(records)  # data = [['fullpath', 'label', 'domain'], ....]
        labels = np.unique(self.data[:, 1])
        self.class_to_idx = {
            value: int(idx) for idx, value in enumerate(labels)
        }

    def __getitem__(self, index: int):
        """Load one sample.

        Returns:
            A tuple ``(image, label, img_path, domain)`` where ``image`` is the
            transformed RGB image, ``label`` is a scalar tensor holding the
            class index, and ``img_path`` / ``domain`` are the strings stored
            in ``self.data`` (the domain is the directory name, not an id
            from ``self.domains``).
        """
        img_path, label, domain = self.data[index]

        image = Image.open(img_path).convert('RGB')
        image = self.transform(image)

        label = torch.tensor(int(self.class_to_idx[label]))
        return image, label, img_path, domain

    def __len__(self):
        """Return the number of images found under the dataset root."""
        return len(self.data)

In [3]:
# Instantiate the dataset and wrap it in a loader that yields one image at a
# time, in dataset order (no shuffling), using 8 worker processes.
dataset_pacs = Dataloader_PACS()
dataloader = DataLoader(
    dataset_pacs,
    batch_size=1,
    shuffle=False,
    num_workers=8,
)


In [4]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("device:", device)

# Number of classes per benchmark:
#   PACS = 7 classes
#   VLCS = 5 classes

device: cuda


In [5]:
# Load a ResNet-50 with ImageNet-pretrained weights. The original cell
# immediately replaced this model with a freshly constructed
# `models.resnet50()`, which discarded the pretrained weights that had just
# been loaded; that overwrite is removed so the pretrained backbone is used.
model = torchvision.models.resnet50(weights="ResNet50_Weights.IMAGENET1K_V2")

# Replace the classification head with a 7-way layer (PACS = 7 classes).
# The input width is read from the existing head instead of being hard-coded.
# NOTE(review): this new head is randomly initialised -- no fine-tuned
# checkpoint is loaded in this notebook; confirm that is intended.
model.fc = torch.nn.Linear(in_features=model.fc.in_features, out_features=7, bias=True)
model.to(device)
# Inference mode (affects layers such as BatchNorm); as the last expression
# of the cell this also displays the model summary.
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [6]:
# One accumulator per ground-truth class index (7 classes). Each entry collects
# the softmax vectors of the samples whose label equals that index.
softmax_by_class = [[] for _ in range(7)]

# Keep the per-class names used by later cells; they alias the lists above.
(softmax_output_0, softmax_output_1, softmax_output_2, softmax_output_3,
 softmax_output_4, softmax_output_5, softmax_output_6) = softmax_by_class

phase = 'val'
for inputs, labels, _, _ in tqdm(dataloader):
    inputs = inputs.to(device)
    # Gradients are only tracked when phase == 'train' (never, as set above).
    with torch.set_grad_enabled(phase == 'train'):
        # dim=1 normalises across the class axis of the (batch, classes)
        # logits. This is what the implicit-dim call resolved to for 2-D
        # input, but stating it avoids the deprecation warning.
        outputs = torch.softmax(model(inputs), dim=1)
    outputs = outputs.detach().cpu().numpy()

    # Route each prediction by its own label so any batch size works
    # (the original `if labels == k` comparison only worked for batch_size=1).
    for out_prediction, label in zip(outputs, labels.tolist()):
        if 0 <= label < len(softmax_by_class):
            softmax_by_class[label].append(out_prediction.tolist())
        else:
            print('error')


  return self._call_impl(*args, **kwargs)
100%|███████████████████████████████████████████████████████████████| 9991/9991 [01:22<00:00, 120.59it/s]


In [8]:
# Label-name -> index mapping built by the dataset, and the label names in
# mapping order (so keys[i] is the name of the class with index i).
classes = dataset_pacs.class_to_idx
keys = [class_name for class_name in classes]

{'dog': 0,
 'elephant': 1,
 'giraffe': 2,
 'guitar': 3,
 'horse': 4,
 'house': 5,
 'person': 6}

In [20]:
# np.save does not create missing directories, so make sure the target
# folder exists before writing.
os.makedirs("outputs", exist_ok=True)

# Write one .npy file per class; the file name carries the class name that
# corresponds to the class index of each accumulator list.
for idx, softmax_list in enumerate([softmax_output_0, softmax_output_1,
                                    softmax_output_2, softmax_output_3,
                                    softmax_output_4, softmax_output_5,
                                    softmax_output_6]):
    softmax_output = np.asarray(softmax_list)
    np.save(f"outputs/PACS_image_features_resnet50_softmax_class_{keys[idx]}.npy", softmax_output)