In [1]:
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torcheval
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from dataset import PlantData, PlantDataTest
from scipy.stats import zscore
%matplotlib inline

In [2]:
dir_path = os.path.join('data')

In [3]:
data = pd.read_csv(os.path.join(dir_path, 'train.csv'))
test_data = pd.read_csv(os.path.join(dir_path, 'test.csv'))

In [4]:
train_data, val_data = train_test_split(data, test_size=0.2, train_size=0.8, shuffle=False, random_state=7)

In [5]:
means = torch.tensor(train_data.mean(axis=0), dtype=torch.float32)
stds = torch.tensor(train_data.std(axis=0), dtype=torch.float32)

  means = torch.tensor(train_data.mean(axis=0), dtype=torch.float32)
  stds = torch.tensor(train_data.std(axis=0), dtype=torch.float32)


In [6]:
# data transforms

data_transform = torchvision.transforms.Lambda(
    lambda x : (x - means[1:-6])/(stds[1:-6])
)

In [7]:
# target transforms

target_transforms = torchvision.transforms.Lambda(
    lambda x : (x - means[-6:])/(stds[-6:])
)

In [8]:
# image transforms
image_train_data_transforms = torchvision.transforms.Compose([
    torchvision.transforms.ConvertImageDtype(dtype=torch.float32),
    torchvision.transforms.RandomResizedCrop((224, 224), scale=(0.8, 1.0), ratio=(1, 1)),
    torchvision.transforms.RandomHorizontalFlip(),
    torchvision.transforms.RandomVerticalFlip(),
    torchvision.transforms.RandomRotation(15),
    torchvision.transforms.ColorJitter(0.2, 0.2, 0.2),
    # torchvision.transforms.ToTensor(),
    torchvision.transforms.Lambda(
        lambda x: torch.clamp(x,min=0, max=1)
    ),
    # use resnet normalization
    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    torchvision.transforms.RandomErasing(p=0.2, scale=(0.01, 0.02), ratio=(0.1, 4), value=0)
])

image_test_data_transforms = torchvision.transforms.Compose([
    torchvision.transforms.ConvertImageDtype(dtype=torch.float32),
    torchvision.transforms.Resize((224, 224)),
    # torchvision.transforms.ToTensor(),
    # use resnet normalization
    torchvision.transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [9]:
# datasets

train_dataset = PlantData(train_data,
                          os.path.join(dir_path, 'train_images'),
                          image_train_data_transforms,
                          data_transform,
                          target_transforms)
val_dataset = PlantData(val_data,
                          os.path.join(dir_path,  'train_images'),
                          image_test_data_transforms,
                          data_transform,
                          target_transforms)
test_dataset = PlantDataTest(test_data,
                             os.path.join(dir_path,  'test_images'),
                             image_test_data_transforms,
                             data_transform)

In [10]:
BATCH_SIZE = 64

In [11]:
# dataloader

train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=BATCH_SIZE,
                                               shuffle=True,
                                               drop_last=True,
                                               num_workers=1,
                                               pin_memory=False)

val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=BATCH_SIZE,
                                             shuffle=False,
                                             drop_last=False,
                                             num_workers=1,
                                             pin_memory=False)

test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=1,
                                              shuffle=False,
                                              drop_last=False,
                                              num_workers=1,
                                              pin_memory=False)

In [12]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)

cuda:0


In [13]:
model1 = torch.load('results_during_training/swin_v2_b_batch_seed1_model_epoch20.pth')
model1.cuda()
model1.base_model = model1.base_model.to(device)

  model1 = torch.load('results_during_training/swin_v2_b_batch_seed1_model_epoch19.pth')


In [14]:
model1.eval()

TransferSWINBatch(
  (base_model): SwinTransformer(
    (features): Sequential(
      (0): Sequential(
        (0): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
        (1): Permute()
        (2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
      )
      (1): Sequential(
        (0): SwinTransformerBlockV2(
          (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (attn): ShiftedWindowAttentionV2(
            (qkv): Linear(in_features=128, out_features=384, bias=True)
            (proj): Linear(in_features=128, out_features=128, bias=True)
            (cpb_mlp): Sequential(
              (0): Linear(in_features=2, out_features=512, bias=True)
              (1): ReLU(inplace=True)
              (2): Linear(in_features=512, out_features=4, bias=False)
            )
          )
          (stochastic_depth): StochasticDepth(p=0.0, mode=row)
          (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
          (mlp): MLP(
            (0):

In [15]:
model2 = torch.load('results_during_training/dino_big_seed0_model_epoch11.pth')
model2.cuda()
model2.base_model = model2.base_model.to(device)

  model2 = torch.load('results_during_training/dino_big_seed0_model_epoch10.pth')
  from .autonotebook import tqdm as notebook_tqdm


In [16]:
import torcheval.metrics


In [17]:
metrics1 = []
metrics2 = []
metricsavg = []
for i in range(1,7):
    metrics1.append(torcheval.metrics.R2Score())
    metrics2.append(torcheval.metrics.R2Score())
    metricsavg.append(torcheval.metrics.R2Score())

In [18]:
for batch_idx, (images, extra, targets) in enumerate(val_dataloader):
    images = images.to(device)
    extra = extra.to(device)
    # targets = targets.to(device)

    with torch.no_grad():
        output1 = model1.forward(images,extra).detach().cpu()
        output2 = model2.forward(images,extra).detach().cpu()

    for i in range(1,7):
        metrics1[i-1].update(output1[:,-i], targets[:,-i])
        metrics2[i-1].update(output2[:,-i], targets[:,-i])
        metricsavg[i-1].update((output2[:,-i]+output1[:,-i])/2, targets[:,-i])

  additional = self.data_transform(torch.tensor(self.csv.iloc[index, 1:-6], dtype=torch.float32))
  targets = self.targets_transform(torch.tensor(self.csv.iloc[index, -6:], dtype=torch.float32))


In [19]:
for i in range(6):
    print(f'Feature {i+1} -- ViT DINO: {metrics1[i].compute()}, SwinV2 {metrics2[i].compute()} Avg {metricsavg[i].compute()}')

Feature 1 -- ViT DINO: 0.322124719619751, SwinV2 0.3162527084350586 Avg 0.3295576572418213
Feature 2 -- ViT DINO: 0.19461798667907715, SwinV2 0.193059504032135 Avg 0.2053646445274353
Feature 3 -- ViT DINO: 0.22174817323684692, SwinV2 0.20802748203277588 Avg 0.23242777585983276
Feature 4 -- ViT DINO: 0.4617008566856384, SwinV2 0.4793391227722168 Avg 0.48964691162109375
Feature 5 -- ViT DINO: 0.2957690954208374, SwinV2 0.29596614837646484 Avg 0.30895566940307617
Feature 6 -- ViT DINO: 0.27881789207458496, SwinV2 0.2786998152732849 Avg 0.29363858699798584


In [20]:
print([m.compute().detach().item() for m in metrics1])
print([m.compute().detach().item() for m in metrics2])
print([m.compute().detach().item() for m in metricsavg])

[0.322124719619751, 0.19461798667907715, 0.22174817323684692, 0.4617008566856384, 0.2957690954208374, 0.27881789207458496]
[0.3162527084350586, 0.193059504032135, 0.20802748203277588, 0.4793391227722168, 0.29596614837646484, 0.2786998152732849]
[0.3295576572418213, 0.2053646445274353, 0.23242777585983276, 0.48964691162109375, 0.30895566940307617, 0.29363858699798584]
