In [1]:
# Get Model

from transformers import AutoFeatureExtractor, AutoModelForImageClassification

# The preprocessor and the classifier are loaded from the same pretrained
# checkpoint, so its identifier is written once and shared by both loaders.
MODEL_ID = "farleyknight-org-username/vit-base-mnist"

extractor = AutoFeatureExtractor.from_pretrained(MODEL_ID)
model = AutoModelForImageClassification.from_pretrained(MODEL_ID)



In [2]:
# Echo the loaded classifier so its module tree is shown as the cell output.
model

ViTForImageClassification(
  (vit): ViTModel(
    (embeddings): ViTEmbeddings(
      (patch_embeddings): ViTPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): ViTEncoder(
      (layer): ModuleList(
        (0-11): 12 x ViTLayer(
          (attention): ViTAttention(
            (attention): ViTSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
            (output): ViTSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (intermediate): ViTIntermediate(
            (dense): Linear(in_features=7

In [54]:
# Get Data

from Data.Data import DataLoader

# Distortion types requested from the loader in addition to the clean images.
distortions = ['shot_noise', 'motion_blur']

test_data_obj = DataLoader('test', distortions)

# TODO: duplicates were encountered in the create_dataset function, so the
# mixed-ratio subset below stays disabled and the full testing dataset is
# used for now.
#
# test_size = 1000
# test_ratios = {'clean': 0.2, 'shot_noise': 0.4, 'motion_blur': 0.6}
# test_dataset = test_data_obj.create_dataset(test_size, test_ratios)

# Loaded test data, indexed by subset name (the next cell reads the 'clean' entry).
test_data_dict = test_data_obj.load()

In [55]:
# Evaluation subset: the first 100 clean test samples.
# Every sample is copied into its own dict so that the in-place conversion to
# PyTorch tensors later in the notebook does not overwrite the entries held by
# test_data_dict; re-running this cell therefore always yields fresh samples.
images = [dict(sample) for sample in test_data_dict['clean'][:100]]
#images.extend(dict(sample) for sample in test_data_dict['shot_noise'][:100])
#images.extend(dict(sample) for sample in test_data_dict['motion_blur'][:100])

In [49]:
# Original dimensions.
# Snapshot of the unconverted samples. list.copy() alone would share the
# sample dicts with `images`, so the conversion cell (which reassigns each
# sample's 'image' and 'label' entries) would silently change this snapshot
# too. Copying each dict keeps the original TensorFlow tensors reachable here.
og_images = [dict(sample) for sample in images]
og_images[0]['image'].shape

TensorShape([28, 28, 1])

In [56]:
# Convert each sample to a PyTorch tensor laid out as (batch, channels, height, width), the input layout documented at https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html
import torch
import torch.nn.functional as F

# NOTE(review): the `extractor` loaded together with the model is not used
# here; if its resize/normalisation settings differ from this manual pipeline
# the reported accuracy may be affected -- confirm.
for image in images:
    # Samples already converted by a previous run of this cell are left alone,
    # so executing the cell twice does not fail or re-process anything.
    if isinstance(image['image'], torch.Tensor):
        continue

    # TensorFlow tensor -> NumPy array, divided by 255
    # (assumes raw pixel values in 0-255 -- TODO confirm against the loader)
    numpy_array = image['image'].numpy() / 255

    # Convert to a float32 PyTorch tensor and move the channel axis to the
    # front: (height, width, channels) -> (channels, height, width).
    # float32 is requested explicitly so the input dtype does not depend on
    # what dtype the loader produced.
    tensor = torch.as_tensor(numpy_array, dtype=torch.float32).permute(2, 0, 1)

    tensor = tensor.unsqueeze(0)  # add batch size dimension at index 0

    # Expand the image from 28x28 to 224x224
    tensor = F.interpolate(tensor, size=(224, 224), mode='bilinear', align_corners=False)

    # Make the tensor have three channels instead of one (the model's patch
    # embedding is a Conv2d with 3 input channels)
    final_tensor = torch.cat((tensor, tensor, tensor), dim=1)

    image['image'] = final_tensor

    # Convert the TensorFlow label tensor to a PyTorch tensor
    image['label'] = torch.tensor(image['label'].numpy())

In [57]:
# New dimensions after the conversion cell: [batch_size, channels, height, width]
images[0]['image'].shape

torch.Size([1, 3, 224, 224])

In [58]:
# Number of samples in the evaluation subset
len(images)

100

In [59]:
import torch.nn as nn

# NOTE(review): loss_fn and test_loss are set up but never used or updated in
# this cell; they are kept in case later cells rely on them -- confirm and
# remove if not.
loss_fn = nn.CrossEntropyLoss()

model.eval()  # switch the model to evaluation mode before measuring accuracy

test_loss, correct = 0, 0
with torch.no_grad():  # gradients are not needed for evaluation
    # Iterate over the samples directly instead of indexing by position.
    for sample in images:
        pred = model(sample['image'])

        # argmax over dim 1 of the logits gives the predicted class for the
        # single image in the batch; count it when it matches the label.
        correct += (pred.logits.argmax(1) == sample['label']).type(torch.float).sum().item()

# Fraction of correctly classified samples. An empty evaluation set reports
# NaN instead of raising ZeroDivisionError.
accuracy = correct / len(images) if len(images) > 0 else float('nan')
print(accuracy)

0.96
