In [57]:
import os
from PIL import Image

# STAGE2
# 0_Rust / Ferrugem:
image_path1 = os.path.abspath('./dataset/swatdcnn/test/stage-2/test/0_Rust/92.jpg')
# 1_Brown_Spots / Mofo Fuliginoso:
image_path2 = os.path.abspath('./dataset/swatdcnn/test/stage-2/test/1_Brown_Spots/22.jpg')
# 2_Sooty_Molds / Mofo Fuliginoso:
image_path3 = os.path.abspath('./dataset/swatdcnn/test/stage-2/test/1_Brown_Spots/IMG_20191018_125640.jpg')

prep_transforms = transforms.Compose(
    [transforms.Resize((160, 160)),
     transforms.ToTensor(),
     transforms.Normalize( (0.4914, 0.4822, 0.4465), (0.2470, 0.2435, 0.2616) )
     ]
)

In [80]:
# Open and display the image
img = Image.open(image_path)
img2 = Image.open(image_path2)
img3 = Image.open(image_path3)
img4 = Image.open(image_path4)
img5 = Image.open(image_path5)
img6 = Image.open(image_path6)
img7 = Image.open(image_path7)
img8 = Image.open(image_path8)
img9 = Image.open(image_path9)

In [87]:
img_tensor = prep_transforms(img)
img_tensor2 = prep_transforms(img2)
img_tensor3 = prep_transforms(img3)
img_tensor4 = prep_transforms(img4)
# img_tensor5 = prep_transforms(img5)
img_tensor6 = prep_transforms(img6)
# img_tensor7 = prep_transforms(img7)
img_tensor8 = prep_transforms(img8)
img_tensor9 = prep_transforms(img9)

In [14]:
data = torch.load("./checkpoint/ViT-4-ckpt.t7")

In [23]:
# https://github.com/lucidrains/vit-pytorch/blob/main/vit_pytorch/vit.py

import torch
from torch import nn

from einops import rearrange, repeat
from einops.layers.torch import Rearrange

# helpers

def pair(t):
    return t if isinstance(t, tuple) else (t, t)

# classes

class PreNorm(nn.Module):
    def __init__(self, dim, fn):
        super().__init__()
        self.norm = nn.LayerNorm(dim)
        self.fn = fn
    def forward(self, x, **kwargs):
        return self.fn(self.norm(x), **kwargs)

class FeedForward(nn.Module):
    def __init__(self, dim, hidden_dim, dropout = 0.):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(dim, hidden_dim),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, dim),
            nn.Dropout(dropout)
        )
    def forward(self, x):
        return self.net(x)

class Attention(nn.Module):
    def __init__(self, dim, heads = 8, dim_head = 64, dropout = 0.):
        super().__init__()
        inner_dim = dim_head *  heads
        project_out = not (heads == 1 and dim_head == dim)

        self.heads = heads
        self.scale = dim_head ** -0.5

        self.attend = nn.Softmax(dim = -1)
        self.to_qkv = nn.Linear(dim, inner_dim * 3, bias = False)

        self.to_out = nn.Sequential(
            nn.Linear(inner_dim, dim),
            nn.Dropout(dropout)
        ) if project_out else nn.Identity()

    def forward(self, x):
        qkv = self.to_qkv(x).chunk(3, dim = -1)
        q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> b h n d', h = self.heads), qkv)

        dots = torch.matmul(q, k.transpose(-1, -2)) * self.scale

        attn = self.attend(dots)

        out = torch.matmul(attn, v)
        out = rearrange(out, 'b h n d -> b n (h d)')
        return self.to_out(out)

class Transformer(nn.Module):
    def __init__(self, dim, depth, heads, dim_head, mlp_dim, dropout = 0.):
        super().__init__()
        self.layers = nn.ModuleList([])
        for _ in range(depth):
            self.layers.append(nn.ModuleList([
                PreNorm(dim, Attention(dim, heads = heads, dim_head = dim_head, dropout = dropout)),
                PreNorm(dim, FeedForward(dim, mlp_dim, dropout = dropout))
            ]))
    def forward(self, x):
        for attn, ff in self.layers:
            x = attn(x) + x
            x = ff(x) + x
        return x

class ViT(nn.Module):
    def __init__(self, *, image_size, patch_size, num_classes, dim, depth, heads, mlp_dim, pool = 'cls', channels = 3, dim_head = 64, dropout = 0., emb_dropout = 0.):
        super().__init__()
        image_height, image_width = pair(image_size)
        patch_height, patch_width = pair(patch_size)

        assert image_height % patch_height == 0 and image_width % patch_width == 0, 'Image dimensions must be divisible by the patch size.'

        num_patches = (image_height // patch_height) * (image_width // patch_width)
        patch_dim = channels * patch_height * patch_width
        assert pool in {'cls', 'mean'}, 'pool type must be either cls (cls token) or mean (mean pooling)'

        self.to_patch_embedding = nn.Sequential(
            Rearrange('b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1 = patch_height, p2 = patch_width),
            nn.Linear(patch_dim, dim),
        )

        self.pos_embedding = nn.Parameter(torch.randn(1, num_patches + 1, dim))
        self.cls_token = nn.Parameter(torch.randn(1, 1, dim))
        self.dropout = nn.Dropout(emb_dropout)

        self.transformer = Transformer(dim, depth, heads, dim_head, mlp_dim, dropout)

        self.pool = pool
        self.to_latent = nn.Identity()

        self.mlp_head = nn.Sequential(
            nn.LayerNorm(dim),
            nn.Linear(dim, num_classes)
        )

    def forward(self, img):
        x = self.to_patch_embedding(img)
        b, n, _ = x.shape

        cls_tokens = repeat(self.cls_token, '() n d -> b n d', b = b)
        x = torch.cat((cls_tokens, x), dim=1)
        x += self.pos_embedding[:, :(n + 1)]
        x = self.dropout(x)

        x = self.transformer(x)

        x = x.mean(dim = 1) if self.pool == 'mean' else x[:, 0]

        x = self.to_latent(x)
        return self.mlp_head(x)


In [41]:
net = ViT(
    image_size = 32,
    patch_size = 4,
    num_classes = 10,
    dim = 512,
    depth = 6,
    heads = 8,
    mlp_dim = 512,
    dropout = 0.1,
    emb_dropout = 0.1)



data = torch.load('./checkpoint/ViT-4-ckpt.t7')
# net = data['model']
net.load_state_dict(data['model'])


<All keys matched successfully>

In [71]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('device:', device)
batch = img_tensor.unsqueeze(0).to("cpu")
net.eval()
output = net(batch)
output

CATEGORIES = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']
logits = torch.nn.functional.softmax(output, dim=1) * 100
prob_dict = {}
for i, classname in enumerate(CATEGORIES):
  prob = logits[0][i].item()
  print(f"{classname} score: {prob:.2f}")
  prob_dict[classname] = [prob]

device: cuda:0
airplane score: 0.00
automobile score: 0.00
bird score: 100.00
cat score: 0.00
deer score: 0.00
dog score: 0.00
frog score: 0.00
horse score: 0.00
ship score: 0.00
truck score: 0.00


In [72]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('device:', device)
batch = img_tensor2.unsqueeze(0).to("cpu")
net.eval()
output = net(batch)
output

CATEGORIES = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']
logits = torch.nn.functional.softmax(output, dim=1) * 100
prob_dict = {}
for i, classname in enumerate(CATEGORIES):
  prob = logits[0][i].item()
  print(f"{classname} score: {prob:.2f}")
  prob_dict[classname] = [prob]

device: cuda:0
airplane score: 50.51
automobile score: 0.00
bird score: 46.73
cat score: 0.00
deer score: 0.00
dog score: 0.00
frog score: 0.73
horse score: 0.00
ship score: 0.00
truck score: 2.03


In [73]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('device:', device)
batch = img_tensor3.unsqueeze(0).to("cpu")
net.eval()
output = net(batch)
output

CATEGORIES = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']
logits = torch.nn.functional.softmax(output, dim=1) * 100
prob_dict = {}
for i, classname in enumerate(CATEGORIES):
  prob = logits[0][i].item()
  print(f"{classname} score: {prob:.2f}")
  prob_dict[classname] = [prob]

device: cuda:0
airplane score: 0.00
automobile score: 100.00
bird score: 0.00
cat score: 0.00
deer score: 0.00
dog score: 0.00
frog score: 0.00
horse score: 0.00
ship score: 0.00
truck score: 0.00


In [74]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('device:', device)
batch = img_tensor4.unsqueeze(0).to("cpu")
net.eval()
output = net(batch)
output

CATEGORIES = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']
logits = torch.nn.functional.softmax(output, dim=1) * 100
prob_dict = {}
for i, classname in enumerate(CATEGORIES):
  prob = logits[0][i].item()
  print(f"{classname} score: {prob:.2f}")
  prob_dict[classname] = [prob]

device: cuda:0
airplane score: 94.91
automobile score: 5.06
bird score: 0.01
cat score: 0.00
deer score: 0.00
dog score: 0.00
frog score: 0.00
horse score: 0.00
ship score: 0.02
truck score: 0.00


In [86]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('device:', device)
batch = img_tensor9.unsqueeze(0).to("cpu")
net.eval()
output = net(batch)
output

CATEGORIES = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']
logits = torch.nn.functional.softmax(output, dim=1) * 100
prob_dict = {}
for i, classname in enumerate(CATEGORIES):
  prob = logits[0][i].item()
  print(f"{classname} score: {prob:.2f}")
  prob_dict[classname] = [prob]

device: cuda:0
airplane score: 0.00
automobile score: 0.00
bird score: 0.00
cat score: 0.00
deer score: 0.00
dog score: 0.00
frog score: 100.00
horse score: 0.00
ship score: 0.00
truck score: 0.00


In [88]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('device:', device)
batch = img_tensor6.unsqueeze(0).to("cpu")
net.eval()
output = net(batch)
output

CATEGORIES = ['airplane','automobile','bird','cat','deer',
               'dog','frog','horse','ship','truck']
logits = torch.nn.functional.softmax(output, dim=1) * 100
prob_dict = {}
for i, classname in enumerate(CATEGORIES):
  prob = logits[0][i].item()
  print(f"{classname} score: {prob:.2f}")
  prob_dict[classname] = [prob]

device: cuda:0
airplane score: 0.00
automobile score: 0.00
bird score: 2.66
cat score: 40.63
deer score: 0.02
dog score: 30.35
frog score: 0.02
horse score: 26.32
ship score: 0.00
truck score: 0.01
