In [2]:
import torch
import os

from torchvision import datasets

import numpy as np
from glob import glob

from PIL import Image

from PIL import ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True

import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F

import gc

In [3]:
class PatchEmbed(nn.Module):
    """Split an image into non-overlapping square patches and embed each one.

    The embedding is a single ``nn.Conv2d`` whose kernel size and stride both
    equal ``patch_size``, so every output position sees exactly one patch.

    Parameters
    ----------
    img_size : int
        Side length of the (square) input image.
    patch_size : int
        Side length of each (square) patch.
    in_chans : int
        Number of input channels.
    embed_dim : int
        Number of output channels of the projection, i.e. the embedding size
        of every patch.
    embed_dims : int, optional
        Legacy spelling of ``embed_dim`` kept for backward compatibility.
        When given, it overrides ``embed_dim``.

    Attributes
    ----------
    n_patches : int
        ``(img_size // patch_size) ** 2``.
    proj : nn.Conv2d
        Convolution that performs the patch split and the embedding at once.
    """

    def __init__(self, img_size, patch_size, in_chans=3, embed_dim=768, embed_dims=None):
        super().__init__()
        # The signature used to say `embed_dims` while the body read
        # `embed_dim`; accept both spellings so every call style works.
        if embed_dims is not None:
            embed_dim = embed_dims

        self.img_size = img_size
        self.patch_size = patch_size
        self.n_patches = (img_size // patch_size) ** 2

        self.proj = nn.Conv2d(
            in_chans,
            embed_dim,
            kernel_size=patch_size,
            stride=patch_size,
        )

    def forward(self, x):
        """Embed a batch of images.

        Parameters
        ----------
        x : torch.Tensor
            Shape ``(n_samples, in_chans, img_size, img_size)``.

        Returns
        -------
        torch.Tensor
            Shape ``(n_samples, n_patches, embed_dim)``.
        """
        x = self.proj(x)       # (n_samples, embed_dim, side, side)
        x = x.flatten(2)       # (n_samples, embed_dim, n_patches)
        x = x.transpose(1, 2)  # (n_samples, n_patches, embed_dim)
        return x

In [7]:
class Attention(nn.Module):
    """Multi-head self-attention.

    Parameters
    ----------
    dim : int
        Feature size of every input token; also the output feature size.
    n_heads : int
        Number of attention heads. Must divide ``dim``.
    qkv_bias : bool
        Whether the joint query/key/value projection has a bias term.
    attn_p : float
        Dropout probability applied to the attention weights.
    proj_p : float
        Dropout probability applied after the output projection.

    Attributes
    ----------
    head_dim : int
        Feature size handled by a single head (``dim // n_heads``).
    scale : float
        ``head_dim ** -0.5``; multiplies the query/key dot products.
    qkv : nn.Linear
        Joint projection producing queries, keys and values.
    proj : nn.Linear
        Output projection applied to the concatenated heads.
    attn_drop, proj_drop : nn.Dropout
        Dropout layers.

    Raises
    ------
    ValueError
        If ``dim`` is not divisible by ``n_heads``.
    """

    def __init__(self, dim, n_heads=12, qkv_bias=True, attn_p=0., proj_p=0.):
        super().__init__()
        if dim % n_heads != 0:
            raise ValueError(
                f"dim ({dim}) must be divisible by n_heads ({n_heads})"
            )
        self.n_heads = n_heads
        self.dim = dim
        self.head_dim = dim // n_heads
        self.scale = self.head_dim ** -0.5

        self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
        self.attn_drop = nn.Dropout(attn_p)
        self.proj = nn.Linear(dim, dim)
        self.proj_drop = nn.Dropout(proj_p)

    def forward(self, x):
        """Apply self-attention over the token axis.

        Parameters
        ----------
        x : torch.Tensor
            Shape ``(n_samples, n_tokens, dim)``.

        Returns
        -------
        torch.Tensor
            Shape ``(n_samples, n_tokens, dim)``.

        Raises
        ------
        ValueError
            If the last dimension of ``x`` differs from ``self.dim``.
        """
        n_samples, n_tokens, dim = x.shape

        if dim != self.dim:
            raise ValueError(
                f"expected last dimension {self.dim}, got {dim}"
            )

        qkv = self.qkv(x)  # (n_samples, n_tokens, 3 * dim)
        qkv = qkv.reshape(n_samples, n_tokens, 3, self.n_heads, self.head_dim)
        # Move the q/k/v axis to the front so the three tensors can be
        # indexed out: (3, n_samples, n_heads, n_tokens, head_dim).
        qkv = qkv.permute(2, 0, 3, 1, 4)

        q, k, v = qkv[0], qkv[1], qkv[2]
        k_t = k.transpose(-2, -1)    # (n_samples, n_heads, head_dim, n_tokens)
        dp = (q @ k_t) * self.scale  # (n_samples, n_heads, n_tokens, n_tokens)
        attn = dp.softmax(dim=-1)
        attn = self.attn_drop(attn)

        weighted_avg = attn @ v                      # (n_samples, n_heads, n_tokens, head_dim)
        weighted_avg = weighted_avg.transpose(1, 2)  # (n_samples, n_tokens, n_heads, head_dim)
        weighted_avg = weighted_avg.flatten(2)       # (n_samples, n_tokens, dim)

        x = self.proj(weighted_avg)
        x = self.proj_drop(x)

        return x

In [9]:
class MLP(nn.Module):
    """Two-layer perceptron: Linear -> GELU -> Dropout -> Linear -> Dropout.

    Parameters
    ----------
    in_features : int
        Feature size of the input.
    out_features : int
        Feature size of the output.
    p : float
        Dropout probability, used after the activation and after the second
        linear layer.
    hidden_features : int, optional
        Width of the hidden layer. Defaults to ``in_features`` when omitted.
        It is placed last so the existing positional order
        ``(in_features, out_features, p)`` keeps working.

    Attributes
    ----------
    fc1 : nn.Linear
        Maps ``in_features`` to ``hidden_features``.
    act : nn.GELU
        Activation between the two linear layers.
    fc2 : nn.Linear
        Maps ``hidden_features`` to ``out_features``.
    drop : nn.Dropout
        Dropout layer shared by both dropout steps.
    """

    def __init__(self, in_features, out_features, p=0., hidden_features=None):
        super().__init__()
        if hidden_features is None:
            hidden_features = in_features
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = nn.GELU()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(p)

    def forward(self, x):
        """Run the perceptron over the last dimension of ``x``.

        Parameters
        ----------
        x : torch.Tensor
            Shape ``(..., in_features)``.

        Returns
        -------
        torch.Tensor
            Shape ``(..., out_features)``.
        """
        x = self.fc1(x)
        x = self.act(x)
        x = self.drop(x)
        x = self.fc2(x)
        x = self.drop(x)

        return x

In [13]:
class Block(nn.Module):
    """Transformer encoder block: pre-norm attention and MLP, each residual.

    Parameters
    ----------
    dim : int
        Feature size of every token.
    n_heads : int
        Number of attention heads.
    mlp_ratio : float
        Hidden width of the MLP relative to ``dim``
        (``hidden_features = int(dim * mlp_ratio)``).
    qkv_bias : bool
        Whether the attention's query/key/value projection has a bias.
    p : float
        Dropout probability forwarded to the attention as ``proj_p``.
    attn_p : float
        Dropout probability forwarded to the attention as ``attn_p``.

    Attributes
    ----------
    norm1, norm2 : nn.LayerNorm
        Layer normalisation applied before the attention and the MLP.
    attn : Attention
        Self-attention module.
    mlp : MLP
        Feed-forward module mapping ``dim -> hidden_features -> dim``.
    """

    def __init__(self, dim, n_heads, mlp_ratio=4.0, qkv_bias=True, p=0., attn_p=0.):
        super().__init__()
        self.norm1 = nn.LayerNorm(dim, eps=1e-6)
        self.attn = Attention(
            dim,
            n_heads=n_heads,
            qkv_bias=qkv_bias,
            attn_p=attn_p,
            proj_p=p,
        )
        self.norm2 = nn.LayerNorm(dim, eps=1e-6)
        hidden_features = int(dim * mlp_ratio)
        # NOTE(review): `p` is not forwarded to the MLP, so its dropout stays
        # at the MLP's default. Confirm whether that is intended.
        self.mlp = MLP(
            in_features=dim,
            hidden_features=hidden_features,
            out_features=dim,
        )

    def forward(self, x):
        """Apply the block.

        Parameters
        ----------
        x : torch.Tensor
            Shape ``(n_samples, n_tokens, dim)``.

        Returns
        -------
        torch.Tensor
            Same shape as ``x``.
        """
        x = x + self.attn(self.norm1(x))
        x = x + self.mlp(self.norm2(x))

        return x
             

In [18]:
class VisionTransformer(nn.Module):
    """Vision Transformer classifier.

    An image is split into patches and embedded. A learnable class token is
    prepended and learnable position embeddings are added. The sequence then
    runs through ``depth`` transformer blocks, and the final class token is
    mapped to class logits.

    Parameters
    ----------
    img_size : int
        Side length of the (square) input image.
    patch_size : int
        Side length of each (square) patch.
    in_chans : int
        Number of input channels.
    n_classes : int
        Number of output classes.
    embed_dim : int
        Feature size of every token.
    depth : int
        Number of transformer blocks.
    n_heads : int
        Number of attention heads per block.
    mlp_ratio : float
        Hidden width of each block's MLP relative to ``embed_dim``.
    qkv_bias : bool
        Whether the query/key/value projections have a bias.
    p : float
        Dropout probability for the position-embedding dropout and the blocks.
    attn_p : float
        Dropout probability for the attention weights.

    Attributes
    ----------
    patch_embed : PatchEmbed
        Patch splitting and embedding layer.
    cls_token : nn.Parameter
        Learnable class token, shape ``(1, 1, embed_dim)``, initialised to 0.
    pos_embed : nn.Parameter
        Learnable position embedding, shape ``(1, 1 + n_patches, embed_dim)``,
        initialised to 0.
    pos_drop : nn.Dropout
        Dropout applied after adding the position embedding.
    blocks : nn.ModuleList
        The ``depth`` transformer blocks.
    norm : nn.LayerNorm
        Final layer normalisation.
    head : nn.Linear
        Classification head mapping ``embed_dim`` to ``n_classes``.
    """

    def __init__(self, img_size=384, patch_size=16, in_chans=3,
                 n_classes=1000, embed_dim=768, depth=12, n_heads=12,
                 mlp_ratio=4., qkv_bias=True, p=0., attn_p=0.,
                 ):
        super().__init__()
        # Arguments are passed positionally, in the order
        # (img_size, patch_size, in_chans, embedding size), so the call does
        # not depend on how the last parameter is spelled.
        self.patch_embed = PatchEmbed(img_size, patch_size, in_chans, embed_dim)
        self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
        self.pos_embed = nn.Parameter(
            torch.zeros(1, 1 + self.patch_embed.n_patches, embed_dim)
        )
        self.pos_drop = nn.Dropout(p=p)

        self.blocks = nn.ModuleList(
            [
                Block(
                    dim=embed_dim,
                    n_heads=n_heads,
                    mlp_ratio=mlp_ratio,
                    qkv_bias=qkv_bias,
                    p=p,
                    attn_p=attn_p,
                )
                for _ in range(depth)
            ]
        )

        self.norm = nn.LayerNorm(embed_dim, eps=1e-6)
        self.head = nn.Linear(embed_dim, n_classes)

    def forward(self, x):
        """Classify a batch of images.

        Parameters
        ----------
        x : torch.Tensor
            Shape ``(n_samples, in_chans, img_size, img_size)``.

        Returns
        -------
        torch.Tensor
            Class logits of shape ``(n_samples, n_classes)``.
        """
        n_samples = x.shape[0]
        x = self.patch_embed(x)  # (n_samples, n_patches, embed_dim)

        # Broadcast the single learned class token across the batch.
        cls_token = self.cls_token.expand(n_samples, -1, -1)
        x = torch.cat((cls_token, x), dim=1)  # (n_samples, 1 + n_patches, embed_dim)
        x = x + self.pos_embed
        x = self.pos_drop(x)

        for block in self.blocks:
            x = block(x)
        x = self.norm(x)

        # Only the class token (position 0) feeds the classification head.
        cls_token_final = x[:, 0]
        x = self.head(cls_token_final)

        return x

In [3]:
# Dataset directories, resolved relative to `image_path`.
# torchvision's ImageFolder expects one sub-directory per class
# (root/<class_name>/<image file>). If the images sit directly inside the
# directory it raises "FileNotFoundError: Couldn't find any class folder".
filePath = '256x256_NormalizedWhaleImages'
TEST_DIR = 'test_images_100'

# Per-channel statistics shared by the training and test pipelines.
NORM_MEAN = [0.4282, 0.4800, 0.5347]
NORM_STD = [0.1421, 0.1444, 0.158]
normalize = transforms.Normalize(mean=NORM_MEAN, std=NORM_STD)

# Preprocessing
# NOTE(review): both pipelines produce 224x224 inputs, while
# VisionTransformer defaults to img_size=384 -- pass img_size=224 when
# building a model for these loaders.
preprocess = {
    # Training data: light augmentation (random flip and rotation).
    filePath: transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ToTensor(),
        normalize,
    ]),

    # Test data: deterministic resize and centre crop only.
    TEST_DIR: transforms.Compose([
        transforms.Resize(224),      # shorter side scaled to 224
        transforms.CenterCrop(224),  # square 224x224 crop
        transforms.ToTensor(),
        normalize,
    ]),
}

image_path = "./"

# Load the images: one ImageFolder dataset per directory in `preprocess`.
images = {
    name: datasets.ImageFolder(os.path.join(image_path, name), transform)
    for name, transform in preprocess.items()
}

# Shuffle only the training data; the test loader keeps a fixed order so
# evaluation runs are reproducible.
loader = {
    name: torch.utils.data.DataLoader(
        dataset,
        batch_size=32,
        shuffle=(name == filePath),
        num_workers=0,
    )
    for name, dataset in images.items()
}

FileNotFoundError: Couldn't find any class folder in ./256x256_NormalizedWhaleImages.