In [1]:
import torch
import torch.nn as nn
from einops import rearrange, repeat

### Patch Embedding
It has three components:
- Convert the image into sequence of patches
- Add CLS token to sequence of patches
- Add positional encoding to all the patches. 

In [19]:
class PatchEmbedding(nn.Module):
    def __init__(self, config):
        super().__init__()
        image_height = config["image_height"]
        image_width = config["image_width"]
        im_channels = config["im_channels"]
        emb_dim = config["emb_dim"] # Transformer dimentions(D)
        patch_embd_dropout = config["patch_emb_drop"]

        self.patch_height = config["patch_height"]
        self.patch_width = config["patch_width"]

        num_patches = (image_height // self.patch_height) * (image_width // self.patch_width)

        patch_dim = im_channels * self.patch_height * self.patch_width    
        
        # W belongs to R^(patch_dim x emb_dim)
        self.patch_emb = nn.Sequential(
            nn.LayerNorm(patch_dim),           
            nn.Linear(patch_dim, emb_dim),
            nn.LayerNorm(emb_dim),
        )
        
        # Positional information needs to be added to cls as well so 1+num_patches
        self.pos_emb = nn.Parameter(torch.randn(1, num_patches + 1, emb_dim))
        self.cls_token = nn.Parameter(torch.randn(emb_dim))    # CLS token belongs to R^emb_dim
        self.patch_emb_dropout = nn.Dropout(patch_embd_dropout)

    def forward(self, x):
        batch_size = x.shape[0]

        out  = rearrange(x, 'b c (h p1) (w p2) -> b (h w) (p1 p2 c)', p1=self.patch_height, p2=self.patch_width)  # split image into patches

        out = self.patch_emb(out)
        cls_token = repeat(self.cls_token, 'd -> b n d', b=batch_size, n=1)
        out = torch.cat([cls_token, out], dim=1)
        out += self.pos_emb
        out = self.patch_emb_dropout(out)

        return out


In [51]:
#example run
image = torch.randn(1, 3, 224, 224)
config = {
    "image_height": 224,
    "image_width": 224,
    "im_channels": 3,
    "emb_dim": 512,
    "patch_height": 16,
    "patch_width": 16,
    "patch_emb_dropout": 0.1
}
patch_emb = PatchEmbedding(config)
out = patch_emb(image)
print(out.shape)  

torch.Size([1, 197, 512])


### Attention Module

In [3]:
class Attention(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.n_heads = config["n_heads"]
        self.head_dim = config["head_dim"]   # d_h
        self.emb_dim = config["emb_dim"]
        self.drop_prob = config["dropout"] if "dropout" in config else 0.0
        self.att_dim  = self.n_heads * self.head_dim

        self.qkv_proj = nn.Linear(self.emb_dim, self.att_dim * 3, bias=False)
        self.att_drop = nn.Dropout(self.drop_prob)

        self.out_proj = nn.Sequential(
            nn.Linear(self.att_dim, self.emb_dim),
            nn.Dropout(self.drop_prob)
        )

    def forward(self, x):
        B, N = x.shape[:2]  # B: batch size, N: number of tokens

        q, k,v = self.qkv_proj(x).split(self.att_dim, dim=-1)
        #split into heads
        q = rearrange(q, 'b n (h d_h) -> b h n d_h', h=self.n_heads, d_h=self.head_dim) 
        k = rearrange(k, 'b n (h d_h) -> b h n d_h', h=self.n_heads, d_h=self.head_dim)
        v = rearrange(v, 'b n (h d_h) -> b h n d_h', h=self.n_heads, d_h=self.head_dim)

        #Scaled dot product attention

        att = torch.matmul(q, k.transpose(-2, -1)) / (self.head_dim ** 0.5)
        att = nn.functional.softmax(att, dim=-1)
        att = self.att_drop(att)


        #Weighted Value Computation
        out = torch.matmul(att, v)

        #Rearrange heads
        out = rearrange(out, 'b h n d_h -> b n (h d_h)', h=self.n_heads, d_h=self.head_dim)
        out = self.out_proj(out)

        return out
        

In [48]:
#example run
config = {
    "n_heads": 8,
    "head_dim": 64,
    "emb_dim": 512,
    "dropout": 0.1
}

att = Attention(config)
out = att(out)
print(out.shape)

torch.Size([1, 197, 512])


### Transformer

In [4]:
class TransformerLayer(nn.Module):
    def __init__(self, config):
        super().__init__()
        emb_dim = config["emb_dim"]
        ff_hidden_dim = config["ff_dim"] if "ff_dim" in config else 4 * emb_dim
        ff_dropout = config["ff_drop"] if "ff_drop" in config else 0.0
        self.att_norm = nn.LayerNorm(emb_dim)
        self.ff_norm = nn.LayerNorm(emb_dim)
        self.attention_block = Attention(config)
        self.ff_block = nn.Sequential(
            nn.Linear(emb_dim, ff_hidden_dim),
            nn.GELU(),
            nn.Dropout(ff_dropout),
            nn.Linear(ff_hidden_dim, emb_dim),
            nn.Dropout(ff_dropout)
        )

    def forward(self, x):
        out = x
        out = out + self.attention_block(self.att_norm(out))
        out = out + self.ff_block(self.ff_norm(out))
        return out
        

In [54]:
#expample run
config = {
    "emb_dim": 512,
    "ff_dim": 2048,
    "ff_drop": 0.1,
    "n_heads": 8,
    "head_dim": 64,
    "dropout": 0.1
}

trasnformer_block = TransformerLayer(config)
out = trasnformer_block(out)
print(out.shape)

torch.Size([1, 197, 512])


### ViT

In [15]:
class VIT(nn.Module):
    def __init__(self, config):
        super().__init__()
        n_layers = config["n_layers"]
        emb_dim = config["emb_dim"]
        num_classes = config["num_classes"] # number of classes
        self.patch_embedding = PatchEmbedding(config)
        self.transformer = nn.ModuleList([TransformerLayer(config) for _ in range(n_layers)])
        self.norm = nn.LayerNorm(emb_dim)
        self.fc_layer = nn.Linear(emb_dim, num_classes)

    def forward(self, x):
        out = self.patch_embedding(x)
        for layer in self.transformer:
            out = layer(out)
        out = self.norm(out)
        
        out = self.fc_layer(out[:, 0])

        #Logits, No softmax
        return out


In [56]:
#example run
image = torch.randn(1, 3, 224, 224)
config = {
    "image_height": 224,
    "image_width": 224,
    "im_channels": 3,
    "emb_dim": 512,
    "patch_height": 16,
    "patch_width": 16,
    "patch_emb_dropout": 0.1,
    "n_layers": 12,
    "num_dim": 1000,
    "n_heads": 8,
    "head_dim": 64,
    "dropout": 0.1
}

model = VIT(config)
out = model(image)
print(out.shape)

torch.Size([1, 1000])


#### Training

In [16]:
import sys
sys.path.append('../')
import yaml
import argparse
import torch
import random
import os
import numpy as np
from tqdm import tqdm
from torch.utils.data.dataloader import DataLoader
from dataset.mnist_color_texture_dataset import MnistDataset
from torch.optim import Adam
from torch.optim.lr_scheduler import ReduceLROnPlateau


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def train_for_one_epoch(epoch_idx, model, mnist_loader, optimizer):
    r"""
    Method to run the training for one epoch.
    :param epoch_idx: iteration number of current epoch
    :param model: Transformer model
    :param mnist_loader: Data loder for mnist
    :param optimizer: optimizer to be used taken from config
    :return:
    """
    losses = []
    criterion = torch.nn.CrossEntropyLoss()
    for data in tqdm(mnist_loader):
        im = data['image'].float().to(device)
        number_cls = data['number_cls'].long().to(device)
        optimizer.zero_grad()
        model_output = model(im)
        loss = criterion(model_output, number_cls)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
    print('Finished epoch: {} | Number Loss : {:.4f}'.
          format(epoch_idx + 1,
                 np.mean(losses)))
    return np.mean(losses)


def train(config_path):
    #  Read the config file
    ######################################
    with open(config_path, 'r') as file:
        try:
            config = yaml.safe_load(file)
        except yaml.YAMLError as exc:
            print(exc)
    print(config)
    #######################################
    
    # Set the desired seed value
    ######################################
    seed = config['train_params']['seed']
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)
    if device == 'cuda':
        torch.cuda.manual_seed_all(seed)
    #######################################
    
    # Create the model and dataset
    model = VIT(config['model_params']).to(device)
    mnist = MnistDataset('train', config['dataset_params'],
                         im_h=config['model_params']['image_height'],
                         im_w=config['model_params']['image_width'])
    mnist_loader = DataLoader(mnist, batch_size=config['train_params']['batch_size'], shuffle=True, num_workers=4)
    num_epochs = config['train_params']['epochs']
    optimizer = Adam(model.parameters(), lr=config['train_params']['lr'])
    scheduler = ReduceLROnPlateau(optimizer, factor=0.5, patience=2)
    
    # Create output directories
    if not os.path.exists(config['train_params']['task_name']):
        os.mkdir(config['train_params']['task_name'])
    
    # Load checkpoint if found
    if os.path.exists(os.path.join(config['train_params']['task_name'],
                                   config['train_params']['ckpt_name'])):
        print('Loading checkpoint')
        model.load_state_dict(torch.load(os.path.join(config['train_params']['task_name'],
                                                      config['train_params']['ckpt_name']), map_location=device))
    best_loss = np.inf
    
    for epoch_idx in range(num_epochs):
        mean_loss = train_for_one_epoch(epoch_idx, model, mnist_loader, optimizer)
        scheduler.step(mean_loss)
        # Simply update checkpoint if found better version
        if mean_loss < best_loss:
            print('Improved Loss to {:.4f} .... Saving Model'.format(mean_loss))
            torch.save(model.state_dict(), os.path.join(config['train_params']['task_name'],
                                                        config['train_params']['ckpt_name']))
            best_loss = mean_loss
        else:
            print('No Loss Improvement')
   

In [22]:
config_path = "../config/default.yaml"
train(config_path)

{'dataset_params': {'root_dir': '../data'}, 'model_params': {'n_heads': 8, 'head_dim': 64, 'emb_dim': 128, 'attn_drop': 0.1, 'ff_dim': 256, 'ff_drop': 0.1, 'n_layers': 6, 'bg_classes': 44, 'num_classes': 10, 'image_height': 224, 'image_width': 224, 'patch_height': 16, 'patch_width': 16, 'patch_emb_drop': 0.1, 'im_channels': 3}, 'train_params': {'task_name': 'default', 'batch_size': 64, 'epochs': 100, 'lr': 0.001, 'seed': 1111, 'ckpt_name': 'vit_ckpt.pth'}}


100%|██████████| 938/938 [01:16<00:00, 12.21it/s]


Finished epoch: 1 | Number Loss : 0.9568
Improved Loss to 0.9568 .... Saving Model


100%|██████████| 938/938 [01:18<00:00, 11.93it/s]


Finished epoch: 2 | Number Loss : 0.3287
Improved Loss to 0.3287 .... Saving Model


100%|██████████| 938/938 [01:18<00:00, 11.91it/s]


Finished epoch: 3 | Number Loss : 0.2463
Improved Loss to 0.2463 .... Saving Model


100%|██████████| 938/938 [01:18<00:00, 11.94it/s]


Finished epoch: 4 | Number Loss : 0.2042
Improved Loss to 0.2042 .... Saving Model


100%|██████████| 938/938 [01:18<00:00, 11.93it/s]


Finished epoch: 5 | Number Loss : 0.1814
Improved Loss to 0.1814 .... Saving Model


100%|██████████| 938/938 [01:18<00:00, 11.91it/s]


Finished epoch: 6 | Number Loss : 0.1580
Improved Loss to 0.1580 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.84it/s]


Finished epoch: 7 | Number Loss : 0.1384
Improved Loss to 0.1384 .... Saving Model


100%|██████████| 938/938 [01:18<00:00, 11.88it/s]


Finished epoch: 8 | Number Loss : 0.1328
Improved Loss to 0.1328 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.84it/s]


Finished epoch: 9 | Number Loss : 0.1245
Improved Loss to 0.1245 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.86it/s]


Finished epoch: 10 | Number Loss : 0.1116
Improved Loss to 0.1116 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.86it/s]


Finished epoch: 11 | Number Loss : 0.1056
Improved Loss to 0.1056 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.86it/s]


Finished epoch: 12 | Number Loss : 0.1001
Improved Loss to 0.1001 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.85it/s]


Finished epoch: 13 | Number Loss : 0.0908
Improved Loss to 0.0908 .... Saving Model


100%|██████████| 938/938 [01:18<00:00, 11.90it/s]


Finished epoch: 14 | Number Loss : 0.0837
Improved Loss to 0.0837 .... Saving Model


100%|██████████| 938/938 [01:18<00:00, 11.89it/s]


Finished epoch: 15 | Number Loss : 0.0834
Improved Loss to 0.0834 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.78it/s]


Finished epoch: 16 | Number Loss : 0.0772
Improved Loss to 0.0772 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.85it/s]


Finished epoch: 17 | Number Loss : 0.0730
Improved Loss to 0.0730 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.82it/s]


Finished epoch: 18 | Number Loss : 0.0715
Improved Loss to 0.0715 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.86it/s]


Finished epoch: 19 | Number Loss : 0.0717
No Loss Improvement


100%|██████████| 938/938 [01:19<00:00, 11.83it/s]


Finished epoch: 20 | Number Loss : 0.0645
Improved Loss to 0.0645 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.82it/s]


Finished epoch: 21 | Number Loss : 0.0600
Improved Loss to 0.0600 .... Saving Model


100%|██████████| 938/938 [01:18<00:00, 11.89it/s]


Finished epoch: 22 | Number Loss : 0.0556
Improved Loss to 0.0556 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.85it/s]


Finished epoch: 23 | Number Loss : 0.0577
No Loss Improvement


100%|██████████| 938/938 [01:19<00:00, 11.84it/s]


Finished epoch: 24 | Number Loss : 0.0507
Improved Loss to 0.0507 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.87it/s]


Finished epoch: 25 | Number Loss : 0.0484
Improved Loss to 0.0484 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.80it/s]


Finished epoch: 26 | Number Loss : 0.0467
Improved Loss to 0.0467 .... Saving Model


100%|██████████| 938/938 [01:18<00:00, 11.92it/s]


Finished epoch: 27 | Number Loss : 0.0451
Improved Loss to 0.0451 .... Saving Model


100%|██████████| 938/938 [01:18<00:00, 11.92it/s]


Finished epoch: 28 | Number Loss : 0.0455
No Loss Improvement


100%|██████████| 938/938 [01:19<00:00, 11.84it/s]


Finished epoch: 29 | Number Loss : 0.0397
Improved Loss to 0.0397 .... Saving Model


100%|██████████| 938/938 [01:18<00:00, 11.88it/s]


Finished epoch: 30 | Number Loss : 0.0395
Improved Loss to 0.0395 .... Saving Model


100%|██████████| 938/938 [01:18<00:00, 11.91it/s]


Finished epoch: 31 | Number Loss : 0.0395
Improved Loss to 0.0395 .... Saving Model


100%|██████████| 938/938 [01:18<00:00, 11.91it/s]


Finished epoch: 32 | Number Loss : 0.0353
Improved Loss to 0.0353 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.86it/s]


Finished epoch: 33 | Number Loss : 0.0361
No Loss Improvement


100%|██████████| 938/938 [01:18<00:00, 11.90it/s]


Finished epoch: 34 | Number Loss : 0.0352
Improved Loss to 0.0352 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.85it/s]


Finished epoch: 35 | Number Loss : 0.0338
Improved Loss to 0.0338 .... Saving Model


100%|██████████| 938/938 [01:18<00:00, 11.93it/s]


Finished epoch: 36 | Number Loss : 0.0341
No Loss Improvement


100%|██████████| 938/938 [01:19<00:00, 11.84it/s]


Finished epoch: 37 | Number Loss : 0.0285
Improved Loss to 0.0285 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.79it/s]


Finished epoch: 38 | Number Loss : 0.0307
No Loss Improvement


100%|██████████| 938/938 [01:19<00:00, 11.79it/s]


Finished epoch: 39 | Number Loss : 0.0318
No Loss Improvement


100%|██████████| 938/938 [01:19<00:00, 11.86it/s]


Finished epoch: 40 | Number Loss : 0.0249
Improved Loss to 0.0249 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.84it/s]


Finished epoch: 41 | Number Loss : 0.0297
No Loss Improvement


100%|██████████| 938/938 [01:19<00:00, 11.86it/s]


Finished epoch: 42 | Number Loss : 0.0271
No Loss Improvement


100%|██████████| 938/938 [01:18<00:00, 11.88it/s]


Finished epoch: 43 | Number Loss : 0.0245
Improved Loss to 0.0245 .... Saving Model


100%|██████████| 938/938 [01:19<00:00, 11.87it/s]


Finished epoch: 44 | Number Loss : 0.0268
No Loss Improvement


100%|██████████| 938/938 [01:17<00:00, 12.03it/s]


Finished epoch: 45 | Number Loss : 0.0226
Improved Loss to 0.0226 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.22it/s]


Finished epoch: 46 | Number Loss : 0.0239
No Loss Improvement


100%|██████████| 938/938 [01:17<00:00, 12.11it/s]


Finished epoch: 47 | Number Loss : 0.0222
Improved Loss to 0.0222 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.23it/s]


Finished epoch: 48 | Number Loss : 0.0219
Improved Loss to 0.0219 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.26it/s]


Finished epoch: 49 | Number Loss : 0.0237
No Loss Improvement


100%|██████████| 938/938 [01:17<00:00, 12.17it/s]


Finished epoch: 50 | Number Loss : 0.0209
Improved Loss to 0.0209 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.24it/s]


Finished epoch: 51 | Number Loss : 0.0189
Improved Loss to 0.0189 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.25it/s]


Finished epoch: 52 | Number Loss : 0.0239
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.24it/s]


Finished epoch: 53 | Number Loss : 0.0215
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.22it/s]


Finished epoch: 54 | Number Loss : 0.0189
Improved Loss to 0.0189 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.26it/s]


Finished epoch: 55 | Number Loss : 0.0189
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.29it/s]


Finished epoch: 56 | Number Loss : 0.0182
Improved Loss to 0.0182 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.20it/s]


Finished epoch: 57 | Number Loss : 0.0190
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.21it/s]


Finished epoch: 58 | Number Loss : 0.0200
No Loss Improvement


100%|██████████| 938/938 [01:17<00:00, 12.18it/s]


Finished epoch: 59 | Number Loss : 0.0162
Improved Loss to 0.0162 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.25it/s]


Finished epoch: 60 | Number Loss : 0.0168
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.19it/s]


Finished epoch: 61 | Number Loss : 0.0150
Improved Loss to 0.0150 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.21it/s]


Finished epoch: 62 | Number Loss : 0.0177
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.19it/s]


Finished epoch: 63 | Number Loss : 0.0173
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.22it/s]


Finished epoch: 64 | Number Loss : 0.0163
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.22it/s]


Finished epoch: 65 | Number Loss : 0.0081
Improved Loss to 0.0081 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.23it/s]


Finished epoch: 66 | Number Loss : 0.0046
Improved Loss to 0.0046 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.22it/s]


Finished epoch: 67 | Number Loss : 0.0046
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.19it/s]


Finished epoch: 68 | Number Loss : 0.0065
No Loss Improvement


100%|██████████| 938/938 [01:17<00:00, 12.17it/s]


Finished epoch: 69 | Number Loss : 0.0050
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.22it/s]


Finished epoch: 70 | Number Loss : 0.0026
Improved Loss to 0.0026 .... Saving Model


100%|██████████| 938/938 [01:17<00:00, 12.18it/s]


Finished epoch: 71 | Number Loss : 0.0020
Improved Loss to 0.0020 .... Saving Model


100%|██████████| 938/938 [01:17<00:00, 12.17it/s]


Finished epoch: 72 | Number Loss : 0.0016
Improved Loss to 0.0016 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.20it/s]


Finished epoch: 73 | Number Loss : 0.0017
No Loss Improvement


100%|██████████| 938/938 [01:17<00:00, 12.16it/s]


Finished epoch: 74 | Number Loss : 0.0021
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.21it/s]


Finished epoch: 75 | Number Loss : 0.0014
Improved Loss to 0.0014 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.20it/s]


Finished epoch: 76 | Number Loss : 0.0013
Improved Loss to 0.0013 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.18it/s]


Finished epoch: 77 | Number Loss : 0.0023
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.24it/s]


Finished epoch: 78 | Number Loss : 0.0014
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.25it/s]


Finished epoch: 79 | Number Loss : 0.0019
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.22it/s]


Finished epoch: 80 | Number Loss : 0.0010
Improved Loss to 0.0010 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.21it/s]


Finished epoch: 81 | Number Loss : 0.0007
Improved Loss to 0.0007 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.19it/s]


Finished epoch: 82 | Number Loss : 0.0006
Improved Loss to 0.0006 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.21it/s]


Finished epoch: 83 | Number Loss : 0.0005
Improved Loss to 0.0005 .... Saving Model


100%|██████████| 938/938 [01:17<00:00, 12.18it/s]


Finished epoch: 84 | Number Loss : 0.0006
No Loss Improvement


100%|██████████| 938/938 [01:17<00:00, 12.16it/s]


Finished epoch: 85 | Number Loss : 0.0005
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.20it/s]


Finished epoch: 86 | Number Loss : 0.0005
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.21it/s]


Finished epoch: 87 | Number Loss : 0.0004
Improved Loss to 0.0004 .... Saving Model


100%|██████████| 938/938 [01:17<00:00, 12.18it/s]


Finished epoch: 88 | Number Loss : 0.0003
Improved Loss to 0.0003 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.21it/s]


Finished epoch: 89 | Number Loss : 0.0002
Improved Loss to 0.0002 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.23it/s]


Finished epoch: 90 | Number Loss : 0.0003
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.20it/s]


Finished epoch: 91 | Number Loss : 0.0004
No Loss Improvement


100%|██████████| 938/938 [01:17<00:00, 12.18it/s]


Finished epoch: 92 | Number Loss : 0.0004
No Loss Improvement


100%|██████████| 938/938 [01:17<00:00, 12.17it/s]


Finished epoch: 93 | Number Loss : 0.0004
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.21it/s]


Finished epoch: 94 | Number Loss : 0.0004
No Loss Improvement


100%|██████████| 938/938 [01:17<00:00, 12.18it/s]


Finished epoch: 95 | Number Loss : 0.0002
Improved Loss to 0.0002 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.19it/s]


Finished epoch: 96 | Number Loss : 0.0002
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.23it/s]


Finished epoch: 97 | Number Loss : 0.0002
No Loss Improvement


100%|██████████| 938/938 [01:17<00:00, 12.16it/s]


Finished epoch: 98 | Number Loss : 0.0002
No Loss Improvement


100%|██████████| 938/938 [01:16<00:00, 12.23it/s]


Finished epoch: 99 | Number Loss : 0.0002
Improved Loss to 0.0002 .... Saving Model


100%|██████████| 938/938 [01:16<00:00, 12.20it/s]

Finished epoch: 100 | Number Loss : 0.0002
No Loss Improvement



