In [None]:
!pip install tiktoken

Collecting tiktoken
  Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m25.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: tiktoken
Successfully installed tiktoken-0.8.0


In [None]:
import os
import time
import math
import torch
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
import tiktoken  # Added tiktoken import
from dataclasses import dataclass
from torch.cuda.amp import autocast, GradScaler  # For mixed precision

# Model Definition and Configurations (Replace with your GPT model code)
class CausalSelfAttention(nn.Module):
    """Multi-head self-attention in which a position only attends to itself and earlier positions.

    ``config`` must provide ``n_embd``, ``n_head`` and ``block_size``;
    ``n_embd`` has to be divisible by ``n_head``.
    """

    def __init__(self, config):
        super().__init__()
        assert config.n_embd % config.n_head == 0
        self.n_head = config.n_head
        self.n_embd = config.n_embd
        # One fused projection yields queries, keys and values side by side.
        self.c_attn = nn.Linear(config.n_embd, 3 * config.n_embd)
        # Projection applied once the heads have been merged back together.
        self.c_proj = nn.Linear(config.n_embd, config.n_embd)
        # Lower-triangular matrix of ones stored as (1, 1, block_size, block_size);
        # zeros mark the "future" positions that must be hidden from attention.
        causal_mask = torch.ones(config.block_size, config.block_size).tril()
        self.register_buffer(
            "bias",
            causal_mask.view(1, 1, config.block_size, config.block_size),
        )

    def forward(self, x):
        """Apply causal attention to ``x`` of shape (batch, time, n_embd); output has the same shape."""
        batch, seq_len, width = x.size()
        head_dim = width // self.n_head

        def to_heads(t):
            # (batch, time, width) -> (batch, n_head, time, head_dim)
            return t.view(batch, seq_len, self.n_head, head_dim).transpose(1, 2)

        query, key, value = (
            to_heads(part) for part in self.c_attn(x).split(self.n_embd, dim=2)
        )

        # Scaled dot-product scores between every pair of positions.
        scale = 1.0 / math.sqrt(key.size(-1))
        scores = (query @ key.transpose(-2, -1)) * scale

        # Masked entries become -inf so softmax assigns them zero weight.
        visible = self.bias[:, :, :seq_len, :seq_len]
        scores = scores.masked_fill(visible == 0, float('-inf'))
        weights = F.softmax(scores, dim=-1)

        # Weighted sum of values, then fold the heads back into the channel axis.
        merged = (weights @ value).transpose(1, 2).contiguous().view(batch, seq_len, width)
        return self.c_proj(merged)

# Define MLP
class MLP(nn.Module):
    """Feed-forward sub-layer: widen to 4 * n_embd, apply GELU, project back to n_embd."""

    def __init__(self, config):
        super().__init__()
        hidden = 4 * config.n_embd
        self.c_fc = nn.Linear(config.n_embd, hidden)
        self.gelu = nn.GELU()
        self.c_proj = nn.Linear(hidden, config.n_embd)

    def forward(self, x):
        """Transform the last dimension of ``x``; the output keeps the input's shape."""
        expanded = self.c_fc(x)
        activated = self.gelu(expanded)
        return self.c_proj(activated)

# Define Block
class Block(nn.Module):
    """One transformer layer: LayerNorm -> attention and LayerNorm -> MLP, each added back residually."""

    def __init__(self, config):
        super().__init__()
        width = config.n_embd
        self.ln_1 = nn.LayerNorm(width)
        self.attn = CausalSelfAttention(config)
        self.ln_2 = nn.LayerNorm(width)
        self.mlp = MLP(config)

    def forward(self, x):
        """Run both residual branches in order and return a tensor shaped like ``x``."""
        # Normalisation is applied to the branch input, not to the residual path.
        attn_out = self.attn(self.ln_1(x))
        x = x + attn_out
        mlp_out = self.mlp(self.ln_2(x))
        return x + mlp_out

# Define GPTConfig with the new parameters
@dataclass
class GPTConfig:
    """Hyperparameters that determine the size of the GPT model built from this config."""

    block_size: int = 1024   # number of position embeddings; longest sequence the model accepts
    vocab_size: int = 50257  # rows of the token embedding and width of the output logits
    n_layer: int = 12        # number of stacked transformer blocks
    n_head: int = 16         # attention heads per block; must divide n_embd evenly
    n_embd: int = 1024       # embedding / hidden width

# Define GPT
class GPT(nn.Module):
    """Stack of transformer blocks with token and position embeddings and a linear output head.

    The token embedding and the output head share a single weight tensor.
    """

    def __init__(self, config):
        super().__init__()
        self.config = config

        token_emb = nn.Embedding(config.vocab_size, config.n_embd)
        pos_emb = nn.Embedding(config.block_size, config.n_embd)
        layers = nn.ModuleList([Block(config) for _ in range(config.n_layer)])
        final_norm = nn.LayerNorm(config.n_embd)
        self.transformer = nn.ModuleDict(
            {"wte": token_emb, "wpe": pos_emb, "h": layers, "ln_f": final_norm}
        )
        self.lm_head = nn.Linear(config.n_embd, config.vocab_size, bias=False)

        # Weight tying: the embedding table now points at the output head's parameter.
        self.transformer.wte.weight = self.lm_head.weight
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Draw Linear/Embedding weights from N(0, 0.02) and zero any Linear bias."""
        if isinstance(module, (nn.Linear, nn.Embedding)):
            torch.nn.init.normal_(module.weight, mean=0.0, std=0.02)
        if isinstance(module, nn.Linear) and module.bias is not None:
            torch.nn.init.zeros_(module.bias)

    def forward(self, idx, targets=None):
        """Return ``(logits, loss)`` for a 2-D batch of token ids.

        ``logits`` has one row of ``vocab_size`` scores per input position.
        ``loss`` is the cross-entropy against ``targets`` (flattened), or
        ``None`` when no targets are given.
        """
        _, seq_len = idx.size()
        assert seq_len <= self.config.block_size

        positions = torch.arange(0, seq_len, dtype=torch.long, device=idx.device)
        # Position embeddings broadcast across the batch dimension.
        hidden = self.transformer.wte(idx) + self.transformer.wpe(positions)
        for layer in self.transformer.h:
            hidden = layer(hidden)
        logits = self.lm_head(self.transformer.ln_f(hidden))

        if targets is None:
            return logits, None
        loss = F.cross_entropy(logits.view(-1, logits.size(-1)), targets.view(-1))
        return logits, loss

# DataLoader with tiktoken
class DataLoaderLite:
    """Serves consecutive (input, target) batches from a text file tokenized with tiktoken."""

    def __init__(self, B, T, data_path, tokenizer_name="gpt2"):
        """Read ``data_path`` as UTF-8, encode it once, and start at token 0.

        ``B`` is the number of rows per batch and ``T`` the number of tokens per row.
        """
        self.B = B
        self.T = T
        self.tokenizer = tiktoken.get_encoding(tokenizer_name)
        with open(data_path, 'r', encoding='utf-8') as handle:
            token_ids = self.tokenizer.encode(handle.read())
        self.tokens = torch.tensor(token_ids, dtype=torch.long)
        self.current_position = 0

    def next_batch(self):
        """Return the next ``(x, y)`` pair, each shaped (B, T); ``y`` is ``x`` shifted by one token.

        When fewer than ``B * T + 1`` tokens remain, reading restarts from the
        beginning. If the whole token stream is shorter than that, the window
        is right-padded with zeros.
        """
        # One extra token is needed so the last input position has a target.
        span = self.B * self.T + 1
        if self.current_position + span > len(self.tokens):
            self.current_position = 0

        start = self.current_position
        chunk = self.tokens[start:start + span]
        self.current_position = start + self.B * self.T

        shortfall = span - len(chunk)
        if shortfall > 0:
            filler = torch.zeros(shortfall, dtype=chunk.dtype)
            chunk = torch.cat([chunk, filler], dim=0)

        inputs = chunk[:-1].view(self.B, self.T)
        labels = chunk[1:].view(self.B, self.T)
        return inputs, labels


# Initialize model and optimizer on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = GPT(GPTConfig())
model.to(device)
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)

# Mixed-precision setup.
# torch.cuda.amp.GradScaler() is deprecated (it emits a FutureWarning) and
# warns again when CUDA is missing. The device-generic constructor is used
# instead, and scaling is switched off explicitly on CPU so that
# scaler.scale/step/update become plain pass-throughs there.
scaler = torch.amp.GradScaler("cuda", enabled=(device.type == "cuda"))

# Number of trainable parameters
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print(f"Number of trainable parameters: {num_params:,}")

# Output directories for checkpoints and the text log
os.makedirs("checkpoints", exist_ok=True)
os.makedirs("logs", exist_ok=True)
log_file = "logs/training_log.txt"

# Training data: batches of 8 rows x 256 tokens read from the uploaded text file.
train_loader = DataLoaderLite(B=8, T=256, data_path="/content/input.txt")

# Training Loop
#
# Runs for at most `max_epochs` epochs. The first time the epoch-average loss
# drops below `loss_threshold`, the run is scheduled to end `extra_epochs`
# epochs later. `max_epochs` is rewritten at that point, which is why the loop
# is a `while` that re-reads it: a `for epoch in range(max_epochs)` loop fixes
# its length up front and would ignore the change.
max_epochs = 200
extra_epochs = 5
loss_threshold = 0.099999
training_complete = False  # becomes True once the threshold has been reached
loss_history = []          # epoch-average loss, one entry per finished epoch

# Autocast is only enabled on CUDA; on CPU the forward pass stays in float32.
use_amp = device.type == "cuda"

# Number of optimizer steps per epoch. At least one step is taken so the
# averages below never divide by zero on a very small corpus.
steps_per_epoch = max(1, len(train_loader.tokens) // (train_loader.B * train_loader.T))

with open(log_file, "w") as log:
    epoch = 0
    while epoch < max_epochs:
        epoch_loss = 0.0
        epoch_accuracy = 0.0
        batches = tqdm(range(steps_per_epoch))

        for _ in batches:
            x, y = train_loader.next_batch()
            x, y = x.to(device), y.to(device)
            optimizer.zero_grad()

            # Mixed-precision forward pass (replaces the deprecated torch.cuda.amp.autocast()).
            with torch.autocast(device_type=device.type, enabled=use_amp):
                logits, loss = model(x, y)

            # Next-token accuracy for this batch; bookkeeping only, so no graph is needed.
            with torch.no_grad():
                predictions = logits.argmax(dim=-1)
                accuracy = (predictions == y).float().mean()

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            # Read each scalar back from the device once and reuse it.
            batch_loss = loss.item()
            batch_accuracy = accuracy.item()
            epoch_loss += batch_loss
            epoch_accuracy += batch_accuracy

            batches.set_description(f"Epoch {epoch+1}/{max_epochs} - Loss: {batch_loss:.6f} - Accuracy: {batch_accuracy*100:.2f}%")

        # Average loss and accuracy for the epoch
        avg_loss = epoch_loss / steps_per_epoch
        avg_accuracy = epoch_accuracy / steps_per_epoch
        loss_history.append(avg_loss)

        # Log the epoch's loss and accuracy; flush so the file is current during long runs.
        log.write(f"Epoch {epoch+1}: Loss = {avg_loss:.6f}, Accuracy = {avg_accuracy*100:.2f}%\n")
        log.flush()
        print(f"Epoch {epoch+1} Complete: Average Loss = {avg_loss:.6f}, Average Accuracy = {avg_accuracy*100:.2f}%")

        # First time under the threshold: schedule the end of training.
        if avg_loss < loss_threshold and not training_complete:
            print(f"Loss below threshold ({loss_threshold}). Continuing for {extra_epochs} extra epochs.")
            max_epochs = epoch + 1 + extra_epochs
            training_complete = True

        # Save on every even epoch, and always on the last one so the final weights are kept.
        is_last_epoch = epoch + 1 >= max_epochs
        if (epoch + 1) % 2 == 0 or is_last_epoch:
            torch.save(model.state_dict(), f"checkpoints/model_epoch_{epoch+1}.pt")
            print(f"Model saved after Epoch {epoch+1}")

        epoch += 1


  scaler = GradScaler()


Number of trainable parameters: 203,668,480


  with autocast():
Epoch 1/200 - Loss: 6.052705 - Accuracy: 17.92%: 100%|██████████| 165/165 [00:47<00:00,  3.48it/s]


Epoch 1 Complete: Average Loss = 6.655091, Average Accuracy = 13.06%


Epoch 2/200 - Loss: 5.513403 - Accuracy: 20.85%: 100%|██████████| 165/165 [00:47<00:00,  3.44it/s]


Epoch 2 Complete: Average Loss = 5.724089, Average Accuracy = 17.27%
Model saved after Epoch 2


Epoch 3/200 - Loss: 5.176640 - Accuracy: 21.44%: 100%|██████████| 165/165 [00:49<00:00,  3.35it/s]


Epoch 3 Complete: Average Loss = 5.356712, Average Accuracy = 18.86%


Epoch 4/200 - Loss: 4.908785 - Accuracy: 22.12%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 4 Complete: Average Loss = 5.059195, Average Accuracy = 20.33%
Model saved after Epoch 4


Epoch 5/200 - Loss: 4.722197 - Accuracy: 24.51%: 100%|██████████| 165/165 [00:49<00:00,  3.35it/s]


Epoch 5 Complete: Average Loss = 4.833605, Average Accuracy = 21.85%


Epoch 6/200 - Loss: 4.543072 - Accuracy: 25.83%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 6 Complete: Average Loss = 4.680102, Average Accuracy = 23.22%
Model saved after Epoch 6


Epoch 7/200 - Loss: 4.387027 - Accuracy: 26.95%: 100%|██████████| 165/165 [00:49<00:00,  3.36it/s]


Epoch 7 Complete: Average Loss = 4.543137, Average Accuracy = 24.46%


Epoch 8/200 - Loss: 4.246301 - Accuracy: 28.61%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 8 Complete: Average Loss = 4.430372, Average Accuracy = 25.57%
Model saved after Epoch 8


Epoch 9/200 - Loss: 4.108156 - Accuracy: 29.93%: 100%|██████████| 165/165 [00:49<00:00,  3.36it/s]


Epoch 9 Complete: Average Loss = 4.317351, Average Accuracy = 26.64%


Epoch 10/200 - Loss: 3.983847 - Accuracy: 30.96%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 10 Complete: Average Loss = 4.194217, Average Accuracy = 27.68%
Model saved after Epoch 10


Epoch 11/200 - Loss: 3.834943 - Accuracy: 33.25%: 100%|██████████| 165/165 [00:49<00:00,  3.36it/s]


Epoch 11 Complete: Average Loss = 4.076782, Average Accuracy = 28.80%


Epoch 12/200 - Loss: 3.784020 - Accuracy: 32.76%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 12 Complete: Average Loss = 3.975849, Average Accuracy = 29.86%
Model saved after Epoch 12


Epoch 13/200 - Loss: 3.648687 - Accuracy: 34.86%: 100%|██████████| 165/165 [00:49<00:00,  3.36it/s]


Epoch 13 Complete: Average Loss = 3.891437, Average Accuracy = 30.66%


Epoch 14/200 - Loss: 3.559650 - Accuracy: 35.45%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 14 Complete: Average Loss = 3.806384, Average Accuracy = 31.48%
Model saved after Epoch 14


Epoch 15/200 - Loss: 3.481542 - Accuracy: 36.57%: 100%|██████████| 165/165 [00:48<00:00,  3.37it/s]


Epoch 15 Complete: Average Loss = 3.719236, Average Accuracy = 32.43%


Epoch 16/200 - Loss: 3.368174 - Accuracy: 37.55%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 16 Complete: Average Loss = 3.638821, Average Accuracy = 33.15%
Model saved after Epoch 16


Epoch 17/200 - Loss: 3.285975 - Accuracy: 37.94%: 100%|██████████| 165/165 [00:47<00:00,  3.45it/s]


Epoch 17 Complete: Average Loss = 3.547185, Average Accuracy = 33.97%


Epoch 18/200 - Loss: 3.252765 - Accuracy: 38.82%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 18 Complete: Average Loss = 3.474102, Average Accuracy = 34.66%
Model saved after Epoch 18


Epoch 19/200 - Loss: 3.196387 - Accuracy: 38.77%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 19 Complete: Average Loss = 3.408320, Average Accuracy = 35.24%


Epoch 20/200 - Loss: 3.144306 - Accuracy: 39.21%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 20 Complete: Average Loss = 3.355458, Average Accuracy = 35.86%
Model saved after Epoch 20


Epoch 21/200 - Loss: 3.103904 - Accuracy: 39.21%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 21 Complete: Average Loss = 3.314957, Average Accuracy = 36.12%


Epoch 22/200 - Loss: 3.038669 - Accuracy: 40.38%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 22 Complete: Average Loss = 3.256821, Average Accuracy = 36.59%
Model saved after Epoch 22


Epoch 23/200 - Loss: 3.023093 - Accuracy: 39.99%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 23 Complete: Average Loss = 3.226529, Average Accuracy = 36.84%


Epoch 24/200 - Loss: 2.996262 - Accuracy: 39.89%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 24 Complete: Average Loss = 3.264684, Average Accuracy = 36.16%
Model saved after Epoch 24


Epoch 25/200 - Loss: 3.026088 - Accuracy: 40.14%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 25 Complete: Average Loss = 3.285902, Average Accuracy = 35.74%


Epoch 26/200 - Loss: 3.084871 - Accuracy: 39.01%: 100%|██████████| 165/165 [00:48<00:00,  3.41it/s]


Epoch 26 Complete: Average Loss = 3.319550, Average Accuracy = 35.18%
Model saved after Epoch 26


Epoch 27/200 - Loss: 3.080973 - Accuracy: 38.77%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 27 Complete: Average Loss = 3.377278, Average Accuracy = 34.34%


Epoch 28/200 - Loss: 3.012961 - Accuracy: 40.62%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 28 Complete: Average Loss = 3.319641, Average Accuracy = 34.88%
Model saved after Epoch 28


Epoch 29/200 - Loss: 2.984856 - Accuracy: 40.48%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 29 Complete: Average Loss = 3.241012, Average Accuracy = 35.77%


Epoch 30/200 - Loss: 2.951032 - Accuracy: 40.67%: 100%|██████████| 165/165 [00:48<00:00,  3.38it/s]


Epoch 30 Complete: Average Loss = 3.231607, Average Accuracy = 35.78%
Model saved after Epoch 30


Epoch 31/200 - Loss: 2.937482 - Accuracy: 39.84%: 100%|██████████| 165/165 [00:48<00:00,  3.38it/s]


Epoch 31 Complete: Average Loss = 3.215522, Average Accuracy = 35.75%


Epoch 32/200 - Loss: 2.987158 - Accuracy: 39.79%: 100%|██████████| 165/165 [00:48<00:00,  3.41it/s]


Epoch 32 Complete: Average Loss = 3.227467, Average Accuracy = 35.47%
Model saved after Epoch 32


Epoch 33/200 - Loss: 2.943540 - Accuracy: 40.58%: 100%|██████████| 165/165 [00:48<00:00,  3.38it/s]


Epoch 33 Complete: Average Loss = 3.200461, Average Accuracy = 35.78%


Epoch 34/200 - Loss: 2.987813 - Accuracy: 38.77%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 34 Complete: Average Loss = 3.199029, Average Accuracy = 35.54%
Model saved after Epoch 34


Epoch 35/200 - Loss: 2.973783 - Accuracy: 39.16%: 100%|██████████| 165/165 [00:48<00:00,  3.38it/s]


Epoch 35 Complete: Average Loss = 3.214527, Average Accuracy = 35.30%


Epoch 36/200 - Loss: 2.938540 - Accuracy: 40.67%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 36 Complete: Average Loss = 3.223604, Average Accuracy = 35.01%
Model saved after Epoch 36


Epoch 37/200 - Loss: 2.892686 - Accuracy: 39.45%: 100%|██████████| 165/165 [00:48<00:00,  3.38it/s]


Epoch 37 Complete: Average Loss = 3.172881, Average Accuracy = 35.52%


Epoch 38/200 - Loss: 2.850626 - Accuracy: 41.16%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 38 Complete: Average Loss = 3.091690, Average Accuracy = 36.43%
Model saved after Epoch 38


Epoch 39/200 - Loss: 2.779316 - Accuracy: 41.31%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 39 Complete: Average Loss = 3.067785, Average Accuracy = 36.56%


Epoch 40/200 - Loss: 2.762010 - Accuracy: 41.31%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 40 Complete: Average Loss = 3.021264, Average Accuracy = 37.09%
Model saved after Epoch 40


Epoch 41/200 - Loss: 2.741572 - Accuracy: 41.80%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 41 Complete: Average Loss = 3.000669, Average Accuracy = 37.28%


Epoch 42/200 - Loss: 2.696124 - Accuracy: 42.43%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 42 Complete: Average Loss = 2.944925, Average Accuracy = 37.85%
Model saved after Epoch 42


Epoch 43/200 - Loss: 2.610130 - Accuracy: 42.82%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 43 Complete: Average Loss = 2.890767, Average Accuracy = 38.43%


Epoch 44/200 - Loss: 2.501151 - Accuracy: 44.97%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 44 Complete: Average Loss = 2.806621, Average Accuracy = 39.50%
Model saved after Epoch 44


Epoch 45/200 - Loss: 2.406761 - Accuracy: 46.68%: 100%|██████████| 165/165 [00:48<00:00,  3.41it/s]


Epoch 45 Complete: Average Loss = 2.710048, Average Accuracy = 40.88%


Epoch 46/200 - Loss: 2.308814 - Accuracy: 47.80%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 46 Complete: Average Loss = 2.616700, Average Accuracy = 42.06%
Model saved after Epoch 46


Epoch 47/200 - Loss: 2.291755 - Accuracy: 48.24%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 47 Complete: Average Loss = 2.512887, Average Accuracy = 43.48%


Epoch 48/200 - Loss: 2.282526 - Accuracy: 46.48%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 48 Complete: Average Loss = 2.499916, Average Accuracy = 43.66%
Model saved after Epoch 48


Epoch 49/200 - Loss: 2.278864 - Accuracy: 47.51%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 49 Complete: Average Loss = 2.464531, Average Accuracy = 44.01%


Epoch 50/200 - Loss: 2.133234 - Accuracy: 50.00%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 50 Complete: Average Loss = 2.359999, Average Accuracy = 45.58%
Model saved after Epoch 50


Epoch 51/200 - Loss: 1.979641 - Accuracy: 52.88%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 51 Complete: Average Loss = 2.245202, Average Accuracy = 47.47%


Epoch 52/200 - Loss: 1.893902 - Accuracy: 53.81%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 52 Complete: Average Loss = 2.126785, Average Accuracy = 49.51%
Model saved after Epoch 52


Epoch 53/200 - Loss: 1.742552 - Accuracy: 57.81%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 53 Complete: Average Loss = 2.001453, Average Accuracy = 51.82%


Epoch 54/200 - Loss: 1.715018 - Accuracy: 56.49%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 54 Complete: Average Loss = 1.900424, Average Accuracy = 53.55%
Model saved after Epoch 54


Epoch 55/200 - Loss: 1.605411 - Accuracy: 59.67%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 55 Complete: Average Loss = 1.780649, Average Accuracy = 55.78%


Epoch 56/200 - Loss: 1.443317 - Accuracy: 62.60%: 100%|██████████| 165/165 [00:48<00:00,  3.44it/s]


Epoch 56 Complete: Average Loss = 1.678253, Average Accuracy = 57.75%
Model saved after Epoch 56


Epoch 57/200 - Loss: 1.336854 - Accuracy: 65.48%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 57 Complete: Average Loss = 1.541705, Average Accuracy = 60.63%


Epoch 58/200 - Loss: 1.239382 - Accuracy: 65.38%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 58 Complete: Average Loss = 1.424818, Average Accuracy = 63.14%
Model saved after Epoch 58


Epoch 59/200 - Loss: 1.230950 - Accuracy: 67.24%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 59 Complete: Average Loss = 1.293512, Average Accuracy = 66.10%


Epoch 60/200 - Loss: 1.169675 - Accuracy: 68.12%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 60 Complete: Average Loss = 1.189191, Average Accuracy = 68.61%
Model saved after Epoch 60


Epoch 61/200 - Loss: 1.044148 - Accuracy: 71.19%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 61 Complete: Average Loss = 1.086724, Average Accuracy = 70.96%


Epoch 62/200 - Loss: 1.021625 - Accuracy: 75.00%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 62 Complete: Average Loss = 0.988162, Average Accuracy = 73.46%
Model saved after Epoch 62


Epoch 63/200 - Loss: 0.882030 - Accuracy: 75.49%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 63 Complete: Average Loss = 0.936014, Average Accuracy = 74.60%


Epoch 64/200 - Loss: 0.886215 - Accuracy: 74.85%: 100%|██████████| 165/165 [00:47<00:00,  3.44it/s]


Epoch 64 Complete: Average Loss = 0.871416, Average Accuracy = 76.16%
Model saved after Epoch 64


Epoch 65/200 - Loss: 0.720709 - Accuracy: 81.20%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 65 Complete: Average Loss = 0.788283, Average Accuracy = 78.35%


Epoch 66/200 - Loss: 0.663786 - Accuracy: 83.40%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 66 Complete: Average Loss = 0.690410, Average Accuracy = 81.08%
Model saved after Epoch 66


Epoch 67/200 - Loss: 0.673675 - Accuracy: 82.52%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 67 Complete: Average Loss = 0.626140, Average Accuracy = 82.86%


Epoch 68/200 - Loss: 0.575431 - Accuracy: 85.60%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 68 Complete: Average Loss = 0.564656, Average Accuracy = 84.62%
Model saved after Epoch 68


Epoch 69/200 - Loss: 0.569431 - Accuracy: 84.62%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 69 Complete: Average Loss = 0.512422, Average Accuracy = 86.09%


Epoch 70/200 - Loss: 0.548066 - Accuracy: 84.96%: 100%|██████████| 165/165 [00:47<00:00,  3.44it/s]


Epoch 70 Complete: Average Loss = 0.466987, Average Accuracy = 87.36%
Model saved after Epoch 70


Epoch 71/200 - Loss: 0.476850 - Accuracy: 88.13%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 71 Complete: Average Loss = 0.420528, Average Accuracy = 88.70%


Epoch 72/200 - Loss: 0.416150 - Accuracy: 88.53%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 72 Complete: Average Loss = 0.375737, Average Accuracy = 89.93%
Model saved after Epoch 72


Epoch 73/200 - Loss: 0.421405 - Accuracy: 89.26%: 100%|██████████| 165/165 [00:48<00:00,  3.41it/s]


Epoch 73 Complete: Average Loss = 0.340205, Average Accuracy = 90.93%


Epoch 74/200 - Loss: 0.389859 - Accuracy: 91.02%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 74 Complete: Average Loss = 0.327094, Average Accuracy = 91.37%
Model saved after Epoch 74


Epoch 75/200 - Loss: 0.368750 - Accuracy: 90.33%: 100%|██████████| 165/165 [00:48<00:00,  3.41it/s]


Epoch 75 Complete: Average Loss = 0.301933, Average Accuracy = 92.03%


Epoch 76/200 - Loss: 0.351848 - Accuracy: 91.80%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 76 Complete: Average Loss = 0.286069, Average Accuracy = 92.42%
Model saved after Epoch 76


Epoch 77/200 - Loss: 0.355329 - Accuracy: 90.43%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 77 Complete: Average Loss = 0.277790, Average Accuracy = 92.60%


Epoch 78/200 - Loss: 0.307954 - Accuracy: 91.70%: 100%|██████████| 165/165 [00:48<00:00,  3.44it/s]


Epoch 78 Complete: Average Loss = 0.269991, Average Accuracy = 92.80%
Model saved after Epoch 78


Epoch 79/200 - Loss: 0.307057 - Accuracy: 92.43%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 79 Complete: Average Loss = 0.257221, Average Accuracy = 93.17%


Epoch 80/200 - Loss: 0.333657 - Accuracy: 91.89%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 80 Complete: Average Loss = 0.242368, Average Accuracy = 93.55%
Model saved after Epoch 80


Epoch 81/200 - Loss: 0.326756 - Accuracy: 91.85%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 81 Complete: Average Loss = 0.231491, Average Accuracy = 93.85%


Epoch 82/200 - Loss: 0.239758 - Accuracy: 93.55%: 100%|██████████| 165/165 [00:47<00:00,  3.45it/s]


Epoch 82 Complete: Average Loss = 0.224045, Average Accuracy = 94.05%
Model saved after Epoch 82


Epoch 83/200 - Loss: 0.248347 - Accuracy: 93.85%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 83 Complete: Average Loss = 0.200663, Average Accuracy = 94.80%


Epoch 84/200 - Loss: 0.185470 - Accuracy: 95.17%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 84 Complete: Average Loss = 0.182701, Average Accuracy = 95.26%
Model saved after Epoch 84


Epoch 85/200 - Loss: 0.221148 - Accuracy: 94.87%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 85 Complete: Average Loss = 0.170063, Average Accuracy = 95.67%


Epoch 86/200 - Loss: 0.192595 - Accuracy: 94.82%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 86 Complete: Average Loss = 0.161994, Average Accuracy = 95.86%
Model saved after Epoch 86


Epoch 87/200 - Loss: 0.190807 - Accuracy: 95.31%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 87 Complete: Average Loss = 0.162081, Average Accuracy = 95.93%


Epoch 88/200 - Loss: 0.151298 - Accuracy: 95.85%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 88 Complete: Average Loss = 0.153563, Average Accuracy = 96.09%
Model saved after Epoch 88


Epoch 89/200 - Loss: 0.150860 - Accuracy: 96.34%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 89 Complete: Average Loss = 0.152144, Average Accuracy = 96.16%


Epoch 90/200 - Loss: 0.175152 - Accuracy: 94.78%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 90 Complete: Average Loss = 0.160296, Average Accuracy = 95.83%
Model saved after Epoch 90


Epoch 91/200 - Loss: 0.180721 - Accuracy: 94.92%: 100%|██████████| 165/165 [00:48<00:00,  3.41it/s]


Epoch 91 Complete: Average Loss = 0.165337, Average Accuracy = 95.64%


Epoch 92/200 - Loss: 0.204789 - Accuracy: 94.38%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 92 Complete: Average Loss = 0.164360, Average Accuracy = 95.62%
Model saved after Epoch 92


Epoch 93/200 - Loss: 0.218509 - Accuracy: 94.68%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 93 Complete: Average Loss = 0.167378, Average Accuracy = 95.54%


Epoch 94/200 - Loss: 0.179745 - Accuracy: 94.68%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 94 Complete: Average Loss = 0.173651, Average Accuracy = 95.35%
Model saved after Epoch 94


Epoch 95/200 - Loss: 0.198001 - Accuracy: 95.51%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 95 Complete: Average Loss = 0.167996, Average Accuracy = 95.48%


Epoch 96/200 - Loss: 0.173545 - Accuracy: 95.46%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 96 Complete: Average Loss = 0.160090, Average Accuracy = 95.74%
Model saved after Epoch 96


Epoch 97/200 - Loss: 0.176610 - Accuracy: 95.26%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 97 Complete: Average Loss = 0.148600, Average Accuracy = 96.06%


Epoch 98/200 - Loss: 0.169134 - Accuracy: 95.21%: 100%|██████████| 165/165 [00:48<00:00,  3.44it/s]


Epoch 98 Complete: Average Loss = 0.140171, Average Accuracy = 96.29%
Model saved after Epoch 98


Epoch 99/200 - Loss: 0.170999 - Accuracy: 95.41%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 99 Complete: Average Loss = 0.135144, Average Accuracy = 96.45%


Epoch 100/200 - Loss: 0.119899 - Accuracy: 96.73%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 100 Complete: Average Loss = 0.127409, Average Accuracy = 96.69%
Model saved after Epoch 100


Epoch 101/200 - Loss: 0.143427 - Accuracy: 96.68%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 101 Complete: Average Loss = 0.118770, Average Accuracy = 96.94%


Epoch 102/200 - Loss: 0.116070 - Accuracy: 97.12%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 102 Complete: Average Loss = 0.111579, Average Accuracy = 97.13%
Model saved after Epoch 102


Epoch 103/200 - Loss: 0.121212 - Accuracy: 97.12%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 103 Complete: Average Loss = 0.102957, Average Accuracy = 97.40%


Epoch 104/200 - Loss: 0.081952 - Accuracy: 98.00%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 104 Complete: Average Loss = 0.096855, Average Accuracy = 97.59%
Model saved after Epoch 104
Loss below threshold (0.099999). Continuing for 5 extra epochs.


Epoch 105/205 - Loss: 0.104370 - Accuracy: 97.22%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 105 Complete: Average Loss = 0.098965, Average Accuracy = 97.54%


Epoch 106/205 - Loss: 0.103861 - Accuracy: 97.17%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 106 Complete: Average Loss = 0.101708, Average Accuracy = 97.45%
Model saved after Epoch 106


Epoch 107/205 - Loss: 0.112440 - Accuracy: 96.97%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 107 Complete: Average Loss = 0.105988, Average Accuracy = 97.27%


Epoch 108/205 - Loss: 0.144490 - Accuracy: 95.85%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 108 Complete: Average Loss = 0.119705, Average Accuracy = 96.85%
Model saved after Epoch 108


Epoch 109/205 - Loss: 0.165181 - Accuracy: 95.95%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 109 Complete: Average Loss = 0.134194, Average Accuracy = 96.39%


Epoch 110/205 - Loss: 0.182647 - Accuracy: 94.68%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 110 Complete: Average Loss = 0.140003, Average Accuracy = 96.17%
Model saved after Epoch 110


Epoch 111/205 - Loss: 0.204577 - Accuracy: 94.58%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 111 Complete: Average Loss = 0.153432, Average Accuracy = 95.78%


Epoch 112/205 - Loss: 0.197709 - Accuracy: 94.63%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 112 Complete: Average Loss = 0.154955, Average Accuracy = 95.67%
Model saved after Epoch 112


Epoch 113/205 - Loss: 0.131224 - Accuracy: 96.00%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 113 Complete: Average Loss = 0.148663, Average Accuracy = 95.83%


Epoch 114/205 - Loss: 0.142194 - Accuracy: 96.24%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 114 Complete: Average Loss = 0.130769, Average Accuracy = 96.40%
Model saved after Epoch 114


Epoch 115/205 - Loss: 0.093765 - Accuracy: 97.75%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 115 Complete: Average Loss = 0.105192, Average Accuracy = 97.19%


Epoch 116/205 - Loss: 0.060470 - Accuracy: 98.58%: 100%|██████████| 165/165 [00:48<00:00,  3.44it/s]


Epoch 116 Complete: Average Loss = 0.085661, Average Accuracy = 97.81%
Model saved after Epoch 116


Epoch 117/205 - Loss: 0.058318 - Accuracy: 98.78%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 117 Complete: Average Loss = 0.071248, Average Accuracy = 98.22%


Epoch 118/205 - Loss: 0.062684 - Accuracy: 98.63%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 118 Complete: Average Loss = 0.062454, Average Accuracy = 98.53%
Model saved after Epoch 118


Epoch 119/205 - Loss: 0.054553 - Accuracy: 98.73%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 119 Complete: Average Loss = 0.055777, Average Accuracy = 98.72%


Epoch 120/205 - Loss: 0.052164 - Accuracy: 98.78%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 120 Complete: Average Loss = 0.056866, Average Accuracy = 98.73%
Model saved after Epoch 120


Epoch 121/205 - Loss: 0.073134 - Accuracy: 98.63%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 121 Complete: Average Loss = 0.057376, Average Accuracy = 98.70%


Epoch 122/205 - Loss: 0.059354 - Accuracy: 98.58%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 122 Complete: Average Loss = 0.063649, Average Accuracy = 98.55%
Model saved after Epoch 122


Epoch 123/205 - Loss: 0.067623 - Accuracy: 98.49%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 123 Complete: Average Loss = 0.073134, Average Accuracy = 98.24%


Epoch 124/205 - Loss: 0.103249 - Accuracy: 97.80%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 124 Complete: Average Loss = 0.087954, Average Accuracy = 97.79%
Model saved after Epoch 124


Epoch 125/205 - Loss: 0.148187 - Accuracy: 96.29%: 100%|██████████| 165/165 [00:48<00:00,  3.41it/s]


Epoch 125 Complete: Average Loss = 0.109570, Average Accuracy = 97.13%


Epoch 126/205 - Loss: 0.200991 - Accuracy: 95.36%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 126 Complete: Average Loss = 0.134189, Average Accuracy = 96.33%
Model saved after Epoch 126


Epoch 127/205 - Loss: 0.144529 - Accuracy: 96.24%: 100%|██████████| 165/165 [00:48<00:00,  3.41it/s]


Epoch 127 Complete: Average Loss = 0.138617, Average Accuracy = 96.18%


Epoch 128/205 - Loss: 0.143186 - Accuracy: 95.85%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 128 Complete: Average Loss = 0.129946, Average Accuracy = 96.38%
Model saved after Epoch 128


Epoch 129/205 - Loss: 0.122532 - Accuracy: 96.24%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 129 Complete: Average Loss = 0.111782, Average Accuracy = 96.93%


Epoch 130/205 - Loss: 0.112769 - Accuracy: 97.80%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 130 Complete: Average Loss = 0.089486, Average Accuracy = 97.62%
Model saved after Epoch 130


Epoch 131/205 - Loss: 0.083915 - Accuracy: 98.24%: 100%|██████████| 165/165 [00:48<00:00,  3.41it/s]


Epoch 131 Complete: Average Loss = 0.072593, Average Accuracy = 98.13%


Epoch 132/205 - Loss: 0.072820 - Accuracy: 98.05%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 132 Complete: Average Loss = 0.059666, Average Accuracy = 98.57%
Model saved after Epoch 132


Epoch 133/205 - Loss: 0.065372 - Accuracy: 98.58%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 133 Complete: Average Loss = 0.049723, Average Accuracy = 98.85%


Epoch 134/205 - Loss: 0.075788 - Accuracy: 98.05%: 100%|██████████| 165/165 [00:48<00:00,  3.44it/s]


Epoch 134 Complete: Average Loss = 0.044214, Average Accuracy = 99.03%
Model saved after Epoch 134


Epoch 135/205 - Loss: 0.055768 - Accuracy: 99.02%: 100%|██████████| 165/165 [00:48<00:00,  3.41it/s]


Epoch 135 Complete: Average Loss = 0.040421, Average Accuracy = 99.14%


Epoch 136/205 - Loss: 0.034768 - Accuracy: 99.12%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 136 Complete: Average Loss = 0.039427, Average Accuracy = 99.18%
Model saved after Epoch 136


Epoch 137/205 - Loss: 0.044633 - Accuracy: 99.17%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 137 Complete: Average Loss = 0.040396, Average Accuracy = 99.14%


Epoch 138/205 - Loss: 0.054099 - Accuracy: 99.02%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 138 Complete: Average Loss = 0.044641, Average Accuracy = 99.02%
Model saved after Epoch 138


Epoch 139/205 - Loss: 0.067854 - Accuracy: 98.10%: 100%|██████████| 165/165 [00:48<00:00,  3.44it/s]


Epoch 139 Complete: Average Loss = 0.056030, Average Accuracy = 98.69%


Epoch 140/205 - Loss: 0.080376 - Accuracy: 98.00%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 140 Complete: Average Loss = 0.081097, Average Accuracy = 98.03%
Model saved after Epoch 140


Epoch 141/205 - Loss: 0.144874 - Accuracy: 95.75%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 141 Complete: Average Loss = 0.124810, Average Accuracy = 96.64%


Epoch 142/205 - Loss: 0.191763 - Accuracy: 94.14%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 142 Complete: Average Loss = 0.171287, Average Accuracy = 95.16%
Model saved after Epoch 142


Epoch 143/205 - Loss: 0.135022 - Accuracy: 95.70%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 143 Complete: Average Loss = 0.159836, Average Accuracy = 95.45%


Epoch 144/205 - Loss: 0.112320 - Accuracy: 97.07%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 144 Complete: Average Loss = 0.117001, Average Accuracy = 96.71%
Model saved after Epoch 144


Epoch 145/205 - Loss: 0.062369 - Accuracy: 98.34%: 100%|██████████| 165/165 [00:47<00:00,  3.44it/s]


Epoch 145 Complete: Average Loss = 0.077994, Average Accuracy = 97.90%


Epoch 146/205 - Loss: 0.045685 - Accuracy: 98.68%: 100%|██████████| 165/165 [00:47<00:00,  3.44it/s]


Epoch 146 Complete: Average Loss = 0.054879, Average Accuracy = 98.65%
Model saved after Epoch 146


Epoch 147/205 - Loss: 0.039993 - Accuracy: 99.12%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 147 Complete: Average Loss = 0.039974, Average Accuracy = 99.06%


Epoch 148/205 - Loss: 0.038084 - Accuracy: 99.17%: 100%|██████████| 165/165 [00:48<00:00,  3.44it/s]


Epoch 148 Complete: Average Loss = 0.035017, Average Accuracy = 99.27%
Model saved after Epoch 148


Epoch 149/205 - Loss: 0.042224 - Accuracy: 99.17%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 149 Complete: Average Loss = 0.031962, Average Accuracy = 99.35%


Epoch 150/205 - Loss: 0.040059 - Accuracy: 99.22%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 150 Complete: Average Loss = 0.031122, Average Accuracy = 99.35%
Model saved after Epoch 150


Epoch 151/205 - Loss: 0.042576 - Accuracy: 99.32%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 151 Complete: Average Loss = 0.030755, Average Accuracy = 99.38%


Epoch 152/205 - Loss: 0.059887 - Accuracy: 99.12%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 152 Complete: Average Loss = 0.031763, Average Accuracy = 99.36%
Model saved after Epoch 152


Epoch 153/205 - Loss: 0.084847 - Accuracy: 99.12%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 153 Complete: Average Loss = 0.033243, Average Accuracy = 99.30%


Epoch 154/205 - Loss: 0.059653 - Accuracy: 98.97%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 154 Complete: Average Loss = 0.040400, Average Accuracy = 99.13%
Model saved after Epoch 154


Epoch 155/205 - Loss: 0.067981 - Accuracy: 98.68%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 155 Complete: Average Loss = 0.051971, Average Accuracy = 98.80%


Epoch 156/205 - Loss: 0.083084 - Accuracy: 97.41%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 156 Complete: Average Loss = 0.075767, Average Accuracy = 98.03%
Model saved after Epoch 156


Epoch 157/205 - Loss: 0.138732 - Accuracy: 95.90%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 157 Complete: Average Loss = 0.121449, Average Accuracy = 96.68%


Epoch 158/205 - Loss: 0.138407 - Accuracy: 95.41%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 158 Complete: Average Loss = 0.159268, Average Accuracy = 95.55%
Model saved after Epoch 158


Epoch 159/205 - Loss: 0.104946 - Accuracy: 96.73%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 159 Complete: Average Loss = 0.151035, Average Accuracy = 95.74%


Epoch 160/205 - Loss: 0.079812 - Accuracy: 98.00%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 160 Complete: Average Loss = 0.103900, Average Accuracy = 97.07%
Model saved after Epoch 160


Epoch 161/205 - Loss: 0.055415 - Accuracy: 98.78%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 161 Complete: Average Loss = 0.065206, Average Accuracy = 98.25%


Epoch 162/205 - Loss: 0.036992 - Accuracy: 99.17%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 162 Complete: Average Loss = 0.043763, Average Accuracy = 98.91%
Model saved after Epoch 162


Epoch 163/205 - Loss: 0.033278 - Accuracy: 99.17%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 163 Complete: Average Loss = 0.030019, Average Accuracy = 99.33%


Epoch 164/205 - Loss: 0.027007 - Accuracy: 99.41%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 164 Complete: Average Loss = 0.022855, Average Accuracy = 99.52%
Model saved after Epoch 164


Epoch 165/205 - Loss: 0.023937 - Accuracy: 99.51%: 100%|██████████| 165/165 [00:48<00:00,  3.40it/s]


Epoch 165 Complete: Average Loss = 0.020663, Average Accuracy = 99.60%


Epoch 166/205 - Loss: 0.014241 - Accuracy: 99.61%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 166 Complete: Average Loss = 0.018470, Average Accuracy = 99.65%
Model saved after Epoch 166


Epoch 167/205 - Loss: 0.021560 - Accuracy: 99.66%: 100%|██████████| 165/165 [00:48<00:00,  3.41it/s]


Epoch 167 Complete: Average Loss = 0.016344, Average Accuracy = 99.68%


Epoch 168/205 - Loss: 0.015927 - Accuracy: 99.56%: 100%|██████████| 165/165 [00:48<00:00,  3.42it/s]


Epoch 168 Complete: Average Loss = 0.015960, Average Accuracy = 99.69%
Model saved after Epoch 168


Epoch 169/205 - Loss: 0.013711 - Accuracy: 99.76%: 100%|██████████| 165/165 [00:48<00:00,  3.39it/s]


Epoch 169 Complete: Average Loss = 0.018010, Average Accuracy = 99.65%


Epoch 170/205 - Loss: 0.017755 - Accuracy: 99.56%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 170 Complete: Average Loss = 0.026301, Average Accuracy = 99.47%
Model saved after Epoch 170


Epoch 171/205 - Loss: 0.042841 - Accuracy: 98.97%: 100%|██████████| 165/165 [00:48<00:00,  3.38it/s]


Epoch 171 Complete: Average Loss = 0.041258, Average Accuracy = 99.10%


Epoch 172/205 - Loss: 0.107759 - Accuracy: 96.58%: 100%|██████████| 165/165 [00:48<00:00,  3.43it/s]


Epoch 172 Complete: Average Loss = 0.091676, Average Accuracy = 97.58%
Model saved after Epoch 172


Epoch 173/205 - Loss: 0.168181 - Accuracy: 95.36%: 100%|██████████| 165/165 [00:48<00:00,  3.41it/s]


Epoch 173 Complete: Average Loss = 0.179909, Average Accuracy = 94.93%


Epoch 174/205 - Loss: 0.181719 - Accuracy: 95.12%:  59%|█████▉    | 97/165 [00:28<00:20,  3.38it/s]


KeyboardInterrupt: 

In [None]:
from google.colab import files

# Fetch a trained checkpoint from the Colab VM via a browser download.
# Epoch 168 is used because, among the checkpoints saved in the training log
# shown above, it has the lowest average loss (0.015960). To fetch a different
# checkpoint, point this at another model_epoch_<N>.pt file that was saved.
DOWNLOAD_CHECKPOINT_PATH = '/content/checkpoints/model_epoch_168.pt'
files.download(DOWNLOAD_CHECKPOINT_PATH)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>