In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
from google.colab import files  # Mengimpor modul `files` untuk mengelola file di Google Colab
import pandas as pd

In [2]:
# Upload dataset
uploaded = files.upload()
train_file = next(iter(uploaded.keys()))
train_data = pd.read_csv(train_file, header=None)

# Splitting features and target
X_train = train_data.iloc[:, :-1].values  # Features for training
y_train = train_data.iloc[:, -1].values  # Labels for training

# Standardize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

# Split train-test
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, test_size=0.2, random_state=42, stratify=y_train)

Saving optdigits.tra to optdigits.tra


In [3]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# DataLoader
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=12)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=12)

In [4]:
# Define RNN model to mimic HMM
class MarkovRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, pooling_type):
        super(MarkovRNN, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.pooling_type = pooling_type
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.rnn(x)
        if self.pooling_type == 'max':
            out = torch.max(out, dim=1).values
        elif self.pooling_type == 'avg':
            out = torch.mean(out, dim=1)
        out = self.fc(out)
        return out

In [5]:
# Training function
def train_model(model, train_loader, optimizer, criterion, num_epochs, scheduler=None):
    model.train()
    for epoch in tqdm(range(num_epochs), desc="Training Epochs"):
        epoch_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch.unsqueeze(1))  # Add sequence dimension
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        if scheduler:
            scheduler.step()
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss / len(train_loader):.4f}")

In [6]:
# Evaluation function
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch.unsqueeze(1))  # Add sequence dimension
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
    accuracy = correct / total
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    return accuracy

In [7]:
# Parameters
input_size = X_train.shape[1]
output_size = len(torch.unique(y_train_tensor))
hidden_sizes = [32, 64]
pooling_types = ['max', 'avg']
epochs_list = [5, 50, 100, 250, 350]
optimizers = {'SGD': optim.SGD, 'RMSProp': optim.RMSprop, 'Adam': optim.Adam}

# Results dictionary
results = {
    'hidden_size': {},
    'pooling_type': {},
    'optimizer': {},
    'epochs': {}
}

In [8]:
# Experiment: Hidden Sizes
for hidden_size in hidden_sizes:
    print(f"\nExperimenting with Hidden Size: {hidden_size}")
    model = MarkovRNN(input_size, hidden_size, num_layers=1, output_size=output_size, pooling_type='avg')
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    train_model(model, train_loader, optimizer, criterion, num_epochs=50, scheduler=scheduler)
    accuracy = evaluate_model(model, test_loader)
    results['hidden_size'][hidden_size] = accuracy


Experimenting with Hidden Size: 32


Training Epochs:   2%|▏         | 1/50 [00:00<00:25,  1.92it/s]

Epoch 1/50, Loss: 0.6429


Training Epochs:   4%|▍         | 2/50 [00:00<00:20,  2.34it/s]

Epoch 2/50, Loss: 0.1251


Training Epochs:   6%|▌         | 3/50 [00:01<00:18,  2.53it/s]

Epoch 3/50, Loss: 0.0776


Training Epochs:   8%|▊         | 4/50 [00:01<00:17,  2.61it/s]

Epoch 4/50, Loss: 0.0531


Training Epochs:  10%|█         | 5/50 [00:01<00:17,  2.60it/s]

Epoch 5/50, Loss: 0.0383


Training Epochs:  12%|█▏        | 6/50 [00:02<00:16,  2.66it/s]

Epoch 6/50, Loss: 0.0301


Training Epochs:  14%|█▍        | 7/50 [00:02<00:16,  2.62it/s]

Epoch 7/50, Loss: 0.0229


Training Epochs:  16%|█▌        | 8/50 [00:03<00:16,  2.55it/s]

Epoch 8/50, Loss: 0.0183


Training Epochs:  18%|█▊        | 9/50 [00:03<00:16,  2.52it/s]

Epoch 9/50, Loss: 0.0140


Training Epochs:  20%|██        | 10/50 [00:03<00:15,  2.52it/s]

Epoch 10/50, Loss: 0.0110


Training Epochs:  22%|██▏       | 11/50 [00:04<00:15,  2.53it/s]

Epoch 11/50, Loss: 0.0080


Training Epochs:  24%|██▍       | 12/50 [00:04<00:15,  2.53it/s]

Epoch 12/50, Loss: 0.0077


Training Epochs:  26%|██▌       | 13/50 [00:05<00:14,  2.57it/s]

Epoch 13/50, Loss: 0.0076


Training Epochs:  28%|██▊       | 14/50 [00:05<00:13,  2.62it/s]

Epoch 14/50, Loss: 0.0074


Training Epochs:  30%|███       | 15/50 [00:05<00:13,  2.63it/s]

Epoch 15/50, Loss: 0.0072


Training Epochs:  32%|███▏      | 16/50 [00:06<00:12,  2.63it/s]

Epoch 16/50, Loss: 0.0071


Training Epochs:  34%|███▍      | 17/50 [00:06<00:12,  2.66it/s]

Epoch 17/50, Loss: 0.0070


Training Epochs:  36%|███▌      | 18/50 [00:07<00:12,  2.60it/s]

Epoch 18/50, Loss: 0.0068


Training Epochs:  38%|███▊      | 19/50 [00:07<00:11,  2.60it/s]

Epoch 19/50, Loss: 0.0067


Training Epochs:  40%|████      | 20/50 [00:07<00:11,  2.64it/s]

Epoch 20/50, Loss: 0.0065


Training Epochs:  42%|████▏     | 21/50 [00:08<00:11,  2.62it/s]

Epoch 21/50, Loss: 0.0063


Training Epochs:  44%|████▍     | 22/50 [00:08<00:11,  2.51it/s]

Epoch 22/50, Loss: 0.0063


Training Epochs:  46%|████▌     | 23/50 [00:08<00:10,  2.56it/s]

Epoch 23/50, Loss: 0.0063


Training Epochs:  48%|████▊     | 24/50 [00:09<00:09,  2.62it/s]

Epoch 24/50, Loss: 0.0063


Training Epochs:  50%|█████     | 25/50 [00:09<00:09,  2.64it/s]

Epoch 25/50, Loss: 0.0063


Training Epochs:  52%|█████▏    | 26/50 [00:10<00:08,  2.67it/s]

Epoch 26/50, Loss: 0.0063


Training Epochs:  54%|█████▍    | 27/50 [00:10<00:08,  2.66it/s]

Epoch 27/50, Loss: 0.0062


Training Epochs:  56%|█████▌    | 28/50 [00:10<00:08,  2.67it/s]

Epoch 28/50, Loss: 0.0062


Training Epochs:  58%|█████▊    | 29/50 [00:11<00:07,  2.65it/s]

Epoch 29/50, Loss: 0.0062


Training Epochs:  60%|██████    | 30/50 [00:11<00:07,  2.66it/s]

Epoch 30/50, Loss: 0.0062


Training Epochs:  62%|██████▏   | 31/50 [00:11<00:07,  2.67it/s]

Epoch 31/50, Loss: 0.0062


Training Epochs:  64%|██████▍   | 32/50 [00:12<00:06,  2.61it/s]

Epoch 32/50, Loss: 0.0062


Training Epochs:  66%|██████▌   | 33/50 [00:12<00:06,  2.62it/s]

Epoch 33/50, Loss: 0.0062


Training Epochs:  68%|██████▊   | 34/50 [00:13<00:06,  2.64it/s]

Epoch 34/50, Loss: 0.0062


Training Epochs:  70%|███████   | 35/50 [00:13<00:05,  2.65it/s]

Epoch 35/50, Loss: 0.0062


Training Epochs:  72%|███████▏  | 36/50 [00:13<00:05,  2.68it/s]

Epoch 36/50, Loss: 0.0062


Training Epochs:  74%|███████▍  | 37/50 [00:14<00:04,  2.69it/s]

Epoch 37/50, Loss: 0.0062


Training Epochs:  76%|███████▌  | 38/50 [00:14<00:04,  2.67it/s]

Epoch 38/50, Loss: 0.0062


Training Epochs:  78%|███████▊  | 39/50 [00:14<00:04,  2.65it/s]

Epoch 39/50, Loss: 0.0062


Training Epochs:  80%|████████  | 40/50 [00:15<00:03,  2.56it/s]

Epoch 40/50, Loss: 0.0062


Training Epochs:  82%|████████▏ | 41/50 [00:15<00:03,  2.52it/s]

Epoch 41/50, Loss: 0.0062


Training Epochs:  84%|████████▍ | 42/50 [00:16<00:03,  2.52it/s]

Epoch 42/50, Loss: 0.0062


Training Epochs:  86%|████████▌ | 43/50 [00:16<00:02,  2.55it/s]

Epoch 43/50, Loss: 0.0062


Training Epochs:  88%|████████▊ | 44/50 [00:16<00:02,  2.55it/s]

Epoch 44/50, Loss: 0.0062


Training Epochs:  90%|█████████ | 45/50 [00:17<00:01,  2.59it/s]

Epoch 45/50, Loss: 0.0062


Training Epochs:  92%|█████████▏| 46/50 [00:17<00:01,  2.62it/s]

Epoch 46/50, Loss: 0.0062


Training Epochs:  94%|█████████▍| 47/50 [00:18<00:01,  2.59it/s]

Epoch 47/50, Loss: 0.0062


Training Epochs:  96%|█████████▌| 48/50 [00:18<00:00,  2.64it/s]

Epoch 48/50, Loss: 0.0062


Training Epochs:  98%|█████████▊| 49/50 [00:18<00:00,  2.63it/s]

Epoch 49/50, Loss: 0.0062


Training Epochs: 100%|██████████| 50/50 [00:19<00:00,  2.60it/s]

Epoch 50/50, Loss: 0.0062





Test Accuracy: 96.99%

Experimenting with Hidden Size: 64


Training Epochs:   2%|▏         | 1/50 [00:00<00:19,  2.56it/s]

Epoch 1/50, Loss: 0.4436


Training Epochs:   4%|▍         | 2/50 [00:00<00:18,  2.54it/s]

Epoch 2/50, Loss: 0.0926


Training Epochs:   6%|▌         | 3/50 [00:01<00:18,  2.57it/s]

Epoch 3/50, Loss: 0.0552


Training Epochs:   8%|▊         | 4/50 [00:01<00:18,  2.55it/s]

Epoch 4/50, Loss: 0.0364


Training Epochs:  10%|█         | 5/50 [00:01<00:17,  2.56it/s]

Epoch 5/50, Loss: 0.0207


Training Epochs:  12%|█▏        | 6/50 [00:02<00:17,  2.56it/s]

Epoch 6/50, Loss: 0.0143


Training Epochs:  14%|█▍        | 7/50 [00:02<00:17,  2.52it/s]

Epoch 7/50, Loss: 0.0106


Training Epochs:  16%|█▌        | 8/50 [00:03<00:16,  2.55it/s]

Epoch 8/50, Loss: 0.0080


Training Epochs:  18%|█▊        | 9/50 [00:03<00:15,  2.58it/s]

Epoch 9/50, Loss: 0.0054


Training Epochs:  20%|██        | 10/50 [00:03<00:15,  2.58it/s]

Epoch 10/50, Loss: 0.0041


Training Epochs:  22%|██▏       | 11/50 [00:04<00:14,  2.61it/s]

Epoch 11/50, Loss: 0.0032


Training Epochs:  24%|██▍       | 12/50 [00:04<00:14,  2.60it/s]

Epoch 12/50, Loss: 0.0031


Training Epochs:  26%|██▌       | 13/50 [00:05<00:14,  2.60it/s]

Epoch 13/50, Loss: 0.0030


Training Epochs:  28%|██▊       | 14/50 [00:05<00:13,  2.60it/s]

Epoch 14/50, Loss: 0.0029


Training Epochs:  30%|███       | 15/50 [00:05<00:13,  2.59it/s]

Epoch 15/50, Loss: 0.0029


Training Epochs:  32%|███▏      | 16/50 [00:06<00:13,  2.60it/s]

Epoch 16/50, Loss: 0.0028


Training Epochs:  34%|███▍      | 17/50 [00:06<00:12,  2.63it/s]

Epoch 17/50, Loss: 0.0028


Training Epochs:  36%|███▌      | 18/50 [00:06<00:12,  2.59it/s]

Epoch 18/50, Loss: 0.0027


Training Epochs:  38%|███▊      | 19/50 [00:07<00:11,  2.59it/s]

Epoch 19/50, Loss: 0.0027


Training Epochs:  40%|████      | 20/50 [00:07<00:11,  2.56it/s]

Epoch 20/50, Loss: 0.0026


Training Epochs:  42%|████▏     | 21/50 [00:08<00:11,  2.55it/s]

Epoch 21/50, Loss: 0.0025


Training Epochs:  44%|████▍     | 22/50 [00:08<00:11,  2.53it/s]

Epoch 22/50, Loss: 0.0025


Training Epochs:  46%|████▌     | 23/50 [00:08<00:10,  2.49it/s]

Epoch 23/50, Loss: 0.0025


Training Epochs:  48%|████▊     | 24/50 [00:09<00:10,  2.41it/s]

Epoch 24/50, Loss: 0.0025


Training Epochs:  50%|█████     | 25/50 [00:09<00:10,  2.46it/s]

Epoch 25/50, Loss: 0.0025


Training Epochs:  52%|█████▏    | 26/50 [00:10<00:09,  2.52it/s]

Epoch 26/50, Loss: 0.0025


Training Epochs:  54%|█████▍    | 27/50 [00:10<00:09,  2.49it/s]

Epoch 27/50, Loss: 0.0025


Training Epochs:  56%|█████▌    | 28/50 [00:10<00:08,  2.53it/s]

Epoch 28/50, Loss: 0.0025


Training Epochs:  58%|█████▊    | 29/50 [00:11<00:08,  2.55it/s]

Epoch 29/50, Loss: 0.0025


Training Epochs:  60%|██████    | 30/50 [00:11<00:07,  2.53it/s]

Epoch 30/50, Loss: 0.0025


Training Epochs:  62%|██████▏   | 31/50 [00:12<00:07,  2.56it/s]

Epoch 31/50, Loss: 0.0025


Training Epochs:  64%|██████▍   | 32/50 [00:12<00:07,  2.55it/s]

Epoch 32/50, Loss: 0.0025


Training Epochs:  66%|██████▌   | 33/50 [00:12<00:06,  2.55it/s]

Epoch 33/50, Loss: 0.0025


Training Epochs:  68%|██████▊   | 34/50 [00:13<00:06,  2.54it/s]

Epoch 34/50, Loss: 0.0025


Training Epochs:  70%|███████   | 35/50 [00:13<00:05,  2.53it/s]

Epoch 35/50, Loss: 0.0025


Training Epochs:  72%|███████▏  | 36/50 [00:14<00:05,  2.56it/s]

Epoch 36/50, Loss: 0.0025


Training Epochs:  74%|███████▍  | 37/50 [00:14<00:05,  2.58it/s]

Epoch 37/50, Loss: 0.0025


Training Epochs:  76%|███████▌  | 38/50 [00:14<00:04,  2.57it/s]

Epoch 38/50, Loss: 0.0025


Training Epochs:  78%|███████▊  | 39/50 [00:15<00:04,  2.55it/s]

Epoch 39/50, Loss: 0.0025


Training Epochs:  80%|████████  | 40/50 [00:15<00:03,  2.53it/s]

Epoch 40/50, Loss: 0.0025


Training Epochs:  82%|████████▏ | 41/50 [00:16<00:03,  2.51it/s]

Epoch 41/50, Loss: 0.0025


Training Epochs:  84%|████████▍ | 42/50 [00:16<00:03,  2.51it/s]

Epoch 42/50, Loss: 0.0025


Training Epochs:  86%|████████▌ | 43/50 [00:16<00:02,  2.52it/s]

Epoch 43/50, Loss: 0.0025


Training Epochs:  88%|████████▊ | 44/50 [00:17<00:02,  2.54it/s]

Epoch 44/50, Loss: 0.0025


Training Epochs:  90%|█████████ | 45/50 [00:17<00:01,  2.53it/s]

Epoch 45/50, Loss: 0.0025


Training Epochs:  92%|█████████▏| 46/50 [00:18<00:01,  2.55it/s]

Epoch 46/50, Loss: 0.0025


Training Epochs:  94%|█████████▍| 47/50 [00:18<00:01,  2.54it/s]

Epoch 47/50, Loss: 0.0025


Training Epochs:  96%|█████████▌| 48/50 [00:18<00:00,  2.56it/s]

Epoch 48/50, Loss: 0.0025


Training Epochs:  98%|█████████▊| 49/50 [00:19<00:00,  2.56it/s]

Epoch 49/50, Loss: 0.0025


Training Epochs: 100%|██████████| 50/50 [00:19<00:00,  2.55it/s]

Epoch 50/50, Loss: 0.0025





Test Accuracy: 98.04%


In [9]:
# Experiment: Pooling Types
for pooling_type in pooling_types:
    print(f"\nExperimenting with Pooling Type: {pooling_type}")
    model = MarkovRNN(input_size, 32, num_layers=1, output_size=output_size, pooling_type=pooling_type)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    train_model(model, train_loader, optimizer, criterion, num_epochs=50, scheduler=scheduler)
    accuracy = evaluate_model(model, test_loader)
    results['pooling_type'][pooling_type] = accuracy


Experimenting with Pooling Type: max


Training Epochs:   2%|▏         | 1/50 [00:00<00:19,  2.49it/s]

Epoch 1/50, Loss: 0.6704


Training Epochs:   4%|▍         | 2/50 [00:00<00:19,  2.48it/s]

Epoch 2/50, Loss: 0.1244


Training Epochs:   6%|▌         | 3/50 [00:01<00:19,  2.45it/s]

Epoch 3/50, Loss: 0.0769


Training Epochs:   8%|▊         | 4/50 [00:01<00:18,  2.49it/s]

Epoch 4/50, Loss: 0.0522


Training Epochs:  10%|█         | 5/50 [00:01<00:17,  2.56it/s]

Epoch 5/50, Loss: 0.0410


Training Epochs:  12%|█▏        | 6/50 [00:02<00:17,  2.54it/s]

Epoch 6/50, Loss: 0.0297


Training Epochs:  14%|█▍        | 7/50 [00:02<00:16,  2.56it/s]

Epoch 7/50, Loss: 0.0222


Training Epochs:  16%|█▌        | 8/50 [00:03<00:16,  2.59it/s]

Epoch 8/50, Loss: 0.0175


Training Epochs:  18%|█▊        | 9/50 [00:03<00:15,  2.61it/s]

Epoch 9/50, Loss: 0.0133


Training Epochs:  20%|██        | 10/50 [00:03<00:15,  2.62it/s]

Epoch 10/50, Loss: 0.0106


Training Epochs:  22%|██▏       | 11/50 [00:04<00:15,  2.55it/s]

Epoch 11/50, Loss: 0.0077


Training Epochs:  24%|██▍       | 12/50 [00:04<00:14,  2.57it/s]

Epoch 12/50, Loss: 0.0073


Training Epochs:  26%|██▌       | 13/50 [00:05<00:14,  2.61it/s]

Epoch 13/50, Loss: 0.0071


Training Epochs:  28%|██▊       | 14/50 [00:05<00:13,  2.62it/s]

Epoch 14/50, Loss: 0.0070


Training Epochs:  30%|███       | 15/50 [00:05<00:13,  2.62it/s]

Epoch 15/50, Loss: 0.0069


Training Epochs:  32%|███▏      | 16/50 [00:06<00:12,  2.63it/s]

Epoch 16/50, Loss: 0.0067


Training Epochs:  34%|███▍      | 17/50 [00:06<00:12,  2.61it/s]

Epoch 17/50, Loss: 0.0066


Training Epochs:  36%|███▌      | 18/50 [00:06<00:12,  2.60it/s]

Epoch 18/50, Loss: 0.0064


Training Epochs:  38%|███▊      | 19/50 [00:07<00:11,  2.61it/s]

Epoch 19/50, Loss: 0.0063


Training Epochs:  40%|████      | 20/50 [00:07<00:11,  2.59it/s]

Epoch 20/50, Loss: 0.0062


Training Epochs:  42%|████▏     | 21/50 [00:08<00:11,  2.62it/s]

Epoch 21/50, Loss: 0.0060


Training Epochs:  44%|████▍     | 22/50 [00:08<00:10,  2.63it/s]

Epoch 22/50, Loss: 0.0060


Training Epochs:  46%|████▌     | 23/50 [00:08<00:10,  2.57it/s]

Epoch 23/50, Loss: 0.0060


Training Epochs:  48%|████▊     | 24/50 [00:09<00:10,  2.57it/s]

Epoch 24/50, Loss: 0.0060


Training Epochs:  50%|█████     | 25/50 [00:09<00:09,  2.61it/s]

Epoch 25/50, Loss: 0.0060


Training Epochs:  52%|█████▏    | 26/50 [00:10<00:09,  2.61it/s]

Epoch 26/50, Loss: 0.0059


Training Epochs:  54%|█████▍    | 27/50 [00:10<00:08,  2.64it/s]

Epoch 27/50, Loss: 0.0059


Training Epochs:  56%|█████▌    | 28/50 [00:10<00:08,  2.61it/s]

Epoch 28/50, Loss: 0.0059


Training Epochs:  58%|█████▊    | 29/50 [00:11<00:08,  2.58it/s]

Epoch 29/50, Loss: 0.0059


Training Epochs:  60%|██████    | 30/50 [00:11<00:07,  2.52it/s]

Epoch 30/50, Loss: 0.0059


Training Epochs:  62%|██████▏   | 31/50 [00:12<00:07,  2.53it/s]

Epoch 31/50, Loss: 0.0058


Training Epochs:  64%|██████▍   | 32/50 [00:12<00:07,  2.53it/s]

Epoch 32/50, Loss: 0.0058


Training Epochs:  66%|██████▌   | 33/50 [00:12<00:06,  2.49it/s]

Epoch 33/50, Loss: 0.0058


Training Epochs:  68%|██████▊   | 34/50 [00:13<00:06,  2.47it/s]

Epoch 34/50, Loss: 0.0058


Training Epochs:  70%|███████   | 35/50 [00:13<00:06,  2.48it/s]

Epoch 35/50, Loss: 0.0058


Training Epochs:  72%|███████▏  | 36/50 [00:14<00:05,  2.52it/s]

Epoch 36/50, Loss: 0.0058


Training Epochs:  74%|███████▍  | 37/50 [00:14<00:05,  2.57it/s]

Epoch 37/50, Loss: 0.0059


Training Epochs:  76%|███████▌  | 38/50 [00:14<00:04,  2.58it/s]

Epoch 38/50, Loss: 0.0058


Training Epochs:  78%|███████▊  | 39/50 [00:15<00:04,  2.57it/s]

Epoch 39/50, Loss: 0.0058


Training Epochs:  80%|████████  | 40/50 [00:15<00:03,  2.52it/s]

Epoch 40/50, Loss: 0.0058


Training Epochs:  82%|████████▏ | 41/50 [00:15<00:03,  2.53it/s]

Epoch 41/50, Loss: 0.0058


Training Epochs:  84%|████████▍ | 42/50 [00:16<00:03,  2.55it/s]

Epoch 42/50, Loss: 0.0059


Training Epochs:  86%|████████▌ | 43/50 [00:16<00:02,  2.52it/s]

Epoch 43/50, Loss: 0.0058


Training Epochs:  88%|████████▊ | 44/50 [00:17<00:02,  2.52it/s]

Epoch 44/50, Loss: 0.0058


Training Epochs:  90%|█████████ | 45/50 [00:17<00:01,  2.54it/s]

Epoch 45/50, Loss: 0.0058


Training Epochs:  92%|█████████▏| 46/50 [00:17<00:01,  2.55it/s]

Epoch 46/50, Loss: 0.0058


Training Epochs:  94%|█████████▍| 47/50 [00:18<00:01,  2.58it/s]

Epoch 47/50, Loss: 0.0058


Training Epochs:  96%|█████████▌| 48/50 [00:18<00:00,  2.55it/s]

Epoch 48/50, Loss: 0.0058


Training Epochs:  98%|█████████▊| 49/50 [00:19<00:00,  2.53it/s]

Epoch 49/50, Loss: 0.0058


Training Epochs: 100%|██████████| 50/50 [00:19<00:00,  2.56it/s]

Epoch 50/50, Loss: 0.0058





Test Accuracy: 96.73%

Experimenting with Pooling Type: avg


Training Epochs:   2%|▏         | 1/50 [00:00<00:20,  2.41it/s]

Epoch 1/50, Loss: 0.6706


Training Epochs:   4%|▍         | 2/50 [00:00<00:19,  2.46it/s]

Epoch 2/50, Loss: 0.1289


Training Epochs:   6%|▌         | 3/50 [00:01<00:19,  2.44it/s]

Epoch 3/50, Loss: 0.0814


Training Epochs:   8%|▊         | 4/50 [00:01<00:18,  2.50it/s]

Epoch 4/50, Loss: 0.0564


Training Epochs:  10%|█         | 5/50 [00:02<00:17,  2.50it/s]

Epoch 5/50, Loss: 0.0398


Training Epochs:  12%|█▏        | 6/50 [00:02<00:17,  2.55it/s]

Epoch 6/50, Loss: 0.0315


Training Epochs:  14%|█▍        | 7/50 [00:02<00:16,  2.53it/s]

Epoch 7/50, Loss: 0.0223


Training Epochs:  16%|█▌        | 8/50 [00:03<00:16,  2.53it/s]

Epoch 8/50, Loss: 0.0173


Training Epochs:  18%|█▊        | 9/50 [00:03<00:16,  2.55it/s]

Epoch 9/50, Loss: 0.0136


Training Epochs:  20%|██        | 10/50 [00:03<00:15,  2.54it/s]

Epoch 10/50, Loss: 0.0110


Training Epochs:  22%|██▏       | 11/50 [00:04<00:15,  2.48it/s]

Epoch 11/50, Loss: 0.0091


Training Epochs:  24%|██▍       | 12/50 [00:04<00:15,  2.44it/s]

Epoch 12/50, Loss: 0.0082


Training Epochs:  26%|██▌       | 13/50 [00:05<00:15,  2.46it/s]

Epoch 13/50, Loss: 0.0079


Training Epochs:  28%|██▊       | 14/50 [00:05<00:14,  2.49it/s]

Epoch 14/50, Loss: 0.0076


Training Epochs:  30%|███       | 15/50 [00:06<00:13,  2.50it/s]

Epoch 15/50, Loss: 0.0076


Training Epochs:  32%|███▏      | 16/50 [00:06<00:13,  2.53it/s]

Epoch 16/50, Loss: 0.0073


Training Epochs:  34%|███▍      | 17/50 [00:06<00:13,  2.50it/s]

Epoch 17/50, Loss: 0.0071


Training Epochs:  36%|███▌      | 18/50 [00:07<00:12,  2.51it/s]

Epoch 18/50, Loss: 0.0070


Training Epochs:  38%|███▊      | 19/50 [00:07<00:12,  2.53it/s]

Epoch 19/50, Loss: 0.0068


Training Epochs:  40%|████      | 20/50 [00:07<00:11,  2.53it/s]

Epoch 20/50, Loss: 0.0067


Training Epochs:  42%|████▏     | 21/50 [00:08<00:11,  2.50it/s]

Epoch 21/50, Loss: 0.0065


Training Epochs:  44%|████▍     | 22/50 [00:08<00:11,  2.49it/s]

Epoch 22/50, Loss: 0.0065


Training Epochs:  46%|████▌     | 23/50 [00:09<00:10,  2.48it/s]

Epoch 23/50, Loss: 0.0065


Training Epochs:  48%|████▊     | 24/50 [00:09<00:10,  2.50it/s]

Epoch 24/50, Loss: 0.0064


Training Epochs:  50%|█████     | 25/50 [00:09<00:09,  2.51it/s]

Epoch 25/50, Loss: 0.0064


Training Epochs:  52%|█████▏    | 26/50 [00:10<00:09,  2.51it/s]

Epoch 26/50, Loss: 0.0064


Training Epochs:  54%|█████▍    | 27/50 [00:10<00:09,  2.48it/s]

Epoch 27/50, Loss: 0.0064


Training Epochs:  56%|█████▌    | 28/50 [00:11<00:08,  2.51it/s]

Epoch 28/50, Loss: 0.0063


Training Epochs:  58%|█████▊    | 29/50 [00:11<00:08,  2.53it/s]

Epoch 29/50, Loss: 0.0063


Training Epochs:  60%|██████    | 30/50 [00:11<00:07,  2.54it/s]

Epoch 30/50, Loss: 0.0063


Training Epochs:  62%|██████▏   | 31/50 [00:12<00:07,  2.51it/s]

Epoch 31/50, Loss: 0.0063


Training Epochs:  64%|██████▍   | 32/50 [00:12<00:07,  2.52it/s]

Epoch 32/50, Loss: 0.0063


Training Epochs:  66%|██████▌   | 33/50 [00:13<00:06,  2.53it/s]

Epoch 33/50, Loss: 0.0063


Training Epochs:  68%|██████▊   | 34/50 [00:13<00:06,  2.53it/s]

Epoch 34/50, Loss: 0.0063


Training Epochs:  70%|███████   | 35/50 [00:13<00:05,  2.53it/s]

Epoch 35/50, Loss: 0.0063


Training Epochs:  72%|███████▏  | 36/50 [00:14<00:05,  2.52it/s]

Epoch 36/50, Loss: 0.0063


Training Epochs:  74%|███████▍  | 37/50 [00:14<00:05,  2.49it/s]

Epoch 37/50, Loss: 0.0063


Training Epochs:  76%|███████▌  | 38/50 [00:15<00:04,  2.51it/s]

Epoch 38/50, Loss: 0.0063


Training Epochs:  78%|███████▊  | 39/50 [00:15<00:04,  2.50it/s]

Epoch 39/50, Loss: 0.0063


Training Epochs:  80%|████████  | 40/50 [00:15<00:04,  2.50it/s]

Epoch 40/50, Loss: 0.0063


Training Epochs:  82%|████████▏ | 41/50 [00:16<00:03,  2.45it/s]

Epoch 41/50, Loss: 0.0063


Training Epochs:  84%|████████▍ | 42/50 [00:16<00:03,  2.45it/s]

Epoch 42/50, Loss: 0.0063


Training Epochs:  86%|████████▌ | 43/50 [00:17<00:02,  2.39it/s]

Epoch 43/50, Loss: 0.0063


Training Epochs:  88%|████████▊ | 44/50 [00:17<00:02,  2.42it/s]

Epoch 44/50, Loss: 0.0063


Training Epochs:  90%|█████████ | 45/50 [00:18<00:02,  2.40it/s]

Epoch 45/50, Loss: 0.0063


Training Epochs:  92%|█████████▏| 46/50 [00:18<00:01,  2.42it/s]

Epoch 46/50, Loss: 0.0063


Training Epochs:  94%|█████████▍| 47/50 [00:18<00:01,  2.44it/s]

Epoch 47/50, Loss: 0.0063


Training Epochs:  96%|█████████▌| 48/50 [00:19<00:00,  2.48it/s]

Epoch 48/50, Loss: 0.0063


Training Epochs:  98%|█████████▊| 49/50 [00:19<00:00,  2.50it/s]

Epoch 49/50, Loss: 0.0063


Training Epochs: 100%|██████████| 50/50 [00:20<00:00,  2.49it/s]

Epoch 50/50, Loss: 0.0063





Test Accuracy: 96.73%


In [10]:
# Experiment: Optimizers
for optimizer_name, optimizer_class in optimizers.items():
    print(f"\nExperimenting with Optimizer: {optimizer_name}")
    model = MarkovRNN(input_size, 32, num_layers=1, output_size=output_size, pooling_type='avg')
    criterion = nn.CrossEntropyLoss()
    optimizer = optimizer_class(model.parameters(), lr=0.01)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    train_model(model, train_loader, optimizer, criterion, num_epochs=50, scheduler=scheduler)
    accuracy = evaluate_model(model, test_loader)
    results['optimizer'][optimizer_name] = accuracy


Experimenting with Optimizer: SGD


Training Epochs:   2%|▏         | 1/50 [00:00<00:17,  2.76it/s]

Epoch 1/50, Loss: 2.2182


Training Epochs:   4%|▍         | 2/50 [00:00<00:17,  2.73it/s]

Epoch 2/50, Loss: 1.9678


Training Epochs:   6%|▌         | 3/50 [00:01<00:17,  2.73it/s]

Epoch 3/50, Loss: 1.7471


Training Epochs:   8%|▊         | 4/50 [00:01<00:17,  2.62it/s]

Epoch 4/50, Loss: 1.5537


Training Epochs:  10%|█         | 5/50 [00:01<00:17,  2.62it/s]

Epoch 5/50, Loss: 1.3847


Training Epochs:  12%|█▏        | 6/50 [00:02<00:16,  2.70it/s]

Epoch 6/50, Loss: 1.2384


Training Epochs:  14%|█▍        | 7/50 [00:02<00:16,  2.66it/s]

Epoch 7/50, Loss: 1.1125


Training Epochs:  16%|█▌        | 8/50 [00:02<00:15,  2.70it/s]

Epoch 8/50, Loss: 1.0038


Training Epochs:  18%|█▊        | 9/50 [00:03<00:15,  2.73it/s]

Epoch 9/50, Loss: 0.9103


Training Epochs:  20%|██        | 10/50 [00:03<00:15,  2.67it/s]

Epoch 10/50, Loss: 0.8295


Training Epochs:  22%|██▏       | 11/50 [00:04<00:14,  2.67it/s]

Epoch 11/50, Loss: 0.7881


Training Epochs:  24%|██▍       | 12/50 [00:04<00:14,  2.70it/s]

Epoch 12/50, Loss: 0.7812


Training Epochs:  26%|██▌       | 13/50 [00:04<00:13,  2.75it/s]

Epoch 13/50, Loss: 0.7741


Training Epochs:  28%|██▊       | 14/50 [00:05<00:13,  2.71it/s]

Epoch 14/50, Loss: 0.7676


Training Epochs:  30%|███       | 15/50 [00:05<00:13,  2.66it/s]

Epoch 15/50, Loss: 0.7612


Training Epochs:  32%|███▏      | 16/50 [00:05<00:12,  2.69it/s]

Epoch 16/50, Loss: 0.7548


Training Epochs:  34%|███▍      | 17/50 [00:06<00:12,  2.71it/s]

Epoch 17/50, Loss: 0.7477


Training Epochs:  36%|███▌      | 18/50 [00:06<00:12,  2.63it/s]

Epoch 18/50, Loss: 0.7420


Training Epochs:  38%|███▊      | 19/50 [00:07<00:11,  2.62it/s]

Epoch 19/50, Loss: 0.7350


Training Epochs:  40%|████      | 20/50 [00:07<00:11,  2.67it/s]

Epoch 20/50, Loss: 0.7292


Training Epochs:  42%|████▏     | 21/50 [00:07<00:11,  2.63it/s]

Epoch 21/50, Loss: 0.7254


Training Epochs:  44%|████▍     | 22/50 [00:08<00:10,  2.60it/s]

Epoch 22/50, Loss: 0.7248


Training Epochs:  46%|████▌     | 23/50 [00:08<00:10,  2.54it/s]

Epoch 23/50, Loss: 0.7247


Training Epochs:  48%|████▊     | 24/50 [00:09<00:10,  2.56it/s]

Epoch 24/50, Loss: 0.7242


Training Epochs:  50%|█████     | 25/50 [00:09<00:09,  2.55it/s]

Epoch 25/50, Loss: 0.7239


Training Epochs:  52%|█████▏    | 26/50 [00:09<00:09,  2.56it/s]

Epoch 26/50, Loss: 0.7225


Training Epochs:  54%|█████▍    | 27/50 [00:10<00:08,  2.64it/s]

Epoch 27/50, Loss: 0.7224


Training Epochs:  56%|█████▌    | 28/50 [00:10<00:08,  2.63it/s]

Epoch 28/50, Loss: 0.7218


Training Epochs:  58%|█████▊    | 29/50 [00:10<00:07,  2.66it/s]

Epoch 29/50, Loss: 0.7210


Training Epochs:  60%|██████    | 30/50 [00:11<00:07,  2.65it/s]

Epoch 30/50, Loss: 0.7208


Training Epochs:  62%|██████▏   | 31/50 [00:11<00:07,  2.65it/s]

Epoch 31/50, Loss: 0.7203


Training Epochs:  64%|██████▍   | 32/50 [00:12<00:06,  2.62it/s]

Epoch 32/50, Loss: 0.7196


Training Epochs:  66%|██████▌   | 33/50 [00:12<00:06,  2.66it/s]

Epoch 33/50, Loss: 0.7198


Training Epochs:  68%|██████▊   | 34/50 [00:12<00:06,  2.64it/s]

Epoch 34/50, Loss: 0.7203


Training Epochs:  70%|███████   | 35/50 [00:13<00:05,  2.66it/s]

Epoch 35/50, Loss: 0.7197


Training Epochs:  72%|███████▏  | 36/50 [00:13<00:05,  2.68it/s]

Epoch 36/50, Loss: 0.7202


Training Epochs:  74%|███████▍  | 37/50 [00:13<00:04,  2.62it/s]

Epoch 37/50, Loss: 0.7198


Training Epochs:  76%|███████▌  | 38/50 [00:14<00:04,  2.63it/s]

Epoch 38/50, Loss: 0.7196


Training Epochs:  78%|███████▊  | 39/50 [00:14<00:04,  2.62it/s]

Epoch 39/50, Loss: 0.7191


Training Epochs:  80%|████████  | 40/50 [00:15<00:03,  2.66it/s]

Epoch 40/50, Loss: 0.7203


Training Epochs:  82%|████████▏ | 41/50 [00:15<00:03,  2.65it/s]

Epoch 41/50, Loss: 0.7193


Training Epochs:  84%|████████▍ | 42/50 [00:15<00:03,  2.62it/s]

Epoch 42/50, Loss: 0.7194


Training Epochs:  86%|████████▌ | 43/50 [00:16<00:02,  2.60it/s]

Epoch 43/50, Loss: 0.7200


Training Epochs:  88%|████████▊ | 44/50 [00:16<00:02,  2.60it/s]

Epoch 44/50, Loss: 0.7197


Training Epochs:  90%|█████████ | 45/50 [00:17<00:01,  2.61it/s]

Epoch 45/50, Loss: 0.7192


Training Epochs:  92%|█████████▏| 46/50 [00:17<00:01,  2.64it/s]

Epoch 46/50, Loss: 0.7192


Training Epochs:  94%|█████████▍| 47/50 [00:17<00:01,  2.64it/s]

Epoch 47/50, Loss: 0.7193


Training Epochs:  96%|█████████▌| 48/50 [00:18<00:00,  2.62it/s]

Epoch 48/50, Loss: 0.7196


Training Epochs:  98%|█████████▊| 49/50 [00:18<00:00,  2.63it/s]

Epoch 49/50, Loss: 0.7190


Training Epochs: 100%|██████████| 50/50 [00:18<00:00,  2.64it/s]

Epoch 50/50, Loss: 0.7201





Test Accuracy: 90.07%

Experimenting with Optimizer: RMSProp


Training Epochs:   2%|▏         | 1/50 [00:00<00:19,  2.48it/s]

Epoch 1/50, Loss: 0.3020


Training Epochs:   4%|▍         | 2/50 [00:00<00:19,  2.41it/s]

Epoch 2/50, Loss: 0.0747


Training Epochs:   6%|▌         | 3/50 [00:01<00:19,  2.41it/s]

Epoch 3/50, Loss: 0.0437


Training Epochs:   8%|▊         | 4/50 [00:01<00:19,  2.37it/s]

Epoch 4/50, Loss: 0.0286


Training Epochs:  10%|█         | 5/50 [00:02<00:19,  2.36it/s]

Epoch 5/50, Loss: 0.0206


Training Epochs:  12%|█▏        | 6/50 [00:02<00:18,  2.35it/s]

Epoch 6/50, Loss: 0.0168


Training Epochs:  14%|█▍        | 7/50 [00:02<00:17,  2.42it/s]

Epoch 7/50, Loss: 0.0130


Training Epochs:  16%|█▌        | 8/50 [00:03<00:17,  2.44it/s]

Epoch 8/50, Loss: 0.0088


Training Epochs:  18%|█▊        | 9/50 [00:03<00:16,  2.43it/s]

Epoch 9/50, Loss: 0.0069


Training Epochs:  20%|██        | 10/50 [00:04<00:16,  2.49it/s]

Epoch 10/50, Loss: 0.0050


Training Epochs:  22%|██▏       | 11/50 [00:04<00:15,  2.49it/s]

Epoch 11/50, Loss: 0.0033


Training Epochs:  24%|██▍       | 12/50 [00:04<00:15,  2.51it/s]

Epoch 12/50, Loss: 0.0028


Training Epochs:  26%|██▌       | 13/50 [00:05<00:14,  2.52it/s]

Epoch 13/50, Loss: 0.0026


Training Epochs:  28%|██▊       | 14/50 [00:05<00:14,  2.54it/s]

Epoch 14/50, Loss: 0.0024


Training Epochs:  30%|███       | 15/50 [00:06<00:13,  2.52it/s]

Epoch 15/50, Loss: 0.0023


Training Epochs:  32%|███▏      | 16/50 [00:06<00:13,  2.54it/s]

Epoch 16/50, Loss: 0.0022


Training Epochs:  34%|███▍      | 17/50 [00:06<00:13,  2.53it/s]

Epoch 17/50, Loss: 0.0021


Training Epochs:  36%|███▌      | 18/50 [00:07<00:12,  2.53it/s]

Epoch 18/50, Loss: 0.0020


Training Epochs:  38%|███▊      | 19/50 [00:07<00:12,  2.54it/s]

Epoch 19/50, Loss: 0.0019


Training Epochs:  40%|████      | 20/50 [00:08<00:11,  2.52it/s]

Epoch 20/50, Loss: 0.0017


Training Epochs:  42%|████▏     | 21/50 [00:08<00:11,  2.53it/s]

Epoch 21/50, Loss: 0.0016


Training Epochs:  44%|████▍     | 22/50 [00:08<00:11,  2.53it/s]

Epoch 22/50, Loss: 0.0015


Training Epochs:  46%|████▌     | 23/50 [00:09<00:10,  2.54it/s]

Epoch 23/50, Loss: 0.0015


Training Epochs:  48%|████▊     | 24/50 [00:09<00:10,  2.52it/s]

Epoch 24/50, Loss: 0.0015


Training Epochs:  50%|█████     | 25/50 [00:10<00:09,  2.52it/s]

Epoch 25/50, Loss: 0.0015


Training Epochs:  52%|█████▏    | 26/50 [00:10<00:09,  2.54it/s]

Epoch 26/50, Loss: 0.0015


Training Epochs:  54%|█████▍    | 27/50 [00:10<00:09,  2.50it/s]

Epoch 27/50, Loss: 0.0015


Training Epochs:  56%|█████▌    | 28/50 [00:11<00:08,  2.49it/s]

Epoch 28/50, Loss: 0.0015


Training Epochs:  58%|█████▊    | 29/50 [00:11<00:08,  2.50it/s]

Epoch 29/50, Loss: 0.0014


Training Epochs:  60%|██████    | 30/50 [00:12<00:08,  2.50it/s]

Epoch 30/50, Loss: 0.0014


Training Epochs:  62%|██████▏   | 31/50 [00:12<00:07,  2.50it/s]

Epoch 31/50, Loss: 0.0014


Training Epochs:  64%|██████▍   | 32/50 [00:12<00:07,  2.56it/s]

Epoch 32/50, Loss: 0.0014


Training Epochs:  66%|██████▌   | 33/50 [00:13<00:06,  2.53it/s]

Epoch 33/50, Loss: 0.0014


Training Epochs:  68%|██████▊   | 34/50 [00:13<00:06,  2.46it/s]

Epoch 34/50, Loss: 0.0014


Training Epochs:  70%|███████   | 35/50 [00:14<00:06,  2.44it/s]

Epoch 35/50, Loss: 0.0014


Training Epochs:  72%|███████▏  | 36/50 [00:14<00:05,  2.42it/s]

Epoch 36/50, Loss: 0.0014


Training Epochs:  74%|███████▍  | 37/50 [00:14<00:05,  2.41it/s]

Epoch 37/50, Loss: 0.0014


Training Epochs:  76%|███████▌  | 38/50 [00:15<00:04,  2.46it/s]

Epoch 38/50, Loss: 0.0014


Training Epochs:  78%|███████▊  | 39/50 [00:15<00:04,  2.47it/s]

Epoch 39/50, Loss: 0.0014


Training Epochs:  80%|████████  | 40/50 [00:16<00:03,  2.50it/s]

Epoch 40/50, Loss: 0.0014


Training Epochs:  82%|████████▏ | 41/50 [00:16<00:03,  2.53it/s]

Epoch 41/50, Loss: 0.0014


Training Epochs:  84%|████████▍ | 42/50 [00:16<00:03,  2.55it/s]

Epoch 42/50, Loss: 0.0014


Training Epochs:  86%|████████▌ | 43/50 [00:17<00:02,  2.58it/s]

Epoch 43/50, Loss: 0.0014


Training Epochs:  88%|████████▊ | 44/50 [00:17<00:02,  2.54it/s]

Epoch 44/50, Loss: 0.0014


Training Epochs:  90%|█████████ | 45/50 [00:18<00:01,  2.54it/s]

Epoch 45/50, Loss: 0.0014


Training Epochs:  92%|█████████▏| 46/50 [00:18<00:01,  2.55it/s]

Epoch 46/50, Loss: 0.0014


Training Epochs:  94%|█████████▍| 47/50 [00:18<00:01,  2.55it/s]

Epoch 47/50, Loss: 0.0014


Training Epochs:  96%|█████████▌| 48/50 [00:19<00:00,  2.52it/s]

Epoch 48/50, Loss: 0.0014


Training Epochs:  98%|█████████▊| 49/50 [00:19<00:00,  2.54it/s]

Epoch 49/50, Loss: 0.0014


Training Epochs: 100%|██████████| 50/50 [00:20<00:00,  2.50it/s]

Epoch 50/50, Loss: 0.0014





Test Accuracy: 96.99%

Experimenting with Optimizer: Adam


Training Epochs:   2%|▏         | 1/50 [00:00<00:19,  2.46it/s]

Epoch 1/50, Loss: 0.6418


Training Epochs:   4%|▍         | 2/50 [00:00<00:19,  2.44it/s]

Epoch 2/50, Loss: 0.1210


Training Epochs:   6%|▌         | 3/50 [00:01<00:19,  2.46it/s]

Epoch 3/50, Loss: 0.0710


Training Epochs:   8%|▊         | 4/50 [00:01<00:18,  2.45it/s]

Epoch 4/50, Loss: 0.0500


Training Epochs:  10%|█         | 5/50 [00:02<00:18,  2.45it/s]

Epoch 5/50, Loss: 0.0354


Training Epochs:  12%|█▏        | 6/50 [00:02<00:17,  2.45it/s]

Epoch 6/50, Loss: 0.0265


Training Epochs:  14%|█▍        | 7/50 [00:02<00:17,  2.45it/s]

Epoch 7/50, Loss: 0.0207


Training Epochs:  16%|█▌        | 8/50 [00:03<00:17,  2.43it/s]

Epoch 8/50, Loss: 0.0162


Training Epochs:  18%|█▊        | 9/50 [00:03<00:16,  2.44it/s]

Epoch 9/50, Loss: 0.0123


Training Epochs:  20%|██        | 10/50 [00:04<00:16,  2.45it/s]

Epoch 10/50, Loss: 0.0093


Training Epochs:  22%|██▏       | 11/50 [00:04<00:16,  2.42it/s]

Epoch 11/50, Loss: 0.0074


Training Epochs:  24%|██▍       | 12/50 [00:04<00:15,  2.43it/s]

Epoch 12/50, Loss: 0.0070


Training Epochs:  26%|██▌       | 13/50 [00:05<00:15,  2.40it/s]

Epoch 13/50, Loss: 0.0068


Training Epochs:  28%|██▊       | 14/50 [00:05<00:15,  2.35it/s]

Epoch 14/50, Loss: 0.0067


Training Epochs:  30%|███       | 15/50 [00:06<00:14,  2.39it/s]

Epoch 15/50, Loss: 0.0065


Training Epochs:  32%|███▏      | 16/50 [00:06<00:14,  2.36it/s]

Epoch 16/50, Loss: 0.0064


Training Epochs:  34%|███▍      | 17/50 [00:07<00:13,  2.40it/s]

Epoch 17/50, Loss: 0.0062


Training Epochs:  36%|███▌      | 18/50 [00:07<00:13,  2.43it/s]

Epoch 18/50, Loss: 0.0061


Training Epochs:  38%|███▊      | 19/50 [00:07<00:12,  2.44it/s]

Epoch 19/50, Loss: 0.0060


Training Epochs:  40%|████      | 20/50 [00:08<00:12,  2.46it/s]

Epoch 20/50, Loss: 0.0059


Training Epochs:  42%|████▏     | 21/50 [00:08<00:11,  2.46it/s]

Epoch 21/50, Loss: 0.0057


Training Epochs:  44%|████▍     | 22/50 [00:09<00:11,  2.44it/s]

Epoch 22/50, Loss: 0.0057


Training Epochs:  46%|████▌     | 23/50 [00:09<00:10,  2.47it/s]

Epoch 23/50, Loss: 0.0057


Training Epochs:  48%|████▊     | 24/50 [00:09<00:10,  2.48it/s]

Epoch 24/50, Loss: 0.0056


Training Epochs:  50%|█████     | 25/50 [00:10<00:10,  2.42it/s]

Epoch 25/50, Loss: 0.0056


Training Epochs:  52%|█████▏    | 26/50 [00:10<00:09,  2.43it/s]

Epoch 26/50, Loss: 0.0056


Training Epochs:  54%|█████▍    | 27/50 [00:11<00:09,  2.45it/s]

Epoch 27/50, Loss: 0.0056


Training Epochs:  56%|█████▌    | 28/50 [00:11<00:08,  2.47it/s]

Epoch 28/50, Loss: 0.0056


Training Epochs:  58%|█████▊    | 29/50 [00:11<00:08,  2.48it/s]

Epoch 29/50, Loss: 0.0056


Training Epochs:  60%|██████    | 30/50 [00:12<00:08,  2.40it/s]

Epoch 30/50, Loss: 0.0056


Training Epochs:  62%|██████▏   | 31/50 [00:12<00:07,  2.41it/s]

Epoch 31/50, Loss: 0.0056


Training Epochs:  64%|██████▍   | 32/50 [00:13<00:07,  2.42it/s]

Epoch 32/50, Loss: 0.0056


Training Epochs:  66%|██████▌   | 33/50 [00:13<00:06,  2.43it/s]

Epoch 33/50, Loss: 0.0055


Training Epochs:  68%|██████▊   | 34/50 [00:13<00:06,  2.41it/s]

Epoch 34/50, Loss: 0.0055


Training Epochs:  70%|███████   | 35/50 [00:14<00:06,  2.40it/s]

Epoch 35/50, Loss: 0.0055


Training Epochs:  72%|███████▏  | 36/50 [00:14<00:05,  2.40it/s]

Epoch 36/50, Loss: 0.0055


Training Epochs:  74%|███████▍  | 37/50 [00:15<00:05,  2.37it/s]

Epoch 37/50, Loss: 0.0055


Training Epochs:  76%|███████▌  | 38/50 [00:15<00:04,  2.40it/s]

Epoch 38/50, Loss: 0.0056


Training Epochs:  78%|███████▊  | 39/50 [00:16<00:04,  2.42it/s]

Epoch 39/50, Loss: 0.0055


Training Epochs:  80%|████████  | 40/50 [00:16<00:04,  2.45it/s]

Epoch 40/50, Loss: 0.0055


Training Epochs:  82%|████████▏ | 41/50 [00:16<00:03,  2.40it/s]

Epoch 41/50, Loss: 0.0056


Training Epochs:  84%|████████▍ | 42/50 [00:17<00:03,  2.37it/s]

Epoch 42/50, Loss: 0.0055


Training Epochs:  86%|████████▌ | 43/50 [00:17<00:02,  2.35it/s]

Epoch 43/50, Loss: 0.0055


Training Epochs:  88%|████████▊ | 44/50 [00:18<00:02,  2.37it/s]

Epoch 44/50, Loss: 0.0055


Training Epochs:  90%|█████████ | 45/50 [00:18<00:02,  2.37it/s]

Epoch 45/50, Loss: 0.0055


Training Epochs:  92%|█████████▏| 46/50 [00:19<00:01,  2.38it/s]

Epoch 46/50, Loss: 0.0055


Training Epochs:  94%|█████████▍| 47/50 [00:19<00:01,  2.43it/s]

Epoch 47/50, Loss: 0.0055


Training Epochs:  96%|█████████▌| 48/50 [00:19<00:00,  2.39it/s]

Epoch 48/50, Loss: 0.0056


Training Epochs:  98%|█████████▊| 49/50 [00:20<00:00,  2.39it/s]

Epoch 49/50, Loss: 0.0055


Training Epochs: 100%|██████████| 50/50 [00:20<00:00,  2.42it/s]

Epoch 50/50, Loss: 0.0055





Test Accuracy: 97.39%


In [11]:
# Experiment: Epochs
for num_epochs in epochs_list:
    print(f"\nExperimenting with Epochs: {num_epochs}")
    model = MarkovRNN(input_size, 32, num_layers=1, output_size=output_size, pooling_type='avg')
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    train_model(model, train_loader, optimizer, criterion, num_epochs=num_epochs, scheduler=scheduler)
    accuracy = evaluate_model(model, test_loader)
    results['epochs'][num_epochs] = accuracy


Experimenting with Epochs: 5


Training Epochs:  20%|██        | 1/5 [00:00<00:01,  2.38it/s]

Epoch 1/5, Loss: 0.6502


Training Epochs:  40%|████      | 2/5 [00:00<00:01,  2.48it/s]

Epoch 2/5, Loss: 0.1244


Training Epochs:  60%|██████    | 3/5 [00:01<00:00,  2.44it/s]

Epoch 3/5, Loss: 0.0720


Training Epochs:  80%|████████  | 4/5 [00:01<00:00,  2.48it/s]

Epoch 4/5, Loss: 0.0485


Training Epochs: 100%|██████████| 5/5 [00:02<00:00,  2.45it/s]

Epoch 5/5, Loss: 0.0372





Test Accuracy: 97.25%

Experimenting with Epochs: 50


Training Epochs:   2%|▏         | 1/50 [00:00<00:21,  2.33it/s]

Epoch 1/50, Loss: 0.6536


Training Epochs:   4%|▍         | 2/50 [00:00<00:19,  2.41it/s]

Epoch 2/50, Loss: 0.1237


Training Epochs:   6%|▌         | 3/50 [00:01<00:19,  2.40it/s]

Epoch 3/50, Loss: 0.0720


Training Epochs:   8%|▊         | 4/50 [00:01<00:19,  2.40it/s]

Epoch 4/50, Loss: 0.0487


Training Epochs:  10%|█         | 5/50 [00:02<00:18,  2.41it/s]

Epoch 5/50, Loss: 0.0352


Training Epochs:  12%|█▏        | 6/50 [00:02<00:18,  2.42it/s]

Epoch 6/50, Loss: 0.0247


Training Epochs:  14%|█▍        | 7/50 [00:02<00:17,  2.39it/s]

Epoch 7/50, Loss: 0.0191


Training Epochs:  16%|█▌        | 8/50 [00:03<00:17,  2.43it/s]

Epoch 8/50, Loss: 0.0147


Training Epochs:  18%|█▊        | 9/50 [00:03<00:16,  2.42it/s]

Epoch 9/50, Loss: 0.0120


Training Epochs:  20%|██        | 10/50 [00:04<00:16,  2.44it/s]

Epoch 10/50, Loss: 0.0096


Training Epochs:  22%|██▏       | 11/50 [00:04<00:15,  2.44it/s]

Epoch 11/50, Loss: 0.0075


Training Epochs:  24%|██▍       | 12/50 [00:04<00:15,  2.44it/s]

Epoch 12/50, Loss: 0.0071


Training Epochs:  26%|██▌       | 13/50 [00:05<00:15,  2.40it/s]

Epoch 13/50, Loss: 0.0070


Training Epochs:  28%|██▊       | 14/50 [00:05<00:15,  2.36it/s]

Epoch 14/50, Loss: 0.0068


Training Epochs:  30%|███       | 15/50 [00:06<00:14,  2.36it/s]

Epoch 15/50, Loss: 0.0067


Training Epochs:  32%|███▏      | 16/50 [00:06<00:14,  2.35it/s]

Epoch 16/50, Loss: 0.0067


Training Epochs:  34%|███▍      | 17/50 [00:07<00:14,  2.35it/s]

Epoch 17/50, Loss: 0.0065


Training Epochs:  36%|███▌      | 18/50 [00:07<00:13,  2.32it/s]

Epoch 18/50, Loss: 0.0063


Training Epochs:  38%|███▊      | 19/50 [00:07<00:13,  2.32it/s]

Epoch 19/50, Loss: 0.0062


Training Epochs:  40%|████      | 20/50 [00:08<00:12,  2.37it/s]

Epoch 20/50, Loss: 0.0061


Training Epochs:  42%|████▏     | 21/50 [00:08<00:12,  2.33it/s]

Epoch 21/50, Loss: 0.0059


Training Epochs:  44%|████▍     | 22/50 [00:09<00:11,  2.35it/s]

Epoch 22/50, Loss: 0.0059


Training Epochs:  46%|████▌     | 23/50 [00:09<00:11,  2.35it/s]

Epoch 23/50, Loss: 0.0059


Training Epochs:  48%|████▊     | 24/50 [00:10<00:10,  2.39it/s]

Epoch 24/50, Loss: 0.0059


Training Epochs:  50%|█████     | 25/50 [00:10<00:10,  2.37it/s]

Epoch 25/50, Loss: 0.0058


Training Epochs:  52%|█████▏    | 26/50 [00:10<00:10,  2.40it/s]

Epoch 26/50, Loss: 0.0058


Training Epochs:  54%|█████▍    | 27/50 [00:11<00:09,  2.38it/s]

Epoch 27/50, Loss: 0.0058


Training Epochs:  56%|█████▌    | 28/50 [00:12<00:12,  1.80it/s]

Epoch 28/50, Loss: 0.0058


Training Epochs:  58%|█████▊    | 29/50 [00:12<00:10,  1.95it/s]

Epoch 29/50, Loss: 0.0058


Training Epochs:  60%|██████    | 30/50 [00:13<00:09,  2.08it/s]

Epoch 30/50, Loss: 0.0058


Training Epochs:  62%|██████▏   | 31/50 [00:13<00:08,  2.16it/s]

Epoch 31/50, Loss: 0.0058


Training Epochs:  64%|██████▍   | 32/50 [00:13<00:08,  2.19it/s]

Epoch 32/50, Loss: 0.0058


Training Epochs:  66%|██████▌   | 33/50 [00:14<00:07,  2.27it/s]

Epoch 33/50, Loss: 0.0057


Training Epochs:  68%|██████▊   | 34/50 [00:14<00:06,  2.30it/s]

Epoch 34/50, Loss: 0.0058


Training Epochs:  70%|███████   | 35/50 [00:15<00:06,  2.33it/s]

Epoch 35/50, Loss: 0.0058


Training Epochs:  72%|███████▏  | 36/50 [00:15<00:06,  2.32it/s]

Epoch 36/50, Loss: 0.0057


Training Epochs:  74%|███████▍  | 37/50 [00:15<00:05,  2.35it/s]

Epoch 37/50, Loss: 0.0057


Training Epochs:  76%|███████▌  | 38/50 [00:16<00:05,  2.35it/s]

Epoch 38/50, Loss: 0.0057


Training Epochs:  78%|███████▊  | 39/50 [00:16<00:04,  2.32it/s]

Epoch 39/50, Loss: 0.0057


Training Epochs:  80%|████████  | 40/50 [00:17<00:04,  2.35it/s]

Epoch 40/50, Loss: 0.0057


Training Epochs:  82%|████████▏ | 41/50 [00:17<00:03,  2.32it/s]

Epoch 41/50, Loss: 0.0057


Training Epochs:  84%|████████▍ | 42/50 [00:18<00:03,  2.29it/s]

Epoch 42/50, Loss: 0.0057


Training Epochs:  86%|████████▌ | 43/50 [00:18<00:03,  2.26it/s]

Epoch 43/50, Loss: 0.0057


Training Epochs:  88%|████████▊ | 44/50 [00:19<00:02,  2.24it/s]

Epoch 44/50, Loss: 0.0057


Training Epochs:  90%|█████████ | 45/50 [00:19<00:02,  2.27it/s]

Epoch 45/50, Loss: 0.0057


Training Epochs:  92%|█████████▏| 46/50 [00:19<00:01,  2.30it/s]

Epoch 46/50, Loss: 0.0057


Training Epochs:  94%|█████████▍| 47/50 [00:20<00:01,  2.33it/s]

Epoch 47/50, Loss: 0.0057


Training Epochs:  96%|█████████▌| 48/50 [00:20<00:00,  2.32it/s]

Epoch 48/50, Loss: 0.0057


Training Epochs:  98%|█████████▊| 49/50 [00:21<00:00,  2.32it/s]

Epoch 49/50, Loss: 0.0057


Training Epochs: 100%|██████████| 50/50 [00:21<00:00,  2.31it/s]

Epoch 50/50, Loss: 0.0057





Test Accuracy: 96.60%

Experimenting with Epochs: 100


Training Epochs:   1%|          | 1/100 [00:00<00:40,  2.42it/s]

Epoch 1/100, Loss: 0.6782


Training Epochs:   2%|▏         | 2/100 [00:00<00:42,  2.29it/s]

Epoch 2/100, Loss: 0.1352


Training Epochs:   3%|▎         | 3/100 [00:01<00:42,  2.29it/s]

Epoch 3/100, Loss: 0.0815


Training Epochs:   4%|▍         | 4/100 [00:01<00:41,  2.32it/s]

Epoch 4/100, Loss: 0.0565


Training Epochs:   5%|▌         | 5/100 [00:02<00:40,  2.34it/s]

Epoch 5/100, Loss: 0.0453


Training Epochs:   6%|▌         | 6/100 [00:02<00:39,  2.36it/s]

Epoch 6/100, Loss: 0.0303


Training Epochs:   7%|▋         | 7/100 [00:02<00:39,  2.35it/s]

Epoch 7/100, Loss: 0.0233


Training Epochs:   8%|▊         | 8/100 [00:03<00:39,  2.32it/s]

Epoch 8/100, Loss: 0.0177


Training Epochs:   9%|▉         | 9/100 [00:03<00:39,  2.29it/s]

Epoch 9/100, Loss: 0.0144


Training Epochs:  10%|█         | 10/100 [00:04<00:39,  2.27it/s]

Epoch 10/100, Loss: 0.0114


Training Epochs:  11%|█         | 11/100 [00:04<00:39,  2.28it/s]

Epoch 11/100, Loss: 0.0084


Training Epochs:  12%|█▏        | 12/100 [00:05<00:38,  2.29it/s]

Epoch 12/100, Loss: 0.0079


Training Epochs:  13%|█▎        | 13/100 [00:05<00:37,  2.29it/s]

Epoch 13/100, Loss: 0.0077


Training Epochs:  14%|█▍        | 14/100 [00:06<00:37,  2.32it/s]

Epoch 14/100, Loss: 0.0075


Training Epochs:  15%|█▌        | 15/100 [00:06<00:36,  2.34it/s]

Epoch 15/100, Loss: 0.0074


Training Epochs:  16%|█▌        | 16/100 [00:06<00:36,  2.32it/s]

Epoch 16/100, Loss: 0.0072


Training Epochs:  17%|█▋        | 17/100 [00:07<00:35,  2.34it/s]

Epoch 17/100, Loss: 0.0071


Training Epochs:  18%|█▊        | 18/100 [00:07<00:35,  2.31it/s]

Epoch 18/100, Loss: 0.0069


Training Epochs:  19%|█▉        | 19/100 [00:08<00:35,  2.30it/s]

Epoch 19/100, Loss: 0.0067


Training Epochs:  20%|██        | 20/100 [00:08<00:35,  2.27it/s]

Epoch 20/100, Loss: 0.0067


Training Epochs:  21%|██        | 21/100 [00:09<00:35,  2.24it/s]

Epoch 21/100, Loss: 0.0064


Training Epochs:  22%|██▏       | 22/100 [00:09<00:35,  2.21it/s]

Epoch 22/100, Loss: 0.0064


Training Epochs:  23%|██▎       | 23/100 [00:10<00:34,  2.25it/s]

Epoch 23/100, Loss: 0.0064


Training Epochs:  24%|██▍       | 24/100 [00:10<00:33,  2.28it/s]

Epoch 24/100, Loss: 0.0064


Training Epochs:  25%|██▌       | 25/100 [00:10<00:32,  2.30it/s]

Epoch 25/100, Loss: 0.0064


Training Epochs:  26%|██▌       | 26/100 [00:11<00:31,  2.33it/s]

Epoch 26/100, Loss: 0.0064


Training Epochs:  27%|██▋       | 27/100 [00:11<00:31,  2.31it/s]

Epoch 27/100, Loss: 0.0064


Training Epochs:  28%|██▊       | 28/100 [00:12<00:30,  2.35it/s]

Epoch 28/100, Loss: 0.0063


Training Epochs:  29%|██▉       | 29/100 [00:12<00:30,  2.34it/s]

Epoch 29/100, Loss: 0.0063


Training Epochs:  30%|███       | 30/100 [00:13<00:30,  2.33it/s]

Epoch 30/100, Loss: 0.0063


Training Epochs:  31%|███       | 31/100 [00:13<00:29,  2.33it/s]

Epoch 31/100, Loss: 0.0063


Training Epochs:  32%|███▏      | 32/100 [00:13<00:29,  2.31it/s]

Epoch 32/100, Loss: 0.0062


Training Epochs:  33%|███▎      | 33/100 [00:14<00:29,  2.31it/s]

Epoch 33/100, Loss: 0.0063


Training Epochs:  34%|███▍      | 34/100 [00:14<00:28,  2.32it/s]

Epoch 34/100, Loss: 0.0062


Training Epochs:  35%|███▌      | 35/100 [00:15<00:28,  2.29it/s]

Epoch 35/100, Loss: 0.0063


Training Epochs:  36%|███▌      | 36/100 [00:15<00:27,  2.31it/s]

Epoch 36/100, Loss: 0.0063


Training Epochs:  37%|███▋      | 37/100 [00:16<00:26,  2.34it/s]

Epoch 37/100, Loss: 0.0062


Training Epochs:  38%|███▊      | 38/100 [00:16<00:26,  2.36it/s]

Epoch 38/100, Loss: 0.0062


Training Epochs:  39%|███▉      | 39/100 [00:16<00:25,  2.37it/s]

Epoch 39/100, Loss: 0.0062


Training Epochs:  40%|████      | 40/100 [00:17<00:25,  2.37it/s]

Epoch 40/100, Loss: 0.0062


Training Epochs:  41%|████      | 41/100 [00:17<00:24,  2.37it/s]

Epoch 41/100, Loss: 0.0062


Training Epochs:  42%|████▏     | 42/100 [00:18<00:24,  2.38it/s]

Epoch 42/100, Loss: 0.0062


Training Epochs:  43%|████▎     | 43/100 [00:18<00:24,  2.34it/s]

Epoch 43/100, Loss: 0.0062


Training Epochs:  44%|████▍     | 44/100 [00:18<00:23,  2.37it/s]

Epoch 44/100, Loss: 0.0062


Training Epochs:  45%|████▌     | 45/100 [00:19<00:23,  2.37it/s]

Epoch 45/100, Loss: 0.0062


Training Epochs:  46%|████▌     | 46/100 [00:19<00:22,  2.38it/s]

Epoch 46/100, Loss: 0.0062


Training Epochs:  47%|████▋     | 47/100 [00:20<00:22,  2.34it/s]

Epoch 47/100, Loss: 0.0063


Training Epochs:  48%|████▊     | 48/100 [00:20<00:22,  2.28it/s]

Epoch 48/100, Loss: 0.0062


Training Epochs:  49%|████▉     | 49/100 [00:21<00:22,  2.29it/s]

Epoch 49/100, Loss: 0.0062


Training Epochs:  50%|█████     | 50/100 [00:21<00:22,  2.27it/s]

Epoch 50/100, Loss: 0.0062


Training Epochs:  51%|█████     | 51/100 [00:22<00:21,  2.25it/s]

Epoch 51/100, Loss: 0.0062


Training Epochs:  52%|█████▏    | 52/100 [00:22<00:21,  2.28it/s]

Epoch 52/100, Loss: 0.0062


Training Epochs:  53%|█████▎    | 53/100 [00:22<00:20,  2.32it/s]

Epoch 53/100, Loss: 0.0062


Training Epochs:  54%|█████▍    | 54/100 [00:23<00:20,  2.30it/s]

Epoch 54/100, Loss: 0.0062


Training Epochs:  55%|█████▌    | 55/100 [00:23<00:19,  2.31it/s]

Epoch 55/100, Loss: 0.0062


Training Epochs:  56%|█████▌    | 56/100 [00:24<00:18,  2.34it/s]

Epoch 56/100, Loss: 0.0062


Training Epochs:  57%|█████▋    | 57/100 [00:24<00:18,  2.33it/s]

Epoch 57/100, Loss: 0.0062


Training Epochs:  58%|█████▊    | 58/100 [00:25<00:18,  2.31it/s]

Epoch 58/100, Loss: 0.0062


Training Epochs:  59%|█████▉    | 59/100 [00:25<00:17,  2.33it/s]

Epoch 59/100, Loss: 0.0062


Training Epochs:  60%|██████    | 60/100 [00:25<00:17,  2.30it/s]

Epoch 60/100, Loss: 0.0062


Training Epochs:  61%|██████    | 61/100 [00:26<00:16,  2.32it/s]

Epoch 61/100, Loss: 0.0062


Training Epochs:  62%|██████▏   | 62/100 [00:26<00:16,  2.33it/s]

Epoch 62/100, Loss: 0.0062


Training Epochs:  63%|██████▎   | 63/100 [00:27<00:15,  2.33it/s]

Epoch 63/100, Loss: 0.0062


Training Epochs:  64%|██████▍   | 64/100 [00:27<00:15,  2.35it/s]

Epoch 64/100, Loss: 0.0062


Training Epochs:  65%|██████▌   | 65/100 [00:28<00:14,  2.35it/s]

Epoch 65/100, Loss: 0.0062


Training Epochs:  66%|██████▌   | 66/100 [00:28<00:14,  2.28it/s]

Epoch 66/100, Loss: 0.0062


Training Epochs:  67%|██████▋   | 67/100 [00:28<00:14,  2.30it/s]

Epoch 67/100, Loss: 0.0062


Training Epochs:  68%|██████▊   | 68/100 [00:29<00:14,  2.28it/s]

Epoch 68/100, Loss: 0.0062


Training Epochs:  69%|██████▉   | 69/100 [00:29<00:13,  2.31it/s]

Epoch 69/100, Loss: 0.0062


Training Epochs:  70%|███████   | 70/100 [00:30<00:13,  2.30it/s]

Epoch 70/100, Loss: 0.0062


Training Epochs:  71%|███████   | 71/100 [00:30<00:12,  2.29it/s]

Epoch 71/100, Loss: 0.0062


Training Epochs:  72%|███████▏  | 72/100 [00:31<00:12,  2.28it/s]

Epoch 72/100, Loss: 0.0062


Training Epochs:  73%|███████▎  | 73/100 [00:31<00:11,  2.28it/s]

Epoch 73/100, Loss: 0.0063


Training Epochs:  74%|███████▍  | 74/100 [00:31<00:11,  2.30it/s]

Epoch 74/100, Loss: 0.0062


Training Epochs:  75%|███████▌  | 75/100 [00:32<00:10,  2.28it/s]

Epoch 75/100, Loss: 0.0062


Training Epochs:  76%|███████▌  | 76/100 [00:32<00:10,  2.26it/s]

Epoch 76/100, Loss: 0.0062


Training Epochs:  77%|███████▋  | 77/100 [00:33<00:10,  2.28it/s]

Epoch 77/100, Loss: 0.0062


Training Epochs:  78%|███████▊  | 78/100 [00:33<00:09,  2.27it/s]

Epoch 78/100, Loss: 0.0062


Training Epochs:  79%|███████▉  | 79/100 [00:34<00:09,  2.29it/s]

Epoch 79/100, Loss: 0.0062


Training Epochs:  80%|████████  | 80/100 [00:34<00:08,  2.28it/s]

Epoch 80/100, Loss: 0.0062


Training Epochs:  81%|████████  | 81/100 [00:35<00:08,  2.29it/s]

Epoch 81/100, Loss: 0.0062


Training Epochs:  82%|████████▏ | 82/100 [00:35<00:07,  2.30it/s]

Epoch 82/100, Loss: 0.0062


Training Epochs:  83%|████████▎ | 83/100 [00:35<00:07,  2.28it/s]

Epoch 83/100, Loss: 0.0062


Training Epochs:  84%|████████▍ | 84/100 [00:36<00:06,  2.31it/s]

Epoch 84/100, Loss: 0.0063


Training Epochs:  85%|████████▌ | 85/100 [00:36<00:06,  2.31it/s]

Epoch 85/100, Loss: 0.0062


Training Epochs:  86%|████████▌ | 86/100 [00:37<00:06,  2.27it/s]

Epoch 86/100, Loss: 0.0062


Training Epochs:  87%|████████▋ | 87/100 [00:37<00:05,  2.31it/s]

Epoch 87/100, Loss: 0.0062


Training Epochs:  88%|████████▊ | 88/100 [00:38<00:05,  2.32it/s]

Epoch 88/100, Loss: 0.0062


Training Epochs:  89%|████████▉ | 89/100 [00:38<00:04,  2.33it/s]

Epoch 89/100, Loss: 0.0062


Training Epochs:  90%|█████████ | 90/100 [00:38<00:04,  2.32it/s]

Epoch 90/100, Loss: 0.0063


Training Epochs:  91%|█████████ | 91/100 [00:39<00:03,  2.37it/s]

Epoch 91/100, Loss: 0.0062


Training Epochs:  92%|█████████▏| 92/100 [00:39<00:03,  2.38it/s]

Epoch 92/100, Loss: 0.0062


Training Epochs:  93%|█████████▎| 93/100 [00:40<00:02,  2.39it/s]

Epoch 93/100, Loss: 0.0062


Training Epochs:  94%|█████████▍| 94/100 [00:40<00:02,  2.38it/s]

Epoch 94/100, Loss: 0.0062


Training Epochs:  95%|█████████▌| 95/100 [00:41<00:02,  2.39it/s]

Epoch 95/100, Loss: 0.0062


Training Epochs:  96%|█████████▌| 96/100 [00:41<00:01,  2.36it/s]

Epoch 96/100, Loss: 0.0062


Training Epochs:  97%|█████████▋| 97/100 [00:41<00:01,  2.33it/s]

Epoch 97/100, Loss: 0.0062


Training Epochs:  98%|█████████▊| 98/100 [00:42<00:00,  2.29it/s]

Epoch 98/100, Loss: 0.0063


Training Epochs:  99%|█████████▉| 99/100 [00:42<00:00,  2.34it/s]

Epoch 99/100, Loss: 0.0062


Training Epochs: 100%|██████████| 100/100 [00:43<00:00,  2.32it/s]

Epoch 100/100, Loss: 0.0062





Test Accuracy: 96.86%

Experimenting with Epochs: 250


Training Epochs:   0%|          | 1/250 [00:00<01:44,  2.38it/s]

Epoch 1/250, Loss: 0.6535


Training Epochs:   1%|          | 2/250 [00:00<01:48,  2.29it/s]

Epoch 2/250, Loss: 0.1215


Training Epochs:   1%|          | 3/250 [00:01<01:48,  2.27it/s]

Epoch 3/250, Loss: 0.0708


Training Epochs:   2%|▏         | 4/250 [00:01<01:48,  2.26it/s]

Epoch 4/250, Loss: 0.0514


Training Epochs:   2%|▏         | 5/250 [00:02<01:49,  2.24it/s]

Epoch 5/250, Loss: 0.0353


Training Epochs:   2%|▏         | 6/250 [00:02<01:50,  2.21it/s]

Epoch 6/250, Loss: 0.0260


Training Epochs:   3%|▎         | 7/250 [00:03<01:50,  2.19it/s]

Epoch 7/250, Loss: 0.0205


Training Epochs:   3%|▎         | 8/250 [00:03<01:49,  2.21it/s]

Epoch 8/250, Loss: 0.0171


Training Epochs:   4%|▎         | 9/250 [00:04<01:48,  2.21it/s]

Epoch 9/250, Loss: 0.0153


Training Epochs:   4%|▍         | 10/250 [00:04<01:46,  2.25it/s]

Epoch 10/250, Loss: 0.0102


Training Epochs:   4%|▍         | 11/250 [00:04<01:46,  2.24it/s]

Epoch 11/250, Loss: 0.0077


Training Epochs:   5%|▍         | 12/250 [00:05<01:45,  2.25it/s]

Epoch 12/250, Loss: 0.0071


Training Epochs:   5%|▌         | 13/250 [00:05<01:44,  2.26it/s]

Epoch 13/250, Loss: 0.0068


Training Epochs:   6%|▌         | 14/250 [00:06<01:43,  2.28it/s]

Epoch 14/250, Loss: 0.0067


Training Epochs:   6%|▌         | 15/250 [00:06<01:42,  2.29it/s]

Epoch 15/250, Loss: 0.0065


Training Epochs:   6%|▋         | 16/250 [00:07<01:41,  2.30it/s]

Epoch 16/250, Loss: 0.0064


Training Epochs:   7%|▋         | 17/250 [00:07<01:41,  2.29it/s]

Epoch 17/250, Loss: 0.0062


Training Epochs:   7%|▋         | 18/250 [00:07<01:41,  2.28it/s]

Epoch 18/250, Loss: 0.0061


Training Epochs:   8%|▊         | 19/250 [00:08<01:41,  2.27it/s]

Epoch 19/250, Loss: 0.0060


Training Epochs:   8%|▊         | 20/250 [00:08<01:40,  2.29it/s]

Epoch 20/250, Loss: 0.0059


Training Epochs:   8%|▊         | 21/250 [00:09<01:40,  2.28it/s]

Epoch 21/250, Loss: 0.0057


Training Epochs:   9%|▉         | 22/250 [00:09<01:39,  2.29it/s]

Epoch 22/250, Loss: 0.0057


Training Epochs:   9%|▉         | 23/250 [00:10<01:38,  2.30it/s]

Epoch 23/250, Loss: 0.0057


Training Epochs:  10%|▉         | 24/250 [00:10<01:38,  2.29it/s]

Epoch 24/250, Loss: 0.0057


Training Epochs:  10%|█         | 25/250 [00:11<01:39,  2.26it/s]

Epoch 25/250, Loss: 0.0056


Training Epochs:  10%|█         | 26/250 [00:11<01:38,  2.27it/s]

Epoch 26/250, Loss: 0.0056


Training Epochs:  11%|█         | 27/250 [00:11<01:37,  2.28it/s]

Epoch 27/250, Loss: 0.0056


Training Epochs:  11%|█         | 28/250 [00:12<01:38,  2.24it/s]

Epoch 28/250, Loss: 0.0056


Training Epochs:  12%|█▏        | 29/250 [00:12<01:38,  2.23it/s]

Epoch 29/250, Loss: 0.0056


Training Epochs:  12%|█▏        | 30/250 [00:13<01:40,  2.20it/s]

Epoch 30/250, Loss: 0.0056


Training Epochs:  12%|█▏        | 31/250 [00:13<01:40,  2.18it/s]

Epoch 31/250, Loss: 0.0055


Training Epochs:  13%|█▎        | 32/250 [00:14<01:40,  2.16it/s]

Epoch 32/250, Loss: 0.0056


Training Epochs:  13%|█▎        | 33/250 [00:14<01:40,  2.16it/s]

Epoch 33/250, Loss: 0.0056


Training Epochs:  14%|█▎        | 34/250 [00:15<01:37,  2.22it/s]

Epoch 34/250, Loss: 0.0055


Training Epochs:  14%|█▍        | 35/250 [00:15<01:37,  2.21it/s]

Epoch 35/250, Loss: 0.0056


Training Epochs:  14%|█▍        | 36/250 [00:16<01:35,  2.23it/s]

Epoch 36/250, Loss: 0.0055


Training Epochs:  15%|█▍        | 37/250 [00:16<01:34,  2.25it/s]

Epoch 37/250, Loss: 0.0055


Training Epochs:  15%|█▌        | 38/250 [00:16<01:35,  2.23it/s]

Epoch 38/250, Loss: 0.0055


Training Epochs:  16%|█▌        | 39/250 [00:17<01:32,  2.27it/s]

Epoch 39/250, Loss: 0.0055


Training Epochs:  16%|█▌        | 40/250 [00:17<01:32,  2.27it/s]

Epoch 40/250, Loss: 0.0055


Training Epochs:  16%|█▋        | 41/250 [00:18<01:32,  2.27it/s]

Epoch 41/250, Loss: 0.0055


Training Epochs:  17%|█▋        | 42/250 [00:18<01:32,  2.26it/s]

Epoch 42/250, Loss: 0.0056


Training Epochs:  17%|█▋        | 43/250 [00:19<01:32,  2.23it/s]

Epoch 43/250, Loss: 0.0055


Training Epochs:  18%|█▊        | 44/250 [00:19<01:32,  2.22it/s]

Epoch 44/250, Loss: 0.0056


Training Epochs:  18%|█▊        | 45/250 [00:20<01:31,  2.23it/s]

Epoch 45/250, Loss: 0.0056


Training Epochs:  18%|█▊        | 46/250 [00:20<01:31,  2.23it/s]

Epoch 46/250, Loss: 0.0055


Training Epochs:  19%|█▉        | 47/250 [00:20<01:30,  2.24it/s]

Epoch 47/250, Loss: 0.0055


Training Epochs:  19%|█▉        | 48/250 [00:21<01:30,  2.24it/s]

Epoch 48/250, Loss: 0.0055


Training Epochs:  20%|█▉        | 49/250 [00:21<01:29,  2.24it/s]

Epoch 49/250, Loss: 0.0055


Training Epochs:  20%|██        | 50/250 [00:22<01:27,  2.27it/s]

Epoch 50/250, Loss: 0.0055


Training Epochs:  20%|██        | 51/250 [00:22<01:26,  2.31it/s]

Epoch 51/250, Loss: 0.0055


Training Epochs:  21%|██        | 52/250 [00:23<01:25,  2.32it/s]

Epoch 52/250, Loss: 0.0055


Training Epochs:  21%|██        | 53/250 [00:23<01:26,  2.28it/s]

Epoch 53/250, Loss: 0.0055


Training Epochs:  22%|██▏       | 54/250 [00:23<01:25,  2.29it/s]

Epoch 54/250, Loss: 0.0055


Training Epochs:  22%|██▏       | 55/250 [00:24<01:24,  2.30it/s]

Epoch 55/250, Loss: 0.0055


Training Epochs:  22%|██▏       | 56/250 [00:24<01:25,  2.26it/s]

Epoch 56/250, Loss: 0.0055


Training Epochs:  23%|██▎       | 57/250 [00:25<01:26,  2.23it/s]

Epoch 57/250, Loss: 0.0055


Training Epochs:  23%|██▎       | 58/250 [00:25<01:27,  2.19it/s]

Epoch 58/250, Loss: 0.0056


Training Epochs:  24%|██▎       | 59/250 [00:26<01:27,  2.17it/s]

Epoch 59/250, Loss: 0.0056


Training Epochs:  24%|██▍       | 60/250 [00:26<01:27,  2.16it/s]

Epoch 60/250, Loss: 0.0055


Training Epochs:  24%|██▍       | 61/250 [00:27<01:26,  2.19it/s]

Epoch 61/250, Loss: 0.0056


Training Epochs:  25%|██▍       | 62/250 [00:27<01:26,  2.17it/s]

Epoch 62/250, Loss: 0.0055


Training Epochs:  25%|██▌       | 63/250 [00:28<01:24,  2.22it/s]

Epoch 63/250, Loss: 0.0056


Training Epochs:  26%|██▌       | 64/250 [00:28<01:23,  2.23it/s]

Epoch 64/250, Loss: 0.0055


Training Epochs:  26%|██▌       | 65/250 [00:28<01:21,  2.27it/s]

Epoch 65/250, Loss: 0.0055


Training Epochs:  26%|██▋       | 66/250 [00:29<01:22,  2.24it/s]

Epoch 66/250, Loss: 0.0055


Training Epochs:  27%|██▋       | 67/250 [00:29<01:21,  2.25it/s]

Epoch 67/250, Loss: 0.0055


Training Epochs:  27%|██▋       | 68/250 [00:30<01:21,  2.23it/s]

Epoch 68/250, Loss: 0.0056


Training Epochs:  28%|██▊       | 69/250 [00:30<01:19,  2.28it/s]

Epoch 69/250, Loss: 0.0055


Training Epochs:  28%|██▊       | 70/250 [00:31<01:19,  2.26it/s]

Epoch 70/250, Loss: 0.0056


Training Epochs:  28%|██▊       | 71/250 [00:31<01:19,  2.26it/s]

Epoch 71/250, Loss: 0.0055


Training Epochs:  29%|██▉       | 72/250 [00:32<01:18,  2.28it/s]

Epoch 72/250, Loss: 0.0056


Training Epochs:  29%|██▉       | 73/250 [00:32<01:17,  2.30it/s]

Epoch 73/250, Loss: 0.0056


Training Epochs:  30%|██▉       | 74/250 [00:32<01:16,  2.29it/s]

Epoch 74/250, Loss: 0.0055


Training Epochs:  30%|███       | 75/250 [00:33<01:16,  2.28it/s]

Epoch 75/250, Loss: 0.0055


Training Epochs:  30%|███       | 76/250 [00:33<01:16,  2.26it/s]

Epoch 76/250, Loss: 0.0055


Training Epochs:  31%|███       | 77/250 [00:34<01:15,  2.28it/s]

Epoch 77/250, Loss: 0.0055


Training Epochs:  31%|███       | 78/250 [00:34<01:15,  2.29it/s]

Epoch 78/250, Loss: 0.0055


Training Epochs:  32%|███▏      | 79/250 [00:35<01:14,  2.30it/s]

Epoch 79/250, Loss: 0.0056


Training Epochs:  32%|███▏      | 80/250 [00:35<01:14,  2.27it/s]

Epoch 80/250, Loss: 0.0055


Training Epochs:  32%|███▏      | 81/250 [00:36<01:15,  2.24it/s]

Epoch 81/250, Loss: 0.0055


Training Epochs:  33%|███▎      | 82/250 [00:36<01:14,  2.24it/s]

Epoch 82/250, Loss: 0.0055


Training Epochs:  33%|███▎      | 83/250 [00:36<01:15,  2.20it/s]

Epoch 83/250, Loss: 0.0055


Training Epochs:  34%|███▎      | 84/250 [00:37<01:16,  2.17it/s]

Epoch 84/250, Loss: 0.0055


Training Epochs:  34%|███▍      | 85/250 [00:37<01:17,  2.14it/s]

Epoch 85/250, Loss: 0.0056


Training Epochs:  34%|███▍      | 86/250 [00:38<01:16,  2.14it/s]

Epoch 86/250, Loss: 0.0055


Training Epochs:  35%|███▍      | 87/250 [00:38<01:15,  2.16it/s]

Epoch 87/250, Loss: 0.0055


Training Epochs:  35%|███▌      | 88/250 [00:39<01:13,  2.21it/s]

Epoch 88/250, Loss: 0.0055


Training Epochs:  36%|███▌      | 89/250 [00:39<01:12,  2.23it/s]

Epoch 89/250, Loss: 0.0055


Training Epochs:  36%|███▌      | 90/250 [00:40<01:11,  2.25it/s]

Epoch 90/250, Loss: 0.0055


Training Epochs:  36%|███▋      | 91/250 [00:40<01:11,  2.23it/s]

Epoch 91/250, Loss: 0.0055


Training Epochs:  37%|███▋      | 92/250 [00:40<01:09,  2.26it/s]

Epoch 92/250, Loss: 0.0055


Training Epochs:  37%|███▋      | 93/250 [00:41<01:08,  2.28it/s]

Epoch 93/250, Loss: 0.0055


Training Epochs:  38%|███▊      | 94/250 [00:41<01:09,  2.23it/s]

Epoch 94/250, Loss: 0.0055


Training Epochs:  38%|███▊      | 95/250 [00:42<01:07,  2.30it/s]

Epoch 95/250, Loss: 0.0055


Training Epochs:  38%|███▊      | 96/250 [00:42<01:06,  2.31it/s]

Epoch 96/250, Loss: 0.0055


Training Epochs:  39%|███▉      | 97/250 [00:43<01:05,  2.32it/s]

Epoch 97/250, Loss: 0.0055


Training Epochs:  39%|███▉      | 98/250 [00:43<01:06,  2.29it/s]

Epoch 98/250, Loss: 0.0055


Training Epochs:  40%|███▉      | 99/250 [00:44<01:06,  2.27it/s]

Epoch 99/250, Loss: 0.0055


Training Epochs:  40%|████      | 100/250 [00:44<01:05,  2.28it/s]

Epoch 100/250, Loss: 0.0055


Training Epochs:  40%|████      | 101/250 [00:44<01:06,  2.24it/s]

Epoch 101/250, Loss: 0.0055


Training Epochs:  41%|████      | 102/250 [00:45<01:06,  2.24it/s]

Epoch 102/250, Loss: 0.0055


Training Epochs:  41%|████      | 103/250 [00:45<01:05,  2.26it/s]

Epoch 103/250, Loss: 0.0055


Training Epochs:  42%|████▏     | 104/250 [00:46<01:06,  2.19it/s]

Epoch 104/250, Loss: 0.0055


Training Epochs:  42%|████▏     | 105/250 [00:46<01:05,  2.21it/s]

Epoch 105/250, Loss: 0.0055


Training Epochs:  42%|████▏     | 106/250 [00:47<01:05,  2.21it/s]

Epoch 106/250, Loss: 0.0055


Training Epochs:  43%|████▎     | 107/250 [00:47<01:03,  2.25it/s]

Epoch 107/250, Loss: 0.0055


Training Epochs:  43%|████▎     | 108/250 [00:48<01:03,  2.25it/s]

Epoch 108/250, Loss: 0.0056


Training Epochs:  44%|████▎     | 109/250 [00:48<01:03,  2.23it/s]

Epoch 109/250, Loss: 0.0055


Training Epochs:  44%|████▍     | 110/250 [00:49<01:03,  2.19it/s]

Epoch 110/250, Loss: 0.0055


Training Epochs:  44%|████▍     | 111/250 [00:49<01:04,  2.14it/s]

Epoch 111/250, Loss: 0.0055


Training Epochs:  45%|████▍     | 112/250 [00:49<01:04,  2.15it/s]

Epoch 112/250, Loss: 0.0055


Training Epochs:  45%|████▌     | 113/250 [00:50<01:03,  2.15it/s]

Epoch 113/250, Loss: 0.0055


Training Epochs:  46%|████▌     | 114/250 [00:50<01:03,  2.16it/s]

Epoch 114/250, Loss: 0.0055


Training Epochs:  46%|████▌     | 115/250 [00:51<01:02,  2.17it/s]

Epoch 115/250, Loss: 0.0055


Training Epochs:  46%|████▋     | 116/250 [00:51<01:00,  2.22it/s]

Epoch 116/250, Loss: 0.0055


Training Epochs:  47%|████▋     | 117/250 [00:52<00:59,  2.25it/s]

Epoch 117/250, Loss: 0.0055


Training Epochs:  47%|████▋     | 118/250 [00:52<00:58,  2.26it/s]

Epoch 118/250, Loss: 0.0055


Training Epochs:  48%|████▊     | 119/250 [00:53<00:57,  2.28it/s]

Epoch 119/250, Loss: 0.0055


Training Epochs:  48%|████▊     | 120/250 [00:53<00:57,  2.27it/s]

Epoch 120/250, Loss: 0.0055


Training Epochs:  48%|████▊     | 121/250 [00:53<00:57,  2.26it/s]

Epoch 121/250, Loss: 0.0055


Training Epochs:  49%|████▉     | 122/250 [00:54<00:57,  2.24it/s]

Epoch 122/250, Loss: 0.0055


Training Epochs:  49%|████▉     | 123/250 [00:54<00:57,  2.22it/s]

Epoch 123/250, Loss: 0.0055


Training Epochs:  50%|████▉     | 124/250 [00:55<00:56,  2.23it/s]

Epoch 124/250, Loss: 0.0055


Training Epochs:  50%|█████     | 125/250 [00:55<00:55,  2.23it/s]

Epoch 125/250, Loss: 0.0055


Training Epochs:  50%|█████     | 126/250 [00:56<00:55,  2.21it/s]

Epoch 126/250, Loss: 0.0056


Training Epochs:  51%|█████     | 127/250 [00:56<00:54,  2.24it/s]

Epoch 127/250, Loss: 0.0056


Training Epochs:  51%|█████     | 128/250 [00:57<00:54,  2.25it/s]

Epoch 128/250, Loss: 0.0055


Training Epochs:  52%|█████▏    | 129/250 [00:57<00:53,  2.26it/s]

Epoch 129/250, Loss: 0.0056


Training Epochs:  52%|█████▏    | 130/250 [00:57<00:53,  2.25it/s]

Epoch 130/250, Loss: 0.0055


Training Epochs:  52%|█████▏    | 131/250 [00:58<00:53,  2.24it/s]

Epoch 131/250, Loss: 0.0055


Training Epochs:  53%|█████▎    | 132/250 [00:58<00:52,  2.25it/s]

Epoch 132/250, Loss: 0.0055


Training Epochs:  53%|█████▎    | 133/250 [00:59<00:52,  2.25it/s]

Epoch 133/250, Loss: 0.0055


Training Epochs:  54%|█████▎    | 134/250 [00:59<00:51,  2.27it/s]

Epoch 134/250, Loss: 0.0055


Training Epochs:  54%|█████▍    | 135/250 [01:00<00:50,  2.29it/s]

Epoch 135/250, Loss: 0.0055


Training Epochs:  54%|█████▍    | 136/250 [01:00<00:49,  2.28it/s]

Epoch 136/250, Loss: 0.0055


Training Epochs:  55%|█████▍    | 137/250 [01:01<00:50,  2.25it/s]

Epoch 137/250, Loss: 0.0055


Training Epochs:  55%|█████▌    | 138/250 [01:01<00:51,  2.18it/s]

Epoch 138/250, Loss: 0.0055


Training Epochs:  56%|█████▌    | 139/250 [01:02<00:51,  2.15it/s]

Epoch 139/250, Loss: 0.0055


Training Epochs:  56%|█████▌    | 140/250 [01:02<00:51,  2.12it/s]

Epoch 140/250, Loss: 0.0055


Training Epochs:  56%|█████▋    | 141/250 [01:03<00:51,  2.13it/s]

Epoch 141/250, Loss: 0.0055


Training Epochs:  57%|█████▋    | 142/250 [01:03<00:50,  2.15it/s]

Epoch 142/250, Loss: 0.0055


Training Epochs:  57%|█████▋    | 143/250 [01:03<00:49,  2.18it/s]

Epoch 143/250, Loss: 0.0055


Training Epochs:  58%|█████▊    | 144/250 [01:04<00:48,  2.19it/s]

Epoch 144/250, Loss: 0.0056


Training Epochs:  58%|█████▊    | 145/250 [01:04<00:48,  2.18it/s]

Epoch 145/250, Loss: 0.0055


Training Epochs:  58%|█████▊    | 146/250 [01:05<00:47,  2.17it/s]

Epoch 146/250, Loss: 0.0055


Training Epochs:  59%|█████▉    | 147/250 [01:05<00:47,  2.19it/s]

Epoch 147/250, Loss: 0.0055


Training Epochs:  59%|█████▉    | 148/250 [01:06<00:46,  2.18it/s]

Epoch 148/250, Loss: 0.0056


Training Epochs:  60%|█████▉    | 149/250 [01:06<00:45,  2.21it/s]

Epoch 149/250, Loss: 0.0055


Training Epochs:  60%|██████    | 150/250 [01:07<00:45,  2.22it/s]

Epoch 150/250, Loss: 0.0055


Training Epochs:  60%|██████    | 151/250 [01:07<00:45,  2.20it/s]

Epoch 151/250, Loss: 0.0055


Training Epochs:  61%|██████    | 152/250 [01:07<00:44,  2.21it/s]

Epoch 152/250, Loss: 0.0055


Training Epochs:  61%|██████    | 153/250 [01:08<00:44,  2.20it/s]

Epoch 153/250, Loss: 0.0055


Training Epochs:  62%|██████▏   | 154/250 [01:08<00:43,  2.22it/s]

Epoch 154/250, Loss: 0.0055


Training Epochs:  62%|██████▏   | 155/250 [01:09<00:43,  2.20it/s]

Epoch 155/250, Loss: 0.0055


Training Epochs:  62%|██████▏   | 156/250 [01:09<00:42,  2.20it/s]

Epoch 156/250, Loss: 0.0055


Training Epochs:  63%|██████▎   | 157/250 [01:10<00:42,  2.17it/s]

Epoch 157/250, Loss: 0.0055


Training Epochs:  63%|██████▎   | 158/250 [01:10<00:41,  2.20it/s]

Epoch 158/250, Loss: 0.0055


Training Epochs:  64%|██████▎   | 159/250 [01:11<00:41,  2.18it/s]

Epoch 159/250, Loss: 0.0055


Training Epochs:  64%|██████▍   | 160/250 [01:11<00:40,  2.21it/s]

Epoch 160/250, Loss: 0.0055


Training Epochs:  64%|██████▍   | 161/250 [01:12<00:40,  2.20it/s]

Epoch 161/250, Loss: 0.0055


Training Epochs:  65%|██████▍   | 162/250 [01:12<00:39,  2.21it/s]

Epoch 162/250, Loss: 0.0055


Training Epochs:  65%|██████▌   | 163/250 [01:13<00:40,  2.16it/s]

Epoch 163/250, Loss: 0.0055


Training Epochs:  66%|██████▌   | 164/250 [01:13<00:39,  2.17it/s]

Epoch 164/250, Loss: 0.0055


Training Epochs:  66%|██████▌   | 165/250 [01:13<00:39,  2.15it/s]

Epoch 165/250, Loss: 0.0056


Training Epochs:  66%|██████▋   | 166/250 [01:14<00:39,  2.11it/s]

Epoch 166/250, Loss: 0.0055


Training Epochs:  67%|██████▋   | 167/250 [01:14<00:39,  2.12it/s]

Epoch 167/250, Loss: 0.0055


Training Epochs:  67%|██████▋   | 168/250 [01:15<00:38,  2.15it/s]

Epoch 168/250, Loss: 0.0055


Training Epochs:  68%|██████▊   | 169/250 [01:15<00:37,  2.18it/s]

Epoch 169/250, Loss: 0.0056


Training Epochs:  68%|██████▊   | 170/250 [01:16<00:36,  2.19it/s]

Epoch 170/250, Loss: 0.0055


Training Epochs:  68%|██████▊   | 171/250 [01:16<00:36,  2.19it/s]

Epoch 171/250, Loss: 0.0056


Training Epochs:  69%|██████▉   | 172/250 [01:17<00:35,  2.20it/s]

Epoch 172/250, Loss: 0.0055


Training Epochs:  69%|██████▉   | 173/250 [01:17<00:35,  2.19it/s]

Epoch 173/250, Loss: 0.0055


Training Epochs:  70%|██████▉   | 174/250 [01:18<00:33,  2.24it/s]

Epoch 174/250, Loss: 0.0055


Training Epochs:  70%|███████   | 175/250 [01:18<00:34,  2.20it/s]

Epoch 175/250, Loss: 0.0055


Training Epochs:  70%|███████   | 176/250 [01:18<00:33,  2.19it/s]

Epoch 176/250, Loss: 0.0055


Training Epochs:  71%|███████   | 177/250 [01:19<00:32,  2.23it/s]

Epoch 177/250, Loss: 0.0055


Training Epochs:  71%|███████   | 178/250 [01:19<00:32,  2.22it/s]

Epoch 178/250, Loss: 0.0055


Training Epochs:  72%|███████▏  | 179/250 [01:20<00:31,  2.22it/s]

Epoch 179/250, Loss: 0.0055


Training Epochs:  72%|███████▏  | 180/250 [01:20<00:31,  2.19it/s]

Epoch 180/250, Loss: 0.0055


Training Epochs:  72%|███████▏  | 181/250 [01:21<00:31,  2.21it/s]

Epoch 181/250, Loss: 0.0055


Training Epochs:  73%|███████▎  | 182/250 [01:21<00:30,  2.20it/s]

Epoch 182/250, Loss: 0.0056


Training Epochs:  73%|███████▎  | 183/250 [01:22<00:30,  2.22it/s]

Epoch 183/250, Loss: 0.0055


Training Epochs:  74%|███████▎  | 184/250 [01:22<00:30,  2.19it/s]

Epoch 184/250, Loss: 0.0055


Training Epochs:  74%|███████▍  | 185/250 [01:23<00:29,  2.17it/s]

Epoch 185/250, Loss: 0.0055


Training Epochs:  74%|███████▍  | 186/250 [01:23<00:29,  2.17it/s]

Epoch 186/250, Loss: 0.0055


Training Epochs:  75%|███████▍  | 187/250 [01:24<00:29,  2.17it/s]

Epoch 187/250, Loss: 0.0055


Training Epochs:  75%|███████▌  | 188/250 [01:24<00:28,  2.17it/s]

Epoch 188/250, Loss: 0.0055


Training Epochs:  76%|███████▌  | 189/250 [01:24<00:27,  2.18it/s]

Epoch 189/250, Loss: 0.0055


Training Epochs:  76%|███████▌  | 190/250 [01:25<00:27,  2.17it/s]

Epoch 190/250, Loss: 0.0055


Training Epochs:  76%|███████▋  | 191/250 [01:25<00:27,  2.15it/s]

Epoch 191/250, Loss: 0.0055


Training Epochs:  77%|███████▋  | 192/250 [01:26<00:27,  2.11it/s]

Epoch 192/250, Loss: 0.0055


Training Epochs:  77%|███████▋  | 193/250 [01:26<00:27,  2.07it/s]

Epoch 193/250, Loss: 0.0055


Training Epochs:  78%|███████▊  | 194/250 [01:27<00:26,  2.10it/s]

Epoch 194/250, Loss: 0.0055


Training Epochs:  78%|███████▊  | 195/250 [01:27<00:25,  2.17it/s]

Epoch 195/250, Loss: 0.0055


Training Epochs:  78%|███████▊  | 196/250 [01:28<00:24,  2.20it/s]

Epoch 196/250, Loss: 0.0055


Training Epochs:  79%|███████▉  | 197/250 [01:28<00:24,  2.20it/s]

Epoch 197/250, Loss: 0.0055


Training Epochs:  79%|███████▉  | 198/250 [01:29<00:23,  2.17it/s]

Epoch 198/250, Loss: 0.0055


Training Epochs:  80%|███████▉  | 199/250 [01:29<00:23,  2.17it/s]

Epoch 199/250, Loss: 0.0055


Training Epochs:  80%|████████  | 200/250 [01:30<00:23,  2.16it/s]

Epoch 200/250, Loss: 0.0055


Training Epochs:  80%|████████  | 201/250 [01:30<00:22,  2.18it/s]

Epoch 201/250, Loss: 0.0055


Training Epochs:  81%|████████  | 202/250 [01:30<00:22,  2.18it/s]

Epoch 202/250, Loss: 0.0055


Training Epochs:  81%|████████  | 203/250 [01:31<00:21,  2.21it/s]

Epoch 203/250, Loss: 0.0055


Training Epochs:  82%|████████▏ | 204/250 [01:31<00:20,  2.20it/s]

Epoch 204/250, Loss: 0.0055


Training Epochs:  82%|████████▏ | 205/250 [01:32<00:20,  2.21it/s]

Epoch 205/250, Loss: 0.0056


Training Epochs:  82%|████████▏ | 206/250 [01:32<00:20,  2.19it/s]

Epoch 206/250, Loss: 0.0055


Training Epochs:  83%|████████▎ | 207/250 [01:33<00:19,  2.17it/s]

Epoch 207/250, Loss: 0.0055


Training Epochs:  83%|████████▎ | 208/250 [01:33<00:19,  2.15it/s]

Epoch 208/250, Loss: 0.0055


Training Epochs:  84%|████████▎ | 209/250 [01:34<00:18,  2.18it/s]

Epoch 209/250, Loss: 0.0055


Training Epochs:  84%|████████▍ | 210/250 [01:34<00:18,  2.16it/s]

Epoch 210/250, Loss: 0.0055


Training Epochs:  84%|████████▍ | 211/250 [01:35<00:17,  2.17it/s]

Epoch 211/250, Loss: 0.0055


Training Epochs:  85%|████████▍ | 212/250 [01:35<00:17,  2.15it/s]

Epoch 212/250, Loss: 0.0055


Training Epochs:  85%|████████▌ | 213/250 [01:36<00:17,  2.15it/s]

Epoch 213/250, Loss: 0.0055


Training Epochs:  86%|████████▌ | 214/250 [01:36<00:16,  2.15it/s]

Epoch 214/250, Loss: 0.0056


Training Epochs:  86%|████████▌ | 215/250 [01:36<00:16,  2.16it/s]

Epoch 215/250, Loss: 0.0055


Training Epochs:  86%|████████▋ | 216/250 [01:37<00:15,  2.16it/s]

Epoch 216/250, Loss: 0.0056


Training Epochs:  87%|████████▋ | 217/250 [01:37<00:15,  2.10it/s]

Epoch 217/250, Loss: 0.0056


Training Epochs:  87%|████████▋ | 218/250 [01:38<00:15,  2.07it/s]

Epoch 218/250, Loss: 0.0055


Training Epochs:  88%|████████▊ | 219/250 [01:38<00:15,  2.05it/s]

Epoch 219/250, Loss: 0.0055


Training Epochs:  88%|████████▊ | 220/250 [01:39<00:14,  2.08it/s]

Epoch 220/250, Loss: 0.0055


Training Epochs:  88%|████████▊ | 221/250 [01:39<00:13,  2.12it/s]

Epoch 221/250, Loss: 0.0055


Training Epochs:  89%|████████▉ | 222/250 [01:40<00:13,  2.13it/s]

Epoch 222/250, Loss: 0.0055


Training Epochs:  89%|████████▉ | 223/250 [01:40<00:12,  2.14it/s]

Epoch 223/250, Loss: 0.0055


Training Epochs:  90%|████████▉ | 224/250 [01:41<00:12,  2.15it/s]

Epoch 224/250, Loss: 0.0055


Training Epochs:  90%|█████████ | 225/250 [01:41<00:11,  2.14it/s]

Epoch 225/250, Loss: 0.0055


Training Epochs:  90%|█████████ | 226/250 [01:42<00:11,  2.17it/s]

Epoch 226/250, Loss: 0.0055


Training Epochs:  91%|█████████ | 227/250 [01:42<00:10,  2.15it/s]

Epoch 227/250, Loss: 0.0055


Training Epochs:  91%|█████████ | 228/250 [01:43<00:10,  2.14it/s]

Epoch 228/250, Loss: 0.0055


Training Epochs:  92%|█████████▏| 229/250 [01:43<00:09,  2.12it/s]

Epoch 229/250, Loss: 0.0055


Training Epochs:  92%|█████████▏| 230/250 [01:44<00:09,  2.15it/s]

Epoch 230/250, Loss: 0.0055


Training Epochs:  92%|█████████▏| 231/250 [01:44<00:08,  2.12it/s]

Epoch 231/250, Loss: 0.0056


Training Epochs:  93%|█████████▎| 232/250 [01:44<00:08,  2.12it/s]

Epoch 232/250, Loss: 0.0055


Training Epochs:  93%|█████████▎| 233/250 [01:45<00:07,  2.18it/s]

Epoch 233/250, Loss: 0.0055


Training Epochs:  94%|█████████▎| 234/250 [01:45<00:07,  2.18it/s]

Epoch 234/250, Loss: 0.0055


Training Epochs:  94%|█████████▍| 235/250 [01:46<00:06,  2.18it/s]

Epoch 235/250, Loss: 0.0055


Training Epochs:  94%|█████████▍| 236/250 [01:46<00:06,  2.19it/s]

Epoch 236/250, Loss: 0.0055


Training Epochs:  95%|█████████▍| 237/250 [01:47<00:05,  2.17it/s]

Epoch 237/250, Loss: 0.0055


Training Epochs:  95%|█████████▌| 238/250 [01:47<00:05,  2.17it/s]

Epoch 238/250, Loss: 0.0055


Training Epochs:  96%|█████████▌| 239/250 [01:48<00:05,  2.14it/s]

Epoch 239/250, Loss: 0.0055


Training Epochs:  96%|█████████▌| 240/250 [01:48<00:04,  2.13it/s]

Epoch 240/250, Loss: 0.0055


Training Epochs:  96%|█████████▋| 241/250 [01:49<00:04,  2.14it/s]

Epoch 241/250, Loss: 0.0056


Training Epochs:  97%|█████████▋| 242/250 [01:49<00:03,  2.15it/s]

Epoch 242/250, Loss: 0.0055


Training Epochs:  97%|█████████▋| 243/250 [01:50<00:03,  2.13it/s]

Epoch 243/250, Loss: 0.0055


Training Epochs:  98%|█████████▊| 244/250 [01:50<00:02,  2.12it/s]

Epoch 244/250, Loss: 0.0055


Training Epochs:  98%|█████████▊| 245/250 [01:51<00:02,  2.10it/s]

Epoch 245/250, Loss: 0.0055


Training Epochs:  98%|█████████▊| 246/250 [01:51<00:01,  2.06it/s]

Epoch 246/250, Loss: 0.0055


Training Epochs:  99%|█████████▉| 247/250 [01:52<00:01,  2.06it/s]

Epoch 247/250, Loss: 0.0055


Training Epochs:  99%|█████████▉| 248/250 [01:52<00:00,  2.05it/s]

Epoch 248/250, Loss: 0.0056


Training Epochs: 100%|█████████▉| 249/250 [01:52<00:00,  2.09it/s]

Epoch 249/250, Loss: 0.0055


Training Epochs: 100%|██████████| 250/250 [01:53<00:00,  2.20it/s]

Epoch 250/250, Loss: 0.0055





Test Accuracy: 97.52%

Experimenting with Epochs: 350


Training Epochs:   0%|          | 1/350 [00:00<02:48,  2.07it/s]

Epoch 1/350, Loss: 0.6342


Training Epochs:   1%|          | 2/350 [00:00<02:45,  2.10it/s]

Epoch 2/350, Loss: 0.1224


Training Epochs:   1%|          | 3/350 [00:01<02:42,  2.14it/s]

Epoch 3/350, Loss: 0.0736


Training Epochs:   1%|          | 4/350 [00:01<02:41,  2.14it/s]

Epoch 4/350, Loss: 0.0510


Training Epochs:   1%|▏         | 5/350 [00:02<02:37,  2.18it/s]

Epoch 5/350, Loss: 0.0381


Training Epochs:   2%|▏         | 6/350 [00:02<02:37,  2.18it/s]

Epoch 6/350, Loss: 0.0295


Training Epochs:   2%|▏         | 7/350 [00:03<02:36,  2.19it/s]

Epoch 7/350, Loss: 0.0226


Training Epochs:   2%|▏         | 8/350 [00:03<02:35,  2.20it/s]

Epoch 8/350, Loss: 0.0170


Training Epochs:   3%|▎         | 9/350 [00:04<02:36,  2.18it/s]

Epoch 9/350, Loss: 0.0130


Training Epochs:   3%|▎         | 10/350 [00:04<02:36,  2.17it/s]

Epoch 10/350, Loss: 0.0103


Training Epochs:   3%|▎         | 11/350 [00:05<02:35,  2.18it/s]

Epoch 11/350, Loss: 0.0080


Training Epochs:   3%|▎         | 12/350 [00:05<02:35,  2.17it/s]

Epoch 12/350, Loss: 0.0075


Training Epochs:   4%|▎         | 13/350 [00:06<02:35,  2.16it/s]

Epoch 13/350, Loss: 0.0073


Training Epochs:   4%|▍         | 14/350 [00:06<02:34,  2.17it/s]

Epoch 14/350, Loss: 0.0071


Training Epochs:   4%|▍         | 15/350 [00:06<02:37,  2.13it/s]

Epoch 15/350, Loss: 0.0069


Training Epochs:   5%|▍         | 16/350 [00:07<02:36,  2.13it/s]

Epoch 16/350, Loss: 0.0068


Training Epochs:   5%|▍         | 17/350 [00:07<02:38,  2.10it/s]

Epoch 17/350, Loss: 0.0066


Training Epochs:   5%|▌         | 18/350 [00:08<02:39,  2.08it/s]

Epoch 18/350, Loss: 0.0065


Training Epochs:   5%|▌         | 19/350 [00:08<02:40,  2.07it/s]

Epoch 19/350, Loss: 0.0064


Training Epochs:   6%|▌         | 20/350 [00:09<02:40,  2.05it/s]

Epoch 20/350, Loss: 0.0063


Training Epochs:   6%|▌         | 21/350 [00:09<02:39,  2.06it/s]

Epoch 21/350, Loss: 0.0060


Training Epochs:   6%|▋         | 22/350 [00:10<02:37,  2.08it/s]

Epoch 22/350, Loss: 0.0060


Training Epochs:   7%|▋         | 23/350 [00:10<02:36,  2.09it/s]

Epoch 23/350, Loss: 0.0060


Training Epochs:   7%|▋         | 24/350 [00:11<02:34,  2.12it/s]

Epoch 24/350, Loss: 0.0060


Training Epochs:   7%|▋         | 25/350 [00:11<02:34,  2.10it/s]

Epoch 25/350, Loss: 0.0060


Training Epochs:   7%|▋         | 26/350 [00:12<02:31,  2.13it/s]

Epoch 26/350, Loss: 0.0060


Training Epochs:   8%|▊         | 27/350 [00:12<02:33,  2.10it/s]

Epoch 27/350, Loss: 0.0060


Training Epochs:   8%|▊         | 28/350 [00:13<02:32,  2.12it/s]

Epoch 28/350, Loss: 0.0059


Training Epochs:   8%|▊         | 29/350 [00:13<02:30,  2.13it/s]

Epoch 29/350, Loss: 0.0059


Training Epochs:   9%|▊         | 30/350 [00:14<02:28,  2.16it/s]

Epoch 30/350, Loss: 0.0059


Training Epochs:   9%|▉         | 31/350 [00:14<02:28,  2.14it/s]

Epoch 31/350, Loss: 0.0059


Training Epochs:   9%|▉         | 32/350 [00:15<02:29,  2.13it/s]

Epoch 32/350, Loss: 0.0059


Training Epochs:   9%|▉         | 33/350 [00:15<03:13,  1.64it/s]

Epoch 33/350, Loss: 0.0059


Training Epochs:  10%|▉         | 34/350 [00:16<02:59,  1.76it/s]

Epoch 34/350, Loss: 0.0059


Training Epochs:  10%|█         | 35/350 [00:16<02:47,  1.88it/s]

Epoch 35/350, Loss: 0.0059


Training Epochs:  10%|█         | 36/350 [00:17<02:39,  1.96it/s]

Epoch 36/350, Loss: 0.0059


Training Epochs:  11%|█         | 37/350 [00:17<02:35,  2.01it/s]

Epoch 37/350, Loss: 0.0059


Training Epochs:  11%|█         | 38/350 [00:18<02:32,  2.04it/s]

Epoch 38/350, Loss: 0.0059


Training Epochs:  11%|█         | 39/350 [00:18<02:28,  2.09it/s]

Epoch 39/350, Loss: 0.0059


Training Epochs:  11%|█▏        | 40/350 [00:19<02:26,  2.11it/s]

Epoch 40/350, Loss: 0.0059


Training Epochs:  12%|█▏        | 41/350 [00:19<02:26,  2.11it/s]

Epoch 41/350, Loss: 0.0059


Training Epochs:  12%|█▏        | 42/350 [00:20<02:28,  2.07it/s]

Epoch 42/350, Loss: 0.0059


Training Epochs:  12%|█▏        | 43/350 [00:20<02:28,  2.06it/s]

Epoch 43/350, Loss: 0.0059


Training Epochs:  13%|█▎        | 44/350 [00:21<02:29,  2.04it/s]

Epoch 44/350, Loss: 0.0059


Training Epochs:  13%|█▎        | 45/350 [00:21<02:30,  2.03it/s]

Epoch 45/350, Loss: 0.0059


Training Epochs:  13%|█▎        | 46/350 [00:22<02:29,  2.03it/s]

Epoch 46/350, Loss: 0.0059


Training Epochs:  13%|█▎        | 47/350 [00:22<02:28,  2.05it/s]

Epoch 47/350, Loss: 0.0059


Training Epochs:  14%|█▎        | 48/350 [00:23<02:26,  2.06it/s]

Epoch 48/350, Loss: 0.0059


Training Epochs:  14%|█▍        | 49/350 [00:23<02:26,  2.06it/s]

Epoch 49/350, Loss: 0.0059


Training Epochs:  14%|█▍        | 50/350 [00:24<02:24,  2.08it/s]

Epoch 50/350, Loss: 0.0058


Training Epochs:  15%|█▍        | 51/350 [00:24<02:23,  2.09it/s]

Epoch 51/350, Loss: 0.0059


Training Epochs:  15%|█▍        | 52/350 [00:25<02:22,  2.09it/s]

Epoch 52/350, Loss: 0.0059


Training Epochs:  15%|█▌        | 53/350 [00:25<02:21,  2.09it/s]

Epoch 53/350, Loss: 0.0059


Training Epochs:  15%|█▌        | 54/350 [00:25<02:22,  2.08it/s]

Epoch 54/350, Loss: 0.0059


Training Epochs:  16%|█▌        | 55/350 [00:26<02:19,  2.11it/s]

Epoch 55/350, Loss: 0.0059


Training Epochs:  16%|█▌        | 56/350 [00:26<02:19,  2.10it/s]

Epoch 56/350, Loss: 0.0059


Training Epochs:  16%|█▋        | 57/350 [00:27<02:18,  2.11it/s]

Epoch 57/350, Loss: 0.0059


Training Epochs:  17%|█▋        | 58/350 [00:27<02:19,  2.09it/s]

Epoch 58/350, Loss: 0.0059


Training Epochs:  17%|█▋        | 59/350 [00:28<02:17,  2.12it/s]

Epoch 59/350, Loss: 0.0059


Training Epochs:  17%|█▋        | 60/350 [00:28<02:18,  2.10it/s]

Epoch 60/350, Loss: 0.0059


Training Epochs:  17%|█▋        | 61/350 [00:29<02:17,  2.11it/s]

Epoch 61/350, Loss: 0.0059


Training Epochs:  18%|█▊        | 62/350 [00:29<02:15,  2.12it/s]

Epoch 62/350, Loss: 0.0059


Training Epochs:  18%|█▊        | 63/350 [00:30<02:16,  2.10it/s]

Epoch 63/350, Loss: 0.0059


Training Epochs:  18%|█▊        | 64/350 [00:30<02:14,  2.12it/s]

Epoch 64/350, Loss: 0.0059


Training Epochs:  19%|█▊        | 65/350 [00:31<02:14,  2.12it/s]

Epoch 65/350, Loss: 0.0059


Training Epochs:  19%|█▉        | 66/350 [00:31<02:14,  2.11it/s]

Epoch 66/350, Loss: 0.0058


Training Epochs:  19%|█▉        | 67/350 [00:32<02:15,  2.10it/s]

Epoch 67/350, Loss: 0.0059


Training Epochs:  19%|█▉        | 68/350 [00:32<02:16,  2.06it/s]

Epoch 68/350, Loss: 0.0059


Training Epochs:  20%|█▉        | 69/350 [00:33<02:16,  2.06it/s]

Epoch 69/350, Loss: 0.0059


Training Epochs:  20%|██        | 70/350 [00:33<02:18,  2.02it/s]

Epoch 70/350, Loss: 0.0059


Training Epochs:  20%|██        | 71/350 [00:34<02:17,  2.02it/s]

Epoch 71/350, Loss: 0.0059


Training Epochs:  21%|██        | 72/350 [00:34<02:16,  2.04it/s]

Epoch 72/350, Loss: 0.0059


Training Epochs:  21%|██        | 73/350 [00:35<02:14,  2.06it/s]

Epoch 73/350, Loss: 0.0059


Training Epochs:  21%|██        | 74/350 [00:35<02:14,  2.05it/s]

Epoch 74/350, Loss: 0.0059


Training Epochs:  21%|██▏       | 75/350 [00:36<02:11,  2.09it/s]

Epoch 75/350, Loss: 0.0059


Training Epochs:  22%|██▏       | 76/350 [00:36<02:10,  2.10it/s]

Epoch 76/350, Loss: 0.0059


Training Epochs:  22%|██▏       | 77/350 [00:36<02:09,  2.10it/s]

Epoch 77/350, Loss: 0.0059


Training Epochs:  22%|██▏       | 78/350 [00:37<02:10,  2.08it/s]

Epoch 78/350, Loss: 0.0059


Training Epochs:  23%|██▎       | 79/350 [00:37<02:10,  2.08it/s]

Epoch 79/350, Loss: 0.0059


Training Epochs:  23%|██▎       | 80/350 [00:38<02:10,  2.08it/s]

Epoch 80/350, Loss: 0.0059


Training Epochs:  23%|██▎       | 81/350 [00:38<02:07,  2.12it/s]

Epoch 81/350, Loss: 0.0059


Training Epochs:  23%|██▎       | 82/350 [00:39<02:03,  2.17it/s]

Epoch 82/350, Loss: 0.0059


Training Epochs:  24%|██▎       | 83/350 [00:39<02:04,  2.14it/s]

Epoch 83/350, Loss: 0.0059


Training Epochs:  24%|██▍       | 84/350 [00:40<02:05,  2.11it/s]

Epoch 84/350, Loss: 0.0059


Training Epochs:  24%|██▍       | 85/350 [00:40<02:06,  2.09it/s]

Epoch 85/350, Loss: 0.0059


Training Epochs:  25%|██▍       | 86/350 [00:41<02:05,  2.10it/s]

Epoch 86/350, Loss: 0.0059


Training Epochs:  25%|██▍       | 87/350 [00:41<02:06,  2.08it/s]

Epoch 87/350, Loss: 0.0059


Training Epochs:  25%|██▌       | 88/350 [00:42<02:06,  2.07it/s]

Epoch 88/350, Loss: 0.0059


Training Epochs:  25%|██▌       | 89/350 [00:42<02:05,  2.08it/s]

Epoch 89/350, Loss: 0.0059


Training Epochs:  26%|██▌       | 90/350 [00:43<02:04,  2.09it/s]

Epoch 90/350, Loss: 0.0059


Training Epochs:  26%|██▌       | 91/350 [00:43<02:04,  2.08it/s]

Epoch 91/350, Loss: 0.0059


Training Epochs:  26%|██▋       | 92/350 [00:44<02:04,  2.07it/s]

Epoch 92/350, Loss: 0.0059


Training Epochs:  27%|██▋       | 93/350 [00:44<02:03,  2.08it/s]

Epoch 93/350, Loss: 0.0059


Training Epochs:  27%|██▋       | 94/350 [00:45<02:04,  2.05it/s]

Epoch 94/350, Loss: 0.0059


Training Epochs:  27%|██▋       | 95/350 [00:45<02:05,  2.03it/s]

Epoch 95/350, Loss: 0.0059


Training Epochs:  27%|██▋       | 96/350 [00:46<02:05,  2.02it/s]

Epoch 96/350, Loss: 0.0059


Training Epochs:  28%|██▊       | 97/350 [00:46<02:04,  2.03it/s]

Epoch 97/350, Loss: 0.0059


Training Epochs:  28%|██▊       | 98/350 [00:47<02:03,  2.05it/s]

Epoch 98/350, Loss: 0.0059


Training Epochs:  28%|██▊       | 99/350 [00:47<02:02,  2.04it/s]

Epoch 99/350, Loss: 0.0059


Training Epochs:  29%|██▊       | 100/350 [00:48<02:00,  2.08it/s]

Epoch 100/350, Loss: 0.0059


Training Epochs:  29%|██▉       | 101/350 [00:48<02:00,  2.06it/s]

Epoch 101/350, Loss: 0.0059


Training Epochs:  29%|██▉       | 102/350 [00:49<02:00,  2.07it/s]

Epoch 102/350, Loss: 0.0059


Training Epochs:  29%|██▉       | 103/350 [00:49<01:59,  2.06it/s]

Epoch 103/350, Loss: 0.0059


Training Epochs:  30%|██▉       | 104/350 [00:49<01:57,  2.09it/s]

Epoch 104/350, Loss: 0.0059


Training Epochs:  30%|███       | 105/350 [00:50<01:57,  2.08it/s]

Epoch 105/350, Loss: 0.0058


Training Epochs:  30%|███       | 106/350 [00:50<01:58,  2.06it/s]

Epoch 106/350, Loss: 0.0059


Training Epochs:  31%|███       | 107/350 [00:51<01:54,  2.12it/s]

Epoch 107/350, Loss: 0.0059


Training Epochs:  31%|███       | 108/350 [00:51<01:54,  2.12it/s]

Epoch 108/350, Loss: 0.0059


Training Epochs:  31%|███       | 109/350 [00:52<01:55,  2.08it/s]

Epoch 109/350, Loss: 0.0058


Training Epochs:  31%|███▏      | 110/350 [00:52<01:55,  2.08it/s]

Epoch 110/350, Loss: 0.0059


Training Epochs:  32%|███▏      | 111/350 [00:53<01:55,  2.07it/s]

Epoch 111/350, Loss: 0.0059


Training Epochs:  32%|███▏      | 112/350 [00:53<01:53,  2.10it/s]

Epoch 112/350, Loss: 0.0059


Training Epochs:  32%|███▏      | 113/350 [00:54<01:54,  2.08it/s]

Epoch 113/350, Loss: 0.0059


Training Epochs:  33%|███▎      | 114/350 [00:54<01:53,  2.08it/s]

Epoch 114/350, Loss: 0.0059


Training Epochs:  33%|███▎      | 115/350 [00:55<01:52,  2.09it/s]

Epoch 115/350, Loss: 0.0059


Training Epochs:  33%|███▎      | 116/350 [00:55<01:53,  2.05it/s]

Epoch 116/350, Loss: 0.0059


Training Epochs:  33%|███▎      | 117/350 [00:56<01:54,  2.04it/s]

Epoch 117/350, Loss: 0.0059


Training Epochs:  34%|███▎      | 118/350 [00:56<01:56,  1.99it/s]

Epoch 118/350, Loss: 0.0059


Training Epochs:  34%|███▍      | 119/350 [00:57<01:56,  1.98it/s]

Epoch 119/350, Loss: 0.0059


Training Epochs:  34%|███▍      | 120/350 [00:57<01:55,  1.98it/s]

Epoch 120/350, Loss: 0.0059


Training Epochs:  35%|███▍      | 121/350 [00:58<01:55,  1.98it/s]

Epoch 121/350, Loss: 0.0059


Training Epochs:  35%|███▍      | 122/350 [00:58<01:53,  2.01it/s]

Epoch 122/350, Loss: 0.0059


Training Epochs:  35%|███▌      | 123/350 [00:59<01:51,  2.03it/s]

Epoch 123/350, Loss: 0.0059


Training Epochs:  35%|███▌      | 124/350 [00:59<01:49,  2.07it/s]

Epoch 124/350, Loss: 0.0059


Training Epochs:  36%|███▌      | 125/350 [01:00<01:47,  2.09it/s]

Epoch 125/350, Loss: 0.0059


Training Epochs:  36%|███▌      | 126/350 [01:00<01:47,  2.08it/s]

Epoch 126/350, Loss: 0.0059


Training Epochs:  36%|███▋      | 127/350 [01:01<01:47,  2.07it/s]

Epoch 127/350, Loss: 0.0059


Training Epochs:  37%|███▋      | 128/350 [01:01<01:47,  2.07it/s]

Epoch 128/350, Loss: 0.0059


Training Epochs:  37%|███▋      | 129/350 [01:02<01:45,  2.09it/s]

Epoch 129/350, Loss: 0.0059


Training Epochs:  37%|███▋      | 130/350 [01:02<01:45,  2.08it/s]

Epoch 130/350, Loss: 0.0059


Training Epochs:  37%|███▋      | 131/350 [01:03<01:44,  2.10it/s]

Epoch 131/350, Loss: 0.0059


Training Epochs:  38%|███▊      | 132/350 [01:03<01:44,  2.09it/s]

Epoch 132/350, Loss: 0.0059


Training Epochs:  38%|███▊      | 133/350 [01:04<01:44,  2.08it/s]

Epoch 133/350, Loss: 0.0059


Training Epochs:  38%|███▊      | 134/350 [01:04<01:44,  2.06it/s]

Epoch 134/350, Loss: 0.0059


Training Epochs:  39%|███▊      | 135/350 [01:05<01:43,  2.08it/s]

Epoch 135/350, Loss: 0.0059


Training Epochs:  39%|███▉      | 136/350 [01:05<01:42,  2.08it/s]

Epoch 136/350, Loss: 0.0059


Training Epochs:  39%|███▉      | 137/350 [01:05<01:43,  2.06it/s]

Epoch 137/350, Loss: 0.0059


Training Epochs:  39%|███▉      | 138/350 [01:06<01:42,  2.08it/s]

Epoch 138/350, Loss: 0.0059


Training Epochs:  40%|███▉      | 139/350 [01:06<01:42,  2.06it/s]

Epoch 139/350, Loss: 0.0059


Training Epochs:  40%|████      | 140/350 [01:07<01:42,  2.05it/s]

Epoch 140/350, Loss: 0.0059


Training Epochs:  40%|████      | 141/350 [01:07<01:41,  2.06it/s]

Epoch 141/350, Loss: 0.0059


Training Epochs:  41%|████      | 142/350 [01:08<01:43,  2.01it/s]

Epoch 142/350, Loss: 0.0059


Training Epochs:  41%|████      | 143/350 [01:08<01:43,  2.00it/s]

Epoch 143/350, Loss: 0.0059


Training Epochs:  41%|████      | 144/350 [01:09<01:43,  1.99it/s]

Epoch 144/350, Loss: 0.0059


Training Epochs:  41%|████▏     | 145/350 [01:09<01:43,  1.98it/s]

Epoch 145/350, Loss: 0.0059


Training Epochs:  42%|████▏     | 146/350 [01:10<01:41,  2.02it/s]

Epoch 146/350, Loss: 0.0059


Training Epochs:  42%|████▏     | 147/350 [01:10<01:39,  2.04it/s]

Epoch 147/350, Loss: 0.0059


Training Epochs:  42%|████▏     | 148/350 [01:11<01:37,  2.07it/s]

Epoch 148/350, Loss: 0.0059


Training Epochs:  43%|████▎     | 149/350 [01:11<01:37,  2.06it/s]

Epoch 149/350, Loss: 0.0059


Training Epochs:  43%|████▎     | 150/350 [01:12<01:37,  2.05it/s]

Epoch 150/350, Loss: 0.0059


Training Epochs:  43%|████▎     | 151/350 [01:12<01:38,  2.03it/s]

Epoch 151/350, Loss: 0.0059


Training Epochs:  43%|████▎     | 152/350 [01:13<01:35,  2.07it/s]

Epoch 152/350, Loss: 0.0059


Training Epochs:  44%|████▎     | 153/350 [01:13<01:35,  2.06it/s]

Epoch 153/350, Loss: 0.0059


Training Epochs:  44%|████▍     | 154/350 [01:14<01:35,  2.05it/s]

Epoch 154/350, Loss: 0.0059


Training Epochs:  44%|████▍     | 155/350 [01:14<01:34,  2.06it/s]

Epoch 155/350, Loss: 0.0059


Training Epochs:  45%|████▍     | 156/350 [01:15<01:34,  2.06it/s]

Epoch 156/350, Loss: 0.0059


Training Epochs:  45%|████▍     | 157/350 [01:15<01:32,  2.08it/s]

Epoch 157/350, Loss: 0.0059


Training Epochs:  45%|████▌     | 158/350 [01:16<01:32,  2.07it/s]

Epoch 158/350, Loss: 0.0059


Training Epochs:  45%|████▌     | 159/350 [01:16<01:32,  2.06it/s]

Epoch 159/350, Loss: 0.0058


Training Epochs:  46%|████▌     | 160/350 [01:17<01:31,  2.07it/s]

Epoch 160/350, Loss: 0.0058


Training Epochs:  46%|████▌     | 161/350 [01:17<01:31,  2.07it/s]

Epoch 161/350, Loss: 0.0059


Training Epochs:  46%|████▋     | 162/350 [01:18<01:29,  2.09it/s]

Epoch 162/350, Loss: 0.0059


Training Epochs:  47%|████▋     | 163/350 [01:18<01:30,  2.06it/s]

Epoch 163/350, Loss: 0.0059


Training Epochs:  47%|████▋     | 164/350 [01:19<01:29,  2.07it/s]

Epoch 164/350, Loss: 0.0059


Training Epochs:  47%|████▋     | 165/350 [01:19<01:29,  2.07it/s]

Epoch 165/350, Loss: 0.0059


Training Epochs:  47%|████▋     | 166/350 [01:20<01:30,  2.03it/s]

Epoch 166/350, Loss: 0.0059


Training Epochs:  48%|████▊     | 167/350 [01:20<01:30,  2.02it/s]

Epoch 167/350, Loss: 0.0059


Training Epochs:  48%|████▊     | 168/350 [01:21<01:31,  1.99it/s]

Epoch 168/350, Loss: 0.0059


Training Epochs:  48%|████▊     | 169/350 [01:21<01:31,  1.98it/s]

Epoch 169/350, Loss: 0.0059


Training Epochs:  49%|████▊     | 170/350 [01:22<01:32,  1.95it/s]

Epoch 170/350, Loss: 0.0059


Training Epochs:  49%|████▉     | 171/350 [01:22<01:32,  1.94it/s]

Epoch 171/350, Loss: 0.0058


Training Epochs:  49%|████▉     | 172/350 [01:23<01:31,  1.95it/s]

Epoch 172/350, Loss: 0.0059


Training Epochs:  49%|████▉     | 173/350 [01:23<01:30,  1.96it/s]

Epoch 173/350, Loss: 0.0059


Training Epochs:  50%|████▉     | 174/350 [01:24<01:28,  1.98it/s]

Epoch 174/350, Loss: 0.0059


Training Epochs:  50%|█████     | 175/350 [01:24<01:26,  2.03it/s]

Epoch 175/350, Loss: 0.0058


Training Epochs:  50%|█████     | 176/350 [01:25<01:25,  2.03it/s]

Epoch 176/350, Loss: 0.0059


Training Epochs:  51%|█████     | 177/350 [01:25<01:25,  2.03it/s]

Epoch 177/350, Loss: 0.0059


Training Epochs:  51%|█████     | 178/350 [01:26<01:24,  2.03it/s]

Epoch 178/350, Loss: 0.0059


Training Epochs:  51%|█████     | 179/350 [01:26<01:24,  2.03it/s]

Epoch 179/350, Loss: 0.0059


Training Epochs:  51%|█████▏    | 180/350 [01:27<01:23,  2.04it/s]

Epoch 180/350, Loss: 0.0059


Training Epochs:  52%|█████▏    | 181/350 [01:27<01:23,  2.02it/s]

Epoch 181/350, Loss: 0.0059


Training Epochs:  52%|█████▏    | 182/350 [01:28<01:22,  2.05it/s]

Epoch 182/350, Loss: 0.0059


Training Epochs:  52%|█████▏    | 183/350 [01:28<01:21,  2.04it/s]

Epoch 183/350, Loss: 0.0059


Training Epochs:  53%|█████▎    | 184/350 [01:29<01:21,  2.03it/s]

Epoch 184/350, Loss: 0.0058


Training Epochs:  53%|█████▎    | 185/350 [01:29<01:21,  2.03it/s]

Epoch 185/350, Loss: 0.0059


Training Epochs:  53%|█████▎    | 186/350 [01:30<01:20,  2.03it/s]

Epoch 186/350, Loss: 0.0059


Training Epochs:  53%|█████▎    | 187/350 [01:30<01:20,  2.03it/s]

Epoch 187/350, Loss: 0.0059


Training Epochs:  54%|█████▎    | 188/350 [01:31<01:18,  2.05it/s]

Epoch 188/350, Loss: 0.0059


Training Epochs:  54%|█████▍    | 189/350 [01:31<01:18,  2.05it/s]

Epoch 189/350, Loss: 0.0059


Training Epochs:  54%|█████▍    | 190/350 [01:32<01:18,  2.04it/s]

Epoch 190/350, Loss: 0.0059


Training Epochs:  55%|█████▍    | 191/350 [01:32<01:17,  2.04it/s]

Epoch 191/350, Loss: 0.0059


Training Epochs:  55%|█████▍    | 192/350 [01:33<01:19,  2.00it/s]

Epoch 192/350, Loss: 0.0059


Training Epochs:  55%|█████▌    | 193/350 [01:33<01:19,  1.98it/s]

Epoch 193/350, Loss: 0.0059


Training Epochs:  55%|█████▌    | 194/350 [01:34<01:19,  1.97it/s]

Epoch 194/350, Loss: 0.0059


Training Epochs:  56%|█████▌    | 195/350 [01:34<01:18,  1.97it/s]

Epoch 195/350, Loss: 0.0059


Training Epochs:  56%|█████▌    | 196/350 [01:35<01:17,  1.98it/s]

Epoch 196/350, Loss: 0.0059


Training Epochs:  56%|█████▋    | 197/350 [01:35<01:16,  2.00it/s]

Epoch 197/350, Loss: 0.0059


Training Epochs:  57%|█████▋    | 198/350 [01:36<01:14,  2.04it/s]

Epoch 198/350, Loss: 0.0059


Training Epochs:  57%|█████▋    | 199/350 [01:36<01:14,  2.04it/s]

Epoch 199/350, Loss: 0.0059


Training Epochs:  57%|█████▋    | 200/350 [01:37<01:13,  2.03it/s]

Epoch 200/350, Loss: 0.0059


Training Epochs:  57%|█████▋    | 201/350 [01:37<01:12,  2.05it/s]

Epoch 201/350, Loss: 0.0059


Training Epochs:  58%|█████▊    | 202/350 [01:38<01:12,  2.05it/s]

Epoch 202/350, Loss: 0.0059


Training Epochs:  58%|█████▊    | 203/350 [01:38<01:11,  2.05it/s]

Epoch 203/350, Loss: 0.0059


Training Epochs:  58%|█████▊    | 204/350 [01:38<01:11,  2.05it/s]

Epoch 204/350, Loss: 0.0059


Training Epochs:  59%|█████▊    | 205/350 [01:39<01:11,  2.03it/s]

Epoch 205/350, Loss: 0.0059


Training Epochs:  59%|█████▉    | 206/350 [01:39<01:11,  2.03it/s]

Epoch 206/350, Loss: 0.0059


Training Epochs:  59%|█████▉    | 207/350 [01:40<01:10,  2.03it/s]

Epoch 207/350, Loss: 0.0059


Training Epochs:  59%|█████▉    | 208/350 [01:40<01:10,  2.02it/s]

Epoch 208/350, Loss: 0.0059


Training Epochs:  60%|█████▉    | 209/350 [01:41<01:09,  2.03it/s]

Epoch 209/350, Loss: 0.0059


Training Epochs:  60%|██████    | 210/350 [01:41<01:08,  2.03it/s]

Epoch 210/350, Loss: 0.0059


Training Epochs:  60%|██████    | 211/350 [01:42<01:08,  2.02it/s]

Epoch 211/350, Loss: 0.0058


Training Epochs:  61%|██████    | 212/350 [01:42<01:08,  2.02it/s]

Epoch 212/350, Loss: 0.0059


Training Epochs:  61%|██████    | 213/350 [01:43<01:07,  2.02it/s]

Epoch 213/350, Loss: 0.0058


Training Epochs:  61%|██████    | 214/350 [01:43<01:06,  2.03it/s]

Epoch 214/350, Loss: 0.0059


Training Epochs:  61%|██████▏   | 215/350 [01:44<01:07,  2.01it/s]

Epoch 215/350, Loss: 0.0059


Training Epochs:  62%|██████▏   | 216/350 [01:44<01:06,  2.01it/s]

Epoch 216/350, Loss: 0.0059


Training Epochs:  62%|██████▏   | 217/350 [01:45<01:07,  1.96it/s]

Epoch 217/350, Loss: 0.0059


Training Epochs:  62%|██████▏   | 218/350 [01:46<01:07,  1.94it/s]

Epoch 218/350, Loss: 0.0059


Training Epochs:  63%|██████▎   | 219/350 [01:46<01:08,  1.91it/s]

Epoch 219/350, Loss: 0.0059


Training Epochs:  63%|██████▎   | 220/350 [01:47<01:07,  1.91it/s]

Epoch 220/350, Loss: 0.0059


Training Epochs:  63%|██████▎   | 221/350 [01:47<01:06,  1.95it/s]

Epoch 221/350, Loss: 0.0059


Training Epochs:  63%|██████▎   | 222/350 [01:48<01:05,  1.97it/s]

Epoch 222/350, Loss: 0.0059


Training Epochs:  64%|██████▎   | 223/350 [01:48<01:03,  1.99it/s]

Epoch 223/350, Loss: 0.0059


Training Epochs:  64%|██████▍   | 224/350 [01:49<01:02,  2.00it/s]

Epoch 224/350, Loss: 0.0059


Training Epochs:  64%|██████▍   | 225/350 [01:49<01:02,  2.01it/s]

Epoch 225/350, Loss: 0.0059


Training Epochs:  65%|██████▍   | 226/350 [01:50<01:00,  2.04it/s]

Epoch 226/350, Loss: 0.0058


Training Epochs:  65%|██████▍   | 227/350 [01:50<01:00,  2.02it/s]

Epoch 227/350, Loss: 0.0059


Training Epochs:  65%|██████▌   | 228/350 [01:50<00:59,  2.06it/s]

Epoch 228/350, Loss: 0.0059


Training Epochs:  65%|██████▌   | 229/350 [01:51<00:58,  2.06it/s]

Epoch 229/350, Loss: 0.0059


Training Epochs:  66%|██████▌   | 230/350 [01:51<00:59,  2.03it/s]

Epoch 230/350, Loss: 0.0059


Training Epochs:  66%|██████▌   | 231/350 [01:52<00:58,  2.04it/s]

Epoch 231/350, Loss: 0.0059


Training Epochs:  66%|██████▋   | 232/350 [01:52<00:58,  2.02it/s]

Epoch 232/350, Loss: 0.0059


Training Epochs:  67%|██████▋   | 233/350 [01:53<00:57,  2.03it/s]

Epoch 233/350, Loss: 0.0059


Training Epochs:  67%|██████▋   | 234/350 [01:53<00:57,  2.03it/s]

Epoch 234/350, Loss: 0.0059


Training Epochs:  67%|██████▋   | 235/350 [01:54<00:56,  2.03it/s]

Epoch 235/350, Loss: 0.0059


Training Epochs:  67%|██████▋   | 236/350 [01:54<00:56,  2.00it/s]

Epoch 236/350, Loss: 0.0059


Training Epochs:  68%|██████▊   | 237/350 [01:55<00:56,  1.99it/s]

Epoch 237/350, Loss: 0.0059


Training Epochs:  68%|██████▊   | 238/350 [01:55<00:55,  2.01it/s]

Epoch 238/350, Loss: 0.0059


Training Epochs:  68%|██████▊   | 239/350 [01:56<00:54,  2.02it/s]

Epoch 239/350, Loss: 0.0059


Training Epochs:  69%|██████▊   | 240/350 [01:57<00:56,  1.95it/s]

Epoch 240/350, Loss: 0.0059


Training Epochs:  69%|██████▉   | 241/350 [01:57<00:56,  1.92it/s]

Epoch 241/350, Loss: 0.0059


Training Epochs:  69%|██████▉   | 242/350 [01:58<00:56,  1.92it/s]

Epoch 242/350, Loss: 0.0059


Training Epochs:  69%|██████▉   | 243/350 [01:58<00:55,  1.93it/s]

Epoch 243/350, Loss: 0.0059


Training Epochs:  70%|██████▉   | 244/350 [01:59<00:55,  1.91it/s]

Epoch 244/350, Loss: 0.0059


Training Epochs:  70%|███████   | 245/350 [01:59<00:53,  1.96it/s]

Epoch 245/350, Loss: 0.0059


Training Epochs:  70%|███████   | 246/350 [02:00<00:52,  1.98it/s]

Epoch 246/350, Loss: 0.0059


Training Epochs:  71%|███████   | 247/350 [02:00<00:51,  1.99it/s]

Epoch 247/350, Loss: 0.0059


Training Epochs:  71%|███████   | 248/350 [02:01<00:50,  2.00it/s]

Epoch 248/350, Loss: 0.0059


Training Epochs:  71%|███████   | 249/350 [02:01<00:50,  2.01it/s]

Epoch 249/350, Loss: 0.0059


Training Epochs:  71%|███████▏  | 250/350 [02:02<00:49,  2.03it/s]

Epoch 250/350, Loss: 0.0059


Training Epochs:  72%|███████▏  | 251/350 [02:02<00:49,  2.02it/s]

Epoch 251/350, Loss: 0.0059


Training Epochs:  72%|███████▏  | 252/350 [02:03<00:48,  2.02it/s]

Epoch 252/350, Loss: 0.0059


Training Epochs:  72%|███████▏  | 253/350 [02:03<00:48,  2.00it/s]

Epoch 253/350, Loss: 0.0059


Training Epochs:  73%|███████▎  | 254/350 [02:04<00:48,  2.00it/s]

Epoch 254/350, Loss: 0.0059


Training Epochs:  73%|███████▎  | 255/350 [02:04<00:48,  1.97it/s]

Epoch 255/350, Loss: 0.0059


Training Epochs:  73%|███████▎  | 256/350 [02:05<00:47,  1.99it/s]

Epoch 256/350, Loss: 0.0059


Training Epochs:  73%|███████▎  | 257/350 [02:05<00:46,  1.99it/s]

Epoch 257/350, Loss: 0.0059


Training Epochs:  74%|███████▎  | 258/350 [02:06<00:46,  1.98it/s]

Epoch 258/350, Loss: 0.0059


Training Epochs:  74%|███████▍  | 259/350 [02:06<00:45,  1.99it/s]

Epoch 259/350, Loss: 0.0059


Training Epochs:  74%|███████▍  | 260/350 [02:07<00:45,  1.98it/s]

Epoch 260/350, Loss: 0.0059


Training Epochs:  75%|███████▍  | 261/350 [02:07<00:44,  1.99it/s]

Epoch 261/350, Loss: 0.0059


Training Epochs:  75%|███████▍  | 262/350 [02:08<00:43,  2.01it/s]

Epoch 262/350, Loss: 0.0059


Training Epochs:  75%|███████▌  | 263/350 [02:08<00:43,  1.98it/s]

Epoch 263/350, Loss: 0.0059


Training Epochs:  75%|███████▌  | 264/350 [02:09<00:43,  1.97it/s]

Epoch 264/350, Loss: 0.0059


Training Epochs:  76%|███████▌  | 265/350 [02:09<00:43,  1.96it/s]

Epoch 265/350, Loss: 0.0059


Training Epochs:  76%|███████▌  | 266/350 [02:10<00:42,  1.97it/s]

Epoch 266/350, Loss: 0.0059


Training Epochs:  76%|███████▋  | 267/350 [02:10<00:42,  1.94it/s]

Epoch 267/350, Loss: 0.0059


Training Epochs:  77%|███████▋  | 268/350 [02:11<00:42,  1.94it/s]

Epoch 268/350, Loss: 0.0059


Training Epochs:  77%|███████▋  | 269/350 [02:11<00:41,  1.95it/s]

Epoch 269/350, Loss: 0.0059


Training Epochs:  77%|███████▋  | 270/350 [02:12<00:40,  1.98it/s]

Epoch 270/350, Loss: 0.0059


Training Epochs:  77%|███████▋  | 271/350 [02:12<00:40,  1.96it/s]

Epoch 271/350, Loss: 0.0059


Training Epochs:  78%|███████▊  | 272/350 [02:13<00:39,  1.97it/s]

Epoch 272/350, Loss: 0.0059


Training Epochs:  78%|███████▊  | 273/350 [02:13<00:38,  2.00it/s]

Epoch 273/350, Loss: 0.0059


Training Epochs:  78%|███████▊  | 274/350 [02:14<00:38,  1.98it/s]

Epoch 274/350, Loss: 0.0059


Training Epochs:  79%|███████▊  | 275/350 [02:14<00:38,  1.97it/s]

Epoch 275/350, Loss: 0.0059


Training Epochs:  79%|███████▉  | 276/350 [02:15<00:37,  1.99it/s]

Epoch 276/350, Loss: 0.0059


Training Epochs:  79%|███████▉  | 277/350 [02:15<00:37,  1.97it/s]

Epoch 277/350, Loss: 0.0058


Training Epochs:  79%|███████▉  | 278/350 [02:16<00:36,  1.99it/s]

Epoch 278/350, Loss: 0.0059


Training Epochs:  80%|███████▉  | 279/350 [02:16<00:35,  1.98it/s]

Epoch 279/350, Loss: 0.0059


Training Epochs:  80%|████████  | 280/350 [02:17<00:35,  1.96it/s]

Epoch 280/350, Loss: 0.0059


Training Epochs:  80%|████████  | 281/350 [02:17<00:35,  1.97it/s]

Epoch 281/350, Loss: 0.0059


Training Epochs:  81%|████████  | 282/350 [02:18<00:34,  1.98it/s]

Epoch 282/350, Loss: 0.0059


Training Epochs:  81%|████████  | 283/350 [02:18<00:33,  1.99it/s]

Epoch 283/350, Loss: 0.0059


Training Epochs:  81%|████████  | 284/350 [02:19<00:32,  2.00it/s]

Epoch 284/350, Loss: 0.0059


Training Epochs:  81%|████████▏ | 285/350 [02:19<00:32,  2.01it/s]

Epoch 285/350, Loss: 0.0059


Training Epochs:  82%|████████▏ | 286/350 [02:20<00:31,  2.01it/s]

Epoch 286/350, Loss: 0.0059


Training Epochs:  82%|████████▏ | 287/350 [02:20<00:31,  1.98it/s]

Epoch 287/350, Loss: 0.0059


Training Epochs:  82%|████████▏ | 288/350 [02:21<00:31,  1.96it/s]

Epoch 288/350, Loss: 0.0059


Training Epochs:  83%|████████▎ | 289/350 [02:21<00:31,  1.91it/s]

Epoch 289/350, Loss: 0.0059


Training Epochs:  83%|████████▎ | 290/350 [02:22<00:31,  1.91it/s]

Epoch 290/350, Loss: 0.0059


Training Epochs:  83%|████████▎ | 291/350 [02:22<00:31,  1.88it/s]

Epoch 291/350, Loss: 0.0059


Training Epochs:  83%|████████▎ | 292/350 [02:23<00:30,  1.90it/s]

Epoch 292/350, Loss: 0.0059


Training Epochs:  84%|████████▎ | 293/350 [02:23<00:30,  1.89it/s]

Epoch 293/350, Loss: 0.0059


Training Epochs:  84%|████████▍ | 294/350 [02:24<00:29,  1.93it/s]

Epoch 294/350, Loss: 0.0059


Training Epochs:  84%|████████▍ | 295/350 [02:24<00:28,  1.93it/s]

Epoch 295/350, Loss: 0.0059


Training Epochs:  85%|████████▍ | 296/350 [02:25<00:27,  1.96it/s]

Epoch 296/350, Loss: 0.0059


Training Epochs:  85%|████████▍ | 297/350 [02:25<00:26,  1.97it/s]

Epoch 297/350, Loss: 0.0059


Training Epochs:  85%|████████▌ | 298/350 [02:26<00:26,  1.96it/s]

Epoch 298/350, Loss: 0.0059


Training Epochs:  85%|████████▌ | 299/350 [02:26<00:25,  1.99it/s]

Epoch 299/350, Loss: 0.0059


Training Epochs:  86%|████████▌ | 300/350 [02:27<00:25,  1.99it/s]

Epoch 300/350, Loss: 0.0059


Training Epochs:  86%|████████▌ | 301/350 [02:27<00:24,  2.02it/s]

Epoch 301/350, Loss: 0.0059


Training Epochs:  86%|████████▋ | 302/350 [02:28<00:23,  2.03it/s]

Epoch 302/350, Loss: 0.0059


Training Epochs:  87%|████████▋ | 303/350 [02:28<00:23,  2.00it/s]

Epoch 303/350, Loss: 0.0059


Training Epochs:  87%|████████▋ | 304/350 [02:29<00:22,  2.01it/s]

Epoch 304/350, Loss: 0.0059


Training Epochs:  87%|████████▋ | 305/350 [02:29<00:22,  1.99it/s]

Epoch 305/350, Loss: 0.0059


Training Epochs:  87%|████████▋ | 306/350 [02:30<00:22,  1.99it/s]

Epoch 306/350, Loss: 0.0059


Training Epochs:  88%|████████▊ | 307/350 [02:30<00:21,  1.98it/s]

Epoch 307/350, Loss: 0.0058


Training Epochs:  88%|████████▊ | 308/350 [02:31<00:21,  2.00it/s]

Epoch 308/350, Loss: 0.0059


Training Epochs:  88%|████████▊ | 309/350 [02:31<00:20,  1.97it/s]

Epoch 309/350, Loss: 0.0058


Training Epochs:  89%|████████▊ | 310/350 [02:32<00:20,  1.98it/s]

Epoch 310/350, Loss: 0.0058


Training Epochs:  89%|████████▉ | 311/350 [02:32<00:19,  1.97it/s]

Epoch 311/350, Loss: 0.0059


Training Epochs:  89%|████████▉ | 312/350 [02:33<00:19,  1.94it/s]

Epoch 312/350, Loss: 0.0059


Training Epochs:  89%|████████▉ | 313/350 [02:34<00:19,  1.93it/s]

Epoch 313/350, Loss: 0.0058


Training Epochs:  90%|████████▉ | 314/350 [02:34<00:18,  1.92it/s]

Epoch 314/350, Loss: 0.0059


Training Epochs:  90%|█████████ | 315/350 [02:35<00:18,  1.91it/s]

Epoch 315/350, Loss: 0.0059


Training Epochs:  90%|█████████ | 316/350 [02:35<00:17,  1.93it/s]

Epoch 316/350, Loss: 0.0059


Training Epochs:  91%|█████████ | 317/350 [02:36<00:16,  1.97it/s]

Epoch 317/350, Loss: 0.0059


Training Epochs:  91%|█████████ | 318/350 [02:36<00:16,  1.95it/s]

Epoch 318/350, Loss: 0.0059


Training Epochs:  91%|█████████ | 319/350 [02:37<00:15,  1.99it/s]

Epoch 319/350, Loss: 0.0059


Training Epochs:  91%|█████████▏| 320/350 [02:37<00:14,  2.00it/s]

Epoch 320/350, Loss: 0.0059


Training Epochs:  92%|█████████▏| 321/350 [02:38<00:14,  2.01it/s]

Epoch 321/350, Loss: 0.0059


Training Epochs:  92%|█████████▏| 322/350 [02:38<00:13,  2.02it/s]

Epoch 322/350, Loss: 0.0059


Training Epochs:  92%|█████████▏| 323/350 [02:39<00:13,  2.00it/s]

Epoch 323/350, Loss: 0.0059


Training Epochs:  93%|█████████▎| 324/350 [02:39<00:13,  1.99it/s]

Epoch 324/350, Loss: 0.0059


Training Epochs:  93%|█████████▎| 325/350 [02:40<00:12,  1.99it/s]

Epoch 325/350, Loss: 0.0059


Training Epochs:  93%|█████████▎| 326/350 [02:40<00:12,  2.00it/s]

Epoch 326/350, Loss: 0.0059


Training Epochs:  93%|█████████▎| 327/350 [02:41<00:11,  1.99it/s]

Epoch 327/350, Loss: 0.0059


Training Epochs:  94%|█████████▎| 328/350 [02:41<00:11,  1.97it/s]

Epoch 328/350, Loss: 0.0059


Training Epochs:  94%|█████████▍| 329/350 [02:42<00:10,  1.96it/s]

Epoch 329/350, Loss: 0.0059


Training Epochs:  94%|█████████▍| 330/350 [02:42<00:10,  1.97it/s]

Epoch 330/350, Loss: 0.0059


Training Epochs:  95%|█████████▍| 331/350 [02:43<00:09,  1.97it/s]

Epoch 331/350, Loss: 0.0059


Training Epochs:  95%|█████████▍| 332/350 [02:43<00:09,  1.97it/s]

Epoch 332/350, Loss: 0.0059


Training Epochs:  95%|█████████▌| 333/350 [02:44<00:08,  1.98it/s]

Epoch 333/350, Loss: 0.0059


Training Epochs:  95%|█████████▌| 334/350 [02:44<00:08,  1.97it/s]

Epoch 334/350, Loss: 0.0059


Training Epochs:  96%|█████████▌| 335/350 [02:45<00:07,  1.99it/s]

Epoch 335/350, Loss: 0.0059


Training Epochs:  96%|█████████▌| 336/350 [02:45<00:07,  2.00it/s]

Epoch 336/350, Loss: 0.0059


Training Epochs:  96%|█████████▋| 337/350 [02:46<00:06,  1.96it/s]

Epoch 337/350, Loss: 0.0059


Training Epochs:  97%|█████████▋| 338/350 [02:46<00:06,  1.92it/s]

Epoch 338/350, Loss: 0.0059


Training Epochs:  97%|█████████▋| 339/350 [02:47<00:05,  1.89it/s]

Epoch 339/350, Loss: 0.0059


Training Epochs:  97%|█████████▋| 340/350 [02:47<00:05,  1.88it/s]

Epoch 340/350, Loss: 0.0059


Training Epochs:  97%|█████████▋| 341/350 [02:48<00:04,  1.91it/s]

Epoch 341/350, Loss: 0.0059


Training Epochs:  98%|█████████▊| 342/350 [02:48<00:04,  1.92it/s]

Epoch 342/350, Loss: 0.0059


Training Epochs:  98%|█████████▊| 343/350 [02:49<00:03,  1.94it/s]

Epoch 343/350, Loss: 0.0059


Training Epochs:  98%|█████████▊| 344/350 [02:49<00:03,  1.93it/s]

Epoch 344/350, Loss: 0.0059


Training Epochs:  99%|█████████▊| 345/350 [02:50<00:02,  1.95it/s]

Epoch 345/350, Loss: 0.0059


Training Epochs:  99%|█████████▉| 346/350 [02:50<00:02,  1.95it/s]

Epoch 346/350, Loss: 0.0059


Training Epochs:  99%|█████████▉| 347/350 [02:51<00:01,  1.95it/s]

Epoch 347/350, Loss: 0.0059


Training Epochs:  99%|█████████▉| 348/350 [02:51<00:01,  1.96it/s]

Epoch 348/350, Loss: 0.0059


Training Epochs: 100%|█████████▉| 349/350 [02:52<00:00,  1.98it/s]

Epoch 349/350, Loss: 0.0059


Training Epochs: 100%|██████████| 350/350 [02:52<00:00,  2.02it/s]

Epoch 350/350, Loss: 0.0058





Test Accuracy: 97.12%


In [12]:
# Display results
print("\nExperiment Results:")
for key, value in results.items():
    print(f"{key}: {value}")


Experiment Results:
hidden_size: {32: 0.9699346405228758, 64: 0.9803921568627451}
pooling_type: {'max': 0.9673202614379085, 'avg': 0.9673202614379085}
optimizer: {'SGD': 0.9006535947712418, 'RMSProp': 0.9699346405228758, 'Adam': 0.9738562091503268}
epochs: {5: 0.9725490196078431, 50: 0.9660130718954248, 100: 0.9686274509803922, 250: 0.9751633986928104, 350: 0.9712418300653595}
