In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
from google.colab import files

In [2]:
# Upload dataset
uploaded = files.upload()
import pandas as pd

# Load dataset
file_name = list(uploaded.keys())[0]
data = pd.read_csv(file_name, header=None)

Saving spambase.data to spambase.data


In [3]:
# Splitting features and target
X = data.iloc[:, :-1].values  # Features
y = data.iloc[:, -1].values  # Target

In [4]:
# Standardizing the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [5]:
# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

# Creating DataLoader for batch processing
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True, num_workers=12)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False, num_workers=12)



In [6]:
# Define RNN model class
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, pooling_type):
        super(RNNModel, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.pooling_type = pooling_type
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        out, _ = self.rnn(x)
        if self.pooling_type == 'max':
            out = torch.max(out, dim=1).values
        elif self.pooling_type == 'avg':
            out = torch.mean(out, dim=1)
        out = self.fc(out)
        return out

In [7]:
# Define training function
def train_model(model, train_loader, optimizer, criterion, num_epochs, scheduler=None):
    model.train()
    for epoch in tqdm(range(num_epochs), desc="Training Epochs"):
        epoch_loss = 0
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch.unsqueeze(1))  # Adding sequence dimension
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
        if scheduler:
            scheduler.step()
        print(f"Epoch {epoch + 1}/{num_epochs}, Loss: {epoch_loss / len(train_loader):.4f}")

In [8]:
# Define evaluation function
def evaluate_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            outputs = model(X_batch.unsqueeze(1))  # Adding sequence dimension
            _, predicted = torch.max(outputs, 1)
            total += y_batch.size(0)
            correct += (predicted == y_batch).sum().item()
    accuracy = correct / total
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    return accuracy

In [9]:
# Parameters
input_size = X_train.shape[1]
output_size = len(np.unique(y))
hidden_sizes = [32, 64]
pooling_types = ['max', 'avg']
optimizers = {'SGD': optim.SGD, 'RMSProp': optim.RMSprop, 'Adam': optim.Adam}
epochs_list = [5, 50, 100, 250, 350]

In [10]:
# Results dictionary
results = {
    'hidden_size': {},
    'pooling_type': {},
    'optimizer': {},
    'epochs': {}
}

In [11]:
# Experiment with hidden sizes
for hidden_size in hidden_sizes:
    print(f"\nExperimenting with Hidden Size: {hidden_size}")
    model = RNNModel(input_size, hidden_size, num_layers=1, output_size=output_size, pooling_type='avg')
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    train_model(model, train_loader, optimizer, criterion, num_epochs=50, scheduler=scheduler)
    accuracy = evaluate_model(model, test_loader)
    results['hidden_size'][hidden_size] = accuracy


Experimenting with Hidden Size: 32


Training Epochs:   2%|▏         | 1/50 [00:01<00:53,  1.09s/it]

Epoch 1/50, Loss: 0.2526


Training Epochs:   4%|▍         | 2/50 [00:01<00:46,  1.03it/s]

Epoch 2/50, Loss: 0.1861


Training Epochs:   6%|▌         | 3/50 [00:02<00:42,  1.10it/s]

Epoch 3/50, Loss: 0.1726


Training Epochs:   8%|▊         | 4/50 [00:03<00:38,  1.19it/s]

Epoch 4/50, Loss: 0.1563


Training Epochs:  10%|█         | 5/50 [00:05<01:01,  1.36s/it]

Epoch 5/50, Loss: 0.1485


Training Epochs:  12%|█▏        | 6/50 [00:06<00:48,  1.10s/it]

Epoch 6/50, Loss: 0.1370


Training Epochs:  14%|█▍        | 7/50 [00:06<00:39,  1.08it/s]

Epoch 7/50, Loss: 0.1233


Training Epochs:  16%|█▌        | 8/50 [00:07<00:34,  1.22it/s]

Epoch 8/50, Loss: 0.1241


Training Epochs:  18%|█▊        | 9/50 [00:08<00:30,  1.35it/s]

Epoch 9/50, Loss: 0.1097


Training Epochs:  20%|██        | 10/50 [00:08<00:27,  1.45it/s]

Epoch 10/50, Loss: 0.1040


Training Epochs:  22%|██▏       | 11/50 [00:09<00:25,  1.52it/s]

Epoch 11/50, Loss: 0.0895


Training Epochs:  24%|██▍       | 12/50 [00:09<00:24,  1.57it/s]

Epoch 12/50, Loss: 0.0871


Training Epochs:  26%|██▌       | 13/50 [00:10<00:23,  1.61it/s]

Epoch 13/50, Loss: 0.0863


Training Epochs:  28%|██▊       | 14/50 [00:11<00:21,  1.64it/s]

Epoch 14/50, Loss: 0.0851


Training Epochs:  30%|███       | 15/50 [00:11<00:21,  1.64it/s]

Epoch 15/50, Loss: 0.0845


Training Epochs:  32%|███▏      | 16/50 [00:12<00:20,  1.67it/s]

Epoch 16/50, Loss: 0.0837


Training Epochs:  34%|███▍      | 17/50 [00:12<00:19,  1.66it/s]

Epoch 17/50, Loss: 0.0829


Training Epochs:  36%|███▌      | 18/50 [00:13<00:20,  1.57it/s]

Epoch 18/50, Loss: 0.0818


Training Epochs:  38%|███▊      | 19/50 [00:14<00:22,  1.39it/s]

Epoch 19/50, Loss: 0.0819


Training Epochs:  40%|████      | 20/50 [00:15<00:23,  1.25it/s]

Epoch 20/50, Loss: 0.0808


Training Epochs:  42%|████▏     | 21/50 [00:16<00:23,  1.25it/s]

Epoch 21/50, Loss: 0.0795


Training Epochs:  44%|████▍     | 22/50 [00:16<00:20,  1.35it/s]

Epoch 22/50, Loss: 0.0792


Training Epochs:  46%|████▌     | 23/50 [00:17<00:18,  1.45it/s]

Epoch 23/50, Loss: 0.0786


Training Epochs:  48%|████▊     | 24/50 [00:18<00:17,  1.51it/s]

Epoch 24/50, Loss: 0.0797


Training Epochs:  50%|█████     | 25/50 [00:18<00:15,  1.57it/s]

Epoch 25/50, Loss: 0.0786


Training Epochs:  52%|█████▏    | 26/50 [00:19<00:14,  1.61it/s]

Epoch 26/50, Loss: 0.0785


Training Epochs:  54%|█████▍    | 27/50 [00:19<00:13,  1.64it/s]

Epoch 27/50, Loss: 0.0784


Training Epochs:  56%|█████▌    | 28/50 [00:20<00:13,  1.68it/s]

Epoch 28/50, Loss: 0.0790


Training Epochs:  58%|█████▊    | 29/50 [00:20<00:12,  1.68it/s]

Epoch 29/50, Loss: 0.0789


Training Epochs:  60%|██████    | 30/50 [00:21<00:11,  1.70it/s]

Epoch 30/50, Loss: 0.0786


Training Epochs:  62%|██████▏   | 31/50 [00:22<00:11,  1.70it/s]

Epoch 31/50, Loss: 0.0785


Training Epochs:  64%|██████▍   | 32/50 [00:22<00:10,  1.70it/s]

Epoch 32/50, Loss: 0.0780


Training Epochs:  66%|██████▌   | 33/50 [00:23<00:10,  1.68it/s]

Epoch 33/50, Loss: 0.0782


Training Epochs:  68%|██████▊   | 34/50 [00:23<00:09,  1.68it/s]

Epoch 34/50, Loss: 0.0781


Training Epochs:  70%|███████   | 35/50 [00:24<00:09,  1.59it/s]

Epoch 35/50, Loss: 0.0781


Training Epochs:  72%|███████▏  | 36/50 [00:25<00:08,  1.60it/s]

Epoch 36/50, Loss: 0.0783


Training Epochs:  74%|███████▍  | 37/50 [00:25<00:08,  1.60it/s]

Epoch 37/50, Loss: 0.0780


Training Epochs:  76%|███████▌  | 38/50 [00:27<00:09,  1.28it/s]

Epoch 38/50, Loss: 0.0783


Training Epochs:  78%|███████▊  | 39/50 [00:27<00:09,  1.22it/s]

Epoch 39/50, Loss: 0.0778


Training Epochs:  80%|████████  | 40/50 [00:28<00:08,  1.16it/s]

Epoch 40/50, Loss: 0.0779


Training Epochs:  82%|████████▏ | 41/50 [00:29<00:07,  1.22it/s]

Epoch 41/50, Loss: 0.0779


Training Epochs:  84%|████████▍ | 42/50 [00:30<00:06,  1.32it/s]

Epoch 42/50, Loss: 0.0778


Training Epochs:  86%|████████▌ | 43/50 [00:30<00:04,  1.41it/s]

Epoch 43/50, Loss: 0.0802


Training Epochs:  88%|████████▊ | 44/50 [00:31<00:04,  1.48it/s]

Epoch 44/50, Loss: 0.0779


Training Epochs:  90%|█████████ | 45/50 [00:31<00:03,  1.54it/s]

Epoch 45/50, Loss: 0.0779


Training Epochs:  92%|█████████▏| 46/50 [00:32<00:02,  1.57it/s]

Epoch 46/50, Loss: 0.0781


Training Epochs:  94%|█████████▍| 47/50 [00:33<00:01,  1.62it/s]

Epoch 47/50, Loss: 0.0779


Training Epochs:  96%|█████████▌| 48/50 [00:33<00:01,  1.64it/s]

Epoch 48/50, Loss: 0.0779


Training Epochs:  98%|█████████▊| 49/50 [00:34<00:00,  1.65it/s]

Epoch 49/50, Loss: 0.0786


Training Epochs: 100%|██████████| 50/50 [00:34<00:00,  1.43it/s]

Epoch 50/50, Loss: 0.0785





Test Accuracy: 93.81%

Experimenting with Hidden Size: 64


Training Epochs:   2%|▏         | 1/50 [00:00<00:42,  1.16it/s]

Epoch 1/50, Loss: 0.2495


Training Epochs:   4%|▍         | 2/50 [00:01<00:40,  1.17it/s]

Epoch 2/50, Loss: 0.1913


Training Epochs:   6%|▌         | 3/50 [00:02<00:37,  1.26it/s]

Epoch 3/50, Loss: 0.1789


Training Epochs:   8%|▊         | 4/50 [00:03<00:37,  1.21it/s]

Epoch 4/50, Loss: 0.1667


Training Epochs:  10%|█         | 5/50 [00:03<00:33,  1.33it/s]

Epoch 5/50, Loss: 0.1540


Training Epochs:  12%|█▏        | 6/50 [00:04<00:36,  1.22it/s]

Epoch 6/50, Loss: 0.1425


Training Epochs:  14%|█▍        | 7/50 [00:05<00:36,  1.17it/s]

Epoch 7/50, Loss: 0.1320


Training Epochs:  16%|█▌        | 8/50 [00:06<00:37,  1.13it/s]

Epoch 8/50, Loss: 0.1222


Training Epochs:  18%|█▊        | 9/50 [00:07<00:33,  1.22it/s]

Epoch 9/50, Loss: 0.1159


Training Epochs:  20%|██        | 10/50 [00:08<00:29,  1.33it/s]

Epoch 10/50, Loss: 0.1077


Training Epochs:  22%|██▏       | 11/50 [00:08<00:27,  1.41it/s]

Epoch 11/50, Loss: 0.0898


Training Epochs:  24%|██▍       | 12/50 [00:09<00:25,  1.48it/s]

Epoch 12/50, Loss: 0.0856


Training Epochs:  26%|██▌       | 13/50 [00:09<00:24,  1.53it/s]

Epoch 13/50, Loss: 0.0834


Training Epochs:  28%|██▊       | 14/50 [00:10<00:22,  1.57it/s]

Epoch 14/50, Loss: 0.0829


Training Epochs:  30%|███       | 15/50 [00:11<00:21,  1.59it/s]

Epoch 15/50, Loss: 0.0812


Training Epochs:  32%|███▏      | 16/50 [00:11<00:21,  1.60it/s]

Epoch 16/50, Loss: 0.0796


Training Epochs:  34%|███▍      | 17/50 [00:12<00:20,  1.61it/s]

Epoch 17/50, Loss: 0.0795


Training Epochs:  36%|███▌      | 18/50 [00:12<00:20,  1.57it/s]

Epoch 18/50, Loss: 0.0770


Training Epochs:  38%|███▊      | 19/50 [00:13<00:19,  1.57it/s]

Epoch 19/50, Loss: 0.0763


Training Epochs:  40%|████      | 20/50 [00:14<00:18,  1.59it/s]

Epoch 20/50, Loss: 0.0750


Training Epochs:  42%|████▏     | 21/50 [00:14<00:18,  1.58it/s]

Epoch 21/50, Loss: 0.0729


Training Epochs:  44%|████▍     | 22/50 [00:15<00:17,  1.58it/s]

Epoch 22/50, Loss: 0.0727


Training Epochs:  46%|████▌     | 23/50 [00:16<00:17,  1.58it/s]

Epoch 23/50, Loss: 0.0726


Training Epochs:  48%|████▊     | 24/50 [00:16<00:16,  1.57it/s]

Epoch 24/50, Loss: 0.0726


Training Epochs:  50%|█████     | 25/50 [00:17<00:17,  1.47it/s]

Epoch 25/50, Loss: 0.0722


Training Epochs:  52%|█████▏    | 26/50 [00:18<00:18,  1.31it/s]

Epoch 26/50, Loss: 0.0721


Training Epochs:  54%|█████▍    | 27/50 [00:19<00:19,  1.20it/s]

Epoch 27/50, Loss: 0.0723


Training Epochs:  56%|█████▌    | 28/50 [00:20<00:19,  1.15it/s]

Epoch 28/50, Loss: 0.0720


Training Epochs:  58%|█████▊    | 29/50 [00:21<00:17,  1.22it/s]

Epoch 29/50, Loss: 0.0721


Training Epochs:  60%|██████    | 30/50 [00:21<00:15,  1.32it/s]

Epoch 30/50, Loss: 0.0716


Training Epochs:  62%|██████▏   | 31/50 [00:22<00:13,  1.39it/s]

Epoch 31/50, Loss: 0.0717


Training Epochs:  64%|██████▍   | 32/50 [00:23<00:12,  1.44it/s]

Epoch 32/50, Loss: 0.0719


Training Epochs:  66%|██████▌   | 33/50 [00:23<00:11,  1.49it/s]

Epoch 33/50, Loss: 0.0713


Training Epochs:  68%|██████▊   | 34/50 [00:24<00:10,  1.52it/s]

Epoch 34/50, Loss: 0.0715


Training Epochs:  70%|███████   | 35/50 [00:24<00:09,  1.56it/s]

Epoch 35/50, Loss: 0.0714


Training Epochs:  72%|███████▏  | 36/50 [00:25<00:09,  1.55it/s]

Epoch 36/50, Loss: 0.0716


Training Epochs:  74%|███████▍  | 37/50 [00:26<00:08,  1.55it/s]

Epoch 37/50, Loss: 0.0718


Training Epochs:  76%|███████▌  | 38/50 [00:26<00:07,  1.56it/s]

Epoch 38/50, Loss: 0.0725


Training Epochs:  78%|███████▊  | 39/50 [00:27<00:07,  1.55it/s]

Epoch 39/50, Loss: 0.0716


Training Epochs:  80%|████████  | 40/50 [00:28<00:06,  1.55it/s]

Epoch 40/50, Loss: 0.0718


Training Epochs:  82%|████████▏ | 41/50 [00:28<00:05,  1.56it/s]

Epoch 41/50, Loss: 0.0715


Training Epochs:  84%|████████▍ | 42/50 [00:29<00:05,  1.56it/s]

Epoch 42/50, Loss: 0.0716


Training Epochs:  86%|████████▌ | 43/50 [00:30<00:04,  1.56it/s]

Epoch 43/50, Loss: 0.0733


Training Epochs:  88%|████████▊ | 44/50 [00:30<00:03,  1.55it/s]

Epoch 44/50, Loss: 0.0717


Training Epochs:  90%|█████████ | 45/50 [00:31<00:03,  1.35it/s]

Epoch 45/50, Loss: 0.0721


Training Epochs:  92%|█████████▏| 46/50 [00:32<00:03,  1.26it/s]

Epoch 46/50, Loss: 0.0715


Training Epochs:  94%|█████████▍| 47/50 [00:33<00:02,  1.18it/s]

Epoch 47/50, Loss: 0.0720


Training Epochs:  96%|█████████▌| 48/50 [00:34<00:01,  1.22it/s]

Epoch 48/50, Loss: 0.0715


Training Epochs:  98%|█████████▊| 49/50 [00:34<00:00,  1.31it/s]

Epoch 49/50, Loss: 0.0715


Training Epochs: 100%|██████████| 50/50 [00:35<00:00,  1.41it/s]

Epoch 50/50, Loss: 0.0713





Test Accuracy: 92.62%


In [12]:
# Experiment with pooling types
for pooling_type in pooling_types:
    print(f"\nExperimenting with Pooling Type: {pooling_type}")
    model = RNNModel(input_size, 32, num_layers=1, output_size=output_size, pooling_type=pooling_type)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    train_model(model, train_loader, optimizer, criterion, num_epochs=50, scheduler=scheduler)
    accuracy = evaluate_model(model, test_loader)
    results['pooling_type'][pooling_type] = accuracy


Experimenting with Pooling Type: max


Training Epochs:   2%|▏         | 1/50 [00:00<00:30,  1.59it/s]

Epoch 1/50, Loss: 0.2588


Training Epochs:   4%|▍         | 2/50 [00:01<00:30,  1.57it/s]

Epoch 2/50, Loss: 0.1903


Training Epochs:   6%|▌         | 3/50 [00:01<00:30,  1.53it/s]

Epoch 3/50, Loss: 0.1742


Training Epochs:   8%|▊         | 4/50 [00:02<00:30,  1.51it/s]

Epoch 4/50, Loss: 0.1643


Training Epochs:  10%|█         | 5/50 [00:03<00:29,  1.52it/s]

Epoch 5/50, Loss: 0.1519


Training Epochs:  12%|█▏        | 6/50 [00:03<00:28,  1.52it/s]

Epoch 6/50, Loss: 0.1438


Training Epochs:  14%|█▍        | 7/50 [00:04<00:28,  1.52it/s]

Epoch 7/50, Loss: 0.1349


Training Epochs:  16%|█▌        | 8/50 [00:05<00:27,  1.53it/s]

Epoch 8/50, Loss: 0.1268


Training Epochs:  18%|█▊        | 9/50 [00:05<00:26,  1.53it/s]

Epoch 9/50, Loss: 0.1168


Training Epochs:  20%|██        | 10/50 [00:06<00:25,  1.54it/s]

Epoch 10/50, Loss: 0.1121


Training Epochs:  22%|██▏       | 11/50 [00:07<00:25,  1.53it/s]

Epoch 11/50, Loss: 0.0970


Training Epochs:  24%|██▍       | 12/50 [00:07<00:24,  1.53it/s]

Epoch 12/50, Loss: 0.0940


Training Epochs:  26%|██▌       | 13/50 [00:08<00:27,  1.35it/s]

Epoch 13/50, Loss: 0.0925


Training Epochs:  28%|██▊       | 14/50 [00:09<00:29,  1.23it/s]

Epoch 14/50, Loss: 0.0913


Training Epochs:  30%|███       | 15/50 [00:10<00:30,  1.14it/s]

Epoch 15/50, Loss: 0.0905


Training Epochs:  32%|███▏      | 16/50 [00:11<00:28,  1.19it/s]

Epoch 16/50, Loss: 0.0891


Training Epochs:  34%|███▍      | 17/50 [00:12<00:25,  1.28it/s]

Epoch 17/50, Loss: 0.0900


Training Epochs:  36%|███▌      | 18/50 [00:12<00:23,  1.34it/s]

Epoch 18/50, Loss: 0.0893


Training Epochs:  38%|███▊      | 19/50 [00:13<00:22,  1.36it/s]

Epoch 19/50, Loss: 0.0887


Training Epochs:  40%|████      | 20/50 [00:14<00:21,  1.39it/s]

Epoch 20/50, Loss: 0.0868


Training Epochs:  42%|████▏     | 21/50 [00:14<00:20,  1.42it/s]

Epoch 21/50, Loss: 0.0843


Training Epochs:  44%|████▍     | 22/50 [00:15<00:19,  1.44it/s]

Epoch 22/50, Loss: 0.0850


Training Epochs:  46%|████▌     | 23/50 [00:16<00:18,  1.45it/s]

Epoch 23/50, Loss: 0.0840


Training Epochs:  48%|████▊     | 24/50 [00:16<00:17,  1.46it/s]

Epoch 24/50, Loss: 0.0838


Training Epochs:  50%|█████     | 25/50 [00:17<00:16,  1.48it/s]

Epoch 25/50, Loss: 0.0846


Training Epochs:  52%|█████▏    | 26/50 [00:18<00:16,  1.48it/s]

Epoch 26/50, Loss: 0.0846


Training Epochs:  54%|█████▍    | 27/50 [00:18<00:15,  1.50it/s]

Epoch 27/50, Loss: 0.0840


Training Epochs:  56%|█████▌    | 28/50 [00:19<00:14,  1.50it/s]

Epoch 28/50, Loss: 0.0846


Training Epochs:  58%|█████▊    | 29/50 [00:20<00:13,  1.50it/s]

Epoch 29/50, Loss: 0.0837


Training Epochs:  60%|██████    | 30/50 [00:20<00:13,  1.52it/s]

Epoch 30/50, Loss: 0.0836


Training Epochs:  62%|██████▏   | 31/50 [00:21<00:14,  1.35it/s]

Epoch 31/50, Loss: 0.0847


Training Epochs:  64%|██████▍   | 32/50 [00:22<00:14,  1.23it/s]

Epoch 32/50, Loss: 0.0843


Training Epochs:  66%|██████▌   | 33/50 [00:23<00:15,  1.12it/s]

Epoch 33/50, Loss: 0.0831


Training Epochs:  68%|██████▊   | 34/50 [00:24<00:13,  1.17it/s]

Epoch 34/50, Loss: 0.0840


Training Epochs:  70%|███████   | 35/50 [00:25<00:12,  1.24it/s]

Epoch 35/50, Loss: 0.0835


Training Epochs:  72%|███████▏  | 36/50 [00:26<00:10,  1.31it/s]

Epoch 36/50, Loss: 0.0831


Training Epochs:  74%|███████▍  | 37/50 [00:26<00:09,  1.35it/s]

Epoch 37/50, Loss: 0.0832


Training Epochs:  76%|███████▌  | 38/50 [00:27<00:08,  1.39it/s]

Epoch 38/50, Loss: 0.0844


Training Epochs:  78%|███████▊  | 39/50 [00:28<00:07,  1.41it/s]

Epoch 39/50, Loss: 0.0833


Training Epochs:  80%|████████  | 40/50 [00:28<00:07,  1.42it/s]

Epoch 40/50, Loss: 0.0835


Training Epochs:  82%|████████▏ | 41/50 [00:29<00:06,  1.45it/s]

Epoch 41/50, Loss: 0.0834


Training Epochs:  84%|████████▍ | 42/50 [00:30<00:05,  1.45it/s]

Epoch 42/50, Loss: 0.0835


Training Epochs:  86%|████████▌ | 43/50 [00:30<00:04,  1.45it/s]

Epoch 43/50, Loss: 0.0836


Training Epochs:  88%|████████▊ | 44/50 [00:31<00:04,  1.46it/s]

Epoch 44/50, Loss: 0.0832


Training Epochs:  90%|█████████ | 45/50 [00:32<00:03,  1.45it/s]

Epoch 45/50, Loss: 0.0831


Training Epochs:  92%|█████████▏| 46/50 [00:32<00:02,  1.45it/s]

Epoch 46/50, Loss: 0.0836


Training Epochs:  94%|█████████▍| 47/50 [00:33<00:02,  1.45it/s]

Epoch 47/50, Loss: 0.0833


Training Epochs:  96%|█████████▌| 48/50 [00:34<00:01,  1.42it/s]

Epoch 48/50, Loss: 0.0832


Training Epochs:  98%|█████████▊| 49/50 [00:35<00:00,  1.25it/s]

Epoch 49/50, Loss: 0.0840


Training Epochs: 100%|██████████| 50/50 [00:36<00:00,  1.37it/s]

Epoch 50/50, Loss: 0.0865





Test Accuracy: 93.92%

Experimenting with Pooling Type: avg


Training Epochs:   2%|▏         | 1/50 [00:00<00:37,  1.32it/s]

Epoch 1/50, Loss: 0.2579


Training Epochs:   4%|▍         | 2/50 [00:01<00:34,  1.38it/s]

Epoch 2/50, Loss: 0.1837


Training Epochs:   6%|▌         | 3/50 [00:02<00:33,  1.39it/s]

Epoch 3/50, Loss: 0.1723


Training Epochs:   8%|▊         | 4/50 [00:02<00:33,  1.39it/s]

Epoch 4/50, Loss: 0.1641


Training Epochs:  10%|█         | 5/50 [00:03<00:32,  1.40it/s]

Epoch 5/50, Loss: 0.1516


Training Epochs:  12%|█▏        | 6/50 [00:04<00:32,  1.37it/s]

Epoch 6/50, Loss: 0.1400


Training Epochs:  14%|█▍        | 7/50 [00:05<00:31,  1.36it/s]

Epoch 7/50, Loss: 0.1346


Training Epochs:  16%|█▌        | 8/50 [00:05<00:30,  1.39it/s]

Epoch 8/50, Loss: 0.1260


Training Epochs:  18%|█▊        | 9/50 [00:06<00:30,  1.37it/s]

Epoch 9/50, Loss: 0.1159


Training Epochs:  20%|██        | 10/50 [00:07<00:28,  1.39it/s]

Epoch 10/50, Loss: 0.1093


Training Epochs:  22%|██▏       | 11/50 [00:07<00:27,  1.42it/s]

Epoch 11/50, Loss: 0.0927


Training Epochs:  24%|██▍       | 12/50 [00:08<00:26,  1.44it/s]

Epoch 12/50, Loss: 0.0882


Training Epochs:  26%|██▌       | 13/50 [00:09<00:25,  1.44it/s]

Epoch 13/50, Loss: 0.0871


Training Epochs:  28%|██▊       | 14/50 [00:09<00:24,  1.45it/s]

Epoch 14/50, Loss: 0.0856


Training Epochs:  30%|███       | 15/50 [00:10<00:26,  1.31it/s]

Epoch 15/50, Loss: 0.0857


Training Epochs:  32%|███▏      | 16/50 [00:11<00:28,  1.18it/s]

Epoch 16/50, Loss: 0.0841


Training Epochs:  34%|███▍      | 17/50 [00:12<00:29,  1.11it/s]

Epoch 17/50, Loss: 0.0827


Training Epochs:  36%|███▌      | 18/50 [00:13<00:28,  1.13it/s]

Epoch 18/50, Loss: 0.0838


Training Epochs:  38%|███▊      | 19/50 [00:14<00:25,  1.21it/s]

Epoch 19/50, Loss: 0.0817


Training Epochs:  40%|████      | 20/50 [00:15<00:23,  1.28it/s]

Epoch 20/50, Loss: 0.0803


Training Epochs:  42%|████▏     | 21/50 [00:15<00:22,  1.31it/s]

Epoch 21/50, Loss: 0.0780


Training Epochs:  44%|████▍     | 22/50 [00:16<00:20,  1.34it/s]

Epoch 22/50, Loss: 0.0778


Training Epochs:  46%|████▌     | 23/50 [00:17<00:19,  1.37it/s]

Epoch 23/50, Loss: 0.0784


Training Epochs:  48%|████▊     | 24/50 [00:18<00:18,  1.38it/s]

Epoch 24/50, Loss: 0.0781


Training Epochs:  50%|█████     | 25/50 [00:18<00:17,  1.40it/s]

Epoch 25/50, Loss: 0.0774


Training Epochs:  52%|█████▏    | 26/50 [00:19<00:16,  1.42it/s]

Epoch 26/50, Loss: 0.0772


Training Epochs:  54%|█████▍    | 27/50 [00:20<00:16,  1.41it/s]

Epoch 27/50, Loss: 0.0775


Training Epochs:  56%|█████▌    | 28/50 [00:20<00:15,  1.42it/s]

Epoch 28/50, Loss: 0.0774


Training Epochs:  58%|█████▊    | 29/50 [00:21<00:14,  1.43it/s]

Epoch 29/50, Loss: 0.0772


Training Epochs:  60%|██████    | 30/50 [00:22<00:14,  1.42it/s]

Epoch 30/50, Loss: 0.0778


Training Epochs:  62%|██████▏   | 31/50 [00:22<00:13,  1.41it/s]

Epoch 31/50, Loss: 0.0768


Training Epochs:  64%|██████▍   | 32/50 [00:23<00:13,  1.37it/s]

Epoch 32/50, Loss: 0.0777


Training Epochs:  66%|██████▌   | 33/50 [00:24<00:14,  1.19it/s]

Epoch 33/50, Loss: 0.0782


Training Epochs:  68%|██████▊   | 34/50 [00:25<00:14,  1.09it/s]

Epoch 34/50, Loss: 0.0772


Training Epochs:  70%|███████   | 35/50 [00:26<00:13,  1.12it/s]

Epoch 35/50, Loss: 0.0777


Training Epochs:  72%|███████▏  | 36/50 [00:27<00:11,  1.18it/s]

Epoch 36/50, Loss: 0.0779


Training Epochs:  74%|███████▍  | 37/50 [00:28<00:10,  1.23it/s]

Epoch 37/50, Loss: 0.0773


Training Epochs:  76%|███████▌  | 38/50 [00:28<00:09,  1.26it/s]

Epoch 38/50, Loss: 0.0771


Training Epochs:  78%|███████▊  | 39/50 [00:29<00:08,  1.31it/s]

Epoch 39/50, Loss: 0.0767


Training Epochs:  80%|████████  | 40/50 [00:30<00:07,  1.33it/s]

Epoch 40/50, Loss: 0.0773


Training Epochs:  82%|████████▏ | 41/50 [00:31<00:06,  1.35it/s]

Epoch 41/50, Loss: 0.0769


Training Epochs:  84%|████████▍ | 42/50 [00:31<00:05,  1.38it/s]

Epoch 42/50, Loss: 0.0766


Training Epochs:  86%|████████▌ | 43/50 [00:32<00:05,  1.38it/s]

Epoch 43/50, Loss: 0.0765


Training Epochs:  88%|████████▊ | 44/50 [00:33<00:04,  1.40it/s]

Epoch 44/50, Loss: 0.0771


Training Epochs:  90%|█████████ | 45/50 [00:33<00:03,  1.41it/s]

Epoch 45/50, Loss: 0.0773


Training Epochs:  92%|█████████▏| 46/50 [00:34<00:02,  1.41it/s]

Epoch 46/50, Loss: 0.0765


Training Epochs:  94%|█████████▍| 47/50 [00:35<00:02,  1.41it/s]

Epoch 47/50, Loss: 0.0770


Training Epochs:  96%|█████████▌| 48/50 [00:35<00:01,  1.41it/s]

Epoch 48/50, Loss: 0.0767


Training Epochs:  98%|█████████▊| 49/50 [00:36<00:00,  1.26it/s]

Epoch 49/50, Loss: 0.0766


Training Epochs: 100%|██████████| 50/50 [00:38<00:00,  1.31it/s]

Epoch 50/50, Loss: 0.0776





Test Accuracy: 93.49%


In [13]:
# Experiment with optimizers
for optimizer_name, optimizer_class in optimizers.items():
    print(f"\nExperimenting with Optimizer: {optimizer_name}")
    model = RNNModel(input_size, 32, num_layers=1, output_size=output_size, pooling_type='avg')
    criterion = nn.CrossEntropyLoss()
    optimizer = optimizer_class(model.parameters(), lr=0.01)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    train_model(model, train_loader, optimizer, criterion, num_epochs=50, scheduler=scheduler)
    accuracy = evaluate_model(model, test_loader)
    results['optimizer'][optimizer_name] = accuracy


Experimenting with Optimizer: SGD


Training Epochs:   2%|▏         | 1/50 [00:01<00:53,  1.10s/it]

Epoch 1/50, Loss: 0.6122


Training Epochs:   4%|▍         | 2/50 [00:01<00:42,  1.14it/s]

Epoch 2/50, Loss: 0.4982


Training Epochs:   6%|▌         | 3/50 [00:02<00:37,  1.24it/s]

Epoch 3/50, Loss: 0.4228


Training Epochs:   8%|▊         | 4/50 [00:03<00:35,  1.30it/s]

Epoch 4/50, Loss: 0.3714


Training Epochs:  10%|█         | 5/50 [00:03<00:33,  1.34it/s]

Epoch 5/50, Loss: 0.3366


Training Epochs:  12%|█▏        | 6/50 [00:04<00:31,  1.39it/s]

Epoch 6/50, Loss: 0.3129


Training Epochs:  14%|█▍        | 7/50 [00:05<00:30,  1.40it/s]

Epoch 7/50, Loss: 0.2949


Training Epochs:  16%|█▌        | 8/50 [00:05<00:29,  1.43it/s]

Epoch 8/50, Loss: 0.2821


Training Epochs:  18%|█▊        | 9/50 [00:06<00:28,  1.43it/s]

Epoch 9/50, Loss: 0.2712


Training Epochs:  20%|██        | 10/50 [00:07<00:28,  1.43it/s]

Epoch 10/50, Loss: 0.2615


Training Epochs:  22%|██▏       | 11/50 [00:08<00:27,  1.43it/s]

Epoch 11/50, Loss: 0.2589


Training Epochs:  24%|██▍       | 12/50 [00:08<00:26,  1.44it/s]

Epoch 12/50, Loss: 0.2567


Training Epochs:  26%|██▌       | 13/50 [00:09<00:25,  1.44it/s]

Epoch 13/50, Loss: 0.2558


Training Epochs:  28%|██▊       | 14/50 [00:10<00:24,  1.44it/s]

Epoch 14/50, Loss: 0.2550


Training Epochs:  30%|███       | 15/50 [00:10<00:24,  1.44it/s]

Epoch 15/50, Loss: 0.2541


Training Epochs:  32%|███▏      | 16/50 [00:11<00:25,  1.32it/s]

Epoch 16/50, Loss: 0.2548


Training Epochs:  34%|███▍      | 17/50 [00:12<00:27,  1.18it/s]

Epoch 17/50, Loss: 0.2534


Training Epochs:  36%|███▌      | 18/50 [00:13<00:29,  1.09it/s]

Epoch 18/50, Loss: 0.2531


Training Epochs:  38%|███▊      | 19/50 [00:14<00:27,  1.13it/s]

Epoch 19/50, Loss: 0.2532


Training Epochs:  40%|████      | 20/50 [00:15<00:24,  1.20it/s]

Epoch 20/50, Loss: 0.2532


Training Epochs:  42%|████▏     | 21/50 [00:16<00:22,  1.27it/s]

Epoch 21/50, Loss: 0.2524


Training Epochs:  44%|████▍     | 22/50 [00:16<00:21,  1.32it/s]

Epoch 22/50, Loss: 0.2526


Training Epochs:  46%|████▌     | 23/50 [00:17<00:20,  1.34it/s]

Epoch 23/50, Loss: 0.2513


Training Epochs:  48%|████▊     | 24/50 [00:18<00:18,  1.38it/s]

Epoch 24/50, Loss: 0.2510


Training Epochs:  50%|█████     | 25/50 [00:18<00:17,  1.40it/s]

Epoch 25/50, Loss: 0.2510


Training Epochs:  52%|█████▏    | 26/50 [00:19<00:17,  1.40it/s]

Epoch 26/50, Loss: 0.2503


Training Epochs:  54%|█████▍    | 27/50 [00:20<00:16,  1.41it/s]

Epoch 27/50, Loss: 0.2518


Training Epochs:  56%|█████▌    | 28/50 [00:21<00:15,  1.41it/s]

Epoch 28/50, Loss: 0.2513


Training Epochs:  58%|█████▊    | 29/50 [00:21<00:14,  1.41it/s]

Epoch 29/50, Loss: 0.2512


Training Epochs:  60%|██████    | 30/50 [00:22<00:14,  1.43it/s]

Epoch 30/50, Loss: 0.2511


Training Epochs:  62%|██████▏   | 31/50 [00:23<00:13,  1.43it/s]

Epoch 31/50, Loss: 0.2506


Training Epochs:  64%|██████▍   | 32/50 [00:23<00:12,  1.43it/s]

Epoch 32/50, Loss: 0.2528


Training Epochs:  66%|██████▌   | 33/50 [00:24<00:12,  1.35it/s]

Epoch 33/50, Loss: 0.2508


Training Epochs:  68%|██████▊   | 34/50 [00:25<00:13,  1.17it/s]

Epoch 34/50, Loss: 0.2503


Training Epochs:  70%|███████   | 35/50 [00:26<00:13,  1.07it/s]

Epoch 35/50, Loss: 0.2505


Training Epochs:  72%|███████▏  | 36/50 [00:27<00:12,  1.11it/s]

Epoch 36/50, Loss: 0.2511


Training Epochs:  74%|███████▍  | 37/50 [00:28<00:11,  1.13it/s]

Epoch 37/50, Loss: 0.2508


Training Epochs:  76%|███████▌  | 38/50 [00:29<00:09,  1.21it/s]

Epoch 38/50, Loss: 0.2510


Training Epochs:  78%|███████▊  | 39/50 [00:29<00:08,  1.26it/s]

Epoch 39/50, Loss: 0.2507


Training Epochs:  80%|████████  | 40/50 [00:30<00:07,  1.31it/s]

Epoch 40/50, Loss: 0.2511


Training Epochs:  82%|████████▏ | 41/50 [00:31<00:06,  1.31it/s]

Epoch 41/50, Loss: 0.2502


Training Epochs:  84%|████████▍ | 42/50 [00:32<00:06,  1.33it/s]

Epoch 42/50, Loss: 0.2523


Training Epochs:  86%|████████▌ | 43/50 [00:32<00:05,  1.36it/s]

Epoch 43/50, Loss: 0.2505


Training Epochs:  88%|████████▊ | 44/50 [00:33<00:04,  1.37it/s]

Epoch 44/50, Loss: 0.2522


Training Epochs:  90%|█████████ | 45/50 [00:34<00:03,  1.38it/s]

Epoch 45/50, Loss: 0.2503


Training Epochs:  92%|█████████▏| 46/50 [00:34<00:02,  1.39it/s]

Epoch 46/50, Loss: 0.2511


Training Epochs:  94%|█████████▍| 47/50 [00:35<00:02,  1.39it/s]

Epoch 47/50, Loss: 0.2499


Training Epochs:  96%|█████████▌| 48/50 [00:36<00:01,  1.39it/s]

Epoch 48/50, Loss: 0.2510


Training Epochs:  98%|█████████▊| 49/50 [00:37<00:00,  1.38it/s]

Epoch 49/50, Loss: 0.2512


Training Epochs: 100%|██████████| 50/50 [00:38<00:00,  1.31it/s]

Epoch 50/50, Loss: 0.2527





Test Accuracy: 91.10%

Experimenting with Optimizer: RMSProp


Training Epochs:   2%|▏         | 1/50 [00:01<00:55,  1.14s/it]

Epoch 1/50, Loss: 0.2292


Training Epochs:   4%|▍         | 2/50 [00:01<00:44,  1.08it/s]

Epoch 2/50, Loss: 0.1801


Training Epochs:   6%|▌         | 3/50 [00:02<00:39,  1.18it/s]

Epoch 3/50, Loss: 0.1655


Training Epochs:   8%|▊         | 4/50 [00:03<00:37,  1.22it/s]

Epoch 4/50, Loss: 0.1547


Training Epochs:  10%|█         | 5/50 [00:04<00:35,  1.25it/s]

Epoch 5/50, Loss: 0.1430


Training Epochs:  12%|█▏        | 6/50 [00:04<00:34,  1.28it/s]

Epoch 6/50, Loss: 0.1361


Training Epochs:  14%|█▍        | 7/50 [00:05<00:33,  1.29it/s]

Epoch 7/50, Loss: 0.1267


Training Epochs:  16%|█▌        | 8/50 [00:06<00:32,  1.31it/s]

Epoch 8/50, Loss: 0.1204


Training Epochs:  18%|█▊        | 9/50 [00:07<00:31,  1.32it/s]

Epoch 9/50, Loss: 0.1157


Training Epochs:  20%|██        | 10/50 [00:07<00:30,  1.32it/s]

Epoch 10/50, Loss: 0.1090


Training Epochs:  22%|██▏       | 11/50 [00:08<00:29,  1.32it/s]

Epoch 11/50, Loss: 0.0915


Training Epochs:  24%|██▍       | 12/50 [00:09<00:28,  1.33it/s]

Epoch 12/50, Loss: 0.0889


Training Epochs:  26%|██▌       | 13/50 [00:10<00:27,  1.34it/s]

Epoch 13/50, Loss: 0.0866


Training Epochs:  28%|██▊       | 14/50 [00:10<00:27,  1.33it/s]

Epoch 14/50, Loss: 0.0850


Training Epochs:  30%|███       | 15/50 [00:11<00:29,  1.20it/s]

Epoch 15/50, Loss: 0.0839


Training Epochs:  32%|███▏      | 16/50 [00:13<00:31,  1.07it/s]

Epoch 16/50, Loss: 0.0828


Training Epochs:  34%|███▍      | 17/50 [00:14<00:33,  1.00s/it]

Epoch 17/50, Loss: 0.0838


Training Epochs:  36%|███▌      | 18/50 [00:15<00:30,  1.05it/s]

Epoch 18/50, Loss: 0.0806


Training Epochs:  38%|███▊      | 19/50 [00:15<00:27,  1.12it/s]

Epoch 19/50, Loss: 0.0804


Training Epochs:  40%|████      | 20/50 [00:16<00:25,  1.18it/s]

Epoch 20/50, Loss: 0.0788


Training Epochs:  42%|████▏     | 21/50 [00:17<00:23,  1.22it/s]

Epoch 21/50, Loss: 0.0769


Training Epochs:  44%|████▍     | 22/50 [00:18<00:22,  1.25it/s]

Epoch 22/50, Loss: 0.0762


Training Epochs:  46%|████▌     | 23/50 [00:18<00:21,  1.27it/s]

Epoch 23/50, Loss: 0.0765


Training Epochs:  48%|████▊     | 24/50 [00:19<00:21,  1.24it/s]

Epoch 24/50, Loss: 0.0760


Training Epochs:  50%|█████     | 25/50 [00:20<00:19,  1.26it/s]

Epoch 25/50, Loss: 0.0768


Training Epochs:  52%|█████▏    | 26/50 [00:21<00:18,  1.27it/s]

Epoch 26/50, Loss: 0.0762


Training Epochs:  54%|█████▍    | 27/50 [00:22<00:17,  1.29it/s]

Epoch 27/50, Loss: 0.0759


Training Epochs:  56%|█████▌    | 28/50 [00:22<00:16,  1.30it/s]

Epoch 28/50, Loss: 0.0755


Training Epochs:  58%|█████▊    | 29/50 [00:23<00:16,  1.30it/s]

Epoch 29/50, Loss: 0.0755


Training Epochs:  60%|██████    | 30/50 [00:24<00:15,  1.29it/s]

Epoch 30/50, Loss: 0.0756


Training Epochs:  62%|██████▏   | 31/50 [00:25<00:16,  1.15it/s]

Epoch 31/50, Loss: 0.0761


Training Epochs:  64%|██████▍   | 32/50 [00:26<00:17,  1.04it/s]

Epoch 32/50, Loss: 0.0759


Training Epochs:  66%|██████▌   | 33/50 [00:27<00:17,  1.03s/it]

Epoch 33/50, Loss: 0.0755


Training Epochs:  68%|██████▊   | 34/50 [00:28<00:15,  1.04it/s]

Epoch 34/50, Loss: 0.0773


Training Epochs:  70%|███████   | 35/50 [00:29<00:13,  1.10it/s]

Epoch 35/50, Loss: 0.0752


Training Epochs:  72%|███████▏  | 36/50 [00:30<00:12,  1.16it/s]

Epoch 36/50, Loss: 0.0755


Training Epochs:  74%|███████▍  | 37/50 [00:30<00:10,  1.20it/s]

Epoch 37/50, Loss: 0.0753


Training Epochs:  76%|███████▌  | 38/50 [00:31<00:09,  1.23it/s]

Epoch 38/50, Loss: 0.0755


Training Epochs:  78%|███████▊  | 39/50 [00:32<00:08,  1.26it/s]

Epoch 39/50, Loss: 0.0756


Training Epochs:  80%|████████  | 40/50 [00:33<00:07,  1.27it/s]

Epoch 40/50, Loss: 0.0755


Training Epochs:  82%|████████▏ | 41/50 [00:33<00:07,  1.28it/s]

Epoch 41/50, Loss: 0.0753


Training Epochs:  84%|████████▍ | 42/50 [00:34<00:06,  1.28it/s]

Epoch 42/50, Loss: 0.0752


Training Epochs:  86%|████████▌ | 43/50 [00:35<00:05,  1.29it/s]

Epoch 43/50, Loss: 0.0768


Training Epochs:  88%|████████▊ | 44/50 [00:36<00:04,  1.27it/s]

Epoch 44/50, Loss: 0.0750


Training Epochs:  90%|█████████ | 45/50 [00:37<00:03,  1.27it/s]

Epoch 45/50, Loss: 0.0751


Training Epochs:  92%|█████████▏| 46/50 [00:37<00:03,  1.28it/s]

Epoch 46/50, Loss: 0.0760


Training Epochs:  94%|█████████▍| 47/50 [00:39<00:02,  1.10it/s]

Epoch 47/50, Loss: 0.0750


Training Epochs:  96%|█████████▌| 48/50 [00:44<00:04,  2.16s/it]

Epoch 48/50, Loss: 0.0751


Training Epochs:  98%|█████████▊| 49/50 [00:44<00:01,  1.75s/it]

Epoch 49/50, Loss: 0.0751


Training Epochs: 100%|██████████| 50/50 [00:45<00:00,  1.09it/s]

Epoch 50/50, Loss: 0.0755





Test Accuracy: 92.83%

Experimenting with Optimizer: Adam


Training Epochs:   2%|▏         | 1/50 [00:00<00:37,  1.30it/s]

Epoch 1/50, Loss: 0.2610


Training Epochs:   4%|▍         | 2/50 [00:01<00:37,  1.28it/s]

Epoch 2/50, Loss: 0.1925


Training Epochs:   6%|▌         | 3/50 [00:02<00:37,  1.26it/s]

Epoch 3/50, Loss: 0.1732


Training Epochs:   8%|▊         | 4/50 [00:03<00:36,  1.25it/s]

Epoch 4/50, Loss: 0.1586


Training Epochs:  10%|█         | 5/50 [00:04<00:38,  1.17it/s]

Epoch 5/50, Loss: 0.1522


Training Epochs:  12%|█▏        | 6/50 [00:04<00:37,  1.19it/s]

Epoch 6/50, Loss: 0.1403


Training Epochs:  14%|█▍        | 7/50 [00:05<00:35,  1.21it/s]

Epoch 7/50, Loss: 0.1337


Training Epochs:  16%|█▌        | 8/50 [00:06<00:37,  1.12it/s]

Epoch 8/50, Loss: 0.1222


Training Epochs:  18%|█▊        | 9/50 [00:07<00:40,  1.02it/s]

Epoch 9/50, Loss: 0.1129


Training Epochs:  20%|██        | 10/50 [00:10<00:56,  1.41s/it]

Epoch 10/50, Loss: 0.1031


Training Epochs:  22%|██▏       | 11/50 [00:12<00:59,  1.53s/it]

Epoch 11/50, Loss: 0.0915


Training Epochs:  24%|██▍       | 12/50 [00:13<00:55,  1.47s/it]

Epoch 12/50, Loss: 0.0851


Training Epochs:  26%|██▌       | 13/50 [00:15<00:55,  1.51s/it]

Epoch 13/50, Loss: 0.0842


Training Epochs:  28%|██▊       | 14/50 [00:16<00:51,  1.43s/it]

Epoch 14/50, Loss: 0.0828


Training Epochs:  30%|███       | 15/50 [00:17<00:49,  1.41s/it]

Epoch 15/50, Loss: 0.0827


Training Epochs:  32%|███▏      | 16/50 [00:18<00:41,  1.23s/it]

Epoch 16/50, Loss: 0.0808


Training Epochs:  34%|███▍      | 17/50 [00:19<00:37,  1.13s/it]

Epoch 17/50, Loss: 0.0805


Training Epochs:  36%|███▌      | 18/50 [00:20<00:39,  1.24s/it]

Epoch 18/50, Loss: 0.0794


Training Epochs:  38%|███▊      | 19/50 [00:21<00:36,  1.16s/it]

Epoch 19/50, Loss: 0.0780


Training Epochs:  40%|████      | 20/50 [00:23<00:35,  1.18s/it]

Epoch 20/50, Loss: 0.0771


Training Epochs:  42%|████▏     | 21/50 [00:24<00:34,  1.18s/it]

Epoch 21/50, Loss: 0.0753


Training Epochs:  44%|████▍     | 22/50 [00:25<00:31,  1.12s/it]

Epoch 22/50, Loss: 0.0753


Training Epochs:  46%|████▌     | 23/50 [00:26<00:27,  1.03s/it]

Epoch 23/50, Loss: 0.0762


Training Epochs:  48%|████▊     | 24/50 [00:26<00:25,  1.03it/s]

Epoch 24/50, Loss: 0.0753


Training Epochs:  50%|█████     | 25/50 [00:27<00:23,  1.07it/s]

Epoch 25/50, Loss: 0.0762


Training Epochs:  52%|█████▏    | 26/50 [00:28<00:21,  1.12it/s]

Epoch 26/50, Loss: 0.0749


Training Epochs:  54%|█████▍    | 27/50 [00:29<00:19,  1.15it/s]

Epoch 27/50, Loss: 0.0758


Training Epochs:  56%|█████▌    | 28/50 [00:30<00:18,  1.18it/s]

Epoch 28/50, Loss: 0.0748


Training Epochs:  58%|█████▊    | 29/50 [00:30<00:17,  1.19it/s]

Epoch 29/50, Loss: 0.0749


Training Epochs:  60%|██████    | 30/50 [00:31<00:16,  1.21it/s]

Epoch 30/50, Loss: 0.0753


Training Epochs:  62%|██████▏   | 31/50 [00:32<00:15,  1.22it/s]

Epoch 31/50, Loss: 0.0748


Training Epochs:  64%|██████▍   | 32/50 [00:33<00:14,  1.23it/s]

Epoch 32/50, Loss: 0.0749


Training Epochs:  66%|██████▌   | 33/50 [00:34<00:14,  1.17it/s]

Epoch 33/50, Loss: 0.0746


Training Epochs:  68%|██████▊   | 34/50 [00:35<00:14,  1.08it/s]

Epoch 34/50, Loss: 0.0745


Training Epochs:  70%|███████   | 35/50 [00:36<00:15,  1.02s/it]

Epoch 35/50, Loss: 0.0750


Training Epochs:  72%|███████▏  | 36/50 [00:37<00:15,  1.10s/it]

Epoch 36/50, Loss: 0.0747


Training Epochs:  74%|███████▍  | 37/50 [00:38<00:13,  1.04s/it]

Epoch 37/50, Loss: 0.0758


Training Epochs:  76%|███████▌  | 38/50 [00:39<00:11,  1.02it/s]

Epoch 38/50, Loss: 0.0752


Training Epochs:  78%|███████▊  | 39/50 [00:40<00:10,  1.06it/s]

Epoch 39/50, Loss: 0.0744


Training Epochs:  80%|████████  | 40/50 [00:41<00:09,  1.11it/s]

Epoch 40/50, Loss: 0.0748


Training Epochs:  82%|████████▏ | 41/50 [00:42<00:07,  1.14it/s]

Epoch 41/50, Loss: 0.0745


Training Epochs:  84%|████████▍ | 42/50 [00:42<00:06,  1.17it/s]

Epoch 42/50, Loss: 0.0742


Training Epochs:  86%|████████▌ | 43/50 [00:43<00:05,  1.18it/s]

Epoch 43/50, Loss: 0.0745


Training Epochs:  88%|████████▊ | 44/50 [00:44<00:05,  1.18it/s]

Epoch 44/50, Loss: 0.0752


Training Epochs:  90%|█████████ | 45/50 [00:45<00:04,  1.19it/s]

Epoch 45/50, Loss: 0.0748


Training Epochs:  92%|█████████▏| 46/50 [00:46<00:03,  1.21it/s]

Epoch 46/50, Loss: 0.0757


Training Epochs:  94%|█████████▍| 47/50 [00:47<00:02,  1.22it/s]

Epoch 47/50, Loss: 0.0752


Training Epochs:  96%|█████████▌| 48/50 [00:47<00:01,  1.23it/s]

Epoch 48/50, Loss: 0.0746


Training Epochs:  98%|█████████▊| 49/50 [00:48<00:00,  1.11it/s]

Epoch 49/50, Loss: 0.0745


Training Epochs: 100%|██████████| 50/50 [00:50<00:00,  1.00s/it]

Epoch 50/50, Loss: 0.0748





Test Accuracy: 92.83%


In [14]:
# Experiment with epochs
for num_epochs in epochs_list:
    print(f"\nExperimenting with Epochs: {num_epochs}")
    model = RNNModel(input_size, 32, num_layers=1, output_size=output_size, pooling_type='avg')
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.01)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
    train_model(model, train_loader, optimizer, criterion, num_epochs=num_epochs, scheduler=scheduler)
    accuracy = evaluate_model(model, test_loader)
    results['epochs'][num_epochs] = accuracy


Experimenting with Epochs: 5


Training Epochs:  20%|██        | 1/5 [00:00<00:03,  1.14it/s]

Epoch 1/5, Loss: 0.2570


Training Epochs:  40%|████      | 2/5 [00:01<00:02,  1.18it/s]

Epoch 2/5, Loss: 0.1899


Training Epochs:  60%|██████    | 3/5 [00:02<00:01,  1.19it/s]

Epoch 3/5, Loss: 0.1770


Training Epochs:  80%|████████  | 4/5 [00:03<00:00,  1.20it/s]

Epoch 4/5, Loss: 0.1654


Training Epochs: 100%|██████████| 5/5 [00:04<00:00,  1.20it/s]

Epoch 5/5, Loss: 0.1524





Test Accuracy: 93.49%

Experimenting with Epochs: 50


Training Epochs:   2%|▏         | 1/50 [00:00<00:40,  1.22it/s]

Epoch 1/50, Loss: 0.2750


Training Epochs:   4%|▍         | 2/50 [00:01<00:39,  1.22it/s]

Epoch 2/50, Loss: 0.1888


Training Epochs:   6%|▌         | 3/50 [00:02<00:38,  1.23it/s]

Epoch 3/50, Loss: 0.1757


Training Epochs:   8%|▊         | 4/50 [00:03<00:37,  1.21it/s]

Epoch 4/50, Loss: 0.1609


Training Epochs:  10%|█         | 5/50 [00:04<00:37,  1.21it/s]

Epoch 5/50, Loss: 0.1515


Training Epochs:  12%|█▏        | 6/50 [00:04<00:36,  1.20it/s]

Epoch 6/50, Loss: 0.1403


Training Epochs:  14%|█▍        | 7/50 [00:06<00:39,  1.08it/s]

Epoch 7/50, Loss: 0.1304


Training Epochs:  16%|█▌        | 8/50 [00:07<00:44,  1.06s/it]

Epoch 8/50, Loss: 0.1219


Training Epochs:  18%|█▊        | 9/50 [00:08<00:48,  1.18s/it]

Epoch 9/50, Loss: 0.1124


Training Epochs:  20%|██        | 10/50 [00:09<00:43,  1.09s/it]

Epoch 10/50, Loss: 0.1124


Training Epochs:  22%|██▏       | 11/50 [00:10<00:39,  1.01s/it]

Epoch 11/50, Loss: 0.0917


Training Epochs:  24%|██▍       | 12/50 [00:11<00:36,  1.05it/s]

Epoch 12/50, Loss: 0.0902


Training Epochs:  26%|██▌       | 13/50 [00:12<00:34,  1.08it/s]

Epoch 13/50, Loss: 0.0882


Training Epochs:  28%|██▊       | 14/50 [00:13<00:32,  1.12it/s]

Epoch 14/50, Loss: 0.0911


Training Epochs:  30%|███       | 15/50 [00:13<00:30,  1.14it/s]

Epoch 15/50, Loss: 0.0860


Training Epochs:  32%|███▏      | 16/50 [00:14<00:29,  1.16it/s]

Epoch 16/50, Loss: 0.0850


Training Epochs:  34%|███▍      | 17/50 [00:15<00:28,  1.17it/s]

Epoch 17/50, Loss: 0.0843


Training Epochs:  36%|███▌      | 18/50 [00:16<00:27,  1.17it/s]

Epoch 18/50, Loss: 0.0843


Training Epochs:  38%|███▊      | 19/50 [00:17<00:26,  1.17it/s]

Epoch 19/50, Loss: 0.0832


Training Epochs:  40%|████      | 20/50 [00:18<00:25,  1.17it/s]

Epoch 20/50, Loss: 0.0823


Training Epochs:  42%|████▏     | 21/50 [00:19<00:24,  1.17it/s]

Epoch 21/50, Loss: 0.0802


Training Epochs:  44%|████▍     | 22/50 [00:20<00:27,  1.03it/s]

Epoch 22/50, Loss: 0.0809


Training Epochs:  46%|████▌     | 23/50 [00:21<00:29,  1.09s/it]

Epoch 23/50, Loss: 0.0798


Training Epochs:  48%|████▊     | 24/50 [00:22<00:27,  1.06s/it]

Epoch 24/50, Loss: 0.0804


Training Epochs:  50%|█████     | 25/50 [00:23<00:24,  1.01it/s]

Epoch 25/50, Loss: 0.0806


Training Epochs:  52%|█████▏    | 26/50 [00:24<00:22,  1.06it/s]

Epoch 26/50, Loss: 0.0815


Training Epochs:  54%|█████▍    | 27/50 [00:25<00:21,  1.09it/s]

Epoch 27/50, Loss: 0.0825


Training Epochs:  56%|█████▌    | 28/50 [00:25<00:19,  1.11it/s]

Epoch 28/50, Loss: 0.0793


Training Epochs:  58%|█████▊    | 29/50 [00:26<00:18,  1.13it/s]

Epoch 29/50, Loss: 0.0801


Training Epochs:  60%|██████    | 30/50 [00:27<00:17,  1.15it/s]

Epoch 30/50, Loss: 0.0792


Training Epochs:  62%|██████▏   | 31/50 [00:28<00:16,  1.16it/s]

Epoch 31/50, Loss: 0.0792


Training Epochs:  64%|██████▍   | 32/50 [00:29<00:15,  1.17it/s]

Epoch 32/50, Loss: 0.0800


Training Epochs:  66%|██████▌   | 33/50 [00:30<00:14,  1.17it/s]

Epoch 33/50, Loss: 0.0790


Training Epochs:  68%|██████▊   | 34/50 [00:31<00:13,  1.17it/s]

Epoch 34/50, Loss: 0.0788


Training Epochs:  70%|███████   | 35/50 [00:31<00:12,  1.16it/s]

Epoch 35/50, Loss: 0.0790


Training Epochs:  72%|███████▏  | 36/50 [00:33<00:13,  1.02it/s]

Epoch 36/50, Loss: 0.0798


Training Epochs:  74%|███████▍  | 37/50 [00:34<00:14,  1.09s/it]

Epoch 37/50, Loss: 0.0799


Training Epochs:  76%|███████▌  | 38/50 [00:35<00:12,  1.07s/it]

Epoch 38/50, Loss: 0.0792


Training Epochs:  78%|███████▊  | 39/50 [00:36<00:11,  1.01s/it]

Epoch 39/50, Loss: 0.0789


Training Epochs:  80%|████████  | 40/50 [00:37<00:09,  1.04it/s]

Epoch 40/50, Loss: 0.0791


Training Epochs:  82%|████████▏ | 41/50 [00:38<00:08,  1.08it/s]

Epoch 41/50, Loss: 0.0820


Training Epochs:  84%|████████▍ | 42/50 [00:39<00:07,  1.08it/s]

Epoch 42/50, Loss: 0.0794


Training Epochs:  86%|████████▌ | 43/50 [00:39<00:06,  1.11it/s]

Epoch 43/50, Loss: 0.0793


Training Epochs:  88%|████████▊ | 44/50 [00:40<00:05,  1.12it/s]

Epoch 44/50, Loss: 0.0791


Training Epochs:  90%|█████████ | 45/50 [00:41<00:04,  1.14it/s]

Epoch 45/50, Loss: 0.0799


Training Epochs:  92%|█████████▏| 46/50 [00:42<00:03,  1.15it/s]

Epoch 46/50, Loss: 0.0797


Training Epochs:  94%|█████████▍| 47/50 [00:43<00:02,  1.15it/s]

Epoch 47/50, Loss: 0.0791


Training Epochs:  96%|█████████▌| 48/50 [00:44<00:01,  1.15it/s]

Epoch 48/50, Loss: 0.0799


Training Epochs:  98%|█████████▊| 49/50 [00:45<00:00,  1.13it/s]

Epoch 49/50, Loss: 0.0808


Training Epochs: 100%|██████████| 50/50 [00:46<00:00,  1.07it/s]

Epoch 50/50, Loss: 0.0791





Test Accuracy: 93.27%

Experimenting with Epochs: 100


Training Epochs:   1%|          | 1/100 [00:00<01:32,  1.07it/s]

Epoch 1/100, Loss: 0.2665


Training Epochs:   2%|▏         | 2/100 [00:01<01:27,  1.12it/s]

Epoch 2/100, Loss: 0.1874


Training Epochs:   3%|▎         | 3/100 [00:02<01:25,  1.14it/s]

Epoch 3/100, Loss: 0.1743


Training Epochs:   4%|▍         | 4/100 [00:03<01:23,  1.14it/s]

Epoch 4/100, Loss: 0.1644


Training Epochs:   5%|▌         | 5/100 [00:04<01:22,  1.15it/s]

Epoch 5/100, Loss: 0.1512


Training Epochs:   6%|▌         | 6/100 [00:05<01:22,  1.15it/s]

Epoch 6/100, Loss: 0.1461


Training Epochs:   7%|▋         | 7/100 [00:06<01:20,  1.15it/s]

Epoch 7/100, Loss: 0.1309


Training Epochs:   8%|▊         | 8/100 [00:07<01:20,  1.14it/s]

Epoch 8/100, Loss: 0.1215


Training Epochs:   9%|▉         | 9/100 [00:07<01:20,  1.14it/s]

Epoch 9/100, Loss: 0.1161


Training Epochs:  10%|█         | 10/100 [00:08<01:19,  1.14it/s]

Epoch 10/100, Loss: 0.1081


Training Epochs:  11%|█         | 11/100 [00:09<01:18,  1.13it/s]

Epoch 11/100, Loss: 0.0931


Training Epochs:  12%|█▏        | 12/100 [00:10<01:21,  1.08it/s]

Epoch 12/100, Loss: 0.0885


Training Epochs:  13%|█▎        | 13/100 [00:12<01:44,  1.20s/it]

Epoch 13/100, Loss: 0.0867


Training Epochs:  14%|█▍        | 14/100 [00:13<01:39,  1.15s/it]

Epoch 14/100, Loss: 0.0860


Training Epochs:  15%|█▌        | 15/100 [00:14<01:30,  1.06s/it]

Epoch 15/100, Loss: 0.0854


Training Epochs:  16%|█▌        | 16/100 [00:15<01:24,  1.01s/it]

Epoch 16/100, Loss: 0.0847


Training Epochs:  17%|█▋        | 17/100 [00:16<01:20,  1.03it/s]

Epoch 17/100, Loss: 0.0839


Training Epochs:  18%|█▊        | 18/100 [00:17<01:17,  1.05it/s]

Epoch 18/100, Loss: 0.0827


Training Epochs:  19%|█▉        | 19/100 [00:17<01:15,  1.08it/s]

Epoch 19/100, Loss: 0.0820


Training Epochs:  20%|██        | 20/100 [00:18<01:12,  1.10it/s]

Epoch 20/100, Loss: 0.0816


Training Epochs:  21%|██        | 21/100 [00:19<01:11,  1.11it/s]

Epoch 21/100, Loss: 0.0797


Training Epochs:  22%|██▏       | 22/100 [00:20<01:09,  1.12it/s]

Epoch 22/100, Loss: 0.0800


Training Epochs:  23%|██▎       | 23/100 [00:21<01:10,  1.10it/s]

Epoch 23/100, Loss: 0.0787


Training Epochs:  24%|██▍       | 24/100 [00:22<01:08,  1.11it/s]

Epoch 24/100, Loss: 0.0788


Training Epochs:  25%|██▌       | 25/100 [00:23<01:11,  1.05it/s]

Epoch 25/100, Loss: 0.0794


Training Epochs:  26%|██▌       | 26/100 [00:24<01:20,  1.08s/it]

Epoch 26/100, Loss: 0.0791


Training Epochs:  27%|██▋       | 27/100 [00:26<01:24,  1.16s/it]

Epoch 27/100, Loss: 0.0784


Training Epochs:  28%|██▊       | 28/100 [00:27<01:18,  1.09s/it]

Epoch 28/100, Loss: 0.0785


Training Epochs:  29%|██▉       | 29/100 [00:28<01:13,  1.04s/it]

Epoch 29/100, Loss: 0.0790


Training Epochs:  30%|███       | 30/100 [00:29<01:15,  1.08s/it]

Epoch 30/100, Loss: 0.0783


Training Epochs:  31%|███       | 31/100 [00:30<01:15,  1.10s/it]

Epoch 31/100, Loss: 0.0785


Training Epochs:  32%|███▏      | 32/100 [00:31<01:11,  1.05s/it]

Epoch 32/100, Loss: 0.0781


Training Epochs:  33%|███▎      | 33/100 [00:32<01:06,  1.00it/s]

Epoch 33/100, Loss: 0.0787


Training Epochs:  34%|███▍      | 34/100 [00:33<01:04,  1.02it/s]

Epoch 34/100, Loss: 0.0781


Training Epochs:  35%|███▌      | 35/100 [00:34<01:02,  1.04it/s]

Epoch 35/100, Loss: 0.0782


Training Epochs:  36%|███▌      | 36/100 [00:34<01:00,  1.06it/s]

Epoch 36/100, Loss: 0.0788


Training Epochs:  37%|███▋      | 37/100 [00:35<00:58,  1.08it/s]

Epoch 37/100, Loss: 0.0795


Training Epochs:  38%|███▊      | 38/100 [00:36<01:01,  1.01it/s]

Epoch 38/100, Loss: 0.0782


Training Epochs:  39%|███▉      | 39/100 [00:38<01:07,  1.11s/it]

Epoch 39/100, Loss: 0.0779


Training Epochs:  40%|████      | 40/100 [00:39<01:12,  1.21s/it]

Epoch 40/100, Loss: 0.0778


Training Epochs:  41%|████      | 41/100 [00:40<01:07,  1.15s/it]

Epoch 41/100, Loss: 0.0783


Training Epochs:  42%|████▏     | 42/100 [00:41<01:01,  1.07s/it]

Epoch 42/100, Loss: 0.0788


Training Epochs:  43%|████▎     | 43/100 [00:42<00:57,  1.02s/it]

Epoch 43/100, Loss: 0.0779


Training Epochs:  44%|████▍     | 44/100 [00:43<00:55,  1.02it/s]

Epoch 44/100, Loss: 0.0782


Training Epochs:  45%|████▌     | 45/100 [00:44<00:52,  1.05it/s]

Epoch 45/100, Loss: 0.0781


Training Epochs:  46%|████▌     | 46/100 [00:45<00:50,  1.07it/s]

Epoch 46/100, Loss: 0.0789


Training Epochs:  47%|████▋     | 47/100 [00:46<00:49,  1.08it/s]

Epoch 47/100, Loss: 0.0784


Training Epochs:  48%|████▊     | 48/100 [00:47<00:48,  1.08it/s]

Epoch 48/100, Loss: 0.0779


Training Epochs:  49%|████▉     | 49/100 [00:48<00:46,  1.09it/s]

Epoch 49/100, Loss: 0.0777


Training Epochs:  50%|█████     | 50/100 [00:48<00:46,  1.08it/s]

Epoch 50/100, Loss: 0.0787


Training Epochs:  51%|█████     | 51/100 [00:49<00:45,  1.08it/s]

Epoch 51/100, Loss: 0.0782


Training Epochs:  52%|█████▏    | 52/100 [00:51<00:50,  1.05s/it]

Epoch 52/100, Loss: 0.0812


Training Epochs:  53%|█████▎    | 53/100 [00:52<00:54,  1.16s/it]

Epoch 53/100, Loss: 0.0782


Training Epochs:  54%|█████▍    | 54/100 [00:54<00:57,  1.25s/it]

Epoch 54/100, Loss: 0.0788


Training Epochs:  55%|█████▌    | 55/100 [00:55<00:52,  1.16s/it]

Epoch 55/100, Loss: 0.0780


Training Epochs:  56%|█████▌    | 56/100 [00:56<00:48,  1.09s/it]

Epoch 56/100, Loss: 0.0777


Training Epochs:  57%|█████▋    | 57/100 [00:56<00:45,  1.05s/it]

Epoch 57/100, Loss: 0.0781


Training Epochs:  58%|█████▊    | 58/100 [00:57<00:42,  1.01s/it]

Epoch 58/100, Loss: 0.0781


Training Epochs:  59%|█████▉    | 59/100 [00:58<00:40,  1.01it/s]

Epoch 59/100, Loss: 0.0787


Training Epochs:  60%|██████    | 60/100 [00:59<00:38,  1.03it/s]

Epoch 60/100, Loss: 0.0782


Training Epochs:  61%|██████    | 61/100 [01:00<00:37,  1.04it/s]

Epoch 61/100, Loss: 0.0779


Training Epochs:  62%|██████▏   | 62/100 [01:01<00:36,  1.05it/s]

Epoch 62/100, Loss: 0.0788


Training Epochs:  63%|██████▎   | 63/100 [01:02<00:34,  1.06it/s]

Epoch 63/100, Loss: 0.0781


Training Epochs:  64%|██████▍   | 64/100 [01:03<00:33,  1.07it/s]

Epoch 64/100, Loss: 0.0781


Training Epochs:  65%|██████▌   | 65/100 [01:04<00:32,  1.06it/s]

Epoch 65/100, Loss: 0.0787


Training Epochs:  66%|██████▌   | 66/100 [01:05<00:36,  1.07s/it]

Epoch 66/100, Loss: 0.0780


Training Epochs:  67%|██████▋   | 67/100 [01:07<00:38,  1.17s/it]

Epoch 67/100, Loss: 0.0779


Training Epochs:  68%|██████▊   | 68/100 [01:08<00:35,  1.11s/it]

Epoch 68/100, Loss: 0.0778


Training Epochs:  69%|██████▉   | 69/100 [01:09<00:32,  1.05s/it]

Epoch 69/100, Loss: 0.0780


Training Epochs:  70%|███████   | 70/100 [01:09<00:30,  1.01s/it]

Epoch 70/100, Loss: 0.0781


Training Epochs:  71%|███████   | 71/100 [01:10<00:28,  1.01it/s]

Epoch 71/100, Loss: 0.0781


Training Epochs:  72%|███████▏  | 72/100 [01:11<00:27,  1.02it/s]

Epoch 72/100, Loss: 0.0783


Training Epochs:  73%|███████▎  | 73/100 [01:12<00:26,  1.03it/s]

Epoch 73/100, Loss: 0.0786


Training Epochs:  74%|███████▍  | 74/100 [01:13<00:24,  1.05it/s]

Epoch 74/100, Loss: 0.0786


Training Epochs:  75%|███████▌  | 75/100 [01:14<00:23,  1.06it/s]

Epoch 75/100, Loss: 0.0779


Training Epochs:  76%|███████▌  | 76/100 [01:15<00:22,  1.07it/s]

Epoch 76/100, Loss: 0.0780


Training Epochs:  77%|███████▋  | 77/100 [01:16<00:21,  1.07it/s]

Epoch 77/100, Loss: 0.0783


Training Epochs:  78%|███████▊  | 78/100 [01:17<00:20,  1.06it/s]

Epoch 78/100, Loss: 0.0779


Training Epochs:  79%|███████▉  | 79/100 [01:18<00:22,  1.09s/it]

Epoch 79/100, Loss: 0.0780


Training Epochs:  80%|████████  | 80/100 [01:20<00:23,  1.20s/it]

Epoch 80/100, Loss: 0.0778


Training Epochs:  81%|████████  | 81/100 [01:21<00:21,  1.14s/it]

Epoch 81/100, Loss: 0.0781


Training Epochs:  82%|████████▏ | 82/100 [01:22<00:19,  1.09s/it]

Epoch 82/100, Loss: 0.0782


Training Epochs:  83%|████████▎ | 83/100 [01:23<00:17,  1.05s/it]

Epoch 83/100, Loss: 0.0778


Training Epochs:  84%|████████▍ | 84/100 [01:24<00:16,  1.01s/it]

Epoch 84/100, Loss: 0.0779


Training Epochs:  85%|████████▌ | 85/100 [01:25<00:14,  1.02it/s]

Epoch 85/100, Loss: 0.0786


Training Epochs:  86%|████████▌ | 86/100 [01:26<00:13,  1.04it/s]

Epoch 86/100, Loss: 0.0783


Training Epochs:  87%|████████▋ | 87/100 [01:26<00:12,  1.04it/s]

Epoch 87/100, Loss: 0.0779


Training Epochs:  88%|████████▊ | 88/100 [01:27<00:11,  1.05it/s]

Epoch 88/100, Loss: 0.0796


Training Epochs:  89%|████████▉ | 89/100 [01:28<00:10,  1.06it/s]

Epoch 89/100, Loss: 0.0781


Training Epochs:  90%|█████████ | 90/100 [01:29<00:09,  1.06it/s]

Epoch 90/100, Loss: 0.0781


Training Epochs:  91%|█████████ | 91/100 [01:30<00:08,  1.04it/s]

Epoch 91/100, Loss: 0.0780


Training Epochs:  92%|█████████▏| 92/100 [01:32<00:08,  1.10s/it]

Epoch 92/100, Loss: 0.0791


Training Epochs:  93%|█████████▎| 93/100 [01:33<00:08,  1.21s/it]

Epoch 93/100, Loss: 0.0781


Training Epochs:  94%|█████████▍| 94/100 [01:34<00:06,  1.15s/it]

Epoch 94/100, Loss: 0.0779


Training Epochs:  95%|█████████▌| 95/100 [01:35<00:05,  1.09s/it]

Epoch 95/100, Loss: 0.0778


Training Epochs:  96%|█████████▌| 96/100 [01:36<00:04,  1.05s/it]

Epoch 96/100, Loss: 0.0779


Training Epochs:  97%|█████████▋| 97/100 [01:37<00:03,  1.01s/it]

Epoch 97/100, Loss: 0.0781


Training Epochs:  98%|█████████▊| 98/100 [01:38<00:01,  1.00it/s]

Epoch 98/100, Loss: 0.0784


Training Epochs:  99%|█████████▉| 99/100 [01:39<00:00,  1.02it/s]

Epoch 99/100, Loss: 0.0783


Training Epochs: 100%|██████████| 100/100 [01:40<00:00,  1.00s/it]

Epoch 100/100, Loss: 0.0781





Test Accuracy: 93.27%

Experimenting with Epochs: 250


Training Epochs:   0%|          | 1/250 [00:00<03:52,  1.07it/s]

Epoch 1/250, Loss: 0.2833


Training Epochs:   1%|          | 2/250 [00:01<03:50,  1.08it/s]

Epoch 2/250, Loss: 0.1949


Training Epochs:   1%|          | 3/250 [00:02<03:51,  1.07it/s]

Epoch 3/250, Loss: 0.1781


Training Epochs:   2%|▏         | 4/250 [00:04<04:47,  1.17s/it]

Epoch 4/250, Loss: 0.1672


Training Epochs:   2%|▏         | 5/250 [00:05<05:13,  1.28s/it]

Epoch 5/250, Loss: 0.1568


Training Epochs:   2%|▏         | 6/250 [00:06<04:51,  1.19s/it]

Epoch 6/250, Loss: 0.1410


Training Epochs:   3%|▎         | 7/250 [00:07<04:32,  1.12s/it]

Epoch 7/250, Loss: 0.1367


Training Epochs:   3%|▎         | 8/250 [00:08<04:17,  1.07s/it]

Epoch 8/250, Loss: 0.1242


Training Epochs:   4%|▎         | 9/250 [00:09<04:07,  1.03s/it]

Epoch 9/250, Loss: 0.1165


Training Epochs:   4%|▍         | 10/250 [00:10<04:00,  1.00s/it]

Epoch 10/250, Loss: 0.1092


Training Epochs:   4%|▍         | 11/250 [00:11<04:06,  1.03s/it]

Epoch 11/250, Loss: 0.0898


Training Epochs:   5%|▍         | 12/250 [00:12<03:59,  1.00s/it]

Epoch 12/250, Loss: 0.0868


Training Epochs:   5%|▌         | 13/250 [00:13<03:55,  1.01it/s]

Epoch 13/250, Loss: 0.0848


Training Epochs:   6%|▌         | 14/250 [00:14<03:51,  1.02it/s]

Epoch 14/250, Loss: 0.0836


Training Epochs:   6%|▌         | 15/250 [00:15<03:48,  1.03it/s]

Epoch 15/250, Loss: 0.0828


Training Epochs:   6%|▋         | 16/250 [00:16<04:06,  1.05s/it]

Epoch 16/250, Loss: 0.0808


Training Epochs:   7%|▋         | 17/250 [00:18<04:37,  1.19s/it]

Epoch 17/250, Loss: 0.0795


Training Epochs:   7%|▋         | 18/250 [00:19<04:35,  1.19s/it]

Epoch 18/250, Loss: 0.0807


Training Epochs:   8%|▊         | 19/250 [00:20<04:19,  1.12s/it]

Epoch 19/250, Loss: 0.0784


Training Epochs:   8%|▊         | 20/250 [00:21<04:06,  1.07s/it]

Epoch 20/250, Loss: 0.0768


Training Epochs:   8%|▊         | 21/250 [00:22<03:57,  1.04s/it]

Epoch 21/250, Loss: 0.0756


Training Epochs:   9%|▉         | 22/250 [00:23<03:50,  1.01s/it]

Epoch 22/250, Loss: 0.0748


Training Epochs:   9%|▉         | 23/250 [00:24<03:44,  1.01it/s]

Epoch 23/250, Loss: 0.0753


Training Epochs:  10%|▉         | 24/250 [00:25<03:42,  1.02it/s]

Epoch 24/250, Loss: 0.0750


Training Epochs:  10%|█         | 25/250 [00:26<03:40,  1.02it/s]

Epoch 25/250, Loss: 0.0746


Training Epochs:  10%|█         | 26/250 [00:27<03:39,  1.02it/s]

Epoch 26/250, Loss: 0.0748


Training Epochs:  11%|█         | 27/250 [00:28<03:36,  1.03it/s]

Epoch 27/250, Loss: 0.0744


Training Epochs:  11%|█         | 28/250 [00:29<03:39,  1.01it/s]

Epoch 28/250, Loss: 0.0748


Training Epochs:  12%|█▏        | 29/250 [00:30<04:19,  1.18s/it]

Epoch 29/250, Loss: 0.0743


Training Epochs:  12%|█▏        | 30/250 [00:32<04:45,  1.30s/it]

Epoch 30/250, Loss: 0.0741


Training Epochs:  12%|█▏        | 31/250 [00:33<04:27,  1.22s/it]

Epoch 31/250, Loss: 0.0748


Training Epochs:  13%|█▎        | 32/250 [00:34<04:10,  1.15s/it]

Epoch 32/250, Loss: 0.0739


Training Epochs:  13%|█▎        | 33/250 [00:35<03:59,  1.10s/it]

Epoch 33/250, Loss: 0.0738


Training Epochs:  14%|█▎        | 34/250 [00:36<03:52,  1.08s/it]

Epoch 34/250, Loss: 0.0765


Training Epochs:  14%|█▍        | 35/250 [00:37<03:45,  1.05s/it]

Epoch 35/250, Loss: 0.0744


Training Epochs:  14%|█▍        | 36/250 [00:38<03:40,  1.03s/it]

Epoch 36/250, Loss: 0.0747


Training Epochs:  15%|█▍        | 37/250 [00:39<03:35,  1.01s/it]

Epoch 37/250, Loss: 0.0737


Training Epochs:  15%|█▌        | 38/250 [00:40<03:33,  1.01s/it]

Epoch 38/250, Loss: 0.0742


Training Epochs:  16%|█▌        | 39/250 [00:41<03:31,  1.00s/it]

Epoch 39/250, Loss: 0.0738


Training Epochs:  16%|█▌        | 40/250 [00:42<03:43,  1.07s/it]

Epoch 40/250, Loss: 0.0738


Training Epochs:  16%|█▋        | 41/250 [00:44<04:09,  1.19s/it]

Epoch 41/250, Loss: 0.0742


Training Epochs:  17%|█▋        | 42/250 [00:45<04:29,  1.29s/it]

Epoch 42/250, Loss: 0.0742


Training Epochs:  17%|█▋        | 43/250 [00:46<04:17,  1.24s/it]

Epoch 43/250, Loss: 0.0738


Training Epochs:  18%|█▊        | 44/250 [00:47<03:58,  1.16s/it]

Epoch 44/250, Loss: 0.0737


Training Epochs:  18%|█▊        | 45/250 [00:48<03:46,  1.10s/it]

Epoch 45/250, Loss: 0.0745


Training Epochs:  18%|█▊        | 46/250 [00:49<03:36,  1.06s/it]

Epoch 46/250, Loss: 0.0740


Training Epochs:  19%|█▉        | 47/250 [00:50<03:30,  1.04s/it]

Epoch 47/250, Loss: 0.0736


Training Epochs:  19%|█▉        | 48/250 [00:51<03:26,  1.02s/it]

Epoch 48/250, Loss: 0.0743


Training Epochs:  20%|█▉        | 49/250 [00:52<03:24,  1.02s/it]

Epoch 49/250, Loss: 0.0746


Training Epochs:  20%|██        | 50/250 [00:53<03:21,  1.01s/it]

Epoch 50/250, Loss: 0.0741


Training Epochs:  20%|██        | 51/250 [00:54<03:20,  1.01s/it]

Epoch 51/250, Loss: 0.0738


Training Epochs:  21%|██        | 52/250 [00:55<03:20,  1.01s/it]

Epoch 52/250, Loss: 0.0741


Training Epochs:  21%|██        | 53/250 [00:56<03:41,  1.12s/it]

Epoch 53/250, Loss: 0.0745


Training Epochs:  22%|██▏       | 54/250 [00:58<04:07,  1.26s/it]

Epoch 54/250, Loss: 0.0746


Training Epochs:  22%|██▏       | 55/250 [00:59<03:56,  1.21s/it]

Epoch 55/250, Loss: 0.0737


Training Epochs:  22%|██▏       | 56/250 [01:00<03:41,  1.14s/it]

Epoch 56/250, Loss: 0.0739


Training Epochs:  23%|██▎       | 57/250 [01:01<03:31,  1.10s/it]

Epoch 57/250, Loss: 0.0740


Training Epochs:  23%|██▎       | 58/250 [01:02<03:25,  1.07s/it]

Epoch 58/250, Loss: 0.0737


Training Epochs:  24%|██▎       | 59/250 [01:03<03:20,  1.05s/it]

Epoch 59/250, Loss: 0.0738


Training Epochs:  24%|██▍       | 60/250 [01:04<03:17,  1.04s/it]

Epoch 60/250, Loss: 0.0746


Training Epochs:  24%|██▍       | 61/250 [01:05<03:14,  1.03s/it]

Epoch 61/250, Loss: 0.0738


Training Epochs:  25%|██▍       | 62/250 [01:06<03:12,  1.02s/it]

Epoch 62/250, Loss: 0.0737


Training Epochs:  25%|██▌       | 63/250 [01:07<03:12,  1.03s/it]

Epoch 63/250, Loss: 0.0740


Training Epochs:  26%|██▌       | 64/250 [01:08<03:10,  1.03s/it]

Epoch 64/250, Loss: 0.0740


Training Epochs:  26%|██▌       | 65/250 [01:10<03:31,  1.14s/it]

Epoch 65/250, Loss: 0.0737


Training Epochs:  26%|██▋       | 66/250 [01:11<03:52,  1.26s/it]

Epoch 66/250, Loss: 0.0736


Training Epochs:  27%|██▋       | 67/250 [01:12<03:50,  1.26s/it]

Epoch 67/250, Loss: 0.0742


Training Epochs:  27%|██▋       | 68/250 [01:13<03:35,  1.19s/it]

Epoch 68/250, Loss: 0.0748


Training Epochs:  28%|██▊       | 69/250 [01:14<03:25,  1.13s/it]

Epoch 69/250, Loss: 0.0738


Training Epochs:  28%|██▊       | 70/250 [01:15<03:18,  1.10s/it]

Epoch 70/250, Loss: 0.0736


Training Epochs:  28%|██▊       | 71/250 [01:16<03:13,  1.08s/it]

Epoch 71/250, Loss: 0.0741


Training Epochs:  29%|██▉       | 72/250 [01:17<03:08,  1.06s/it]

Epoch 72/250, Loss: 0.0748


Training Epochs:  29%|██▉       | 73/250 [01:19<03:04,  1.04s/it]

Epoch 73/250, Loss: 0.0743


Training Epochs:  30%|██▉       | 74/250 [01:20<03:01,  1.03s/it]

Epoch 74/250, Loss: 0.0742


Training Epochs:  30%|███       | 75/250 [01:21<02:59,  1.03s/it]

Epoch 75/250, Loss: 0.0737


Training Epochs:  30%|███       | 76/250 [01:22<03:01,  1.04s/it]

Epoch 76/250, Loss: 0.0737


Training Epochs:  31%|███       | 77/250 [01:23<03:29,  1.21s/it]

Epoch 77/250, Loss: 0.0739


Training Epochs:  31%|███       | 78/250 [01:25<03:50,  1.34s/it]

Epoch 78/250, Loss: 0.0739


Training Epochs:  32%|███▏      | 79/250 [01:26<03:33,  1.25s/it]

Epoch 79/250, Loss: 0.0740


Training Epochs:  32%|███▏      | 80/250 [01:27<03:20,  1.18s/it]

Epoch 80/250, Loss: 0.0738


Training Epochs:  32%|███▏      | 81/250 [01:28<03:10,  1.13s/it]

Epoch 81/250, Loss: 0.0740


Training Epochs:  33%|███▎      | 82/250 [01:29<03:04,  1.10s/it]

Epoch 82/250, Loss: 0.0737


Training Epochs:  33%|███▎      | 83/250 [01:30<02:58,  1.07s/it]

Epoch 83/250, Loss: 0.0742


Training Epochs:  34%|███▎      | 84/250 [01:31<02:56,  1.06s/it]

Epoch 84/250, Loss: 0.0744


Training Epochs:  34%|███▍      | 85/250 [01:32<02:53,  1.05s/it]

Epoch 85/250, Loss: 0.0743


Training Epochs:  34%|███▍      | 86/250 [01:33<02:51,  1.05s/it]

Epoch 86/250, Loss: 0.0737


Training Epochs:  35%|███▍      | 87/250 [01:34<02:49,  1.04s/it]

Epoch 87/250, Loss: 0.0742


Training Epochs:  35%|███▌      | 88/250 [01:35<02:51,  1.06s/it]

Epoch 88/250, Loss: 0.0738


Training Epochs:  36%|███▌      | 89/250 [01:37<03:17,  1.23s/it]

Epoch 89/250, Loss: 0.0737


Training Epochs:  36%|███▌      | 90/250 [01:38<03:30,  1.31s/it]

Epoch 90/250, Loss: 0.0745


Training Epochs:  36%|███▋      | 91/250 [01:39<03:16,  1.24s/it]

Epoch 91/250, Loss: 0.0738


Training Epochs:  37%|███▋      | 92/250 [01:40<03:05,  1.17s/it]

Epoch 92/250, Loss: 0.0738


Training Epochs:  37%|███▋      | 93/250 [01:41<02:59,  1.14s/it]

Epoch 93/250, Loss: 0.0741


Training Epochs:  38%|███▊      | 94/250 [01:43<02:54,  1.12s/it]

Epoch 94/250, Loss: 0.0736


Training Epochs:  38%|███▊      | 95/250 [01:44<02:48,  1.08s/it]

Epoch 95/250, Loss: 0.0737


Training Epochs:  38%|███▊      | 96/250 [01:45<02:44,  1.07s/it]

Epoch 96/250, Loss: 0.0745


Training Epochs:  39%|███▉      | 97/250 [01:46<02:41,  1.06s/it]

Epoch 97/250, Loss: 0.0738


Training Epochs:  39%|███▉      | 98/250 [01:47<02:38,  1.04s/it]

Epoch 98/250, Loss: 0.0739


Training Epochs:  40%|███▉      | 99/250 [01:48<02:35,  1.03s/it]

Epoch 99/250, Loss: 0.0741


Training Epochs:  40%|████      | 100/250 [01:49<02:35,  1.04s/it]

Epoch 100/250, Loss: 0.0736


Training Epochs:  40%|████      | 101/250 [01:50<02:57,  1.19s/it]

Epoch 101/250, Loss: 0.0741


Training Epochs:  41%|████      | 102/250 [01:52<03:13,  1.31s/it]

Epoch 102/250, Loss: 0.0738


Training Epochs:  41%|████      | 103/250 [01:53<03:01,  1.23s/it]

Epoch 103/250, Loss: 0.0744


Training Epochs:  42%|████▏     | 104/250 [01:54<02:50,  1.16s/it]

Epoch 104/250, Loss: 0.0740


Training Epochs:  42%|████▏     | 105/250 [01:55<02:41,  1.11s/it]

Epoch 105/250, Loss: 0.0736


Training Epochs:  42%|████▏     | 106/250 [01:56<02:35,  1.08s/it]

Epoch 106/250, Loss: 0.0740


Training Epochs:  43%|████▎     | 107/250 [01:57<02:31,  1.06s/it]

Epoch 107/250, Loss: 0.0741


Training Epochs:  43%|████▎     | 108/250 [01:58<02:28,  1.05s/it]

Epoch 108/250, Loss: 0.0740


Training Epochs:  44%|████▎     | 109/250 [01:59<02:26,  1.04s/it]

Epoch 109/250, Loss: 0.0744


Training Epochs:  44%|████▍     | 110/250 [02:00<02:23,  1.03s/it]

Epoch 110/250, Loss: 0.0757


Training Epochs:  44%|████▍     | 111/250 [02:01<02:22,  1.02s/it]

Epoch 111/250, Loss: 0.0751


Training Epochs:  45%|████▍     | 112/250 [02:02<02:20,  1.02s/it]

Epoch 112/250, Loss: 0.0736


Training Epochs:  45%|████▌     | 113/250 [02:03<02:40,  1.17s/it]

Epoch 113/250, Loss: 0.0744


Training Epochs:  46%|████▌     | 114/250 [02:05<02:58,  1.31s/it]

Epoch 114/250, Loss: 0.0744


Training Epochs:  46%|████▌     | 115/250 [02:06<02:47,  1.24s/it]

Epoch 115/250, Loss: 0.0745


Training Epochs:  46%|████▋     | 116/250 [02:07<02:38,  1.18s/it]

Epoch 116/250, Loss: 0.0745


Training Epochs:  47%|████▋     | 117/250 [02:08<02:31,  1.14s/it]

Epoch 117/250, Loss: 0.0739


Training Epochs:  47%|████▋     | 118/250 [02:09<02:27,  1.12s/it]

Epoch 118/250, Loss: 0.0741


Training Epochs:  48%|████▊     | 119/250 [02:10<02:23,  1.09s/it]

Epoch 119/250, Loss: 0.0744


Training Epochs:  48%|████▊     | 120/250 [02:11<02:19,  1.07s/it]

Epoch 120/250, Loss: 0.0747


Training Epochs:  48%|████▊     | 121/250 [02:12<02:18,  1.07s/it]

Epoch 121/250, Loss: 0.0738


Training Epochs:  49%|████▉     | 122/250 [02:14<02:20,  1.10s/it]

Epoch 122/250, Loss: 0.0739


Training Epochs:  49%|████▉     | 123/250 [02:15<02:16,  1.07s/it]

Epoch 123/250, Loss: 0.0738


Training Epochs:  50%|████▉     | 124/250 [02:16<02:22,  1.13s/it]

Epoch 124/250, Loss: 0.0741


Training Epochs:  50%|█████     | 125/250 [02:17<02:38,  1.27s/it]

Epoch 125/250, Loss: 0.0735


Training Epochs:  50%|█████     | 126/250 [02:19<02:35,  1.25s/it]

Epoch 126/250, Loss: 0.0738


Training Epochs:  51%|█████     | 127/250 [02:20<02:25,  1.19s/it]

Epoch 127/250, Loss: 0.0738


Training Epochs:  51%|█████     | 128/250 [02:21<02:18,  1.14s/it]

Epoch 128/250, Loss: 0.0736


Training Epochs:  52%|█████▏    | 129/250 [02:22<02:14,  1.11s/it]

Epoch 129/250, Loss: 0.0742


Training Epochs:  52%|█████▏    | 130/250 [02:23<02:12,  1.10s/it]

Epoch 130/250, Loss: 0.0752


Training Epochs:  52%|█████▏    | 131/250 [02:24<02:13,  1.12s/it]

Epoch 131/250, Loss: 0.0737


Training Epochs:  53%|█████▎    | 132/250 [02:25<02:14,  1.14s/it]

Epoch 132/250, Loss: 0.0737


Training Epochs:  53%|█████▎    | 133/250 [02:26<02:12,  1.13s/it]

Epoch 133/250, Loss: 0.0739


Training Epochs:  54%|█████▎    | 134/250 [02:31<04:03,  2.10s/it]

Epoch 134/250, Loss: 0.0741


Training Epochs:  54%|█████▍    | 135/250 [02:32<03:29,  1.82s/it]

Epoch 135/250, Loss: 0.0738


Training Epochs:  54%|█████▍    | 136/250 [02:33<03:01,  1.60s/it]

Epoch 136/250, Loss: 0.0740


Training Epochs:  55%|█████▍    | 137/250 [02:34<02:43,  1.45s/it]

Epoch 137/250, Loss: 0.0739


Training Epochs:  55%|█████▌    | 138/250 [02:35<02:29,  1.34s/it]

Epoch 138/250, Loss: 0.0743


Training Epochs:  56%|█████▌    | 139/250 [02:36<02:19,  1.26s/it]

Epoch 139/250, Loss: 0.0747


Training Epochs:  56%|█████▌    | 140/250 [02:37<02:12,  1.20s/it]

Epoch 140/250, Loss: 0.0737


Training Epochs:  56%|█████▋    | 141/250 [02:38<02:07,  1.17s/it]

Epoch 141/250, Loss: 0.0737


Training Epochs:  57%|█████▋    | 142/250 [02:39<02:02,  1.14s/it]

Epoch 142/250, Loss: 0.0738


Training Epochs:  57%|█████▋    | 143/250 [02:40<01:59,  1.12s/it]

Epoch 143/250, Loss: 0.0740


Training Epochs:  58%|█████▊    | 144/250 [02:42<02:04,  1.17s/it]

Epoch 144/250, Loss: 0.0740


Training Epochs:  58%|█████▊    | 145/250 [02:44<02:20,  1.34s/it]

Epoch 145/250, Loss: 0.0737


Training Epochs:  58%|█████▊    | 146/250 [02:45<02:27,  1.42s/it]

Epoch 146/250, Loss: 0.0738


Training Epochs:  59%|█████▉    | 147/250 [02:46<02:17,  1.33s/it]

Epoch 147/250, Loss: 0.0736


Training Epochs:  59%|█████▉    | 148/250 [02:47<02:08,  1.26s/it]

Epoch 148/250, Loss: 0.0741


Training Epochs:  60%|█████▉    | 149/250 [02:48<02:01,  1.20s/it]

Epoch 149/250, Loss: 0.0736


Training Epochs:  60%|██████    | 150/250 [02:50<01:56,  1.16s/it]

Epoch 150/250, Loss: 0.0735


Training Epochs:  60%|██████    | 151/250 [02:51<01:51,  1.13s/it]

Epoch 151/250, Loss: 0.0746


Training Epochs:  61%|██████    | 152/250 [02:52<01:49,  1.11s/it]

Epoch 152/250, Loss: 0.0737


Training Epochs:  61%|██████    | 153/250 [02:53<01:47,  1.11s/it]

Epoch 153/250, Loss: 0.0740


Training Epochs:  62%|██████▏   | 154/250 [02:54<01:46,  1.11s/it]

Epoch 154/250, Loss: 0.0739


Training Epochs:  62%|██████▏   | 155/250 [02:55<01:44,  1.11s/it]

Epoch 155/250, Loss: 0.0738


Training Epochs:  62%|██████▏   | 156/250 [02:56<01:54,  1.22s/it]

Epoch 156/250, Loss: 0.0740


Training Epochs:  63%|██████▎   | 157/250 [02:58<02:08,  1.38s/it]

Epoch 157/250, Loss: 0.0747


Training Epochs:  63%|██████▎   | 158/250 [02:59<02:00,  1.31s/it]

Epoch 158/250, Loss: 0.0738


Training Epochs:  64%|██████▎   | 159/250 [03:00<01:53,  1.24s/it]

Epoch 159/250, Loss: 0.0736


Training Epochs:  64%|██████▍   | 160/250 [03:01<01:47,  1.19s/it]

Epoch 160/250, Loss: 0.0736


Training Epochs:  64%|██████▍   | 161/250 [03:03<01:43,  1.16s/it]

Epoch 161/250, Loss: 0.0738


Training Epochs:  65%|██████▍   | 162/250 [03:04<01:40,  1.14s/it]

Epoch 162/250, Loss: 0.0737


Training Epochs:  65%|██████▌   | 163/250 [03:05<01:37,  1.13s/it]

Epoch 163/250, Loss: 0.0740


Training Epochs:  66%|██████▌   | 164/250 [03:06<01:35,  1.11s/it]

Epoch 164/250, Loss: 0.0744


Training Epochs:  66%|██████▌   | 165/250 [03:07<01:33,  1.11s/it]

Epoch 165/250, Loss: 0.0748


Training Epochs:  66%|██████▋   | 166/250 [03:08<01:33,  1.11s/it]

Epoch 166/250, Loss: 0.0752


Training Epochs:  67%|██████▋   | 167/250 [03:10<01:41,  1.23s/it]

Epoch 167/250, Loss: 0.0744


Training Epochs:  67%|██████▋   | 168/250 [03:11<01:53,  1.38s/it]

Epoch 168/250, Loss: 0.0746


Training Epochs:  68%|██████▊   | 169/250 [03:12<01:46,  1.32s/it]

Epoch 169/250, Loss: 0.0737


Training Epochs:  68%|██████▊   | 170/250 [03:14<01:40,  1.26s/it]

Epoch 170/250, Loss: 0.0736


Training Epochs:  68%|██████▊   | 171/250 [03:15<01:35,  1.21s/it]

Epoch 171/250, Loss: 0.0744


Training Epochs:  69%|██████▉   | 172/250 [03:16<01:31,  1.17s/it]

Epoch 172/250, Loss: 0.0739


Training Epochs:  69%|██████▉   | 173/250 [03:17<01:28,  1.15s/it]

Epoch 173/250, Loss: 0.0739


Training Epochs:  70%|██████▉   | 174/250 [03:18<01:26,  1.14s/it]

Epoch 174/250, Loss: 0.0742


Training Epochs:  70%|███████   | 175/250 [03:19<01:24,  1.13s/it]

Epoch 175/250, Loss: 0.0737


Training Epochs:  70%|███████   | 176/250 [03:20<01:23,  1.12s/it]

Epoch 176/250, Loss: 0.0739


Training Epochs:  71%|███████   | 177/250 [03:21<01:21,  1.11s/it]

Epoch 177/250, Loss: 0.0739


Training Epochs:  71%|███████   | 178/250 [03:23<01:31,  1.27s/it]

Epoch 178/250, Loss: 0.0738


Training Epochs:  72%|███████▏  | 179/250 [03:25<01:39,  1.40s/it]

Epoch 179/250, Loss: 0.0737


Training Epochs:  72%|███████▏  | 180/250 [03:26<01:32,  1.32s/it]

Epoch 180/250, Loss: 0.0738


Training Epochs:  72%|███████▏  | 181/250 [03:27<01:26,  1.25s/it]

Epoch 181/250, Loss: 0.0737


Training Epochs:  73%|███████▎  | 182/250 [03:28<01:21,  1.20s/it]

Epoch 182/250, Loss: 0.0738


Training Epochs:  73%|███████▎  | 183/250 [03:29<01:17,  1.16s/it]

Epoch 183/250, Loss: 0.0737


Training Epochs:  74%|███████▎  | 184/250 [03:30<01:14,  1.13s/it]

Epoch 184/250, Loss: 0.0741


Training Epochs:  74%|███████▍  | 185/250 [03:31<01:12,  1.12s/it]

Epoch 185/250, Loss: 0.0737


Training Epochs:  74%|███████▍  | 186/250 [03:32<01:11,  1.11s/it]

Epoch 186/250, Loss: 0.0738


Training Epochs:  75%|███████▍  | 187/250 [03:33<01:10,  1.11s/it]

Epoch 187/250, Loss: 0.0746


Training Epochs:  75%|███████▌  | 188/250 [03:34<01:08,  1.10s/it]

Epoch 188/250, Loss: 0.0747


Training Epochs:  76%|███████▌  | 189/250 [03:36<01:13,  1.21s/it]

Epoch 189/250, Loss: 0.0737


Training Epochs:  76%|███████▌  | 190/250 [03:38<01:21,  1.36s/it]

Epoch 190/250, Loss: 0.0746


Training Epochs:  76%|███████▋  | 191/250 [03:39<01:17,  1.32s/it]

Epoch 191/250, Loss: 0.0737


Training Epochs:  77%|███████▋  | 192/250 [03:40<01:12,  1.25s/it]

Epoch 192/250, Loss: 0.0740


Training Epochs:  77%|███████▋  | 193/250 [03:41<01:08,  1.21s/it]

Epoch 193/250, Loss: 0.0746


Training Epochs:  78%|███████▊  | 194/250 [03:42<01:05,  1.17s/it]

Epoch 194/250, Loss: 0.0743


Training Epochs:  78%|███████▊  | 195/250 [03:43<01:03,  1.15s/it]

Epoch 195/250, Loss: 0.0738


Training Epochs:  78%|███████▊  | 196/250 [03:45<01:04,  1.20s/it]

Epoch 196/250, Loss: 0.0736


Training Epochs:  79%|███████▉  | 197/250 [03:46<01:01,  1.16s/it]

Epoch 197/250, Loss: 0.0745


Training Epochs:  79%|███████▉  | 198/250 [03:47<00:59,  1.14s/it]

Epoch 198/250, Loss: 0.0738


Training Epochs:  80%|███████▉  | 199/250 [03:48<00:57,  1.13s/it]

Epoch 199/250, Loss: 0.0738


Training Epochs:  80%|████████  | 200/250 [03:49<01:03,  1.27s/it]

Epoch 200/250, Loss: 0.0741


Training Epochs:  80%|████████  | 201/250 [03:51<01:08,  1.39s/it]

Epoch 201/250, Loss: 0.0757


Training Epochs:  81%|████████  | 202/250 [03:52<01:04,  1.34s/it]

Epoch 202/250, Loss: 0.0744


Training Epochs:  81%|████████  | 203/250 [03:53<00:59,  1.27s/it]

Epoch 203/250, Loss: 0.0736


Training Epochs:  82%|████████▏ | 204/250 [03:55<00:56,  1.22s/it]

Epoch 204/250, Loss: 0.0738


Training Epochs:  82%|████████▏ | 205/250 [03:56<00:53,  1.19s/it]

Epoch 205/250, Loss: 0.0737


Training Epochs:  82%|████████▏ | 206/250 [03:57<00:51,  1.16s/it]

Epoch 206/250, Loss: 0.0738


Training Epochs:  83%|████████▎ | 207/250 [03:58<00:48,  1.14s/it]

Epoch 207/250, Loss: 0.0738


Training Epochs:  83%|████████▎ | 208/250 [03:59<00:47,  1.13s/it]

Epoch 208/250, Loss: 0.0739


Training Epochs:  84%|████████▎ | 209/250 [04:00<00:45,  1.12s/it]

Epoch 209/250, Loss: 0.0738


Training Epochs:  84%|████████▍ | 210/250 [04:01<00:44,  1.12s/it]

Epoch 210/250, Loss: 0.0738


Training Epochs:  84%|████████▍ | 211/250 [04:03<00:48,  1.25s/it]

Epoch 211/250, Loss: 0.0736


Training Epochs:  85%|████████▍ | 212/250 [04:04<00:52,  1.39s/it]

Epoch 212/250, Loss: 0.0738


Training Epochs:  85%|████████▌ | 213/250 [04:06<00:49,  1.34s/it]

Epoch 213/250, Loss: 0.0743


Training Epochs:  86%|████████▌ | 214/250 [04:07<00:46,  1.28s/it]

Epoch 214/250, Loss: 0.0737


Training Epochs:  86%|████████▌ | 215/250 [04:08<00:43,  1.24s/it]

Epoch 215/250, Loss: 0.0735


Training Epochs:  86%|████████▋ | 216/250 [04:09<00:40,  1.20s/it]

Epoch 216/250, Loss: 0.0739


Training Epochs:  87%|████████▋ | 217/250 [04:10<00:38,  1.18s/it]

Epoch 217/250, Loss: 0.0739


Training Epochs:  87%|████████▋ | 218/250 [04:11<00:37,  1.16s/it]

Epoch 218/250, Loss: 0.0737


Training Epochs:  88%|████████▊ | 219/250 [04:12<00:35,  1.15s/it]

Epoch 219/250, Loss: 0.0745


Training Epochs:  88%|████████▊ | 220/250 [04:14<00:34,  1.15s/it]

Epoch 220/250, Loss: 0.0740


Training Epochs:  88%|████████▊ | 221/250 [04:15<00:34,  1.18s/it]

Epoch 221/250, Loss: 0.0739


Training Epochs:  89%|████████▉ | 222/250 [04:17<00:38,  1.37s/it]

Epoch 222/250, Loss: 0.0748


Training Epochs:  89%|████████▉ | 223/250 [04:18<00:39,  1.48s/it]

Epoch 223/250, Loss: 0.0741


Training Epochs:  90%|████████▉ | 224/250 [04:20<00:36,  1.40s/it]

Epoch 224/250, Loss: 0.0737


Training Epochs:  90%|█████████ | 225/250 [04:21<00:33,  1.32s/it]

Epoch 225/250, Loss: 0.0745


Training Epochs:  90%|█████████ | 226/250 [04:22<00:30,  1.26s/it]

Epoch 226/250, Loss: 0.0737


Training Epochs:  91%|█████████ | 227/250 [04:23<00:28,  1.22s/it]

Epoch 227/250, Loss: 0.0745


Training Epochs:  91%|█████████ | 228/250 [04:24<00:26,  1.20s/it]

Epoch 228/250, Loss: 0.0738


Training Epochs:  92%|█████████▏| 229/250 [04:25<00:24,  1.19s/it]

Epoch 229/250, Loss: 0.0738


Training Epochs:  92%|█████████▏| 230/250 [04:26<00:23,  1.18s/it]

Epoch 230/250, Loss: 0.0747


Training Epochs:  92%|█████████▏| 231/250 [04:28<00:22,  1.16s/it]

Epoch 231/250, Loss: 0.0736


Training Epochs:  93%|█████████▎| 232/250 [04:29<00:21,  1.21s/it]

Epoch 232/250, Loss: 0.0744


Training Epochs:  93%|█████████▎| 233/250 [04:31<00:24,  1.41s/it]

Epoch 233/250, Loss: 0.0742


Training Epochs:  94%|█████████▎| 234/250 [04:32<00:22,  1.39s/it]

Epoch 234/250, Loss: 0.0738


Training Epochs:  94%|█████████▍| 235/250 [04:33<00:19,  1.32s/it]

Epoch 235/250, Loss: 0.0738


Training Epochs:  94%|█████████▍| 236/250 [04:34<00:17,  1.26s/it]

Epoch 236/250, Loss: 0.0744


Training Epochs:  95%|█████████▍| 237/250 [04:35<00:15,  1.23s/it]

Epoch 237/250, Loss: 0.0738


Training Epochs:  95%|█████████▌| 238/250 [04:37<00:14,  1.20s/it]

Epoch 238/250, Loss: 0.0739


Training Epochs:  96%|█████████▌| 239/250 [04:38<00:13,  1.19s/it]

Epoch 239/250, Loss: 0.0740


Training Epochs:  96%|█████████▌| 240/250 [04:39<00:11,  1.19s/it]

Epoch 240/250, Loss: 0.0740


Training Epochs:  96%|█████████▋| 241/250 [04:40<00:10,  1.18s/it]

Epoch 241/250, Loss: 0.0739


Training Epochs:  97%|█████████▋| 242/250 [04:41<00:09,  1.18s/it]

Epoch 242/250, Loss: 0.0762


Training Epochs:  97%|█████████▋| 243/250 [04:43<00:09,  1.37s/it]

Epoch 243/250, Loss: 0.0749


Training Epochs:  98%|█████████▊| 244/250 [04:45<00:09,  1.52s/it]

Epoch 244/250, Loss: 0.0737


Training Epochs:  98%|█████████▊| 245/250 [04:46<00:07,  1.44s/it]

Epoch 245/250, Loss: 0.0739


Training Epochs:  98%|█████████▊| 246/250 [04:47<00:05,  1.36s/it]

Epoch 246/250, Loss: 0.0740


Training Epochs:  99%|█████████▉| 247/250 [04:49<00:03,  1.30s/it]

Epoch 247/250, Loss: 0.0738


Training Epochs:  99%|█████████▉| 248/250 [04:50<00:02,  1.27s/it]

Epoch 248/250, Loss: 0.0743


Training Epochs: 100%|█████████▉| 249/250 [04:51<00:01,  1.24s/it]

Epoch 249/250, Loss: 0.0745


Training Epochs: 100%|██████████| 250/250 [04:52<00:00,  1.17s/it]

Epoch 250/250, Loss: 0.0736





Test Accuracy: 94.14%

Experimenting with Epochs: 350


Training Epochs:   0%|          | 1/350 [00:01<06:29,  1.12s/it]

Epoch 1/350, Loss: 0.2544


Training Epochs:   1%|          | 2/350 [00:02<06:43,  1.16s/it]

Epoch 2/350, Loss: 0.1887


Training Epochs:   1%|          | 3/350 [00:04<08:26,  1.46s/it]

Epoch 3/350, Loss: 0.1755


Training Epochs:   1%|          | 4/350 [00:06<09:28,  1.64s/it]

Epoch 4/350, Loss: 0.1678


Training Epochs:   1%|▏         | 5/350 [00:07<08:32,  1.49s/it]

Epoch 5/350, Loss: 0.1528


Training Epochs:   2%|▏         | 6/350 [00:08<07:49,  1.37s/it]

Epoch 6/350, Loss: 0.1378


Training Epochs:   2%|▏         | 7/350 [00:09<07:24,  1.30s/it]

Epoch 7/350, Loss: 0.1303


Training Epochs:   2%|▏         | 8/350 [00:10<07:10,  1.26s/it]

Epoch 8/350, Loss: 0.1181


Training Epochs:   3%|▎         | 9/350 [00:11<06:59,  1.23s/it]

Epoch 9/350, Loss: 0.1141


Training Epochs:   3%|▎         | 10/350 [00:13<06:49,  1.20s/it]

Epoch 10/350, Loss: 0.1054


Training Epochs:   3%|▎         | 11/350 [00:14<06:44,  1.19s/it]

Epoch 11/350, Loss: 0.0858


Training Epochs:   3%|▎         | 12/350 [00:15<06:37,  1.18s/it]

Epoch 12/350, Loss: 0.0854


Training Epochs:   4%|▎         | 13/350 [00:16<06:50,  1.22s/it]

Epoch 13/350, Loss: 0.0819


Training Epochs:   4%|▍         | 14/350 [00:18<07:49,  1.40s/it]

Epoch 14/350, Loss: 0.0806


Training Epochs:   4%|▍         | 15/350 [00:19<07:44,  1.39s/it]

Epoch 15/350, Loss: 0.0790


Training Epochs:   5%|▍         | 16/350 [00:20<07:18,  1.31s/it]

Epoch 16/350, Loss: 0.0796


Training Epochs:   5%|▍         | 17/350 [00:22<07:15,  1.31s/it]

Epoch 17/350, Loss: 0.0777


Training Epochs:   5%|▌         | 18/350 [00:23<06:56,  1.26s/it]

Epoch 18/350, Loss: 0.0763


Training Epochs:   5%|▌         | 19/350 [00:24<06:46,  1.23s/it]

Epoch 19/350, Loss: 0.0754


Training Epochs:   6%|▌         | 20/350 [00:25<06:35,  1.20s/it]

Epoch 20/350, Loss: 0.0746


Training Epochs:   6%|▌         | 21/350 [00:26<06:30,  1.19s/it]

Epoch 21/350, Loss: 0.0734


Training Epochs:   6%|▋         | 22/350 [00:27<06:24,  1.17s/it]

Epoch 22/350, Loss: 0.0731


Training Epochs:   7%|▋         | 23/350 [00:29<06:24,  1.18s/it]

Epoch 23/350, Loss: 0.0732


Training Epochs:   7%|▋         | 24/350 [00:30<07:16,  1.34s/it]

Epoch 24/350, Loss: 0.0724


Training Epochs:   7%|▋         | 25/350 [00:32<08:00,  1.48s/it]

Epoch 25/350, Loss: 0.0723


Training Epochs:   7%|▋         | 26/350 [00:33<07:35,  1.40s/it]

Epoch 26/350, Loss: 0.0726


Training Epochs:   8%|▊         | 27/350 [00:35<07:11,  1.34s/it]

Epoch 27/350, Loss: 0.0735


Training Epochs:   8%|▊         | 28/350 [00:36<06:51,  1.28s/it]

Epoch 28/350, Loss: 0.0726


Training Epochs:   8%|▊         | 29/350 [00:37<06:37,  1.24s/it]

Epoch 29/350, Loss: 0.0720


Training Epochs:   9%|▊         | 30/350 [00:38<06:30,  1.22s/it]

Epoch 30/350, Loss: 0.0723


Training Epochs:   9%|▉         | 31/350 [00:39<06:23,  1.20s/it]

Epoch 31/350, Loss: 0.0719


Training Epochs:   9%|▉         | 32/350 [00:40<06:16,  1.19s/it]

Epoch 32/350, Loss: 0.0729


Training Epochs:   9%|▉         | 33/350 [00:42<06:14,  1.18s/it]

Epoch 33/350, Loss: 0.0717


Training Epochs:  10%|▉         | 34/350 [00:43<06:26,  1.22s/it]

Epoch 34/350, Loss: 0.0717


Training Epochs:  10%|█         | 35/350 [00:45<07:21,  1.40s/it]

Epoch 35/350, Loss: 0.0717


Training Epochs:  10%|█         | 36/350 [00:46<07:15,  1.39s/it]

Epoch 36/350, Loss: 0.0724


Training Epochs:  11%|█         | 37/350 [00:47<06:55,  1.33s/it]

Epoch 37/350, Loss: 0.0720


Training Epochs:  11%|█         | 38/350 [00:48<06:37,  1.27s/it]

Epoch 38/350, Loss: 0.0717


Training Epochs:  11%|█         | 39/350 [00:50<06:26,  1.24s/it]

Epoch 39/350, Loss: 0.0738


Training Epochs:  11%|█▏        | 40/350 [00:51<06:16,  1.22s/it]

Epoch 40/350, Loss: 0.0718


Training Epochs:  12%|█▏        | 41/350 [00:52<06:16,  1.22s/it]

Epoch 41/350, Loss: 0.0717


Training Epochs:  12%|█▏        | 42/350 [00:53<06:08,  1.20s/it]

Epoch 42/350, Loss: 0.0716


Training Epochs:  12%|█▏        | 43/350 [00:54<06:04,  1.19s/it]

Epoch 43/350, Loss: 0.0720


Training Epochs:  13%|█▎        | 44/350 [00:56<06:11,  1.21s/it]

Epoch 44/350, Loss: 0.0716


Training Epochs:  13%|█▎        | 45/350 [00:57<07:08,  1.40s/it]

Epoch 45/350, Loss: 0.0723


Training Epochs:  13%|█▎        | 46/350 [00:59<08:01,  1.58s/it]

Epoch 46/350, Loss: 0.0718


Training Epochs:  13%|█▎        | 47/350 [01:01<07:22,  1.46s/it]

Epoch 47/350, Loss: 0.0717


Training Epochs:  14%|█▎        | 48/350 [01:02<06:55,  1.37s/it]

Epoch 48/350, Loss: 0.0717


Training Epochs:  14%|█▍        | 49/350 [01:03<06:36,  1.32s/it]

Epoch 49/350, Loss: 0.0727


Training Epochs:  14%|█▍        | 50/350 [01:04<06:23,  1.28s/it]

Epoch 50/350, Loss: 0.0717


Training Epochs:  15%|█▍        | 51/350 [01:05<06:10,  1.24s/it]

Epoch 51/350, Loss: 0.0718


Training Epochs:  15%|█▍        | 52/350 [01:06<06:03,  1.22s/it]

Epoch 52/350, Loss: 0.0717


Training Epochs:  15%|█▌        | 53/350 [01:08<05:58,  1.21s/it]

Epoch 53/350, Loss: 0.0719


Training Epochs:  15%|█▌        | 54/350 [01:09<05:55,  1.20s/it]

Epoch 54/350, Loss: 0.0722


Training Epochs:  16%|█▌        | 55/350 [01:10<06:23,  1.30s/it]

Epoch 55/350, Loss: 0.0716


Training Epochs:  16%|█▌        | 56/350 [01:12<07:12,  1.47s/it]

Epoch 56/350, Loss: 0.0719


Training Epochs:  16%|█▋        | 57/350 [01:14<06:59,  1.43s/it]

Epoch 57/350, Loss: 0.0716


Training Epochs:  17%|█▋        | 58/350 [01:15<06:36,  1.36s/it]

Epoch 58/350, Loss: 0.0718


Training Epochs:  17%|█▋        | 59/350 [01:16<06:18,  1.30s/it]

Epoch 59/350, Loss: 0.0722


Training Epochs:  17%|█▋        | 60/350 [01:17<06:05,  1.26s/it]

Epoch 60/350, Loss: 0.0718


Training Epochs:  17%|█▋        | 61/350 [01:18<05:56,  1.23s/it]

Epoch 61/350, Loss: 0.0719


Training Epochs:  18%|█▊        | 62/350 [01:19<05:52,  1.22s/it]

Epoch 62/350, Loss: 0.0716


Training Epochs:  18%|█▊        | 63/350 [01:21<05:49,  1.22s/it]

Epoch 63/350, Loss: 0.0727


Training Epochs:  18%|█▊        | 64/350 [01:22<05:46,  1.21s/it]

Epoch 64/350, Loss: 0.0737


Training Epochs:  19%|█▊        | 65/350 [01:23<06:15,  1.32s/it]

Epoch 65/350, Loss: 0.0723


Training Epochs:  19%|█▉        | 66/350 [01:25<06:57,  1.47s/it]

Epoch 66/350, Loss: 0.0717


Training Epochs:  19%|█▉        | 67/350 [01:27<06:46,  1.44s/it]

Epoch 67/350, Loss: 0.0717


Training Epochs:  19%|█▉        | 68/350 [01:28<06:22,  1.36s/it]

Epoch 68/350, Loss: 0.0718


Training Epochs:  20%|█▉        | 69/350 [01:29<06:06,  1.31s/it]

Epoch 69/350, Loss: 0.0717


Training Epochs:  20%|██        | 70/350 [01:30<05:54,  1.27s/it]

Epoch 70/350, Loss: 0.0726


Training Epochs:  20%|██        | 71/350 [01:31<05:47,  1.24s/it]

Epoch 71/350, Loss: 0.0726


Training Epochs:  21%|██        | 72/350 [01:32<05:42,  1.23s/it]

Epoch 72/350, Loss: 0.0725


Training Epochs:  21%|██        | 73/350 [01:34<05:39,  1.23s/it]

Epoch 73/350, Loss: 0.0725


Training Epochs:  21%|██        | 74/350 [01:35<05:35,  1.22s/it]

Epoch 74/350, Loss: 0.0737


Training Epochs:  21%|██▏       | 75/350 [01:36<05:53,  1.29s/it]

Epoch 75/350, Loss: 0.0720


Training Epochs:  22%|██▏       | 76/350 [01:38<06:39,  1.46s/it]

Epoch 76/350, Loss: 0.0722


Training Epochs:  22%|██▏       | 77/350 [01:40<06:28,  1.42s/it]

Epoch 77/350, Loss: 0.0717


Training Epochs:  22%|██▏       | 78/350 [01:41<06:07,  1.35s/it]

Epoch 78/350, Loss: 0.0718


Training Epochs:  23%|██▎       | 79/350 [01:42<05:52,  1.30s/it]

Epoch 79/350, Loss: 0.0718


Training Epochs:  23%|██▎       | 80/350 [01:43<05:43,  1.27s/it]

Epoch 80/350, Loss: 0.0717


Training Epochs:  23%|██▎       | 81/350 [01:44<05:37,  1.25s/it]

Epoch 81/350, Loss: 0.0720


Training Epochs:  23%|██▎       | 82/350 [01:46<05:31,  1.24s/it]

Epoch 82/350, Loss: 0.0728


Training Epochs:  24%|██▎       | 83/350 [01:47<05:25,  1.22s/it]

Epoch 83/350, Loss: 0.0717


Training Epochs:  24%|██▍       | 84/350 [01:48<05:22,  1.21s/it]

Epoch 84/350, Loss: 0.0718


Training Epochs:  24%|██▍       | 85/350 [01:49<05:41,  1.29s/it]

Epoch 85/350, Loss: 0.0716


Training Epochs:  25%|██▍       | 86/350 [01:51<06:27,  1.47s/it]

Epoch 86/350, Loss: 0.0720


Training Epochs:  25%|██▍       | 87/350 [01:53<06:28,  1.48s/it]

Epoch 87/350, Loss: 0.0715


Training Epochs:  25%|██▌       | 88/350 [01:54<06:05,  1.39s/it]

Epoch 88/350, Loss: 0.0718


Training Epochs:  25%|██▌       | 89/350 [01:55<05:50,  1.34s/it]

Epoch 89/350, Loss: 0.0718


Training Epochs:  26%|██▌       | 90/350 [01:56<05:37,  1.30s/it]

Epoch 90/350, Loss: 0.0717


Training Epochs:  26%|██▌       | 91/350 [01:58<05:27,  1.27s/it]

Epoch 91/350, Loss: 0.0716


Training Epochs:  26%|██▋       | 92/350 [01:59<05:22,  1.25s/it]

Epoch 92/350, Loss: 0.0724


Training Epochs:  27%|██▋       | 93/350 [02:00<05:21,  1.25s/it]

Epoch 93/350, Loss: 0.0717


Training Epochs:  27%|██▋       | 94/350 [02:01<05:17,  1.24s/it]

Epoch 94/350, Loss: 0.0724


Training Epochs:  27%|██▋       | 95/350 [02:03<05:39,  1.33s/it]

Epoch 95/350, Loss: 0.0719


Training Epochs:  27%|██▋       | 96/350 [02:05<06:20,  1.50s/it]

Epoch 96/350, Loss: 0.0721


Training Epochs:  28%|██▊       | 97/350 [02:06<06:07,  1.45s/it]

Epoch 97/350, Loss: 0.0717


Training Epochs:  28%|██▊       | 98/350 [02:07<05:48,  1.38s/it]

Epoch 98/350, Loss: 0.0717


Training Epochs:  28%|██▊       | 99/350 [02:08<05:34,  1.33s/it]

Epoch 99/350, Loss: 0.0716


Training Epochs:  29%|██▊       | 100/350 [02:10<05:23,  1.30s/it]

Epoch 100/350, Loss: 0.0716


Training Epochs:  29%|██▉       | 101/350 [02:11<05:16,  1.27s/it]

Epoch 101/350, Loss: 0.0724


Training Epochs:  29%|██▉       | 102/350 [02:12<05:11,  1.26s/it]

Epoch 102/350, Loss: 0.0716


Training Epochs:  29%|██▉       | 103/350 [02:13<05:07,  1.25s/it]

Epoch 103/350, Loss: 0.0720


Training Epochs:  30%|██▉       | 104/350 [02:15<05:05,  1.24s/it]

Epoch 104/350, Loss: 0.0717


Training Epochs:  30%|███       | 105/350 [02:16<05:29,  1.35s/it]

Epoch 105/350, Loss: 0.0718


Training Epochs:  30%|███       | 106/350 [02:18<06:12,  1.53s/it]

Epoch 106/350, Loss: 0.0716


Training Epochs:  31%|███       | 107/350 [02:19<05:59,  1.48s/it]

Epoch 107/350, Loss: 0.0725


Training Epochs:  31%|███       | 108/350 [02:21<05:41,  1.41s/it]

Epoch 108/350, Loss: 0.0719


Training Epochs:  31%|███       | 109/350 [02:22<05:25,  1.35s/it]

Epoch 109/350, Loss: 0.0715


Training Epochs:  31%|███▏      | 110/350 [02:23<05:33,  1.39s/it]

Epoch 110/350, Loss: 0.0716


Training Epochs:  32%|███▏      | 111/350 [02:25<05:22,  1.35s/it]

Epoch 111/350, Loss: 0.0719


Training Epochs:  32%|███▏      | 112/350 [02:26<05:13,  1.32s/it]

Epoch 112/350, Loss: 0.0716


Training Epochs:  32%|███▏      | 113/350 [02:27<05:06,  1.29s/it]

Epoch 113/350, Loss: 0.0720


Training Epochs:  33%|███▎      | 114/350 [02:28<05:02,  1.28s/it]

Epoch 114/350, Loss: 0.0715


Training Epochs:  33%|███▎      | 115/350 [02:30<05:46,  1.47s/it]

Epoch 115/350, Loss: 0.0728


Training Epochs:  33%|███▎      | 116/350 [02:32<06:16,  1.61s/it]

Epoch 116/350, Loss: 0.0717


Training Epochs:  33%|███▎      | 117/350 [02:34<05:52,  1.51s/it]

Epoch 117/350, Loss: 0.0715


Training Epochs:  34%|███▎      | 118/350 [02:35<05:32,  1.43s/it]

Epoch 118/350, Loss: 0.0721


Training Epochs:  34%|███▍      | 119/350 [02:36<05:17,  1.38s/it]

Epoch 119/350, Loss: 0.0719


Training Epochs:  34%|███▍      | 120/350 [02:37<05:07,  1.34s/it]

Epoch 120/350, Loss: 0.0717


Training Epochs:  35%|███▍      | 121/350 [02:38<04:59,  1.31s/it]

Epoch 121/350, Loss: 0.0720


Training Epochs:  35%|███▍      | 122/350 [02:40<04:53,  1.29s/it]

Epoch 122/350, Loss: 0.0725


Training Epochs:  35%|███▌      | 123/350 [02:41<04:47,  1.27s/it]

Epoch 123/350, Loss: 0.0723


Training Epochs:  35%|███▌      | 124/350 [02:42<04:48,  1.28s/it]

Epoch 124/350, Loss: 0.0720


Training Epochs:  36%|███▌      | 125/350 [02:44<05:30,  1.47s/it]

Epoch 125/350, Loss: 0.0720


Training Epochs:  36%|███▌      | 126/350 [02:46<06:01,  1.61s/it]

Epoch 126/350, Loss: 0.0717


Training Epochs:  36%|███▋      | 127/350 [02:47<05:40,  1.53s/it]

Epoch 127/350, Loss: 0.0728


Training Epochs:  37%|███▋      | 128/350 [02:49<05:20,  1.44s/it]

Epoch 128/350, Loss: 0.0726


Training Epochs:  37%|███▋      | 129/350 [02:50<05:07,  1.39s/it]

Epoch 129/350, Loss: 0.0716


Training Epochs:  37%|███▋      | 130/350 [02:51<05:00,  1.37s/it]

Epoch 130/350, Loss: 0.0718


Training Epochs:  37%|███▋      | 131/350 [02:53<04:51,  1.33s/it]

Epoch 131/350, Loss: 0.0716


Training Epochs:  38%|███▊      | 132/350 [02:54<05:00,  1.38s/it]

Epoch 132/350, Loss: 0.0726


Training Epochs:  38%|███▊      | 133/350 [02:55<04:51,  1.35s/it]

Epoch 133/350, Loss: 0.0720


Training Epochs:  38%|███▊      | 134/350 [02:57<04:59,  1.39s/it]

Epoch 134/350, Loss: 0.0718


Training Epochs:  39%|███▊      | 135/350 [02:59<05:34,  1.56s/it]

Epoch 135/350, Loss: 0.0719


Training Epochs:  39%|███▉      | 136/350 [03:00<05:25,  1.52s/it]

Epoch 136/350, Loss: 0.0718


Training Epochs:  39%|███▉      | 137/350 [03:01<05:09,  1.45s/it]

Epoch 137/350, Loss: 0.0726


Training Epochs:  39%|███▉      | 138/350 [03:03<04:56,  1.40s/it]

Epoch 138/350, Loss: 0.0724


Training Epochs:  40%|███▉      | 139/350 [03:04<04:47,  1.36s/it]

Epoch 139/350, Loss: 0.0716


Training Epochs:  40%|████      | 140/350 [03:05<04:40,  1.34s/it]

Epoch 140/350, Loss: 0.0726


Training Epochs:  40%|████      | 141/350 [03:07<04:36,  1.32s/it]

Epoch 141/350, Loss: 0.0724


Training Epochs:  41%|████      | 142/350 [03:08<04:31,  1.30s/it]

Epoch 142/350, Loss: 0.0717


Training Epochs:  41%|████      | 143/350 [03:09<04:28,  1.30s/it]

Epoch 143/350, Loss: 0.0718


Training Epochs:  41%|████      | 144/350 [03:11<05:02,  1.47s/it]

Epoch 144/350, Loss: 0.0722


Training Epochs:  41%|████▏     | 145/350 [03:13<05:32,  1.62s/it]

Epoch 145/350, Loss: 0.0716


Training Epochs:  42%|████▏     | 146/350 [03:14<05:13,  1.54s/it]

Epoch 146/350, Loss: 0.0720


Training Epochs:  42%|████▏     | 147/350 [03:16<04:56,  1.46s/it]

Epoch 147/350, Loss: 0.0722


Training Epochs:  42%|████▏     | 148/350 [03:17<04:42,  1.40s/it]

Epoch 148/350, Loss: 0.0716


Training Epochs:  43%|████▎     | 149/350 [03:18<04:30,  1.35s/it]

Epoch 149/350, Loss: 0.0716


Training Epochs:  43%|████▎     | 150/350 [03:19<04:24,  1.32s/it]

Epoch 150/350, Loss: 0.0717


Training Epochs:  43%|████▎     | 151/350 [03:21<04:20,  1.31s/it]

Epoch 151/350, Loss: 0.0717


Training Epochs:  43%|████▎     | 152/350 [03:22<04:15,  1.29s/it]

Epoch 152/350, Loss: 0.0717


Training Epochs:  44%|████▎     | 153/350 [03:23<04:14,  1.29s/it]

Epoch 153/350, Loss: 0.0717


Training Epochs:  44%|████▍     | 154/350 [03:25<04:56,  1.51s/it]

Epoch 154/350, Loss: 0.0720


Training Epochs:  44%|████▍     | 155/350 [03:27<05:25,  1.67s/it]

Epoch 155/350, Loss: 0.0723


Training Epochs:  45%|████▍     | 156/350 [03:28<05:01,  1.56s/it]

Epoch 156/350, Loss: 0.0720


Training Epochs:  45%|████▍     | 157/350 [03:30<04:43,  1.47s/it]

Epoch 157/350, Loss: 0.0719


Training Epochs:  45%|████▌     | 158/350 [03:31<04:29,  1.41s/it]

Epoch 158/350, Loss: 0.0724


Training Epochs:  45%|████▌     | 159/350 [03:32<04:21,  1.37s/it]

Epoch 159/350, Loss: 0.0716


Training Epochs:  46%|████▌     | 160/350 [03:34<04:14,  1.34s/it]

Epoch 160/350, Loss: 0.0728


Training Epochs:  46%|████▌     | 161/350 [03:35<04:08,  1.32s/it]

Epoch 161/350, Loss: 0.0718


Training Epochs:  46%|████▋     | 162/350 [03:36<04:02,  1.29s/it]

Epoch 162/350, Loss: 0.0715


Training Epochs:  47%|████▋     | 163/350 [03:37<04:01,  1.29s/it]

Epoch 163/350, Loss: 0.0717


Training Epochs:  47%|████▋     | 164/350 [03:39<04:37,  1.49s/it]

Epoch 164/350, Loss: 0.0733


Training Epochs:  47%|████▋     | 165/350 [03:41<05:03,  1.64s/it]

Epoch 165/350, Loss: 0.0718


Training Epochs:  47%|████▋     | 166/350 [03:43<04:46,  1.56s/it]

Epoch 166/350, Loss: 0.0716


Training Epochs:  48%|████▊     | 167/350 [03:44<04:32,  1.49s/it]

Epoch 167/350, Loss: 0.0719


Training Epochs:  48%|████▊     | 168/350 [03:45<04:19,  1.43s/it]

Epoch 168/350, Loss: 0.0717


Training Epochs:  48%|████▊     | 169/350 [03:47<04:09,  1.38s/it]

Epoch 169/350, Loss: 0.0738


Training Epochs:  49%|████▊     | 170/350 [03:48<04:02,  1.35s/it]

Epoch 170/350, Loss: 0.0723


Training Epochs:  49%|████▉     | 171/350 [03:49<03:57,  1.33s/it]

Epoch 171/350, Loss: 0.0717


Training Epochs:  49%|████▉     | 172/350 [03:50<03:52,  1.31s/it]

Epoch 172/350, Loss: 0.0727


Training Epochs:  49%|████▉     | 173/350 [03:52<04:00,  1.36s/it]

Epoch 173/350, Loss: 0.0720


Training Epochs:  50%|████▉     | 174/350 [03:54<04:33,  1.55s/it]

Epoch 174/350, Loss: 0.0716


Training Epochs:  50%|█████     | 175/350 [03:55<04:27,  1.53s/it]

Epoch 175/350, Loss: 0.0737


Training Epochs:  50%|█████     | 176/350 [03:57<04:16,  1.48s/it]

Epoch 176/350, Loss: 0.0721


Training Epochs:  51%|█████     | 177/350 [03:58<04:05,  1.42s/it]

Epoch 177/350, Loss: 0.0718


Training Epochs:  51%|█████     | 178/350 [03:59<03:58,  1.39s/it]

Epoch 178/350, Loss: 0.0716


Training Epochs:  51%|█████     | 179/350 [04:01<03:51,  1.35s/it]

Epoch 179/350, Loss: 0.0716


Training Epochs:  51%|█████▏    | 180/350 [04:02<03:46,  1.33s/it]

Epoch 180/350, Loss: 0.0732


Training Epochs:  52%|█████▏    | 181/350 [04:03<03:42,  1.32s/it]

Epoch 181/350, Loss: 0.0719


Training Epochs:  52%|█████▏    | 182/350 [04:04<03:42,  1.32s/it]

Epoch 182/350, Loss: 0.0718


Training Epochs:  52%|█████▏    | 183/350 [04:06<04:13,  1.52s/it]

Epoch 183/350, Loss: 0.0720


Training Epochs:  53%|█████▎    | 184/350 [04:08<04:37,  1.67s/it]

Epoch 184/350, Loss: 0.0717


Training Epochs:  53%|█████▎    | 185/350 [04:10<04:17,  1.56s/it]

Epoch 185/350, Loss: 0.0718


Training Epochs:  53%|█████▎    | 186/350 [04:11<04:02,  1.48s/it]

Epoch 186/350, Loss: 0.0721


Training Epochs:  53%|█████▎    | 187/350 [04:12<03:52,  1.43s/it]

Epoch 187/350, Loss: 0.0734


Training Epochs:  54%|█████▎    | 188/350 [04:14<03:47,  1.40s/it]

Epoch 188/350, Loss: 0.0719


Training Epochs:  54%|█████▍    | 189/350 [04:15<03:42,  1.38s/it]

Epoch 189/350, Loss: 0.0719


Training Epochs:  54%|█████▍    | 190/350 [04:16<03:39,  1.37s/it]

Epoch 190/350, Loss: 0.0718


Training Epochs:  55%|█████▍    | 191/350 [04:18<03:35,  1.36s/it]

Epoch 191/350, Loss: 0.0717


Training Epochs:  55%|█████▍    | 192/350 [04:19<03:39,  1.39s/it]

Epoch 192/350, Loss: 0.0717


Training Epochs:  55%|█████▌    | 193/350 [04:21<04:11,  1.60s/it]

Epoch 193/350, Loss: 0.0721


Training Epochs:  55%|█████▌    | 194/350 [04:23<04:02,  1.55s/it]

Epoch 194/350, Loss: 0.0726


Training Epochs:  56%|█████▌    | 195/350 [04:24<03:50,  1.49s/it]

Epoch 195/350, Loss: 0.0717


Training Epochs:  56%|█████▌    | 196/350 [04:25<03:44,  1.46s/it]

Epoch 196/350, Loss: 0.0721


Training Epochs:  56%|█████▋    | 197/350 [04:27<03:37,  1.42s/it]

Epoch 197/350, Loss: 0.0719


Training Epochs:  57%|█████▋    | 198/350 [04:28<03:33,  1.41s/it]

Epoch 198/350, Loss: 0.0728


Training Epochs:  57%|█████▋    | 199/350 [04:30<03:33,  1.41s/it]

Epoch 199/350, Loss: 0.0720


Training Epochs:  57%|█████▋    | 200/350 [04:31<03:29,  1.40s/it]

Epoch 200/350, Loss: 0.0716


Training Epochs:  57%|█████▋    | 201/350 [04:33<03:39,  1.47s/it]

Epoch 201/350, Loss: 0.0717


Training Epochs:  58%|█████▊    | 202/350 [04:35<04:06,  1.67s/it]

Epoch 202/350, Loss: 0.0719


Training Epochs:  58%|█████▊    | 203/350 [04:36<03:54,  1.60s/it]

Epoch 203/350, Loss: 0.0723


Training Epochs:  58%|█████▊    | 204/350 [04:38<03:43,  1.53s/it]

Epoch 204/350, Loss: 0.0717


Training Epochs:  59%|█████▊    | 205/350 [04:39<03:35,  1.48s/it]

Epoch 205/350, Loss: 0.0716


Training Epochs:  59%|█████▉    | 206/350 [04:40<03:29,  1.46s/it]

Epoch 206/350, Loss: 0.0719


Training Epochs:  59%|█████▉    | 207/350 [04:42<03:24,  1.43s/it]

Epoch 207/350, Loss: 0.0718


Training Epochs:  59%|█████▉    | 208/350 [04:43<03:22,  1.42s/it]

Epoch 208/350, Loss: 0.0718


Training Epochs:  60%|█████▉    | 209/350 [04:44<03:18,  1.41s/it]

Epoch 209/350, Loss: 0.0719


Training Epochs:  60%|██████    | 210/350 [04:46<03:36,  1.55s/it]

Epoch 210/350, Loss: 0.0717


Training Epochs:  60%|██████    | 211/350 [04:48<03:59,  1.72s/it]

Epoch 211/350, Loss: 0.0716


Training Epochs:  61%|██████    | 212/350 [04:50<03:44,  1.63s/it]

Epoch 212/350, Loss: 0.0724


Training Epochs:  61%|██████    | 213/350 [04:51<03:30,  1.54s/it]

Epoch 213/350, Loss: 0.0726


Training Epochs:  61%|██████    | 214/350 [04:52<03:20,  1.48s/it]

Epoch 214/350, Loss: 0.0717


Training Epochs:  61%|██████▏   | 215/350 [04:54<03:13,  1.43s/it]

Epoch 215/350, Loss: 0.0722


Training Epochs:  62%|██████▏   | 216/350 [04:55<03:07,  1.40s/it]

Epoch 216/350, Loss: 0.0717


Training Epochs:  62%|██████▏   | 217/350 [04:57<03:07,  1.41s/it]

Epoch 217/350, Loss: 0.0719


Training Epochs:  62%|██████▏   | 218/350 [04:58<03:01,  1.37s/it]

Epoch 218/350, Loss: 0.0717


Training Epochs:  63%|██████▎   | 219/350 [05:00<03:12,  1.47s/it]

Epoch 219/350, Loss: 0.0721


Training Epochs:  63%|██████▎   | 220/350 [05:02<03:33,  1.64s/it]

Epoch 220/350, Loss: 0.0727


Training Epochs:  63%|██████▎   | 221/350 [05:03<03:23,  1.58s/it]

Epoch 221/350, Loss: 0.0718


Training Epochs:  63%|██████▎   | 222/350 [05:04<03:14,  1.52s/it]

Epoch 222/350, Loss: 0.0729


Training Epochs:  64%|██████▎   | 223/350 [05:06<03:07,  1.48s/it]

Epoch 223/350, Loss: 0.0726


Training Epochs:  64%|██████▍   | 224/350 [05:07<03:02,  1.45s/it]

Epoch 224/350, Loss: 0.0718


Training Epochs:  64%|██████▍   | 225/350 [05:09<02:58,  1.42s/it]

Epoch 225/350, Loss: 0.0727


Training Epochs:  65%|██████▍   | 226/350 [05:10<02:55,  1.41s/it]

Epoch 226/350, Loss: 0.0715


Training Epochs:  65%|██████▍   | 227/350 [05:11<02:51,  1.39s/it]

Epoch 227/350, Loss: 0.0718


Training Epochs:  65%|██████▌   | 228/350 [05:13<03:06,  1.53s/it]

Epoch 228/350, Loss: 0.0720


Training Epochs:  65%|██████▌   | 229/350 [05:15<03:28,  1.73s/it]

Epoch 229/350, Loss: 0.0716


Training Epochs:  66%|██████▌   | 230/350 [05:17<03:15,  1.63s/it]

Epoch 230/350, Loss: 0.0720


Training Epochs:  66%|██████▌   | 231/350 [05:18<03:05,  1.56s/it]

Epoch 231/350, Loss: 0.0717


Training Epochs:  66%|██████▋   | 232/350 [05:19<02:58,  1.51s/it]

Epoch 232/350, Loss: 0.0719


Training Epochs:  67%|██████▋   | 233/350 [05:21<02:51,  1.46s/it]

Epoch 233/350, Loss: 0.0717


Training Epochs:  67%|██████▋   | 234/350 [05:22<02:47,  1.45s/it]

Epoch 234/350, Loss: 0.0723


Training Epochs:  67%|██████▋   | 235/350 [05:24<02:43,  1.42s/it]

Epoch 235/350, Loss: 0.0718


Training Epochs:  67%|██████▋   | 236/350 [05:25<02:40,  1.41s/it]

Epoch 236/350, Loss: 0.0716


Training Epochs:  68%|██████▊   | 237/350 [05:27<02:55,  1.55s/it]

Epoch 237/350, Loss: 0.0717


Training Epochs:  68%|██████▊   | 238/350 [05:29<03:12,  1.72s/it]

Epoch 238/350, Loss: 0.0718


Training Epochs:  68%|██████▊   | 239/350 [05:30<02:59,  1.62s/it]

Epoch 239/350, Loss: 0.0716


Training Epochs:  69%|██████▊   | 240/350 [05:32<02:48,  1.53s/it]

Epoch 240/350, Loss: 0.0725


Training Epochs:  69%|██████▉   | 241/350 [05:33<02:39,  1.46s/it]

Epoch 241/350, Loss: 0.0716


Training Epochs:  69%|██████▉   | 242/350 [05:34<02:33,  1.42s/it]

Epoch 242/350, Loss: 0.0718


Training Epochs:  69%|██████▉   | 243/350 [05:36<02:29,  1.40s/it]

Epoch 243/350, Loss: 0.0719


Training Epochs:  70%|██████▉   | 244/350 [05:37<02:26,  1.38s/it]

Epoch 244/350, Loss: 0.0717


Training Epochs:  70%|███████   | 245/350 [05:38<02:23,  1.37s/it]

Epoch 245/350, Loss: 0.0716


Training Epochs:  70%|███████   | 246/350 [05:40<02:29,  1.43s/it]

Epoch 246/350, Loss: 0.0722


Training Epochs:  71%|███████   | 247/350 [05:42<02:47,  1.63s/it]

Epoch 247/350, Loss: 0.0718


Training Epochs:  71%|███████   | 248/350 [05:43<02:38,  1.55s/it]

Epoch 248/350, Loss: 0.0718


Training Epochs:  71%|███████   | 249/350 [05:45<02:29,  1.48s/it]

Epoch 249/350, Loss: 0.0726


Training Epochs:  71%|███████▏  | 250/350 [05:46<02:24,  1.45s/it]

Epoch 250/350, Loss: 0.0719


Training Epochs:  72%|███████▏  | 251/350 [05:47<02:19,  1.41s/it]

Epoch 251/350, Loss: 0.0720


Training Epochs:  72%|███████▏  | 252/350 [05:49<02:16,  1.39s/it]

Epoch 252/350, Loss: 0.0726


Training Epochs:  72%|███████▏  | 253/350 [05:50<02:12,  1.37s/it]

Epoch 253/350, Loss: 0.0719


Training Epochs:  73%|███████▎  | 254/350 [05:51<02:10,  1.36s/it]

Epoch 254/350, Loss: 0.0716


Training Epochs:  73%|███████▎  | 255/350 [05:53<02:18,  1.46s/it]

Epoch 255/350, Loss: 0.0718


Training Epochs:  73%|███████▎  | 256/350 [05:55<02:33,  1.64s/it]

Epoch 256/350, Loss: 0.0721


Training Epochs:  73%|███████▎  | 257/350 [05:57<02:24,  1.56s/it]

Epoch 257/350, Loss: 0.0724


Training Epochs:  74%|███████▎  | 258/350 [05:58<02:21,  1.54s/it]

Epoch 258/350, Loss: 0.0717


Training Epochs:  74%|███████▍  | 259/350 [05:59<02:15,  1.49s/it]

Epoch 259/350, Loss: 0.0718


Training Epochs:  74%|███████▍  | 260/350 [06:01<02:09,  1.44s/it]

Epoch 260/350, Loss: 0.0719


Training Epochs:  75%|███████▍  | 261/350 [06:02<02:05,  1.41s/it]

Epoch 261/350, Loss: 0.0718


Training Epochs:  75%|███████▍  | 262/350 [06:03<02:02,  1.39s/it]

Epoch 262/350, Loss: 0.0718


Training Epochs:  75%|███████▌  | 263/350 [06:05<01:59,  1.37s/it]

Epoch 263/350, Loss: 0.0718


Training Epochs:  75%|███████▌  | 264/350 [06:07<02:09,  1.51s/it]

Epoch 264/350, Loss: 0.0717


Training Epochs:  76%|███████▌  | 265/350 [06:09<02:24,  1.70s/it]

Epoch 265/350, Loss: 0.0723


Training Epochs:  76%|███████▌  | 266/350 [06:10<02:14,  1.60s/it]

Epoch 266/350, Loss: 0.0718


Training Epochs:  76%|███████▋  | 267/350 [06:11<02:06,  1.52s/it]

Epoch 267/350, Loss: 0.0728


Training Epochs:  77%|███████▋  | 268/350 [06:13<02:00,  1.47s/it]

Epoch 268/350, Loss: 0.0720


Training Epochs:  77%|███████▋  | 269/350 [06:14<01:55,  1.43s/it]

Epoch 269/350, Loss: 0.0716


Training Epochs:  77%|███████▋  | 270/350 [06:15<01:52,  1.41s/it]

Epoch 270/350, Loss: 0.0717


Training Epochs:  77%|███████▋  | 271/350 [06:17<01:49,  1.39s/it]

Epoch 271/350, Loss: 0.0717


Training Epochs:  78%|███████▊  | 272/350 [06:18<01:46,  1.36s/it]

Epoch 272/350, Loss: 0.0717


Training Epochs:  78%|███████▊  | 273/350 [06:20<01:53,  1.47s/it]

Epoch 273/350, Loss: 0.0718


Training Epochs:  78%|███████▊  | 274/350 [06:22<02:07,  1.68s/it]

Epoch 274/350, Loss: 0.0718


Training Epochs:  79%|███████▊  | 275/350 [06:23<01:58,  1.58s/it]

Epoch 275/350, Loss: 0.0721


Training Epochs:  79%|███████▉  | 276/350 [06:25<01:51,  1.51s/it]

Epoch 276/350, Loss: 0.0716


Training Epochs:  79%|███████▉  | 277/350 [06:26<01:46,  1.46s/it]

Epoch 277/350, Loss: 0.0716


Training Epochs:  79%|███████▉  | 278/350 [06:27<01:42,  1.42s/it]

Epoch 278/350, Loss: 0.0717


Training Epochs:  80%|███████▉  | 279/350 [06:29<01:40,  1.41s/it]

Epoch 279/350, Loss: 0.0717


Training Epochs:  80%|████████  | 280/350 [06:30<01:37,  1.39s/it]

Epoch 280/350, Loss: 0.0728


Training Epochs:  80%|████████  | 281/350 [06:31<01:35,  1.38s/it]

Epoch 281/350, Loss: 0.0718


Training Epochs:  81%|████████  | 282/350 [06:33<01:41,  1.49s/it]

Epoch 282/350, Loss: 0.0718


Training Epochs:  81%|████████  | 283/350 [06:35<01:54,  1.70s/it]

Epoch 283/350, Loss: 0.0720


Training Epochs:  81%|████████  | 284/350 [06:37<01:46,  1.61s/it]

Epoch 284/350, Loss: 0.0718


Training Epochs:  81%|████████▏ | 285/350 [06:38<01:39,  1.53s/it]

Epoch 285/350, Loss: 0.0730


Training Epochs:  82%|████████▏ | 286/350 [06:39<01:34,  1.48s/it]

Epoch 286/350, Loss: 0.0728


Training Epochs:  82%|████████▏ | 287/350 [06:41<01:32,  1.46s/it]

Epoch 287/350, Loss: 0.0724


Training Epochs:  82%|████████▏ | 288/350 [06:42<01:28,  1.43s/it]

Epoch 288/350, Loss: 0.0716


Training Epochs:  83%|████████▎ | 289/350 [06:44<01:25,  1.40s/it]

Epoch 289/350, Loss: 0.0717


Training Epochs:  83%|████████▎ | 290/350 [06:45<01:24,  1.40s/it]

Epoch 290/350, Loss: 0.0715


Training Epochs:  83%|████████▎ | 291/350 [06:47<01:30,  1.53s/it]

Epoch 291/350, Loss: 0.0717


Training Epochs:  83%|████████▎ | 292/350 [06:49<01:39,  1.72s/it]

Epoch 292/350, Loss: 0.0719


Training Epochs:  84%|████████▎ | 293/350 [06:50<01:32,  1.62s/it]

Epoch 293/350, Loss: 0.0715


Training Epochs:  84%|████████▍ | 294/350 [06:52<01:26,  1.54s/it]

Epoch 294/350, Loss: 0.0717


Training Epochs:  84%|████████▍ | 295/350 [06:53<01:21,  1.48s/it]

Epoch 295/350, Loss: 0.0721


Training Epochs:  85%|████████▍ | 296/350 [06:54<01:18,  1.44s/it]

Epoch 296/350, Loss: 0.0718


Training Epochs:  85%|████████▍ | 297/350 [06:56<01:15,  1.42s/it]

Epoch 297/350, Loss: 0.0715


Training Epochs:  85%|████████▌ | 298/350 [06:57<01:13,  1.41s/it]

Epoch 298/350, Loss: 0.0718


Training Epochs:  85%|████████▌ | 299/350 [06:59<01:13,  1.45s/it]

Epoch 299/350, Loss: 0.0725


Training Epochs:  86%|████████▌ | 300/350 [07:01<01:18,  1.57s/it]

Epoch 300/350, Loss: 0.0718


Training Epochs:  86%|████████▌ | 301/350 [07:03<01:24,  1.73s/it]

Epoch 301/350, Loss: 0.0717


Training Epochs:  86%|████████▋ | 302/350 [07:04<01:18,  1.64s/it]

Epoch 302/350, Loss: 0.0720


Training Epochs:  87%|████████▋ | 303/350 [07:08<01:44,  2.22s/it]

Epoch 303/350, Loss: 0.0720


Training Epochs:  87%|████████▋ | 304/350 [07:09<01:30,  1.97s/it]

Epoch 304/350, Loss: 0.0719


Training Epochs:  87%|████████▋ | 305/350 [07:10<01:20,  1.79s/it]

Epoch 305/350, Loss: 0.0716


Training Epochs:  87%|████████▋ | 306/350 [07:12<01:12,  1.66s/it]

Epoch 306/350, Loss: 0.0718


Training Epochs:  88%|████████▊ | 307/350 [07:13<01:09,  1.62s/it]

Epoch 307/350, Loss: 0.0730


Training Epochs:  88%|████████▊ | 308/350 [07:15<01:15,  1.79s/it]

Epoch 308/350, Loss: 0.0719


Training Epochs:  88%|████████▊ | 309/350 [07:17<01:09,  1.69s/it]

Epoch 309/350, Loss: 0.0716


Training Epochs:  89%|████████▊ | 310/350 [07:18<01:03,  1.59s/it]

Epoch 310/350, Loss: 0.0723


Training Epochs:  89%|████████▉ | 311/350 [07:20<00:59,  1.53s/it]

Epoch 311/350, Loss: 0.0717


Training Epochs:  89%|████████▉ | 312/350 [07:21<00:56,  1.49s/it]

Epoch 312/350, Loss: 0.0732


Training Epochs:  89%|████████▉ | 313/350 [07:22<00:53,  1.46s/it]

Epoch 313/350, Loss: 0.0718


Training Epochs:  90%|████████▉ | 314/350 [07:24<00:51,  1.44s/it]

Epoch 314/350, Loss: 0.0717


Training Epochs:  90%|█████████ | 315/350 [07:25<00:49,  1.41s/it]

Epoch 315/350, Loss: 0.0719


Training Epochs:  90%|█████████ | 316/350 [07:27<00:51,  1.52s/it]

Epoch 316/350, Loss: 0.0724


Training Epochs:  91%|█████████ | 317/350 [07:29<00:57,  1.73s/it]

Epoch 317/350, Loss: 0.0724


Training Epochs:  91%|█████████ | 318/350 [07:31<00:53,  1.67s/it]

Epoch 318/350, Loss: 0.0722


Training Epochs:  91%|█████████ | 319/350 [07:32<00:48,  1.58s/it]

Epoch 319/350, Loss: 0.0718


Training Epochs:  91%|█████████▏| 320/350 [07:34<00:45,  1.52s/it]

Epoch 320/350, Loss: 0.0716


Training Epochs:  92%|█████████▏| 321/350 [07:35<00:43,  1.49s/it]

Epoch 321/350, Loss: 0.0718


Training Epochs:  92%|█████████▏| 322/350 [07:36<00:40,  1.45s/it]

Epoch 322/350, Loss: 0.0723


Training Epochs:  92%|█████████▏| 323/350 [07:38<00:38,  1.43s/it]

Epoch 323/350, Loss: 0.0720


Training Epochs:  93%|█████████▎| 324/350 [07:39<00:36,  1.41s/it]

Epoch 324/350, Loss: 0.0728


Training Epochs:  93%|█████████▎| 325/350 [07:41<00:37,  1.52s/it]

Epoch 325/350, Loss: 0.0730


Training Epochs:  93%|█████████▎| 326/350 [07:43<00:41,  1.71s/it]

Epoch 326/350, Loss: 0.0717


Training Epochs:  93%|█████████▎| 327/350 [07:44<00:37,  1.64s/it]

Epoch 327/350, Loss: 0.0723


Training Epochs:  94%|█████████▎| 328/350 [07:46<00:34,  1.57s/it]

Epoch 328/350, Loss: 0.0716


Training Epochs:  94%|█████████▍| 329/350 [07:47<00:32,  1.54s/it]

Epoch 329/350, Loss: 0.0721


Training Epochs:  94%|█████████▍| 330/350 [07:49<00:30,  1.50s/it]

Epoch 330/350, Loss: 0.0716


Training Epochs:  95%|█████████▍| 331/350 [07:50<00:27,  1.46s/it]

Epoch 331/350, Loss: 0.0715


Training Epochs:  95%|█████████▍| 332/350 [07:52<00:26,  1.45s/it]

Epoch 332/350, Loss: 0.0717


Training Epochs:  95%|█████████▌| 333/350 [07:53<00:24,  1.43s/it]

Epoch 333/350, Loss: 0.0730


Training Epochs:  95%|█████████▌| 334/350 [07:55<00:25,  1.59s/it]

Epoch 334/350, Loss: 0.0733


Training Epochs:  96%|█████████▌| 335/350 [07:57<00:26,  1.75s/it]

Epoch 335/350, Loss: 0.0726


Training Epochs:  96%|█████████▌| 336/350 [07:58<00:23,  1.67s/it]

Epoch 336/350, Loss: 0.0721


Training Epochs:  96%|█████████▋| 337/350 [08:00<00:21,  1.68s/it]

Epoch 337/350, Loss: 0.0727


Training Epochs:  97%|█████████▋| 338/350 [08:02<00:19,  1.59s/it]

Epoch 338/350, Loss: 0.0715


Training Epochs:  97%|█████████▋| 339/350 [08:03<00:16,  1.53s/it]

Epoch 339/350, Loss: 0.0723


Training Epochs:  97%|█████████▋| 340/350 [08:04<00:14,  1.49s/it]

Epoch 340/350, Loss: 0.0716


Training Epochs:  97%|█████████▋| 341/350 [08:06<00:13,  1.47s/it]

Epoch 341/350, Loss: 0.0725


Training Epochs:  98%|█████████▊| 342/350 [08:07<00:11,  1.45s/it]

Epoch 342/350, Loss: 0.0718


Training Epochs:  98%|█████████▊| 343/350 [08:09<00:11,  1.67s/it]

Epoch 343/350, Loss: 0.0718


Training Epochs:  98%|█████████▊| 344/350 [08:12<00:11,  1.86s/it]

Epoch 344/350, Loss: 0.0717


Training Epochs:  99%|█████████▊| 345/350 [08:13<00:08,  1.72s/it]

Epoch 345/350, Loss: 0.0720


Training Epochs:  99%|█████████▉| 346/350 [08:14<00:06,  1.63s/it]

Epoch 346/350, Loss: 0.0718


Training Epochs:  99%|█████████▉| 347/350 [08:16<00:04,  1.56s/it]

Epoch 347/350, Loss: 0.0718


Training Epochs:  99%|█████████▉| 348/350 [08:17<00:03,  1.51s/it]

Epoch 348/350, Loss: 0.0717


Training Epochs: 100%|█████████▉| 349/350 [08:19<00:01,  1.47s/it]

Epoch 349/350, Loss: 0.0728


Training Epochs: 100%|██████████| 350/350 [08:20<00:00,  1.43s/it]

Epoch 350/350, Loss: 0.0721





Test Accuracy: 93.27%


In [15]:
# Display results
print("\nExperiment Results:")
for key, value in results.items():
    print(f"{key}: {value}")


Experiment Results:
hidden_size: {32: 0.9381107491856677, 64: 0.9261672095548317}
pooling_type: {'max': 0.9391965255157437, 'avg': 0.9348534201954397}
optimizer: {'SGD': 0.9109663409337676, 'RMSProp': 0.9283387622149837, 'Adam': 0.9283387622149837}
epochs: {5: 0.9348534201954397, 50: 0.9326818675352877, 100: 0.9326818675352877, 250: 0.9413680781758957, 350: 0.9326818675352877}
