<a href="https://colab.research.google.com/github/VSM5002/AI-Powered-Thermal-Fire-Detection-System/blob/master/Image_conversion_thermal_8bit.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import kagglehub

# Fetch the latest version of the FLIR thermal dataset via kagglehub.
# `path` is the value returned by the download call; later cells list and
# join sub-directories under it.
KAGGLE_DATASET_HANDLE = "deepnewbie/flir-thermal-images-dataset"
path = kagglehub.dataset_download(KAGGLE_DATASET_HANDLE)

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/deepnewbie/flir-thermal-images-dataset?dataset_version_number=1...


100%|██████████| 15.3G/15.3G [06:40<00:00, 40.9MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/deepnewbie/flir-thermal-images-dataset/versions/1


In [2]:
# List files and folders in the dataset directory
import os

# Top-level entries of the downloaded dataset directory.
files = os.listdir(path)
print("Files and folders in dataset root:")
for entry in files:
    print(entry)

# Print all folders and subfolders recursively.
# The file names yielded by os.walk are bound to a throwaway name so the
# `files` listing built above is not silently overwritten by the loop.
print("\nAll folders and subfolders:")
for root, dirs, _filenames in os.walk(path):
    print(f"Folder: {root}")
    for subdir in dirs:
        # Two positional arguments: print inserts a space between them, which
        # keeps the original "  Subfolder:  <name>" output format.
        print("  Subfolder: ", subdir)

Files and folders in dataset root:
FLIR_ADAS_1_3

All folders and subfolders:
Folder: /root/.cache/kagglehub/datasets/deepnewbie/flir-thermal-images-dataset/versions/1
  Subfolder:  FLIR_ADAS_1_3
Folder: /root/.cache/kagglehub/datasets/deepnewbie/flir-thermal-images-dataset/versions/1/FLIR_ADAS_1_3
  Subfolder:  train
  Subfolder:  video
  Subfolder:  val
Folder: /root/.cache/kagglehub/datasets/deepnewbie/flir-thermal-images-dataset/versions/1/FLIR_ADAS_1_3/train
  Subfolder:  RGB
  Subfolder:  thermal_8_bit
  Subfolder:  thermal_16_bit
  Subfolder:  Annotated_thermal_8_bit
Folder: /root/.cache/kagglehub/datasets/deepnewbie/flir-thermal-images-dataset/versions/1/FLIR_ADAS_1_3/train/RGB
Folder: /root/.cache/kagglehub/datasets/deepnewbie/flir-thermal-images-dataset/versions/1/FLIR_ADAS_1_3/train/thermal_8_bit
Folder: /root/.cache/kagglehub/datasets/deepnewbie/flir-thermal-images-dataset/versions/1/FLIR_ADAS_1_3/train/thermal_16_bit
Folder: /root/.cache/kagglehub/datasets/deepnewbie/flir-

In [3]:
# Build the RGB / thermal directory paths for the train and val splits.
# Both splits live under <path>/FLIR_ADAS_1_3/<split>/{RGB,thermal_8_bit}.
_flir_root = os.path.join(path, "FLIR_ADAS_1_3")
_train_split = os.path.join(_flir_root, "train")
_val_split = os.path.join(_flir_root, "val")

train_rgb_dir = os.path.join(_train_split, "RGB")
train_thermal_dir = os.path.join(_train_split, "thermal_8_bit")
val_rgb_dir = os.path.join(_val_split, "RGB")
val_thermal_dir = os.path.join(_val_split, "thermal_8_bit")

# Echo the resolved locations so a wrong dataset layout is visible immediately.
for _label, _directory in (
    ("Train RGB directory:", train_rgb_dir),
    ("Train Thermal directory:", train_thermal_dir),
    ("Val RGB directory:", val_rgb_dir),
    ("Val Thermal directory:", val_thermal_dir),
):
    print(_label, _directory)

# Match images by filename prefix (without extension, ignoring extension differences)
def get_matched_files_by_prefix(rgb_dir, thermal_dir, exclude_prefix=None):
    """Pair RGB and thermal image files that share the same file-name stem.

    Only files whose extension is ``.jpg``, ``.jpeg`` or ``.png`` (checked
    case-insensitively) are considered. The stem is the file name without its
    extension, so ``a.jpg`` in one directory pairs with ``a.jpeg`` in the other.

    Args:
        rgb_dir: Directory containing the RGB images.
        thermal_dir: Directory containing the thermal images.
        exclude_prefix: Stem(s) to leave out of the result. ``None`` excludes
            nothing, a single string excludes that one stem, and any other
            iterable of strings excludes every stem it contains.

    Returns:
        A tuple ``(matched_rgb, matched_thermal, common_prefixes)`` of three
        equal-length lists, ordered by stem, where ``matched_rgb[i]`` and
        ``matched_thermal[i]`` are the file names belonging to
        ``common_prefixes[i]``.
    """
    image_exts = ('.jpg', '.jpeg', '.png')

    def _index_by_stem(directory):
        # os.listdir returns entries in arbitrary order; sorting makes the
        # surviving file deterministic when two files share a stem
        # (e.g. a.jpg and a.png): the alphabetically last name is kept.
        return {
            os.path.splitext(name)[0]: name
            for name in sorted(os.listdir(directory))
            if name.lower().endswith(image_exts)
        }

    rgb_prefix = _index_by_stem(rgb_dir)
    thermal_prefix = _index_by_stem(thermal_dir)

    # Normalise the exclusion argument to a set; a bare string is one stem,
    # not an iterable of characters.
    if exclude_prefix is None:
        excluded = set()
    elif isinstance(exclude_prefix, str):
        excluded = {exclude_prefix}
    else:
        excluded = set(exclude_prefix)

    common_prefixes = sorted((rgb_prefix.keys() & thermal_prefix.keys()) - excluded)
    matched_rgb = [rgb_prefix[stem] for stem in common_prefixes]
    matched_thermal = [thermal_prefix[stem] for stem in common_prefixes]
    return matched_rgb, matched_thermal, common_prefixes

# Pair up the training files, leaving FLIR_00001 out of the training set.
_train_pairs = get_matched_files_by_prefix(
    train_rgb_dir,
    train_thermal_dir,
    exclude_prefix="FLIR_00001",
)
matched_rgb, matched_thermal, matched_prefixes = _train_pairs

# Pair up the validation files; nothing is excluded here.
_val_pairs = get_matched_files_by_prefix(val_rgb_dir, val_thermal_dir)
val_matched_rgb, val_matched_thermal, val_matched_prefixes = _val_pairs

# Report how many files were paired in each split and modality.
for _summary_line in (
    f"Number of matched train RGB images: {len(matched_rgb)}",
    f"Number of matched train thermal images: {len(matched_thermal)}",
    f"Number of matched val RGB images: {len(val_matched_rgb)}",
    f"Number of matched val thermal images: {len(val_matched_thermal)}",
):
    print(_summary_line)

Train RGB directory: /root/.cache/kagglehub/datasets/deepnewbie/flir-thermal-images-dataset/versions/1/FLIR_ADAS_1_3/train/RGB
Train Thermal directory: /root/.cache/kagglehub/datasets/deepnewbie/flir-thermal-images-dataset/versions/1/FLIR_ADAS_1_3/train/thermal_8_bit
Val RGB directory: /root/.cache/kagglehub/datasets/deepnewbie/flir-thermal-images-dataset/versions/1/FLIR_ADAS_1_3/val/RGB
Val Thermal directory: /root/.cache/kagglehub/datasets/deepnewbie/flir-thermal-images-dataset/versions/1/FLIR_ADAS_1_3/val/thermal_8_bit
Number of matched train RGB images: 8362
Number of matched train thermal images: 8362
Number of matched val RGB images: 1257
Number of matched val thermal images: 1257


In [4]:
# Define the custom dataset and U-Net model for RGB to thermal translation
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from PIL import Image
import torchvision.transforms as T

class FLIRRGB2ThermalDataset(Dataset):
    """Dataset of paired RGB / thermal images for RGB-to-thermal translation.

    Each item is an ``(rgb, thermal)`` tuple. The RGB file is opened and
    converted to mode ``'RGB'``, the thermal file to mode ``'L'``; both are
    then resized to ``img_size x img_size`` and passed through ``ToTensor``.

    Args:
        rgb_dir: Directory containing the RGB images.
        thermal_dir: Directory containing the thermal images.
        img_size: Side length both images are resized to.
        matched_rgb: Optional sequence of RGB file names, aligned
            index-by-index with ``matched_thermal``.
        matched_thermal: Optional sequence of thermal file names, aligned
            index-by-index with ``matched_rgb``.

    If either sequence is omitted, pairs are discovered by scanning both
    directories and matching files on their name without extension.

    Raises:
        ValueError: If the supplied sequences differ in length, or if no
            RGB/thermal pair is available.
    """

    def __init__(self, rgb_dir, thermal_dir, img_size=256, matched_rgb=None, matched_thermal=None):
        self.rgb_dir = rgb_dir
        self.thermal_dir = thermal_dir
        self.img_size = img_size
        # Use matched file lists if provided, else match by prefix
        if matched_rgb is not None and matched_thermal is not None:
            if len(matched_rgb) != len(matched_thermal):
                # Unequal lists would silently mispair images or fail later
                # with an IndexError inside a DataLoader worker.
                raise ValueError(
                    f"matched_rgb ({len(matched_rgb)}) and matched_thermal "
                    f"({len(matched_thermal)}) must have the same length."
                )
            # Copy so later mutation of the caller's lists cannot
            # desynchronise the pairs held by this dataset.
            self.rgb_files = list(matched_rgb)
            self.thermal_files = list(matched_thermal)
        else:
            self.rgb_files, self.thermal_files = self._match_by_stem(rgb_dir, thermal_dir)
        print(f"Paired {len(self.rgb_files)} RGB and thermal images.")
        if len(self.rgb_files) == 0:
            raise ValueError("No matching RGB and thermal image pairs found!")
        self.transform_rgb = T.Compose([
            T.Resize((img_size, img_size)),
            T.ToTensor(),
        ])
        self.transform_thermal = T.Compose([
            T.Resize((img_size, img_size)),
            T.ToTensor(),
        ])

    @staticmethod
    def _match_by_stem(rgb_dir, thermal_dir):
        """Return aligned (rgb_files, thermal_files) lists matched on file stem."""
        image_exts = ('.jpg', '.jpeg', '.png')

        def index_by_stem(directory):
            # Sorted listing keeps the result deterministic when two files in
            # one directory share a stem but differ in extension.
            return {
                os.path.splitext(name)[0]: name
                for name in sorted(os.listdir(directory))
                if name.lower().endswith(image_exts)
            }

        rgb_prefix = index_by_stem(rgb_dir)
        thermal_prefix = index_by_stem(thermal_dir)
        common_prefixes = sorted(rgb_prefix.keys() & thermal_prefix.keys())
        return (
            [rgb_prefix[stem] for stem in common_prefixes],
            [thermal_prefix[stem] for stem in common_prefixes],
        )

    def __len__(self):
        """Number of RGB/thermal pairs."""
        return len(self.rgb_files)

    def __getitem__(self, idx):
        """Load, convert and transform the pair at position ``idx``."""
        rgb_path = os.path.join(self.rgb_dir, self.rgb_files[idx])
        thermal_path = os.path.join(self.thermal_dir, self.thermal_files[idx])
        # Context managers release the underlying file handles as soon as the
        # pixel data has been converted, instead of relying on garbage collection.
        with Image.open(rgb_path) as rgb_img:
            rgb = self.transform_rgb(rgb_img.convert('RGB'))
        with Image.open(thermal_path) as thermal_img:
            thermal = self.transform_thermal(thermal_img.convert('L'))
        return rgb, thermal

class UNet(nn.Module):
    """U-Net style encoder/decoder with skip connections.

    The encoder applies one double-convolution block per entry of ``features``
    followed by 2x2 max pooling. A bottleneck block doubles the last width,
    and the decoder mirrors the encoder with transposed convolutions, each
    followed by concatenation with the matching encoder output and another
    double-convolution block. A final 1x1 convolution maps to ``out_channels``.

    Args:
        in_channels: Number of channels of the input tensor.
        out_channels: Number of channels produced by the final 1x1 convolution.
        features: Channel widths of the encoder stages, shallowest first.
    """

    def __init__(self, in_channels=3, out_channels=1, features=(64, 128, 256, 512)):
        super().__init__()
        # Immutable default plus a local copy: the argument is never shared
        # between instances and any iterable of ints is accepted.
        features = list(features)
        self.downs = nn.ModuleList()
        self.ups = nn.ModuleList()
        for feature in features:
            self.downs.append(self.conv_block(in_channels, feature))
            in_channels = feature
        for feature in reversed(features):
            # Each decoder stage is stored as two consecutive entries:
            # the upsampling layer, then the block applied after concatenation.
            self.ups.append(nn.ConvTranspose2d(feature*2, feature, kernel_size=2, stride=2))
            self.ups.append(self.conv_block(feature*2, feature))
        self.bottleneck = self.conv_block(features[-1], features[-1]*2)
        self.final_conv = nn.Conv2d(features[0], out_channels, kernel_size=1)
        # Built once instead of on every forward pass. MaxPool2d has no
        # parameters or buffers, so state_dict keys are unchanged.
        self.pool = nn.MaxPool2d(2)

    def conv_block(self, in_c, out_c):
        """Return two 3x3 conv -> BatchNorm -> ReLU layers mapping in_c to out_c channels."""
        return nn.Sequential(
            nn.Conv2d(in_c, out_c, 3, padding=1),
            nn.BatchNorm2d(out_c),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_c, out_c, 3, padding=1),
            nn.BatchNorm2d(out_c),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        """Run the network on ``x`` and return the output of the final 1x1 convolution."""
        skip_connections = []
        for down in self.downs:
            x = down(x)
            skip_connections.append(x)
            x = self.pool(x)
        x = self.bottleneck(x)
        # Decoder stages consume the encoder outputs deepest-first.
        for idx, skip in zip(range(0, len(self.ups), 2), reversed(skip_connections)):
            x = self.ups[idx](x)
            if x.shape[2:] != skip.shape[2:]:
                # Odd spatial sizes lose a row/column in pooling, so the
                # upsampled map can be smaller than the skip tensor. Bilinear
                # interpolation on the tensor replaces the torchvision image
                # resize helper and removes the model's torchvision dependency.
                x = nn.functional.interpolate(
                    x, size=skip.shape[2:], mode='bilinear', align_corners=False
                )
            x = torch.cat((skip, x), dim=1)
            x = self.ups[idx+1](x)
        return self.final_conv(x)

In [5]:
# Training loop for the U-Net model with diagnostics and quick test mode
def train_model(
    rgb_dir, thermal_dir, matched_rgb, matched_thermal, epochs=10, batch_size=8, lr=5e-4, img_size=256, save_path='unet_rgb2thermal.pth', max_samples=100,
    log_every=2, num_workers=0
):
    """Train a UNet to map RGB images to thermal images with an L1 loss.

    Args:
        rgb_dir: Directory containing the RGB images.
        thermal_dir: Directory containing the thermal images.
        matched_rgb: RGB file names, aligned with ``matched_thermal``.
        matched_thermal: Thermal file names, aligned with ``matched_rgb``.
        epochs: Number of passes over the selected samples.
        batch_size: Samples per optimisation step.
        lr: Learning rate for the Adam optimiser.
        img_size: Side length images are resized to by the dataset.
        save_path: File the model ``state_dict`` is written to whenever the
            epoch's mean training loss improves on the best seen so far.
        max_samples: If not ``None``, train only on the first ``max_samples``
            pairs (quick test mode); ``None`` uses every pair.
        log_every: Emit a batch-loss line every ``log_every`` batches;
            ``None`` or ``0`` disables per-batch lines.
        num_workers: Worker processes used by the ``DataLoader``.

    Returns:
        List with the mean training loss of each epoch.

    Raises:
        ValueError: If no samples are selected for training.

    Note:
        The "best" checkpoint is chosen on training loss; no validation data
        is evaluated here.
    """
    import time
    from torch.utils.data import Subset
    from tqdm import tqdm

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    dataset = FLIRRGB2ThermalDataset(rgb_dir, thermal_dir, img_size, matched_rgb=matched_rgb, matched_thermal=matched_thermal)
    if max_samples is not None:
        indices = list(range(min(max_samples, len(dataset))))
        dataset = Subset(dataset, indices)
    if len(dataset) == 0:
        # Fail with a clear message instead of an opaque sampler error.
        raise ValueError("No training samples selected; check max_samples and the matched file lists.")
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    model = UNet().to(device)
    criterion = nn.L1Loss()  # Try nn.MSELoss() for even smoother results
    optimizer = optim.Adam(model.parameters(), lr=lr)
    best_loss = float('inf')
    train_losses = []
    num_batches = len(loader)
    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        start_time = time.time()
        for i, (rgb, thermal) in enumerate(tqdm(loader, desc=f"Epoch {epoch+1}"), start=1):
            rgb, thermal = rgb.to(device), thermal.to(device)
            pred = model(rgb)
            loss = criterion(pred, thermal)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            batch_loss = loss.item()
            # Weight by batch size so a smaller final batch does not skew the epoch mean.
            running_loss += batch_loss * rgb.size(0)
            if log_every and i % log_every == 0:
                # tqdm.write prints without corrupting the active progress bar,
                # unlike a bare print() inside the loop.
                tqdm.write(f"  Batch {i}/{num_batches} - Batch Loss: {batch_loss:.4f}")
        epoch_loss = running_loss / len(dataset)
        train_losses.append(epoch_loss)
        elapsed = time.time() - start_time
        print(f"Epoch {epoch+1}/{epochs} - Loss: {epoch_loss:.4f} - Time: {elapsed:.1f}s")
        if epoch_loss < best_loss:
            best_loss = epoch_loss
            torch.save(model.state_dict(), save_path)
    print("Training complete. Best model saved.")
    return train_losses

# Example usage:
#train_losses = train_model(train_rgb_dir, train_thermal_dir, matched_rgb, matched_thermal, epochs=10, batch_size=8, lr=5e-4, max_samples=100)

In [None]:
# Train a fresh model on every matched training pair.
# max_samples=None turns off the quick-test subset used by train_model's default.
_full_run_settings = dict(
    epochs=10,
    batch_size=32,
    lr=5e-4,
    max_samples=None,
)
train_losses = train_model(
    train_rgb_dir, train_thermal_dir, matched_rgb, matched_thermal, **_full_run_settings
)

Paired 8362 RGB and thermal images.


Epoch 1:   1%|          | 2/262 [00:07<15:32,  3.59s/it]

  Batch 2/262 - Batch Loss: 0.4663


Epoch 1:   2%|▏         | 4/262 [00:13<13:53,  3.23s/it]

  Batch 4/262 - Batch Loss: 0.2521


Epoch 1:   2%|▏         | 6/262 [00:19<12:36,  2.95s/it]

  Batch 6/262 - Batch Loss: 0.1931


Epoch 1:   3%|▎         | 8/262 [00:24<12:08,  2.87s/it]

  Batch 8/262 - Batch Loss: 0.1603


Epoch 1:   4%|▍         | 10/262 [00:30<12:12,  2.91s/it]

  Batch 10/262 - Batch Loss: 0.1602


Epoch 1:   5%|▍         | 12/262 [00:36<11:57,  2.87s/it]

  Batch 12/262 - Batch Loss: 0.1557


Epoch 1:   5%|▌         | 14/262 [00:42<12:18,  2.98s/it]

  Batch 14/262 - Batch Loss: 0.1575


Epoch 1:   6%|▌         | 16/262 [00:48<11:55,  2.91s/it]

  Batch 16/262 - Batch Loss: 0.1483


Epoch 1:   7%|▋         | 18/262 [00:54<12:05,  2.97s/it]

  Batch 18/262 - Batch Loss: 0.1460


Epoch 1:   8%|▊         | 20/262 [00:59<11:43,  2.91s/it]

  Batch 20/262 - Batch Loss: 0.1424


Epoch 1:   8%|▊         | 22/262 [01:06<12:04,  3.02s/it]

  Batch 22/262 - Batch Loss: 0.1468


Epoch 1:   9%|▉         | 24/262 [01:12<11:41,  2.95s/it]

  Batch 24/262 - Batch Loss: 0.1376


Epoch 1:  10%|▉         | 26/262 [01:18<12:03,  3.07s/it]

  Batch 26/262 - Batch Loss: 0.1385


Epoch 1:  11%|█         | 28/262 [01:24<11:41,  3.00s/it]

  Batch 28/262 - Batch Loss: 0.1333


Epoch 1:  11%|█▏        | 30/262 [01:30<11:51,  3.07s/it]

  Batch 30/262 - Batch Loss: 0.1410


Epoch 1:  12%|█▏        | 32/262 [01:36<11:26,  2.99s/it]

  Batch 32/262 - Batch Loss: 0.1237


Epoch 1:  13%|█▎        | 34/262 [01:42<11:39,  3.07s/it]

  Batch 34/262 - Batch Loss: 0.1267


Epoch 1:  14%|█▎        | 36/262 [01:48<11:28,  3.05s/it]

  Batch 36/262 - Batch Loss: 0.0986


Epoch 1:  15%|█▍        | 38/262 [01:55<11:49,  3.17s/it]

  Batch 38/262 - Batch Loss: 0.1048


Epoch 1:  15%|█▌        | 40/262 [02:01<11:29,  3.11s/it]

  Batch 40/262 - Batch Loss: 0.1239


Epoch 1:  16%|█▌        | 42/262 [02:08<11:37,  3.17s/it]

  Batch 42/262 - Batch Loss: 0.1149


Epoch 1:  17%|█▋        | 44/262 [02:14<11:25,  3.14s/it]

  Batch 44/262 - Batch Loss: 0.1015


Epoch 1:  18%|█▊        | 46/262 [02:20<11:18,  3.14s/it]

  Batch 46/262 - Batch Loss: 0.1193


Epoch 1:  18%|█▊        | 48/262 [02:26<10:56,  3.07s/it]

  Batch 48/262 - Batch Loss: 0.1107


Epoch 1:  19%|█▉        | 50/262 [02:32<10:43,  3.03s/it]

  Batch 50/262 - Batch Loss: 0.1143


Epoch 1:  20%|█▉        | 52/262 [02:38<10:32,  3.01s/it]

  Batch 52/262 - Batch Loss: 0.1153


Epoch 1:  21%|██        | 54/262 [02:44<10:32,  3.04s/it]

  Batch 54/262 - Batch Loss: 0.1076


Epoch 1:  21%|██▏       | 56/262 [02:50<10:23,  3.03s/it]

  Batch 56/262 - Batch Loss: 0.1102


Epoch 1:  22%|██▏       | 58/262 [02:57<10:22,  3.05s/it]

  Batch 58/262 - Batch Loss: 0.1029


Epoch 1:  23%|██▎       | 60/262 [03:03<10:11,  3.03s/it]

  Batch 60/262 - Batch Loss: 0.1137


Epoch 1:  24%|██▎       | 62/262 [03:09<10:08,  3.04s/it]

  Batch 62/262 - Batch Loss: 0.1087


Epoch 1:  24%|██▍       | 64/262 [03:15<09:58,  3.02s/it]

  Batch 64/262 - Batch Loss: 0.1051


Epoch 1:  25%|██▌       | 66/262 [03:21<09:50,  3.01s/it]

  Batch 66/262 - Batch Loss: 0.1030


Epoch 1:  26%|██▌       | 68/262 [03:27<09:42,  3.00s/it]

  Batch 68/262 - Batch Loss: 0.1089


Epoch 1:  27%|██▋       | 70/262 [03:33<09:42,  3.04s/it]

  Batch 70/262 - Batch Loss: 0.1076


Epoch 1:  27%|██▋       | 72/262 [03:39<09:27,  2.99s/it]

  Batch 72/262 - Batch Loss: 0.1103


Epoch 1:  28%|██▊       | 74/262 [03:45<09:30,  3.03s/it]

  Batch 74/262 - Batch Loss: 0.1143


Epoch 1:  29%|██▉       | 76/262 [03:51<09:18,  3.00s/it]

  Batch 76/262 - Batch Loss: 0.1167


Epoch 1:  30%|██▉       | 78/262 [03:57<09:21,  3.05s/it]

  Batch 78/262 - Batch Loss: 0.1166


Epoch 1:  31%|███       | 80/262 [04:03<09:09,  3.02s/it]

  Batch 80/262 - Batch Loss: 0.1103


Epoch 1:  31%|███▏      | 82/262 [04:09<09:06,  3.04s/it]

  Batch 82/262 - Batch Loss: 0.1076


Epoch 1:  32%|███▏      | 84/262 [04:15<08:51,  2.99s/it]

  Batch 84/262 - Batch Loss: 0.1075


Epoch 1:  33%|███▎      | 86/262 [04:21<08:49,  3.01s/it]

  Batch 86/262 - Batch Loss: 0.0979


Epoch 1:  34%|███▎      | 88/262 [04:27<08:39,  2.99s/it]

  Batch 88/262 - Batch Loss: 0.1053


Epoch 1:  34%|███▍      | 90/262 [04:34<08:44,  3.05s/it]

  Batch 90/262 - Batch Loss: 0.1127


Epoch 1:  35%|███▌      | 92/262 [04:39<08:25,  2.97s/it]

  Batch 92/262 - Batch Loss: 0.0993


Epoch 1:  36%|███▌      | 94/262 [04:46<08:28,  3.03s/it]

  Batch 94/262 - Batch Loss: 0.1028


Epoch 1:  37%|███▋      | 96/262 [04:51<08:15,  2.99s/it]

  Batch 96/262 - Batch Loss: 0.1103


Epoch 1:  37%|███▋      | 98/262 [04:58<08:24,  3.08s/it]

  Batch 98/262 - Batch Loss: 0.1185


Epoch 1:  38%|███▊      | 100/262 [05:04<08:09,  3.02s/it]

  Batch 100/262 - Batch Loss: 0.1073


Epoch 1:  39%|███▉      | 102/262 [05:10<08:12,  3.08s/it]

  Batch 102/262 - Batch Loss: 0.1063


Epoch 1:  40%|███▉      | 104/262 [05:16<08:01,  3.05s/it]

  Batch 104/262 - Batch Loss: 0.1042


Epoch 1:  40%|████      | 106/262 [05:22<07:49,  3.01s/it]

  Batch 106/262 - Batch Loss: 0.1106


Epoch 1:  41%|████      | 108/262 [05:28<07:41,  2.99s/it]

  Batch 108/262 - Batch Loss: 0.1025


Epoch 1:  42%|████▏     | 110/262 [05:34<07:38,  3.02s/it]

  Batch 110/262 - Batch Loss: 0.1043


Epoch 1:  43%|████▎     | 112/262 [05:40<07:35,  3.03s/it]

  Batch 112/262 - Batch Loss: 0.1179


Epoch 1:  44%|████▎     | 114/262 [05:47<07:39,  3.10s/it]

  Batch 114/262 - Batch Loss: 0.0993


Epoch 1:  44%|████▍     | 116/262 [05:53<07:26,  3.06s/it]

  Batch 116/262 - Batch Loss: 0.1034


Epoch 1:  45%|████▌     | 118/262 [05:59<07:27,  3.11s/it]

  Batch 118/262 - Batch Loss: 0.1001


Epoch 1:  46%|████▌     | 120/262 [06:05<07:19,  3.09s/it]

  Batch 120/262 - Batch Loss: 0.0987


Epoch 1:  47%|████▋     | 122/262 [06:11<07:15,  3.11s/it]

  Batch 122/262 - Batch Loss: 0.1031


Epoch 1:  47%|████▋     | 124/262 [06:18<07:21,  3.20s/it]

  Batch 124/262 - Batch Loss: 0.1039


Epoch 1:  48%|████▊     | 126/262 [06:24<07:10,  3.17s/it]

  Batch 126/262 - Batch Loss: 0.0995


Epoch 1:  49%|████▉     | 128/262 [06:31<07:16,  3.26s/it]

  Batch 128/262 - Batch Loss: 0.0993


Epoch 1:  50%|████▉     | 130/262 [06:37<06:57,  3.16s/it]

  Batch 130/262 - Batch Loss: 0.1079


Epoch 1:  50%|█████     | 132/262 [06:43<06:58,  3.22s/it]

  Batch 132/262 - Batch Loss: 0.1072


Epoch 1:  51%|█████     | 134/262 [06:50<06:44,  3.16s/it]

  Batch 134/262 - Batch Loss: 0.1087


Epoch 1:  52%|█████▏    | 136/262 [06:57<07:01,  3.35s/it]

  Batch 136/262 - Batch Loss: 0.1106


Epoch 1:  53%|█████▎    | 138/262 [07:03<06:42,  3.24s/it]

  Batch 138/262 - Batch Loss: 0.0912


Epoch 1:  53%|█████▎    | 140/262 [07:10<06:54,  3.40s/it]

  Batch 140/262 - Batch Loss: 0.1026


Epoch 1:  54%|█████▍    | 142/262 [07:16<06:28,  3.23s/it]

  Batch 142/262 - Batch Loss: 0.0965


Epoch 1:  55%|█████▍    | 144/262 [07:23<06:26,  3.27s/it]

  Batch 144/262 - Batch Loss: 0.1065


Epoch 1:  56%|█████▌    | 146/262 [07:29<06:07,  3.17s/it]

  Batch 146/262 - Batch Loss: 0.0978


Epoch 1:  56%|█████▋    | 148/262 [07:35<05:59,  3.15s/it]

  Batch 148/262 - Batch Loss: 0.1042


Epoch 1:  57%|█████▋    | 150/262 [07:41<05:39,  3.03s/it]

  Batch 150/262 - Batch Loss: 0.1065


Epoch 1:  58%|█████▊    | 152/262 [07:47<05:47,  3.16s/it]

  Batch 152/262 - Batch Loss: 0.1086


Epoch 1:  59%|█████▉    | 154/262 [07:53<05:29,  3.05s/it]

  Batch 154/262 - Batch Loss: 0.1068


Epoch 1:  60%|█████▉    | 156/262 [08:00<05:31,  3.12s/it]

  Batch 156/262 - Batch Loss: 0.1035


Epoch 1:  60%|██████    | 158/262 [08:06<05:17,  3.06s/it]

  Batch 158/262 - Batch Loss: 0.1031


Epoch 1:  61%|██████    | 160/262 [08:12<05:19,  3.13s/it]

  Batch 160/262 - Batch Loss: 0.0893


Epoch 1:  62%|██████▏   | 162/262 [08:18<05:06,  3.07s/it]

  Batch 162/262 - Batch Loss: 0.1183


Epoch 1:  63%|██████▎   | 164/262 [08:24<05:06,  3.12s/it]

  Batch 164/262 - Batch Loss: 0.1021


Epoch 1:  63%|██████▎   | 166/262 [08:30<04:49,  3.02s/it]

  Batch 166/262 - Batch Loss: 0.1043


Epoch 1:  64%|██████▍   | 168/262 [08:36<04:50,  3.09s/it]

  Batch 168/262 - Batch Loss: 0.1041


Epoch 1:  65%|██████▍   | 170/262 [08:42<04:40,  3.05s/it]

  Batch 170/262 - Batch Loss: 0.0920


Epoch 1:  66%|██████▌   | 172/262 [08:49<04:38,  3.10s/it]

  Batch 172/262 - Batch Loss: 0.1050


Epoch 1:  66%|██████▋   | 174/262 [08:55<04:30,  3.07s/it]

  Batch 174/262 - Batch Loss: 0.1022


Epoch 1:  67%|██████▋   | 176/262 [09:01<04:28,  3.12s/it]

  Batch 176/262 - Batch Loss: 0.1037


Epoch 1:  68%|██████▊   | 178/262 [09:07<04:13,  3.01s/it]

  Batch 178/262 - Batch Loss: 0.1111


Epoch 1:  69%|██████▊   | 180/262 [09:13<04:13,  3.09s/it]

  Batch 180/262 - Batch Loss: 0.0920


Epoch 1:  69%|██████▉   | 182/262 [09:19<03:59,  3.00s/it]

  Batch 182/262 - Batch Loss: 0.1015


Epoch 1:  70%|███████   | 184/262 [09:25<04:02,  3.11s/it]

  Batch 184/262 - Batch Loss: 0.1034


Epoch 1:  71%|███████   | 186/262 [09:31<03:48,  3.01s/it]

  Batch 186/262 - Batch Loss: 0.0991


Epoch 1:  72%|███████▏  | 188/262 [09:37<03:49,  3.10s/it]

  Batch 188/262 - Batch Loss: 0.1123


Epoch 1:  73%|███████▎  | 190/262 [09:43<03:38,  3.03s/it]

  Batch 190/262 - Batch Loss: 0.0933


Epoch 1:  73%|███████▎  | 192/262 [09:50<03:40,  3.15s/it]

  Batch 192/262 - Batch Loss: 0.1004


Epoch 1:  74%|███████▍  | 194/262 [09:56<03:26,  3.04s/it]

  Batch 194/262 - Batch Loss: 0.0919


Epoch 1:  75%|███████▍  | 196/262 [10:02<03:24,  3.10s/it]

  Batch 196/262 - Batch Loss: 0.0924


Epoch 1:  76%|███████▌  | 198/262 [10:08<03:12,  3.00s/it]

  Batch 198/262 - Batch Loss: 0.1032


Epoch 1:  76%|███████▋  | 200/262 [10:14<03:12,  3.10s/it]

  Batch 200/262 - Batch Loss: 0.1018


Epoch 1:  77%|███████▋  | 202/262 [10:20<03:00,  3.01s/it]

  Batch 202/262 - Batch Loss: 0.0876


Epoch 1:  78%|███████▊  | 204/262 [10:26<03:02,  3.15s/it]

  Batch 204/262 - Batch Loss: 0.1031


Epoch 1:  79%|███████▊  | 206/262 [10:32<02:48,  3.01s/it]

  Batch 206/262 - Batch Loss: 0.0960


Epoch 1:  79%|███████▉  | 208/262 [10:38<02:45,  3.07s/it]

  Batch 208/262 - Batch Loss: 0.1101


Epoch 1:  80%|████████  | 210/262 [10:44<02:34,  2.97s/it]

  Batch 210/262 - Batch Loss: 0.1002


Epoch 1:  81%|████████  | 212/262 [10:50<02:34,  3.10s/it]

  Batch 212/262 - Batch Loss: 0.0958


Epoch 1:  82%|████████▏ | 214/262 [10:56<02:24,  3.02s/it]

  Batch 214/262 - Batch Loss: 0.1009


Epoch 1:  82%|████████▏ | 216/262 [11:03<02:24,  3.15s/it]

  Batch 216/262 - Batch Loss: 0.0971


Epoch 1:  83%|████████▎ | 218/262 [11:08<02:13,  3.05s/it]

  Batch 218/262 - Batch Loss: 0.0929


Epoch 1:  84%|████████▍ | 220/262 [11:15<02:12,  3.16s/it]

  Batch 220/262 - Batch Loss: 0.0955


Epoch 1:  85%|████████▍ | 222/262 [11:21<02:02,  3.05s/it]

  Batch 222/262 - Batch Loss: 0.0971


Epoch 1:  85%|████████▌ | 224/262 [11:27<02:01,  3.19s/it]

  Batch 224/262 - Batch Loss: 0.0947


Epoch 1:  86%|████████▋ | 226/262 [11:33<01:49,  3.04s/it]

  Batch 226/262 - Batch Loss: 0.1011


Epoch 1:  87%|████████▋ | 228/262 [11:39<01:46,  3.12s/it]

  Batch 228/262 - Batch Loss: 0.0943


Epoch 1:  88%|████████▊ | 230/262 [11:45<01:36,  3.02s/it]

  Batch 230/262 - Batch Loss: 0.0977


Epoch 1:  89%|████████▊ | 232/262 [11:52<01:34,  3.14s/it]

  Batch 232/262 - Batch Loss: 0.1079


Epoch 1:  89%|████████▉ | 234/262 [11:58<01:25,  3.05s/it]

  Batch 234/262 - Batch Loss: 0.1009


Epoch 1:  90%|█████████ | 236/262 [12:04<01:21,  3.15s/it]

  Batch 236/262 - Batch Loss: 0.0924


Epoch 1:  91%|█████████ | 238/262 [12:10<01:12,  3.01s/it]

  Batch 238/262 - Batch Loss: 0.0954


Epoch 1:  92%|█████████▏| 240/262 [12:16<01:09,  3.17s/it]

  Batch 240/262 - Batch Loss: 0.0962


Epoch 1:  92%|█████████▏| 242/262 [12:22<01:00,  3.00s/it]

  Batch 242/262 - Batch Loss: 0.1055


Epoch 1:  93%|█████████▎| 244/262 [12:28<00:56,  3.13s/it]

  Batch 244/262 - Batch Loss: 0.0969


Epoch 1:  94%|█████████▍| 246/262 [12:34<00:48,  3.02s/it]

  Batch 246/262 - Batch Loss: 0.0848


Epoch 1:  95%|█████████▍| 248/262 [12:41<00:43,  3.12s/it]

  Batch 248/262 - Batch Loss: 0.0935


Epoch 1:  95%|█████████▌| 250/262 [12:46<00:35,  3.00s/it]

  Batch 250/262 - Batch Loss: 0.1071


Epoch 1:  96%|█████████▌| 252/262 [12:52<00:30,  3.06s/it]

  Batch 252/262 - Batch Loss: 0.0963


Epoch 1:  97%|█████████▋| 254/262 [12:58<00:23,  2.99s/it]

  Batch 254/262 - Batch Loss: 0.0950


Epoch 1:  98%|█████████▊| 256/262 [13:04<00:17,  3.00s/it]

  Batch 256/262 - Batch Loss: 0.0922


Epoch 1:  98%|█████████▊| 258/262 [13:10<00:11,  2.94s/it]

  Batch 258/262 - Batch Loss: 0.1078


Epoch 1:  99%|█████████▉| 260/262 [13:16<00:05,  2.91s/it]

  Batch 260/262 - Batch Loss: 0.1108


Epoch 1: 100%|██████████| 262/262 [13:20<00:00,  3.05s/it]


  Batch 262/262 - Batch Loss: 0.0754
Epoch 1/10 - Loss: 0.1126 - Time: 800.3s


Epoch 2:   1%|          | 2/262 [00:05<12:34,  2.90s/it]

  Batch 2/262 - Batch Loss: 0.0891


Epoch 2:   2%|▏         | 4/262 [00:11<12:51,  2.99s/it]

  Batch 4/262 - Batch Loss: 0.1007


Epoch 2:   2%|▏         | 6/262 [00:17<12:32,  2.94s/it]

  Batch 6/262 - Batch Loss: 0.0969


Epoch 2:   3%|▎         | 8/262 [00:23<12:39,  2.99s/it]

  Batch 8/262 - Batch Loss: 0.0992


Epoch 2:   4%|▍         | 10/262 [00:29<12:10,  2.90s/it]

  Batch 10/262 - Batch Loss: 0.1094


Epoch 2:   5%|▍         | 12/262 [00:35<12:15,  2.94s/it]

  Batch 12/262 - Batch Loss: 0.0987


Epoch 2:   5%|▌         | 14/262 [00:41<11:55,  2.89s/it]

  Batch 14/262 - Batch Loss: 0.1124


Epoch 2:   6%|▌         | 16/262 [00:47<12:12,  2.98s/it]

  Batch 16/262 - Batch Loss: 0.1020


Epoch 2:   7%|▋         | 18/262 [00:52<11:45,  2.89s/it]

  Batch 18/262 - Batch Loss: 0.0915


Epoch 2:   8%|▊         | 20/262 [00:59<12:18,  3.05s/it]

  Batch 20/262 - Batch Loss: 0.0979


Epoch 2:   8%|▊         | 22/262 [01:04<11:45,  2.94s/it]

  Batch 22/262 - Batch Loss: 0.0982


Epoch 2:   9%|▉         | 24/262 [01:11<12:16,  3.09s/it]

  Batch 24/262 - Batch Loss: 0.0985


Epoch 2:  10%|▉         | 26/262 [01:16<11:42,  2.98s/it]

  Batch 26/262 - Batch Loss: 0.0915


Epoch 2:  11%|█         | 28/262 [01:23<11:55,  3.06s/it]

  Batch 28/262 - Batch Loss: 0.0985


Epoch 2:  11%|█▏        | 30/262 [01:28<11:28,  2.97s/it]

  Batch 30/262 - Batch Loss: 0.0938


Epoch 2:  12%|█▏        | 32/262 [01:35<11:48,  3.08s/it]

  Batch 32/262 - Batch Loss: 0.0964


Epoch 2:  13%|█▎        | 34/262 [01:40<11:19,  2.98s/it]

  Batch 34/262 - Batch Loss: 0.0901


Epoch 2:  14%|█▎        | 36/262 [01:46<11:23,  3.03s/it]

  Batch 36/262 - Batch Loss: 0.0953


Epoch 2:  15%|█▍        | 38/262 [01:52<10:56,  2.93s/it]

  Batch 38/262 - Batch Loss: 0.1054


Epoch 2:  15%|█▌        | 40/262 [01:58<10:51,  2.93s/it]

  Batch 40/262 - Batch Loss: 0.0869


Epoch 2:  16%|█▌        | 42/262 [02:04<10:28,  2.86s/it]

  Batch 42/262 - Batch Loss: 0.1061


Epoch 2:  17%|█▋        | 44/262 [02:09<10:15,  2.82s/it]

  Batch 44/262 - Batch Loss: 0.0992


Epoch 2:  18%|█▊        | 46/262 [02:15<10:26,  2.90s/it]

  Batch 46/262 - Batch Loss: 0.0916


Epoch 2:  18%|█▊        | 48/262 [02:21<10:11,  2.86s/it]

  Batch 48/262 - Batch Loss: 0.1008


Epoch 2:  19%|█▉        | 50/262 [02:27<10:29,  2.97s/it]

  Batch 50/262 - Batch Loss: 0.1067


Epoch 2:  20%|█▉        | 52/262 [02:33<10:04,  2.88s/it]

  Batch 52/262 - Batch Loss: 0.1077


Epoch 2:  21%|██        | 54/262 [02:39<10:21,  2.99s/it]

  Batch 54/262 - Batch Loss: 0.0947


Epoch 2:  21%|██▏       | 56/262 [02:45<09:59,  2.91s/it]

  Batch 56/262 - Batch Loss: 0.0868


Epoch 2:  22%|██▏       | 58/262 [02:51<10:05,  2.97s/it]

  Batch 58/262 - Batch Loss: 0.0960


Epoch 2:  23%|██▎       | 60/262 [02:57<09:52,  2.93s/it]

  Batch 60/262 - Batch Loss: 0.0965


Epoch 2:  24%|██▎       | 62/262 [03:03<09:49,  2.95s/it]

  Batch 62/262 - Batch Loss: 0.0989


Epoch 2:  24%|██▍       | 64/262 [03:08<09:41,  2.94s/it]

  Batch 64/262 - Batch Loss: 0.1063


Epoch 2:  25%|██▌       | 66/262 [03:14<09:35,  2.94s/it]

  Batch 66/262 - Batch Loss: 0.0921


Epoch 2:  26%|██▌       | 68/262 [03:20<09:26,  2.92s/it]

  Batch 68/262 - Batch Loss: 0.0958


Epoch 2:  27%|██▋       | 70/262 [03:26<09:30,  2.97s/it]

  Batch 70/262 - Batch Loss: 0.0998


Epoch 2:  27%|██▋       | 72/262 [03:32<09:12,  2.91s/it]

  Batch 72/262 - Batch Loss: 0.0937


Epoch 2:  28%|██▊       | 74/262 [03:38<09:27,  3.02s/it]

  Batch 74/262 - Batch Loss: 0.0956


Epoch 2:  29%|██▉       | 76/262 [03:44<09:03,  2.92s/it]

  Batch 76/262 - Batch Loss: 0.0959


Epoch 2:  30%|██▉       | 78/262 [03:50<09:27,  3.08s/it]

  Batch 78/262 - Batch Loss: 0.0890


Epoch 2:  31%|███       | 80/262 [03:56<09:00,  2.97s/it]

  Batch 80/262 - Batch Loss: 0.0936


Epoch 2:  31%|███▏      | 82/262 [04:02<09:16,  3.09s/it]

  Batch 82/262 - Batch Loss: 0.0974


Epoch 2:  32%|███▏      | 84/262 [04:08<08:45,  2.95s/it]

  Batch 84/262 - Batch Loss: 0.0967


Epoch 2:  33%|███▎      | 86/262 [04:14<08:58,  3.06s/it]

  Batch 86/262 - Batch Loss: 0.1077


Epoch 2:  34%|███▎      | 88/262 [04:20<08:39,  2.98s/it]

  Batch 88/262 - Batch Loss: 0.0914


Epoch 2:  34%|███▍      | 90/262 [04:26<08:39,  3.02s/it]

  Batch 90/262 - Batch Loss: 0.0925


Epoch 2:  35%|███▌      | 92/262 [04:31<08:17,  2.92s/it]

  Batch 92/262 - Batch Loss: 0.0952


Epoch 2:  36%|███▌      | 94/262 [04:37<08:10,  2.92s/it]

  Batch 94/262 - Batch Loss: 0.0901


Epoch 2:  37%|███▋      | 96/262 [04:43<08:03,  2.91s/it]

  Batch 96/262 - Batch Loss: 0.0915


Epoch 2:  37%|███▋      | 98/262 [04:49<07:57,  2.91s/it]

  Batch 98/262 - Batch Loss: 0.0922


Epoch 2:  38%|███▊      | 100/262 [04:55<07:59,  2.96s/it]

  Batch 100/262 - Batch Loss: 0.0988


Epoch 2:  39%|███▉      | 102/262 [05:01<07:48,  2.93s/it]

  Batch 102/262 - Batch Loss: 0.0928


Epoch 2:  40%|███▉      | 104/262 [05:07<07:45,  2.95s/it]

  Batch 104/262 - Batch Loss: 0.0915


Epoch 2:  40%|████      | 106/262 [05:12<07:34,  2.91s/it]

  Batch 106/262 - Batch Loss: 0.1059


Epoch 2:  41%|████      | 108/262 [05:19<07:42,  3.01s/it]

  Batch 108/262 - Batch Loss: 0.0839


Epoch 2:  42%|████▏     | 110/262 [05:24<07:26,  2.93s/it]

  Batch 110/262 - Batch Loss: 0.0963


Epoch 2:  43%|████▎     | 112/262 [05:31<07:33,  3.02s/it]

  Batch 112/262 - Batch Loss: 0.0860


Epoch 2:  44%|████▎     | 114/262 [05:36<07:09,  2.90s/it]

  Batch 114/262 - Batch Loss: 0.0977


Epoch 2:  44%|████▍     | 116/262 [05:43<07:18,  3.00s/it]

  Batch 116/262 - Batch Loss: 0.0962


Epoch 2:  45%|████▌     | 118/262 [05:48<07:03,  2.94s/it]

  Batch 118/262 - Batch Loss: 0.0916


Epoch 2:  46%|████▌     | 120/262 [05:55<07:08,  3.02s/it]

  Batch 120/262 - Batch Loss: 0.0968


Epoch 2:  47%|████▋     | 122/262 [06:00<06:54,  2.96s/it]

  Batch 122/262 - Batch Loss: 0.0917


Epoch 2:  47%|████▋     | 124/262 [06:07<06:53,  3.00s/it]

  Batch 124/262 - Batch Loss: 0.0970


Epoch 2:  48%|████▊     | 126/262 [06:12<06:38,  2.93s/it]

  Batch 126/262 - Batch Loss: 0.1041


Epoch 2:  49%|████▉     | 128/262 [06:18<06:33,  2.94s/it]

  Batch 128/262 - Batch Loss: 0.0892


Epoch 2:  50%|████▉     | 130/262 [06:24<06:25,  2.92s/it]

  Batch 130/262 - Batch Loss: 0.0944


Epoch 2:  50%|█████     | 132/262 [06:30<06:18,  2.92s/it]

  Batch 132/262 - Batch Loss: 0.0957


Epoch 2:  51%|█████     | 134/262 [06:35<06:07,  2.87s/it]

  Batch 134/262 - Batch Loss: 0.0989


Epoch 2:  52%|█████▏    | 136/262 [06:42<06:20,  3.02s/it]

  Batch 136/262 - Batch Loss: 0.0951


Epoch 2:  53%|█████▎    | 138/262 [06:47<06:05,  2.95s/it]

  Batch 138/262 - Batch Loss: 0.0987


Epoch 2:  53%|█████▎    | 140/262 [06:54<06:10,  3.04s/it]

  Batch 140/262 - Batch Loss: 0.1051


Epoch 2:  54%|█████▍    | 142/262 [06:59<05:52,  2.94s/it]

  Batch 142/262 - Batch Loss: 0.1110


Epoch 2:  55%|█████▍    | 144/262 [07:06<06:09,  3.13s/it]

  Batch 144/262 - Batch Loss: 0.0867


Epoch 2:  56%|█████▌    | 146/262 [07:12<05:46,  2.99s/it]

  Batch 146/262 - Batch Loss: 0.0913


Epoch 2:  56%|█████▋    | 148/262 [07:18<05:51,  3.08s/it]

  Batch 148/262 - Batch Loss: 0.0932


Epoch 2:  57%|█████▋    | 150/262 [07:24<05:35,  3.00s/it]

  Batch 150/262 - Batch Loss: 0.0975


Epoch 2:  58%|█████▊    | 152/262 [07:30<05:40,  3.09s/it]

  Batch 152/262 - Batch Loss: 0.1025


Epoch 2:  59%|█████▉    | 154/262 [07:36<05:22,  2.98s/it]

  Batch 154/262 - Batch Loss: 0.1001


Epoch 2:  60%|█████▉    | 156/262 [07:42<05:30,  3.12s/it]

  Batch 156/262 - Batch Loss: 0.0992


Epoch 2:  60%|██████    | 158/262 [07:48<05:12,  3.01s/it]

  Batch 158/262 - Batch Loss: 0.0901


Epoch 2:  61%|██████    | 160/262 [07:54<05:21,  3.15s/it]

  Batch 160/262 - Batch Loss: 0.0883


Epoch 2:  62%|██████▏   | 162/262 [08:00<05:02,  3.02s/it]

  Batch 162/262 - Batch Loss: 0.0951


Epoch 2:  63%|██████▎   | 164/262 [08:06<05:05,  3.11s/it]

  Batch 164/262 - Batch Loss: 0.0958


Epoch 2:  63%|██████▎   | 166/262 [08:12<04:43,  2.96s/it]

  Batch 166/262 - Batch Loss: 0.0903


Epoch 2:  64%|██████▍   | 168/262 [08:18<04:41,  3.00s/it]

  Batch 168/262 - Batch Loss: 0.0962


Epoch 2:  65%|██████▍   | 170/262 [08:24<04:31,  2.95s/it]

  Batch 170/262 - Batch Loss: 0.1031


Epoch 2:  66%|██████▌   | 172/262 [08:30<04:23,  2.93s/it]

  Batch 172/262 - Batch Loss: 0.0920


Epoch 2:  66%|██████▋   | 174/262 [08:35<04:14,  2.90s/it]

  Batch 174/262 - Batch Loss: 0.0892


Epoch 2:  67%|██████▋   | 176/262 [08:41<04:06,  2.87s/it]

  Batch 176/262 - Batch Loss: 0.1037


Epoch 2:  68%|██████▊   | 178/262 [08:47<04:08,  2.95s/it]

  Batch 178/262 - Batch Loss: 0.0999


Epoch 2:  69%|██████▊   | 180/262 [08:53<04:00,  2.93s/it]

  Batch 180/262 - Batch Loss: 0.0920


Epoch 2:  69%|██████▉   | 182/262 [08:59<03:59,  3.00s/it]

  Batch 182/262 - Batch Loss: 0.0888


Epoch 2:  70%|███████   | 184/262 [09:05<03:48,  2.93s/it]

  Batch 184/262 - Batch Loss: 0.0905


Epoch 2:  71%|███████   | 186/262 [09:11<03:48,  3.00s/it]

  Batch 186/262 - Batch Loss: 0.1000


Epoch 2:  72%|███████▏  | 188/262 [09:17<03:40,  2.98s/it]

  Batch 188/262 - Batch Loss: 0.0834


Epoch 2:  73%|███████▎  | 190/262 [09:23<03:39,  3.05s/it]

  Batch 190/262 - Batch Loss: 0.0930


Epoch 2:  73%|███████▎  | 192/262 [09:29<03:26,  2.95s/it]

  Batch 192/262 - Batch Loss: 0.0953


Epoch 2:  74%|███████▍  | 194/262 [09:35<03:28,  3.06s/it]

  Batch 194/262 - Batch Loss: 0.0819


Epoch 2:  75%|███████▍  | 196/262 [09:41<03:16,  2.98s/it]

  Batch 196/262 - Batch Loss: 0.1025


Epoch 2:  76%|███████▌  | 198/262 [09:48<03:15,  3.06s/it]

  Batch 198/262 - Batch Loss: 0.0905


Epoch 2:  76%|███████▋  | 200/262 [09:53<03:03,  2.97s/it]

  Batch 200/262 - Batch Loss: 0.0938


Epoch 2:  77%|███████▋  | 202/262 [10:00<03:04,  3.08s/it]

  Batch 202/262 - Batch Loss: 0.0934


Epoch 2:  78%|███████▊  | 204/262 [10:06<02:53,  2.99s/it]

  Batch 204/262 - Batch Loss: 0.0876


Epoch 2:  79%|███████▊  | 206/262 [10:12<02:51,  3.07s/it]

  Batch 206/262 - Batch Loss: 0.0837


Epoch 2:  79%|███████▉  | 208/262 [10:18<02:40,  2.97s/it]

  Batch 208/262 - Batch Loss: 0.0894


Epoch 2:  80%|████████  | 210/262 [10:24<02:40,  3.09s/it]

  Batch 210/262 - Batch Loss: 0.0946


Epoch 2:  81%|████████  | 212/262 [10:30<02:30,  3.01s/it]

  Batch 212/262 - Batch Loss: 0.0980


Epoch 2:  82%|████████▏ | 214/262 [10:37<02:28,  3.09s/it]

  Batch 214/262 - Batch Loss: 0.0978


Epoch 2:  82%|████████▏ | 216/262 [10:42<02:17,  2.98s/it]

  Batch 216/262 - Batch Loss: 0.0940


Epoch 2:  83%|████████▎ | 218/262 [10:49<02:13,  3.04s/it]

  Batch 218/262 - Batch Loss: 0.0952


Epoch 2:  84%|████████▍ | 220/262 [10:55<02:06,  3.01s/it]

  Batch 220/262 - Batch Loss: 0.0960


Epoch 2:  85%|████████▍ | 222/262 [11:01<02:02,  3.06s/it]

  Batch 222/262 - Batch Loss: 0.0921


Epoch 2:  85%|████████▌ | 224/262 [11:07<01:53,  2.98s/it]

  Batch 224/262 - Batch Loss: 0.0875


Epoch 2:  86%|████████▋ | 226/262 [11:13<01:50,  3.06s/it]

  Batch 226/262 - Batch Loss: 0.0883


Epoch 2:  87%|████████▋ | 228/262 [11:19<01:41,  2.98s/it]

  Batch 228/262 - Batch Loss: 0.0871


Epoch 2:  88%|████████▊ | 230/262 [11:25<01:37,  3.05s/it]

  Batch 230/262 - Batch Loss: 0.0958


Epoch 2:  89%|████████▊ | 232/262 [11:31<01:29,  2.99s/it]

  Batch 232/262 - Batch Loss: 0.0958


Epoch 2:  89%|████████▉ | 234/262 [11:38<01:25,  3.06s/it]

  Batch 234/262 - Batch Loss: 0.0931


Epoch 2:  90%|█████████ | 236/262 [11:43<01:16,  2.96s/it]

  Batch 236/262 - Batch Loss: 0.0947


Epoch 2:  91%|█████████ | 238/262 [11:49<01:11,  2.99s/it]

  Batch 238/262 - Batch Loss: 0.0932


Epoch 2:  92%|█████████▏| 240/262 [11:55<01:05,  2.97s/it]

  Batch 240/262 - Batch Loss: 0.0949


Epoch 2:  92%|█████████▏| 242/262 [12:01<00:59,  3.00s/it]

  Batch 242/262 - Batch Loss: 0.0959


Epoch 2:  93%|█████████▎| 244/262 [12:07<00:53,  2.95s/it]

  Batch 244/262 - Batch Loss: 0.0996


Epoch 2:  94%|█████████▍| 246/262 [12:13<00:47,  2.97s/it]

  Batch 246/262 - Batch Loss: 0.0896


Epoch 2:  95%|█████████▍| 248/262 [12:19<00:40,  2.93s/it]

  Batch 248/262 - Batch Loss: 0.0942


Epoch 2:  95%|█████████▌| 250/262 [12:25<00:35,  2.98s/it]

  Batch 250/262 - Batch Loss: 0.0835


Epoch 2:  96%|█████████▌| 252/262 [12:31<00:29,  2.95s/it]

  Batch 252/262 - Batch Loss: 0.0893


Epoch 2:  97%|█████████▋| 254/262 [12:37<00:23,  3.00s/it]

  Batch 254/262 - Batch Loss: 0.0886


Epoch 2:  98%|█████████▊| 256/262 [12:43<00:17,  2.96s/it]

  Batch 256/262 - Batch Loss: 0.0891


Epoch 2:  98%|█████████▊| 258/262 [12:49<00:12,  3.07s/it]

  Batch 258/262 - Batch Loss: 0.0805


Epoch 2:  99%|█████████▉| 260/262 [12:55<00:06,  3.01s/it]

  Batch 260/262 - Batch Loss: 0.0934


Epoch 2: 100%|██████████| 262/262 [12:59<00:00,  2.98s/it]

  Batch 262/262 - Batch Loss: 0.0954
Epoch 2/10 - Loss: 0.0949 - Time: 779.6s



Epoch 3:   1%|          | 2/262 [00:05<12:07,  2.80s/it]

  Batch 2/262 - Batch Loss: 0.1064


Epoch 3:   2%|▏         | 4/262 [00:11<12:13,  2.84s/it]

  Batch 4/262 - Batch Loss: 0.0879


Epoch 3:   2%|▏         | 6/262 [00:17<12:18,  2.89s/it]

  Batch 6/262 - Batch Loss: 0.0975


Epoch 3:   3%|▎         | 8/262 [00:22<12:04,  2.85s/it]

  Batch 8/262 - Batch Loss: 0.0969


Epoch 3:   4%|▍         | 10/262 [00:29<12:31,  2.98s/it]

  Batch 10/262 - Batch Loss: 0.0840


Epoch 3:   5%|▍         | 12/262 [00:34<12:09,  2.92s/it]

  Batch 12/262 - Batch Loss: 0.0926


Epoch 3:   5%|▌         | 14/262 [00:41<12:22,  2.99s/it]

  Batch 14/262 - Batch Loss: 0.0973


Epoch 3:   6%|▌         | 16/262 [00:46<12:09,  2.97s/it]

  Batch 16/262 - Batch Loss: 0.0821


Epoch 3:   7%|▋         | 18/262 [00:53<12:24,  3.05s/it]

  Batch 18/262 - Batch Loss: 0.0833


Epoch 3:   8%|▊         | 20/262 [00:59<11:55,  2.96s/it]

  Batch 20/262 - Batch Loss: 0.0913


Epoch 3:   8%|▊         | 22/262 [01:05<12:17,  3.07s/it]

  Batch 22/262 - Batch Loss: 0.0890


Epoch 3:   9%|▉         | 24/262 [01:11<11:47,  2.97s/it]

  Batch 24/262 - Batch Loss: 0.0965


Epoch 3:  10%|▉         | 26/262 [01:17<12:03,  3.07s/it]

  Batch 26/262 - Batch Loss: 0.0956


Epoch 3:  11%|█         | 28/262 [01:23<11:31,  2.95s/it]

  Batch 28/262 - Batch Loss: 0.1067


Epoch 3:  11%|█▏        | 30/262 [01:29<11:35,  3.00s/it]

  Batch 30/262 - Batch Loss: 0.1011


Epoch 3:  12%|█▏        | 32/262 [01:35<11:12,  2.92s/it]

  Batch 32/262 - Batch Loss: 0.0933


Epoch 3:  13%|█▎        | 34/262 [01:41<11:07,  2.93s/it]

  Batch 34/262 - Batch Loss: 0.0897


Epoch 3:  14%|█▎        | 36/262 [01:46<10:53,  2.89s/it]

  Batch 36/262 - Batch Loss: 0.0861


Epoch 3:  15%|█▍        | 38/262 [01:52<10:55,  2.93s/it]

  Batch 38/262 - Batch Loss: 0.0870


Epoch 3:  15%|█▌        | 40/262 [01:58<10:45,  2.91s/it]

  Batch 40/262 - Batch Loss: 0.0901


Epoch 3:  16%|█▌        | 42/262 [02:04<10:57,  2.99s/it]

  Batch 42/262 - Batch Loss: 0.0893


Epoch 3:  17%|█▋        | 44/262 [02:10<10:39,  2.93s/it]

  Batch 44/262 - Batch Loss: 0.0910


Epoch 3:  18%|█▊        | 46/262 [02:16<10:58,  3.05s/it]

  Batch 46/262 - Batch Loss: 0.0849


Epoch 3:  18%|█▊        | 48/262 [02:22<10:43,  3.01s/it]

  Batch 48/262 - Batch Loss: 0.0874


Epoch 3:  19%|█▉        | 50/262 [02:28<10:57,  3.10s/it]

  Batch 50/262 - Batch Loss: 0.0943


Epoch 3:  20%|█▉        | 52/262 [02:34<10:27,  2.99s/it]

  Batch 52/262 - Batch Loss: 0.0921


Epoch 3:  21%|██        | 54/262 [02:40<10:43,  3.09s/it]

  Batch 54/262 - Batch Loss: 0.0847


Epoch 3:  21%|██▏       | 56/262 [02:46<10:18,  3.00s/it]

  Batch 56/262 - Batch Loss: 0.0930


Epoch 3:  22%|██▏       | 58/262 [02:53<10:39,  3.14s/it]

  Batch 58/262 - Batch Loss: 0.0964


Epoch 3:  23%|██▎       | 60/262 [02:58<10:04,  2.99s/it]

  Batch 60/262 - Batch Loss: 0.1017


Epoch 3:  24%|██▎       | 62/262 [03:05<10:18,  3.09s/it]

  Batch 62/262 - Batch Loss: 0.0944


Epoch 3:  24%|██▍       | 64/262 [03:10<09:49,  2.98s/it]

  Batch 64/262 - Batch Loss: 0.0919


Epoch 3:  25%|██▌       | 66/262 [03:17<10:12,  3.12s/it]

  Batch 66/262 - Batch Loss: 0.0865


Epoch 3:  26%|██▌       | 68/262 [03:23<09:40,  2.99s/it]

  Batch 68/262 - Batch Loss: 0.0887


Epoch 3:  27%|██▋       | 70/262 [03:29<09:41,  3.03s/it]

  Batch 70/262 - Batch Loss: 0.0975


Epoch 3:  27%|██▋       | 72/262 [03:34<09:19,  2.95s/it]

  Batch 72/262 - Batch Loss: 0.0917


Epoch 3:  28%|██▊       | 74/262 [03:40<09:15,  2.96s/it]

  Batch 74/262 - Batch Loss: 0.1004


Epoch 3:  29%|██▉       | 76/262 [03:46<08:58,  2.90s/it]

  Batch 76/262 - Batch Loss: 0.0898


Epoch 3:  30%|██▉       | 78/262 [03:52<08:48,  2.87s/it]

  Batch 78/262 - Batch Loss: 0.0920


Epoch 3:  31%|███       | 80/262 [03:58<08:58,  2.96s/it]

  Batch 80/262 - Batch Loss: 0.0851


Epoch 3:  31%|███▏      | 82/262 [04:04<08:47,  2.93s/it]

  Batch 82/262 - Batch Loss: 0.0877


Epoch 3:  32%|███▏      | 84/262 [04:10<08:48,  2.97s/it]

  Batch 84/262 - Batch Loss: 0.0947


Epoch 3:  33%|███▎      | 86/262 [04:15<08:34,  2.92s/it]

  Batch 86/262 - Batch Loss: 0.0980


Epoch 3:  34%|███▎      | 88/262 [04:22<08:42,  3.00s/it]

  Batch 88/262 - Batch Loss: 0.0995


Epoch 3:  34%|███▍      | 90/262 [04:27<08:30,  2.97s/it]

  Batch 90/262 - Batch Loss: 0.0898


Epoch 3:  35%|███▌      | 92/262 [04:34<08:35,  3.03s/it]

  Batch 92/262 - Batch Loss: 0.0951


Epoch 3:  36%|███▌      | 94/262 [04:40<08:16,  2.95s/it]

  Batch 94/262 - Batch Loss: 0.0927


Epoch 3:  37%|███▋      | 96/262 [04:46<08:25,  3.05s/it]

  Batch 96/262 - Batch Loss: 0.0889


Epoch 3:  37%|███▋      | 98/262 [04:52<08:07,  2.97s/it]

  Batch 98/262 - Batch Loss: 0.0808


Epoch 3:  38%|███▊      | 100/262 [04:58<08:12,  3.04s/it]

  Batch 100/262 - Batch Loss: 0.0981


Epoch 3:  39%|███▉      | 102/262 [05:04<07:56,  2.98s/it]

  Batch 102/262 - Batch Loss: 0.0833


Epoch 3:  40%|███▉      | 104/262 [05:10<08:06,  3.08s/it]

  Batch 104/262 - Batch Loss: 0.0905


Epoch 3:  40%|████      | 106/262 [05:16<07:42,  2.96s/it]

  Batch 106/262 - Batch Loss: 0.0835


Epoch 3:  41%|████      | 108/262 [05:22<07:41,  3.00s/it]

  Batch 108/262 - Batch Loss: 0.0870


Epoch 3:  42%|████▏     | 110/262 [05:28<07:23,  2.92s/it]

  Batch 110/262 - Batch Loss: 0.0902


Epoch 3:  43%|████▎     | 112/262 [05:34<07:29,  2.99s/it]

  Batch 112/262 - Batch Loss: 0.0927


Epoch 3:  44%|████▎     | 114/262 [05:40<07:13,  2.93s/it]

  Batch 114/262 - Batch Loss: 0.0976


Epoch 3:  44%|████▍     | 116/262 [05:46<07:09,  2.94s/it]

  Batch 116/262 - Batch Loss: 0.0907


Epoch 3:  45%|████▌     | 118/262 [05:51<06:58,  2.90s/it]

  Batch 118/262 - Batch Loss: 0.0923


Epoch 3:  46%|████▌     | 120/262 [05:57<07:00,  2.96s/it]

  Batch 120/262 - Batch Loss: 0.1024


Epoch 3:  47%|████▋     | 122/262 [06:03<06:47,  2.91s/it]

  Batch 122/262 - Batch Loss: 0.0938


Epoch 3:  47%|████▋     | 124/262 [06:09<06:56,  3.02s/it]

  Batch 124/262 - Batch Loss: 0.0848


Epoch 3:  48%|████▊     | 126/262 [06:15<06:41,  2.95s/it]

  Batch 126/262 - Batch Loss: 0.0828


Epoch 3:  49%|████▉     | 128/262 [06:21<06:52,  3.08s/it]

  Batch 128/262 - Batch Loss: 0.0875


Epoch 3:  50%|████▉     | 130/262 [06:27<06:30,  2.95s/it]

  Batch 130/262 - Batch Loss: 0.0843


Epoch 3:  50%|█████     | 132/262 [06:33<06:37,  3.06s/it]

  Batch 132/262 - Batch Loss: 0.0967


Epoch 3:  51%|█████     | 134/262 [06:39<06:24,  3.00s/it]

  Batch 134/262 - Batch Loss: 0.0823


Epoch 3:  52%|█████▏    | 136/262 [06:46<06:31,  3.11s/it]

  Batch 136/262 - Batch Loss: 0.0879


Epoch 3:  53%|█████▎    | 138/262 [06:51<06:10,  2.99s/it]

  Batch 138/262 - Batch Loss: 0.0899


Epoch 3:  53%|█████▎    | 140/262 [06:58<06:19,  3.11s/it]

  Batch 140/262 - Batch Loss: 0.0881


Epoch 3:  54%|█████▍    | 142/262 [07:03<05:57,  2.98s/it]

  Batch 142/262 - Batch Loss: 0.0948


Epoch 3:  55%|█████▍    | 144/262 [07:09<05:59,  3.05s/it]

  Batch 144/262 - Batch Loss: 0.0804


Epoch 3:  56%|█████▌    | 146/262 [07:15<05:45,  2.98s/it]

  Batch 146/262 - Batch Loss: 0.0844


Epoch 3:  56%|█████▋    | 148/262 [07:21<05:40,  2.99s/it]

  Batch 148/262 - Batch Loss: 0.0952


Epoch 3:  57%|█████▋    | 150/262 [07:27<05:27,  2.93s/it]

  Batch 150/262 - Batch Loss: 0.0858


Epoch 3:  58%|█████▊    | 152/262 [07:33<05:18,  2.90s/it]

  Batch 152/262 - Batch Loss: 0.0828


Epoch 3:  59%|█████▉    | 154/262 [07:39<05:17,  2.94s/it]

  Batch 154/262 - Batch Loss: 0.0854


Epoch 3:  60%|█████▉    | 156/262 [07:44<05:06,  2.89s/it]

  Batch 156/262 - Batch Loss: 0.0883


Epoch 3:  60%|██████    | 158/262 [07:50<05:06,  2.94s/it]

  Batch 158/262 - Batch Loss: 0.0891


Epoch 3:  61%|██████    | 160/262 [07:56<04:59,  2.94s/it]

  Batch 160/262 - Batch Loss: 0.0829


Epoch 3:  62%|██████▏   | 162/262 [08:02<04:59,  3.00s/it]

  Batch 162/262 - Batch Loss: 0.0897


Epoch 3:  63%|██████▎   | 164/262 [08:08<04:47,  2.93s/it]

  Batch 164/262 - Batch Loss: 0.0861


Epoch 3:  63%|██████▎   | 166/262 [08:15<04:52,  3.05s/it]

  Batch 166/262 - Batch Loss: 0.0959


Epoch 3:  64%|██████▍   | 168/262 [08:20<04:35,  2.93s/it]

  Batch 168/262 - Batch Loss: 0.0886


Epoch 3:  65%|██████▍   | 170/262 [08:26<04:35,  2.99s/it]

  Batch 170/262 - Batch Loss: 0.0901


Epoch 3:  66%|██████▌   | 172/262 [08:32<04:25,  2.95s/it]

  Batch 172/262 - Batch Loss: 0.0877


Epoch 3:  66%|██████▋   | 174/262 [08:39<04:27,  3.04s/it]

  Batch 174/262 - Batch Loss: 0.0903


Epoch 3:  67%|██████▋   | 176/262 [08:44<04:14,  2.96s/it]

  Batch 176/262 - Batch Loss: 0.0935


Epoch 3:  68%|██████▊   | 178/262 [08:51<04:14,  3.03s/it]

  Batch 178/262 - Batch Loss: 0.0923


Epoch 3:  69%|██████▊   | 180/262 [08:56<04:00,  2.93s/it]

  Batch 180/262 - Batch Loss: 0.0859


Epoch 3:  69%|██████▉   | 182/262 [09:02<03:55,  2.94s/it]

  Batch 182/262 - Batch Loss: 0.0996


Epoch 3:  70%|███████   | 184/262 [09:08<03:46,  2.91s/it]

  Batch 184/262 - Batch Loss: 0.0898


Epoch 3:  71%|███████   | 186/262 [09:14<03:41,  2.91s/it]

  Batch 186/262 - Batch Loss: 0.0977


Epoch 3:  72%|███████▏  | 188/262 [09:20<03:32,  2.88s/it]

  Batch 188/262 - Batch Loss: 0.0870


Epoch 3:  73%|███████▎  | 190/262 [09:26<03:36,  3.01s/it]

  Batch 190/262 - Batch Loss: 0.0945


Epoch 3:  73%|███████▎  | 192/262 [09:31<03:24,  2.92s/it]

  Batch 192/262 - Batch Loss: 0.0912


Epoch 3:  74%|███████▍  | 194/262 [09:38<03:26,  3.03s/it]

  Batch 194/262 - Batch Loss: 0.0977


Epoch 3:  75%|███████▍  | 196/262 [09:43<03:13,  2.93s/it]

  Batch 196/262 - Batch Loss: 0.0987


Epoch 3:  76%|███████▌  | 198/262 [09:50<03:17,  3.09s/it]

  Batch 198/262 - Batch Loss: 0.0983


Epoch 3:  76%|███████▋  | 200/262 [09:55<03:05,  2.99s/it]

  Batch 200/262 - Batch Loss: 0.0844


Epoch 3:  77%|███████▋  | 202/262 [10:02<03:05,  3.10s/it]

  Batch 202/262 - Batch Loss: 0.0903


Epoch 3:  78%|███████▊  | 204/262 [10:08<02:55,  3.02s/it]

  Batch 204/262 - Batch Loss: 0.0846


Epoch 3:  79%|███████▊  | 206/262 [10:14<02:53,  3.09s/it]

  Batch 206/262 - Batch Loss: 0.1026


Epoch 3:  79%|███████▉  | 208/262 [10:20<02:42,  3.01s/it]

  Batch 208/262 - Batch Loss: 0.0874


Epoch 3:  80%|████████  | 210/262 [10:26<02:42,  3.12s/it]

  Batch 210/262 - Batch Loss: 0.0784


Epoch 3:  81%|████████  | 212/262 [10:32<02:29,  2.99s/it]

  Batch 212/262 - Batch Loss: 0.0875


Epoch 3:  82%|████████▏ | 214/262 [10:38<02:28,  3.10s/it]

  Batch 214/262 - Batch Loss: 0.0913


Epoch 3:  82%|████████▏ | 216/262 [10:44<02:16,  2.97s/it]

  Batch 216/262 - Batch Loss: 0.0882


Epoch 3:  83%|████████▎ | 218/262 [10:50<02:13,  3.04s/it]

  Batch 218/262 - Batch Loss: 0.1045


Epoch 3:  84%|████████▍ | 220/262 [10:56<02:02,  2.92s/it]

  Batch 220/262 - Batch Loss: 0.0897


Epoch 3:  85%|████████▍ | 222/262 [11:02<01:58,  2.97s/it]

  Batch 222/262 - Batch Loss: 0.0931


Epoch 3:  85%|████████▌ | 224/262 [11:07<01:51,  2.92s/it]

  Batch 224/262 - Batch Loss: 0.0838


Epoch 3:  86%|████████▋ | 226/262 [11:13<01:43,  2.87s/it]

  Batch 226/262 - Batch Loss: 0.0925


Epoch 3:  87%|████████▋ | 228/262 [11:19<01:38,  2.91s/it]

  Batch 228/262 - Batch Loss: 0.0869


Epoch 3:  88%|████████▊ | 230/262 [11:25<01:32,  2.89s/it]

  Batch 230/262 - Batch Loss: 0.0901


Epoch 3:  89%|████████▊ | 232/262 [11:31<01:28,  2.94s/it]

  Batch 232/262 - Batch Loss: 0.0911


Epoch 3:  89%|████████▉ | 234/262 [11:36<01:21,  2.91s/it]

  Batch 234/262 - Batch Loss: 0.0831


Epoch 3:  90%|█████████ | 236/262 [11:43<01:17,  3.00s/it]

  Batch 236/262 - Batch Loss: 0.0869


Epoch 3:  91%|█████████ | 238/262 [11:48<01:09,  2.91s/it]

  Batch 238/262 - Batch Loss: 0.0950


Epoch 3:  92%|█████████▏| 240/262 [11:55<01:05,  2.98s/it]

  Batch 240/262 - Batch Loss: 0.0886


Epoch 3:  92%|█████████▏| 242/262 [12:00<00:58,  2.91s/it]

  Batch 242/262 - Batch Loss: 0.0888


Epoch 3:  93%|█████████▎| 244/262 [12:07<00:54,  3.00s/it]

  Batch 244/262 - Batch Loss: 0.0892


Epoch 3:  94%|█████████▍| 246/262 [12:12<00:46,  2.93s/it]

  Batch 246/262 - Batch Loss: 0.0920


Epoch 3:  95%|█████████▍| 248/262 [12:18<00:42,  3.00s/it]

  Batch 248/262 - Batch Loss: 0.0827


Epoch 3:  95%|█████████▌| 250/262 [12:24<00:34,  2.91s/it]

  Batch 250/262 - Batch Loss: 0.0959


Epoch 3:  96%|█████████▌| 252/262 [12:30<00:29,  2.91s/it]

  Batch 252/262 - Batch Loss: 0.0863


Epoch 3:  97%|█████████▋| 254/262 [12:36<00:22,  2.87s/it]

  Batch 254/262 - Batch Loss: 0.0898


Epoch 3:  98%|█████████▊| 256/262 [12:41<00:17,  2.90s/it]

  Batch 256/262 - Batch Loss: 0.0913


Epoch 3:  98%|█████████▊| 258/262 [12:47<00:11,  2.89s/it]

  Batch 258/262 - Batch Loss: 0.0818


Epoch 3:  99%|█████████▉| 260/262 [12:53<00:06,  3.01s/it]

  Batch 260/262 - Batch Loss: 0.1009


Epoch 3: 100%|██████████| 262/262 [12:57<00:00,  2.97s/it]

  Batch 262/262 - Batch Loss: 0.1167
Epoch 3/10 - Loss: 0.0914 - Time: 777.6s



Epoch 4:   1%|          | 2/262 [00:05<12:24,  2.87s/it]

  Batch 2/262 - Batch Loss: 0.0836


Epoch 4:   2%|▏         | 4/262 [00:11<12:26,  2.90s/it]

  Batch 4/262 - Batch Loss: 0.0913


Epoch 4:   2%|▏         | 6/262 [00:17<12:13,  2.87s/it]

  Batch 6/262 - Batch Loss: 0.0850


Epoch 4:   3%|▎         | 8/262 [00:23<12:27,  2.94s/it]

  Batch 8/262 - Batch Loss: 0.0825


Epoch 4:   4%|▍         | 10/262 [00:29<12:14,  2.91s/it]

  Batch 10/262 - Batch Loss: 0.0947


Epoch 4:   5%|▍         | 12/262 [00:35<12:27,  2.99s/it]

  Batch 12/262 - Batch Loss: 0.0972


Epoch 4:   5%|▌         | 14/262 [00:40<11:57,  2.89s/it]

  Batch 14/262 - Batch Loss: 0.0895


Epoch 4:   6%|▌         | 16/262 [00:47<12:10,  2.97s/it]

  Batch 16/262 - Batch Loss: 0.0885


Epoch 4:   7%|▋         | 18/262 [00:52<11:56,  2.94s/it]

  Batch 18/262 - Batch Loss: 0.0893


Epoch 4:   8%|▊         | 20/262 [00:59<12:11,  3.02s/it]

  Batch 20/262 - Batch Loss: 0.0865


Epoch 4:   8%|▊         | 22/262 [01:04<11:43,  2.93s/it]

  Batch 22/262 - Batch Loss: 0.0960


Epoch 4:   9%|▉         | 24/262 [01:10<11:47,  2.97s/it]

  Batch 24/262 - Batch Loss: 0.0775


Epoch 4:  10%|▉         | 26/262 [01:16<11:25,  2.91s/it]

  Batch 26/262 - Batch Loss: 0.0815


Epoch 4:  11%|█         | 28/262 [01:22<11:18,  2.90s/it]

  Batch 28/262 - Batch Loss: 0.0781


Epoch 4:  11%|█▏        | 30/262 [01:28<11:09,  2.89s/it]

  Batch 30/262 - Batch Loss: 0.0841


Epoch 4:  12%|█▏        | 32/262 [01:34<11:17,  2.95s/it]

  Batch 32/262 - Batch Loss: 0.0922


Epoch 4:  13%|█▎        | 34/262 [01:39<11:03,  2.91s/it]

  Batch 34/262 - Batch Loss: 0.1027


Epoch 4:  14%|█▎        | 36/262 [01:46<11:22,  3.02s/it]

  Batch 36/262 - Batch Loss: 0.0873


Epoch 4:  15%|█▍        | 38/262 [01:51<11:00,  2.95s/it]

  Batch 38/262 - Batch Loss: 0.0793


Epoch 4:  15%|█▌        | 40/262 [01:58<11:25,  3.09s/it]

  Batch 40/262 - Batch Loss: 0.0796


Epoch 4:  16%|█▌        | 42/262 [02:03<10:56,  2.98s/it]

  Batch 42/262 - Batch Loss: 0.0813


Epoch 4:  17%|█▋        | 44/262 [02:10<11:08,  3.06s/it]

  Batch 44/262 - Batch Loss: 0.0914


Epoch 4:  18%|█▊        | 46/262 [02:15<10:43,  2.98s/it]

  Batch 46/262 - Batch Loss: 0.0830


Epoch 4:  18%|█▊        | 48/262 [02:22<10:58,  3.08s/it]

  Batch 48/262 - Batch Loss: 0.0870


Epoch 4:  19%|█▉        | 50/262 [02:27<10:25,  2.95s/it]

  Batch 50/262 - Batch Loss: 0.0917


Epoch 4:  20%|█▉        | 52/262 [02:33<10:25,  2.98s/it]

  Batch 52/262 - Batch Loss: 0.1011


Epoch 4:  21%|██        | 54/262 [02:39<10:05,  2.91s/it]

  Batch 54/262 - Batch Loss: 0.0846


Epoch 4:  21%|██▏       | 56/262 [02:45<10:05,  2.94s/it]

  Batch 56/262 - Batch Loss: 0.0885


Epoch 4:  22%|██▏       | 58/262 [02:51<09:54,  2.92s/it]

  Batch 58/262 - Batch Loss: 0.0921


Epoch 4:  23%|██▎       | 60/262 [02:56<09:42,  2.89s/it]

  Batch 60/262 - Batch Loss: 0.0841


Epoch 4:  24%|██▎       | 62/262 [03:02<09:43,  2.92s/it]

  Batch 62/262 - Batch Loss: 0.0983


Epoch 4:  24%|██▍       | 64/262 [03:08<09:31,  2.89s/it]

  Batch 64/262 - Batch Loss: 0.0877


Epoch 4:  25%|██▌       | 66/262 [03:14<09:45,  2.99s/it]

  Batch 66/262 - Batch Loss: 0.0944


Epoch 4:  26%|██▌       | 68/262 [03:20<09:26,  2.92s/it]

  Batch 68/262 - Batch Loss: 0.0856


Epoch 4:  27%|██▋       | 70/262 [03:26<09:43,  3.04s/it]

  Batch 70/262 - Batch Loss: 0.0958


Epoch 4:  27%|██▋       | 72/262 [03:32<09:17,  2.93s/it]

  Batch 72/262 - Batch Loss: 0.0811


Epoch 4:  28%|██▊       | 74/262 [03:38<09:27,  3.02s/it]

  Batch 74/262 - Batch Loss: 0.0851


Epoch 4:  29%|██▉       | 76/262 [03:44<09:07,  2.94s/it]

  Batch 76/262 - Batch Loss: 0.0852


Epoch 4:  30%|██▉       | 78/262 [03:50<09:11,  3.00s/it]

  Batch 78/262 - Batch Loss: 0.0944


Epoch 4:  31%|███       | 80/262 [03:56<08:56,  2.95s/it]

  Batch 80/262 - Batch Loss: 0.0866


Epoch 4:  31%|███▏      | 82/262 [04:02<08:55,  2.98s/it]

  Batch 82/262 - Batch Loss: 0.0809


Epoch 4:  32%|███▏      | 84/262 [04:08<08:39,  2.92s/it]

  Batch 84/262 - Batch Loss: 0.0825


Epoch 4:  33%|███▎      | 86/262 [04:14<08:37,  2.94s/it]

  Batch 86/262 - Batch Loss: 0.0780


Epoch 4:  34%|███▎      | 88/262 [04:20<08:21,  2.88s/it]

  Batch 88/262 - Batch Loss: 0.0857


Epoch 4:  34%|███▍      | 90/262 [04:26<08:27,  2.95s/it]

  Batch 90/262 - Batch Loss: 0.0934


Epoch 4:  35%|███▌      | 92/262 [04:31<08:15,  2.92s/it]

  Batch 92/262 - Batch Loss: 0.0857


Epoch 4:  36%|███▌      | 94/262 [04:37<08:26,  3.02s/it]

  Batch 94/262 - Batch Loss: 0.0870


Epoch 4:  37%|███▋      | 96/262 [04:43<08:05,  2.92s/it]

  Batch 96/262 - Batch Loss: 0.0854


Epoch 4:  37%|███▋      | 98/262 [04:49<08:24,  3.07s/it]

  Batch 98/262 - Batch Loss: 0.0859


Epoch 4:  38%|███▊      | 100/262 [04:55<08:00,  2.96s/it]

  Batch 100/262 - Batch Loss: 0.0826


Epoch 4:  39%|███▉      | 102/262 [05:02<08:14,  3.09s/it]

  Batch 102/262 - Batch Loss: 0.0863


Epoch 4:  40%|███▉      | 104/262 [05:07<07:48,  2.96s/it]

  Batch 104/262 - Batch Loss: 0.0991


Epoch 4:  40%|████      | 106/262 [05:14<08:04,  3.11s/it]

  Batch 106/262 - Batch Loss: 0.0904


Epoch 4:  41%|████      | 108/262 [05:19<07:39,  2.99s/it]

  Batch 108/262 - Batch Loss: 0.0871


Epoch 4:  42%|████▏     | 110/262 [05:25<07:44,  3.05s/it]

  Batch 110/262 - Batch Loss: 0.0788


Epoch 4:  43%|████▎     | 112/262 [05:31<07:20,  2.94s/it]

  Batch 112/262 - Batch Loss: 0.0863


Epoch 4:  44%|████▎     | 114/262 [05:37<07:17,  2.95s/it]

  Batch 114/262 - Batch Loss: 0.1030


Epoch 4:  44%|████▍     | 116/262 [05:43<07:01,  2.89s/it]

  Batch 116/262 - Batch Loss: 0.0921


Epoch 4:  45%|████▌     | 118/262 [05:48<06:51,  2.86s/it]

  Batch 118/262 - Batch Loss: 0.0853


Epoch 4:  46%|████▌     | 120/262 [05:54<06:55,  2.93s/it]

  Batch 120/262 - Batch Loss: 0.0946


Epoch 4:  47%|████▋     | 122/262 [06:00<06:40,  2.86s/it]

  Batch 122/262 - Batch Loss: 0.0991


Epoch 4:  47%|████▋     | 124/262 [06:06<06:46,  2.95s/it]

  Batch 124/262 - Batch Loss: 0.0875


Epoch 4:  48%|████▊     | 126/262 [06:12<06:33,  2.89s/it]

  Batch 126/262 - Batch Loss: 0.0828


Epoch 4:  49%|████▉     | 128/262 [06:18<06:39,  2.98s/it]

  Batch 128/262 - Batch Loss: 0.0905


Epoch 4:  50%|████▉     | 130/262 [06:24<06:25,  2.92s/it]

  Batch 130/262 - Batch Loss: 0.0917


Epoch 4:  50%|█████     | 132/262 [06:30<06:38,  3.07s/it]

  Batch 132/262 - Batch Loss: 0.0884


Epoch 4:  51%|█████     | 134/262 [06:36<06:20,  2.97s/it]

  Batch 134/262 - Batch Loss: 0.0962


Epoch 4:  52%|█████▏    | 136/262 [06:43<06:26,  3.07s/it]

  Batch 136/262 - Batch Loss: 0.0899


Epoch 4:  53%|█████▎    | 138/262 [06:48<06:08,  2.97s/it]

  Batch 138/262 - Batch Loss: 0.0905


Epoch 4:  53%|█████▎    | 140/262 [06:54<06:06,  3.01s/it]

  Batch 140/262 - Batch Loss: 0.0879


Epoch 4:  54%|█████▍    | 142/262 [07:00<05:56,  2.97s/it]

  Batch 142/262 - Batch Loss: 0.0783


Epoch 4:  55%|█████▍    | 144/262 [07:06<05:55,  3.01s/it]

  Batch 144/262 - Batch Loss: 0.0822


Epoch 4:  56%|█████▌    | 146/262 [07:12<05:42,  2.95s/it]

  Batch 146/262 - Batch Loss: 0.0887


Epoch 4:  56%|█████▋    | 148/262 [07:18<05:34,  2.94s/it]

  Batch 148/262 - Batch Loss: 0.0881


Epoch 4:  57%|█████▋    | 150/262 [07:24<05:25,  2.91s/it]

  Batch 150/262 - Batch Loss: 0.0806


Epoch 4:  58%|█████▊    | 152/262 [07:30<05:26,  2.97s/it]

  Batch 152/262 - Batch Loss: 0.0823


Epoch 4:  59%|█████▉    | 154/262 [07:36<05:20,  2.97s/it]

  Batch 154/262 - Batch Loss: 0.0815


Epoch 4:  60%|█████▉    | 156/262 [07:42<05:25,  3.07s/it]

  Batch 156/262 - Batch Loss: 0.0864


Epoch 4:  60%|██████    | 158/262 [07:48<05:10,  2.99s/it]

  Batch 158/262 - Batch Loss: 0.0905


Epoch 4:  61%|██████    | 160/262 [07:54<05:15,  3.10s/it]

  Batch 160/262 - Batch Loss: 0.0829


Epoch 4:  62%|██████▏   | 162/262 [08:00<05:02,  3.03s/it]

  Batch 162/262 - Batch Loss: 0.0827


Epoch 4:  63%|██████▎   | 164/262 [08:06<05:03,  3.10s/it]

  Batch 164/262 - Batch Loss: 0.0891


Epoch 4:  63%|██████▎   | 166/262 [08:12<04:45,  2.98s/it]

  Batch 166/262 - Batch Loss: 0.0915


Epoch 4:  64%|██████▍   | 168/262 [08:18<04:48,  3.07s/it]

  Batch 168/262 - Batch Loss: 0.0873


Epoch 4:  65%|██████▍   | 170/262 [08:24<04:32,  2.96s/it]

  Batch 170/262 - Batch Loss: 0.0932


Epoch 4:  66%|██████▌   | 172/262 [08:30<04:38,  3.10s/it]

  Batch 172/262 - Batch Loss: 0.0872


Epoch 4:  66%|██████▋   | 174/262 [08:36<04:23,  2.99s/it]

  Batch 174/262 - Batch Loss: 0.0848


Epoch 4:  67%|██████▋   | 176/262 [08:42<04:27,  3.11s/it]

  Batch 176/262 - Batch Loss: 0.0948


Epoch 4:  68%|██████▊   | 178/262 [08:48<04:10,  2.98s/it]

  Batch 178/262 - Batch Loss: 0.0897


Epoch 4:  69%|██████▊   | 180/262 [08:55<04:14,  3.10s/it]

  Batch 180/262 - Batch Loss: 0.0863


Epoch 4:  69%|██████▉   | 182/262 [09:00<03:59,  3.00s/it]

  Batch 182/262 - Batch Loss: 0.0765


Epoch 4:  70%|███████   | 184/262 [09:07<03:59,  3.07s/it]

  Batch 184/262 - Batch Loss: 0.0778


Epoch 4:  71%|███████   | 186/262 [09:12<03:47,  2.99s/it]

  Batch 186/262 - Batch Loss: 0.0840


Epoch 4:  72%|███████▏  | 188/262 [09:18<03:41,  2.99s/it]

  Batch 188/262 - Batch Loss: 0.0868


Epoch 4:  73%|███████▎  | 190/262 [09:24<03:30,  2.93s/it]

  Batch 190/262 - Batch Loss: 0.0882


Epoch 4:  73%|███████▎  | 192/262 [09:30<03:22,  2.90s/it]

  Batch 192/262 - Batch Loss: 0.0884


Epoch 4:  74%|███████▍  | 194/262 [09:36<03:19,  2.93s/it]

  Batch 194/262 - Batch Loss: 0.0904


Epoch 4:  75%|███████▍  | 196/262 [09:41<03:10,  2.89s/it]

  Batch 196/262 - Batch Loss: 0.0889


Epoch 4:  76%|███████▌  | 198/262 [09:48<03:09,  2.95s/it]

  Batch 198/262 - Batch Loss: 0.0815


Epoch 4:  76%|███████▋  | 200/262 [09:53<03:00,  2.91s/it]

  Batch 200/262 - Batch Loss: 0.0864


Epoch 4:  77%|███████▋  | 202/262 [10:00<03:00,  3.01s/it]

  Batch 202/262 - Batch Loss: 0.0883


Epoch 4:  78%|███████▊  | 204/262 [10:05<02:53,  2.99s/it]

  Batch 204/262 - Batch Loss: 0.0817


Epoch 4:  79%|███████▊  | 206/262 [10:12<02:52,  3.08s/it]

  Batch 206/262 - Batch Loss: 0.0917


Epoch 4:  79%|███████▉  | 208/262 [10:18<02:43,  3.03s/it]

  Batch 208/262 - Batch Loss: 0.0815


Epoch 4:  80%|████████  | 210/262 [10:24<02:40,  3.08s/it]

  Batch 210/262 - Batch Loss: 0.0895


Epoch 4:  81%|████████  | 212/262 [10:30<02:29,  2.99s/it]

  Batch 212/262 - Batch Loss: 0.0801


Epoch 4:  82%|████████▏ | 214/262 [10:36<02:27,  3.07s/it]

  Batch 214/262 - Batch Loss: 0.0859


Epoch 4:  82%|████████▏ | 216/262 [10:42<02:15,  2.94s/it]

  Batch 216/262 - Batch Loss: 0.1001


Epoch 4:  83%|████████▎ | 218/262 [10:48<02:13,  3.04s/it]

  Batch 218/262 - Batch Loss: 0.0863


Epoch 4:  84%|████████▍ | 220/262 [10:54<02:05,  2.99s/it]

  Batch 220/262 - Batch Loss: 0.0918


Epoch 4:  85%|████████▍ | 222/262 [11:01<02:03,  3.09s/it]

  Batch 222/262 - Batch Loss: 0.0750


Epoch 4:  85%|████████▌ | 224/262 [11:07<01:53,  2.98s/it]

  Batch 224/262 - Batch Loss: 0.0933


Epoch 4:  86%|████████▋ | 226/262 [11:13<01:49,  3.05s/it]

  Batch 226/262 - Batch Loss: 0.0855


Epoch 4:  87%|████████▋ | 228/262 [11:19<01:42,  3.01s/it]

  Batch 228/262 - Batch Loss: 0.0894


Epoch 4:  88%|████████▊ | 230/262 [11:25<01:38,  3.08s/it]

  Batch 230/262 - Batch Loss: 0.0853


Epoch 4:  89%|████████▊ | 232/262 [11:31<01:29,  2.99s/it]

  Batch 232/262 - Batch Loss: 0.0902


Epoch 4:  89%|████████▉ | 234/262 [11:37<01:25,  3.05s/it]

  Batch 234/262 - Batch Loss: 0.0918


Epoch 4:  90%|█████████ | 236/262 [11:43<01:18,  3.01s/it]

  Batch 236/262 - Batch Loss: 0.0813


Epoch 4:  91%|█████████ | 238/262 [11:50<01:13,  3.05s/it]

  Batch 238/262 - Batch Loss: 0.0937


Epoch 4:  92%|█████████▏| 240/262 [11:56<01:05,  3.00s/it]

  Batch 240/262 - Batch Loss: 0.0804


Epoch 4:  92%|█████████▏| 242/262 [12:02<01:00,  3.03s/it]

  Batch 242/262 - Batch Loss: 0.0852


Epoch 4:  93%|█████████▎| 244/262 [12:08<00:53,  2.97s/it]

  Batch 244/262 - Batch Loss: 0.0986


Epoch 4:  94%|█████████▍| 246/262 [12:14<00:47,  2.98s/it]

  Batch 246/262 - Batch Loss: 0.0913


Epoch 4:  95%|█████████▍| 248/262 [12:19<00:40,  2.93s/it]

  Batch 248/262 - Batch Loss: 0.0988


Epoch 4:  95%|█████████▌| 250/262 [12:25<00:36,  3.03s/it]

  Batch 250/262 - Batch Loss: 0.0782


Epoch 4:  96%|█████████▌| 252/262 [12:31<00:29,  2.98s/it]

  Batch 252/262 - Batch Loss: 0.0875


Epoch 4:  97%|█████████▋| 254/262 [12:38<00:24,  3.10s/it]

  Batch 254/262 - Batch Loss: 0.0878


Epoch 4:  98%|█████████▊| 256/262 [12:44<00:17,  2.99s/it]

  Batch 256/262 - Batch Loss: 0.0960


Epoch 4:  98%|█████████▊| 258/262 [12:50<00:12,  3.09s/it]

  Batch 258/262 - Batch Loss: 0.0842


Epoch 4:  99%|█████████▉| 260/262 [12:56<00:06,  3.02s/it]

  Batch 260/262 - Batch Loss: 0.0883


Epoch 4: 100%|██████████| 262/262 [13:00<00:00,  2.98s/it]

  Batch 262/262 - Batch Loss: 0.0944
Epoch 4/10 - Loss: 0.0880 - Time: 780.2s



Epoch 5:   1%|          | 2/262 [00:05<12:42,  2.93s/it]

  Batch 2/262 - Batch Loss: 0.0819


Epoch 5:   2%|▏         | 4/262 [00:11<12:36,  2.93s/it]

  Batch 4/262 - Batch Loss: 0.0909


Epoch 5:   2%|▏         | 6/262 [00:17<12:31,  2.93s/it]

  Batch 6/262 - Batch Loss: 0.0864


Epoch 5:   3%|▎         | 8/262 [00:23<12:23,  2.93s/it]

  Batch 8/262 - Batch Loss: 0.0965


Epoch 5:   4%|▍         | 10/262 [00:29<12:47,  3.04s/it]

  Batch 10/262 - Batch Loss: 0.0886


Epoch 5:   5%|▍         | 12/262 [00:35<12:22,  2.97s/it]

  Batch 12/262 - Batch Loss: 0.0977


Epoch 5:   5%|▌         | 14/262 [00:41<12:24,  3.00s/it]

  Batch 14/262 - Batch Loss: 0.0811


Epoch 5:   6%|▌         | 16/262 [00:47<11:56,  2.91s/it]

  Batch 16/262 - Batch Loss: 0.0805


Epoch 5:   7%|▋         | 18/262 [00:53<12:04,  2.97s/it]

  Batch 18/262 - Batch Loss: 0.0885


Epoch 5:   8%|▊         | 20/262 [00:59<11:54,  2.95s/it]

  Batch 20/262 - Batch Loss: 0.0862


Epoch 5:   8%|▊         | 22/262 [01:05<12:16,  3.07s/it]

  Batch 22/262 - Batch Loss: 0.0859


Epoch 5:   9%|▉         | 24/262 [01:11<11:37,  2.93s/it]

  Batch 24/262 - Batch Loss: 0.0999


Epoch 5:  10%|▉         | 26/262 [01:17<11:45,  2.99s/it]

  Batch 26/262 - Batch Loss: 0.0848


Epoch 5:  11%|█         | 28/262 [01:23<11:20,  2.91s/it]

  Batch 28/262 - Batch Loss: 0.0888


Epoch 5:  11%|█▏        | 30/262 [01:29<11:35,  3.00s/it]

  Batch 30/262 - Batch Loss: 0.0957


Epoch 5:  12%|█▏        | 32/262 [01:35<11:21,  2.96s/it]

  Batch 32/262 - Batch Loss: 0.0905


Epoch 5:  13%|█▎        | 34/262 [01:41<11:33,  3.04s/it]

  Batch 34/262 - Batch Loss: 0.0841


Epoch 5:  14%|█▎        | 36/262 [01:47<11:20,  3.01s/it]

  Batch 36/262 - Batch Loss: 0.0852


Epoch 5:  15%|█▍        | 38/262 [01:53<11:30,  3.08s/it]

  Batch 38/262 - Batch Loss: 0.0806


Epoch 5:  15%|█▌        | 40/262 [02:00<11:19,  3.06s/it]

  Batch 40/262 - Batch Loss: 0.1005


Epoch 5:  16%|█▌        | 42/262 [02:06<11:17,  3.08s/it]

  Batch 42/262 - Batch Loss: 0.0963


Epoch 5:  17%|█▋        | 44/262 [02:12<11:05,  3.05s/it]

  Batch 44/262 - Batch Loss: 0.0792


Epoch 5:  18%|█▊        | 46/262 [02:18<11:07,  3.09s/it]

  Batch 46/262 - Batch Loss: 0.0850


Epoch 5:  18%|█▊        | 48/262 [02:24<10:51,  3.05s/it]

  Batch 48/262 - Batch Loss: 0.0841


Epoch 5:  19%|█▉        | 50/262 [02:30<10:49,  3.07s/it]

  Batch 50/262 - Batch Loss: 0.0963


Epoch 5:  20%|█▉        | 52/262 [02:36<10:40,  3.05s/it]

  Batch 52/262 - Batch Loss: 0.0886


Epoch 5:  21%|██        | 54/262 [02:43<10:41,  3.08s/it]

  Batch 54/262 - Batch Loss: 0.0866


Epoch 5:  21%|██▏       | 56/262 [02:49<10:27,  3.05s/it]

  Batch 56/262 - Batch Loss: 0.1000


Epoch 5:  22%|██▏       | 58/262 [02:55<10:34,  3.11s/it]

  Batch 58/262 - Batch Loss: 0.0907


Epoch 5:  23%|██▎       | 60/262 [03:01<10:03,  2.99s/it]

  Batch 60/262 - Batch Loss: 0.0840


Epoch 5:  24%|██▎       | 62/262 [03:07<10:13,  3.07s/it]

  Batch 62/262 - Batch Loss: 0.0885


Epoch 5:  24%|██▍       | 64/262 [03:13<09:47,  2.96s/it]

  Batch 64/262 - Batch Loss: 0.0927


Epoch 5:  25%|██▌       | 66/262 [03:19<10:11,  3.12s/it]

  Batch 66/262 - Batch Loss: 0.0806


Epoch 5:  26%|██▌       | 68/262 [03:25<09:39,  2.99s/it]

  Batch 68/262 - Batch Loss: 0.0901


Epoch 5:  27%|██▋       | 70/262 [03:31<09:50,  3.07s/it]

  Batch 70/262 - Batch Loss: 0.0810


Epoch 5:  27%|██▋       | 72/262 [03:37<09:21,  2.95s/it]

  Batch 72/262 - Batch Loss: 0.0970


Epoch 5:  28%|██▊       | 74/262 [03:43<09:39,  3.08s/it]

  Batch 74/262 - Batch Loss: 0.0904


Epoch 5:  29%|██▉       | 76/262 [03:49<09:12,  2.97s/it]

  Batch 76/262 - Batch Loss: 0.0795


Epoch 5:  30%|██▉       | 78/262 [03:55<09:22,  3.06s/it]

  Batch 78/262 - Batch Loss: 0.0992


Epoch 5:  31%|███       | 80/262 [04:01<09:01,  2.98s/it]

  Batch 80/262 - Batch Loss: 0.0773


Epoch 5:  31%|███▏      | 82/262 [04:07<09:09,  3.06s/it]

  Batch 82/262 - Batch Loss: 0.0833


Epoch 5:  32%|███▏      | 84/262 [04:13<08:47,  2.97s/it]

  Batch 84/262 - Batch Loss: 0.0881


Epoch 5:  33%|███▎      | 86/262 [04:18<08:39,  2.95s/it]

  Batch 86/262 - Batch Loss: 0.0965


Epoch 5:  34%|███▎      | 88/262 [04:24<08:23,  2.89s/it]

  Batch 88/262 - Batch Loss: 0.0818


Epoch 5:  34%|███▍      | 90/262 [04:30<08:19,  2.90s/it]

  Batch 90/262 - Batch Loss: 0.0818


Epoch 5:  35%|███▌      | 92/262 [04:36<08:19,  2.94s/it]

  Batch 92/262 - Batch Loss: 0.0945


Epoch 5:  36%|███▌      | 94/262 [04:42<08:05,  2.89s/it]

  Batch 94/262 - Batch Loss: 0.0960


Epoch 5:  37%|███▋      | 96/262 [04:48<08:12,  2.97s/it]

  Batch 96/262 - Batch Loss: 0.0862


Epoch 5:  37%|███▋      | 98/262 [04:53<07:56,  2.91s/it]

  Batch 98/262 - Batch Loss: 0.0818


Epoch 5:  38%|███▊      | 100/262 [05:00<08:06,  3.00s/it]

  Batch 100/262 - Batch Loss: 0.0846


Epoch 5:  39%|███▉      | 102/262 [05:05<07:42,  2.89s/it]

  Batch 102/262 - Batch Loss: 0.0948


Epoch 5:  40%|███▉      | 104/262 [05:12<07:51,  2.99s/it]

  Batch 104/262 - Batch Loss: 0.0868


Epoch 5:  40%|████      | 106/262 [05:17<07:39,  2.94s/it]

  Batch 106/262 - Batch Loss: 0.0769


Epoch 5:  41%|████      | 108/262 [05:24<07:42,  3.00s/it]

  Batch 108/262 - Batch Loss: 0.0823


Epoch 5:  42%|████▏     | 110/262 [05:29<07:26,  2.94s/it]

  Batch 110/262 - Batch Loss: 0.0780


Epoch 5:  43%|████▎     | 112/262 [05:36<07:33,  3.02s/it]

  Batch 112/262 - Batch Loss: 0.0830


Epoch 5:  44%|████▎     | 114/262 [05:41<07:16,  2.95s/it]

  Batch 114/262 - Batch Loss: 0.0761


Epoch 5:  44%|████▍     | 116/262 [05:47<07:17,  3.00s/it]

  Batch 116/262 - Batch Loss: 0.0863


Epoch 5:  45%|████▌     | 118/262 [05:53<07:01,  2.93s/it]

  Batch 118/262 - Batch Loss: 0.0871


Epoch 5:  46%|████▌     | 120/262 [05:59<06:59,  2.96s/it]

  Batch 120/262 - Batch Loss: 0.0920


Epoch 5:  47%|████▋     | 122/262 [06:05<06:49,  2.93s/it]

  Batch 122/262 - Batch Loss: 0.0857


Epoch 5:  47%|████▋     | 124/262 [06:11<06:45,  2.94s/it]

  Batch 124/262 - Batch Loss: 0.0752


Epoch 5:  48%|████▊     | 126/262 [06:17<06:34,  2.90s/it]

  Batch 126/262 - Batch Loss: 0.0834


Epoch 5:  49%|████▉     | 128/262 [06:23<06:32,  2.93s/it]

  Batch 128/262 - Batch Loss: 0.0941


Epoch 5:  50%|████▉     | 130/262 [06:28<06:22,  2.89s/it]

  Batch 130/262 - Batch Loss: 0.0791


Epoch 5:  50%|█████     | 132/262 [06:34<06:28,  2.99s/it]

  Batch 132/262 - Batch Loss: 0.0769


Epoch 5:  51%|█████     | 134/262 [06:40<06:13,  2.91s/it]

  Batch 134/262 - Batch Loss: 0.0829


Epoch 5:  52%|█████▏    | 136/262 [06:46<06:22,  3.03s/it]

  Batch 136/262 - Batch Loss: 0.0865


Epoch 5:  53%|█████▎    | 138/262 [06:52<06:03,  2.93s/it]

  Batch 138/262 - Batch Loss: 0.0808


Epoch 5:  53%|█████▎    | 140/262 [06:58<06:08,  3.02s/it]

  Batch 140/262 - Batch Loss: 0.0852


Epoch 5:  54%|█████▍    | 142/262 [07:04<05:55,  2.96s/it]

  Batch 142/262 - Batch Loss: 0.0901


Epoch 5:  55%|█████▍    | 144/262 [07:10<06:04,  3.09s/it]

  Batch 144/262 - Batch Loss: 0.0827


Epoch 5:  56%|█████▌    | 146/262 [07:16<05:46,  2.99s/it]

  Batch 146/262 - Batch Loss: 0.0843


Epoch 5:  56%|█████▋    | 148/262 [07:22<05:49,  3.07s/it]

  Batch 148/262 - Batch Loss: 0.0731


Epoch 5:  57%|█████▋    | 150/262 [07:28<05:31,  2.96s/it]

  Batch 150/262 - Batch Loss: 0.0836


Epoch 5:  58%|█████▊    | 152/262 [07:34<05:27,  2.97s/it]

  Batch 152/262 - Batch Loss: 0.0832


Epoch 5:  59%|█████▉    | 154/262 [07:39<05:14,  2.91s/it]

  Batch 154/262 - Batch Loss: 0.0806


Epoch 5:  60%|█████▉    | 156/262 [07:45<05:12,  2.95s/it]

  Batch 156/262 - Batch Loss: 0.0862


Epoch 5:  60%|██████    | 158/262 [07:51<05:01,  2.90s/it]

  Batch 158/262 - Batch Loss: 0.0814


Epoch 5:  61%|██████    | 160/262 [07:57<04:55,  2.90s/it]

  Batch 160/262 - Batch Loss: 0.0825


Epoch 5:  62%|██████▏   | 162/262 [08:03<04:53,  2.93s/it]

  Batch 162/262 - Batch Loss: 0.0798


Epoch 5:  63%|██████▎   | 164/262 [08:09<04:45,  2.91s/it]

  Batch 164/262 - Batch Loss: 0.0850


Epoch 5:  63%|██████▎   | 166/262 [08:15<04:46,  2.98s/it]

  Batch 166/262 - Batch Loss: 0.0818


Epoch 5:  64%|██████▍   | 168/262 [08:21<04:33,  2.91s/it]

  Batch 168/262 - Batch Loss: 0.0909


Epoch 5:  65%|██████▍   | 170/262 [08:27<04:34,  2.98s/it]

  Batch 170/262 - Batch Loss: 0.0805


Epoch 5:  66%|██████▌   | 172/262 [08:32<04:20,  2.89s/it]

  Batch 172/262 - Batch Loss: 0.0885


Epoch 5:  66%|██████▋   | 174/262 [08:39<04:22,  2.99s/it]

  Batch 174/262 - Batch Loss: 0.0860


Epoch 5:  67%|██████▋   | 176/262 [08:44<04:11,  2.93s/it]

  Batch 176/262 - Batch Loss: 0.0966


Epoch 5:  68%|██████▊   | 178/262 [08:50<04:09,  2.97s/it]

  Batch 178/262 - Batch Loss: 0.0840


Epoch 5:  69%|██████▊   | 180/262 [08:56<03:57,  2.90s/it]

  Batch 180/262 - Batch Loss: 0.0845


Epoch 5:  69%|██████▉   | 182/262 [09:02<03:55,  2.94s/it]

  Batch 182/262 - Batch Loss: 0.0799


Epoch 5:  70%|███████   | 184/262 [09:08<03:46,  2.90s/it]

  Batch 184/262 - Batch Loss: 0.0911


Epoch 5:  71%|███████   | 186/262 [09:14<03:40,  2.90s/it]

  Batch 186/262 - Batch Loss: 0.0859


Epoch 5:  72%|███████▏  | 188/262 [09:19<03:34,  2.90s/it]

  Batch 188/262 - Batch Loss: 0.0758


Epoch 5:  73%|███████▎  | 190/262 [09:25<03:34,  2.97s/it]

  Batch 190/262 - Batch Loss: 0.0795


Epoch 5:  73%|███████▎  | 192/262 [09:31<03:23,  2.91s/it]

  Batch 192/262 - Batch Loss: 0.0893


Epoch 5:  74%|███████▍  | 194/262 [09:37<03:25,  3.03s/it]

  Batch 194/262 - Batch Loss: 0.0909


Epoch 5:  75%|███████▍  | 196/262 [09:43<03:16,  2.98s/it]

  Batch 196/262 - Batch Loss: 0.0833


Epoch 5:  76%|███████▌  | 198/262 [09:50<03:17,  3.09s/it]

  Batch 198/262 - Batch Loss: 0.0829


Epoch 5:  76%|███████▋  | 200/262 [09:55<03:05,  3.00s/it]

  Batch 200/262 - Batch Loss: 0.0874


Epoch 5:  77%|███████▋  | 202/262 [10:01<03:03,  3.06s/it]

  Batch 202/262 - Batch Loss: 0.0844


Epoch 5:  78%|███████▊  | 204/262 [10:07<02:51,  2.95s/it]

  Batch 204/262 - Batch Loss: 0.0827


Epoch 5:  79%|███████▊  | 206/262 [10:14<02:53,  3.09s/it]

  Batch 206/262 - Batch Loss: 0.0895


Epoch 5:  79%|███████▉  | 208/262 [10:19<02:40,  2.97s/it]

  Batch 208/262 - Batch Loss: 0.0891


Epoch 5:  80%|████████  | 210/262 [10:26<02:41,  3.11s/it]

  Batch 210/262 - Batch Loss: 0.0895


Epoch 5:  81%|████████  | 212/262 [10:31<02:29,  2.99s/it]

  Batch 212/262 - Batch Loss: 0.0827


Epoch 5:  82%|████████▏ | 214/262 [10:37<02:25,  3.03s/it]

  Batch 214/262 - Batch Loss: 0.0890


Epoch 5:  82%|████████▏ | 216/262 [10:43<02:16,  2.96s/it]

  Batch 216/262 - Batch Loss: 0.0832


Epoch 5:  83%|████████▎ | 218/262 [10:49<02:11,  2.98s/it]

  Batch 218/262 - Batch Loss: 0.0857


Epoch 5:  84%|████████▍ | 220/262 [10:55<02:02,  2.92s/it]

  Batch 220/262 - Batch Loss: 0.0735


Epoch 5:  85%|████████▍ | 222/262 [11:01<01:55,  2.88s/it]

  Batch 222/262 - Batch Loss: 0.0804


Epoch 5:  85%|████████▌ | 224/262 [11:07<01:51,  2.93s/it]

  Batch 224/262 - Batch Loss: 0.0878


Epoch 5:  86%|████████▋ | 226/262 [11:12<01:43,  2.87s/it]

  Batch 226/262 - Batch Loss: 0.0807


Epoch 5:  87%|████████▋ | 228/262 [11:18<01:38,  2.89s/it]

  Batch 228/262 - Batch Loss: 0.0946


Epoch 5:  88%|████████▊ | 230/262 [11:24<01:32,  2.88s/it]

  Batch 230/262 - Batch Loss: 0.0839


Epoch 5:  89%|████████▊ | 232/262 [11:30<01:29,  3.00s/it]

  Batch 232/262 - Batch Loss: 0.0794


Epoch 5:  89%|████████▉ | 234/262 [11:36<01:21,  2.92s/it]

  Batch 234/262 - Batch Loss: 0.0849


Epoch 5:  90%|█████████ | 236/262 [11:42<01:17,  3.00s/it]

  Batch 236/262 - Batch Loss: 0.0845


Epoch 5:  91%|█████████ | 238/262 [11:48<01:09,  2.91s/it]

  Batch 238/262 - Batch Loss: 0.0905


Epoch 5:  92%|█████████▏| 240/262 [11:54<01:05,  2.98s/it]

  Batch 240/262 - Batch Loss: 0.0844


Epoch 5:  92%|█████████▏| 242/262 [11:59<00:57,  2.86s/it]

  Batch 242/262 - Batch Loss: 0.0864


Epoch 5:  93%|█████████▎| 244/262 [12:05<00:52,  2.89s/it]

  Batch 244/262 - Batch Loss: 0.0760


Epoch 5:  94%|█████████▍| 246/262 [12:11<00:45,  2.87s/it]

  Batch 246/262 - Batch Loss: 0.0905


Epoch 5:  95%|█████████▍| 248/262 [12:17<00:41,  2.93s/it]

  Batch 248/262 - Batch Loss: 0.0866


Epoch 5:  95%|█████████▌| 250/262 [12:23<00:34,  2.88s/it]

  Batch 250/262 - Batch Loss: 0.0939


Epoch 5:  96%|█████████▌| 252/262 [12:29<00:30,  3.06s/it]

  Batch 252/262 - Batch Loss: 0.0946


Epoch 5:  97%|█████████▋| 254/262 [12:35<00:23,  2.99s/it]

  Batch 254/262 - Batch Loss: 0.0820


Epoch 5:  98%|█████████▊| 256/262 [12:41<00:18,  3.04s/it]

  Batch 256/262 - Batch Loss: 0.0825


Epoch 5:  98%|█████████▊| 258/262 [12:47<00:11,  2.98s/it]

  Batch 258/262 - Batch Loss: 0.0830


In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Evaluate the saved U-Net checkpoint on the validation split, reading the
# image pairs directly from the val/RGB and val/thermal_8_bit folders.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = UNet().to(device)
# map_location remaps the stored tensors onto whichever device was selected above.
model.load_state_dict(torch.load('unet_rgb2thermal.pth', map_location=device))
model.eval()  # switch the module to evaluation mode before running inference

val_rgb_dir = os.path.join(path, "FLIR_ADAS_1_3", "val", "RGB")
val_thermal_dir = os.path.join(path, "FLIR_ADAS_1_3", "val", "thermal_8_bit")

# The third value returned by the helper is not needed in this cell.
val_matched_rgb, val_matched_thermal, _ = get_matched_files_by_prefix(val_rgb_dir, val_thermal_dir)

val_dataset = FLIRRGB2ThermalDataset(val_rgb_dir, val_thermal_dir, img_size=256, matched_rgb=val_matched_rgb, matched_thermal=val_matched_thermal)
# batch_size=1 gives one metric value per image; shuffle=False keeps the
# order stable so later cells can index into `outputs` / `gts` consistently.
val_loader = DataLoader(val_dataset, batch_size=1, shuffle=False)

mae_list = []  # per-image mean absolute error
mse_list = []  # per-image mean squared error
outputs = []   # predicted images as NumPy arrays (reused by later cells)
gts = []       # ground-truth thermal images as NumPy arrays (reused by later cells)

# No gradients are needed for evaluation.
with torch.no_grad():
    for rgb, thermal in val_loader:
        rgb, thermal = rgb.to(device), thermal.to(device)
        pred = model(rgb)
        # squeeze() drops every size-1 axis (at least the batch axis, since batch_size=1).
        pred_np = pred.squeeze().cpu().numpy()
        gt_np = thermal.squeeze().cpu().numpy()
        outputs.append(pred_np)
        gts.append(gt_np)
        mae_list.append(mean_absolute_error(gt_np.flatten(), pred_np.flatten()))
        mse_list.append(mean_squared_error(gt_np.flatten(), pred_np.flatten()))

# Fail with an actionable message instead of a bare ZeroDivisionError when
# the loader produced nothing (e.g. no RGB/thermal file pairs were matched).
if not mae_list:
    raise RuntimeError(
        "Validation loader yielded no samples; check that matching files exist in "
        f"{val_rgb_dir} and {val_thermal_dir}."
    )

print(f"Validation MAE: {sum(mae_list)/len(mae_list):.4f}")
print(f"Validation MSE: {sum(mse_list)/len(mse_list):.4f}")


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

import numpy as np

# Binarize outputs and ground truth for classification metrics.
# A pixel is labelled positive (1) when its value is strictly above `threshold`.
threshold = 0.5

if not outputs or not gts:
    raise ValueError("No validation predictions to score; run the evaluation cell first.")

# Collect one compact uint8 vector per image and join them once at the end.
# Extending a Python list pixel by pixel would create one boxed object per
# pixel, which is slow and uses far more memory than a contiguous array.
pred_chunks = []
gt_chunks = []
for pred_np, gt_np in zip(outputs, gts):
    pred_bin = (pred_np > threshold).astype(np.uint8).ravel()
    gt_bin = (gt_np > threshold).astype(np.uint8).ravel()
    pred_chunks.append(pred_bin)
    gt_chunks.append(gt_bin)

all_preds = np.concatenate(pred_chunks)
all_gts = np.concatenate(gt_chunks)

# zero_division=0 reports 0 instead of warning when a denominator is zero
# (for example when no pixel is predicted positive).
accuracy = accuracy_score(all_gts, all_preds)
precision = precision_score(all_gts, all_preds, zero_division=0)
recall = recall_score(all_gts, all_preds, zero_division=0)
f1 = f1_score(all_gts, all_preds, zero_division=0)

print(f"Validation Accuracy: {accuracy:.4f}")
print(f"Validation Precision: {precision:.4f}")
print(f"Validation Recall: {recall:.4f}")
print(f"Validation F1 Score: {f1:.4f}")

In [None]:
# Visualize a few predictions vs. ground truth.
# Show at most three samples, clamped to what is actually available so that
# indexing gts/outputs cannot raise IndexError on a small validation set.
num_show = min(3, len(gts), len(outputs))

if num_show == 0:
    print("No validation samples available to visualize.")
else:
    # squeeze=False keeps `axes` two-dimensional even when num_show == 1.
    fig, axes = plt.subplots(num_show, 2, figsize=(10, num_show * 3), squeeze=False)
    for i in range(num_show):
        gt_ax, pred_ax = axes[i]

        # Left column: ground-truth image.
        gt_ax.imshow(gts[i], cmap='gray')
        gt_ax.set_title('Ground Truth')
        gt_ax.axis('off')

        # Right column: model prediction for the same sample.
        pred_ax.imshow(outputs[i], cmap='gray')
        pred_ax.set_title('Predicted')
        pred_ax.axis('off')
    fig.tight_layout()
    plt.show()

In [None]:
# Draw the values stored in train_losses as a line plot; the x-axis is the
# position of each value in the list and is labelled 'Epoch'.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(train_losses)
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Training Loss Curve')
plt.show()