In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from tqdm import tqdm
from torchvision import transforms
import torch
import os
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from torchvision.transforms import ToPILImage
from models import *

In [2]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


In [3]:
class rellis_dataset(Dataset):
    """
    Using 6 classes

    Each sample is an ``(image, annotation)`` pair. The image is read from
    ``<data_root_dir>/image/<name>.jpg`` and the label map from
    ``<data_root_dir>/annotation/<name>_group6.png``, where ``<name>`` is one
    entry of the split file.
    """
    def __init__(self, data_root_dir, txt_file, transform=None, target_transform=None):
        """
        data_root_dir: root directory of the dataset, e.g. './data/rellis'
        txt_file: path to the split file (e.g. train.txt), one sample name per line
        transform: optional callable applied to the RGB PIL image
        target_transform: optional callable applied to the PIL annotation image
        """
        self.data_root_dir = data_root_dir
        self.transform = transform
        self.target_transform = target_transform
        with open(txt_file, 'r') as f:
            # Skip blank lines so a trailing newline (or an empty row) in the
            # split file does not turn into a sample with an empty name.
            stripped = (line.strip() for line in f)
            self.image_paths = [name for name in stripped if name]

    def __len__(self):
        """Number of sample names listed in the split file."""
        return len(self.image_paths)

    @staticmethod
    def _to_label_tensor(annotation):
        """
        Convert an annotation to an integer (``torch.long``) label tensor.

        * Floating-point tensors are assumed to hold uint8 label ids scaled
          to [0, 1] by a division by 255 (what torchvision's ``ToTensor``
          does for 8-bit images); the scaling is undone and the result is
          rounded, so every id in 0..255 is recovered exactly.
        * Integer tensors are taken to already hold label ids.
        * Anything else (e.g. a PIL image when no tensor conversion was
          applied) is read through numpy; 2-D label maps get a leading
          channel dimension so the layout matches the tensor path.
        """
        if torch.is_tensor(annotation):
            if annotation.is_floating_point():
                return torch.round(annotation * 255).long()
            return annotation.long()
        labels = torch.from_numpy(np.array(annotation, dtype=np.int64))
        if labels.dim() == 2:
            labels = labels.unsqueeze(0)
        return labels

    def __getitem__(self, index):
        """
        Load the sample at ``index``.

        Returns ``(image, annotation)``: the (optionally transformed) RGB image
        and the annotation converted to a ``torch.long`` label tensor.
        """
        name = self.image_paths[index]
        img_path = os.path.join(self.data_root_dir, 'image', name + '.jpg')
        ann_path = os.path.join(self.data_root_dir, 'annotation', name + '_group6.png')
        image = Image.open(img_path).convert('RGB')
        annotation = Image.open(ann_path)
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            annotation = self.target_transform(annotation)
        # Turn the label map into integer class ids (works with or without a
        # tensor-producing target_transform).
        annotation = self._to_label_tensor(annotation)
        return image, annotation

In [4]:
# --- Image transforms: resize to 256x256, then convert to a tensor ---------
train_transforms = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
    ]
)

# Validation images go through the same pipeline as the training images.
val_transforms = transforms.Compose(
    [
        transforms.Resize((256, 256)),
        transforms.ToTensor(),
    ]
)

# --- Annotation transform ---------------------------------------------------
# Nearest-neighbour resizing keeps label ids intact (no blending between
# neighbouring classes).
target_transform = transforms.Compose(
    [
        transforms.Resize(
            (256, 256),
            interpolation=transforms.InterpolationMode.NEAREST,
        ),
        transforms.ToTensor(),
    ]
)

# --- Training split ---------------------------------------------------------
train_dataset = rellis_dataset(
    data_root_dir='./data/rellis',
    txt_file='./data/rellis/train.txt',
    transform=train_transforms,
    target_transform=target_transform,
)
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    num_workers=4,
)

# --- Validation split (fixed order) -----------------------------------------
val_dataset = rellis_dataset(
    data_root_dir='./data/rellis',
    txt_file='./data/rellis/val.txt',
    transform=val_transforms,
    target_transform=target_transform,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,
)


In [5]:
# Build the 6-class model via the project's `models` factory and move it to
# the selected device in one step.
model = make_SegFormerB1(num_classes=6).to(device)

In [6]:
# Loss function.
criterion = nn.CrossEntropyLoss()

# AdamW over all model parameters.
optimizer = optim.AdamW(
    model.parameters(),
    lr=1e-4,
    weight_decay=1e-4,
)

# Multiply the learning rate by 0.1 every 10 scheduler steps.
scheduler = optim.lr_scheduler.StepLR(
    optimizer,
    step_size=10,
    gamma=0.1,
)

In [7]:
def train_one_epoch(model, loader, optimizer, criterion, device):
    """
    Run one optimisation pass over ``loader``.

    Args:
        model: network to train; switched to training mode.
        loader: iterable yielding ``(images, masks)`` batches.
        optimizer: optimizer stepped once per batch.
        criterion: loss called as ``criterion(outputs, masks)``; it is assumed
            to return the mean loss over the batch (the default reduction of
            ``nn.CrossEntropyLoss``), which is why it is re-weighted by the
            batch size below.
        device: device the batches are moved to.

    Returns:
        The mean per-sample loss over all samples processed in this pass, or
        ``nan`` when the loader yielded no batches.
    """
    model.train()
    running_loss = 0.0
    samples_seen = 0
    total_iterations = 0
    for images, masks in tqdm(loader, desc="Training"):
        images = images.to(device)
        # Drop the singleton dimension 1 of the masks before computing the loss.
        masks = masks.squeeze(1).to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, masks)
        loss.backward()
        optimizer.step()

        # Weight the batch-mean loss by the actual batch size so partial
        # batches contribute proportionally.
        batch_size = images.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size
        total_iterations += 1

        if total_iterations % 10 == 0:
            # Average over the samples actually processed; this does not rely
            # on loader.batch_size (None when a batch_sampler is used) or on
            # every batch being full.
            avg_loss = running_loss / samples_seen
            print(f"Iteration {total_iterations}: Training Loss: {avg_loss:.4f}")

    if samples_seen == 0:
        return float("nan")
    return running_loss / samples_seen

def validate(model, loader, criterion, device):
    """
    Evaluate ``model`` on ``loader`` without computing gradients.

    Args:
        model: network to evaluate; switched to evaluation mode.
        loader: iterable yielding ``(images, masks)`` batches.
        criterion: loss called as ``criterion(outputs, masks)``; it is assumed
            to return the mean loss over the batch, which is why it is
            re-weighted by the batch size below.
        device: device the batches are moved to.

    Returns:
        The mean per-sample loss over all samples processed, or ``nan`` when
        the loader yielded no batches.
    """
    model.eval()
    running_loss = 0.0
    samples_seen = 0
    total_iterations = 0

    with torch.no_grad():
        for images, masks in tqdm(loader, desc="Validation"):
            images = images.to(device)
            # Drop the singleton dimension 1 of the masks before computing the loss.
            masks = masks.squeeze(1).to(device)

            outputs = model(images)
            loss = criterion(outputs, masks)

            # Weight the batch-mean loss by the actual batch size so partial
            # batches contribute proportionally.
            batch_size = images.size(0)
            running_loss += loss.item() * batch_size
            samples_seen += batch_size
            total_iterations += 1

            if total_iterations % 10 == 0:
                # Average over the samples actually processed; this does not
                # rely on loader.batch_size (None when a batch_sampler is
                # used) or on every batch being full.
                avg_loss = running_loss / samples_seen
                print(f"Iteration {total_iterations}: Validation Loss: {avg_loss:.4f}")

    if samples_seen == 0:
        return float("nan")
    return running_loss / samples_seen


In [8]:
num_epochs = 25
best_val_loss = float("inf")  # any finite validation loss beats this

for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}")

    # Train for one pass, then measure the loss on the validation split.
    train_loss = train_one_epoch(
        model=model,
        loader=train_loader,
        optimizer=optimizer,
        criterion=criterion,
        device=device,
    )
    val_loss = validate(
        model=model,
        loader=val_loader,
        criterion=criterion,
        device=device,
    )

    # The learning-rate schedule advances once per epoch.
    scheduler.step()

    print(f"Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

    # Keep only the weights with the lowest validation loss seen so far.
    if not val_loss < best_val_loss:
        continue
    print("Saving best model...")
    best_val_loss = val_loss
    torch.save(model.state_dict(), "best_segmentation_model.pth")

Epoch 1/25


Training:  10%|▉         | 10/104 [00:06<00:49,  1.92it/s]

Iteration 10: Training Loss: 1.4177


Training:  19%|█▉        | 20/104 [00:11<00:42,  1.98it/s]

Iteration 20: Training Loss: 1.2181


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.98it/s]

Iteration 30: Training Loss: 1.0947


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.98it/s]

Iteration 40: Training Loss: 1.0154


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.98it/s]

Iteration 50: Training Loss: 0.9599


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.98it/s]

Iteration 60: Training Loss: 0.9126


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.98it/s]

Iteration 70: Training Loss: 0.8740


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.98it/s]

Iteration 80: Training Loss: 0.8387


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.98it/s]

Iteration 90: Training Loss: 0.8151


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.98it/s]

Iteration 100: Training Loss: 0.7933


Training: 100%|██████████| 104/104 [00:53<00:00,  1.96it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.30it/s]

Iteration 10: Validation Loss: 0.7636


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  6.10it/s]

Iteration 20: Validation Loss: 0.6372


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.86it/s]

Iteration 30: Validation Loss: 0.6447
Train Loss: 0.7847, Val Loss: 0.6397
Saving best model...





Epoch 2/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.5195


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.98it/s]

Iteration 20: Training Loss: 0.5103


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.98it/s]

Iteration 30: Training Loss: 0.5041


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.97it/s]

Iteration 40: Training Loss: 0.4934


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.4876


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.4829


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.98it/s]

Iteration 70: Training Loss: 0.4757


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.97it/s]

Iteration 80: Training Loss: 0.4690


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.98it/s]

Iteration 90: Training Loss: 0.4623


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.98it/s]

Iteration 100: Training Loss: 0.4602


Training: 100%|██████████| 104/104 [00:53<00:00,  1.96it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.21it/s]

Iteration 10: Validation Loss: 0.7418


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  5.97it/s]

Iteration 20: Validation Loss: 0.6902


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.74it/s]


Iteration 30: Validation Loss: 0.7552
Train Loss: 0.4601, Val Loss: 0.7479
Epoch 3/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.94it/s]

Iteration 10: Training Loss: 0.4383


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.97it/s]

Iteration 20: Training Loss: 0.4226


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.97it/s]

Iteration 30: Training Loss: 0.4188


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.97it/s]

Iteration 40: Training Loss: 0.4122


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.4067


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.4016


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.97it/s]

Iteration 70: Training Loss: 0.3975


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.97it/s]

Iteration 80: Training Loss: 0.3946


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.97it/s]

Iteration 90: Training Loss: 0.3906


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.3869


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.35it/s]

Iteration 10: Validation Loss: 0.7333


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  6.03it/s]

Iteration 20: Validation Loss: 0.8874


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.88it/s]


Iteration 30: Validation Loss: 1.0067
Train Loss: 0.3859, Val Loss: 0.9924
Epoch 4/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.3614


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.97it/s]

Iteration 20: Training Loss: 0.3527


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.97it/s]

Iteration 30: Training Loss: 0.3469


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.97it/s]

Iteration 40: Training Loss: 0.3452


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.3433


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.3414


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.97it/s]

Iteration 70: Training Loss: 0.3403


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.97it/s]

Iteration 80: Training Loss: 0.3406


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.97it/s]

Iteration 90: Training Loss: 0.3403


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.3401


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.21it/s]

Iteration 10: Validation Loss: 0.6641


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  5.91it/s]

Iteration 20: Validation Loss: 0.6117


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.70it/s]


Iteration 30: Validation Loss: 0.6602
Train Loss: 0.3395, Val Loss: 0.6539
Epoch 5/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.3228


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.96it/s]

Iteration 20: Training Loss: 0.3205


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.97it/s]

Iteration 30: Training Loss: 0.3136


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.97it/s]

Iteration 40: Training Loss: 0.3127


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.3119


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.3109


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.97it/s]

Iteration 70: Training Loss: 0.3115


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.97it/s]

Iteration 80: Training Loss: 0.3113


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.97it/s]

Iteration 90: Training Loss: 0.3111


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.3114


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.33it/s]

Iteration 10: Validation Loss: 0.7255


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  6.07it/s]

Iteration 20: Validation Loss: 0.6303


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.87it/s]


Iteration 30: Validation Loss: 0.6923
Train Loss: 0.3111, Val Loss: 0.6853
Epoch 6/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.3257


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.96it/s]

Iteration 20: Training Loss: 0.3272


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.97it/s]

Iteration 30: Training Loss: 0.3205


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.97it/s]

Iteration 40: Training Loss: 0.3151


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.3111


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.3095


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.97it/s]

Iteration 70: Training Loss: 0.3071


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.97it/s]

Iteration 80: Training Loss: 0.3037


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.97it/s]

Iteration 90: Training Loss: 0.3026


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.3013


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.22it/s]

Iteration 10: Validation Loss: 0.9004


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  5.98it/s]

Iteration 20: Validation Loss: 0.7704


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.74it/s]


Iteration 30: Validation Loss: 0.7720
Train Loss: 0.3006, Val Loss: 0.7626
Epoch 7/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2980


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.97it/s]

Iteration 20: Training Loss: 0.2889


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.97it/s]

Iteration 30: Training Loss: 0.2877


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.97it/s]

Iteration 40: Training Loss: 0.2852


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.2838


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.2853


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.97it/s]

Iteration 70: Training Loss: 0.2837


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.97it/s]

Iteration 80: Training Loss: 0.2816


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.97it/s]

Iteration 90: Training Loss: 0.2808


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2815


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.09it/s]

Iteration 10: Validation Loss: 0.6404


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  5.93it/s]

Iteration 20: Validation Loss: 0.5488


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.69it/s]

Iteration 30: Validation Loss: 0.6037
Train Loss: 0.2812, Val Loss: 0.6004
Saving best model...





Epoch 8/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2758


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.96it/s]

Iteration 20: Training Loss: 0.2864


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.96it/s]

Iteration 30: Training Loss: 0.2840


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.97it/s]

Iteration 40: Training Loss: 0.2824


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.2821


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.2799


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.97it/s]

Iteration 70: Training Loss: 0.2781


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.96it/s]

Iteration 80: Training Loss: 0.2773


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.97it/s]

Iteration 90: Training Loss: 0.2763


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2756


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.24it/s]

Iteration 10: Validation Loss: 0.6666


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  6.02it/s]

Iteration 20: Validation Loss: 0.6083


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.77it/s]


Iteration 30: Validation Loss: 0.6672
Train Loss: 0.2756, Val Loss: 0.6612
Epoch 9/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2669


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.97it/s]

Iteration 20: Training Loss: 0.2629


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.97it/s]

Iteration 30: Training Loss: 0.2603


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.97it/s]

Iteration 40: Training Loss: 0.2586


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.2609


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.2592


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.97it/s]

Iteration 70: Training Loss: 0.2590


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.97it/s]

Iteration 80: Training Loss: 0.2599


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.97it/s]

Iteration 90: Training Loss: 0.2614


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2623


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.20it/s]

Iteration 10: Validation Loss: 0.6431


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  5.94it/s]

Iteration 20: Validation Loss: 0.5594


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.74it/s]


Iteration 30: Validation Loss: 0.6163
Train Loss: 0.2623, Val Loss: 0.6105
Epoch 10/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.92it/s]

Iteration 10: Training Loss: 0.2491


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.97it/s]

Iteration 20: Training Loss: 0.2547


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.97it/s]

Iteration 30: Training Loss: 0.2550


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.96it/s]

Iteration 40: Training Loss: 0.2572


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.2569


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.96it/s]

Iteration 60: Training Loss: 0.2558


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.97it/s]

Iteration 70: Training Loss: 0.2561


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.96it/s]

Iteration 80: Training Loss: 0.2579


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.97it/s]

Iteration 90: Training Loss: 0.2573


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2562


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.19it/s]

Iteration 10: Validation Loss: 0.6436


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  5.88it/s]

Iteration 20: Validation Loss: 0.6165


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.71it/s]


Iteration 30: Validation Loss: 0.6913
Train Loss: 0.2568, Val Loss: 0.6842
Epoch 11/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.92it/s]

Iteration 10: Training Loss: 0.2482


Training:  19%|█▉        | 20/104 [00:11<00:42,  1.96it/s]

Iteration 20: Training Loss: 0.2461


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.96it/s]

Iteration 30: Training Loss: 0.2415


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.96it/s]

Iteration 40: Training Loss: 0.2383


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.96it/s]

Iteration 50: Training Loss: 0.2378


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.96it/s]

Iteration 60: Training Loss: 0.2372


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.96it/s]

Iteration 70: Training Loss: 0.2374


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.96it/s]

Iteration 80: Training Loss: 0.2364


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.97it/s]

Iteration 90: Training Loss: 0.2366


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2367


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.22it/s]

Iteration 10: Validation Loss: 0.6568


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  5.97it/s]

Iteration 20: Validation Loss: 0.6063


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.75it/s]


Iteration 30: Validation Loss: 0.6703
Train Loss: 0.2368, Val Loss: 0.6632
Epoch 12/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2359


Training:  19%|█▉        | 20/104 [00:11<00:42,  1.96it/s]

Iteration 20: Training Loss: 0.2320


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.96it/s]

Iteration 30: Training Loss: 0.2331


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.96it/s]

Iteration 40: Training Loss: 0.2332


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.96it/s]

Iteration 50: Training Loss: 0.2320


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.2317


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.96it/s]

Iteration 70: Training Loss: 0.2311


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.97it/s]

Iteration 80: Training Loss: 0.2311


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.97it/s]

Iteration 90: Training Loss: 0.2320


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2323


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.17it/s]

Iteration 10: Validation Loss: 0.6414


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  5.93it/s]

Iteration 20: Validation Loss: 0.5953


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.72it/s]


Iteration 30: Validation Loss: 0.6665
Train Loss: 0.2322, Val Loss: 0.6594
Epoch 13/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2224


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.96it/s]

Iteration 20: Training Loss: 0.2250


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.97it/s]

Iteration 30: Training Loss: 0.2256


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.96it/s]

Iteration 40: Training Loss: 0.2271


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.96it/s]

Iteration 50: Training Loss: 0.2281


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.96it/s]

Iteration 60: Training Loss: 0.2281


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.96it/s]

Iteration 70: Training Loss: 0.2289


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.96it/s]

Iteration 80: Training Loss: 0.2296


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.96it/s]

Iteration 90: Training Loss: 0.2298


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2297


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.34it/s]

Iteration 10: Validation Loss: 0.6440


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  6.05it/s]

Iteration 20: Validation Loss: 0.5953


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.86it/s]


Iteration 30: Validation Loss: 0.6656
Train Loss: 0.2298, Val Loss: 0.6588
Epoch 14/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2286


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.96it/s]

Iteration 20: Training Loss: 0.2293


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.97it/s]

Iteration 30: Training Loss: 0.2290


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.96it/s]

Iteration 40: Training Loss: 0.2287


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.2277


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.96it/s]

Iteration 60: Training Loss: 0.2284


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.96it/s]

Iteration 70: Training Loss: 0.2284


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.96it/s]

Iteration 80: Training Loss: 0.2285


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.97it/s]

Iteration 90: Training Loss: 0.2281


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2278


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.23it/s]

Iteration 10: Validation Loss: 0.6619


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  5.93it/s]

Iteration 20: Validation Loss: 0.6029


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.72it/s]


Iteration 30: Validation Loss: 0.6699
Train Loss: 0.2283, Val Loss: 0.6626
Epoch 15/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2325


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.97it/s]

Iteration 20: Training Loss: 0.2308


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.97it/s]

Iteration 30: Training Loss: 0.2283


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.97it/s]

Iteration 40: Training Loss: 0.2275


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.2268


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.96it/s]

Iteration 60: Training Loss: 0.2280


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.97it/s]

Iteration 70: Training Loss: 0.2271


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.96it/s]

Iteration 80: Training Loss: 0.2271


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.96it/s]

Iteration 90: Training Loss: 0.2271


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2271


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.19it/s]

Iteration 10: Validation Loss: 0.6651


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  5.94it/s]

Iteration 20: Validation Loss: 0.6066


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.74it/s]


Iteration 30: Validation Loss: 0.6765
Train Loss: 0.2267, Val Loss: 0.6693
Epoch 16/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2260


Training:  19%|█▉        | 20/104 [00:11<00:42,  1.96it/s]

Iteration 20: Training Loss: 0.2250


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.96it/s]

Iteration 30: Training Loss: 0.2268


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.97it/s]

Iteration 40: Training Loss: 0.2261


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.96it/s]

Iteration 50: Training Loss: 0.2263


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.2260


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.97it/s]

Iteration 70: Training Loss: 0.2262


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.97it/s]

Iteration 80: Training Loss: 0.2258


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.96it/s]

Iteration 90: Training Loss: 0.2258


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2256


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.32it/s]

Iteration 10: Validation Loss: 0.6068


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  6.07it/s]

Iteration 20: Validation Loss: 0.5557


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.87it/s]


Iteration 30: Validation Loss: 0.6303
Train Loss: 0.2255, Val Loss: 0.6240
Epoch 17/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2175


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.97it/s]

Iteration 20: Training Loss: 0.2214


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.97it/s]

Iteration 30: Training Loss: 0.2205


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.96it/s]

Iteration 40: Training Loss: 0.2181


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.2213


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.96it/s]

Iteration 60: Training Loss: 0.2219


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.96it/s]

Iteration 70: Training Loss: 0.2230


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.96it/s]

Iteration 80: Training Loss: 0.2232


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.97it/s]

Iteration 90: Training Loss: 0.2237


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2236


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.34it/s]

Iteration 10: Validation Loss: 0.5948


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  6.06it/s]

Iteration 20: Validation Loss: 0.5539


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.85it/s]


Iteration 30: Validation Loss: 0.6347
Train Loss: 0.2238, Val Loss: 0.6284
Epoch 18/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2164


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.96it/s]

Iteration 20: Training Loss: 0.2169


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.97it/s]

Iteration 30: Training Loss: 0.2195


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.97it/s]

Iteration 40: Training Loss: 0.2190


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.2210


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.96it/s]

Iteration 60: Training Loss: 0.2231


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.96it/s]

Iteration 70: Training Loss: 0.2235


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.96it/s]

Iteration 80: Training Loss: 0.2230


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.97it/s]

Iteration 90: Training Loss: 0.2236


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2231


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.17it/s]

Iteration 10: Validation Loss: 0.6490


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  5.94it/s]

Iteration 20: Validation Loss: 0.6112


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.73it/s]


Iteration 30: Validation Loss: 0.6852
Train Loss: 0.2229, Val Loss: 0.6773
Epoch 19/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.92it/s]

Iteration 10: Training Loss: 0.2196


Training:  19%|█▉        | 20/104 [00:11<00:42,  1.96it/s]

Iteration 20: Training Loss: 0.2178


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.96it/s]

Iteration 30: Training Loss: 0.2178


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.96it/s]

Iteration 40: Training Loss: 0.2196


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.96it/s]

Iteration 50: Training Loss: 0.2221


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.2219


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.96it/s]

Iteration 70: Training Loss: 0.2218


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.96it/s]

Iteration 80: Training Loss: 0.2216


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.96it/s]

Iteration 90: Training Loss: 0.2221


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2213


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.25it/s]

Iteration 10: Validation Loss: 0.6051


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  5.96it/s]

Iteration 20: Validation Loss: 0.5598


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.73it/s]


Iteration 30: Validation Loss: 0.6416
Train Loss: 0.2212, Val Loss: 0.6350
Epoch 20/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2183


Training:  19%|█▉        | 20/104 [00:11<00:42,  1.96it/s]

Iteration 20: Training Loss: 0.2168


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.97it/s]

Iteration 30: Training Loss: 0.2187


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.97it/s]

Iteration 40: Training Loss: 0.2217


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.2210


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.96it/s]

Iteration 60: Training Loss: 0.2206


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.97it/s]

Iteration 70: Training Loss: 0.2216


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.96it/s]

Iteration 80: Training Loss: 0.2211


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.96it/s]

Iteration 90: Training Loss: 0.2201


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2192


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.20it/s]

Iteration 10: Validation Loss: 0.5880


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  5.96it/s]

Iteration 20: Validation Loss: 0.5437


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.73it/s]


Iteration 30: Validation Loss: 0.6254
Train Loss: 0.2194, Val Loss: 0.6188
Epoch 21/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2285


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.97it/s]

Iteration 20: Training Loss: 0.2220


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.96it/s]

Iteration 30: Training Loss: 0.2198


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.96it/s]

Iteration 40: Training Loss: 0.2191


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.2189


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.2171


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.96it/s]

Iteration 70: Training Loss: 0.2171


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.97it/s]

Iteration 80: Training Loss: 0.2160


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.96it/s]

Iteration 90: Training Loss: 0.2162


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.96it/s]

Iteration 100: Training Loss: 0.2163


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.30it/s]

Iteration 10: Validation Loss: 0.5900


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  6.06it/s]

Iteration 20: Validation Loss: 0.5504


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.86it/s]


Iteration 30: Validation Loss: 0.6343
Train Loss: 0.2165, Val Loss: 0.6278
Epoch 22/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2159


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.96it/s]

Iteration 20: Training Loss: 0.2184


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.97it/s]

Iteration 30: Training Loss: 0.2171


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.97it/s]

Iteration 40: Training Loss: 0.2175


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.96it/s]

Iteration 50: Training Loss: 0.2168


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.2174


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.97it/s]

Iteration 70: Training Loss: 0.2176


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.97it/s]

Iteration 80: Training Loss: 0.2161


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.96it/s]

Iteration 90: Training Loss: 0.2156


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2158


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.31it/s]

Iteration 10: Validation Loss: 0.6017


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  6.06it/s]

Iteration 20: Validation Loss: 0.5570


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.86it/s]


Iteration 30: Validation Loss: 0.6387
Train Loss: 0.2160, Val Loss: 0.6319
Epoch 23/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2180


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.96it/s]

Iteration 20: Training Loss: 0.2147


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.96it/s]

Iteration 30: Training Loss: 0.2143


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.96it/s]

Iteration 40: Training Loss: 0.2151


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.97it/s]

Iteration 50: Training Loss: 0.2140


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.2163


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.96it/s]

Iteration 70: Training Loss: 0.2163


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.96it/s]

Iteration 80: Training Loss: 0.2156


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.96it/s]

Iteration 90: Training Loss: 0.2158


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2161


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.25it/s]

Iteration 10: Validation Loss: 0.5977


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  6.00it/s]

Iteration 20: Validation Loss: 0.5570


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.76it/s]


Iteration 30: Validation Loss: 0.6408
Train Loss: 0.2158, Val Loss: 0.6341
Epoch 24/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2186


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.97it/s]

Iteration 20: Training Loss: 0.2175


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.97it/s]

Iteration 30: Training Loss: 0.2147


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.96it/s]

Iteration 40: Training Loss: 0.2138


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.96it/s]

Iteration 50: Training Loss: 0.2156


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.2163


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.97it/s]

Iteration 70: Training Loss: 0.2155


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.97it/s]

Iteration 80: Training Loss: 0.2156


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.96it/s]

Iteration 90: Training Loss: 0.2157


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.96it/s]

Iteration 100: Training Loss: 0.2154


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.25it/s]

Iteration 10: Validation Loss: 0.5919


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  5.96it/s]

Iteration 20: Validation Loss: 0.5516


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.73it/s]


Iteration 30: Validation Loss: 0.6362
Train Loss: 0.2156, Val Loss: 0.6296
Epoch 25/25


Training:  10%|▉         | 10/104 [00:05<00:48,  1.93it/s]

Iteration 10: Training Loss: 0.2122


Training:  19%|█▉        | 20/104 [00:10<00:42,  1.96it/s]

Iteration 20: Training Loss: 0.2129


Training:  29%|██▉       | 30/104 [00:16<00:37,  1.96it/s]

Iteration 30: Training Loss: 0.2125


Training:  38%|███▊      | 40/104 [00:21<00:32,  1.97it/s]

Iteration 40: Training Loss: 0.2110


Training:  48%|████▊     | 50/104 [00:26<00:27,  1.96it/s]

Iteration 50: Training Loss: 0.2121


Training:  58%|█████▊    | 60/104 [00:31<00:22,  1.97it/s]

Iteration 60: Training Loss: 0.2132


Training:  67%|██████▋   | 70/104 [00:36<00:17,  1.97it/s]

Iteration 70: Training Loss: 0.2133


Training:  77%|███████▋  | 80/104 [00:41<00:12,  1.97it/s]

Iteration 80: Training Loss: 0.2153


Training:  87%|████████▋ | 90/104 [00:46<00:07,  1.96it/s]

Iteration 90: Training Loss: 0.2151


Training:  96%|█████████▌| 100/104 [00:51<00:02,  1.97it/s]

Iteration 100: Training Loss: 0.2153


Training: 100%|██████████| 104/104 [00:53<00:00,  1.95it/s]
Validation:  35%|███▌      | 11/31 [00:02<00:03,  5.28it/s]

Iteration 10: Validation Loss: 0.6019


Validation:  65%|██████▍   | 20/31 [00:04<00:01,  5.94it/s]

Iteration 20: Validation Loss: 0.5581


Validation: 100%|██████████| 31/31 [00:06<00:00,  4.73it/s]

Iteration 30: Validation Loss: 0.6425
Train Loss: 0.2155, Val Loss: 0.6358



