In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import random
import time
import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.brain as fob
from tqdm.notebook import tqdm, trange
from PIL import Image
import matplotlib.pyplot as plt
import torchsummary
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import os
from torch.nn.parallel import DistributedDataParallel as DDP
import torch.distributed as dist
# import resnet18 model from pytorch
from torchvision.models import resnet18
from torch.utils.tensorboard import SummaryWriter
import mxnet as mx
from mxnet import recordio
import torch.multiprocessing as mp
from sklearn.model_selection import train_test_split
from collections import defaultdict

In [2]:
# data_iter = mx.image.ImageIter(
#     batch_size=4,
#     data_shape=(3, 112, 96),
#     path_imgrec="./faces_webface_112x112/train.rec",
#     path_imgidx="./faces_webface_112x112/train.idx",
# )
# data_iter.reset()
# for j in range(4):
#     batch = data_iter.next()
#     data = batch.data[0]
#     # print(batch)
#     label = batch.label[0].asnumpy()
#     for i in range(4):
#         ax = plt.subplot(1, 4, i + 1)
#         plt.imshow(data[i].asnumpy().astype(np.uint8).transpose((1, 2, 0)))
#         ax.set_title("class: " + str(label[i]))
#         plt.axis("off")
#     plt.show()

# # ======= Code to show single image =======#
# path_imgrec = "./faces_webface_112x112/train.rec"
# path_imgidx = "./faces_webface_112x112/train.idx"
# imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "r")
# # %% 1 ~ 409623
# # for i in range(409623):
# for i in range(10):
#     header, s = recordio.unpack(imgrec.read_idx(i + 1))
#     img = mx.image.imdecode(s).asnumpy()
#     plt.imshow(img)
#     plt.title("id=" + str(i) + "label=" + str(header.label))
#     plt.pause(0.1)

# see how many identities are there in the dataset
path_imgidx = "./faces_webface_112x112/train.idx"
path_imgrec="./faces_webface_112x112/train.rec"
imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "r")
ids = []
for i in range(409623):
    header, _ = recordio.unpack(imgrec.read_idx(i + 1))
    ids.append(header.label)
print(len(set(ids)))

# show how many images are in the dataset
path_imgidx = "./faces_webface_112x112/train.idx"
path_imgrec="./faces_webface_112x112/train.rec"
imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "r")
count = 0
for i in range(409623):
    count += 1
print(count)

8076


409623


In [3]:
DIM = (112, 96)
BS = 512

In [4]:
class CASIAWebFaceDataset(Dataset):
    def __init__(self, rec_path, idx_path, property_path, transform=None):
        self.transform = transform
        self.imgrec = recordio.MXIndexedRecordIO(idx_path, rec_path, 'r')
        
        # Read property file for dataset metadata
        with open(property_path, "r") as f:
            property_str = f.read().strip()
            self.num_classes, self.img_height, self.img_width = map(int, property_str.split(','))
        
        # Parse class boundaries from idx file
        self.class_boundaries = {}
        with open(idx_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) == 2:
                    class_id, start_idx = int(parts[0]), int(parts[1])
                    self.class_boundaries[class_id] = start_idx
        
        self.class_ids = sorted(list(self.class_boundaries.keys()))
        
        # Calculate total number of samples
        self.total_samples = 0
        for i in range(len(self.class_ids)-1):
            class_id = self.class_ids[i]
            next_class_id = self.class_ids[i+1]
            class_size = self.class_boundaries[next_class_id] - self.class_boundaries[class_id]
            self.total_samples += class_size
        
        # Add samples from the last class
        last_class_id = self.class_ids[-1]
        # Assuming we know the total number of records from property file
        self.total_samples += 10572
    
    def __len__(self):
        return self.total_samples
    
    def __getitem__(self, idx):
        # Map global index to class ID and local index
        actual_idx = idx + 1  # MXNet indices start from 1
        
        # Read record
        header, s = recordio.unpack(self.imgrec.read_idx(actual_idx))
        img = mx.image.imdecode(s).asnumpy()
        label = int(header.label)
        
        # Convert to PIL and apply transforms
        img = Image.fromarray(img)
        if self.transform:
            img = self.transform(img)
        
        return img, label

In [5]:
def find_overlapping_identities(casia_dataset, lfw_dataset):
    """Find identities that overlap between CASIA WebFace and LFW"""
    casia_identities = set()
    lfw_identities = set()
    
    # Extract all CASIA identity names
    for idx in tqdm(range(len(casia_dataset)), desc="Scanning CASIA WebFace"):
        try:
            _, label = casia_dataset[idx]
            casia_identities.add(label)
        except Exception as e:
            print(f"Error processing CASIA index {idx}: {e}")
    
    # Extract all LFW identity names
    for sample in tqdm(lfw_dataset, desc="Scanning LFW"):
        lfw_identities.add(sample.ground_truth.label)
    
    # Find overlap
    overlapping = casia_identities.intersection(lfw_identities)
    print(f"Found {len(overlapping)} overlapping identities")
    return overlapping

class FilteredCASIADataset(Dataset):
    """Wrapper dataset that filters out overlapping identities"""
    def __init__(self, base_dataset, excluded_labels):
        self.base_dataset = base_dataset
        self.excluded_labels = set(excluded_labels)
        
        # Pre-filter valid indices
        self.valid_indices = []
        for idx in tqdm(range(len(self.base_dataset)), desc="Filtering dataset"):
            try:
                _, label = self.base_dataset[idx]
                if label not in self.excluded_labels:
                    self.valid_indices.append(idx)
            except Exception as e:
                print(f"Error filtering at index {idx}: {e}")
        
        print(f"Kept {len(self.valid_indices)} out of {len(self.base_dataset)} samples")
    
    def __len__(self):
        return len(self.valid_indices)
    
    def __getitem__(self, idx):
        return self.base_dataset[self.valid_indices[idx]]

In [6]:
class CustomNormalize:
    def __call__(self, img):
        # Convert PIL image to tensor
        img = transforms.ToTensor()(img)
        # Subtract 128 and divide by 128
        img = (img * 255.0 - 128.0) / 128.0
        return img

train_preprocess = transforms.Compose([
    transforms.Resize(DIM),
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    transforms.RandomRotation(45),  # Randomly rotate the image by up to 10 degrees
    transforms.RandomVerticalFlip(),  # Randomly flip the image vertically
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Randomly change brightness, contrast, saturation, and hue
    CustomNormalize()
])

test_preprocess = transforms.Compose([
    transforms.Resize(DIM),
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    CustomNormalize()
])

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import random
import time
import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.brain as fob
from tqdm.notebook import tqdm, trange
from PIL import Image
import matplotlib.pyplot as plt
import torchsummary
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import os
from torch.nn.parallel import DistributedDataParallel as DDP
import torch.distributed as dist
# import resnet18 model from pytorch
from torchvision.models import resnet18
from torch.utils.tensorboard import SummaryWriter
import mxnet as mx
from mxnet import recordio
import torch.multiprocessing as mp
from sklearn.model_selection import train_test_split
from collections import defaultdict

In [8]:
# data_iter = mx.image.ImageIter(
#     batch_size=4,
#     data_shape=(3, 112, 96),
#     path_imgrec="./faces_webface_112x112/train.rec",
#     path_imgidx="./faces_webface_112x112/train.idx",
# )
# data_iter.reset()
# for j in range(4):
#     batch = data_iter.next()
#     data = batch.data[0]
#     # print(batch)
#     label = batch.label[0].asnumpy()
#     for i in range(4):
#         ax = plt.subplot(1, 4, i + 1)
#         plt.imshow(data[i].asnumpy().astype(np.uint8).transpose((1, 2, 0)))
#         ax.set_title("class: " + str(label[i]))
#         plt.axis("off")
#     plt.show()

# # ======= Code to show single image =======#
# path_imgrec = "./faces_webface_112x112/train.rec"
# path_imgidx = "./faces_webface_112x112/train.idx"
# imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "r")
# # %% 1 ~ 409623
# # for i in range(409623):
# for i in range(10):
#     header, s = recordio.unpack(imgrec.read_idx(i + 1))
#     img = mx.image.imdecode(s).asnumpy()
#     plt.imshow(img)
#     plt.title("id=" + str(i) + "label=" + str(header.label))
#     plt.pause(0.1)

# see how many identities are there in the dataset
path_imgidx = "./faces_webface_112x112/train.idx"
path_imgrec="./faces_webface_112x112/train.rec"
imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "r")
ids = []
for i in range(409623):
    header, _ = recordio.unpack(imgrec.read_idx(i + 1))
    ids.append(header.label)
print(len(set(ids)))

# show how many images are in the dataset
path_imgidx = "./faces_webface_112x112/train.idx"
path_imgrec="./faces_webface_112x112/train.rec"
imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "r")
count = 0
for i in range(409623):
    count += 1
print(count)

8076


409623


In [9]:
DIM = (112, 96)
BS = 512

In [10]:
class CASIAWebFaceDataset(Dataset):
    def __init__(self, rec_path, idx_path, property_path, transform=None):
        self.transform = transform
        self.imgrec = recordio.MXIndexedRecordIO(idx_path, rec_path, 'r')
        
        # Read property file for dataset metadata
        with open(property_path, "r") as f:
            property_str = f.read().strip()
            self.num_classes, self.img_height, self.img_width = map(int, property_str.split(','))
        
        # Parse class boundaries from idx file
        self.class_boundaries = {}
        with open(idx_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) == 2:
                    class_id, start_idx = int(parts[0]), int(parts[1])
                    self.class_boundaries[class_id] = start_idx
        
        self.class_ids = sorted(list(self.class_boundaries.keys()))
        
        # Calculate total number of samples
        self.total_samples = 0
        for i in range(len(self.class_ids)-1):
            class_id = self.class_ids[i]
            next_class_id = self.class_ids[i+1]
            class_size = self.class_boundaries[next_class_id] - self.class_boundaries[class_id]
            self.total_samples += class_size
        
        # Add samples from the last class
        last_class_id = self.class_ids[-1]
        # Assuming we know the total number of records from property file
        self.total_samples += 10572
    
    def __len__(self):
        return self.total_samples
    
    def __getitem__(self, idx):
        # Map global index to class ID and local index
        actual_idx = idx + 1  # MXNet indices start from 1
        
        # Read record
        header, s = recordio.unpack(self.imgrec.read_idx(actual_idx))
        img = mx.image.imdecode(s).asnumpy()
        label = int(header.label)
        
        # Convert to PIL and apply transforms
        img = Image.fromarray(img)
        if self.transform:
            img = self.transform(img)
        
        return img, label

In [11]:
def find_overlapping_identities(casia_dataset, lfw_dataset):
    """Find identities that overlap between CASIA WebFace and LFW"""
    casia_identities = set()
    lfw_identities = set()
    
    # Extract all CASIA identity names
    for idx in tqdm(range(len(casia_dataset)), desc="Scanning CASIA WebFace"):
        try:
            _, label = casia_dataset[idx]
            casia_identities.add(label)
        except Exception as e:
            print(f"Error processing CASIA index {idx}: {e}")
    
    # Extract all LFW identity names
    for sample in tqdm(lfw_dataset, desc="Scanning LFW"):
        lfw_identities.add(sample.ground_truth.label)
    
    # Find overlap
    overlapping = casia_identities.intersection(lfw_identities)
    print(f"Found {len(overlapping)} overlapping identities")
    return overlapping

class FilteredCASIADataset(Dataset):
    """Wrapper dataset that filters out overlapping identities"""
    def __init__(self, base_dataset, excluded_labels):
        self.base_dataset = base_dataset
        self.excluded_labels = set(excluded_labels)
        
        # Pre-filter valid indices
        self.valid_indices = []
        for idx in tqdm(range(len(self.base_dataset)), desc="Filtering dataset"):
            try:
                _, label = self.base_dataset[idx]
                if label not in self.excluded_labels:
                    self.valid_indices.append(idx)
            except Exception as e:
                print(f"Error filtering at index {idx}: {e}")
        
        print(f"Kept {len(self.valid_indices)} out of {len(self.base_dataset)} samples")
    
    def __len__(self):
        return len(self.valid_indices)
    
    def __getitem__(self, idx):
        return self.base_dataset[self.valid_indices[idx]]

In [12]:
class CustomNormalize:
    def __call__(self, img):
        # Convert PIL image to tensor
        img = transforms.ToTensor()(img)
        # Subtract 128 and divide by 128
        img = (img * 255.0 - 128.0) / 128.0
        return img

train_preprocess = transforms.Compose([
    transforms.Resize(DIM),
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    transforms.RandomRotation(45),  # Randomly rotate the image by up to 10 degrees
    transforms.RandomVerticalFlip(),  # Randomly flip the image vertically
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Randomly change brightness, contrast, saturation, and hue
    CustomNormalize()
])

test_preprocess = transforms.Compose([
    transforms.Resize(DIM),
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    CustomNormalize()
])

In [13]:
class AMSoftmax(nn.Module):

    def __init__(self,
                 in_feats,
                 n_classes=10572,
                 m=0.35,
                 s=30):
        super(AMSoftmax, self).__init__()
        self.m = m
        self.s = s
        self.in_feats = in_feats
        self.W = torch.nn.Parameter(torch.randn(in_feats, n_classes), requires_grad=True)
        self.ce = nn.CrossEntropyLoss()
        nn.init.xavier_normal_(self.W, gain=1)

    def forward(self, x, lb):
        assert x.size()[0] == lb.size()[0]
        assert x.size()[1] == self.in_feats
        x_norm = torch.norm(x, p=2, dim=1, keepdim=True).clamp(min=1e-9)
        x_norm = torch.div(x, x_norm)
        w_norm = torch.norm(self.W, p=2, dim=0, keepdim=True).clamp(min=1e-9)
        w_norm = torch.div(self.W, w_norm)
        costh = torch.mm(x_norm, w_norm)
        delt_costh = torch.zeros_like(costh).scatter_(1, lb.unsqueeze(1), self.m)
        costh_m = costh - delt_costh
        costh_m_s = self.s * costh_m
        loss = self.ce(costh_m_s, lb)
        return loss

In [14]:
# class AMSoftmax(nn.Module):
#     '''
#     The am softmax as seen on https://arxiv.org/pdf/1801.05599.pdf,

#         in_features: size of the embedding, eg. 512
#         n_classes: number of classes on the classification task
#         s: s parameter of loss, standard = 30.
#         m: m parameter of loss, standard = 0.4, best between 0.35 and 0.4 according to paper.

#         *inputs: tensor shaped (batch_size X embedding_size)
#         output : tensor shaped (batch_size X n_classes) AM_softmax logits for NLL_loss.

#     '''
#     def __init__(self, in_features, n_classes, s=30, m=0.4):
#         super(AMSoftmax, self).__init__()
#         self.linear = nn.Linear(in_features, n_classes, bias=False)
#         self.s = s
#         self.m = m

#     def forward(self, *inputs):
#         x_vector = F.normalize(inputs[0], p=2, dim=-1)
#         self.linear.weight.data = F.normalize(self.linear.weight.data, p=2, dim=-1)
#         logits = self.linear(x_vector)
#         scaled_logits = (logits - self.m)*self.s
#         return  scaled_logits - self._am_logsumexp(logits)

#     def _am_logsumexp(self, logits):
#         '''
#         logsumexp designed for am_softmax, the computation is numerically stable

#         '''
#         max_x = torch.max(logits, dim=-1)[0].unsqueeze(-1)
#         term1 = (self.s*(logits - (max_x + self.m))).exp()
#         term2 = (self.s * (logits - max_x)).exp().sum(-1).unsqueeze(-1) \
#                 - (self.s * (logits - max_x)).exp()
#         return self.s*max_x + (term2 + term1).log()
    


class BasicBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += self.shortcut(x)
        out = self.relu(out)
        return out

class ResNet18(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNet18, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        self.layer1 = self._make_layer(BasicBlock, 64, 2, stride=1)
        self.layer2 = self._make_layer(BasicBlock, 128, 2, stride=2)
        self.layer3 = self._make_layer(BasicBlock, 256, 2, stride=2)
        self.layer4 = self._make_layer(BasicBlock, 512, 2, stride=2)
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.maxpool(out)
        
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


# How can I modify the model to output embeddings of size 128?
# 1. Create a new model that outputs embeddings
# 2. Modify the last layer of the model to output embeddings
# 3. Use a hook to extract embeddings from the model
# 4. Use a custom loss function to train the model

class EmbeddingResNet18(nn.Module):
    def __init__(self, embedding_size=128, dropout=0.5):
        super(EmbeddingResNet18, self).__init__()
        self.resnet = resnet18(weights=None)
        self.resnet.fc = nn.Sequential(
            nn.Linear(512, embedding_size),
            nn.BatchNorm1d(embedding_size),
            nn.ReLU(),
            nn.Dropout(dropout)
        )
    
    def forward(self, x):
        return self.resnet(x)


In [15]:
# model = EmbeddingResNet18()
# model = model.to("cuda")
# torchsummary.summary(model, (3, 112, 96))

# model = ResNet18(num_classes=128)
# model = model.to("cuda")
# torchsummary.summary(model, (3, 112, 96))

In [16]:
def test_classifier(model, classifier, data_loader, device, message):
    model.eval()
    classifier.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in tqdm(data_loader, desc="Testing"):
            images, labels = images.to(device), labels.to(device)
            embeddings = model(images)  # Extract features
            logits = classifier(embeddings)  # Compute AMSoftmax logits
            predictions = torch.argmax(logits, dim=1)  # Get class with max probability
            
            correct += (predictions == labels).sum().item()
            total += labels.size(0)

    accuracy = correct / total * 100
    print(f"✅ Classification Accuracy for {message}: {accuracy:.2f}%")
    return accuracy

In [17]:
def test_classifier_cosine_similarity(model, classifier, data_loader, device, message):
    model.eval()
    classifier.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in tqdm(data_loader, desc="Testing"):
            images, labels = images.to(device), labels.to(device)
            embeddings = model(images)  # Extract features
            embeddings = F.normalize(embeddings, p=2, dim=1)
            weights = F.normalize(classifier.W, p=2, dim=0)
            logits = torch.matmul(embeddings, weights)
            predictions = torch.argmax(logits, dim=1)

            correct += (predictions == labels).sum().item()
            total += labels.size(0)

    accuracy = correct / total * 100


    print(f"✅ Classification Accuracy for {message}: {accuracy:.2f}%")
    return accuracy

In [18]:
def train_AMSoftmax(model: nn.Module, classifier: nn.Module, data_loader: DataLoader, val_loader: DataLoader,
                    optimizer: optim.Optimizer, scheduler: optim.lr_scheduler, 
                    criterion: nn.Module, epochs: int, device: torch.device, 
                    retain_graph: bool):

    train_losses = []
    classifier.eval()
    tmstmp = time.strftime("%Y%m%d-%H%M%S")
    best_loss = np.inf
    vacc = 0

    # tmstmp = "20250313-074759"

    log_dir = f"runs/112x96_ResNet18_AMSoftmax_{tmstmp}"
    writer = SummaryWriter(log_dir=log_dir)

    print(f"Started Training at {tmstmp}")
    
    for e, epoch in enumerate(tqdm(range(epochs), desc="Epochs")):
        model.train()
        running_loss = 0.0
        for i, (images, labels) in enumerate(tqdm(data_loader, desc="Batches")):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            embeddings = model(images)  # Extract embeddings from model
            # logits = classifier(embeddings)  # Compute AMSoftmax logits
            # loss = criterion(logits, labels)  # Compute NLL loss
            loss = classifier(embeddings, labels)

            loss.backward(retain_graph=retain_graph)
            optimizer.step()
            running_loss += loss.item()
            # print(f"Batch {i+1}/{len(data_loader)} - Loss: {loss.item():.4f}")

        scheduler.step()
        avg_loss = running_loss / len(data_loader)
        train_losses.append(avg_loss)

        # Validation
        model.eval()
        running_loss = 0.0
        for i, (images, labels) in enumerate(tqdm(val_loader, desc="Validation")):
            images, labels = images.to(device), labels.to(device)
            embeddings = model(images)
            # logits = classifier(embeddings)
            # loss = criterion(logits, labels)
            loss = classifier(embeddings, labels)
            running_loss += loss.item()
            # print(f"Batch {i+1}/{len(val_loader)} - Loss: {loss.item():.4f}")


        # Log per epoch
        writer.add_scalar('Loss/train', avg_loss, epoch)
        writer.add_scalar('Loss/val', running_loss / len(val_loader), epoch)
        # if (e+1) % 10 == 0:
        #     train_acc = test_classifier_cosine_similarity(model, classifier, data_loader, device, "Training")
        #     val_acc = test_classifier_cosine_similarity(model, classifier, val_loader, device, "Validation")
        #     writer.add_scalar('Accuracy/train', train_acc, epoch)
        #     writer.add_scalar('Accuracy/val', val_acc, epoch)
        #     if val_acc > vacc:
        #         vacc = val_acc
        #         torch.save(model.state_dict(), f"{log_dir}/112x96_ResNet18_AMSoftmax_validation_{tmstmp}.pt")
        #         print(f"Saved best model with validation accuracy {vacc}")
        print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")
        # Save Best Model
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(model.state_dict(), f"{log_dir}/112x96_ResNet18_AMSoftmax_{tmstmp}.pt")
            print(f"Saved best model with loss: {best_loss:.4f}")

    print(f"Finished Training at {time.strftime('%Y%m%d-%H%M%S')} with best validation accuracy {vacc:.4f}")
    writer.close()

    return model, train_losses, tmstmp

In [19]:
# file_path = '/home/ichitu/py-files/label_to_indices.txt'
# if os.path.exists(file_path):
#     print(f"File exists, size: {os.path.getsize(file_path)} bytes")
    
#     # Preview file content
#     with open(file_path, 'r') as f:
#         # print size of file
#         print(f.read())
# else:
#     print("File doesn't exist")

In [20]:
def train_on_casia_webface(embedding_model, classifier, device, device_ids):
    # Set up paths
    rec_path = "/home/ichitu/py-files/faces_webface_112x112/train.rec"
    idx_path = "/home/ichitu/py-files/faces_webface_112x112/train.idx"
    property_path = "/home/ichitu/py-files/faces_webface_112x112/property"

    
    # Load datasets
    print("Loading CASIA WebFace dataset...")
    casia_dataset = CASIAWebFaceDataset(
        rec_path=rec_path,
        idx_path=idx_path,
        property_path=property_path,
        transform=train_preprocess
    )

    train_idx, test_idx = train_test_split(range(len(casia_dataset)), test_size=0.15, random_state=42)
    train_dataset = torch.utils.data.Subset(casia_dataset, train_idx)
    test_dataset = torch.utils.data.Subset(casia_dataset, test_idx)
    val_idx, test_idx = train_test_split(test_idx, test_size=0.7, random_state=42)
    val_dataset = torch.utils.data.Subset(casia_dataset, val_idx)
    test_dataset = torch.utils.data.Subset(casia_dataset, test_idx)

    print(len(train_dataset))
    print(len(val_dataset))
    print(len(test_dataset))

    

    print(len(casia_dataset))
    
    # print("Loading LFW dataset...")
    # lfw_dataset = foz.load_zoo_dataset("lfw")
    
    # Find and filter overlapping identities
    # print("Finding overlapping identities...")
    # overlapping_ids = find_overlapping_identities(casia_dataset, lfw_dataset)
    
    # print("Creating filtered dataset...")
    # filtered_dataset = FilteredCASIADataset(casia_dataset, overlapping_ids)
    # filtered_dataset = casia_dataset
    
    # Create data loader
    # train_loader = DataLoader(
    #     train_dataset, 
    #     batch_size=BS * len(device_ids),
    #     shuffle=True, 
    #     num_workers=2,
    #     pin_memory=True
    # )

    # val_loader = DataLoader(
    #     val_dataset, 
    #     batch_size=BS * len(device_ids),
    #     shuffle=True, 
    #     num_workers=2,
    #     pin_memory=True
    # )

    # test_loader = DataLoader(
    #     test_dataset, 
    #     batch_size=BS * len(device_ids),
    #     shuffle=True, 
    #     num_workers=2,
    #     pin_memory=True
    # )

    train_loader = DataLoader(
        casia_dataset,
        batch_size=BS * len(device_ids),
        num_workers=4,
        sampler=torch.utils.data.SubsetRandomSampler(train_idx),
        pin_memory=True
    )

    val_loader = DataLoader(
        CASIAWebFaceDataset(
            rec_path=rec_path,
            idx_path=idx_path,
            property_path=property_path,
            transform=test_preprocess
        ),
        batch_size=BS * len(device_ids),
        num_workers=2,
        sampler=torch.utils.data.SubsetRandomSampler(val_idx),
        pin_memory=True
    )

    test_loader = DataLoader(
        CASIAWebFaceDataset(
            rec_path=rec_path,
            idx_path=idx_path,
            property_path=property_path,
            transform=test_preprocess
        ),
        batch_size=BS * len(device_ids),
        num_workers=2,
        sampler=torch.utils.data.SubsetRandomSampler(test_idx),
        pin_memory=True
    )
    
    # Set up model
    print("Setting up model...")
    
    # Set up optimizer (include both models' parameters)
    optimizer = optim.SGD(
        embedding_model.parameters(),
        lr=0.3,
        momentum=0.9,
        weight_decay=5e-4
    )
    # optimizer = optim.Adam(
    #     embedding_model.parameters(),
    #     lr=0.01,
    #     weight_decay=5e-4
    # )
    
    # Set up scheduler
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)
    # scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=300, eta_min=1e-6)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, 
                                               milestones=[100, 150, 200, 250, 300],
                                               gamma=0.1)
    
    # Set up loss
    criterion = nn.NLLLoss()

    device = torch.device("cuda:{}".format(device_ids[1][0]) if torch.cuda.is_available() else "cpu")
    
    # Train
    print("Starting training...")
    model, losses, timestamp = train_AMSoftmax(
        model=embedding_model,
        classifier=classifier,
        data_loader=train_loader,
        val_loader=val_loader,
        optimizer=optimizer,
        scheduler=scheduler,
        criterion=criterion,
        epochs=400,  # Adjust as needed
        device=device,
        retain_graph=False,
    )

    print("Testing model...")
    # test_acc = test_classifier(embedding_model, classifier, test_loader, device, "Test")
    test_acc = 0
    
    return model, losses, timestamp, test_acc

In [21]:
# assert 1==2
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

device_ids = [[3],[3]]
classifier = AMSoftmax(512, 10572)
# embedding_model = EmbeddingResNet18(128, 0.5).to(device)
embedding_model = ResNet18(512)

if torch.cuda.device_count() > 1:
    print(f"Avaible {torch.cuda.device_count()} GPUs and using {device_ids}")
    embedding_model = nn.DataParallel(embedding_model, device_ids=device_ids[0])
    classifier = nn.DataParallel(classifier, device_ids=device_ids[1])

# embedding_model = embedding_model.to(device)
# classifier = classifier.to(device)
# embedding_model.load_state_dict(torch.load("Models-pt/112x96_ResNet18_AMSoftmax_20250313-074759.pt"))

embedding_model, train_losses, tmstmp, test_acc = train_on_casia_webface(embedding_model, classifier, device, device_ids)


Avaible 4 GPUs and using [[3], [3]]
Loading CASIA WebFace dataset...


368434
19505
45513
433452


Setting up model...
Starting training...
Started Training at 20250323-170553


Epochs:   0%|          | 0/400 [00:00<?, ?it/s]

Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 1/400 - Loss: 20.0403
Saved best model with loss: 20.0403


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 2/400 - Loss: 19.8236
Saved best model with loss: 19.8236


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 3/400 - Loss: 19.5467
Saved best model with loss: 19.5467


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 4/400 - Loss: 19.2337
Saved best model with loss: 19.2337


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 5/400 - Loss: 18.9165
Saved best model with loss: 18.9165


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 6/400 - Loss: 18.6122
Saved best model with loss: 18.6122


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 7/400 - Loss: 18.3361
Saved best model with loss: 18.3361


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 8/400 - Loss: 18.0862
Saved best model with loss: 18.0862


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 9/400 - Loss: 17.8768
Saved best model with loss: 17.8768


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 10/400 - Loss: 17.6870
Saved best model with loss: 17.6870


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 11/400 - Loss: 17.5195
Saved best model with loss: 17.5195


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 12/400 - Loss: 17.3884
Saved best model with loss: 17.3884


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 13/400 - Loss: 17.2546
Saved best model with loss: 17.2546


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 14/400 - Loss: 17.1520
Saved best model with loss: 17.1520


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 15/400 - Loss: 17.0568
Saved best model with loss: 17.0568


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 16/400 - Loss: 16.9792
Saved best model with loss: 16.9792


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 17/400 - Loss: 16.9097
Saved best model with loss: 16.9097


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 18/400 - Loss: 16.8465
Saved best model with loss: 16.8465


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 19/400 - Loss: 16.7817
Saved best model with loss: 16.7817


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 20/400 - Loss: 16.7422
Saved best model with loss: 16.7422


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 21/400 - Loss: 16.7002
Saved best model with loss: 16.7002


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 22/400 - Loss: 16.6623
Saved best model with loss: 16.6623


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 23/400 - Loss: 16.6123
Saved best model with loss: 16.6123


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 24/400 - Loss: 16.5841
Saved best model with loss: 16.5841


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 25/400 - Loss: 16.5532
Saved best model with loss: 16.5532


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 26/400 - Loss: 16.5299
Saved best model with loss: 16.5299


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 27/400 - Loss: 16.4962
Saved best model with loss: 16.4962


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 28/400 - Loss: 16.4725
Saved best model with loss: 16.4725


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 29/400 - Loss: 16.4381
Saved best model with loss: 16.4381


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 30/400 - Loss: 16.4203
Saved best model with loss: 16.4203


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 31/400 - Loss: 16.3949
Saved best model with loss: 16.3949


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 32/400 - Loss: 16.3835
Saved best model with loss: 16.3835


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 33/400 - Loss: 16.3714
Saved best model with loss: 16.3714


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 34/400 - Loss: 16.3438
Saved best model with loss: 16.3438


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 35/400 - Loss: 16.3126
Saved best model with loss: 16.3126


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 36/400 - Loss: 16.3134


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 37/400 - Loss: 16.2933
Saved best model with loss: 16.2933


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 38/400 - Loss: 16.2748
Saved best model with loss: 16.2748


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 39/400 - Loss: 16.2580
Saved best model with loss: 16.2580


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 40/400 - Loss: 16.2483
Saved best model with loss: 16.2483


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 41/400 - Loss: 16.2403
Saved best model with loss: 16.2403


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 42/400 - Loss: 16.2220
Saved best model with loss: 16.2220


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 43/400 - Loss: 16.2163
Saved best model with loss: 16.2163


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 44/400 - Loss: 16.2129
Saved best model with loss: 16.2129


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 45/400 - Loss: 16.1821
Saved best model with loss: 16.1821


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 46/400 - Loss: 16.1924


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 47/400 - Loss: 16.1833


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 48/400 - Loss: 16.1729
Saved best model with loss: 16.1729


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 49/400 - Loss: 16.1541
Saved best model with loss: 16.1541


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 50/400 - Loss: 16.1508
Saved best model with loss: 16.1508


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 51/400 - Loss: 16.1517


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 52/400 - Loss: 16.1320
Saved best model with loss: 16.1320


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 53/400 - Loss: 16.1335


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 54/400 - Loss: 16.1154
Saved best model with loss: 16.1154


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 55/400 - Loss: 16.1118
Saved best model with loss: 16.1118


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 56/400 - Loss: 16.0993
Saved best model with loss: 16.0993


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 57/400 - Loss: 16.1036


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 58/400 - Loss: 16.0987
Saved best model with loss: 16.0987


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 59/400 - Loss: 16.0925
Saved best model with loss: 16.0925


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 60/400 - Loss: 16.0771
Saved best model with loss: 16.0771


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 61/400 - Loss: 16.0811


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 62/400 - Loss: 16.0801


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 63/400 - Loss: 16.0618
Saved best model with loss: 16.0618


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 64/400 - Loss: 16.0550
Saved best model with loss: 16.0550


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 65/400 - Loss: 16.0571


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 66/400 - Loss: 16.0559


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 67/400 - Loss: 16.0281


Saved best model with loss: 16.0281


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 68/400 - Loss: 16.0441


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 69/400 - Loss: 16.0372


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 70/400 - Loss: 16.0219
Saved best model with loss: 16.0219


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 71/400 - Loss: 16.0152
Saved best model with loss: 16.0152


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 72/400 - Loss: 16.0178


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 73/400 - Loss: 16.0086
Saved best model with loss: 16.0086


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 74/400 - Loss: 16.0086


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 75/400 - Loss: 16.0096


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 76/400 - Loss: 16.0088


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 77/400 - Loss: 15.9881
Saved best model with loss: 15.9881


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 78/400 - Loss: 16.0005


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 79/400 - Loss: 16.0062


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 80/400 - Loss: 15.9858
Saved best model with loss: 15.9858


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 81/400 - Loss: 15.9913


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 82/400 - Loss: 15.9880


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 83/400 - Loss: 15.9798
Saved best model with loss: 15.9798


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 84/400 - Loss: 15.9814


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 85/400 - Loss: 15.9799


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 86/400 - Loss: 15.9653
Saved best model with loss: 15.9653


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 87/400 - Loss: 15.9639
Saved best model with loss: 15.9639


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 88/400 - Loss: 15.9720


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 89/400 - Loss: 15.9521
Saved best model with loss: 15.9521


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 90/400 - Loss: 15.9570


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 91/400 - Loss: 15.9533


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 92/400 - Loss: 15.9392
Saved best model with loss: 15.9392


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 93/400 - Loss: 15.9373
Saved best model with loss: 15.9373


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 94/400 - Loss: 15.9485


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 95/400 - Loss: 15.9406


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 96/400 - Loss: 15.9343
Saved best model with loss: 15.9343


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 97/400 - Loss: 15.9252
Saved best model with loss: 15.9252


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 98/400 - Loss: 15.9224
Saved best model with loss: 15.9224


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 99/400 - Loss: 15.9173
Saved best model with loss: 15.9173


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 100/400 - Loss: 15.9422


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 101/400 - Loss: 14.1802
Saved best model with loss: 14.1802


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 102/400 - Loss: 13.5224
Saved best model with loss: 13.5224


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 103/400 - Loss: 13.2075
Saved best model with loss: 13.2075


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 104/400 - Loss: 12.9905
Saved best model with loss: 12.9905


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 105/400 - Loss: 12.8080


Saved best model with loss: 12.8080


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 106/400 - Loss: 12.6652
Saved best model with loss: 12.6652


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 107/400 - Loss: 12.5286
Saved best model with loss: 12.5286


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 108/400 - Loss: 12.4179
Saved best model with loss: 12.4179


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 109/400 - Loss: 12.3095
Saved best model with loss: 12.3095


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 110/400 - Loss: 12.2141
Saved best model with loss: 12.2141


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 111/400 - Loss: 12.1117
Saved best model with loss: 12.1117


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 112/400 - Loss: 12.0197
Saved best model with loss: 12.0197


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 113/400 - Loss: 11.9276
Saved best model with loss: 11.9276


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 114/400 - Loss: 11.8324
Saved best model with loss: 11.8324


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 115/400 - Loss: 11.7454
Saved best model with loss: 11.7454


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 116/400 - Loss: 11.6419
Saved best model with loss: 11.6419


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 117/400 - Loss: 11.5288
Saved best model with loss: 11.5288


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 118/400 - Loss: 11.4439
Saved best model with loss: 11.4439


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 119/400 - Loss: 11.3418
Saved best model with loss: 11.3418


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 120/400 - Loss: 11.2412
Saved best model with loss: 11.2412


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 121/400 - Loss: 11.1587
Saved best model with loss: 11.1587


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 122/400 - Loss: 11.0604
Saved best model with loss: 11.0604


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 123/400 - Loss: 10.9624
Saved best model with loss: 10.9624


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 124/400 - Loss: 10.8740
Saved best model with loss: 10.8740


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 125/400 - Loss: 10.7839
Saved best model with loss: 10.7839


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 126/400 - Loss: 10.6906
Saved best model with loss: 10.6906


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 127/400 - Loss: 10.6148
Saved best model with loss: 10.6148


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 128/400 - Loss: 10.5454
Saved best model with loss: 10.5454


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 129/400 - Loss: 10.4691
Saved best model with loss: 10.4691


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 130/400 - Loss: 10.3894
Saved best model with loss: 10.3894


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 131/400 - Loss: 10.3036
Saved best model with loss: 10.3036


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 132/400 - Loss: 10.2343
Saved best model with loss: 10.2343


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 133/400 - Loss: 10.1668
Saved best model with loss: 10.1668


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 134/400 - Loss: 10.1140
Saved best model with loss: 10.1140


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 135/400 - Loss: 10.0426
Saved best model with loss: 10.0426


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 136/400 - Loss: 9.9847
Saved best model with loss: 9.9847


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 137/400 - Loss: 9.9154
Saved best model with loss: 9.9154


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 138/400 - Loss: 9.8725
Saved best model with loss: 9.8725


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 139/400 - Loss: 9.7892
Saved best model with loss: 9.7892


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 140/400 - Loss: 9.7400
Saved best model with loss: 9.7400


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 141/400 - Loss: 9.6996
Saved best model with loss: 9.6996


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 142/400 - Loss: 9.6403
Saved best model with loss: 9.6403


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 143/400 - Loss: 9.6106
Saved best model with loss: 9.6106


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 144/400 - Loss: 9.5504
Saved best model with loss: 9.5504


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 145/400 - Loss: 9.5010
Saved best model with loss: 9.5010


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 146/400 - Loss: 9.4589
Saved best model with loss: 9.4589


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 147/400 - Loss: 9.4185
Saved best model with loss: 9.4185


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 148/400 - Loss: 9.3681
Saved best model with loss: 9.3681


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 149/400 - Loss: 9.3325
Saved best model with loss: 9.3325


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 150/400 - Loss: 9.3031
Saved best model with loss: 9.3031


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 151/400 - Loss: 7.9008
Saved best model with loss: 7.9008


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 152/400 - Loss: 7.4837
Saved best model with loss: 7.4837


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 153/400 - Loss: 7.3055
Saved best model with loss: 7.3055


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 154/400 - Loss: 7.1862
Saved best model with loss: 7.1862


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 155/400 - Loss: 7.0850
Saved best model with loss: 7.0850


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 156/400 - Loss: 7.0071


Saved best model with loss: 7.0071


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 157/400 - Loss: 6.9270
Saved best model with loss: 6.9270


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 158/400 - Loss: 6.8654
Saved best model with loss: 6.8654


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 159/400 - Loss: 6.8069
Saved best model with loss: 6.8069


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 160/400 - Loss: 6.7528
Saved best model with loss: 6.7528


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 161/400 - Loss: 6.6961
Saved best model with loss: 6.6961


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 162/400 - Loss: 6.6522
Saved best model with loss: 6.6522


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 163/400 - Loss: 6.6049
Saved best model with loss: 6.6049


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 164/400 - Loss: 6.5616
Saved best model with loss: 6.5616


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 165/400 - Loss: 6.5155
Saved best model with loss: 6.5155


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 166/400 - Loss: 6.4761
Saved best model with loss: 6.4761


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 167/400 - Loss: 6.4395
Saved best model with loss: 6.4395


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 168/400 - Loss: 6.3999
Saved best model with loss: 6.3999


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 169/400 - Loss: 6.3660
Saved best model with loss: 6.3660


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 170/400 - Loss: 6.3272
Saved best model with loss: 6.3272


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 171/400 - Loss: 6.2984
Saved best model with loss: 6.2984


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 172/400 - Loss: 6.2651
Saved best model with loss: 6.2651


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 173/400 - Loss: 6.2319
Saved best model with loss: 6.2319


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 174/400 - Loss: 6.2018
Saved best model with loss: 6.2018


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 175/400 - Loss: 6.1686
Saved best model with loss: 6.1686


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 176/400 - Loss: 6.1365
Saved best model with loss: 6.1365


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 177/400 - Loss: 6.1046
Saved best model with loss: 6.1046


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 178/400 - Loss: 6.0776
Saved best model with loss: 6.0776


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 179/400 - Loss: 6.0509
Saved best model with loss: 6.0509


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 180/400 - Loss: 6.0214
Saved best model with loss: 6.0214


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 181/400 - Loss: 5.9967
Saved best model with loss: 5.9967


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 182/400 - Loss: 5.9686
Saved best model with loss: 5.9686


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 183/400 - Loss: 5.9455
Saved best model with loss: 5.9455


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 184/400 - Loss: 5.9175
Saved best model with loss: 5.9175


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 185/400 - Loss: 5.8873
Saved best model with loss: 5.8873


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 186/400 - Loss: 5.8652
Saved best model with loss: 5.8652


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 187/400 - Loss: 5.8423
Saved best model with loss: 5.8423


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 188/400 - Loss: 5.8215
Saved best model with loss: 5.8215


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 189/400 - Loss: 5.7967
Saved best model with loss: 5.7967


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 190/400 - Loss: 5.7683
Saved best model with loss: 5.7683


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 191/400 - Loss: 5.7439
Saved best model with loss: 5.7439


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 192/400 - Loss: 5.7261
Saved best model with loss: 5.7261


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 193/400 - Loss: 5.7091
Saved best model with loss: 5.7091


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 194/400 - Loss: 5.6817
Saved best model with loss: 5.6817


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 195/400 - Loss: 5.6635
Saved best model with loss: 5.6635


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 196/400 - Loss: 5.6433
Saved best model with loss: 5.6433


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 197/400 - Loss: 5.6244
Saved best model with loss: 5.6244


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 198/400 - Loss: 5.6033
Saved best model with loss: 5.6033


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 199/400 - Loss: 5.5848
Saved best model with loss: 5.5848


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 200/400 - Loss: 5.5600
Saved best model with loss: 5.5600


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 201/400 - Loss: 5.2829
Saved best model with loss: 5.2829


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 202/400 - Loss: 5.2165
Saved best model with loss: 5.2165


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 203/400 - Loss: 5.1886
Saved best model with loss: 5.1886


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 204/400 - Loss: 5.1734
Saved best model with loss: 5.1734


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 205/400 - Loss: 5.1578
Saved best model with loss: 5.1578


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 206/400 - Loss: 5.1458
Saved best model with loss: 5.1458


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 207/400 - Loss: 5.1373
Saved best model with loss: 5.1373


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 208/400 - Loss: 5.1287
Saved best model with loss: 5.1287


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 209/400 - Loss: 5.1201
Saved best model with loss: 5.1201


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 210/400 - Loss: 5.1156
Saved best model with loss: 5.1156


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 211/400 - Loss: 5.1044
Saved best model with loss: 5.1044


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 212/400 - Loss: 5.0967
Saved best model with loss: 5.0967


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 213/400 - Loss: 5.0932
Saved best model with loss: 5.0932


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 214/400 - Loss: 5.0854
Saved best model with loss: 5.0854


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 215/400 - Loss: 5.0805
Saved best model with loss: 5.0805


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 216/400 - Loss: 5.0775
Saved best model with loss: 5.0775


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 217/400 - Loss: 5.0704
Saved best model with loss: 5.0704


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 218/400 - Loss: 5.0630
Saved best model with loss: 5.0630


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 219/400 - Loss: 5.0614
Saved best model with loss: 5.0614


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 220/400 - Loss: 5.0532
Saved best model with loss: 5.0532


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 221/400 - Loss: 5.0467
Saved best model with loss: 5.0467


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 222/400 - Loss: 5.0414
Saved best model with loss: 5.0414


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 223/400 - Loss: 5.0373
Saved best model with loss: 5.0373


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 224/400 - Loss: 5.0323
Saved best model with loss: 5.0323


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 225/400 - Loss: 5.0242
Saved best model with loss: 5.0242


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 226/400 - Loss: 5.0233
Saved best model with loss: 5.0233


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 227/400 - Loss: 5.0220
Saved best model with loss: 5.0220


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 228/400 - Loss: 5.0124
Saved best model with loss: 5.0124


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 229/400 - Loss: 5.0092
Saved best model with loss: 5.0092


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 230/400 - Loss: 5.0075
Saved best model with loss: 5.0075


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 231/400 - Loss: 4.9994
Saved best model with loss: 4.9994


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 232/400 - Loss: 4.9967
Saved best model with loss: 4.9967


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 233/400 - Loss: 4.9900
Saved best model with loss: 4.9900


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 234/400 - Loss: 4.9874
Saved best model with loss: 4.9874


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 235/400 - Loss: 4.9836
Saved best model with loss: 4.9836


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 236/400 - Loss: 4.9792
Saved best model with loss: 4.9792


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 237/400 - Loss: 4.9758
Saved best model with loss: 4.9758


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 238/400 - Loss: 4.9705
Saved best model with loss: 4.9705


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 239/400 - Loss: 4.9637
Saved best model with loss: 4.9637


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 240/400 - Loss: 4.9645


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 241/400 - Loss: 4.9616
Saved best model with loss: 4.9616


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 242/400 - Loss: 4.9557
Saved best model with loss: 4.9557


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 243/400 - Loss: 4.9473
Saved best model with loss: 4.9473


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 244/400 - Loss: 4.9410
Saved best model with loss: 4.9410


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 245/400 - Loss: 4.9429


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 246/400 - Loss: 4.9420


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 247/400 - Loss: 4.9318
Saved best model with loss: 4.9318


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 248/400 - Loss: 4.9302
Saved best model with loss: 4.9302


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 249/400 - Loss: 4.9269
Saved best model with loss: 4.9269


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 250/400 - Loss: 4.9193
Saved best model with loss: 4.9193


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 251/400 - Loss: 4.8952
Saved best model with loss: 4.8952


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 252/400 - Loss: 4.8881
Saved best model with loss: 4.8881


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 253/400 - Loss: 4.8851
Saved best model with loss: 4.8851


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 254/400 - Loss: 4.8815
Saved best model with loss: 4.8815


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 255/400 - Loss: 4.8872


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 256/400 - Loss: 4.8864


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 257/400 - Loss: 4.8808
Saved best model with loss: 4.8808


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 258/400 - Loss: 4.8795
Saved best model with loss: 4.8795


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 259/400 - Loss: 4.8795
Saved best model with loss: 4.8795


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 260/400 - Loss: 4.8782
Saved best model with loss: 4.8782


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 261/400 - Loss: 4.8755
Saved best model with loss: 4.8755


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 262/400 - Loss: 4.8778


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 263/400 - Loss: 4.8765


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 264/400 - Loss: 4.8768


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 265/400 - Loss: 4.8788


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 266/400 - Loss: 4.8762


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 267/400 - Loss: 4.8742
Saved best model with loss: 4.8742


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 268/400 - Loss: 4.8730
Saved best model with loss: 4.8730


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 269/400 - Loss: 4.8766


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 270/400 - Loss: 4.8745


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 271/400 - Loss: 4.8729
Saved best model with loss: 4.8729


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 272/400 - Loss: 4.8745


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 273/400 - Loss: 4.8723
Saved best model with loss: 4.8723


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 274/400 - Loss: 4.8705
Saved best model with loss: 4.8705


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 275/400 - Loss: 4.8694
Saved best model with loss: 4.8694


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 276/400 - Loss: 4.8681
Saved best model with loss: 4.8681


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 277/400 - Loss: 4.8699


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 278/400 - Loss: 4.8636
Saved best model with loss: 4.8636


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 279/400 - Loss: 4.8702


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 280/400 - Loss: 4.8687


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 281/400 - Loss: 4.8636


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 282/400 - Loss: 4.8646


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 283/400 - Loss: 4.8676


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 284/400 - Loss: 4.8651


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 285/400 - Loss: 4.8625
Saved best model with loss: 4.8625


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 286/400 - Loss: 4.8666


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 287/400 - Loss: 4.8615
Saved best model with loss: 4.8615


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 288/400 - Loss: 4.8619


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 289/400 - Loss: 4.8631


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 290/400 - Loss: 4.8636


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 291/400 - Loss: 4.8630


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 292/400 - Loss: 4.8628


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 293/400 - Loss: 4.8595
Saved best model with loss: 4.8595


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 294/400 - Loss: 4.8588
Saved best model with loss: 4.8588


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 295/400 - Loss: 4.8570
Saved best model with loss: 4.8570


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 296/400 - Loss: 4.8582


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 297/400 - Loss: 4.8618


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 298/400 - Loss: 4.8636


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 299/400 - Loss: 4.8595


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 300/400 - Loss: 4.8576


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 301/400 - Loss: 4.8583


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 302/400 - Loss: 4.8537
Saved best model with loss: 4.8537


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 303/400 - Loss: 4.8554


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 304/400 - Loss: 4.8557


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 305/400 - Loss: 4.8558


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 306/400 - Loss: 4.8542


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 307/400 - Loss: 4.8532
Saved best model with loss: 4.8532


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 308/400 - Loss: 4.8541


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 309/400 - Loss: 4.8542


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 310/400 - Loss: 4.8524
Saved best model with loss: 4.8524


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 311/400 - Loss: 4.8543


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 312/400 - Loss: 4.8549


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 313/400 - Loss: 4.8560


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 314/400 - Loss: 4.8554


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 315/400 - Loss: 4.8523
Saved best model with loss: 4.8523


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 316/400 - Loss: 4.8544


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 317/400 - Loss: 4.8530


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 318/400 - Loss: 4.8534


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 319/400 - Loss: 4.8509
Saved best model with loss: 4.8509


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 320/400 - Loss: 4.8595


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 321/400 - Loss: 4.8538


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 322/400 - Loss: 4.8527


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 323/400 - Loss: 4.8529


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 324/400 - Loss: 4.8530


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 325/400 - Loss: 4.8522


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 326/400 - Loss: 4.8512


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 327/400 - Loss: 4.8528


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 328/400 - Loss: 4.8547


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 329/400 - Loss: 4.8544


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 330/400 - Loss: 4.8517


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 331/400 - Loss: 4.8552


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 332/400 - Loss: 4.8532


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 333/400 - Loss: 4.8524


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 334/400 - Loss: 4.8525


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 335/400 - Loss: 4.8488
Saved best model with loss: 4.8488


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 336/400 - Loss: 4.8523


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 337/400 - Loss: 4.8547


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 338/400 - Loss: 4.8536


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 339/400 - Loss: 4.8532


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 340/400 - Loss: 4.8507


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 341/400 - Loss: 4.8526


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 342/400 - Loss: 4.8506


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 343/400 - Loss: 4.8535


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 344/400 - Loss: 4.8538


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 345/400 - Loss: 4.8527


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 346/400 - Loss: 4.8494


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 347/400 - Loss: 4.8506


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 348/400 - Loss: 4.8511


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 349/400 - Loss: 4.8539


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 350/400 - Loss: 4.8514


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 351/400 - Loss: 4.8512


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 352/400 - Loss: 4.8553


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 353/400 - Loss: 4.8494


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 354/400 - Loss: 4.8502


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 355/400 - Loss: 4.8506


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 356/400 - Loss: 4.8512


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 357/400 - Loss: 4.8494


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 358/400 - Loss: 4.8499


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 359/400 - Loss: 4.8506


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 360/400 - Loss: 4.8509


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 361/400 - Loss: 4.8514


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 362/400 - Loss: 4.8515


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 363/400 - Loss: 4.8481
Saved best model with loss: 4.8481


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 364/400 - Loss: 4.8518


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 365/400 - Loss: 4.8510


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 366/400 - Loss: 4.8481
Saved best model with loss: 4.8481


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 367/400 - Loss: 4.8474
Saved best model with loss: 4.8474


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 368/400 - Loss: 4.8469
Saved best model with loss: 4.8469


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 369/400 - Loss: 4.8491


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 370/400 - Loss: 4.8496


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 371/400 - Loss: 4.8467
Saved best model with loss: 4.8467


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 372/400 - Loss: 4.8523


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 373/400 - Loss: 4.8496


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 374/400 - Loss: 4.8542


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 375/400 - Loss: 4.8504


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 376/400 - Loss: 4.8520


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 377/400 - Loss: 4.8462
Saved best model with loss: 4.8462


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 378/400 - Loss: 4.8542


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 379/400 - Loss: 4.8521


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 380/400 - Loss: 4.8489


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 381/400 - Loss: 4.8472


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 382/400 - Loss: 4.8496


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 383/400 - Loss: 4.8499


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 384/400 - Loss: 4.8529


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 385/400 - Loss: 4.8507


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 386/400 - Loss: 4.8483


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 387/400 - Loss: 4.8508


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 388/400 - Loss: 4.8499


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 389/400 - Loss: 4.8488


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 390/400 - Loss: 4.8461
Saved best model with loss: 4.8461


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 391/400 - Loss: 4.8441
Saved best model with loss: 4.8441


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 392/400 - Loss: 4.8506


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 393/400 - Loss: 4.8500


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 394/400 - Loss: 4.8498


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 395/400 - Loss: 4.8508


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 396/400 - Loss: 4.8483


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 397/400 - Loss: 4.8482


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 398/400 - Loss: 4.8470


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 399/400 - Loss: 4.8508


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 400/400 - Loss: 4.8491
Finished Training at 20250324-115151 with best validation accuracy 0.0000
Testing model...


In [22]:
print(test_acc)

0
