In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import random
import time
import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.brain as fob
from tqdm.notebook import tqdm, trange
from PIL import Image
import matplotlib.pyplot as plt
import torchsummary
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import os
from torch.nn.parallel import DistributedDataParallel as DDP
import torch.distributed as dist
# import resnet18 model from pytorch
from torchvision.models import resnet18
from torch.utils.tensorboard import SummaryWriter
import mxnet as mx
from mxnet import recordio
import torch.multiprocessing as mp
from sklearn.model_selection import train_test_split
from collections import defaultdict

In [2]:
# data_iter = mx.image.ImageIter(
#     batch_size=4,
#     data_shape=(3, 112, 96),
#     path_imgrec="./faces_webface_112x112/train.rec",
#     path_imgidx="./faces_webface_112x112/train.idx",
# )
# data_iter.reset()
# for j in range(4):
#     batch = data_iter.next()
#     data = batch.data[0]
#     # print(batch)
#     label = batch.label[0].asnumpy()
#     for i in range(4):
#         ax = plt.subplot(1, 4, i + 1)
#         plt.imshow(data[i].asnumpy().astype(np.uint8).transpose((1, 2, 0)))
#         ax.set_title("class: " + str(label[i]))
#         plt.axis("off")
#     plt.show()

# # ======= Code to show single image =======#
# path_imgrec = "./faces_webface_112x112/train.rec"
# path_imgidx = "./faces_webface_112x112/train.idx"
# imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "r")
# # %% 1 ~ 409623
# # for i in range(409623):
# for i in range(10):
#     header, s = recordio.unpack(imgrec.read_idx(i + 1))
#     img = mx.image.imdecode(s).asnumpy()
#     plt.imshow(img)
#     plt.title("id=" + str(i) + "label=" + str(header.label))
#     plt.pause(0.1)

# see how many identities are there in the dataset
path_imgidx = "./faces_webface_112x112/train.idx"
path_imgrec="./faces_webface_112x112/train.rec"
imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "r")
ids = []
for i in range(409623):
    header, _ = recordio.unpack(imgrec.read_idx(i + 1))
    ids.append(header.label)
print(len(set(ids)))

# show how many images are in the dataset
path_imgidx = "./faces_webface_112x112/train.idx"
path_imgrec="./faces_webface_112x112/train.rec"
imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "r")
count = 0
for i in range(409623):
    count += 1
print(count)

8076


409623


In [3]:
DIM = (112, 96)
BS = 512

In [4]:
class CASIAWebFaceDataset(Dataset):
    def __init__(self, rec_path, idx_path, property_path, transform=None):
        self.transform = transform
        self.imgrec = recordio.MXIndexedRecordIO(idx_path, rec_path, 'r')
        
        # Read property file for dataset metadata
        with open(property_path, "r") as f:
            property_str = f.read().strip()
            self.num_classes, self.img_height, self.img_width = map(int, property_str.split(','))
        
        # Parse class boundaries from idx file
        self.class_boundaries = {}
        with open(idx_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) == 2:
                    class_id, start_idx = int(parts[0]), int(parts[1])
                    self.class_boundaries[class_id] = start_idx
        
        self.class_ids = sorted(list(self.class_boundaries.keys()))
        
        # Calculate total number of samples
        self.total_samples = 0
        for i in range(len(self.class_ids)-1):
            class_id = self.class_ids[i]
            next_class_id = self.class_ids[i+1]
            class_size = self.class_boundaries[next_class_id] - self.class_boundaries[class_id]
            self.total_samples += class_size
        
        # Add samples from the last class
        last_class_id = self.class_ids[-1]
        # Assuming we know the total number of records from property file
        self.total_samples += 10572
    
    def __len__(self):
        return self.total_samples
    
    def __getitem__(self, idx):
        # Map global index to class ID and local index
        actual_idx = idx + 1  # MXNet indices start from 1
        
        # Read record
        header, s = recordio.unpack(self.imgrec.read_idx(actual_idx))
        img = mx.image.imdecode(s).asnumpy()
        label = int(header.label)
        
        # Convert to PIL and apply transforms
        img = Image.fromarray(img)
        if self.transform:
            img = self.transform(img)
        
        return img, label

In [5]:
def find_overlapping_identities(casia_dataset, lfw_dataset):
    """Find identities that overlap between CASIA WebFace and LFW"""
    casia_identities = set()
    lfw_identities = set()
    
    # Extract all CASIA identity names
    for idx in tqdm(range(len(casia_dataset)), desc="Scanning CASIA WebFace"):
        try:
            _, label = casia_dataset[idx]
            casia_identities.add(label)
        except Exception as e:
            print(f"Error processing CASIA index {idx}: {e}")
    
    # Extract all LFW identity names
    for sample in tqdm(lfw_dataset, desc="Scanning LFW"):
        lfw_identities.add(sample.ground_truth.label)
    
    # Find overlap
    overlapping = casia_identities.intersection(lfw_identities)
    print(f"Found {len(overlapping)} overlapping identities")
    return overlapping

class FilteredCASIADataset(Dataset):
    """Wrapper dataset that filters out overlapping identities"""
    def __init__(self, base_dataset, excluded_labels):
        self.base_dataset = base_dataset
        self.excluded_labels = set(excluded_labels)
        
        # Pre-filter valid indices
        self.valid_indices = []
        for idx in tqdm(range(len(self.base_dataset)), desc="Filtering dataset"):
            try:
                _, label = self.base_dataset[idx]
                if label not in self.excluded_labels:
                    self.valid_indices.append(idx)
            except Exception as e:
                print(f"Error filtering at index {idx}: {e}")
        
        print(f"Kept {len(self.valid_indices)} out of {len(self.base_dataset)} samples")
    
    def __len__(self):
        return len(self.valid_indices)
    
    def __getitem__(self, idx):
        return self.base_dataset[self.valid_indices[idx]]

In [6]:
class CustomNormalize:
    def __call__(self, img):
        # Convert PIL image to tensor
        img = transforms.ToTensor()(img)
        # Subtract 128 and divide by 128
        img = (img * 255.0 - 128.0) / 128.0
        return img

train_preprocess = transforms.Compose([
    transforms.Resize(DIM),
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    transforms.RandomRotation(45),  # Randomly rotate the image by up to 10 degrees
    transforms.RandomVerticalFlip(),  # Randomly flip the image vertically
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Randomly change brightness, contrast, saturation, and hue
    CustomNormalize()
])

test_preprocess = transforms.Compose([
    transforms.Resize(DIM),
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    CustomNormalize()
])

In [7]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import numpy as np
import random
import time
import fiftyone as fo
import fiftyone.zoo as foz
import fiftyone.brain as fob
from tqdm.notebook import tqdm, trange
from PIL import Image
import matplotlib.pyplot as plt
import torchsummary
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
import os
from torch.nn.parallel import DistributedDataParallel as DDP
import torch.distributed as dist
# import resnet18 model from pytorch
from torchvision.models import resnet18
from torch.utils.tensorboard import SummaryWriter
import mxnet as mx
from mxnet import recordio
import torch.multiprocessing as mp
from sklearn.model_selection import train_test_split
from collections import defaultdict

In [8]:
# data_iter = mx.image.ImageIter(
#     batch_size=4,
#     data_shape=(3, 112, 96),
#     path_imgrec="./faces_webface_112x112/train.rec",
#     path_imgidx="./faces_webface_112x112/train.idx",
# )
# data_iter.reset()
# for j in range(4):
#     batch = data_iter.next()
#     data = batch.data[0]
#     # print(batch)
#     label = batch.label[0].asnumpy()
#     for i in range(4):
#         ax = plt.subplot(1, 4, i + 1)
#         plt.imshow(data[i].asnumpy().astype(np.uint8).transpose((1, 2, 0)))
#         ax.set_title("class: " + str(label[i]))
#         plt.axis("off")
#     plt.show()

# # ======= Code to show single image =======#
# path_imgrec = "./faces_webface_112x112/train.rec"
# path_imgidx = "./faces_webface_112x112/train.idx"
# imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "r")
# # %% 1 ~ 409623
# # for i in range(409623):
# for i in range(10):
#     header, s = recordio.unpack(imgrec.read_idx(i + 1))
#     img = mx.image.imdecode(s).asnumpy()
#     plt.imshow(img)
#     plt.title("id=" + str(i) + "label=" + str(header.label))
#     plt.pause(0.1)

# see how many identities are there in the dataset
path_imgidx = "./faces_webface_112x112/train.idx"
path_imgrec="./faces_webface_112x112/train.rec"
imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "r")
ids = []
for i in range(409623):
    header, _ = recordio.unpack(imgrec.read_idx(i + 1))
    ids.append(header.label)
print(len(set(ids)))

# show how many images are in the dataset
path_imgidx = "./faces_webface_112x112/train.idx"
path_imgrec="./faces_webface_112x112/train.rec"
imgrec = recordio.MXIndexedRecordIO(path_imgidx, path_imgrec, "r")
count = 0
for i in range(409623):
    count += 1
print(count)

8076


409623


In [9]:
DIM = (112, 96)
BS = 512

In [10]:
class CASIAWebFaceDataset(Dataset):
    def __init__(self, rec_path, idx_path, property_path, transform=None):
        self.transform = transform
        self.imgrec = recordio.MXIndexedRecordIO(idx_path, rec_path, 'r')
        
        # Read property file for dataset metadata
        with open(property_path, "r") as f:
            property_str = f.read().strip()
            self.num_classes, self.img_height, self.img_width = map(int, property_str.split(','))
        
        # Parse class boundaries from idx file
        self.class_boundaries = {}
        with open(idx_path, "r") as f:
            for line in f:
                parts = line.strip().split()
                if len(parts) == 2:
                    class_id, start_idx = int(parts[0]), int(parts[1])
                    self.class_boundaries[class_id] = start_idx
        
        self.class_ids = sorted(list(self.class_boundaries.keys()))
        
        # Calculate total number of samples
        self.total_samples = 0
        for i in range(len(self.class_ids)-1):
            class_id = self.class_ids[i]
            next_class_id = self.class_ids[i+1]
            class_size = self.class_boundaries[next_class_id] - self.class_boundaries[class_id]
            self.total_samples += class_size
        
        # Add samples from the last class
        last_class_id = self.class_ids[-1]
        # Assuming we know the total number of records from property file
        self.total_samples += 10572
    
    def __len__(self):
        return self.total_samples
    
    def __getitem__(self, idx):
        # Map global index to class ID and local index
        actual_idx = idx + 1  # MXNet indices start from 1
        
        # Read record
        header, s = recordio.unpack(self.imgrec.read_idx(actual_idx))
        img = mx.image.imdecode(s).asnumpy()
        label = int(header.label)
        
        # Convert to PIL and apply transforms
        img = Image.fromarray(img)
        if self.transform:
            img = self.transform(img)
        
        return img, label

In [11]:
def find_overlapping_identities(casia_dataset, lfw_dataset):
    """Find identities that overlap between CASIA WebFace and LFW"""
    casia_identities = set()
    lfw_identities = set()
    
    # Extract all CASIA identity names
    for idx in tqdm(range(len(casia_dataset)), desc="Scanning CASIA WebFace"):
        try:
            _, label = casia_dataset[idx]
            casia_identities.add(label)
        except Exception as e:
            print(f"Error processing CASIA index {idx}: {e}")
    
    # Extract all LFW identity names
    for sample in tqdm(lfw_dataset, desc="Scanning LFW"):
        lfw_identities.add(sample.ground_truth.label)
    
    # Find overlap
    overlapping = casia_identities.intersection(lfw_identities)
    print(f"Found {len(overlapping)} overlapping identities")
    return overlapping

class FilteredCASIADataset(Dataset):
    """Wrapper dataset that filters out overlapping identities"""
    def __init__(self, base_dataset, excluded_labels):
        self.base_dataset = base_dataset
        self.excluded_labels = set(excluded_labels)
        
        # Pre-filter valid indices
        self.valid_indices = []
        for idx in tqdm(range(len(self.base_dataset)), desc="Filtering dataset"):
            try:
                _, label = self.base_dataset[idx]
                if label not in self.excluded_labels:
                    self.valid_indices.append(idx)
            except Exception as e:
                print(f"Error filtering at index {idx}: {e}")
        
        print(f"Kept {len(self.valid_indices)} out of {len(self.base_dataset)} samples")
    
    def __len__(self):
        return len(self.valid_indices)
    
    def __getitem__(self, idx):
        return self.base_dataset[self.valid_indices[idx]]

In [12]:
class CustomNormalize:
    def __call__(self, img):
        # Convert PIL image to tensor
        img = transforms.ToTensor()(img)
        # Subtract 128 and divide by 128
        img = (img * 255.0 - 128.0) / 128.0
        return img

train_preprocess = transforms.Compose([
    transforms.Resize(DIM),
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    transforms.RandomRotation(45),  # Randomly rotate the image by up to 10 degrees
    transforms.RandomVerticalFlip(),  # Randomly flip the image vertically
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Randomly change brightness, contrast, saturation, and hue
    CustomNormalize()
])

test_preprocess = transforms.Compose([
    transforms.Resize(DIM),
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    CustomNormalize()
])

In [13]:
class AMSoftmax(nn.Module):

    def __init__(self,
                 in_feats,
                 n_classes=10572,
                 m=0.35,
                 s=30):
        super(AMSoftmax, self).__init__()
        self.m = m
        self.s = s
        self.in_feats = in_feats
        self.W = torch.nn.Parameter(torch.randn(in_feats, n_classes), requires_grad=True)
        self.ce = nn.CrossEntropyLoss()
        nn.init.xavier_normal_(self.W, gain=1)

    def forward(self, x, lb):
        assert x.size()[0] == lb.size()[0]
        assert x.size()[1] == self.in_feats
        x_norm = torch.norm(x, p=2, dim=1, keepdim=True).clamp(min=1e-9)
        x_norm = torch.div(x, x_norm)
        w_norm = torch.norm(self.W, p=2, dim=0, keepdim=True).clamp(min=1e-9)
        w_norm = torch.div(self.W, w_norm)
        costh = torch.mm(x_norm, w_norm)
        delt_costh = torch.zeros_like(costh).scatter_(1, lb.unsqueeze(1), self.m)
        costh_m = costh - delt_costh
        costh_m_s = self.s * costh_m
        loss = self.ce(costh_m_s, lb)
        return loss

In [14]:
# class AMSoftmax(nn.Module):
#     '''
#     The am softmax as seen on https://arxiv.org/pdf/1801.05599.pdf,

#         in_features: size of the embedding, eg. 512
#         n_classes: number of classes on the classification task
#         s: s parameter of loss, standard = 30.
#         m: m parameter of loss, standard = 0.4, best between 0.35 and 0.4 according to paper.

#         *inputs: tensor shaped (batch_size X embedding_size)
#         output : tensor shaped (batch_size X n_classes) AM_softmax logits for NLL_loss.

#     '''
#     def __init__(self, in_features, n_classes, s=30, m=0.4):
#         super(AMSoftmax, self).__init__()
#         self.linear = nn.Linear(in_features, n_classes, bias=False)
#         self.s = s
#         self.m = m

#     def forward(self, *inputs):
#         x_vector = F.normalize(inputs[0], p=2, dim=-1)
#         self.linear.weight.data = F.normalize(self.linear.weight.data, p=2, dim=-1)
#         logits = self.linear(x_vector)
#         scaled_logits = (logits - self.m)*self.s
#         return  scaled_logits - self._am_logsumexp(logits)

#     def _am_logsumexp(self, logits):
#         '''
#         logsumexp designed for am_softmax, the computation is numerically stable

#         '''
#         max_x = torch.max(logits, dim=-1)[0].unsqueeze(-1)
#         term1 = (self.s*(logits - (max_x + self.m))).exp()
#         term2 = (self.s * (logits - max_x)).exp().sum(-1).unsqueeze(-1) \
#                 - (self.s * (logits - max_x)).exp()
#         return self.s*max_x + (term2 + term1).log()
    


class BasicBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        
        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.conv2(out)
        out = self.bn2(out)
        out += self.shortcut(x)
        out = self.relu(out)
        return out

class ResNet18(nn.Module):
    def __init__(self, num_classes=10):
        super(ResNet18, self).__init__()
        self.in_channels = 64
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        
        self.layer1 = self._make_layer(BasicBlock, 64, 2, stride=1)
        self.layer2 = self._make_layer(BasicBlock, 128, 2, stride=2)
        self.layer3 = self._make_layer(BasicBlock, 256, 2, stride=2)
        self.layer4 = self._make_layer(BasicBlock, 512, 2, stride=2)
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, num_classes)

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels
        return nn.Sequential(*layers)

    def forward(self, x):
        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)
        out = self.maxpool(out)
        
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        
        out = self.avgpool(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out


# How can I modify the model to output embeddings of size 128?
# 1. Create a new model that outputs embeddings
# 2. Modify the last layer of the model to output embeddings
# 3. Use a hook to extract embeddings from the model
# 4. Use a custom loss function to train the model

class EmbeddingResNet18(nn.Module):
    def __init__(self, embedding_size=128, dropout=0.5):
        super(EmbeddingResNet18, self).__init__()
        self.resnet = resnet18(weights=None)
        self.resnet.fc = nn.Sequential(
            nn.Linear(512, embedding_size),
            nn.BatchNorm1d(embedding_size),
            nn.ReLU(),
            nn.Dropout(dropout)
        )
    
    def forward(self, x):
        return self.resnet(x)


In [15]:
# model = EmbeddingResNet18()
# model = model.to("cuda")
# torchsummary.summary(model, (3, 112, 96))

# model = ResNet18(num_classes=128)
# model = model.to("cuda")
# torchsummary.summary(model, (3, 112, 96))

In [16]:
def test_classifier(model, classifier, data_loader, device, message):
    model.eval()
    classifier.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in tqdm(data_loader, desc="Testing"):
            images, labels = images.to(device), labels.to(device)
            embeddings = model(images)  # Extract features
            logits = classifier(embeddings)  # Compute AMSoftmax logits
            predictions = torch.argmax(logits, dim=1)  # Get class with max probability
            
            correct += (predictions == labels).sum().item()
            total += labels.size(0)

    accuracy = correct / total * 100
    print(f"✅ Classification Accuracy for {message}: {accuracy:.2f}%")
    return accuracy

In [17]:
def test_classifier_cosine_similarity(model, classifier, data_loader, device, message):
    model.eval()
    classifier.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in tqdm(data_loader, desc="Testing"):
            images, labels = images.to(device), labels.to(device)
            embeddings = model(images)  # Extract features
            embeddings = F.normalize(embeddings, p=2, dim=1)
            weights = F.normalize(classifier.W, p=2, dim=0)
            logits = torch.matmul(embeddings, weights)
            predictions = torch.argmax(logits, dim=1)

            correct += (predictions == labels).sum().item()
            total += labels.size(0)

    accuracy = correct / total * 100


    print(f"✅ Classification Accuracy for {message}: {accuracy:.2f}%")
    return accuracy

In [18]:
def train_AMSoftmax(model: nn.Module, classifier: nn.Module, data_loader: DataLoader, val_loader: DataLoader,
                    optimizer: optim.Optimizer, scheduler: optim.lr_scheduler, 
                    criterion: nn.Module, epochs: int, device: torch.device, 
                    retain_graph: bool):

    train_losses = []
    classifier.eval()
    tmstmp = time.strftime("%Y%m%d-%H%M%S")
    best_loss = np.inf
    vacc = 0

    # tmstmp = "20250313-074759"

    log_dir = f"runs/112x96_ResNet18_AMSoftmax_{tmstmp}"
    writer = SummaryWriter(log_dir=log_dir)

    print(f"Started Training at {tmstmp}")
    
    for e, epoch in enumerate(tqdm(range(epochs), desc="Epochs")):
        model.train()
        running_loss = 0.0
        for i, (images, labels) in enumerate(tqdm(data_loader, desc="Batches")):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            embeddings = model(images)  # Extract embeddings from model
            # logits = classifier(embeddings)  # Compute AMSoftmax logits
            # loss = criterion(logits, labels)  # Compute NLL loss
            loss = classifier(embeddings, labels)

            loss.backward(retain_graph=retain_graph)
            optimizer.step()
            running_loss += loss.item()
            # print(f"Batch {i+1}/{len(data_loader)} - Loss: {loss.item():.4f}")

        scheduler.step()
        avg_loss = running_loss / len(data_loader)
        train_losses.append(avg_loss)

        # Validation
        model.eval()
        running_loss = 0.0
        for i, (images, labels) in enumerate(tqdm(val_loader, desc="Validation")):
            images, labels = images.to(device), labels.to(device)
            embeddings = model(images)
            # logits = classifier(embeddings)
            # loss = criterion(logits, labels)
            loss = classifier(embeddings, labels)
            running_loss += loss.item()
            # print(f"Batch {i+1}/{len(val_loader)} - Loss: {loss.item():.4f}")


        # Log per epoch
        writer.add_scalar('Loss/train', avg_loss, epoch)
        writer.add_scalar('Loss/val', running_loss / len(val_loader), epoch)
        # if (e+1) % 10 == 0:
        #     train_acc = test_classifier_cosine_similarity(model, classifier, data_loader, device, "Training")
        #     val_acc = test_classifier_cosine_similarity(model, classifier, val_loader, device, "Validation")
        #     writer.add_scalar('Accuracy/train', train_acc, epoch)
        #     writer.add_scalar('Accuracy/val', val_acc, epoch)
        #     if val_acc > vacc:
        #         vacc = val_acc
        #         torch.save(model.state_dict(), f"{log_dir}/112x96_ResNet18_AMSoftmax_validation_{tmstmp}.pt")
        #         print(f"Saved best model with validation accuracy {vacc}")
        print(f"Epoch {epoch+1}/{epochs} - Loss: {avg_loss:.4f}")
        # Save Best Model
        if avg_loss < best_loss:
            best_loss = avg_loss
            torch.save(model.state_dict(), f"{log_dir}/112x96_ResNet18_AMSoftmax_{tmstmp}.pt")
            print(f"Saved best model with loss: {best_loss:.4f}")

    print(f"Finished Training at {time.strftime('%Y%m%d-%H%M%S')} with best validation accuracy {vacc:.4f}")
    writer.close()

    return model, train_losses, tmstmp

In [19]:
# file_path = '/home/ichitu/py-files/label_to_indices.txt'
# if os.path.exists(file_path):
#     print(f"File exists, size: {os.path.getsize(file_path)} bytes")
    
#     # Preview file content
#     with open(file_path, 'r') as f:
#         # print size of file
#         print(f.read())
# else:
#     print("File doesn't exist")

In [20]:
def train_on_casia_webface(embedding_model, classifier, device, device_ids):
    # Set up paths
    rec_path = "/home/ichitu/py-files/faces_webface_112x112/train.rec"
    idx_path = "/home/ichitu/py-files/faces_webface_112x112/train.idx"
    property_path = "/home/ichitu/py-files/faces_webface_112x112/property"

    
    # Load datasets
    print("Loading CASIA WebFace dataset...")
    casia_dataset = CASIAWebFaceDataset(
        rec_path=rec_path,
        idx_path=idx_path,
        property_path=property_path,
        transform=train_preprocess
    )

    train_idx, test_idx = train_test_split(range(len(casia_dataset)), test_size=0.15, random_state=42)
    train_dataset = torch.utils.data.Subset(casia_dataset, train_idx)
    test_dataset = torch.utils.data.Subset(casia_dataset, test_idx)
    val_idx, test_idx = train_test_split(test_idx, test_size=0.7, random_state=42)
    val_dataset = torch.utils.data.Subset(casia_dataset, val_idx)
    test_dataset = torch.utils.data.Subset(casia_dataset, test_idx)

    print(len(train_dataset))
    print(len(val_dataset))
    print(len(test_dataset))

    

    print(len(casia_dataset))
    
    # print("Loading LFW dataset...")
    # lfw_dataset = foz.load_zoo_dataset("lfw")
    
    # Find and filter overlapping identities
    # print("Finding overlapping identities...")
    # overlapping_ids = find_overlapping_identities(casia_dataset, lfw_dataset)
    
    # print("Creating filtered dataset...")
    # filtered_dataset = FilteredCASIADataset(casia_dataset, overlapping_ids)
    # filtered_dataset = casia_dataset
    
    # Create data loader
    # train_loader = DataLoader(
    #     train_dataset, 
    #     batch_size=BS * len(device_ids),
    #     shuffle=True, 
    #     num_workers=2,
    #     pin_memory=True
    # )

    # val_loader = DataLoader(
    #     val_dataset, 
    #     batch_size=BS * len(device_ids),
    #     shuffle=True, 
    #     num_workers=2,
    #     pin_memory=True
    # )

    # test_loader = DataLoader(
    #     test_dataset, 
    #     batch_size=BS * len(device_ids),
    #     shuffle=True, 
    #     num_workers=2,
    #     pin_memory=True
    # )

    train_loader = DataLoader(
        casia_dataset,
        batch_size=BS * len(device_ids),
        num_workers=4,
        sampler=torch.utils.data.SubsetRandomSampler(train_idx),
        pin_memory=True
    )

    val_loader = DataLoader(
        CASIAWebFaceDataset(
            rec_path=rec_path,
            idx_path=idx_path,
            property_path=property_path,
            transform=test_preprocess
        ),
        batch_size=BS * len(device_ids),
        num_workers=2,
        sampler=torch.utils.data.SubsetRandomSampler(val_idx),
        pin_memory=True
    )

    test_loader = DataLoader(
        CASIAWebFaceDataset(
            rec_path=rec_path,
            idx_path=idx_path,
            property_path=property_path,
            transform=test_preprocess
        ),
        batch_size=BS * len(device_ids),
        num_workers=2,
        sampler=torch.utils.data.SubsetRandomSampler(test_idx),
        pin_memory=True
    )
    
    # Set up model
    print("Setting up model...")
    
    # Set up optimizer (include both models' parameters)
    optimizer = optim.SGD(
        embedding_model.parameters(),
        lr=0.1,
        momentum=0.9,
        weight_decay=5e-4
    )
    # optimizer = optim.Adam(
    #     embedding_model.parameters(),
    #     lr=0.01,
    #     weight_decay=5e-4
    # )
    
    # Set up scheduler
    # scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.1)
    # scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=300, eta_min=1e-6)
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, 
                                               milestones=[100, 150, 200, 250, 300],
                                               gamma=0.1)
    
    # Set up loss
    criterion = nn.NLLLoss()

    device = torch.device("cuda:{}".format(device_ids[1][0]) if torch.cuda.is_available() else "cpu")
    
    # Train
    print("Starting training...")
    model, losses, timestamp = train_AMSoftmax(
        model=embedding_model,
        classifier=classifier,
        data_loader=train_loader,
        val_loader=val_loader,
        optimizer=optimizer,
        scheduler=scheduler,
        criterion=criterion,
        epochs=400,  # Adjust as needed
        device=device,
        retain_graph=False,
    )

    print("Testing model...")
    # test_acc = test_classifier(embedding_model, classifier, test_loader, device, "Test")
    test_acc = 0
    
    return model, losses, timestamp, test_acc

In [21]:
# assert 1==2
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

device_ids = [[0],[0]]
classifier = AMSoftmax(256, 10572)
# embedding_model = EmbeddingResNet18(128, 0.5).to(device)
embedding_model = ResNet18(256)

if torch.cuda.device_count() > 1:
    print(f"Avaible {torch.cuda.device_count()} GPUs and using {device_ids}")
    embedding_model = nn.DataParallel(embedding_model, device_ids=device_ids[0])
    classifier = nn.DataParallel(classifier, device_ids=device_ids[1])

# embedding_model = embedding_model.to(device)
# classifier = classifier.to(device)
# embedding_model.load_state_dict(torch.load("Models-pt/112x96_ResNet18_AMSoftmax_20250313-074759.pt"))

embedding_model, train_losses, tmstmp, test_acc = train_on_casia_webface(embedding_model, classifier, device, device_ids)


Avaible 4 GPUs and using [[0], [0]]
Loading CASIA WebFace dataset...


368434
19505
45513
433452


Setting up model...
Starting training...
Started Training at 20250323-194853


Epochs:   0%|          | 0/400 [00:00<?, ?it/s]

Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 1/400 - Loss: 20.7272
Saved best model with loss: 20.7272


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 2/400 - Loss: 20.5971
Saved best model with loss: 20.5971


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 3/400 - Loss: 20.4500
Saved best model with loss: 20.4500


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 4/400 - Loss: 20.2589
Saved best model with loss: 20.2589


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 5/400 - Loss: 20.0025
Saved best model with loss: 20.0025


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 6/400 - Loss: 19.6764
Saved best model with loss: 19.6764


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 7/400 - Loss: 19.3119
Saved best model with loss: 19.3119


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 8/400 - Loss: 18.9378
Saved best model with loss: 18.9378


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 9/400 - Loss: 18.5762
Saved best model with loss: 18.5762


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 10/400 - Loss: 18.2329
Saved best model with loss: 18.2329


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 11/400 - Loss: 17.9001
Saved best model with loss: 17.9001


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 12/400 - Loss: 17.5715
Saved best model with loss: 17.5715


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 13/400 - Loss: 17.2773
Saved best model with loss: 17.2773


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 14/400 - Loss: 17.0067
Saved best model with loss: 17.0067


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 15/400 - Loss: 16.7506
Saved best model with loss: 16.7506


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 16/400 - Loss: 16.5269
Saved best model with loss: 16.5269


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 17/400 - Loss: 16.3204
Saved best model with loss: 16.3204


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 18/400 - Loss: 16.1205
Saved best model with loss: 16.1205


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 19/400 - Loss: 15.9308
Saved best model with loss: 15.9308


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 20/400 - Loss: 15.7771
Saved best model with loss: 15.7771


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 21/400 - Loss: 15.6174
Saved best model with loss: 15.6174


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 22/400 - Loss: 15.4622
Saved best model with loss: 15.4622


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 23/400 - Loss: 15.3253
Saved best model with loss: 15.3253


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 24/400 - Loss: 15.1996
Saved best model with loss: 15.1996


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 25/400 - Loss: 15.0509
Saved best model with loss: 15.0509


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 26/400 - Loss: 14.9268
Saved best model with loss: 14.9268


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 27/400 - Loss: 14.7859
Saved best model with loss: 14.7859


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 28/400 - Loss: 14.6891
Saved best model with loss: 14.6891


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 29/400 - Loss: 14.5712
Saved best model with loss: 14.5712


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 30/400 - Loss: 14.4364
Saved best model with loss: 14.4364


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 31/400 - Loss: 14.3406
Saved best model with loss: 14.3406


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 32/400 - Loss: 14.2292
Saved best model with loss: 14.2292


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 33/400 - Loss: 14.1299
Saved best model with loss: 14.1299


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 34/400 - Loss: 14.0097
Saved best model with loss: 14.0097


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 35/400 - Loss: 13.9339
Saved best model with loss: 13.9339


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 36/400 - Loss: 13.8251
Saved best model with loss: 13.8251


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 37/400 - Loss: 13.7600
Saved best model with loss: 13.7600


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 38/400 - Loss: 13.6230
Saved best model with loss: 13.6230


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 39/400 - Loss: 13.5695
Saved best model with loss: 13.5695


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 40/400 - Loss: 13.4875
Saved best model with loss: 13.4875


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 41/400 - Loss: 13.4064
Saved best model with loss: 13.4064


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 42/400 - Loss: 13.3365
Saved best model with loss: 13.3365


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 43/400 - Loss: 13.2576
Saved best model with loss: 13.2576


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 44/400 - Loss: 13.1962
Saved best model with loss: 13.1962


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 45/400 - Loss: 13.1261
Saved best model with loss: 13.1261


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 46/400 - Loss: 13.0845
Saved best model with loss: 13.0845


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 47/400 - Loss: 13.0099
Saved best model with loss: 13.0099


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 48/400 - Loss: 12.9325
Saved best model with loss: 12.9325


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 49/400 - Loss: 12.9058
Saved best model with loss: 12.9058


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 50/400 - Loss: 12.8417
Saved best model with loss: 12.8417


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 51/400 - Loss: 12.7622
Saved best model with loss: 12.7622


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 52/400 - Loss: 12.7397
Saved best model with loss: 12.7397


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 53/400 - Loss: 12.6900
Saved best model with loss: 12.6900


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 54/400 - Loss: 12.6134
Saved best model with loss: 12.6134


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 55/400 - Loss: 12.5896
Saved best model with loss: 12.5896


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 56/400 - Loss: 12.5613
Saved best model with loss: 12.5613


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 57/400 - Loss: 12.4963
Saved best model with loss: 12.4963


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 58/400 - Loss: 12.4698
Saved best model with loss: 12.4698


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 59/400 - Loss: 12.4224
Saved best model with loss: 12.4224


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 60/400 - Loss: 12.4092
Saved best model with loss: 12.4092


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 61/400 - Loss: 12.3354
Saved best model with loss: 12.3354


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 62/400 - Loss: 12.3047
Saved best model with loss: 12.3047


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 63/400 - Loss: 12.2737
Saved best model with loss: 12.2737


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 64/400 - Loss: 12.2687
Saved best model with loss: 12.2687


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 65/400 - Loss: 12.2389
Saved best model with loss: 12.2389


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 66/400 - Loss: 12.1913
Saved best model with loss: 12.1913


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 67/400 - Loss: 12.1906
Saved best model with loss: 12.1906


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 68/400 - Loss: 12.1486
Saved best model with loss: 12.1486


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 69/400 - Loss: 12.1306
Saved best model with loss: 12.1306


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 70/400 - Loss: 12.1078
Saved best model with loss: 12.1078


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 71/400 - Loss: 12.0704
Saved best model with loss: 12.0704


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 72/400 - Loss: 12.0277
Saved best model with loss: 12.0277


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 73/400 - Loss: 12.0209
Saved best model with loss: 12.0209


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 74/400 - Loss: 12.0176
Saved best model with loss: 12.0176


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 75/400 - Loss: 11.9982
Saved best model with loss: 11.9982


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 76/400 - Loss: 11.9811
Saved best model with loss: 11.9811


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 77/400 - Loss: 11.9316
Saved best model with loss: 11.9316


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 78/400 - Loss: 11.9418


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 79/400 - Loss: 11.9139
Saved best model with loss: 11.9139


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 80/400 - Loss: 11.9087
Saved best model with loss: 11.9087


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 81/400 - Loss: 11.9099


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 82/400 - Loss: 11.9013
Saved best model with loss: 11.9013


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 83/400 - Loss: 11.8496
Saved best model with loss: 11.8496


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 84/400 - Loss: 11.8404
Saved best model with loss: 11.8404


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 85/400 - Loss: 11.8423


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 86/400 - Loss: 11.8104
Saved best model with loss: 11.8104


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 87/400 - Loss: 11.8083
Saved best model with loss: 11.8083


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 88/400 - Loss: 11.8151


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 89/400 - Loss: 11.7935
Saved best model with loss: 11.7935


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 90/400 - Loss: 11.7876
Saved best model with loss: 11.7876


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 91/400 - Loss: 11.7532
Saved best model with loss: 11.7532


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 92/400 - Loss: 11.7591


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 93/400 - Loss: 11.7249
Saved best model with loss: 11.7249


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 94/400 - Loss: 11.7454


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 95/400 - Loss: 11.7370


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 96/400 - Loss: 11.7066
Saved best model with loss: 11.7066


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 97/400 - Loss: 11.7107


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 98/400 - Loss: 11.7145


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 99/400 - Loss: 11.7049
Saved best model with loss: 11.7049


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 100/400 - Loss: 11.7001
Saved best model with loss: 11.7001


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 101/400 - Loss: 9.3531
Saved best model with loss: 9.3531


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 102/400 - Loss: 8.6008
Saved best model with loss: 8.6008


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 103/400 - Loss: 8.2382
Saved best model with loss: 8.2382


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 104/400 - Loss: 7.9788
Saved best model with loss: 7.9788


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 105/400 - Loss: 7.7657
Saved best model with loss: 7.7657


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 106/400 - Loss: 7.5851
Saved best model with loss: 7.5851


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 107/400 - Loss: 7.4228
Saved best model with loss: 7.4228


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 108/400 - Loss: 7.2754
Saved best model with loss: 7.2754


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 109/400 - Loss: 7.1410
Saved best model with loss: 7.1410


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 110/400 - Loss: 7.0169
Saved best model with loss: 7.0169


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 111/400 - Loss: 6.9044
Saved best model with loss: 6.9044


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 112/400 - Loss: 6.8035
Saved best model with loss: 6.8035


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 113/400 - Loss: 6.7045
Saved best model with loss: 6.7045


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 114/400 - Loss: 6.6197
Saved best model with loss: 6.6197


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 115/400 - Loss: 6.5333
Saved best model with loss: 6.5333


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 116/400 - Loss: 6.4597
Saved best model with loss: 6.4597


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 117/400 - Loss: 6.3823
Saved best model with loss: 6.3823


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 118/400 - Loss: 6.3159
Saved best model with loss: 6.3159


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 119/400 - Loss: 6.2601
Saved best model with loss: 6.2601


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 120/400 - Loss: 6.2045
Saved best model with loss: 6.2045


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 121/400 - Loss: 6.1494
Saved best model with loss: 6.1494


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 122/400 - Loss: 6.0989
Saved best model with loss: 6.0989


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 123/400 - Loss: 6.0663
Saved best model with loss: 6.0663


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 124/400 - Loss: 6.0138
Saved best model with loss: 6.0138


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 125/400 - Loss: 5.9894
Saved best model with loss: 5.9894


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 126/400 - Loss: 5.9510
Saved best model with loss: 5.9510


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 127/400 - Loss: 5.9133
Saved best model with loss: 5.9133


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 128/400 - Loss: 5.9006
Saved best model with loss: 5.9006


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 129/400 - Loss: 5.8706
Saved best model with loss: 5.8706


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 130/400 - Loss: 5.8492
Saved best model with loss: 5.8492


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 131/400 - Loss: 5.8247
Saved best model with loss: 5.8247


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 132/400 - Loss: 5.8191
Saved best model with loss: 5.8191


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 133/400 - Loss: 5.7899
Saved best model with loss: 5.7899


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 134/400 - Loss: 5.7846
Saved best model with loss: 5.7846


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 135/400 - Loss: 5.7716
Saved best model with loss: 5.7716


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 136/400 - Loss: 5.7598
Saved best model with loss: 5.7598


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 137/400 - Loss: 5.7372
Saved best model with loss: 5.7372


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 138/400 - Loss: 5.7246
Saved best model with loss: 5.7246


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 139/400 - Loss: 5.7075
Saved best model with loss: 5.7075


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 140/400 - Loss: 5.6966
Saved best model with loss: 5.6966


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 141/400 - Loss: 5.6797
Saved best model with loss: 5.6797


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 142/400 - Loss: 5.6716
Saved best model with loss: 5.6716


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 143/400 - Loss: 5.6485
Saved best model with loss: 5.6485


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 144/400 - Loss: 5.6377
Saved best model with loss: 5.6377


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 145/400 - Loss: 5.6330
Saved best model with loss: 5.6330


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 146/400 - Loss: 5.6191
Saved best model with loss: 5.6191


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 147/400 - Loss: 5.5902
Saved best model with loss: 5.5902


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 148/400 - Loss: 5.5939


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 149/400 - Loss: 5.5910


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 150/400 - Loss: 5.5539
Saved best model with loss: 5.5539


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 151/400 - Loss: 4.5552
Saved best model with loss: 4.5552


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 152/400 - Loss: 4.2716
Saved best model with loss: 4.2716


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 153/400 - Loss: 4.1621
Saved best model with loss: 4.1621


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 154/400 - Loss: 4.0791
Saved best model with loss: 4.0791


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 155/400 - Loss: 4.0290
Saved best model with loss: 4.0290


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 156/400 - Loss: 3.9735
Saved best model with loss: 3.9735


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 157/400 - Loss: 3.9304
Saved best model with loss: 3.9304


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 158/400 - Loss: 3.8954
Saved best model with loss: 3.8954


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 159/400 - Loss: 3.8634
Saved best model with loss: 3.8634


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 160/400 - Loss: 3.8269
Saved best model with loss: 3.8269


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 161/400 - Loss: 3.7986
Saved best model with loss: 3.7986


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 162/400 - Loss: 3.7742
Saved best model with loss: 3.7742


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 163/400 - Loss: 3.7502
Saved best model with loss: 3.7502


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 164/400 - Loss: 3.7251
Saved best model with loss: 3.7251


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 165/400 - Loss: 3.7006
Saved best model with loss: 3.7006


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 166/400 - Loss: 3.6762
Saved best model with loss: 3.6762


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 167/400 - Loss: 3.6613
Saved best model with loss: 3.6613


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 168/400 - Loss: 3.6359
Saved best model with loss: 3.6359


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 169/400 - Loss: 3.6158
Saved best model with loss: 3.6158


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 170/400 - Loss: 3.5985
Saved best model with loss: 3.5985


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 171/400 - Loss: 3.5793
Saved best model with loss: 3.5793


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 172/400 - Loss: 3.5646
Saved best model with loss: 3.5646


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 173/400 - Loss: 3.5461
Saved best model with loss: 3.5461


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 174/400 - Loss: 3.5291
Saved best model with loss: 3.5291


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 175/400 - Loss: 3.5120
Saved best model with loss: 3.5120


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 176/400 - Loss: 3.4960
Saved best model with loss: 3.4960


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 177/400 - Loss: 3.4808
Saved best model with loss: 3.4808


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 178/400 - Loss: 3.4669
Saved best model with loss: 3.4669


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 179/400 - Loss: 3.4530
Saved best model with loss: 3.4530


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 180/400 - Loss: 3.4406
Saved best model with loss: 3.4406


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 181/400 - Loss: 3.4232
Saved best model with loss: 3.4232


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 182/400 - Loss: 3.4098
Saved best model with loss: 3.4098


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 183/400 - Loss: 3.3966
Saved best model with loss: 3.3966


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 184/400 - Loss: 3.3858
Saved best model with loss: 3.3858


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 185/400 - Loss: 3.3718
Saved best model with loss: 3.3718


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 186/400 - Loss: 3.3542
Saved best model with loss: 3.3542


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 187/400 - Loss: 3.3424
Saved best model with loss: 3.3424


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 188/400 - Loss: 3.3326
Saved best model with loss: 3.3326


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 189/400 - Loss: 3.3205
Saved best model with loss: 3.3205


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 190/400 - Loss: 3.3077
Saved best model with loss: 3.3077


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 191/400 - Loss: 3.2934
Saved best model with loss: 3.2934


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 192/400 - Loss: 3.2804
Saved best model with loss: 3.2804


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 193/400 - Loss: 3.2696
Saved best model with loss: 3.2696


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 194/400 - Loss: 3.2572
Saved best model with loss: 3.2572


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 195/400 - Loss: 3.2508
Saved best model with loss: 3.2508


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 196/400 - Loss: 3.2367
Saved best model with loss: 3.2367


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 197/400 - Loss: 3.2253
Saved best model with loss: 3.2253


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 198/400 - Loss: 3.2163
Saved best model with loss: 3.2163


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 199/400 - Loss: 3.2067
Saved best model with loss: 3.2067


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 200/400 - Loss: 3.1955
Saved best model with loss: 3.1955


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 201/400 - Loss: 3.1052
Saved best model with loss: 3.1052


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 202/400 - Loss: 3.0865
Saved best model with loss: 3.0865


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 203/400 - Loss: 3.0777
Saved best model with loss: 3.0777


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 204/400 - Loss: 3.0749
Saved best model with loss: 3.0749


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 205/400 - Loss: 3.0681
Saved best model with loss: 3.0681


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 206/400 - Loss: 3.0648
Saved best model with loss: 3.0648


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 207/400 - Loss: 3.0646
Saved best model with loss: 3.0646


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 208/400 - Loss: 3.0633
Saved best model with loss: 3.0633


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 209/400 - Loss: 3.0565
Saved best model with loss: 3.0565


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 210/400 - Loss: 3.0568


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 211/400 - Loss: 3.0533
Saved best model with loss: 3.0533


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 212/400 - Loss: 3.0511
Saved best model with loss: 3.0511


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 213/400 - Loss: 3.0519


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 214/400 - Loss: 3.0469
Saved best model with loss: 3.0469


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 215/400 - Loss: 3.0453
Saved best model with loss: 3.0453


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 216/400 - Loss: 3.0445
Saved best model with loss: 3.0445


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 217/400 - Loss: 3.0416
Saved best model with loss: 3.0416


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 218/400 - Loss: 3.0432


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 219/400 - Loss: 3.0352
Saved best model with loss: 3.0352


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 220/400 - Loss: 3.0363


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 221/400 - Loss: 3.0326
Saved best model with loss: 3.0326


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 222/400 - Loss: 3.0330


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 223/400 - Loss: 3.0271
Saved best model with loss: 3.0271


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 224/400 - Loss: 3.0257
Saved best model with loss: 3.0257


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 225/400 - Loss: 3.0272


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 226/400 - Loss: 3.0256
Saved best model with loss: 3.0256


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 227/400 - Loss: 3.0227
Saved best model with loss: 3.0227


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 228/400 - Loss: 3.0235


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 229/400 - Loss: 3.0206
Saved best model with loss: 3.0206


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 230/400 - Loss: 3.0221


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 231/400 - Loss: 3.0160
Saved best model with loss: 3.0160


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 232/400 - Loss: 3.0182


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 233/400 - Loss: 3.0129
Saved best model with loss: 3.0129


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 234/400 - Loss: 3.0109
Saved best model with loss: 3.0109


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 235/400 - Loss: 3.0093
Saved best model with loss: 3.0093


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 236/400 - Loss: 3.0099


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 237/400 - Loss: 3.0073
Saved best model with loss: 3.0073


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 238/400 - Loss: 3.0067
Saved best model with loss: 3.0067


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 239/400 - Loss: 3.0034
Saved best model with loss: 3.0034


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 240/400 - Loss: 3.0038


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 241/400 - Loss: 3.0012
Saved best model with loss: 3.0012


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 242/400 - Loss: 2.9984
Saved best model with loss: 2.9984


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 243/400 - Loss: 2.9998


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 244/400 - Loss: 2.9991


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 245/400 - Loss: 2.9956
Saved best model with loss: 2.9956


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 246/400 - Loss: 2.9945
Saved best model with loss: 2.9945


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 247/400 - Loss: 2.9938
Saved best model with loss: 2.9938


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 248/400 - Loss: 2.9922
Saved best model with loss: 2.9922


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 249/400 - Loss: 2.9907
Saved best model with loss: 2.9907


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 250/400 - Loss: 2.9883
Saved best model with loss: 2.9883


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 251/400 - Loss: 2.9823
Saved best model with loss: 2.9823


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 252/400 - Loss: 2.9773
Saved best model with loss: 2.9773


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 253/400 - Loss: 2.9776


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 254/400 - Loss: 2.9778


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 255/400 - Loss: 2.9783


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 256/400 - Loss: 2.9776


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 257/400 - Loss: 2.9787


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 258/400 - Loss: 2.9781


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 259/400 - Loss: 2.9764
Saved best model with loss: 2.9764


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 260/400 - Loss: 2.9747
Saved best model with loss: 2.9747


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 261/400 - Loss: 2.9782


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 262/400 - Loss: 2.9781


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 263/400 - Loss: 2.9741
Saved best model with loss: 2.9741


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 264/400 - Loss: 2.9739
Saved best model with loss: 2.9739


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 265/400 - Loss: 2.9752


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 266/400 - Loss: 2.9783


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 267/400 - Loss: 2.9746


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 268/400 - Loss: 2.9750


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 269/400 - Loss: 2.9735
Saved best model with loss: 2.9735


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 270/400 - Loss: 2.9747


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 271/400 - Loss: 2.9768


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 272/400 - Loss: 2.9760


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 273/400 - Loss: 2.9731
Saved best model with loss: 2.9731


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 274/400 - Loss: 2.9722
Saved best model with loss: 2.9722


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 275/400 - Loss: 2.9725


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 276/400 - Loss: 2.9768


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 277/400 - Loss: 2.9733


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 278/400 - Loss: 2.9736


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 279/400 - Loss: 2.9758


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 280/400 - Loss: 2.9702
Saved best model with loss: 2.9702


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 281/400 - Loss: 2.9690
Saved best model with loss: 2.9690


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 282/400 - Loss: 2.9711


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 283/400 - Loss: 2.9766


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 284/400 - Loss: 2.9738


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 285/400 - Loss: 2.9742


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 286/400 - Loss: 2.9687
Saved best model with loss: 2.9687


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 287/400 - Loss: 2.9699


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 288/400 - Loss: 2.9697


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 289/400 - Loss: 2.9721


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 290/400 - Loss: 2.9695


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 291/400 - Loss: 2.9758


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 292/400 - Loss: 2.9688


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 293/400 - Loss: 2.9701


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 294/400 - Loss: 2.9705


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 295/400 - Loss: 2.9735


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 296/400 - Loss: 2.9663
Saved best model with loss: 2.9663


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 297/400 - Loss: 2.9691


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 298/400 - Loss: 2.9691


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 299/400 - Loss: 2.9699


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 300/400 - Loss: 2.9712


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 301/400 - Loss: 2.9719


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 302/400 - Loss: 2.9707


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 303/400 - Loss: 2.9699


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 304/400 - Loss: 2.9711


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 305/400 - Loss: 2.9657
Saved best model with loss: 2.9657


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 306/400 - Loss: 2.9675


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 307/400 - Loss: 2.9693


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 308/400 - Loss: 2.9706


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 309/400 - Loss: 2.9672


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 310/400 - Loss: 2.9688


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 311/400 - Loss: 2.9651
Saved best model with loss: 2.9651


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 312/400 - Loss: 2.9707


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 313/400 - Loss: 2.9696


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 314/400 - Loss: 2.9698


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 315/400 - Loss: 2.9645
Saved best model with loss: 2.9645


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 316/400 - Loss: 2.9665


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 317/400 - Loss: 2.9689


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 318/400 - Loss: 2.9668


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 319/400 - Loss: 2.9670


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 320/400 - Loss: 2.9688


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 321/400 - Loss: 2.9652


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 322/400 - Loss: 2.9670


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 323/400 - Loss: 2.9660


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 324/400 - Loss: 2.9671


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 325/400 - Loss: 2.9700


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 326/400 - Loss: 2.9665


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 327/400 - Loss: 2.9704


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 328/400 - Loss: 2.9707


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 329/400 - Loss: 2.9691


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 330/400 - Loss: 2.9680


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 331/400 - Loss: 2.9659


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 332/400 - Loss: 2.9675


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 333/400 - Loss: 2.9668


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 334/400 - Loss: 2.9687


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 335/400 - Loss: 2.9686


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 336/400 - Loss: 2.9691


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 337/400 - Loss: 2.9669


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 338/400 - Loss: 2.9650


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 339/400 - Loss: 2.9676


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 340/400 - Loss: 2.9709


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 341/400 - Loss: 2.9708


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 342/400 - Loss: 2.9679


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 343/400 - Loss: 2.9697


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 344/400 - Loss: 2.9659


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 345/400 - Loss: 2.9651


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 346/400 - Loss: 2.9681


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 347/400 - Loss: 2.9699


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 348/400 - Loss: 2.9691


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 349/400 - Loss: 2.9687


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 350/400 - Loss: 2.9679


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 351/400 - Loss: 2.9667


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 352/400 - Loss: 2.9669


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 353/400 - Loss: 2.9663


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 354/400 - Loss: 2.9691


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 355/400 - Loss: 2.9673


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 356/400 - Loss: 2.9674


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 357/400 - Loss: 2.9681


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 358/400 - Loss: 2.9686


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 359/400 - Loss: 2.9683


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 360/400 - Loss: 2.9663


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 361/400 - Loss: 2.9646


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 362/400 - Loss: 2.9683


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 363/400 - Loss: 2.9688


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 364/400 - Loss: 2.9677


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 365/400 - Loss: 2.9675


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 366/400 - Loss: 2.9687


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 367/400 - Loss: 2.9705


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 368/400 - Loss: 2.9676


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 369/400 - Loss: 2.9672


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 370/400 - Loss: 2.9644
Saved best model with loss: 2.9644


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 371/400 - Loss: 2.9668


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 372/400 - Loss: 2.9618
Saved best model with loss: 2.9618


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 373/400 - Loss: 2.9656


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 374/400 - Loss: 2.9719


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 375/400 - Loss: 2.9683


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 376/400 - Loss: 2.9691


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 377/400 - Loss: 2.9692


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 378/400 - Loss: 2.9672


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 379/400 - Loss: 2.9691


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 380/400 - Loss: 2.9665


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 381/400 - Loss: 2.9645


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 382/400 - Loss: 2.9652


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 383/400 - Loss: 2.9673


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 384/400 - Loss: 2.9688


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 385/400 - Loss: 2.9679


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 386/400 - Loss: 2.9685


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 387/400 - Loss: 2.9699


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 388/400 - Loss: 2.9679


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 389/400 - Loss: 2.9668


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 390/400 - Loss: 2.9655


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 391/400 - Loss: 2.9670


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 392/400 - Loss: 2.9677


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 393/400 - Loss: 2.9659


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 394/400 - Loss: 2.9664


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 395/400 - Loss: 2.9664


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 396/400 - Loss: 2.9674


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 397/400 - Loss: 2.9690


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 398/400 - Loss: 2.9652


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 399/400 - Loss: 2.9667


Batches:   0%|          | 0/360 [00:00<?, ?it/s]

Validation:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 400/400 - Loss: 2.9684
Finished Training at 20250324-140035 with best validation accuracy 0.0000
Testing model...


In [22]:
print(test_acc)

0
