# In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader, Dataset
from torchvision.datasets.utils import download_url
from torchvision.datasets.folder import default_loader
import os
import zipfile
import random
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F

def unfreeze(model):
    """
    Mark every parameter of the given model as trainable.

    Args:
        model (torch.nn.Module): The model whose parameters should receive gradients.
    """
    for tensor in model.parameters():
        tensor.requires_grad_(True)

def copy_and_freeze(student_layer, teacher_layer):
    """
    Copy the learned state of the teacher layer into the student layer.

    The weight is always copied. The bias is copied when both layers have one,
    and BatchNorm running statistics (``running_mean`` / ``running_var``) are
    copied when both layers track them, so a copied normalization layer starts
    with the teacher's statistics rather than a mix of teacher scale and
    freshly initialized shift/statistics.

    Despite the name, nothing is frozen: the copied parameters remain trainable
    (see the note at the end of the function).

    Tensors are rebound through ``.data`` rather than copied element-wise, so
    the student tensor takes on the teacher tensor's shape; no shape check is
    performed here.

    Args:
        student_layer: The layer in the student model to copy state to.
        teacher_layer: The layer in the teacher model to copy state from.
    """
    student_layer.weight.data = teacher_layer.weight.data.clone()

    # Bias exists on Linear / BatchNorm layers but is None on the bias-free convs.
    student_bias = getattr(student_layer, 'bias', None)
    teacher_bias = getattr(teacher_layer, 'bias', None)
    if student_bias is not None and teacher_bias is not None:
        student_bias.data = teacher_bias.data.clone()

    # Running statistics are buffers, present only on normalization layers.
    for buffer_name in ('running_mean', 'running_var'):
        student_buffer = getattr(student_layer, buffer_name, None)
        teacher_buffer = getattr(teacher_layer, buffer_name, None)
        if student_buffer is not None and teacher_buffer is not None:
            student_buffer.data = teacher_buffer.data.clone()

    # Freezing was tried and did not help, so the copied weights stay trainable.
    # student_layer.weight.requires_grad = False

class BasicBlock(nn.Module):
    """
    Residual block made of two 3x3 conv + BatchNorm stages with a skip connection.

    Args:
        in_channels: Number of channels of the block input.
        out_channels: Number of channels produced by both convolutions.
        stride: Stride of the first convolution (the second always uses 1).
        downsample: Optional module applied to the input before it is added
            back, used when the main path changes the tensor shape.
    """

    # Ratio between the block's output channels and ``out_channels``.
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        # Both convolutions share everything except their channel counts and stride.
        conv_kwargs = dict(kernel_size=3, padding=1, bias=False)

        self.conv1 = nn.Conv2d(in_channels, out_channels, stride=stride, **conv_kwargs)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)

        self.conv2 = nn.Conv2d(out_channels, out_channels, stride=1, **conv_kwargs)
        self.bn2 = nn.BatchNorm2d(out_channels)

        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Main path: conv -> bn -> relu -> conv -> bn.
        features = self.relu(self.bn1(self.conv1(x)))
        features = self.bn2(self.conv2(features))

        # Skip path: the raw input, projected when a downsample module was given.
        shortcut = x if self.downsample is None else self.downsample(x)

        features += shortcut
        return self.relu(features)

class ResNet10(nn.Module):
    """
    ResNet-style classifier built from BasicBlock stages.

    The network is a 3x3 stride-2 stem with max pooling, four stages of two
    BasicBlocks each (64, 128, 256 and 512 channels), global average pooling
    and a linear classifier.

    Args:
        num_classes: Size of the classifier output.
    """

    def __init__(self, num_classes=200):
        super().__init__()
        block = BasicBlock

        # Stem: 3x3 convolution instead of the usual larger kernel
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # (in_channels, out_channels, stride) for layer1..layer4, two blocks each
        stage_specs = ((64, 64, 1), (64, 128, 2), (128, 256, 2), (256, 512, 2))
        for stage_idx, (c_in, c_out, stage_stride) in enumerate(stage_specs, start=1):
            stage = self._make_layer(block, c_in, c_out, 2, stride=stage_stride)
            setattr(self, f'layer{stage_idx}', stage)

        # Head: global average pooling followed by the classifier
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        self._initialize_weights()

    def _make_layer(self, block, in_channels, out_channels, blocks, stride=1):
        """Build one stage of ``blocks`` blocks; only the first may change shape."""
        width = out_channels * block.expansion

        # A 1x1 projection is needed on the skip path whenever the first block
        # changes the spatial size or the channel count.
        needs_projection = stride != 1 or in_channels != width
        downsample = nn.Sequential(
            nn.Conv2d(in_channels, width, kernel_size=1, stride=stride, bias=False),
            nn.BatchNorm2d(width),
        ) if needs_projection else None

        stage = [block(in_channels, out_channels, stride, downsample)]
        stage.extend(block(width, out_channels) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        feature_extractor = (
            self.conv1, self.bn1, self.relu, self.maxpool,
            self.layer1, self.layer2, self.layer3, self.layer4,
            self.avgpool,
        )
        for module in feature_extractor:
            x = module(x)

        return self.fc(torch.flatten(x, 1))

    def _initialize_weights(self):
        """Kaiming-initialize convolutions and reset BatchNorm to identity scale/shift."""
        for module in self.modules():
            if isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')

class ResNetUM(nn.Module):
    """
    Student network with a single BasicBlock in each of its four stages.

    When ``teacher_model`` is given, the stem (``conv1`` / ``bn1``) and the
    classifier are seeded from it and stages 2-4 are seeded from ``ta_model``;
    ``layer1`` keeps its fresh initialization.

    Args:
        teacher_model: Optional model providing ``conv1``, ``bn1`` and ``fc``.
        ta_model: Optional teaching-assistant model providing ``layer2``,
            ``layer3`` and ``layer4``. When None, those stages are left at
            their fresh initialization.
        num_classes: Size of the classifier output.
    """

    def __init__(self, teacher_model=None, ta_model=None, num_classes=200):
        super().__init__()
        block = BasicBlock

        # Initial convolution layer with reduced kernel size
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Create layers with reduced depth (one block per stage)
        self.layer1 = self._make_layer(block, 64, 64, 1, stride=1)
        self.layer2 = self._make_layer(block, 64, 128, 1, stride=2)
        self.layer3 = self._make_layer(block, 128, 256, 1, stride=2)
        self.layer4 = self._make_layer(block, 256, 512, 1, stride=2)

        # Global average pooling
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Final fully connected layer
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Weight initialization
        self._initialize_weights()

        if teacher_model is not None:
            self._initialize_from_teacher(teacher_model, ta_model)

    def _make_layer(self, block, in_channels, out_channels, blocks, stride=1):
        """Build one stage of ``blocks`` blocks; only the first may change shape."""
        downsample = None
        if stride != 1 or in_channels != out_channels * block.expansion:
            # 1x1 projection so the skip connection matches the main path's shape
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion)
            )

        layers = []
        layers.append(block(in_channels, out_channels, stride, downsample))
        for _ in range(1, blocks):
            layers.append(block(out_channels * block.expansion, out_channels))

        return nn.Sequential(*layers)

    def _initialize_weights(self):
        """Kaiming-initialize convolutions and reset BatchNorm to identity scale/shift."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    @staticmethod
    def _copy_layer_blocks(student_layer, source_layer):
        """Copy conv/bn (and downsample) state block by block; extra source blocks are ignored."""
        for student_block, source_block in zip(student_layer, source_layer):
            # Copy conv weights
            copy_and_freeze(student_block.conv1, source_block.conv1)
            copy_and_freeze(student_block.conv2, source_block.conv2)

            # Copy batch norm weights
            copy_and_freeze(student_block.bn1, source_block.bn1)
            copy_and_freeze(student_block.bn2, source_block.bn2)

            # If downsample exists on both sides, copy its conv and batch norm
            if student_block.downsample is not None and source_block.downsample is not None:
                copy_and_freeze(student_block.downsample[0], source_block.downsample[0])
                copy_and_freeze(student_block.downsample[1], source_block.downsample[1])

    def _initialize_from_teacher(self, teacher_model, ta_model):
        """
        Seed the stem and classifier from the teacher and stages 2-4 from the TA.

        Args:
            teacher_model: Source of ``conv1``, ``bn1`` and ``fc``.
            ta_model: Source of ``layer2``-``layer4``; skipped when None.
        """
        # Copy initial convolution and batch norm
        copy_and_freeze(self.conv1, teacher_model.conv1)
        copy_and_freeze(self.bn1, teacher_model.bn1)

        # Copy last 3 layers from TA model (only when one was supplied)
        if ta_model is not None:
            layers_to_copy = [
                (self.layer2, ta_model.layer2),
                (self.layer3, ta_model.layer3),
                (self.layer4, ta_model.layer4),
            ]
            for student_layer, ta_layer in layers_to_copy:
                self._copy_layer_blocks(student_layer, ta_layer)

        # Copy fully connected layer weights. A teacher whose classifier has a
        # different shape would otherwise silently reshape this layer and break
        # the forward pass, so such a classifier is not copied.
        if teacher_model.fc.weight.shape == self.fc.weight.shape:
            copy_and_freeze(self.fc, teacher_model.fc)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

class ResNetUMTA1(nn.Module):
    """
    Teaching-assistant network with two BasicBlocks in stages 1-3 and one in stage 4.

    When ``teacher_model`` is given, the stem (``conv1`` / ``bn1``), stages 1-3
    and the classifier are seeded from it; ``layer4`` keeps its fresh
    initialization.

    Args:
        teacher_model: Optional model providing ``conv1``, ``bn1``,
            ``layer1``-``layer3`` and ``fc``.
        ta_model: Accepted so the constructor matches the sibling student
            classes; this class does not read it.
        num_classes: Size of the classifier output.
    """

    def __init__(self, teacher_model=None, ta_model=None, num_classes=200):
        super().__init__()
        block = BasicBlock

        # Initial convolution layer with reduced kernel size
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Create layers with reduced depth (only the last stage is shortened)
        self.layer1 = self._make_layer(block, 64, 64, 2, stride=1)
        self.layer2 = self._make_layer(block, 64, 128, 2, stride=2)
        self.layer3 = self._make_layer(block, 128, 256, 2, stride=2)
        self.layer4 = self._make_layer(block, 256, 512, 1, stride=2)

        # Global average pooling
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Final fully connected layer
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Weight initialization
        self._initialize_weights()

        if teacher_model is not None:
            self._initialize_from_teacher(teacher_model, ta_model)

    def _make_layer(self, block, in_channels, out_channels, blocks, stride=1):
        """Build one stage of ``blocks`` blocks; only the first may change shape."""
        downsample = None
        if stride != 1 or in_channels != out_channels * block.expansion:
            # 1x1 projection so the skip connection matches the main path's shape
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion)
            )

        layers = []
        layers.append(block(in_channels, out_channels, stride, downsample))
        for _ in range(1, blocks):
            layers.append(block(out_channels * block.expansion, out_channels))

        return nn.Sequential(*layers)

    def _initialize_weights(self):
        """Kaiming-initialize convolutions and reset BatchNorm to identity scale/shift."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    @staticmethod
    def _copy_layer_blocks(student_layer, source_layer):
        """Copy conv/bn (and downsample) state block by block; extra source blocks are ignored."""
        for student_block, source_block in zip(student_layer, source_layer):
            # Copy conv weights
            copy_and_freeze(student_block.conv1, source_block.conv1)
            copy_and_freeze(student_block.conv2, source_block.conv2)

            # Copy batch norm weights
            copy_and_freeze(student_block.bn1, source_block.bn1)
            copy_and_freeze(student_block.bn2, source_block.bn2)

            # If downsample exists on both sides, copy its conv and batch norm
            if student_block.downsample is not None and source_block.downsample is not None:
                copy_and_freeze(student_block.downsample[0], source_block.downsample[0])
                copy_and_freeze(student_block.downsample[1], source_block.downsample[1])

    def _initialize_from_teacher(self, teacher_model, ta_model):
        """
        Seed the stem, the first three stages and the classifier from the teacher.

        Args:
            teacher_model: Source of ``conv1``, ``bn1``, ``layer1``-``layer3`` and ``fc``.
            ta_model: Not read by this class.
        """
        # Copy initial convolution and batch norm
        copy_and_freeze(self.conv1, teacher_model.conv1)
        copy_and_freeze(self.bn1, teacher_model.bn1)

        # Copy first three layers from teacher model
        layers_to_copy = [
            (self.layer1, teacher_model.layer1),
            (self.layer2, teacher_model.layer2),
            (self.layer3, teacher_model.layer3)
        ]
        for student_layer, teacher_layer in layers_to_copy:
            self._copy_layer_blocks(student_layer, teacher_layer)

        # Copy fully connected layer weights. A teacher whose classifier has a
        # different shape would otherwise silently reshape this layer and break
        # the forward pass, so such a classifier is not copied.
        if teacher_model.fc.weight.shape == self.fc.weight.shape:
            copy_and_freeze(self.fc, teacher_model.fc)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

class ResNetUMTA2(nn.Module):
    """
    Teaching-assistant network with two BasicBlocks in stages 1-2 and one in stages 3-4.

    When ``teacher_model`` is given, the stem (``conv1`` / ``bn1``), stages 1-2
    and the classifier are seeded from it, and ``layer4`` is seeded from
    ``ta_model``; ``layer3`` keeps its fresh initialization.

    Args:
        teacher_model: Optional model providing ``conv1``, ``bn1``,
            ``layer1``, ``layer2`` and ``fc``.
        ta_model: Optional teaching-assistant model providing ``layer4``.
            When None, ``layer4`` is left at its fresh initialization.
        num_classes: Size of the classifier output.
    """

    def __init__(self, teacher_model=None, ta_model=None, num_classes=200):
        super().__init__()
        block = BasicBlock

        # Initial convolution layer with reduced kernel size
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Create layers with reduced depth (the last two stages are shortened)
        self.layer1 = self._make_layer(block, 64, 64, 2, stride=1)
        self.layer2 = self._make_layer(block, 64, 128, 2, stride=2)
        self.layer3 = self._make_layer(block, 128, 256, 1, stride=2)
        self.layer4 = self._make_layer(block, 256, 512, 1, stride=2)

        # Global average pooling
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Final fully connected layer
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Weight initialization
        self._initialize_weights()

        if teacher_model is not None:
            self._initialize_from_teacher(teacher_model, ta_model)

    def _make_layer(self, block, in_channels, out_channels, blocks, stride=1):
        """Build one stage of ``blocks`` blocks; only the first may change shape."""
        downsample = None
        if stride != 1 or in_channels != out_channels * block.expansion:
            # 1x1 projection so the skip connection matches the main path's shape
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion)
            )

        layers = []
        layers.append(block(in_channels, out_channels, stride, downsample))
        for _ in range(1, blocks):
            layers.append(block(out_channels * block.expansion, out_channels))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

    def _initialize_weights(self):
        """Kaiming-initialize convolutions and reset BatchNorm to identity scale/shift."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    @staticmethod
    def _copy_layer_blocks(student_layer, source_layer):
        """Copy conv/bn (and downsample) state block by block; extra source blocks are ignored."""
        for student_block, source_block in zip(student_layer, source_layer):
            # Copy conv weights
            copy_and_freeze(student_block.conv1, source_block.conv1)
            copy_and_freeze(student_block.conv2, source_block.conv2)

            # Copy batch norm weights
            copy_and_freeze(student_block.bn1, source_block.bn1)
            copy_and_freeze(student_block.bn2, source_block.bn2)

            # If downsample exists on both sides, copy its conv and batch norm
            if student_block.downsample is not None and source_block.downsample is not None:
                copy_and_freeze(student_block.downsample[0], source_block.downsample[0])
                copy_and_freeze(student_block.downsample[1], source_block.downsample[1])

    def _initialize_from_teacher(self, teacher_model, ta_model):
        """
        Seed the stem, stages 1-2 and classifier from the teacher and stage 4 from the TA.

        Args:
            teacher_model: Source of ``conv1``, ``bn1``, ``layer1``, ``layer2`` and ``fc``.
            ta_model: Source of ``layer4``; skipped when None.
        """
        # Copy initial convolution and batch norm
        copy_and_freeze(self.conv1, teacher_model.conv1)
        copy_and_freeze(self.bn1, teacher_model.bn1)

        # Copy first two layers from teacher model
        self._copy_layer_blocks(self.layer1, teacher_model.layer1)
        self._copy_layer_blocks(self.layer2, teacher_model.layer2)

        # Copy last layer from TA model (only when one was supplied)
        if ta_model is not None:
            self._copy_layer_blocks(self.layer4, ta_model.layer4)

        # Copy fully connected layer weights. A teacher whose classifier has a
        # different shape would otherwise silently reshape this layer and break
        # the forward pass, so such a classifier is not copied.
        if teacher_model.fc.weight.shape == self.fc.weight.shape:
            copy_and_freeze(self.fc, teacher_model.fc)

class ResNetUMTA3(nn.Module):
    """
    Teaching-assistant network with two BasicBlocks in stage 1 and one in stages 2-4.

    When ``teacher_model`` is given, the stem (``conv1`` / ``bn1``), ``layer1``
    and the classifier are seeded from it, and ``layer3`` / ``layer4`` are
    seeded from ``ta_model``; ``layer2`` keeps its fresh initialization.

    Args:
        teacher_model: Optional model providing ``conv1``, ``bn1``, ``layer1`` and ``fc``.
        ta_model: Optional teaching-assistant model providing ``layer3`` and
            ``layer4``. When None, those stages are left at their fresh
            initialization.
        num_classes: Size of the classifier output.
    """

    def __init__(self, teacher_model=None, ta_model=None, num_classes=200):
        super().__init__()
        block = BasicBlock

        # Initial convolution layer with reduced kernel size
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Create layers with reduced depth (only the first stage keeps two blocks)
        self.layer1 = self._make_layer(block, 64, 64, 2, stride=1)
        self.layer2 = self._make_layer(block, 64, 128, 1, stride=2)
        self.layer3 = self._make_layer(block, 128, 256, 1, stride=2)
        self.layer4 = self._make_layer(block, 256, 512, 1, stride=2)

        # Global average pooling
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        # Final fully connected layer
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Weight initialization
        self._initialize_weights()

        if teacher_model is not None:
            self._initialize_from_teacher(teacher_model, ta_model)

    def _make_layer(self, block, in_channels, out_channels, blocks, stride=1):
        """Build one stage of ``blocks`` blocks; only the first may change shape."""
        downsample = None
        if stride != 1 or in_channels != out_channels * block.expansion:
            # 1x1 projection so the skip connection matches the main path's shape
            downsample = nn.Sequential(
                nn.Conv2d(in_channels, out_channels * block.expansion, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels * block.expansion)
            )

        layers = []
        layers.append(block(in_channels, out_channels, stride, downsample))
        for _ in range(1, blocks):
            layers.append(block(out_channels * block.expansion, out_channels))

        return nn.Sequential(*layers)

    def _initialize_weights(self):
        """Kaiming-initialize convolutions and reset BatchNorm to identity scale/shift."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    @staticmethod
    def _copy_layer_blocks(student_layer, source_layer):
        """Copy conv/bn (and downsample) state block by block; extra source blocks are ignored."""
        for student_block, source_block in zip(student_layer, source_layer):
            # Copy conv weights
            copy_and_freeze(student_block.conv1, source_block.conv1)
            copy_and_freeze(student_block.conv2, source_block.conv2)

            # Copy batch norm weights
            copy_and_freeze(student_block.bn1, source_block.bn1)
            copy_and_freeze(student_block.bn2, source_block.bn2)

            # If downsample exists on both sides, copy its conv and batch norm
            if student_block.downsample is not None and source_block.downsample is not None:
                copy_and_freeze(student_block.downsample[0], source_block.downsample[0])
                copy_and_freeze(student_block.downsample[1], source_block.downsample[1])

    def _initialize_from_teacher(self, teacher_model, ta_model):
        """
        Seed the stem, stage 1 and classifier from the teacher and stages 3-4 from the TA.

        Args:
            teacher_model: Source of ``conv1``, ``bn1``, ``layer1`` and ``fc``.
            ta_model: Source of ``layer3`` and ``layer4``; skipped when None.
        """
        # Copy initial convolution and batch norm
        copy_and_freeze(self.conv1, teacher_model.conv1)
        copy_and_freeze(self.bn1, teacher_model.bn1)

        # Copy first layer from teacher model
        self._copy_layer_blocks(self.layer1, teacher_model.layer1)

        # Copy last 2 layers from TA model (only when one was supplied)
        if ta_model is not None:
            self._copy_layer_blocks(self.layer3, ta_model.layer3)
            self._copy_layer_blocks(self.layer4, ta_model.layer4)

        # Copy fully connected layer weights. A teacher whose classifier has a
        # different shape would otherwise silently reshape this layer and break
        # the forward pass, so such a classifier is not copied.
        if teacher_model.fc.weight.shape == self.fc.weight.shape:
            copy_and_freeze(self.fc, teacher_model.fc)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)

        return x

# Ensure reproducibility: seed both Python's and PyTorch's random generators
random.seed(0)
torch.manual_seed(0)

# Device configuration: use the GPU when one is available, otherwise the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print('Using device:', device)

# Define paths: dataset root plus its train / val split folders
data_dir = './tiny-imagenet-200'
train_dir, val_dir = (os.path.join(data_dir, split) for split in ('train', 'val'))

# Download and extract Tiny ImageNet dataset
def download_and_extract_tiny_imagenet():
    """
    Download the Tiny ImageNet archive and unpack it next to this script.

    Nothing is done when ``data_dir`` already exists. The dataset folder is
    created only by the extraction itself, and a failed extraction removes
    whatever was unpacked, so an interrupted run cannot leave behind a folder
    that later runs would mistake for a complete dataset.

    Raises:
        zipfile.BadZipFile: If the downloaded archive is corrupt (the archive
            is deleted so the next run downloads it again).
    """
    import shutil

    if os.path.exists(data_dir):
        print('Tiny ImageNet dataset already exists.')
        return

    url = 'http://cs231n.stanford.edu/tiny-imagenet-200.zip'
    filename = 'tiny-imagenet-200.zip'
    zip_path = os.path.join('./', filename)
    print('Downloading Tiny ImageNet dataset...')
    download_url(url, root='./', filename=filename)
    print('Extracting Tiny ImageNet dataset...')
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall('./')
    except zipfile.BadZipFile:
        # A corrupt archive would otherwise be reused by the next run.
        os.remove(zip_path)
        shutil.rmtree(data_dir, ignore_errors=True)
        raise
    except BaseException:
        # data_dir did not exist before this call, so anything there now is a
        # partial extraction; drop it so the next run starts clean.
        shutil.rmtree(data_dir, ignore_errors=True)
        raise
    os.remove(zip_path)
    print('Dataset downloaded and extracted.')

download_and_extract_tiny_imagenet()

# Prepare validation data
def prepare_val_folder():
    """
    Reorganize the validation split into one ``<class>/images`` folder per class.

    Reads ``val_annotations.txt`` (tab-separated, image name then class id in
    the first two columns), moves each image from ``val/images`` into
    ``val/<class>/images`` and finally removes the emptied ``val/images``
    folder. Does nothing when ``val/images`` is absent, which is the state
    after a completed run.

    Raises:
        OSError: If ``val/images`` still contains files after all annotated
            images were moved.
    """
    val_img_dir = os.path.join(val_dir, 'images')
    if not os.path.exists(val_img_dir):
        return
    # Read val annotations file
    val_annotations_file = os.path.join(val_dir, 'val_annotations.txt')
    val_img_dict = {}
    with open(val_annotations_file, 'r') as f:
        for line in f:
            parts = line.strip().split('\t')
            if len(parts) < 2:
                # Blank or malformed line: no image/class pair to record.
                continue
            val_img_dict[parts[0]] = parts[1]

    # Create folders for validation images
    print('Organizing validation images...')
    for img, cls in tqdm(val_img_dict.items()):
        cls_img_dir = os.path.join(val_dir, cls, 'images')
        # exist_ok lets a re-run after an interruption pick up where it stopped.
        os.makedirs(cls_img_dir, exist_ok=True)
        img_src = os.path.join(val_img_dir, img)
        img_dst = os.path.join(cls_img_dir, img)
        if os.path.exists(img_src):
            os.rename(img_src, img_dst)
    os.rmdir(val_img_dir)
    print('Validation images organized.')

prepare_val_folder()

# Define data transformations
# Training pipeline: random rotation and horizontal flip for augmentation,
# then resize to 224x224 and convert to a tensor.
transform_train = transforms.Compose([
    transforms.RandomRotation(degrees=20),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
])

# Evaluation pipeline: the same resize and tensor conversion, no augmentation.
transform_test = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
])

# Create custom dataset class
class TinyImageNetDataset(Dataset):
    """
    Dataset over a ``<root>/<split>/<class>/images/<file>`` folder layout.

    Only entries of the split folder that contain an ``images`` sub-directory
    are treated as classes, so stray files or folders in the split directory
    cannot occupy or shift class indices. Classes and the files within each
    class are visited in sorted order, which keeps sample indices stable
    across runs and file systems.

    Args:
        root: Dataset root containing the ``train`` and ``val`` folders.
        train: Read the ``train`` split when True, otherwise ``val``.
        transform: Optional callable applied to each loaded image.
    """

    def __init__(self, root, train=True, transform=None):
        self.root = root
        self.transform = transform
        self.images = []   # image file paths, index-aligned with self.labels
        self.labels = []   # integer class index per image
        self.train = train
        self._load_data()

    def _load_data(self):
        """Scan the split folder and fill ``images``, ``labels``, ``classes`` and ``class_to_idx``."""
        split_dir = os.path.join(self.root, 'train' if self.train else 'val')

        # A class is any entry that actually holds an images/ folder.
        classes = sorted(
            entry for entry in os.listdir(split_dir)
            if os.path.isdir(os.path.join(split_dir, entry, 'images'))
        )
        class_to_idx = {cls_name: idx for idx, cls_name in enumerate(classes)}

        for cls_name in classes:
            cls_dir = os.path.join(split_dir, cls_name, 'images')
            # os.listdir order is arbitrary; sort for a reproducible sample order.
            for img_name in sorted(os.listdir(cls_dir)):
                self.images.append(os.path.join(cls_dir, img_name))
                self.labels.append(class_to_idx[cls_name])

        self.classes = classes
        self.class_to_idx = class_to_idx

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        """Return the ``(image, label)`` pair at ``idx``, with the transform applied if set."""
        img_path = self.images[idx]
        label = self.labels[idx]
        image = default_loader(img_path)
        if self.transform:
            image = self.transform(image)
        return image, label

# Load datasets (train split with augmentation, val split without)
train_dataset = TinyImageNetDataset(root=data_dir, train=True, transform=transform_train)
test_dataset = TinyImageNetDataset(root=data_dir, train=False, transform=transform_test)

# Set a larger batch size
batch_size = 256  # Adjust this value based on your GPU memory
print('Batch size:', batch_size)

# Settings shared by both loaders; only shuffling differs between them
_loader_kwargs = dict(batch_size=batch_size, num_workers=4, pin_memory=True)
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)

# Define function to create and modify ResNet models
def create_resnet_model(name, num_classes=200, pretrained=True, teacher_model=None, ta_model=None):
    """
    Build one of the supported models with a ``num_classes``-way classifier.

    Torchvision models always get a freshly initialized final layer. The
    custom student / TA models are already built with ``num_classes`` outputs
    and may have had their classifier seeded from the teacher, so their final
    layer is kept and only replaced when its weight no longer has the expected
    ``(num_classes, in_features)`` shape.

    Args:
        name: One of 'resnetum', 'resnetum_ta1', 'resnetum_ta2',
            'resnetum_ta3', 'resnet10', 'resnet18', 'resnet34', 'resnet101'.
        num_classes: Size of the classifier output.
        pretrained: Passed to the torchvision constructors only.
        teacher_model: Optional teacher passed to the custom student / TA models.
        ta_model: Optional teaching assistant passed to the custom student / TA models.

    Returns:
        The constructed model.

    Raises:
        ValueError: If ``name`` is not a supported model name.
    """
    from_torchvision = False
    if name == 'resnetum':
        model = ResNetUM(num_classes=num_classes, teacher_model=teacher_model, ta_model=ta_model)
    elif name == 'resnetum_ta1':
        model = ResNetUMTA1(num_classes=num_classes, teacher_model=teacher_model, ta_model=ta_model)
    elif name == 'resnetum_ta2':
        model = ResNetUMTA2(num_classes=num_classes, teacher_model=teacher_model, ta_model=ta_model)
    elif name == 'resnetum_ta3':
        model = ResNetUMTA3(num_classes=num_classes, teacher_model=teacher_model, ta_model=ta_model)
    elif name == 'resnet10':
        model = ResNet10(num_classes=num_classes)
    elif name == 'resnet18':
        model = torchvision.models.resnet18(pretrained=pretrained)
        from_torchvision = True
    elif name == 'resnet34':
        model = torchvision.models.resnet34(pretrained=pretrained)
        from_torchvision = True
    elif name == 'resnet101':
        model = torchvision.models.resnet101(pretrained=pretrained)
        from_torchvision = True
    else:
        raise ValueError('Invalid model name')

    # Modify the final layer to match num_classes
    if name != 'resnet10':
        num_ftrs = model.fc.in_features
        # Replacing a correctly shaped classifier on a custom model would throw
        # away the weights it copied from the teacher, so only replace it when
        # the copy left it with an unusable shape.
        fc_is_usable = tuple(model.fc.weight.shape) == (num_classes, num_ftrs)
        if from_torchvision or not fc_is_usable:
            model.fc = nn.Linear(num_ftrs, num_classes)

    return model

from torch.cuda.amp import autocast, GradScaler

# Function to train a model normally (used for teacher model)
def _evaluate_accuracy(model, data_loader, device):
    """Return the top-1 accuracy (in percent) of ``model`` over ``data_loader``; 0.0 if it is empty."""
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, targets in data_loader:
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)
            with autocast():
                outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += targets.size(0)
            correct += (predicted == targets).sum().item()

    return 100 * correct / total if total else 0.0


def train_model(model, train_loader, test_loader, num_epochs=10, base_lr=0.1, device='cuda', save_path='best_model.pth'):
    """
    Train ``model`` with SGD, mixed precision and cosine LR annealing.

    After every epoch the model is evaluated on ``test_loader`` and its
    state dict is written to ``save_path`` whenever the accuracy improves.

    Args:
        model: Model to train; moved to ``device``.
        train_loader: Loader yielding ``(inputs, targets)`` training batches.
        test_loader: Loader yielding ``(inputs, targets)`` evaluation batches.
        num_epochs: Number of epochs, also used as the cosine schedule length.
        base_lr: Learning rate for a batch size of 256; scaled linearly with
            the batch size of ``train_loader``.
        device: Device the model and batches are moved to.
        save_path: File the best state dict is saved to.

    Returns:
        The best evaluation accuracy (in percent) seen over all epochs.
    """
    criterion = nn.CrossEntropyLoss()

    # Scale the LR by the batch size of the loader actually being trained on,
    # not by a module-level constant; loaders without a fixed batch size
    # (e.g. built from a batch sampler) keep the base LR.
    loader_batch_size = getattr(train_loader, 'batch_size', None)
    lr_scale = loader_batch_size / 256 if loader_batch_size else 1.0
    adjusted_lr = base_lr * lr_scale
    print('Adjusted learning rate:', adjusted_lr)
    optimizer = optim.SGD(model.parameters(), lr=adjusted_lr,
                          momentum=0.9, weight_decay=5e-4)
    scaler = GradScaler()
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    model.to(device)

    best_acc = 0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for batch_idx, (inputs, targets) in enumerate(tqdm(train_loader)):
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)

            optimizer.zero_grad()
            with autocast():
                outputs = model(inputs)
                loss = criterion(outputs, targets)

            # Scale the loss so small gradients survive half precision; the
            # scaler unscales them again before the optimizer step.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()

            # Report the mean loss of the last 10 steps
            if (batch_idx+1) % 10 == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                      % (epoch+1, num_epochs, batch_idx+1, len(train_loader), running_loss/10))
                running_loss = 0.0

        # Validation
        acc = _evaluate_accuracy(model, test_loader, device)
        print('Test Accuracy of the model on the test images: {:.2f} %'.format(acc))

        if acc > best_acc:
            best_acc = acc
            # Save the best model
            torch.save(model.state_dict(), save_path)
            print(f"Saved best model to {save_path}")

        scheduler.step()

    print('Best Accuracy: {:.2f} %'.format(best_acc))
    return best_acc

# Function for knowledge distillation from teacher to student
def train_kd(student_model, teacher_model, train_loader, test_loader, num_epochs=10, base_lr=0.1, temperature=4, alpha=0.9, device='cuda', save_path='best_student_model.pth'):
    """Train a student network by distilling from a single frozen teacher.

    The per-batch objective is
    ``alpha * KD + (1 - alpha) * CE`` where ``CE`` is the cross-entropy of the
    student logits against the labels and ``KD`` is the batch-mean KL
    divergence between the temperature-softened student log-probabilities and
    teacher probabilities, multiplied by ``temperature ** 2``.

    Args:
        student_model: Model being optimised; moved to ``device``.
        teacher_model: Model providing soft targets; moved to ``device`` and
            put in eval mode. Its forward pass runs under ``torch.no_grad()``.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        test_loader: Iterable of ``(inputs, targets)`` evaluation batches.
        num_epochs: Number of epochs; also used as ``T_max`` for cosine
            annealing.
        base_lr: Learning rate before batch-size scaling.
        temperature: Divisor applied to logits before (log-)softmax.
        alpha: Weight of the distillation term.
        device: Target passed to ``.to(...)`` for models and batches.
        save_path: File that receives the student ``state_dict`` whenever the
            epoch accuracy strictly exceeds the best seen so far.

    Returns:
        The best accuracy (in percent) measured on ``test_loader``.

    Note:
        The learning rate is scaled by the module-level ``batch_size``
        (not a parameter of this function) relative to 256.
    """
    hard_loss_fn = nn.CrossEntropyLoss()
    kd_loss_fn = nn.KLDivLoss(reduction='batchmean')

    # Linear LR scaling against a reference batch of 256.
    adjusted_lr = base_lr * (batch_size / 256)
    print('Adjusted learning rate:', adjusted_lr)

    optimizer = optim.SGD(
        student_model.parameters(),
        lr=adjusted_lr,
        momentum=0.9,
        weight_decay=5e-4,
    )
    scaler = GradScaler()
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    student_model.to(device)
    teacher_model.to(device)
    teacher_model.eval()

    best_acc = 0

    for epoch in range(num_epochs):
        # ---- training pass ----
        student_model.train()
        window_loss = 0.0  # loss accumulated since the last progress print

        for step, (images, labels) in enumerate(tqdm(train_loader), start=1):
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            optimizer.zero_grad()

            with autocast():
                student_logits = student_model(images)
                with torch.no_grad():
                    teacher_logits = teacher_model(images)

                ce_term = hard_loss_fn(student_logits, labels)

                student_log_probs = F.log_softmax(student_logits/temperature, dim=1)
                teacher_probs = F.softmax(teacher_logits/temperature, dim=1)
                kd_term = kd_loss_fn(student_log_probs, teacher_probs) * (temperature ** 2)

                loss = alpha * kd_term + (1 - alpha) * ce_term

            # Mixed-precision update: scaled backward, then step and rescale.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            window_loss += loss.item()

            if step % 10 != 0:
                continue
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch+1, num_epochs, step, len(train_loader), window_loss/10))
            window_loss = 0.0

        # ---- evaluation pass ----
        student_model.eval()
        total = 0
        correct = 0

        with torch.no_grad():
            for images, labels in test_loader:
                images = images.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)
                with autocast():
                    logits = student_model(images)
                predicted = logits.max(dim=1).indices
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        acc = 100 * correct / total
        print('Test Accuracy of the student model on the test images: {:.2f} %'.format(acc))

        # Checkpoint only on a strict improvement.
        if acc > best_acc:
            best_acc = acc
            torch.save(student_model.state_dict(), save_path)
            print(f"Saved best model to {save_path}")

        scheduler.step()

    print('Best Accuracy: {:.2f} %'.format(best_acc))
    return best_acc

# Function for knowledge distillation with both teacher and TA (simple average)
def train_kd_with_ta(student_model, teacher_model, ta_model, train_loader, test_loader, num_epochs=10, base_lr=0.1, temperature=4, alpha=0.9, device='cuda', save_path='best_student_model.pth'):
    """Distil a student from the equal-weight average of a teacher and a TA.

    The soft target for each batch is the arithmetic mean of the
    temperature-softened softmax outputs of ``teacher_model`` and
    ``ta_model``. The per-batch objective is
    ``alpha * KD + (1 - alpha) * CE`` where ``KD`` is the batch-mean KL
    divergence of the student's softened log-probabilities against that mean,
    multiplied by ``temperature ** 2``, and ``CE`` is the label cross-entropy.

    Args:
        student_model: Model being optimised; moved to ``device``.
        teacher_model: First soft-target provider; moved to ``device`` and put
            in eval mode.
        ta_model: Second soft-target provider; moved to ``device`` and put in
            eval mode.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        test_loader: Iterable of ``(inputs, targets)`` evaluation batches.
        num_epochs: Number of epochs; also used as ``T_max`` for cosine
            annealing.
        base_lr: Learning rate before batch-size scaling.
        temperature: Divisor applied to logits before (log-)softmax.
        alpha: Weight of the distillation term.
        device: Target passed to ``.to(...)`` for models and batches.
        save_path: File that receives the student ``state_dict`` whenever the
            epoch accuracy strictly exceeds the best seen so far.

    Returns:
        The best accuracy (in percent) measured on ``test_loader``.

    Note:
        The learning rate is scaled by the module-level ``batch_size``
        (not a parameter of this function) relative to 256.
    """
    hard_loss_fn = nn.CrossEntropyLoss()
    kd_loss_fn = nn.KLDivLoss(reduction='batchmean')

    # Linear LR scaling against a reference batch of 256.
    adjusted_lr = base_lr * (batch_size / 256)
    print('Adjusted learning rate:', adjusted_lr)

    optimizer = optim.SGD(
        student_model.parameters(),
        lr=adjusted_lr,
        momentum=0.9,
        weight_decay=5e-4,
    )
    scaler = GradScaler()
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)

    student_model.to(device)
    teacher_model.to(device)
    teacher_model.eval()
    ta_model.to(device)
    ta_model.eval()

    best_acc = 0

    for epoch in range(num_epochs):
        # ---- training pass ----
        student_model.train()
        window_loss = 0.0  # loss accumulated since the last progress print

        for step, (images, labels) in enumerate(tqdm(train_loader), start=1):
            images = images.to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)

            optimizer.zero_grad()

            with autocast():
                student_logits = student_model(images)
                with torch.no_grad():
                    teacher_logits = teacher_model(images)
                    ta_logits = ta_model(images)
                    # Equal-weight mixture of the two softened distributions.
                    teacher_probs = F.softmax(teacher_logits/temperature, dim=1)
                    ta_probs = F.softmax(ta_logits/temperature, dim=1)
                    mean_probs = (teacher_probs + ta_probs) / 2

                ce_term = hard_loss_fn(student_logits, labels)

                student_log_probs = F.log_softmax(student_logits/temperature, dim=1)
                kd_term = kd_loss_fn(student_log_probs, mean_probs) * (temperature ** 2)

                loss = alpha * kd_term + (1 - alpha) * ce_term

            # Mixed-precision update: scaled backward, then step and rescale.
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            window_loss += loss.item()

            if step % 10 != 0:
                continue
            print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                  % (epoch+1, num_epochs, step, len(train_loader), window_loss/10))
            window_loss = 0.0

        # ---- evaluation pass ----
        student_model.eval()
        total = 0
        correct = 0

        with torch.no_grad():
            for images, labels in test_loader:
                images = images.to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)
                with autocast():
                    logits = student_model(images)
                predicted = logits.max(dim=1).indices
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        acc = 100 * correct / total
        print('Test Accuracy of the student model on the test images: {:.2f} %'.format(acc))

        # Checkpoint only on a strict improvement.
        if acc > best_acc:
            best_acc = acc
            torch.save(student_model.state_dict(), save_path)
            print(f"Saved best model to {save_path}")

        scheduler.step()

    print('Best Accuracy: {:.2f} %'.format(best_acc))
    return best_acc

# Function for the new distillation algorithm
def train_kd_new_algorithm(student_model, teacher_model, ta_model, train_loader, test_loader, num_epochs=10, base_lr=0.1, temp=5, alpha=0.9, device='cuda', save_path='best_student_model.pth'):
    """Distil a student from a teacher and a TA with per-sample dynamic weights.

    For every sample the two guides (teacher, TA) receive weights
    ``w_teacher`` and ``w_ta`` that sum to 1:

    1. *Confidence*: a softmax over the negated per-sample cross-entropies of
       the two guides against the true label, so the guide with the lower
       loss on that sample gets the larger confidence.
    2. *Disagreement*: ``kl_factor = sigmoid(KL)`` where ``KL`` is the
       per-sample ``KLDivLoss`` with the teacher's softened log-probabilities
       as input and the TA's softened probabilities as target.
    3. *Blend*: ``w = (1 - kl_factor) * 0.5 + kl_factor * confidence``, i.e.
       the more the guides disagree, the more the weights follow confidence
       instead of a uniform 0.5/0.5 split.

    The per-sample loss is
    ``alpha * (w_teacher * KD_teacher + w_ta * KD_ta) + (1 - alpha) * CE``,
    where each ``KD_*`` is the student-vs-guide KL divergence at temperature
    ``temp`` multiplied by ``temp ** 2``; the batch loss is its mean.

    Args:
        student_model: Model being optimised; moved to ``device``.
        teacher_model: First guide; moved to ``device`` and put in eval mode.
        ta_model: Second guide; moved to ``device`` and put in eval mode.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        test_loader: Iterable of ``(inputs, targets)`` evaluation batches.
        num_epochs: Number of epochs; also used as ``T_max`` for cosine
            annealing.
        base_lr: Learning rate before batch-size scaling.
        temp: Divisor applied to logits before (log-)softmax.
        alpha: Weight of the combined distillation term.
        device: Target passed to ``.to(...)`` for models and batches.
        save_path: File that receives the student ``state_dict`` whenever the
            epoch accuracy strictly exceeds the best seen so far.

    Returns:
        The best accuracy (in percent) measured on ``test_loader``.

    Note:
        The learning rate is scaled by the module-level ``batch_size``
        (not a parameter of this function) relative to 256.
    """
    criterion = nn.CrossEntropyLoss(reduction='none')  # per-sample loss
    kl_criterion = nn.KLDivLoss(reduction='none')  # per-element; summed over classes below

    adjusted_lr = base_lr * (batch_size / 256)
    print('Adjusted learning rate:', adjusted_lr)

    optimizer = optim.SGD(student_model.parameters(), lr=adjusted_lr,
                          momentum=0.9, weight_decay=5e-4)
    scaler = GradScaler()
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    student_model.to(device)
    teacher_model.to(device)
    teacher_model.eval()
    ta_model.to(device)
    ta_model.eval()

    best_acc = 0

    for epoch in range(num_epochs):
        # Ensure every student parameter is trainable. The call only sets
        # requires_grad=True, so repeating it each epoch is harmless.
        # (Previously this line was left over-indented after its guarding
        # `if` was commented out, which made the file unparseable.)
        unfreeze(student_model)
        student_model.train()
        running_loss = 0.0

        for batch_idx, (data, target) in enumerate(tqdm(train_loader)):
            data = data.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            optimizer.zero_grad()

            with autocast():
                output = student_model(data)

                # Everything derived only from the two guides is gradient-free.
                with torch.no_grad():
                    teacher_outputs = teacher_model(data)
                    ta_outputs = ta_model(data)

                    # Per-sample cross-entropy of each guide against the labels.
                    ce_teacher = criterion(teacher_outputs, target)  # shape: [batch_size]
                    ce_ta = criterion(ta_outputs, target)  # shape: [batch_size]

                    # Confidence: softmax over negated losses (lower loss -> higher score).
                    neg_ce = torch.stack([-ce_teacher, -ce_ta], dim=1)  # shape: [batch_size, 2]
                    conf_scores = F.softmax(neg_ce, dim=1)
                    conf_teacher = conf_scores[:, 0]
                    conf_ta = conf_scores[:, 1]

                    # Temperature-softened guide distributions.
                    teacher_pred = F.softmax(teacher_outputs / temp, dim=1)
                    ta_pred = F.softmax(ta_outputs / temp, dim=1)

                    # Disagreement between the guides, summed over classes.
                    kl_teacher_ta = kl_criterion(
                        F.log_softmax(teacher_outputs / temp, dim=1),
                        ta_pred
                    ).sum(dim=1)  # shape: [batch_size]
                    kl_factor = torch.sigmoid(kl_teacher_ta)

                    # Blend uniform weights with confidence; w_teacher + w_ta == 1.
                    w_teacher = (1 - kl_factor) * 0.5 + kl_factor * conf_teacher
                    w_ta = (1 - kl_factor) * 0.5 + kl_factor * conf_ta

                # Standard per-sample classification loss of the student.
                hard_loss = criterion(output, target)  # shape: [batch_size]

                # Student log-probabilities are shared by both KD terms.
                student_log_prob = F.log_softmax(output / temp, dim=1)
                kl_student_teacher = kl_criterion(student_log_prob, teacher_pred).sum(dim=1)
                kl_student_ta = kl_criterion(student_log_prob, ta_pred).sum(dim=1)

                soft_loss_teacher = w_teacher * kl_student_teacher * (temp ** 2)
                soft_loss_ta = w_ta * kl_student_ta * (temp ** 2)

                total_loss = alpha * (soft_loss_teacher + soft_loss_ta) + (1 - alpha) * hard_loss

                # Average the per-sample losses over the batch.
                loss = total_loss.mean()

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()

            if (batch_idx+1) % 10 == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                      % (epoch+1, num_epochs, batch_idx+1, len(train_loader), running_loss/10))
                running_loss = 0.0

        # Validation
        student_model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs = inputs.to(device, non_blocking=True)
                targets = targets.to(device, non_blocking=True)
                with autocast():
                    outputs = student_model(inputs)
                predicted = outputs.argmax(dim=1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        acc = 100 * correct / total
        print('Test Accuracy of the student model on the test images: {:.2f} %'.format(acc))

        if acc > best_acc:
            best_acc = acc
            # Save the best model
            torch.save(student_model.state_dict(), save_path)
            print(f"Saved best model to {save_path}")

        scheduler.step()

    print('Best Accuracy: {:.2f} %'.format(best_acc))
    return best_acc

# Set up device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

# Clear cache
torch.cuda.empty_cache()


# Load the Teacher Model (ResNet-101)
print('Loading Teacher Model (ResNet-101)')
teacher_model = create_resnet_model('resnet101', num_classes=200, pretrained=False)
# map_location remaps tensors saved on another device (e.g. a GPU checkpoint
# on the CPU fallback); weights_only limits unpickling to plain tensor data.
teacher_model.load_state_dict(torch.load('/content/best_model.pth', map_location=device, weights_only=True))
teacher_model = teacher_model.to(device)
teacher_model.eval()

print('Loading TA Model (ResNet-10)')
ta_model = create_resnet_model('resnet10', num_classes=200, pretrained=False)
ta_model.load_state_dict(torch.load('/content/resnet_10_tf.pth', map_location=device, weights_only=True))
ta_model = ta_model.to(device)
ta_model.eval()

# these are control test comparisons

# Baseline: student trained on labels only.
print("NO KD")
no_kd_um = create_resnet_model('resnetum', num_classes=200, pretrained=False)
no_kd_um = no_kd_um.to(device)
no_kd_um_best_acc = train_model(no_kd_um, train_loader, test_loader, num_epochs=40, base_lr=0.1, device=device, save_path='no_kd_um.pth')

# Baseline: distil directly from the large teacher.
print("STANDARD KD")
standard_kd_um = create_resnet_model('resnetum', num_classes=200, pretrained=False)
standard_kd_um = standard_kd_um.to(device)
standard_kd_um_best_acc = train_kd(standard_kd_um, teacher_model, train_loader, test_loader, num_epochs=40, base_lr=0.1, temperature=4, alpha=0.9, device=device, save_path='standard_kd_um.pth')

# Baseline: distil from the TA only (the TA is passed as the "teacher").
print("TA")
student_model_alg3 = create_resnet_model('resnetum', num_classes=200, pretrained=False)
student_model_alg3 = student_model_alg3.to(device)
student_best_acc_alg3 = train_kd(student_model_alg3, ta_model, train_loader, test_loader, num_epochs=40, base_lr=0.1, temperature=4, alpha=0.9, device=device, save_path='student_model_alg3.pth')

# Baseline: equal-weight average of teacher and TA soft targets.
print("Average")
student_model_alg2 = create_resnet_model('resnetum', num_classes=200, pretrained=False)
student_model_alg2 = student_model_alg2.to(device)
student_best_acc_alg2 = train_kd_with_ta(student_model_alg2, teacher_model, ta_model, train_loader, test_loader, num_epochs=40, base_lr=0.1, temperature=4, alpha=0.9, device=device, save_path='student_model_alg2.pth')

# Dynamic per-sample weighting of teacher and TA. This run must use
# train_kd_new_algorithm; calling train_kd_with_ta here would just repeat the
# "Average" experiment above. The temperature stays at 4 to match the controls
# (the parameter is named `temp` in train_kd_new_algorithm).
print("DREAM Framework: Dynamic Weighting")
student_model_alg1 = create_resnet_model('resnetum', num_classes=200, pretrained=False)
student_model_alg1 = student_model_alg1.to(device)
student_best_acc_alg1 = train_kd_new_algorithm(student_model_alg1, teacher_model, ta_model, train_loader, test_loader, num_epochs=40, base_lr=0.1, temp=4, alpha=0.9, device=device, save_path='student_model_alg1.pth')

# Staged pipeline: each interim model is trained with dynamic weighting and
# then serves as the TA for the next stage.
print("DREAM Framework: Dynamic Weighting + Section Wise Bottom Up")

print("TA1")
interim_ta1 = create_resnet_model('resnetum_ta1', num_classes=200, pretrained=False, teacher_model=ta_model, ta_model=None)
interim_ta1 = interim_ta1.to(device)
interim_ta1_best_acc = train_kd_new_algorithm(interim_ta1, teacher_model, ta_model, train_loader, test_loader, num_epochs=40, base_lr=0.1, temp=5, alpha=0.9, device=device, save_path='interim_ta1.pth')

print("TA2")
interim_ta2 = create_resnet_model('resnetum_ta2', num_classes=200, pretrained=False, teacher_model=ta_model, ta_model=interim_ta1)
interim_ta2 = interim_ta2.to(device)
interim_ta2_best_acc = train_kd_new_algorithm(interim_ta2, teacher_model, interim_ta1, train_loader, test_loader, num_epochs=40, base_lr=0.1, temp=5, alpha=0.9, device=device, save_path='interim_ta2.pth')

print("TA3")
interim_ta3 = create_resnet_model('resnetum_ta3', num_classes=200, pretrained=False, teacher_model=ta_model, ta_model=interim_ta2)
interim_ta3 = interim_ta3.to(device)
interim_ta3_best_acc = train_kd_new_algorithm(interim_ta3, teacher_model, interim_ta2, train_loader, test_loader, num_epochs=40, base_lr=0.1, temp=5, alpha=0.9, device=device, save_path='interim_ta3.pth')

print("Final")
final_model = create_resnet_model('resnetum', num_classes=200, pretrained=False,  teacher_model=ta_model, ta_model=interim_ta3)
final_model = final_model.to(device)
final_model_best_acc = train_kd_new_algorithm(final_model, teacher_model, interim_ta3, train_loader, test_loader, num_epochs=40, base_lr=0.1, temp=5, alpha=0.9, device=device, save_path='final_model.pth')

Using device: cuda
Tiny ImageNet dataset already exists.
Batch size: 256
Using device: cuda
Loading Teacher Model (ResNet-101)


  teacher_model.load_state_dict(torch.load('/content/best_model.pth'))


Loading TA Model (ResNet-10)


  ta_model.load_state_dict(torch.load('/content/resnet_10_tf.pth'))
  scaler = GradScaler()


Section-wise
TA1
Adjusted learning rate: 0.1


  with autocast():
  3%|▎         | 11/391 [00:02<01:06,  5.75it/s]

Epoch [1/40], Step [10/391], Loss: 6.7759


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [1/40], Step [20/391], Loss: 6.3401


  8%|▊         | 31/391 [00:06<00:59,  6.06it/s]

Epoch [1/40], Step [30/391], Loss: 5.5719


 10%|█         | 41/391 [00:07<00:57,  6.05it/s]

Epoch [1/40], Step [40/391], Loss: 5.0250


 13%|█▎        | 51/391 [00:09<00:56,  6.05it/s]

Epoch [1/40], Step [50/391], Loss: 4.7170


 16%|█▌        | 61/391 [00:11<00:54,  6.06it/s]

Epoch [1/40], Step [60/391], Loss: 4.5026


 18%|█▊        | 71/391 [00:12<00:53,  6.03it/s]

Epoch [1/40], Step [70/391], Loss: 4.1670


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [1/40], Step [80/391], Loss: 4.0963


 23%|██▎       | 91/391 [00:16<00:49,  6.04it/s]

Epoch [1/40], Step [90/391], Loss: 3.8115


 26%|██▌       | 101/391 [00:17<00:47,  6.04it/s]

Epoch [1/40], Step [100/391], Loss: 3.6582


 28%|██▊       | 111/391 [00:19<00:46,  6.05it/s]

Epoch [1/40], Step [110/391], Loss: 3.4595


 31%|███       | 121/391 [00:20<00:44,  6.05it/s]

Epoch [1/40], Step [120/391], Loss: 3.3752


 34%|███▎      | 131/391 [00:22<00:43,  6.03it/s]

Epoch [1/40], Step [130/391], Loss: 3.3444


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [1/40], Step [140/391], Loss: 3.1815


 39%|███▊      | 151/391 [00:25<00:39,  6.06it/s]

Epoch [1/40], Step [150/391], Loss: 3.0520


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [1/40], Step [160/391], Loss: 3.0339


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [1/40], Step [170/391], Loss: 2.9157


 46%|████▋     | 181/391 [00:30<00:34,  6.04it/s]

Epoch [1/40], Step [180/391], Loss: 2.9318


 49%|████▉     | 191/391 [00:32<00:33,  6.05it/s]

Epoch [1/40], Step [190/391], Loss: 2.9230


 51%|█████▏    | 201/391 [00:34<00:31,  6.06it/s]

Epoch [1/40], Step [200/391], Loss: 2.8306


 54%|█████▍    | 211/391 [00:35<00:29,  6.05it/s]

Epoch [1/40], Step [210/391], Loss: 2.6649


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [1/40], Step [220/391], Loss: 2.6573


 59%|█████▉    | 231/391 [00:39<00:26,  6.05it/s]

Epoch [1/40], Step [230/391], Loss: 2.6897


 62%|██████▏   | 241/391 [00:40<00:24,  6.05it/s]

Epoch [1/40], Step [240/391], Loss: 2.5923


 64%|██████▍   | 251/391 [00:42<00:23,  6.05it/s]

Epoch [1/40], Step [250/391], Loss: 2.4845


 67%|██████▋   | 261/391 [00:44<00:21,  6.06it/s]

Epoch [1/40], Step [260/391], Loss: 2.4885


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [1/40], Step [270/391], Loss: 2.5003


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [1/40], Step [280/391], Loss: 2.4161


 74%|███████▍  | 291/391 [00:49<00:16,  6.06it/s]

Epoch [1/40], Step [290/391], Loss: 2.3600


 77%|███████▋  | 301/391 [00:50<00:14,  6.05it/s]

Epoch [1/40], Step [300/391], Loss: 2.4527


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [1/40], Step [310/391], Loss: 2.3844


 82%|████████▏ | 321/391 [00:53<00:11,  6.04it/s]

Epoch [1/40], Step [320/391], Loss: 2.3002


 85%|████████▍ | 331/391 [00:55<00:09,  6.03it/s]

Epoch [1/40], Step [330/391], Loss: 2.3026


 87%|████████▋ | 341/391 [00:57<00:08,  6.04it/s]

Epoch [1/40], Step [340/391], Loss: 2.2025


 90%|████████▉ | 351/391 [00:58<00:06,  6.05it/s]

Epoch [1/40], Step [350/391], Loss: 2.2448


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [1/40], Step [360/391], Loss: 2.2025


 95%|█████████▍| 371/391 [01:02<00:03,  6.06it/s]

Epoch [1/40], Step [370/391], Loss: 2.2226


 97%|█████████▋| 381/391 [01:03<00:01,  6.05it/s]

Epoch [1/40], Step [380/391], Loss: 2.1900


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [1/40], Step [390/391], Loss: 2.0904



  with autocast():


Test Accuracy of the student model on the test images: 44.26 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.49it/s]

Epoch [2/40], Step [10/391], Loss: 2.5183


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [2/40], Step [20/391], Loss: 3.1346


  8%|▊         | 31/391 [00:06<01:02,  5.73it/s]

Epoch [2/40], Step [30/391], Loss: 3.2320


 10%|█         | 41/391 [00:08<01:00,  5.74it/s]

Epoch [2/40], Step [40/391], Loss: 3.2859


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [2/40], Step [50/391], Loss: 3.1233


 16%|█▌        | 61/391 [00:11<00:57,  5.72it/s]

Epoch [2/40], Step [60/391], Loss: 3.0070


 18%|█▊        | 71/391 [00:13<00:55,  5.72it/s]

Epoch [2/40], Step [70/391], Loss: 2.8634


 21%|██        | 81/391 [00:15<00:54,  5.74it/s]

Epoch [2/40], Step [80/391], Loss: 2.9157


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [2/40], Step [90/391], Loss: 2.7908


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [2/40], Step [100/391], Loss: 2.8654


 28%|██▊       | 111/391 [00:20<00:48,  5.73it/s]

Epoch [2/40], Step [110/391], Loss: 2.6700


 31%|███       | 121/391 [00:22<00:47,  5.74it/s]

Epoch [2/40], Step [120/391], Loss: 2.6686


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [2/40], Step [130/391], Loss: 2.5835


 36%|███▌      | 141/391 [00:25<00:43,  5.73it/s]

Epoch [2/40], Step [140/391], Loss: 2.5712


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [2/40], Step [150/391], Loss: 2.5266


 41%|████      | 161/391 [00:28<00:40,  5.73it/s]

Epoch [2/40], Step [160/391], Loss: 2.4524


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [2/40], Step [170/391], Loss: 2.4672


 46%|████▋     | 181/391 [00:32<00:36,  5.72it/s]

Epoch [2/40], Step [180/391], Loss: 2.3987


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [2/40], Step [190/391], Loss: 2.4046


 51%|█████▏    | 201/391 [00:35<00:33,  5.74it/s]

Epoch [2/40], Step [200/391], Loss: 2.3914


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [2/40], Step [210/391], Loss: 2.4164


 57%|█████▋    | 221/391 [00:39<00:29,  5.73it/s]

Epoch [2/40], Step [220/391], Loss: 2.3533


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [2/40], Step [230/391], Loss: 2.3214


 62%|██████▏   | 241/391 [00:42<00:26,  5.73it/s]

Epoch [2/40], Step [240/391], Loss: 2.3139


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [2/40], Step [250/391], Loss: 2.3586


 67%|██████▋   | 261/391 [00:46<00:22,  5.72it/s]

Epoch [2/40], Step [260/391], Loss: 2.3135


 69%|██████▉   | 271/391 [00:48<00:21,  5.64it/s]

Epoch [2/40], Step [270/391], Loss: 2.2633


 72%|███████▏  | 281/391 [00:49<00:19,  5.74it/s]

Epoch [2/40], Step [280/391], Loss: 2.2093


 74%|███████▍  | 291/391 [00:51<00:17,  5.72it/s]

Epoch [2/40], Step [290/391], Loss: 2.3600


 77%|███████▋  | 301/391 [00:53<00:15,  5.74it/s]

Epoch [2/40], Step [300/391], Loss: 2.2603


 80%|███████▉  | 311/391 [00:55<00:13,  5.75it/s]

Epoch [2/40], Step [310/391], Loss: 2.2823


 82%|████████▏ | 321/391 [00:56<00:12,  5.72it/s]

Epoch [2/40], Step [320/391], Loss: 2.2417


 85%|████████▍ | 331/391 [00:58<00:10,  5.75it/s]

Epoch [2/40], Step [330/391], Loss: 2.2397


 87%|████████▋ | 341/391 [01:00<00:08,  5.75it/s]

Epoch [2/40], Step [340/391], Loss: 2.1608


 90%|████████▉ | 351/391 [01:02<00:07,  5.71it/s]

Epoch [2/40], Step [350/391], Loss: 2.1616


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [2/40], Step [360/391], Loss: 2.1528


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [2/40], Step [370/391], Loss: 2.1466


 97%|█████████▋| 381/391 [01:07<00:01,  5.73it/s]

Epoch [2/40], Step [380/391], Loss: 2.1226


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [2/40], Step [390/391], Loss: 2.1426





Test Accuracy of the student model on the test images: 37.26 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.49it/s]

Epoch [3/40], Step [10/391], Loss: 2.0954


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [3/40], Step [20/391], Loss: 2.0238


  8%|▊         | 31/391 [00:06<01:02,  5.73it/s]

Epoch [3/40], Step [30/391], Loss: 2.0269


 10%|█         | 41/391 [00:07<01:01,  5.73it/s]

Epoch [3/40], Step [40/391], Loss: 2.0620


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [3/40], Step [50/391], Loss: 2.0520


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [3/40], Step [60/391], Loss: 2.0685


 18%|█▊        | 71/391 [00:13<00:55,  5.74it/s]

Epoch [3/40], Step [70/391], Loss: 1.9816


 21%|██        | 81/391 [00:14<00:54,  5.74it/s]

Epoch [3/40], Step [80/391], Loss: 2.0098


 23%|██▎       | 91/391 [00:16<00:52,  5.74it/s]

Epoch [3/40], Step [90/391], Loss: 1.9958


 26%|██▌       | 101/391 [00:18<00:50,  5.73it/s]

Epoch [3/40], Step [100/391], Loss: 1.9805


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [3/40], Step [110/391], Loss: 1.9994


 31%|███       | 121/391 [00:21<00:47,  5.73it/s]

Epoch [3/40], Step [120/391], Loss: 1.9824


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [3/40], Step [130/391], Loss: 1.9875


 36%|███▌      | 141/391 [00:25<00:43,  5.72it/s]

Epoch [3/40], Step [140/391], Loss: 1.9777


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [3/40], Step [150/391], Loss: 1.9987


 41%|████      | 161/391 [00:28<00:40,  5.72it/s]

Epoch [3/40], Step [160/391], Loss: 2.0149


 44%|████▎     | 171/391 [00:30<00:38,  5.72it/s]

Epoch [3/40], Step [170/391], Loss: 1.9719


 46%|████▋     | 181/391 [00:32<00:36,  5.73it/s]

Epoch [3/40], Step [180/391], Loss: 1.9830


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [3/40], Step [190/391], Loss: 1.9491


 51%|█████▏    | 201/391 [00:35<00:33,  5.67it/s]

Epoch [3/40], Step [200/391], Loss: 2.0315


 54%|█████▍    | 211/391 [00:37<00:31,  5.73it/s]

Epoch [3/40], Step [210/391], Loss: 1.9542


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [3/40], Step [220/391], Loss: 1.9449


 59%|█████▉    | 231/391 [00:41<00:27,  5.72it/s]

Epoch [3/40], Step [230/391], Loss: 1.8997


 62%|██████▏   | 241/391 [00:42<00:26,  5.72it/s]

Epoch [3/40], Step [240/391], Loss: 1.8754


 64%|██████▍   | 251/391 [00:44<00:24,  5.73it/s]

Epoch [3/40], Step [250/391], Loss: 1.8668


 67%|██████▋   | 261/391 [00:46<00:22,  5.73it/s]

Epoch [3/40], Step [260/391], Loss: 1.8985


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [3/40], Step [270/391], Loss: 1.8939


 72%|███████▏  | 281/391 [00:49<00:19,  5.74it/s]

Epoch [3/40], Step [280/391], Loss: 1.8670


 74%|███████▍  | 291/391 [00:51<00:17,  5.74it/s]

Epoch [3/40], Step [290/391], Loss: 1.8365


 77%|███████▋  | 301/391 [00:53<00:15,  5.74it/s]

Epoch [3/40], Step [300/391], Loss: 1.8860


 80%|███████▉  | 311/391 [00:55<00:13,  5.73it/s]

Epoch [3/40], Step [310/391], Loss: 1.8893


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [3/40], Step [320/391], Loss: 1.8661


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [3/40], Step [330/391], Loss: 1.8837


 87%|████████▋ | 341/391 [01:00<00:08,  5.72it/s]

Epoch [3/40], Step [340/391], Loss: 1.8536


 90%|████████▉ | 351/391 [01:02<00:06,  5.74it/s]

Epoch [3/40], Step [350/391], Loss: 1.8779


 92%|█████████▏| 361/391 [01:03<00:05,  5.73it/s]

Epoch [3/40], Step [360/391], Loss: 1.8896


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [3/40], Step [370/391], Loss: 1.7955


 97%|█████████▋| 381/391 [01:07<00:01,  5.73it/s]

Epoch [3/40], Step [380/391], Loss: 1.8068


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [3/40], Step [390/391], Loss: 1.8485





Test Accuracy of the student model on the test images: 45.08 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.52it/s]

Epoch [4/40], Step [10/391], Loss: 1.7580


  5%|▌         | 21/391 [00:04<01:04,  5.72it/s]

Epoch [4/40], Step [20/391], Loss: 1.7699


  8%|▊         | 31/391 [00:06<01:02,  5.72it/s]

Epoch [4/40], Step [30/391], Loss: 1.7098


 10%|█         | 41/391 [00:07<01:00,  5.74it/s]

Epoch [4/40], Step [40/391], Loss: 1.7396


 13%|█▎        | 51/391 [00:09<00:59,  5.73it/s]

Epoch [4/40], Step [50/391], Loss: 1.7693


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [4/40], Step [60/391], Loss: 1.7117


 18%|█▊        | 71/391 [00:13<00:55,  5.73it/s]

Epoch [4/40], Step [70/391], Loss: 1.7237


 21%|██        | 81/391 [00:14<00:54,  5.74it/s]

Epoch [4/40], Step [80/391], Loss: 1.7773


 23%|██▎       | 91/391 [00:16<00:52,  5.72it/s]

Epoch [4/40], Step [90/391], Loss: 1.6938


 26%|██▌       | 101/391 [00:18<00:50,  5.72it/s]

Epoch [4/40], Step [100/391], Loss: 1.7230


 28%|██▊       | 111/391 [00:20<00:48,  5.73it/s]

Epoch [4/40], Step [110/391], Loss: 1.7275


 31%|███       | 121/391 [00:21<00:47,  5.73it/s]

Epoch [4/40], Step [120/391], Loss: 1.7498


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [4/40], Step [130/391], Loss: 1.7493


 36%|███▌      | 141/391 [00:25<00:43,  5.75it/s]

Epoch [4/40], Step [140/391], Loss: 1.7490


 39%|███▊      | 151/391 [00:27<00:41,  5.71it/s]

Epoch [4/40], Step [150/391], Loss: 1.7655


 41%|████      | 161/391 [00:28<00:40,  5.71it/s]

Epoch [4/40], Step [160/391], Loss: 1.7505


 44%|████▎     | 171/391 [00:30<00:38,  5.74it/s]

Epoch [4/40], Step [170/391], Loss: 1.7060


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [4/40], Step [180/391], Loss: 1.7601


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [4/40], Step [190/391], Loss: 1.7239


 51%|█████▏    | 201/391 [00:35<00:33,  5.73it/s]

Epoch [4/40], Step [200/391], Loss: 1.7448


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [4/40], Step [210/391], Loss: 1.7427


 57%|█████▋    | 221/391 [00:39<00:29,  5.72it/s]

Epoch [4/40], Step [220/391], Loss: 1.7439


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [4/40], Step [230/391], Loss: 1.7004


 62%|██████▏   | 241/391 [00:42<00:26,  5.74it/s]

Epoch [4/40], Step [240/391], Loss: 1.7294


 64%|██████▍   | 251/391 [00:44<00:24,  5.75it/s]

Epoch [4/40], Step [250/391], Loss: 1.6807


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [4/40], Step [260/391], Loss: 1.7138


 69%|██████▉   | 271/391 [00:48<00:20,  5.73it/s]

Epoch [4/40], Step [270/391], Loss: 1.6768


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [4/40], Step [280/391], Loss: 1.6916


 74%|███████▍  | 291/391 [00:51<00:17,  5.71it/s]

Epoch [4/40], Step [290/391], Loss: 1.6779


 77%|███████▋  | 301/391 [00:53<00:15,  5.73it/s]

Epoch [4/40], Step [300/391], Loss: 1.6418


 80%|███████▉  | 311/391 [00:55<00:13,  5.73it/s]

Epoch [4/40], Step [310/391], Loss: 1.6886


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [4/40], Step [320/391], Loss: 1.6616


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [4/40], Step [330/391], Loss: 1.6780


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [4/40], Step [340/391], Loss: 1.6504


 90%|████████▉ | 351/391 [01:01<00:06,  5.74it/s]

Epoch [4/40], Step [350/391], Loss: 1.7325


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [4/40], Step [360/391], Loss: 1.6914


 95%|█████████▍| 371/391 [01:05<00:03,  5.73it/s]

Epoch [4/40], Step [370/391], Loss: 1.7189


 97%|█████████▋| 381/391 [01:07<00:01,  5.74it/s]

Epoch [4/40], Step [380/391], Loss: 1.6880


100%|██████████| 391/391 [01:08<00:00,  5.67it/s]

Epoch [4/40], Step [390/391], Loss: 1.7492





Test Accuracy of the student model on the test images: 43.52 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.48it/s]

Epoch [5/40], Step [10/391], Loss: 1.6590


  5%|▌         | 21/391 [00:04<01:04,  5.70it/s]

Epoch [5/40], Step [20/391], Loss: 1.5941


  8%|▊         | 31/391 [00:06<01:02,  5.74it/s]

Epoch [5/40], Step [30/391], Loss: 1.5977


 10%|█         | 41/391 [00:08<01:01,  5.72it/s]

Epoch [5/40], Step [40/391], Loss: 1.6087


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [5/40], Step [50/391], Loss: 1.5818


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [5/40], Step [60/391], Loss: 1.5643


 18%|█▊        | 71/391 [00:13<00:55,  5.73it/s]

Epoch [5/40], Step [70/391], Loss: 1.5954


 21%|██        | 81/391 [00:15<00:54,  5.72it/s]

Epoch [5/40], Step [80/391], Loss: 1.5961


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [5/40], Step [90/391], Loss: 1.6008


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [5/40], Step [100/391], Loss: 1.6006


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [5/40], Step [110/391], Loss: 1.6163


 31%|███       | 121/391 [00:22<00:47,  5.73it/s]

Epoch [5/40], Step [120/391], Loss: 1.6081


 34%|███▎      | 131/391 [00:23<00:45,  5.75it/s]

Epoch [5/40], Step [130/391], Loss: 1.5609


 36%|███▌      | 141/391 [00:25<00:43,  5.71it/s]

Epoch [5/40], Step [140/391], Loss: 1.5683


 39%|███▊      | 151/391 [00:27<00:41,  5.72it/s]

Epoch [5/40], Step [150/391], Loss: 1.6281


 41%|████      | 161/391 [00:29<00:40,  5.73it/s]

Epoch [5/40], Step [160/391], Loss: 1.5570


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [5/40], Step [170/391], Loss: 1.5745


 46%|████▋     | 181/391 [00:32<00:36,  5.73it/s]

Epoch [5/40], Step [180/391], Loss: 1.5196


 49%|████▉     | 191/391 [00:34<00:34,  5.72it/s]

Epoch [5/40], Step [190/391], Loss: 1.5800


 51%|█████▏    | 201/391 [00:36<00:33,  5.75it/s]

Epoch [5/40], Step [200/391], Loss: 1.5507


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [5/40], Step [210/391], Loss: 1.5596


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [5/40], Step [220/391], Loss: 1.5556


 59%|█████▉    | 231/391 [00:41<00:27,  5.72it/s]

Epoch [5/40], Step [230/391], Loss: 1.6067


 62%|██████▏   | 241/391 [00:42<00:26,  5.73it/s]

Epoch [5/40], Step [240/391], Loss: 1.5642


 64%|██████▍   | 251/391 [00:44<00:24,  5.73it/s]

Epoch [5/40], Step [250/391], Loss: 1.6047


 67%|██████▋   | 261/391 [00:46<00:22,  5.73it/s]

Epoch [5/40], Step [260/391], Loss: 1.5376


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [5/40], Step [270/391], Loss: 1.5799


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [5/40], Step [280/391], Loss: 1.6325


 74%|███████▍  | 291/391 [00:51<00:17,  5.74it/s]

Epoch [5/40], Step [290/391], Loss: 1.6308


 77%|███████▋  | 301/391 [00:53<00:15,  5.75it/s]

Epoch [5/40], Step [300/391], Loss: 1.5745


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [5/40], Step [310/391], Loss: 1.5595


 82%|████████▏ | 321/391 [00:56<00:12,  5.75it/s]

Epoch [5/40], Step [320/391], Loss: 1.5725


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [5/40], Step [330/391], Loss: 1.6225


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [5/40], Step [340/391], Loss: 1.6191


 90%|████████▉ | 351/391 [01:02<00:06,  5.74it/s]

Epoch [5/40], Step [350/391], Loss: 1.5889


 92%|█████████▏| 361/391 [01:03<00:05,  5.72it/s]

Epoch [5/40], Step [360/391], Loss: 1.5922


 95%|█████████▍| 371/391 [01:05<00:03,  5.73it/s]

Epoch [5/40], Step [370/391], Loss: 1.5925


 97%|█████████▋| 381/391 [01:07<00:01,  5.73it/s]

Epoch [5/40], Step [380/391], Loss: 1.5789


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [5/40], Step [390/391], Loss: 1.5768





Test Accuracy of the student model on the test images: 45.75 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.48it/s]

Epoch [6/40], Step [10/391], Loss: 1.4990


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [6/40], Step [20/391], Loss: 1.4493


  8%|▊         | 31/391 [00:06<01:02,  5.73it/s]

Epoch [6/40], Step [30/391], Loss: 1.4675


 10%|█         | 41/391 [00:07<01:00,  5.74it/s]

Epoch [6/40], Step [40/391], Loss: 1.4463


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [6/40], Step [50/391], Loss: 1.4547


 16%|█▌        | 61/391 [00:11<00:57,  5.75it/s]

Epoch [6/40], Step [60/391], Loss: 1.4701


 18%|█▊        | 71/391 [00:13<00:55,  5.73it/s]

Epoch [6/40], Step [70/391], Loss: 1.5055


 21%|██        | 81/391 [00:14<00:54,  5.72it/s]

Epoch [6/40], Step [80/391], Loss: 1.4673


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [6/40], Step [90/391], Loss: 1.4695


 26%|██▌       | 101/391 [00:18<00:50,  5.73it/s]

Epoch [6/40], Step [100/391], Loss: 1.4615


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [6/40], Step [110/391], Loss: 1.5203


 31%|███       | 121/391 [00:21<00:47,  5.73it/s]

Epoch [6/40], Step [120/391], Loss: 1.4895


 34%|███▎      | 131/391 [00:23<00:45,  5.73it/s]

Epoch [6/40], Step [130/391], Loss: 1.5271


 36%|███▌      | 141/391 [00:25<00:43,  5.71it/s]

Epoch [6/40], Step [140/391], Loss: 1.4857


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [6/40], Step [150/391], Loss: 1.4970


 41%|████      | 161/391 [00:28<00:40,  5.73it/s]

Epoch [6/40], Step [160/391], Loss: 1.5349


 44%|████▎     | 171/391 [00:30<00:38,  5.74it/s]

Epoch [6/40], Step [170/391], Loss: 1.5219


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [6/40], Step [180/391], Loss: 1.5141


 49%|████▉     | 191/391 [00:34<00:35,  5.71it/s]

Epoch [6/40], Step [190/391], Loss: 1.5644


 51%|█████▏    | 201/391 [00:35<00:33,  5.74it/s]

Epoch [6/40], Step [200/391], Loss: 1.5657


 54%|█████▍    | 211/391 [00:37<00:31,  5.72it/s]

Epoch [6/40], Step [210/391], Loss: 1.5282


 57%|█████▋    | 221/391 [00:39<00:29,  5.73it/s]

Epoch [6/40], Step [220/391], Loss: 1.4828


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [6/40], Step [230/391], Loss: 1.4890


 62%|██████▏   | 241/391 [00:42<00:26,  5.73it/s]

Epoch [6/40], Step [240/391], Loss: 1.4955


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [6/40], Step [250/391], Loss: 1.5114


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [6/40], Step [260/391], Loss: 1.5305


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [6/40], Step [270/391], Loss: 1.4659


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [6/40], Step [280/391], Loss: 1.4866


 74%|███████▍  | 291/391 [00:51<00:17,  5.74it/s]

Epoch [6/40], Step [290/391], Loss: 1.4758


 77%|███████▋  | 301/391 [00:53<00:15,  5.74it/s]

Epoch [6/40], Step [300/391], Loss: 1.4629


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [6/40], Step [310/391], Loss: 1.4982


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [6/40], Step [320/391], Loss: 1.5058


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [6/40], Step [330/391], Loss: 1.5065


 87%|████████▋ | 341/391 [01:00<00:08,  5.71it/s]

Epoch [6/40], Step [340/391], Loss: 1.4736


 90%|████████▉ | 351/391 [01:02<00:06,  5.74it/s]

Epoch [6/40], Step [350/391], Loss: 1.5381


 92%|█████████▏| 361/391 [01:03<00:05,  5.73it/s]

Epoch [6/40], Step [360/391], Loss: 1.5513


 95%|█████████▍| 371/391 [01:05<00:03,  5.73it/s]

Epoch [6/40], Step [370/391], Loss: 1.4908


 97%|█████████▋| 381/391 [01:07<00:01,  5.73it/s]

Epoch [6/40], Step [380/391], Loss: 1.5066


100%|██████████| 391/391 [01:08<00:00,  5.67it/s]

Epoch [6/40], Step [390/391], Loss: 1.5155





Test Accuracy of the student model on the test images: 48.10 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.49it/s]

Epoch [7/40], Step [10/391], Loss: 1.4418


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [7/40], Step [20/391], Loss: 1.3813


  8%|▊         | 31/391 [00:06<01:02,  5.73it/s]

Epoch [7/40], Step [30/391], Loss: 1.4394


 10%|█         | 41/391 [00:08<01:01,  5.73it/s]

Epoch [7/40], Step [40/391], Loss: 1.3756


 13%|█▎        | 51/391 [00:09<00:59,  5.72it/s]

Epoch [7/40], Step [50/391], Loss: 1.3968


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [7/40], Step [60/391], Loss: 1.4573


 18%|█▊        | 71/391 [00:13<00:55,  5.74it/s]

Epoch [7/40], Step [70/391], Loss: 1.4595


 21%|██        | 81/391 [00:15<00:54,  5.74it/s]

Epoch [7/40], Step [80/391], Loss: 1.4137


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [7/40], Step [90/391], Loss: 1.3962


 26%|██▌       | 101/391 [00:18<00:50,  5.73it/s]

Epoch [7/40], Step [100/391], Loss: 1.4508


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [7/40], Step [110/391], Loss: 1.4061


 31%|███       | 121/391 [00:21<00:47,  5.73it/s]

Epoch [7/40], Step [120/391], Loss: 1.4447


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [7/40], Step [130/391], Loss: 1.4416


 36%|███▌      | 141/391 [00:25<00:43,  5.73it/s]

Epoch [7/40], Step [140/391], Loss: 1.4078


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [7/40], Step [150/391], Loss: 1.4368


 41%|████      | 161/391 [00:28<00:40,  5.73it/s]

Epoch [7/40], Step [160/391], Loss: 1.4735


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [7/40], Step [170/391], Loss: 1.4762


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [7/40], Step [180/391], Loss: 1.4856


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [7/40], Step [190/391], Loss: 1.4525


 51%|█████▏    | 201/391 [00:35<00:33,  5.74it/s]

Epoch [7/40], Step [200/391], Loss: 1.4933


 54%|█████▍    | 211/391 [00:37<00:31,  5.75it/s]

Epoch [7/40], Step [210/391], Loss: 1.4618


 57%|█████▋    | 221/391 [00:39<00:29,  5.75it/s]

Epoch [7/40], Step [220/391], Loss: 1.4494


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [7/40], Step [230/391], Loss: 1.4353


 62%|██████▏   | 241/391 [00:42<00:26,  5.75it/s]

Epoch [7/40], Step [240/391], Loss: 1.4608


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [7/40], Step [250/391], Loss: 1.4178


 66%|██████▋   | 260/391 [00:46<00:22,  5.74it/s]

Epoch [7/40], Step [260/391], Loss: 1.4208


 69%|██████▉   | 271/391 [00:48<00:21,  5.71it/s]

Epoch [7/40], Step [270/391], Loss: 1.4704


 72%|███████▏  | 281/391 [00:49<00:19,  5.72it/s]

Epoch [7/40], Step [280/391], Loss: 1.4313


 74%|███████▍  | 291/391 [00:51<00:17,  5.73it/s]

Epoch [7/40], Step [290/391], Loss: 1.4580


 77%|███████▋  | 301/391 [00:53<00:15,  5.73it/s]

Epoch [7/40], Step [300/391], Loss: 1.4488


 80%|███████▉  | 311/391 [00:55<00:13,  5.75it/s]

Epoch [7/40], Step [310/391], Loss: 1.4428


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [7/40], Step [320/391], Loss: 1.4458


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [7/40], Step [330/391], Loss: 1.4680


 87%|████████▋ | 341/391 [01:00<00:08,  5.73it/s]

Epoch [7/40], Step [340/391], Loss: 1.4845


 90%|████████▉ | 351/391 [01:02<00:06,  5.72it/s]

Epoch [7/40], Step [350/391], Loss: 1.4560


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [7/40], Step [360/391], Loss: 1.4281


 95%|█████████▍| 371/391 [01:05<00:03,  5.73it/s]

Epoch [7/40], Step [370/391], Loss: 1.4323


 97%|█████████▋| 381/391 [01:07<00:01,  5.73it/s]

Epoch [7/40], Step [380/391], Loss: 1.4379


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [7/40], Step [390/391], Loss: 1.4592





Test Accuracy of the student model on the test images: 44.77 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.52it/s]

Epoch [8/40], Step [10/391], Loss: 1.3535


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [8/40], Step [20/391], Loss: 1.3200


  8%|▊         | 31/391 [00:06<01:02,  5.74it/s]

Epoch [8/40], Step [30/391], Loss: 1.3148


 10%|█         | 41/391 [00:07<01:01,  5.73it/s]

Epoch [8/40], Step [40/391], Loss: 1.3508


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [8/40], Step [50/391], Loss: 1.3838


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [8/40], Step [60/391], Loss: 1.3225


 18%|█▊        | 71/391 [00:13<00:55,  5.74it/s]

Epoch [8/40], Step [70/391], Loss: 1.3589


 21%|██        | 81/391 [00:14<00:54,  5.74it/s]

Epoch [8/40], Step [80/391], Loss: 1.3362


 23%|██▎       | 91/391 [00:16<00:52,  5.74it/s]

Epoch [8/40], Step [90/391], Loss: 1.3605


 26%|██▌       | 101/391 [00:18<00:50,  5.72it/s]

Epoch [8/40], Step [100/391], Loss: 1.3360


 28%|██▊       | 111/391 [00:20<00:48,  5.73it/s]

Epoch [8/40], Step [110/391], Loss: 1.3835


 31%|███       | 121/391 [00:21<00:47,  5.73it/s]

Epoch [8/40], Step [120/391], Loss: 1.3902


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [8/40], Step [130/391], Loss: 1.3625


 36%|███▌      | 141/391 [00:25<00:43,  5.72it/s]

Epoch [8/40], Step [140/391], Loss: 1.3530


 39%|███▊      | 151/391 [00:27<00:41,  5.73it/s]

Epoch [8/40], Step [150/391], Loss: 1.3812


 41%|████      | 161/391 [00:28<00:40,  5.74it/s]

Epoch [8/40], Step [160/391], Loss: 1.3676


 44%|████▎     | 171/391 [00:30<00:38,  5.75it/s]

Epoch [8/40], Step [170/391], Loss: 1.3692


 46%|████▋     | 181/391 [00:32<00:36,  5.73it/s]

Epoch [8/40], Step [180/391], Loss: 1.4303


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [8/40], Step [190/391], Loss: 1.3927


 51%|█████▏    | 201/391 [00:35<00:33,  5.74it/s]

Epoch [8/40], Step [200/391], Loss: 1.3648


 54%|█████▍    | 211/391 [00:37<00:31,  5.72it/s]

Epoch [8/40], Step [210/391], Loss: 1.3796


 57%|█████▋    | 221/391 [00:39<00:29,  5.73it/s]

Epoch [8/40], Step [220/391], Loss: 1.4021


 59%|█████▉    | 231/391 [00:41<00:27,  5.74it/s]

Epoch [8/40], Step [230/391], Loss: 1.4020


 62%|██████▏   | 241/391 [00:42<00:26,  5.74it/s]

Epoch [8/40], Step [240/391], Loss: 1.4084


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [8/40], Step [250/391], Loss: 1.4333


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [8/40], Step [260/391], Loss: 1.4068


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [8/40], Step [270/391], Loss: 1.3958


 72%|███████▏  | 281/391 [00:49<00:19,  5.74it/s]

Epoch [8/40], Step [280/391], Loss: 1.4042


 74%|███████▍  | 291/391 [00:51<00:17,  5.73it/s]

Epoch [8/40], Step [290/391], Loss: 1.4287


 77%|███████▋  | 301/391 [00:53<00:15,  5.73it/s]

Epoch [8/40], Step [300/391], Loss: 1.3724


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [8/40], Step [310/391], Loss: 1.3831


 82%|████████▏ | 321/391 [00:56<00:12,  5.72it/s]

Epoch [8/40], Step [320/391], Loss: 1.3831


 85%|████████▍ | 331/391 [00:58<00:10,  5.74it/s]

Epoch [8/40], Step [330/391], Loss: 1.3667


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [8/40], Step [340/391], Loss: 1.4155


 90%|████████▉ | 351/391 [01:01<00:06,  5.73it/s]

Epoch [8/40], Step [350/391], Loss: 1.3960


 92%|█████████▏| 361/391 [01:03<00:05,  5.73it/s]

Epoch [8/40], Step [360/391], Loss: 1.4182


 95%|█████████▍| 371/391 [01:05<00:03,  5.75it/s]

Epoch [8/40], Step [370/391], Loss: 1.3835


 97%|█████████▋| 381/391 [01:07<00:01,  5.75it/s]

Epoch [8/40], Step [380/391], Loss: 1.4291


100%|██████████| 391/391 [01:08<00:00,  5.67it/s]

Epoch [8/40], Step [390/391], Loss: 1.4302





Test Accuracy of the student model on the test images: 48.39 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.49it/s]

Epoch [9/40], Step [10/391], Loss: 1.3869


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [9/40], Step [20/391], Loss: 1.3403


  8%|▊         | 31/391 [00:06<01:02,  5.73it/s]

Epoch [9/40], Step [30/391], Loss: 1.3221


 10%|█         | 41/391 [00:08<01:01,  5.73it/s]

Epoch [9/40], Step [40/391], Loss: 1.3857


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [9/40], Step [50/391], Loss: 1.2976


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [9/40], Step [60/391], Loss: 1.2882


 18%|█▊        | 71/391 [00:13<00:55,  5.74it/s]

Epoch [9/40], Step [70/391], Loss: 1.3108


 21%|██        | 81/391 [00:15<00:54,  5.74it/s]

Epoch [9/40], Step [80/391], Loss: 1.2948


 23%|██▎       | 91/391 [00:16<00:52,  5.74it/s]

Epoch [9/40], Step [90/391], Loss: 1.2959


 26%|██▌       | 101/391 [00:18<00:50,  5.72it/s]

Epoch [9/40], Step [100/391], Loss: 1.3008


 28%|██▊       | 111/391 [00:20<00:48,  5.72it/s]

Epoch [9/40], Step [110/391], Loss: 1.3612


 31%|███       | 121/391 [00:22<00:47,  5.63it/s]

Epoch [9/40], Step [120/391], Loss: 1.3361


 34%|███▎      | 131/391 [00:23<00:45,  5.72it/s]

Epoch [9/40], Step [130/391], Loss: 1.3065


 36%|███▌      | 141/391 [00:25<00:43,  5.73it/s]

Epoch [9/40], Step [140/391], Loss: 1.3403


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [9/40], Step [150/391], Loss: 1.3343


 41%|████      | 161/391 [00:29<00:40,  5.74it/s]

Epoch [9/40], Step [160/391], Loss: 1.3569


 44%|████▎     | 171/391 [00:30<00:38,  5.72it/s]

Epoch [9/40], Step [170/391], Loss: 1.3177


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [9/40], Step [180/391], Loss: 1.3690


 49%|████▉     | 191/391 [00:34<00:34,  5.74it/s]

Epoch [9/40], Step [190/391], Loss: 1.3346


 51%|█████▏    | 201/391 [00:35<00:33,  5.73it/s]

Epoch [9/40], Step [200/391], Loss: 1.3773


 54%|█████▍    | 211/391 [00:37<00:31,  5.73it/s]

Epoch [9/40], Step [210/391], Loss: 1.3636


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [9/40], Step [220/391], Loss: 1.3570


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [9/40], Step [230/391], Loss: 1.3770


 62%|██████▏   | 241/391 [00:42<00:26,  5.74it/s]

Epoch [9/40], Step [240/391], Loss: 1.3783


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [9/40], Step [250/391], Loss: 1.3754


 67%|██████▋   | 261/391 [00:46<00:22,  5.73it/s]

Epoch [9/40], Step [260/391], Loss: 1.3717


 69%|██████▉   | 271/391 [00:48<00:20,  5.73it/s]

Epoch [9/40], Step [270/391], Loss: 1.3570


 72%|███████▏  | 281/391 [00:49<00:19,  5.75it/s]

Epoch [9/40], Step [280/391], Loss: 1.3461


 74%|███████▍  | 291/391 [00:51<00:17,  5.76it/s]

Epoch [9/40], Step [290/391], Loss: 1.3656


 77%|███████▋  | 301/391 [00:53<00:15,  5.74it/s]

Epoch [9/40], Step [300/391], Loss: 1.3490


 80%|███████▉  | 311/391 [00:55<00:13,  5.73it/s]

Epoch [9/40], Step [310/391], Loss: 1.3336


 82%|████████▏ | 321/391 [00:56<00:12,  5.73it/s]

Epoch [9/40], Step [320/391], Loss: 1.3692


 85%|████████▍ | 331/391 [00:58<00:10,  5.75it/s]

Epoch [9/40], Step [330/391], Loss: 1.3110


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [9/40], Step [340/391], Loss: 1.3612


 90%|████████▉ | 351/391 [01:02<00:06,  5.74it/s]

Epoch [9/40], Step [350/391], Loss: 1.3707


 92%|█████████▏| 361/391 [01:03<00:05,  5.71it/s]

Epoch [9/40], Step [360/391], Loss: 1.3837


 95%|█████████▍| 371/391 [01:05<00:03,  5.72it/s]

Epoch [9/40], Step [370/391], Loss: 1.3694


 97%|█████████▋| 381/391 [01:07<00:01,  5.73it/s]

Epoch [9/40], Step [380/391], Loss: 1.3536


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [9/40], Step [390/391], Loss: 1.3932





Test Accuracy of the student model on the test images: 51.74 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.50it/s]

Epoch [10/40], Step [10/391], Loss: 1.2888


  5%|▌         | 21/391 [00:04<01:04,  5.72it/s]

Epoch [10/40], Step [20/391], Loss: 1.2606


  8%|▊         | 31/391 [00:06<01:02,  5.73it/s]

Epoch [10/40], Step [30/391], Loss: 1.2750


 10%|█         | 41/391 [00:08<01:01,  5.73it/s]

Epoch [10/40], Step [40/391], Loss: 1.2840


 13%|█▎        | 51/391 [00:09<01:00,  5.66it/s]

Epoch [10/40], Step [50/391], Loss: 1.2729


 16%|█▌        | 61/391 [00:11<00:57,  5.75it/s]

Epoch [10/40], Step [60/391], Loss: 1.2861


 18%|█▊        | 71/391 [00:13<00:55,  5.73it/s]

Epoch [10/40], Step [70/391], Loss: 1.3023


 21%|██        | 81/391 [00:15<00:53,  5.74it/s]

Epoch [10/40], Step [80/391], Loss: 1.2758


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [10/40], Step [90/391], Loss: 1.2902


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [10/40], Step [100/391], Loss: 1.2595


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [10/40], Step [110/391], Loss: 1.3064


 31%|███       | 121/391 [00:21<00:47,  5.74it/s]

Epoch [10/40], Step [120/391], Loss: 1.3067


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [10/40], Step [130/391], Loss: 1.3029


 36%|███▌      | 141/391 [00:25<00:43,  5.74it/s]

Epoch [10/40], Step [140/391], Loss: 1.3145


 39%|███▊      | 151/391 [00:27<00:41,  5.73it/s]

Epoch [10/40], Step [150/391], Loss: 1.3264


 41%|████      | 161/391 [00:28<00:40,  5.74it/s]

Epoch [10/40], Step [160/391], Loss: 1.2785


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [10/40], Step [170/391], Loss: 1.3267


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [10/40], Step [180/391], Loss: 1.2903


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [10/40], Step [190/391], Loss: 1.2811


 51%|█████▏    | 201/391 [00:35<00:33,  5.74it/s]

Epoch [10/40], Step [200/391], Loss: 1.3211


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [10/40], Step [210/391], Loss: 1.2990


 57%|█████▋    | 221/391 [00:39<00:29,  5.73it/s]

Epoch [10/40], Step [220/391], Loss: 1.3062


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [10/40], Step [230/391], Loss: 1.3400


 62%|██████▏   | 241/391 [00:42<00:26,  5.74it/s]

Epoch [10/40], Step [240/391], Loss: 1.3618


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [10/40], Step [250/391], Loss: 1.3466


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [10/40], Step [260/391], Loss: 1.3038


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [10/40], Step [270/391], Loss: 1.3254


 72%|███████▏  | 281/391 [00:49<00:19,  5.72it/s]

Epoch [10/40], Step [280/391], Loss: 1.3042


 74%|███████▍  | 291/391 [00:51<00:17,  5.73it/s]

Epoch [10/40], Step [290/391], Loss: 1.3345


 77%|███████▋  | 301/391 [00:53<00:15,  5.73it/s]

Epoch [10/40], Step [300/391], Loss: 1.3345


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [10/40], Step [310/391], Loss: 1.3200


 82%|████████▏ | 321/391 [00:56<00:12,  5.73it/s]

Epoch [10/40], Step [320/391], Loss: 1.3223


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [10/40], Step [330/391], Loss: 1.3032


 87%|████████▋ | 341/391 [01:00<00:08,  5.73it/s]

Epoch [10/40], Step [340/391], Loss: 1.3506


 90%|████████▉ | 351/391 [01:02<00:06,  5.74it/s]

Epoch [10/40], Step [350/391], Loss: 1.3078


 92%|█████████▏| 361/391 [01:03<00:05,  5.72it/s]

Epoch [10/40], Step [360/391], Loss: 1.3644


 95%|█████████▍| 371/391 [01:05<00:03,  5.72it/s]

Epoch [10/40], Step [370/391], Loss: 1.2861


 97%|█████████▋| 381/391 [01:07<00:01,  5.73it/s]

Epoch [10/40], Step [380/391], Loss: 1.3563


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [10/40], Step [390/391], Loss: 1.3620





Test Accuracy of the student model on the test images: 51.99 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.49it/s]

Epoch [11/40], Step [10/391], Loss: 1.2539


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [11/40], Step [20/391], Loss: 1.2146


  8%|▊         | 31/391 [00:06<01:02,  5.74it/s]

Epoch [11/40], Step [30/391], Loss: 1.1990


 10%|█         | 41/391 [00:07<01:01,  5.73it/s]

Epoch [11/40], Step [40/391], Loss: 1.2374


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [11/40], Step [50/391], Loss: 1.2321


 16%|█▌        | 61/391 [00:11<00:57,  5.73it/s]

Epoch [11/40], Step [60/391], Loss: 1.2559


 18%|█▊        | 71/391 [00:13<00:55,  5.74it/s]

Epoch [11/40], Step [70/391], Loss: 1.2231


 21%|██        | 81/391 [00:14<00:54,  5.69it/s]

Epoch [11/40], Step [80/391], Loss: 1.2285


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [11/40], Step [90/391], Loss: 1.2572


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [11/40], Step [100/391], Loss: 1.2543


 28%|██▊       | 111/391 [00:20<00:48,  5.75it/s]

Epoch [11/40], Step [110/391], Loss: 1.2873


 31%|███       | 121/391 [00:21<00:47,  5.74it/s]

Epoch [11/40], Step [120/391], Loss: 1.2491


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [11/40], Step [130/391], Loss: 1.2818


 36%|███▌      | 141/391 [00:25<00:43,  5.74it/s]

Epoch [11/40], Step [140/391], Loss: 1.3116


 39%|███▊      | 151/391 [00:27<00:41,  5.72it/s]

Epoch [11/40], Step [150/391], Loss: 1.3220


 41%|████      | 161/391 [00:28<00:40,  5.74it/s]

Epoch [11/40], Step [160/391], Loss: 1.3235


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [11/40], Step [170/391], Loss: 1.3013


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [11/40], Step [180/391], Loss: 1.2450


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [11/40], Step [190/391], Loss: 1.3011


 51%|█████▏    | 201/391 [00:35<00:33,  5.74it/s]

Epoch [11/40], Step [200/391], Loss: 1.2745


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [11/40], Step [210/391], Loss: 1.2950


 57%|█████▋    | 221/391 [00:39<00:29,  5.73it/s]

Epoch [11/40], Step [220/391], Loss: 1.2324


 59%|█████▉    | 231/391 [00:41<00:27,  5.74it/s]

Epoch [11/40], Step [230/391], Loss: 1.2508


 62%|██████▏   | 241/391 [00:42<00:26,  5.75it/s]

Epoch [11/40], Step [240/391], Loss: 1.3232


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [11/40], Step [250/391], Loss: 1.3254


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [11/40], Step [260/391], Loss: 1.3331


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [11/40], Step [270/391], Loss: 1.2863


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [11/40], Step [280/391], Loss: 1.2659


 74%|███████▍  | 291/391 [00:51<00:17,  5.73it/s]

Epoch [11/40], Step [290/391], Loss: 1.2829


 77%|███████▋  | 301/391 [00:53<00:15,  5.74it/s]

Epoch [11/40], Step [300/391], Loss: 1.2825


 80%|███████▉  | 311/391 [00:55<00:13,  5.72it/s]

Epoch [11/40], Step [310/391], Loss: 1.3124


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [11/40], Step [320/391], Loss: 1.2944


 85%|████████▍ | 331/391 [00:58<00:10,  5.68it/s]

Epoch [11/40], Step [330/391], Loss: 1.2907


 87%|████████▋ | 341/391 [01:00<00:08,  5.75it/s]

Epoch [11/40], Step [340/391], Loss: 1.2900


 90%|████████▉ | 351/391 [01:02<00:07,  5.71it/s]

Epoch [11/40], Step [350/391], Loss: 1.2943


 92%|█████████▏| 361/391 [01:03<00:05,  5.73it/s]

Epoch [11/40], Step [360/391], Loss: 1.2872


 95%|█████████▍| 371/391 [01:05<00:03,  5.75it/s]

Epoch [11/40], Step [370/391], Loss: 1.3322


 97%|█████████▋| 381/391 [01:07<00:01,  5.74it/s]

Epoch [11/40], Step [380/391], Loss: 1.2988


100%|██████████| 391/391 [01:09<00:00,  5.67it/s]

Epoch [11/40], Step [390/391], Loss: 1.3054





Test Accuracy of the student model on the test images: 52.52 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.48it/s]

Epoch [12/40], Step [10/391], Loss: 1.2731


  5%|▌         | 21/391 [00:04<01:04,  5.71it/s]

Epoch [12/40], Step [20/391], Loss: 1.2420


  8%|▊         | 31/391 [00:06<01:02,  5.74it/s]

Epoch [12/40], Step [30/391], Loss: 1.2123


 10%|█         | 41/391 [00:08<01:01,  5.73it/s]

Epoch [12/40], Step [40/391], Loss: 1.2026


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [12/40], Step [50/391], Loss: 1.1965


 16%|█▌        | 61/391 [00:11<00:57,  5.73it/s]

Epoch [12/40], Step [60/391], Loss: 1.1810


 18%|█▊        | 71/391 [00:13<00:56,  5.71it/s]

Epoch [12/40], Step [70/391], Loss: 1.1808


 21%|██        | 81/391 [00:15<00:54,  5.74it/s]

Epoch [12/40], Step [80/391], Loss: 1.1700


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [12/40], Step [90/391], Loss: 1.1927


 26%|██▌       | 101/391 [00:18<00:50,  5.73it/s]

Epoch [12/40], Step [100/391], Loss: 1.1847


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [12/40], Step [110/391], Loss: 1.2266


 31%|███       | 121/391 [00:22<00:47,  5.73it/s]

Epoch [12/40], Step [120/391], Loss: 1.2202


 34%|███▎      | 131/391 [00:23<00:45,  5.73it/s]

Epoch [12/40], Step [130/391], Loss: 1.2139


 36%|███▌      | 141/391 [00:25<00:43,  5.74it/s]

Epoch [12/40], Step [140/391], Loss: 1.2711


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [12/40], Step [150/391], Loss: 1.2325


 41%|████      | 161/391 [00:29<00:40,  5.73it/s]

Epoch [12/40], Step [160/391], Loss: 1.2483


 44%|████▎     | 171/391 [00:30<00:38,  5.74it/s]

Epoch [12/40], Step [170/391], Loss: 1.2175


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [12/40], Step [180/391], Loss: 1.2195


 49%|████▉     | 191/391 [00:34<00:34,  5.74it/s]

Epoch [12/40], Step [190/391], Loss: 1.2152


 51%|█████▏    | 201/391 [00:35<00:33,  5.73it/s]

Epoch [12/40], Step [200/391], Loss: 1.2328


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [12/40], Step [210/391], Loss: 1.2170


 57%|█████▋    | 221/391 [00:39<00:29,  5.75it/s]

Epoch [12/40], Step [220/391], Loss: 1.2286


 59%|█████▉    | 231/391 [00:41<00:27,  5.74it/s]

Epoch [12/40], Step [230/391], Loss: 1.2217


 62%|██████▏   | 241/391 [00:42<00:26,  5.73it/s]

Epoch [12/40], Step [240/391], Loss: 1.2456


 64%|██████▍   | 251/391 [00:44<00:24,  5.72it/s]

Epoch [12/40], Step [250/391], Loss: 1.2143


 67%|██████▋   | 261/391 [00:46<00:22,  5.72it/s]

Epoch [12/40], Step [260/391], Loss: 1.2650


 69%|██████▉   | 271/391 [00:48<00:20,  5.72it/s]

Epoch [12/40], Step [270/391], Loss: 1.3105


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [12/40], Step [280/391], Loss: 1.3118


 74%|███████▍  | 291/391 [00:51<00:17,  5.71it/s]

Epoch [12/40], Step [290/391], Loss: 1.3135


 77%|███████▋  | 301/391 [00:53<00:15,  5.72it/s]

Epoch [12/40], Step [300/391], Loss: 1.2728


 80%|███████▉  | 311/391 [00:55<00:13,  5.73it/s]

Epoch [12/40], Step [310/391], Loss: 1.2635


 82%|████████▏ | 321/391 [00:56<00:12,  5.72it/s]

Epoch [12/40], Step [320/391], Loss: 1.2595


 85%|████████▍ | 331/391 [00:58<00:10,  5.70it/s]

Epoch [12/40], Step [330/391], Loss: 1.2593


 87%|████████▋ | 341/391 [01:00<00:08,  5.72it/s]

Epoch [12/40], Step [340/391], Loss: 1.3087


 90%|████████▉ | 351/391 [01:02<00:07,  5.71it/s]

Epoch [12/40], Step [350/391], Loss: 1.2639


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [12/40], Step [360/391], Loss: 1.2935


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [12/40], Step [370/391], Loss: 1.2977


 97%|█████████▋| 381/391 [01:07<00:01,  5.74it/s]

Epoch [12/40], Step [380/391], Loss: 1.2488


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [12/40], Step [390/391], Loss: 1.2793





Test Accuracy of the student model on the test images: 50.29 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.48it/s]

Epoch [13/40], Step [10/391], Loss: 1.1797


  5%|▌         | 21/391 [00:04<01:04,  5.71it/s]

Epoch [13/40], Step [20/391], Loss: 1.1921


  8%|▊         | 31/391 [00:06<01:02,  5.74it/s]

Epoch [13/40], Step [30/391], Loss: 1.1873


 10%|█         | 41/391 [00:08<01:00,  5.74it/s]

Epoch [13/40], Step [40/391], Loss: 1.1892


 13%|█▎        | 51/391 [00:09<00:59,  5.73it/s]

Epoch [13/40], Step [50/391], Loss: 1.1728


 16%|█▌        | 61/391 [00:11<00:57,  5.73it/s]

Epoch [13/40], Step [60/391], Loss: 1.1794


 18%|█▊        | 71/391 [00:13<00:55,  5.72it/s]

Epoch [13/40], Step [70/391], Loss: 1.1978


 21%|██        | 81/391 [00:15<00:54,  5.72it/s]

Epoch [13/40], Step [80/391], Loss: 1.1603


 23%|██▎       | 91/391 [00:16<00:52,  5.72it/s]

Epoch [13/40], Step [90/391], Loss: 1.2023


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [13/40], Step [100/391], Loss: 1.1615


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [13/40], Step [110/391], Loss: 1.1564


 31%|███       | 121/391 [00:22<00:47,  5.73it/s]

Epoch [13/40], Step [120/391], Loss: 1.1733


 34%|███▎      | 131/391 [00:23<00:45,  5.73it/s]

Epoch [13/40], Step [130/391], Loss: 1.1690


 36%|███▌      | 141/391 [00:25<00:43,  5.73it/s]

Epoch [13/40], Step [140/391], Loss: 1.2015


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [13/40], Step [150/391], Loss: 1.1610


 41%|████      | 161/391 [00:29<00:40,  5.73it/s]

Epoch [13/40], Step [160/391], Loss: 1.1701


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [13/40], Step [170/391], Loss: 1.1970


 46%|████▋     | 181/391 [00:32<00:36,  5.73it/s]

Epoch [13/40], Step [180/391], Loss: 1.1951


 49%|████▉     | 191/391 [00:34<00:34,  5.72it/s]

Epoch [13/40], Step [190/391], Loss: 1.1963


 51%|█████▏    | 201/391 [00:35<00:33,  5.73it/s]

Epoch [13/40], Step [200/391], Loss: 1.2200


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [13/40], Step [210/391], Loss: 1.2286


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [13/40], Step [220/391], Loss: 1.2278


 59%|█████▉    | 231/391 [00:41<00:27,  5.74it/s]

Epoch [13/40], Step [230/391], Loss: 1.2151


 62%|██████▏   | 241/391 [00:42<00:26,  5.74it/s]

Epoch [13/40], Step [240/391], Loss: 1.2185


 64%|██████▍   | 251/391 [00:44<00:24,  5.73it/s]

Epoch [13/40], Step [250/391], Loss: 1.2303


 67%|██████▋   | 261/391 [00:46<00:22,  5.72it/s]

Epoch [13/40], Step [260/391], Loss: 1.2259


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [13/40], Step [270/391], Loss: 1.2160


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [13/40], Step [280/391], Loss: 1.2337


 74%|███████▍  | 291/391 [00:51<00:17,  5.74it/s]

Epoch [13/40], Step [290/391], Loss: 1.2449


 77%|███████▋  | 301/391 [00:53<00:15,  5.74it/s]

Epoch [13/40], Step [300/391], Loss: 1.2291


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [13/40], Step [310/391], Loss: 1.2502


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [13/40], Step [320/391], Loss: 1.2144


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [13/40], Step [330/391], Loss: 1.2683


 87%|████████▋ | 341/391 [01:00<00:08,  5.72it/s]

Epoch [13/40], Step [340/391], Loss: 1.2649


 90%|████████▉ | 351/391 [01:02<00:06,  5.72it/s]

Epoch [13/40], Step [350/391], Loss: 1.2686


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [13/40], Step [360/391], Loss: 1.2224


 95%|█████████▍| 371/391 [01:05<00:03,  5.72it/s]

Epoch [13/40], Step [370/391], Loss: 1.2390


 97%|█████████▋| 381/391 [01:07<00:01,  5.73it/s]

Epoch [13/40], Step [380/391], Loss: 1.2474


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [13/40], Step [390/391], Loss: 1.2450





Test Accuracy of the student model on the test images: 50.95 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.46it/s]

Epoch [14/40], Step [10/391], Loss: 1.1643


  5%|▌         | 21/391 [00:04<01:04,  5.72it/s]

Epoch [14/40], Step [20/391], Loss: 1.1794


  8%|▊         | 31/391 [00:06<01:02,  5.73it/s]

Epoch [14/40], Step [30/391], Loss: 1.1648


 10%|█         | 41/391 [00:08<01:01,  5.73it/s]

Epoch [14/40], Step [40/391], Loss: 1.1601


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [14/40], Step [50/391], Loss: 1.1900


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [14/40], Step [60/391], Loss: 1.1616


 18%|█▊        | 71/391 [00:13<00:55,  5.74it/s]

Epoch [14/40], Step [70/391], Loss: 1.1584


 21%|██        | 81/391 [00:15<00:54,  5.74it/s]

Epoch [14/40], Step [80/391], Loss: 1.1567


 23%|██▎       | 91/391 [00:16<00:52,  5.74it/s]

Epoch [14/40], Step [90/391], Loss: 1.1488


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [14/40], Step [100/391], Loss: 1.1738


 28%|██▊       | 111/391 [00:20<00:50,  5.55it/s]

Epoch [14/40], Step [110/391], Loss: 1.1383


 31%|███       | 121/391 [00:22<00:47,  5.72it/s]

Epoch [14/40], Step [120/391], Loss: 1.1609


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [14/40], Step [130/391], Loss: 1.1993


 36%|███▌      | 141/391 [00:25<00:43,  5.73it/s]

Epoch [14/40], Step [140/391], Loss: 1.1820


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [14/40], Step [150/391], Loss: 1.1556


 41%|████      | 161/391 [00:29<00:40,  5.71it/s]

Epoch [14/40], Step [160/391], Loss: 1.1842


 44%|████▎     | 171/391 [00:30<00:38,  5.72it/s]

Epoch [14/40], Step [170/391], Loss: 1.1938


 46%|████▋     | 181/391 [00:32<00:36,  5.73it/s]

Epoch [14/40], Step [180/391], Loss: 1.1748


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [14/40], Step [190/391], Loss: 1.2178


 51%|█████▏    | 201/391 [00:36<00:33,  5.74it/s]

Epoch [14/40], Step [200/391], Loss: 1.1870


 54%|█████▍    | 211/391 [00:37<00:31,  5.73it/s]

Epoch [14/40], Step [210/391], Loss: 1.1918


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [14/40], Step [220/391], Loss: 1.2027


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [14/40], Step [230/391], Loss: 1.1915


 62%|██████▏   | 241/391 [00:43<00:26,  5.73it/s]

Epoch [14/40], Step [240/391], Loss: 1.1995


 64%|██████▍   | 251/391 [00:44<00:24,  5.72it/s]

Epoch [14/40], Step [250/391], Loss: 1.2087


 67%|██████▋   | 261/391 [00:46<00:22,  5.73it/s]

Epoch [14/40], Step [260/391], Loss: 1.2005


 69%|██████▉   | 271/391 [00:48<00:20,  5.72it/s]

Epoch [14/40], Step [270/391], Loss: 1.2012


 72%|███████▏  | 281/391 [00:50<00:19,  5.73it/s]

Epoch [14/40], Step [280/391], Loss: 1.1802


 74%|███████▍  | 291/391 [00:51<00:17,  5.73it/s]

Epoch [14/40], Step [290/391], Loss: 1.1461


 77%|███████▋  | 301/391 [00:53<00:15,  5.73it/s]

Epoch [14/40], Step [300/391], Loss: 1.1956


 80%|███████▉  | 311/391 [00:55<00:13,  5.73it/s]

Epoch [14/40], Step [310/391], Loss: 1.2260


 82%|████████▏ | 321/391 [00:57<00:12,  5.73it/s]

Epoch [14/40], Step [320/391], Loss: 1.2190


 85%|████████▍ | 331/391 [00:58<00:10,  5.75it/s]

Epoch [14/40], Step [330/391], Loss: 1.2497


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [14/40], Step [340/391], Loss: 1.2284


 90%|████████▉ | 351/391 [01:02<00:06,  5.73it/s]

Epoch [14/40], Step [350/391], Loss: 1.2326


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [14/40], Step [360/391], Loss: 1.1972


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [14/40], Step [370/391], Loss: 1.1872


 97%|█████████▋| 381/391 [01:07<00:01,  5.75it/s]

Epoch [14/40], Step [380/391], Loss: 1.1935


100%|██████████| 391/391 [01:09<00:00,  5.65it/s]

Epoch [14/40], Step [390/391], Loss: 1.1859





Test Accuracy of the student model on the test images: 53.40 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.49it/s]

Epoch [15/40], Step [10/391], Loss: 1.1694


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [15/40], Step [20/391], Loss: 1.1365


  8%|▊         | 31/391 [00:06<01:02,  5.72it/s]

Epoch [15/40], Step [30/391], Loss: 1.1190


 10%|█         | 41/391 [00:08<01:01,  5.73it/s]

Epoch [15/40], Step [40/391], Loss: 1.1240


 13%|█▎        | 51/391 [00:09<00:59,  5.75it/s]

Epoch [15/40], Step [50/391], Loss: 1.0775


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [15/40], Step [60/391], Loss: 1.1139


 18%|█▊        | 71/391 [00:13<00:55,  5.74it/s]

Epoch [15/40], Step [70/391], Loss: 1.1110


 21%|██        | 81/391 [00:15<00:54,  5.73it/s]

Epoch [15/40], Step [80/391], Loss: 1.0842


 23%|██▎       | 91/391 [00:16<00:52,  5.71it/s]

Epoch [15/40], Step [90/391], Loss: 1.1366


 26%|██▌       | 101/391 [00:18<00:50,  5.73it/s]

Epoch [15/40], Step [100/391], Loss: 1.1151


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [15/40], Step [110/391], Loss: 1.1142


 31%|███       | 121/391 [00:22<00:47,  5.73it/s]

Epoch [15/40], Step [120/391], Loss: 1.1206


 34%|███▎      | 131/391 [00:23<00:45,  5.73it/s]

Epoch [15/40], Step [130/391], Loss: 1.1389


 36%|███▌      | 141/391 [00:25<00:43,  5.74it/s]

Epoch [15/40], Step [140/391], Loss: 1.1356


 39%|███▊      | 151/391 [00:27<00:42,  5.69it/s]

Epoch [15/40], Step [150/391], Loss: 1.1574


 41%|████      | 161/391 [00:28<00:40,  5.73it/s]

Epoch [15/40], Step [160/391], Loss: 1.1416


 44%|████▎     | 171/391 [00:30<00:38,  5.72it/s]

Epoch [15/40], Step [170/391], Loss: 1.1603


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [15/40], Step [180/391], Loss: 1.1347


 49%|████▉     | 191/391 [00:34<00:34,  5.75it/s]

Epoch [15/40], Step [190/391], Loss: 1.1840


 51%|█████▏    | 201/391 [00:35<00:33,  5.75it/s]

Epoch [15/40], Step [200/391], Loss: 1.1789


 54%|█████▍    | 211/391 [00:37<00:31,  5.75it/s]

Epoch [15/40], Step [210/391], Loss: 1.1407


 57%|█████▋    | 221/391 [00:39<00:29,  5.73it/s]

Epoch [15/40], Step [220/391], Loss: 1.1740


 59%|█████▉    | 231/391 [00:41<00:27,  5.74it/s]

Epoch [15/40], Step [230/391], Loss: 1.1667


 62%|██████▏   | 241/391 [00:42<00:26,  5.73it/s]

Epoch [15/40], Step [240/391], Loss: 1.1851


 64%|██████▍   | 251/391 [00:44<00:24,  5.73it/s]

Epoch [15/40], Step [250/391], Loss: 1.1800


 67%|██████▋   | 261/391 [00:46<00:22,  5.75it/s]

Epoch [15/40], Step [260/391], Loss: 1.1530


 69%|██████▉   | 271/391 [00:48<00:20,  5.72it/s]

Epoch [15/40], Step [270/391], Loss: 1.1552


 72%|███████▏  | 281/391 [00:49<00:19,  5.72it/s]

Epoch [15/40], Step [280/391], Loss: 1.1816


 74%|███████▍  | 291/391 [00:51<00:17,  5.72it/s]

Epoch [15/40], Step [290/391], Loss: 1.1535


 77%|███████▋  | 301/391 [00:53<00:15,  5.73it/s]

Epoch [15/40], Step [300/391], Loss: 1.1495


 80%|███████▉  | 311/391 [00:55<00:13,  5.73it/s]

Epoch [15/40], Step [310/391], Loss: 1.1494


 82%|████████▏ | 321/391 [00:56<00:12,  5.73it/s]

Epoch [15/40], Step [320/391], Loss: 1.1728


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [15/40], Step [330/391], Loss: 1.2083


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [15/40], Step [340/391], Loss: 1.1785


 90%|████████▉ | 351/391 [01:02<00:06,  5.74it/s]

Epoch [15/40], Step [350/391], Loss: 1.2099


 92%|█████████▏| 361/391 [01:03<00:05,  5.72it/s]

Epoch [15/40], Step [360/391], Loss: 1.1736


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [15/40], Step [370/391], Loss: 1.1543


 97%|█████████▋| 381/391 [01:07<00:01,  5.69it/s]

Epoch [15/40], Step [380/391], Loss: 1.1856


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [15/40], Step [390/391], Loss: 1.1790





Test Accuracy of the student model on the test images: 54.33 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.47it/s]

Epoch [16/40], Step [10/391], Loss: 1.0998


  5%|▌         | 21/391 [00:04<01:04,  5.72it/s]

Epoch [16/40], Step [20/391], Loss: 1.0780


  8%|▊         | 31/391 [00:06<01:02,  5.73it/s]

Epoch [16/40], Step [30/391], Loss: 1.0745


 10%|█         | 41/391 [00:08<01:00,  5.74it/s]

Epoch [16/40], Step [40/391], Loss: 1.0619


 13%|█▎        | 51/391 [00:09<00:59,  5.75it/s]

Epoch [16/40], Step [50/391], Loss: 1.0757


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [16/40], Step [60/391], Loss: 1.0674


 18%|█▊        | 71/391 [00:13<00:55,  5.74it/s]

Epoch [16/40], Step [70/391], Loss: 1.0701


 21%|██        | 81/391 [00:15<00:53,  5.74it/s]

Epoch [16/40], Step [80/391], Loss: 1.1030


 23%|██▎       | 91/391 [00:16<00:52,  5.72it/s]

Epoch [16/40], Step [90/391], Loss: 1.0652


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [16/40], Step [100/391], Loss: 1.0817


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [16/40], Step [110/391], Loss: 1.1202


 31%|███       | 121/391 [00:22<00:47,  5.73it/s]

Epoch [16/40], Step [120/391], Loss: 1.1189


 34%|███▎      | 131/391 [00:23<00:45,  5.73it/s]

Epoch [16/40], Step [130/391], Loss: 1.1371


 36%|███▌      | 141/391 [00:25<00:43,  5.74it/s]

Epoch [16/40], Step [140/391], Loss: 1.0776


 39%|███▊      | 151/391 [00:27<00:41,  5.72it/s]

Epoch [16/40], Step [150/391], Loss: 1.0943


 41%|████      | 161/391 [00:29<00:40,  5.74it/s]

Epoch [16/40], Step [160/391], Loss: 1.0995


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [16/40], Step [170/391], Loss: 1.0982


 46%|████▋     | 181/391 [00:32<00:36,  5.73it/s]

Epoch [16/40], Step [180/391], Loss: 1.1219


 49%|████▉     | 191/391 [00:34<00:34,  5.74it/s]

Epoch [16/40], Step [190/391], Loss: 1.1118


 51%|█████▏    | 201/391 [00:36<00:33,  5.73it/s]

Epoch [16/40], Step [200/391], Loss: 1.1177


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [16/40], Step [210/391], Loss: 1.0851


 57%|█████▋    | 221/391 [00:39<00:29,  5.71it/s]

Epoch [16/40], Step [220/391], Loss: 1.1297


 59%|█████▉    | 231/391 [00:41<00:27,  5.75it/s]

Epoch [16/40], Step [230/391], Loss: 1.1140


 62%|██████▏   | 241/391 [00:42<00:26,  5.74it/s]

Epoch [16/40], Step [240/391], Loss: 1.1263


 64%|██████▍   | 251/391 [00:44<00:24,  5.72it/s]

Epoch [16/40], Step [250/391], Loss: 1.1217


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [16/40], Step [260/391], Loss: 1.1401


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [16/40], Step [270/391], Loss: 1.1446


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [16/40], Step [280/391], Loss: 1.1416


 74%|███████▍  | 291/391 [00:51<00:17,  5.74it/s]

Epoch [16/40], Step [290/391], Loss: 1.1130


 77%|███████▋  | 301/391 [00:53<00:15,  5.74it/s]

Epoch [16/40], Step [300/391], Loss: 1.1348


 80%|███████▉  | 311/391 [00:55<00:13,  5.75it/s]

Epoch [16/40], Step [310/391], Loss: 1.1137


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [16/40], Step [320/391], Loss: 1.1460


 85%|████████▍ | 331/391 [00:58<00:10,  5.72it/s]

Epoch [16/40], Step [330/391], Loss: 1.1678


 87%|████████▋ | 341/391 [01:00<00:08,  5.73it/s]

Epoch [16/40], Step [340/391], Loss: 1.1596


 90%|████████▉ | 351/391 [01:02<00:06,  5.75it/s]

Epoch [16/40], Step [350/391], Loss: 1.1639


 92%|█████████▏| 361/391 [01:03<00:05,  5.75it/s]

Epoch [16/40], Step [360/391], Loss: 1.1885


 95%|█████████▍| 371/391 [01:05<00:03,  5.75it/s]

Epoch [16/40], Step [370/391], Loss: 1.1987


 97%|█████████▋| 381/391 [01:07<00:01,  5.73it/s]

Epoch [16/40], Step [380/391], Loss: 1.1793


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [16/40], Step [390/391], Loss: 1.1508





Test Accuracy of the student model on the test images: 52.57 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.48it/s]

Epoch [17/40], Step [10/391], Loss: 1.0783


  5%|▌         | 21/391 [00:04<01:04,  5.75it/s]

Epoch [17/40], Step [20/391], Loss: 1.0739


  8%|▊         | 31/391 [00:06<01:02,  5.73it/s]

Epoch [17/40], Step [30/391], Loss: 1.0420


 10%|█         | 41/391 [00:08<01:00,  5.74it/s]

Epoch [17/40], Step [40/391], Loss: 1.0435


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [17/40], Step [50/391], Loss: 1.0579


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [17/40], Step [60/391], Loss: 1.0579


 18%|█▊        | 71/391 [00:13<00:56,  5.71it/s]

Epoch [17/40], Step [70/391], Loss: 1.0332


 21%|██        | 81/391 [00:15<00:54,  5.73it/s]

Epoch [17/40], Step [80/391], Loss: 1.0900


 23%|██▎       | 91/391 [00:16<00:52,  5.74it/s]

Epoch [17/40], Step [90/391], Loss: 1.0678


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [17/40], Step [100/391], Loss: 1.0832


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [17/40], Step [110/391], Loss: 1.0817


 31%|███       | 121/391 [00:22<00:47,  5.74it/s]

Epoch [17/40], Step [120/391], Loss: 1.0540


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [17/40], Step [130/391], Loss: 1.0659


 36%|███▌      | 141/391 [00:25<00:43,  5.69it/s]

Epoch [17/40], Step [140/391], Loss: 1.0929


 39%|███▊      | 151/391 [00:27<00:41,  5.73it/s]

Epoch [17/40], Step [150/391], Loss: 1.0798


 41%|████      | 161/391 [00:28<00:40,  5.74it/s]

Epoch [17/40], Step [160/391], Loss: 1.0818


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [17/40], Step [170/391], Loss: 1.1063


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [17/40], Step [180/391], Loss: 1.0948


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [17/40], Step [190/391], Loss: 1.1079


 51%|█████▏    | 201/391 [00:35<00:33,  5.72it/s]

Epoch [17/40], Step [200/391], Loss: 1.0798


 54%|█████▍    | 211/391 [00:37<00:31,  5.73it/s]

Epoch [17/40], Step [210/391], Loss: 1.1208


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [17/40], Step [220/391], Loss: 1.0905


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [17/40], Step [230/391], Loss: 1.1206


 62%|██████▏   | 241/391 [00:42<00:26,  5.74it/s]

Epoch [17/40], Step [240/391], Loss: 1.0976


 64%|██████▍   | 251/391 [00:44<00:24,  5.72it/s]

Epoch [17/40], Step [250/391], Loss: 1.1027


 67%|██████▋   | 261/391 [00:46<00:22,  5.72it/s]

Epoch [17/40], Step [260/391], Loss: 1.0801


 69%|██████▉   | 271/391 [00:48<00:20,  5.73it/s]

Epoch [17/40], Step [270/391], Loss: 1.0977


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [17/40], Step [280/391], Loss: 1.0850


 74%|███████▍  | 291/391 [00:51<00:17,  5.73it/s]

Epoch [17/40], Step [290/391], Loss: 1.0890


 77%|███████▋  | 301/391 [00:53<00:15,  5.75it/s]

Epoch [17/40], Step [300/391], Loss: 1.0796


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [17/40], Step [310/391], Loss: 1.1135


 82%|████████▏ | 321/391 [00:56<00:12,  5.75it/s]

Epoch [17/40], Step [320/391], Loss: 1.1109


 85%|████████▍ | 331/391 [00:58<00:10,  5.74it/s]

Epoch [17/40], Step [330/391], Loss: 1.1328


 87%|████████▋ | 341/391 [01:00<00:08,  5.73it/s]

Epoch [17/40], Step [340/391], Loss: 1.1142


 90%|████████▉ | 351/391 [01:02<00:06,  5.74it/s]

Epoch [17/40], Step [350/391], Loss: 1.0922


 92%|█████████▏| 361/391 [01:03<00:05,  5.73it/s]

Epoch [17/40], Step [360/391], Loss: 1.1387


 95%|█████████▍| 371/391 [01:05<00:03,  5.73it/s]

Epoch [17/40], Step [370/391], Loss: 1.1065


 97%|█████████▋| 381/391 [01:07<00:01,  5.74it/s]

Epoch [17/40], Step [380/391], Loss: 1.0837


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [17/40], Step [390/391], Loss: 1.1111





Test Accuracy of the student model on the test images: 54.13 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.51it/s]

Epoch [18/40], Step [10/391], Loss: 1.0546


  5%|▌         | 21/391 [00:04<01:04,  5.74it/s]

Epoch [18/40], Step [20/391], Loss: 1.0530


  8%|▊         | 31/391 [00:06<01:02,  5.74it/s]

Epoch [18/40], Step [30/391], Loss: 1.0610


 10%|█         | 41/391 [00:07<01:01,  5.73it/s]

Epoch [18/40], Step [40/391], Loss: 1.0288


 13%|█▎        | 51/391 [00:09<00:59,  5.72it/s]

Epoch [18/40], Step [50/391], Loss: 1.0208


 16%|█▌        | 61/391 [00:11<00:57,  5.72it/s]

Epoch [18/40], Step [60/391], Loss: 1.0281


 18%|█▊        | 71/391 [00:13<00:55,  5.72it/s]

Epoch [18/40], Step [70/391], Loss: 1.0238


 21%|██        | 81/391 [00:14<00:54,  5.72it/s]

Epoch [18/40], Step [80/391], Loss: 1.0125


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [18/40], Step [90/391], Loss: 1.0255


 26%|██▌       | 101/391 [00:18<00:50,  5.75it/s]

Epoch [18/40], Step [100/391], Loss: 1.0342


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [18/40], Step [110/391], Loss: 1.0546


 31%|███       | 121/391 [00:21<00:47,  5.74it/s]

Epoch [18/40], Step [120/391], Loss: 1.0296


 34%|███▎      | 131/391 [00:23<00:45,  5.73it/s]

Epoch [18/40], Step [130/391], Loss: 1.0126


 36%|███▌      | 141/391 [00:25<00:43,  5.74it/s]

Epoch [18/40], Step [140/391], Loss: 1.0297


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [18/40], Step [150/391], Loss: 1.0167


 41%|████      | 161/391 [00:28<00:40,  5.74it/s]

Epoch [18/40], Step [160/391], Loss: 1.0591


 44%|████▎     | 171/391 [00:30<00:38,  5.75it/s]

Epoch [18/40], Step [170/391], Loss: 1.0874


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [18/40], Step [180/391], Loss: 1.0526


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [18/40], Step [190/391], Loss: 1.0684


 51%|█████▏    | 201/391 [00:35<00:33,  5.74it/s]

Epoch [18/40], Step [200/391], Loss: 1.0559


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [18/40], Step [210/391], Loss: 1.0677


 57%|█████▋    | 221/391 [00:39<00:29,  5.73it/s]

Epoch [18/40], Step [220/391], Loss: 1.0889


 59%|█████▉    | 231/391 [00:41<00:27,  5.74it/s]

Epoch [18/40], Step [230/391], Loss: 1.0946


 62%|██████▏   | 241/391 [00:42<00:26,  5.74it/s]

Epoch [18/40], Step [240/391], Loss: 1.0891


 64%|██████▍   | 251/391 [00:44<00:24,  5.73it/s]

Epoch [18/40], Step [250/391], Loss: 1.0497


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [18/40], Step [260/391], Loss: 1.0748


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [18/40], Step [270/391], Loss: 1.0534


 72%|███████▏  | 281/391 [00:49<00:19,  5.75it/s]

Epoch [18/40], Step [280/391], Loss: 1.0812


 74%|███████▍  | 291/391 [00:51<00:17,  5.72it/s]

Epoch [18/40], Step [290/391], Loss: 1.0708


 77%|███████▋  | 301/391 [00:53<00:15,  5.72it/s]

Epoch [18/40], Step [300/391], Loss: 1.0982


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [18/40], Step [310/391], Loss: 1.0890


 82%|████████▏ | 321/391 [00:56<00:12,  5.73it/s]

Epoch [18/40], Step [320/391], Loss: 1.0802


 85%|████████▍ | 331/391 [00:58<00:10,  5.72it/s]

Epoch [18/40], Step [330/391], Loss: 1.0742


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [18/40], Step [340/391], Loss: 1.0322


 90%|████████▉ | 351/391 [01:01<00:06,  5.75it/s]

Epoch [18/40], Step [350/391], Loss: 1.0545


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [18/40], Step [360/391], Loss: 1.0835


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [18/40], Step [370/391], Loss: 1.0864


 97%|█████████▋| 381/391 [01:07<00:01,  5.75it/s]

Epoch [18/40], Step [380/391], Loss: 1.0886


100%|██████████| 391/391 [01:08<00:00,  5.67it/s]

Epoch [18/40], Step [390/391], Loss: 1.0595





Test Accuracy of the student model on the test images: 53.73 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.48it/s]

Epoch [19/40], Step [10/391], Loss: 1.0252


  5%|▌         | 21/391 [00:04<01:04,  5.72it/s]

Epoch [19/40], Step [20/391], Loss: 1.0285


  8%|▊         | 31/391 [00:06<01:02,  5.74it/s]

Epoch [19/40], Step [30/391], Loss: 1.0084


 10%|█         | 41/391 [00:08<01:00,  5.75it/s]

Epoch [19/40], Step [40/391], Loss: 0.9998


 13%|█▎        | 51/391 [00:09<00:59,  5.69it/s]

Epoch [19/40], Step [50/391], Loss: 0.9885


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [19/40], Step [60/391], Loss: 0.9894


 18%|█▊        | 71/391 [00:13<00:55,  5.74it/s]

Epoch [19/40], Step [70/391], Loss: 1.0136


 21%|██        | 81/391 [00:15<00:54,  5.73it/s]

Epoch [19/40], Step [80/391], Loss: 1.0071


 23%|██▎       | 91/391 [00:16<00:52,  5.74it/s]

Epoch [19/40], Step [90/391], Loss: 0.9793


 26%|██▌       | 101/391 [00:18<00:50,  5.72it/s]

Epoch [19/40], Step [100/391], Loss: 0.9825


 28%|██▊       | 111/391 [00:20<00:48,  5.73it/s]

Epoch [19/40], Step [110/391], Loss: 0.9815


 31%|███       | 121/391 [00:22<00:47,  5.74it/s]

Epoch [19/40], Step [120/391], Loss: 1.0198


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [19/40], Step [130/391], Loss: 0.9982


 36%|███▌      | 141/391 [00:25<00:43,  5.74it/s]

Epoch [19/40], Step [140/391], Loss: 1.0045


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [19/40], Step [150/391], Loss: 1.0147


 41%|████      | 161/391 [00:29<00:40,  5.74it/s]

Epoch [19/40], Step [160/391], Loss: 1.0047


 44%|████▎     | 171/391 [00:30<00:38,  5.74it/s]

Epoch [19/40], Step [170/391], Loss: 1.0216


 46%|████▋     | 181/391 [00:32<00:36,  5.73it/s]

Epoch [19/40], Step [180/391], Loss: 1.0121


 49%|████▉     | 191/391 [00:34<00:34,  5.75it/s]

Epoch [19/40], Step [190/391], Loss: 1.0309


 51%|█████▏    | 201/391 [00:35<00:33,  5.74it/s]

Epoch [19/40], Step [200/391], Loss: 1.0185


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [19/40], Step [210/391], Loss: 1.0280


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [19/40], Step [220/391], Loss: 1.0379


 59%|█████▉    | 231/391 [00:41<00:27,  5.75it/s]

Epoch [19/40], Step [230/391], Loss: 1.0147


 62%|██████▏   | 241/391 [00:42<00:26,  5.71it/s]

Epoch [19/40], Step [240/391], Loss: 1.0328


 64%|██████▍   | 251/391 [00:44<00:24,  5.73it/s]

Epoch [19/40], Step [250/391], Loss: 1.0222


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [19/40], Step [260/391], Loss: 1.0173


 69%|██████▉   | 271/391 [00:48<00:20,  5.73it/s]

Epoch [19/40], Step [270/391], Loss: 1.0494


 72%|███████▏  | 281/391 [00:49<00:19,  5.74it/s]

Epoch [19/40], Step [280/391], Loss: 1.0197


 74%|███████▍  | 291/391 [00:51<00:17,  5.73it/s]

Epoch [19/40], Step [290/391], Loss: 1.0286


 77%|███████▋  | 301/391 [00:53<00:15,  5.74it/s]

Epoch [19/40], Step [300/391], Loss: 1.0348


 80%|███████▉  | 311/391 [00:55<00:13,  5.73it/s]

Epoch [19/40], Step [310/391], Loss: 1.0302


 82%|████████▏ | 321/391 [00:56<00:12,  5.73it/s]

Epoch [19/40], Step [320/391], Loss: 1.0244


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [19/40], Step [330/391], Loss: 1.0530


 87%|████████▋ | 341/391 [01:00<00:08,  5.72it/s]

Epoch [19/40], Step [340/391], Loss: 1.0314


 90%|████████▉ | 351/391 [01:02<00:06,  5.73it/s]

Epoch [19/40], Step [350/391], Loss: 1.0280


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [19/40], Step [360/391], Loss: 1.0487


 95%|█████████▍| 371/391 [01:05<00:03,  5.73it/s]

Epoch [19/40], Step [370/391], Loss: 1.0456


 97%|█████████▋| 381/391 [01:07<00:01,  5.72it/s]

Epoch [19/40], Step [380/391], Loss: 1.0926


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [19/40], Step [390/391], Loss: 1.0759





Test Accuracy of the student model on the test images: 55.59 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.45it/s]

Epoch [20/40], Step [10/391], Loss: 1.0273


  5%|▌         | 21/391 [00:04<01:04,  5.72it/s]

Epoch [20/40], Step [20/391], Loss: 0.9810


  8%|▊         | 31/391 [00:06<01:04,  5.62it/s]

Epoch [20/40], Step [30/391], Loss: 0.9707


 10%|█         | 41/391 [00:08<01:00,  5.74it/s]

Epoch [20/40], Step [40/391], Loss: 0.9762


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [20/40], Step [50/391], Loss: 0.9375


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [20/40], Step [60/391], Loss: 0.9699


 18%|█▊        | 71/391 [00:13<00:55,  5.72it/s]

Epoch [20/40], Step [70/391], Loss: 0.9498


 21%|██        | 81/391 [00:15<00:54,  5.72it/s]

Epoch [20/40], Step [80/391], Loss: 0.9664


 23%|██▎       | 91/391 [00:16<00:52,  5.74it/s]

Epoch [20/40], Step [90/391], Loss: 0.9496


 26%|██▌       | 101/391 [00:18<00:50,  5.71it/s]

Epoch [20/40], Step [100/391], Loss: 0.9494


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [20/40], Step [110/391], Loss: 0.9480


 31%|███       | 121/391 [00:22<00:47,  5.74it/s]

Epoch [20/40], Step [120/391], Loss: 0.9744


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [20/40], Step [130/391], Loss: 0.9630


 36%|███▌      | 141/391 [00:25<00:43,  5.74it/s]

Epoch [20/40], Step [140/391], Loss: 0.9698


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [20/40], Step [150/391], Loss: 0.9515


 41%|████      | 161/391 [00:29<00:40,  5.73it/s]

Epoch [20/40], Step [160/391], Loss: 0.9656


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [20/40], Step [170/391], Loss: 0.9685


 46%|████▋     | 181/391 [00:32<00:36,  5.73it/s]

Epoch [20/40], Step [180/391], Loss: 0.9994


 49%|████▉     | 191/391 [00:34<00:34,  5.75it/s]

Epoch [20/40], Step [190/391], Loss: 1.0111


 51%|█████▏    | 201/391 [00:36<00:33,  5.75it/s]

Epoch [20/40], Step [200/391], Loss: 1.0202


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [20/40], Step [210/391], Loss: 0.9918


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [20/40], Step [220/391], Loss: 0.9851


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [20/40], Step [230/391], Loss: 1.0110


 62%|██████▏   | 241/391 [00:43<00:26,  5.72it/s]

Epoch [20/40], Step [240/391], Loss: 1.0116


 64%|██████▍   | 251/391 [00:44<00:24,  5.73it/s]

Epoch [20/40], Step [250/391], Loss: 1.0185


 67%|██████▋   | 261/391 [00:46<00:22,  5.73it/s]

Epoch [20/40], Step [260/391], Loss: 1.0270


 69%|██████▉   | 271/391 [00:48<00:20,  5.73it/s]

Epoch [20/40], Step [270/391], Loss: 1.0030


 72%|███████▏  | 281/391 [00:49<00:19,  5.74it/s]

Epoch [20/40], Step [280/391], Loss: 1.0148


 74%|███████▍  | 291/391 [00:51<00:17,  5.74it/s]

Epoch [20/40], Step [290/391], Loss: 1.0228


 77%|███████▋  | 301/391 [00:53<00:15,  5.73it/s]

Epoch [20/40], Step [300/391], Loss: 1.0311


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [20/40], Step [310/391], Loss: 1.0148


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [20/40], Step [320/391], Loss: 0.9761


 85%|████████▍ | 331/391 [00:58<00:10,  5.74it/s]

Epoch [20/40], Step [330/391], Loss: 0.9881


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [20/40], Step [340/391], Loss: 0.9838


 90%|████████▉ | 351/391 [01:02<00:06,  5.73it/s]

Epoch [20/40], Step [350/391], Loss: 1.0296


 92%|█████████▏| 361/391 [01:03<00:05,  5.73it/s]

Epoch [20/40], Step [360/391], Loss: 1.0115


 95%|█████████▍| 371/391 [01:05<00:03,  5.73it/s]

Epoch [20/40], Step [370/391], Loss: 1.0063


 97%|█████████▋| 381/391 [01:07<00:01,  5.75it/s]

Epoch [20/40], Step [380/391], Loss: 1.0112


100%|██████████| 391/391 [01:09<00:00,  5.65it/s]

Epoch [20/40], Step [390/391], Loss: 1.0403





Test Accuracy of the student model on the test images: 53.28 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.46it/s]

Epoch [21/40], Step [10/391], Loss: 0.9679


  5%|▌         | 21/391 [00:04<01:04,  5.72it/s]

Epoch [21/40], Step [20/391], Loss: 0.9141


  8%|▊         | 31/391 [00:06<01:03,  5.70it/s]

Epoch [21/40], Step [30/391], Loss: 0.9359


 10%|█         | 41/391 [00:08<01:01,  5.73it/s]

Epoch [21/40], Step [40/391], Loss: 0.9327


 13%|█▎        | 51/391 [00:09<00:59,  5.73it/s]

Epoch [21/40], Step [50/391], Loss: 0.9206


 16%|█▌        | 61/391 [00:11<00:57,  5.73it/s]

Epoch [21/40], Step [60/391], Loss: 0.9256


 18%|█▊        | 71/391 [00:13<00:55,  5.73it/s]

Epoch [21/40], Step [70/391], Loss: 0.9463


 21%|██        | 81/391 [00:15<00:54,  5.73it/s]

Epoch [21/40], Step [80/391], Loss: 0.9424


 23%|██▎       | 91/391 [00:16<00:52,  5.74it/s]

Epoch [21/40], Step [90/391], Loss: 0.9216


 26%|██▌       | 101/391 [00:18<00:50,  5.73it/s]

Epoch [21/40], Step [100/391], Loss: 0.9239


 28%|██▊       | 111/391 [00:20<00:48,  5.72it/s]

Epoch [21/40], Step [110/391], Loss: 0.9490


 31%|███       | 121/391 [00:22<00:47,  5.74it/s]

Epoch [21/40], Step [120/391], Loss: 0.9450


 34%|███▎      | 131/391 [00:23<00:45,  5.75it/s]

Epoch [21/40], Step [130/391], Loss: 0.9719


 36%|███▌      | 141/391 [00:25<00:43,  5.73it/s]

Epoch [21/40], Step [140/391], Loss: 0.9243


 39%|███▊      | 151/391 [00:27<00:41,  5.73it/s]

Epoch [21/40], Step [150/391], Loss: 0.9522


 41%|████      | 161/391 [00:29<00:40,  5.74it/s]

Epoch [21/40], Step [160/391], Loss: 0.9450


 44%|████▎     | 171/391 [00:30<00:38,  5.74it/s]

Epoch [21/40], Step [170/391], Loss: 0.9695


 46%|████▋     | 181/391 [00:32<00:36,  5.75it/s]

Epoch [21/40], Step [180/391], Loss: 0.9674


 49%|████▉     | 191/391 [00:34<00:34,  5.75it/s]

Epoch [21/40], Step [190/391], Loss: 0.9692


 51%|█████▏    | 201/391 [00:36<00:33,  5.73it/s]

Epoch [21/40], Step [200/391], Loss: 0.9799


 54%|█████▍    | 211/391 [00:37<00:31,  5.70it/s]

Epoch [21/40], Step [210/391], Loss: 0.9567


 57%|█████▋    | 221/391 [00:39<00:29,  5.73it/s]

Epoch [21/40], Step [220/391], Loss: 0.9733


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [21/40], Step [230/391], Loss: 0.9759


 62%|██████▏   | 241/391 [00:43<00:26,  5.74it/s]

Epoch [21/40], Step [240/391], Loss: 0.9536


 64%|██████▍   | 251/391 [00:44<00:24,  5.73it/s]

Epoch [21/40], Step [250/391], Loss: 0.9667


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [21/40], Step [260/391], Loss: 0.9711


 69%|██████▉   | 271/391 [00:48<00:20,  5.75it/s]

Epoch [21/40], Step [270/391], Loss: 0.9965


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [21/40], Step [280/391], Loss: 1.0037


 74%|███████▍  | 291/391 [00:51<00:17,  5.74it/s]

Epoch [21/40], Step [290/391], Loss: 0.9725


 77%|███████▋  | 301/391 [00:53<00:15,  5.73it/s]

Epoch [21/40], Step [300/391], Loss: 0.9671


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [21/40], Step [310/391], Loss: 0.9705


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [21/40], Step [320/391], Loss: 0.9747


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [21/40], Step [330/391], Loss: 0.9574


 87%|████████▋ | 341/391 [01:00<00:08,  5.72it/s]

Epoch [21/40], Step [340/391], Loss: 1.0092


 90%|████████▉ | 351/391 [01:02<00:07,  5.71it/s]

Epoch [21/40], Step [350/391], Loss: 0.9720


 92%|█████████▏| 361/391 [01:03<00:05,  5.73it/s]

Epoch [21/40], Step [360/391], Loss: 0.9850


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [21/40], Step [370/391], Loss: 0.9832


 97%|█████████▋| 381/391 [01:07<00:01,  5.74it/s]

Epoch [21/40], Step [380/391], Loss: 0.9995


100%|██████████| 391/391 [01:09<00:00,  5.65it/s]

Epoch [21/40], Step [390/391], Loss: 0.9996





Test Accuracy of the student model on the test images: 56.27 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.47it/s]

Epoch [22/40], Step [10/391], Loss: 0.9030


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [22/40], Step [20/391], Loss: 0.8908


  8%|▊         | 31/391 [00:06<01:02,  5.72it/s]

Epoch [22/40], Step [30/391], Loss: 0.9099


 10%|█         | 41/391 [00:08<01:01,  5.73it/s]

Epoch [22/40], Step [40/391], Loss: 0.8999


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [22/40], Step [50/391], Loss: 0.8911


 16%|█▌        | 61/391 [00:11<00:57,  5.73it/s]

Epoch [22/40], Step [60/391], Loss: 0.9043


 18%|█▊        | 71/391 [00:13<00:55,  5.72it/s]

Epoch [22/40], Step [70/391], Loss: 0.9110


 21%|██        | 81/391 [00:15<00:54,  5.72it/s]

Epoch [22/40], Step [80/391], Loss: 0.8944


 23%|██▎       | 91/391 [00:16<00:52,  5.74it/s]

Epoch [22/40], Step [90/391], Loss: 0.9025


 26%|██▌       | 101/391 [00:18<00:50,  5.73it/s]

Epoch [22/40], Step [100/391], Loss: 0.9055


 28%|██▊       | 111/391 [00:20<00:48,  5.73it/s]

Epoch [22/40], Step [110/391], Loss: 0.9084


 31%|███       | 121/391 [00:22<00:47,  5.73it/s]

Epoch [22/40], Step [120/391], Loss: 0.9167


 34%|███▎      | 131/391 [00:23<00:45,  5.73it/s]

Epoch [22/40], Step [130/391], Loss: 0.8859


 36%|███▌      | 141/391 [00:25<00:43,  5.74it/s]

Epoch [22/40], Step [140/391], Loss: 0.8747


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [22/40], Step [150/391], Loss: 0.9044


 41%|████      | 161/391 [00:29<00:40,  5.74it/s]

Epoch [22/40], Step [160/391], Loss: 0.9042


 44%|████▎     | 171/391 [00:30<00:38,  5.74it/s]

Epoch [22/40], Step [170/391], Loss: 0.8937


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [22/40], Step [180/391], Loss: 0.9368


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [22/40], Step [190/391], Loss: 0.9014


 51%|█████▏    | 201/391 [00:36<00:33,  5.75it/s]

Epoch [22/40], Step [200/391], Loss: 0.9073


 54%|█████▍    | 211/391 [00:37<00:31,  5.73it/s]

Epoch [22/40], Step [210/391], Loss: 0.9228


 57%|█████▋    | 221/391 [00:39<00:29,  5.73it/s]

Epoch [22/40], Step [220/391], Loss: 0.9447


 59%|█████▉    | 231/391 [00:41<00:27,  5.74it/s]

Epoch [22/40], Step [230/391], Loss: 0.9117


 62%|██████▏   | 241/391 [00:43<00:26,  5.61it/s]

Epoch [22/40], Step [240/391], Loss: 0.9290


 64%|██████▍   | 251/391 [00:44<00:24,  5.73it/s]

Epoch [22/40], Step [250/391], Loss: 0.9449


 67%|██████▋   | 261/391 [00:46<00:22,  5.73it/s]

Epoch [22/40], Step [260/391], Loss: 0.9615


 69%|██████▉   | 271/391 [00:48<00:20,  5.75it/s]

Epoch [22/40], Step [270/391], Loss: 0.9589


 72%|███████▏  | 281/391 [00:50<00:19,  5.74it/s]

Epoch [22/40], Step [280/391], Loss: 0.9482


 74%|███████▍  | 291/391 [00:51<00:17,  5.74it/s]

Epoch [22/40], Step [290/391], Loss: 0.9529


 77%|███████▋  | 301/391 [00:53<00:15,  5.72it/s]

Epoch [22/40], Step [300/391], Loss: 0.9607


 80%|███████▉  | 311/391 [00:55<00:13,  5.73it/s]

Epoch [22/40], Step [310/391], Loss: 0.9390


 82%|████████▏ | 321/391 [00:56<00:12,  5.73it/s]

Epoch [22/40], Step [320/391], Loss: 0.9317


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [22/40], Step [330/391], Loss: 0.9634


 87%|████████▋ | 341/391 [01:00<00:08,  5.73it/s]

Epoch [22/40], Step [340/391], Loss: 0.9642


 90%|████████▉ | 351/391 [01:02<00:06,  5.74it/s]

Epoch [22/40], Step [350/391], Loss: 0.9587


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [22/40], Step [360/391], Loss: 0.9464


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [22/40], Step [370/391], Loss: 0.9603


 97%|█████████▋| 381/391 [01:07<00:01,  5.74it/s]

Epoch [22/40], Step [380/391], Loss: 0.9655


100%|██████████| 391/391 [01:09<00:00,  5.65it/s]

Epoch [22/40], Step [390/391], Loss: 0.9682





Test Accuracy of the student model on the test images: 58.37 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.51it/s]

Epoch [23/40], Step [10/391], Loss: 0.9136


  5%|▌         | 21/391 [00:04<01:04,  5.70it/s]

Epoch [23/40], Step [20/391], Loss: 0.8642


  8%|▊         | 31/391 [00:06<01:02,  5.72it/s]

Epoch [23/40], Step [30/391], Loss: 0.8760


 10%|█         | 41/391 [00:07<01:01,  5.72it/s]

Epoch [23/40], Step [40/391], Loss: 0.8733


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [23/40], Step [50/391], Loss: 0.8765


 16%|█▌        | 61/391 [00:11<00:57,  5.73it/s]

Epoch [23/40], Step [60/391], Loss: 0.8948


 18%|█▊        | 71/391 [00:13<00:55,  5.73it/s]

Epoch [23/40], Step [70/391], Loss: 0.8694


 21%|██        | 81/391 [00:14<00:53,  5.75it/s]

Epoch [23/40], Step [80/391], Loss: 0.8653


 23%|██▎       | 91/391 [00:16<00:52,  5.74it/s]

Epoch [23/40], Step [90/391], Loss: 0.8385


 26%|██▌       | 101/391 [00:18<00:50,  5.73it/s]

Epoch [23/40], Step [100/391], Loss: 0.8626


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [23/40], Step [110/391], Loss: 0.8742


 31%|███       | 121/391 [00:21<00:47,  5.73it/s]

Epoch [23/40], Step [120/391], Loss: 0.8744


 34%|███▎      | 131/391 [00:23<00:45,  5.75it/s]

Epoch [23/40], Step [130/391], Loss: 0.8816


 36%|███▌      | 141/391 [00:25<00:43,  5.73it/s]

Epoch [23/40], Step [140/391], Loss: 0.8766


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [23/40], Step [150/391], Loss: 0.8950


 41%|████      | 161/391 [00:28<00:40,  5.74it/s]

Epoch [23/40], Step [160/391], Loss: 0.8839


 44%|████▎     | 171/391 [00:30<00:38,  5.68it/s]

Epoch [23/40], Step [170/391], Loss: 0.8788


 46%|████▋     | 181/391 [00:32<00:36,  5.73it/s]

Epoch [23/40], Step [180/391], Loss: 0.8958


 49%|████▉     | 191/391 [00:34<00:34,  5.75it/s]

Epoch [23/40], Step [190/391], Loss: 0.9054


 51%|█████▏    | 201/391 [00:35<00:33,  5.74it/s]

Epoch [23/40], Step [200/391], Loss: 0.8932


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [23/40], Step [210/391], Loss: 0.8715


 57%|█████▋    | 221/391 [00:39<00:29,  5.75it/s]

Epoch [23/40], Step [220/391], Loss: 0.8719


 59%|█████▉    | 231/391 [00:41<00:27,  5.74it/s]

Epoch [23/40], Step [230/391], Loss: 0.8794


 62%|██████▏   | 241/391 [00:42<00:26,  5.73it/s]

Epoch [23/40], Step [240/391], Loss: 0.8802


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [23/40], Step [250/391], Loss: 0.8857


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [23/40], Step [260/391], Loss: 0.8886


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [23/40], Step [270/391], Loss: 0.9061


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [23/40], Step [280/391], Loss: 0.9217


 74%|███████▍  | 291/391 [00:51<00:17,  5.72it/s]

Epoch [23/40], Step [290/391], Loss: 0.9049


 77%|███████▋  | 301/391 [00:53<00:15,  5.72it/s]

Epoch [23/40], Step [300/391], Loss: 0.8844


 80%|███████▉  | 311/391 [00:55<00:13,  5.73it/s]

Epoch [23/40], Step [310/391], Loss: 0.9117


 82%|████████▏ | 321/391 [00:56<00:12,  5.73it/s]

Epoch [23/40], Step [320/391], Loss: 0.9123


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [23/40], Step [330/391], Loss: 0.8906


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [23/40], Step [340/391], Loss: 0.9045


 90%|████████▉ | 351/391 [01:02<00:06,  5.74it/s]

Epoch [23/40], Step [350/391], Loss: 0.9212


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [23/40], Step [360/391], Loss: 0.9388


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [23/40], Step [370/391], Loss: 0.9297


 97%|█████████▋| 381/391 [01:07<00:01,  5.72it/s]

Epoch [23/40], Step [380/391], Loss: 0.9228


100%|██████████| 391/391 [01:09<00:00,  5.67it/s]

Epoch [23/40], Step [390/391], Loss: 0.9219





Test Accuracy of the student model on the test images: 57.91 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.47it/s]

Epoch [24/40], Step [10/391], Loss: 0.8571


  5%|▌         | 21/391 [00:04<01:04,  5.72it/s]

Epoch [24/40], Step [20/391], Loss: 0.8668


  8%|▊         | 31/391 [00:06<01:02,  5.72it/s]

Epoch [24/40], Step [30/391], Loss: 0.8442


 10%|█         | 41/391 [00:08<01:01,  5.73it/s]

Epoch [24/40], Step [40/391], Loss: 0.8457


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [24/40], Step [50/391], Loss: 0.8485


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [24/40], Step [60/391], Loss: 0.8476


 18%|█▊        | 71/391 [00:13<00:55,  5.74it/s]

Epoch [24/40], Step [70/391], Loss: 0.8529


 21%|██        | 81/391 [00:15<00:53,  5.74it/s]

Epoch [24/40], Step [80/391], Loss: 0.8595


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [24/40], Step [90/391], Loss: 0.8168


 26%|██▌       | 101/391 [00:18<00:50,  5.70it/s]

Epoch [24/40], Step [100/391], Loss: 0.8132


 28%|██▊       | 111/391 [00:20<00:48,  5.73it/s]

Epoch [24/40], Step [110/391], Loss: 0.8078


 31%|███       | 121/391 [00:22<00:47,  5.74it/s]

Epoch [24/40], Step [120/391], Loss: 0.8508


 34%|███▎      | 131/391 [00:23<00:45,  5.72it/s]

Epoch [24/40], Step [130/391], Loss: 0.8334


 36%|███▌      | 141/391 [00:25<00:43,  5.74it/s]

Epoch [24/40], Step [140/391], Loss: 0.8325


 39%|███▊      | 151/391 [00:27<00:41,  5.73it/s]

Epoch [24/40], Step [150/391], Loss: 0.8386


 41%|████      | 161/391 [00:29<00:40,  5.74it/s]

Epoch [24/40], Step [160/391], Loss: 0.8172


 44%|████▎     | 171/391 [00:30<00:38,  5.74it/s]

Epoch [24/40], Step [170/391], Loss: 0.8245


 46%|████▋     | 181/391 [00:32<00:36,  5.73it/s]

Epoch [24/40], Step [180/391], Loss: 0.8340


 49%|████▉     | 191/391 [00:34<00:34,  5.74it/s]

Epoch [24/40], Step [190/391], Loss: 0.8508


 51%|█████▏    | 201/391 [00:35<00:33,  5.73it/s]

Epoch [24/40], Step [200/391], Loss: 0.8589


 54%|█████▍    | 211/391 [00:37<00:31,  5.73it/s]

Epoch [24/40], Step [210/391], Loss: 0.8266


 57%|█████▋    | 221/391 [00:39<00:29,  5.71it/s]

Epoch [24/40], Step [220/391], Loss: 0.8273


 59%|█████▉    | 231/391 [00:41<00:27,  5.72it/s]

Epoch [24/40], Step [230/391], Loss: 0.8585


 62%|██████▏   | 241/391 [00:42<00:26,  5.73it/s]

Epoch [24/40], Step [240/391], Loss: 0.8610


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [24/40], Step [250/391], Loss: 0.8500


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [24/40], Step [260/391], Loss: 0.8530


 69%|██████▉   | 271/391 [00:48<00:20,  5.73it/s]

Epoch [24/40], Step [270/391], Loss: 0.8593


 72%|███████▏  | 281/391 [00:49<00:19,  5.74it/s]

Epoch [24/40], Step [280/391], Loss: 0.8781


 74%|███████▍  | 291/391 [00:51<00:17,  5.75it/s]

Epoch [24/40], Step [290/391], Loss: 0.8838


 77%|███████▋  | 301/391 [00:53<00:15,  5.73it/s]

Epoch [24/40], Step [300/391], Loss: 0.8539


 80%|███████▉  | 311/391 [00:55<00:13,  5.75it/s]

Epoch [24/40], Step [310/391], Loss: 0.8733


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [24/40], Step [320/391], Loss: 0.8807


 85%|████████▍ | 331/391 [00:58<00:10,  5.74it/s]

Epoch [24/40], Step [330/391], Loss: 0.8571


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [24/40], Step [340/391], Loss: 0.8603


 90%|████████▉ | 351/391 [01:02<00:06,  5.74it/s]

Epoch [24/40], Step [350/391], Loss: 0.8624


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [24/40], Step [360/391], Loss: 0.8865


 95%|█████████▍| 371/391 [01:05<00:03,  5.73it/s]

Epoch [24/40], Step [370/391], Loss: 0.8726


 97%|█████████▋| 381/391 [01:07<00:01,  5.73it/s]

Epoch [24/40], Step [380/391], Loss: 0.8784


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [24/40], Step [390/391], Loss: 0.8889





Test Accuracy of the student model on the test images: 58.48 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.49it/s]

Epoch [25/40], Step [10/391], Loss: 0.8184


  5%|▌         | 21/391 [00:04<01:04,  5.72it/s]

Epoch [25/40], Step [20/391], Loss: 0.8098


  8%|▊         | 31/391 [00:06<01:02,  5.72it/s]

Epoch [25/40], Step [30/391], Loss: 0.8014


 10%|█         | 41/391 [00:08<01:01,  5.74it/s]

Epoch [25/40], Step [40/391], Loss: 0.8023


 13%|█▎        | 51/391 [00:09<00:59,  5.72it/s]

Epoch [25/40], Step [50/391], Loss: 0.7963


 16%|█▌        | 61/391 [00:11<00:57,  5.72it/s]

Epoch [25/40], Step [60/391], Loss: 0.7882


 18%|█▊        | 71/391 [00:13<00:55,  5.73it/s]

Epoch [25/40], Step [70/391], Loss: 0.8263


 21%|██        | 81/391 [00:14<00:54,  5.73it/s]

Epoch [25/40], Step [80/391], Loss: 0.8176


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [25/40], Step [90/391], Loss: 0.7915


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [25/40], Step [100/391], Loss: 0.8190


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [25/40], Step [110/391], Loss: 0.7930


 31%|███       | 121/391 [00:21<00:46,  5.75it/s]

Epoch [25/40], Step [120/391], Loss: 0.7941


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [25/40], Step [130/391], Loss: 0.7821


 36%|███▌      | 141/391 [00:25<00:43,  5.73it/s]

Epoch [25/40], Step [140/391], Loss: 0.7989


 39%|███▊      | 151/391 [00:27<00:41,  5.75it/s]

Epoch [25/40], Step [150/391], Loss: 0.8089


 41%|████      | 161/391 [00:28<00:40,  5.74it/s]

Epoch [25/40], Step [160/391], Loss: 0.8306


 44%|████▎     | 171/391 [00:30<00:38,  5.74it/s]

Epoch [25/40], Step [170/391], Loss: 0.8223


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [25/40], Step [180/391], Loss: 0.8112


 49%|████▉     | 191/391 [00:34<00:34,  5.74it/s]

Epoch [25/40], Step [190/391], Loss: 0.8287


 51%|█████▏    | 201/391 [00:35<00:33,  5.75it/s]

Epoch [25/40], Step [200/391], Loss: 0.8238


 54%|█████▍    | 211/391 [00:37<00:31,  5.71it/s]

Epoch [25/40], Step [210/391], Loss: 0.8041


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [25/40], Step [220/391], Loss: 0.8188


 59%|█████▉    | 231/391 [00:41<00:27,  5.75it/s]

Epoch [25/40], Step [230/391], Loss: 0.8001


 62%|██████▏   | 241/391 [00:42<00:26,  5.73it/s]

Epoch [25/40], Step [240/391], Loss: 0.8091


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [25/40], Step [250/391], Loss: 0.8264


 67%|██████▋   | 261/391 [00:46<00:22,  5.73it/s]

Epoch [25/40], Step [260/391], Loss: 0.8324


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [25/40], Step [270/391], Loss: 0.8073


 72%|███████▏  | 281/391 [00:49<00:19,  5.72it/s]

Epoch [25/40], Step [280/391], Loss: 0.8391


 74%|███████▍  | 291/391 [00:51<00:17,  5.74it/s]

Epoch [25/40], Step [290/391], Loss: 0.8321


 77%|███████▋  | 301/391 [00:53<00:15,  5.74it/s]

Epoch [25/40], Step [300/391], Loss: 0.8425


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [25/40], Step [310/391], Loss: 0.8641


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [25/40], Step [320/391], Loss: 0.8480


 85%|████████▍ | 331/391 [00:58<00:10,  5.75it/s]

Epoch [25/40], Step [330/391], Loss: 0.8186


 87%|████████▋ | 341/391 [01:00<00:08,  5.73it/s]

Epoch [25/40], Step [340/391], Loss: 0.8337


 90%|████████▉ | 351/391 [01:02<00:06,  5.73it/s]

Epoch [25/40], Step [350/391], Loss: 0.8391


 92%|█████████▏| 361/391 [01:03<00:05,  5.73it/s]

Epoch [25/40], Step [360/391], Loss: 0.8437


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [25/40], Step [370/391], Loss: 0.8745


 97%|█████████▋| 381/391 [01:07<00:01,  5.72it/s]

Epoch [25/40], Step [380/391], Loss: 0.8531


100%|██████████| 391/391 [01:08<00:00,  5.67it/s]

Epoch [25/40], Step [390/391], Loss: 0.8340





Test Accuracy of the student model on the test images: 60.21 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.51it/s]

Epoch [26/40], Step [10/391], Loss: 0.7887


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [26/40], Step [20/391], Loss: 0.7654


  8%|▊         | 31/391 [00:06<01:02,  5.74it/s]

Epoch [26/40], Step [30/391], Loss: 0.7600


 10%|█         | 41/391 [00:07<01:01,  5.72it/s]

Epoch [26/40], Step [40/391], Loss: 0.7828


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [26/40], Step [50/391], Loss: 0.7563


 16%|█▌        | 61/391 [00:11<00:57,  5.73it/s]

Epoch [26/40], Step [60/391], Loss: 0.7620


 18%|█▊        | 71/391 [00:13<00:55,  5.73it/s]

Epoch [26/40], Step [70/391], Loss: 0.7726


 21%|██        | 81/391 [00:14<00:54,  5.73it/s]

Epoch [26/40], Step [80/391], Loss: 0.7661


 23%|██▎       | 91/391 [00:16<00:52,  5.74it/s]

Epoch [26/40], Step [90/391], Loss: 0.7792


 26%|██▌       | 101/391 [00:18<00:50,  5.75it/s]

Epoch [26/40], Step [100/391], Loss: 0.7793


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [26/40], Step [110/391], Loss: 0.7657


 31%|███       | 121/391 [00:21<00:47,  5.73it/s]

Epoch [26/40], Step [120/391], Loss: 0.7773


 34%|███▎      | 131/391 [00:23<00:45,  5.72it/s]

Epoch [26/40], Step [130/391], Loss: 0.7889


 36%|███▌      | 141/391 [00:25<00:43,  5.73it/s]

Epoch [26/40], Step [140/391], Loss: 0.7784


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [26/40], Step [150/391], Loss: 0.8083


 41%|████      | 161/391 [00:28<00:40,  5.73it/s]

Epoch [26/40], Step [160/391], Loss: 0.7844


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [26/40], Step [170/391], Loss: 0.7859


 46%|████▋     | 181/391 [00:32<00:36,  5.73it/s]

Epoch [26/40], Step [180/391], Loss: 0.7829


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [26/40], Step [190/391], Loss: 0.7825


 51%|█████▏    | 201/391 [00:35<00:33,  5.74it/s]

Epoch [26/40], Step [200/391], Loss: 0.7872


 54%|█████▍    | 211/391 [00:37<00:31,  5.73it/s]

Epoch [26/40], Step [210/391], Loss: 0.7973


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [26/40], Step [220/391], Loss: 0.7661


 59%|█████▉    | 231/391 [00:41<00:27,  5.74it/s]

Epoch [26/40], Step [230/391], Loss: 0.7967


 62%|██████▏   | 241/391 [00:42<00:26,  5.73it/s]

Epoch [26/40], Step [240/391], Loss: 0.7757


 64%|██████▍   | 251/391 [00:44<00:24,  5.73it/s]

Epoch [26/40], Step [250/391], Loss: 0.7757


 67%|██████▋   | 261/391 [00:46<00:22,  5.73it/s]

Epoch [26/40], Step [260/391], Loss: 0.7906


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [26/40], Step [270/391], Loss: 0.7836


 72%|███████▏  | 281/391 [00:49<00:19,  5.70it/s]

Epoch [26/40], Step [280/391], Loss: 0.8027


 74%|███████▍  | 291/391 [00:51<00:17,  5.74it/s]

Epoch [26/40], Step [290/391], Loss: 0.8066


 77%|███████▋  | 301/391 [00:53<00:15,  5.74it/s]

Epoch [26/40], Step [300/391], Loss: 0.7915


 80%|███████▉  | 311/391 [00:55<00:13,  5.73it/s]

Epoch [26/40], Step [310/391], Loss: 0.7809


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [26/40], Step [320/391], Loss: 0.7862


 85%|████████▍ | 331/391 [00:58<00:10,  5.75it/s]

Epoch [26/40], Step [330/391], Loss: 0.7802


 87%|████████▋ | 341/391 [01:00<00:08,  5.73it/s]

Epoch [26/40], Step [340/391], Loss: 0.7855


 90%|████████▉ | 351/391 [01:02<00:06,  5.74it/s]

Epoch [26/40], Step [350/391], Loss: 0.7977


 92%|█████████▏| 361/391 [01:03<00:05,  5.73it/s]

Epoch [26/40], Step [360/391], Loss: 0.7908


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [26/40], Step [370/391], Loss: 0.7988


 97%|█████████▋| 381/391 [01:07<00:01,  5.72it/s]

Epoch [26/40], Step [380/391], Loss: 0.7881


100%|██████████| 391/391 [01:09<00:00,  5.67it/s]

Epoch [26/40], Step [390/391], Loss: 0.8037





Test Accuracy of the student model on the test images: 61.68 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.48it/s]

Epoch [27/40], Step [10/391], Loss: 0.7520


  5%|▌         | 21/391 [00:04<01:04,  5.70it/s]

Epoch [27/40], Step [20/391], Loss: 0.7464


  8%|▊         | 31/391 [00:06<01:02,  5.74it/s]

Epoch [27/40], Step [30/391], Loss: 0.7336


 10%|█         | 41/391 [00:08<01:01,  5.71it/s]

Epoch [27/40], Step [40/391], Loss: 0.7334


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [27/40], Step [50/391], Loss: 0.7357


 16%|█▌        | 61/391 [00:11<00:57,  5.73it/s]

Epoch [27/40], Step [60/391], Loss: 0.7169


 18%|█▊        | 71/391 [00:13<00:55,  5.73it/s]

Epoch [27/40], Step [70/391], Loss: 0.7248


 21%|██        | 81/391 [00:15<00:54,  5.73it/s]

Epoch [27/40], Step [80/391], Loss: 0.7259


 23%|██▎       | 91/391 [00:16<00:52,  5.74it/s]

Epoch [27/40], Step [90/391], Loss: 0.7328


 26%|██▌       | 101/391 [00:18<00:50,  5.73it/s]

Epoch [27/40], Step [100/391], Loss: 0.7242


 28%|██▊       | 111/391 [00:20<00:49,  5.70it/s]

Epoch [27/40], Step [110/391], Loss: 0.7548


 31%|███       | 121/391 [00:22<00:47,  5.74it/s]

Epoch [27/40], Step [120/391], Loss: 0.7178


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [27/40], Step [130/391], Loss: 0.7283


 36%|███▌      | 141/391 [00:25<00:43,  5.75it/s]

Epoch [27/40], Step [140/391], Loss: 0.7307


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [27/40], Step [150/391], Loss: 0.7343


 41%|████      | 161/391 [00:29<00:40,  5.72it/s]

Epoch [27/40], Step [160/391], Loss: 0.7297


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [27/40], Step [170/391], Loss: 0.7400


 46%|████▋     | 181/391 [00:32<00:36,  5.72it/s]

Epoch [27/40], Step [180/391], Loss: 0.7396


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [27/40], Step [190/391], Loss: 0.7304


 51%|█████▏    | 201/391 [00:36<00:33,  5.73it/s]

Epoch [27/40], Step [200/391], Loss: 0.7340


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [27/40], Step [210/391], Loss: 0.7386


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [27/40], Step [220/391], Loss: 0.7499


 59%|█████▉    | 231/391 [00:41<00:27,  5.75it/s]

Epoch [27/40], Step [230/391], Loss: 0.7340


 62%|██████▏   | 241/391 [00:42<00:26,  5.74it/s]

Epoch [27/40], Step [240/391], Loss: 0.7490


 64%|██████▍   | 251/391 [00:44<00:24,  5.73it/s]

Epoch [27/40], Step [250/391], Loss: 0.7494


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [27/40], Step [260/391], Loss: 0.7596


 69%|██████▉   | 271/391 [00:48<00:20,  5.73it/s]

Epoch [27/40], Step [270/391], Loss: 0.7544


 72%|███████▏  | 281/391 [00:49<00:19,  5.74it/s]

Epoch [27/40], Step [280/391], Loss: 0.7553


 74%|███████▍  | 291/391 [00:51<00:17,  5.74it/s]

Epoch [27/40], Step [290/391], Loss: 0.7545


 77%|███████▋  | 301/391 [00:53<00:15,  5.74it/s]

Epoch [27/40], Step [300/391], Loss: 0.7679


 80%|███████▉  | 311/391 [00:55<00:13,  5.72it/s]

Epoch [27/40], Step [310/391], Loss: 0.7731


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [27/40], Step [320/391], Loss: 0.7505


 85%|████████▍ | 331/391 [00:58<00:10,  5.74it/s]

Epoch [27/40], Step [330/391], Loss: 0.7626


 87%|████████▋ | 341/391 [01:00<00:08,  5.73it/s]

Epoch [27/40], Step [340/391], Loss: 0.7408


 90%|████████▉ | 351/391 [01:02<00:06,  5.73it/s]

Epoch [27/40], Step [350/391], Loss: 0.7707


 92%|█████████▏| 361/391 [01:03<00:05,  5.72it/s]

Epoch [27/40], Step [360/391], Loss: 0.7650


 95%|█████████▍| 371/391 [01:05<00:03,  5.73it/s]

Epoch [27/40], Step [370/391], Loss: 0.7489


 97%|█████████▋| 381/391 [01:07<00:01,  5.73it/s]

Epoch [27/40], Step [380/391], Loss: 0.7481


100%|██████████| 391/391 [01:09<00:00,  5.65it/s]

Epoch [27/40], Step [390/391], Loss: 0.7711





Test Accuracy of the student model on the test images: 60.42 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.47it/s]

Epoch [28/40], Step [10/391], Loss: 0.7102


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [28/40], Step [20/391], Loss: 0.7101


  8%|▊         | 31/391 [00:06<01:02,  5.72it/s]

Epoch [28/40], Step [30/391], Loss: 0.6843


 10%|█         | 41/391 [00:08<01:01,  5.73it/s]

Epoch [28/40], Step [40/391], Loss: 0.6877


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [28/40], Step [50/391], Loss: 0.6935


 16%|█▌        | 61/391 [00:11<00:57,  5.71it/s]

Epoch [28/40], Step [60/391], Loss: 0.6970


 18%|█▊        | 71/391 [00:13<00:55,  5.73it/s]

Epoch [28/40], Step [70/391], Loss: 0.7095


 21%|██        | 81/391 [00:15<00:54,  5.74it/s]

Epoch [28/40], Step [80/391], Loss: 0.7017


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [28/40], Step [90/391], Loss: 0.6995


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [28/40], Step [100/391], Loss: 0.6988


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [28/40], Step [110/391], Loss: 0.7075


 31%|███       | 121/391 [00:22<00:47,  5.74it/s]

Epoch [28/40], Step [120/391], Loss: 0.6904


 34%|███▎      | 131/391 [00:23<00:45,  5.73it/s]

Epoch [28/40], Step [130/391], Loss: 0.6925


 36%|███▌      | 141/391 [00:25<00:43,  5.73it/s]

Epoch [28/40], Step [140/391], Loss: 0.6926


 39%|███▊      | 151/391 [00:27<00:41,  5.73it/s]

Epoch [28/40], Step [150/391], Loss: 0.6964


 41%|████      | 161/391 [00:29<00:40,  5.73it/s]

Epoch [28/40], Step [160/391], Loss: 0.7142


 44%|████▎     | 171/391 [00:30<00:38,  5.74it/s]

Epoch [28/40], Step [170/391], Loss: 0.7183


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [28/40], Step [180/391], Loss: 0.7071


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [28/40], Step [190/391], Loss: 0.6926


 51%|█████▏    | 201/391 [00:36<00:33,  5.75it/s]

Epoch [28/40], Step [200/391], Loss: 0.6924


 54%|█████▍    | 211/391 [00:37<00:31,  5.72it/s]

Epoch [28/40], Step [210/391], Loss: 0.6931


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [28/40], Step [220/391], Loss: 0.7098


 59%|█████▉    | 231/391 [00:41<00:27,  5.72it/s]

Epoch [28/40], Step [230/391], Loss: 0.7006


 62%|██████▏   | 241/391 [00:43<00:26,  5.74it/s]

Epoch [28/40], Step [240/391], Loss: 0.7091


 64%|██████▍   | 251/391 [00:44<00:24,  5.72it/s]

Epoch [28/40], Step [250/391], Loss: 0.7083


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [28/40], Step [260/391], Loss: 0.7035


 69%|██████▉   | 271/391 [00:48<00:20,  5.73it/s]

Epoch [28/40], Step [270/391], Loss: 0.6926


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [28/40], Step [280/391], Loss: 0.7158


 74%|███████▍  | 291/391 [00:51<00:17,  5.73it/s]

Epoch [28/40], Step [290/391], Loss: 0.7108


 77%|███████▋  | 301/391 [00:53<00:15,  5.73it/s]

Epoch [28/40], Step [300/391], Loss: 0.7219


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [28/40], Step [310/391], Loss: 0.7183


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [28/40], Step [320/391], Loss: 0.7342


 85%|████████▍ | 331/391 [00:58<00:10,  5.72it/s]

Epoch [28/40], Step [330/391], Loss: 0.7143


 87%|████████▋ | 341/391 [01:00<00:08,  5.75it/s]

Epoch [28/40], Step [340/391], Loss: 0.7207


 90%|████████▉ | 351/391 [01:02<00:06,  5.72it/s]

Epoch [28/40], Step [350/391], Loss: 0.7382


 92%|█████████▏| 361/391 [01:03<00:05,  5.71it/s]

Epoch [28/40], Step [360/391], Loss: 0.7218


 95%|█████████▍| 371/391 [01:05<00:03,  5.73it/s]

Epoch [28/40], Step [370/391], Loss: 0.7086


 97%|█████████▋| 381/391 [01:07<00:01,  5.72it/s]

Epoch [28/40], Step [380/391], Loss: 0.7059


100%|██████████| 391/391 [01:09<00:00,  5.65it/s]

Epoch [28/40], Step [390/391], Loss: 0.7168





Test Accuracy of the student model on the test images: 61.97 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.47it/s]

Epoch [29/40], Step [10/391], Loss: 0.6761


  5%|▌         | 21/391 [00:04<01:04,  5.71it/s]

Epoch [29/40], Step [20/391], Loss: 0.6744


  8%|▊         | 31/391 [00:06<01:02,  5.72it/s]

Epoch [29/40], Step [30/391], Loss: 0.6699


 10%|█         | 41/391 [00:08<01:01,  5.73it/s]

Epoch [29/40], Step [40/391], Loss: 0.6499


 13%|█▎        | 51/391 [00:09<00:59,  5.73it/s]

Epoch [29/40], Step [50/391], Loss: 0.6492


 16%|█▌        | 61/391 [00:11<00:57,  5.73it/s]

Epoch [29/40], Step [60/391], Loss: 0.6652


 18%|█▊        | 71/391 [00:13<00:55,  5.72it/s]

Epoch [29/40], Step [70/391], Loss: 0.6599


 21%|██        | 81/391 [00:15<00:54,  5.73it/s]

Epoch [29/40], Step [80/391], Loss: 0.6546


 23%|██▎       | 91/391 [00:16<00:52,  5.71it/s]

Epoch [29/40], Step [90/391], Loss: 0.6810


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [29/40], Step [100/391], Loss: 0.6672


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [29/40], Step [110/391], Loss: 0.6714


 31%|███       | 121/391 [00:22<00:47,  5.74it/s]

Epoch [29/40], Step [120/391], Loss: 0.6616


 34%|███▎      | 131/391 [00:23<00:45,  5.75it/s]

Epoch [29/40], Step [130/391], Loss: 0.6718


 36%|███▌      | 141/391 [00:25<00:43,  5.74it/s]

Epoch [29/40], Step [140/391], Loss: 0.6567


 39%|███▊      | 151/391 [00:27<00:41,  5.73it/s]

Epoch [29/40], Step [150/391], Loss: 0.6743


 41%|████      | 161/391 [00:29<00:40,  5.74it/s]

Epoch [29/40], Step [160/391], Loss: 0.6639


 44%|████▎     | 171/391 [00:30<00:38,  5.74it/s]

Epoch [29/40], Step [170/391], Loss: 0.6677


 46%|████▋     | 181/391 [00:32<00:36,  5.75it/s]

Epoch [29/40], Step [180/391], Loss: 0.6776


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [29/40], Step [190/391], Loss: 0.6697


 51%|█████▏    | 201/391 [00:36<00:33,  5.73it/s]

Epoch [29/40], Step [200/391], Loss: 0.6824


 54%|█████▍    | 211/391 [00:37<00:31,  5.72it/s]

Epoch [29/40], Step [210/391], Loss: 0.6684


 57%|█████▋    | 221/391 [00:39<00:29,  5.72it/s]

Epoch [29/40], Step [220/391], Loss: 0.6697


 59%|█████▉    | 231/391 [00:41<00:27,  5.74it/s]

Epoch [29/40], Step [230/391], Loss: 0.6783


 62%|██████▏   | 241/391 [00:42<00:26,  5.74it/s]

Epoch [29/40], Step [240/391], Loss: 0.6747


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [29/40], Step [250/391], Loss: 0.6710


 67%|██████▋   | 261/391 [00:46<00:22,  5.73it/s]

Epoch [29/40], Step [260/391], Loss: 0.6725


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [29/40], Step [270/391], Loss: 0.6659


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [29/40], Step [280/391], Loss: 0.6685


 74%|███████▍  | 291/391 [00:51<00:17,  5.73it/s]

Epoch [29/40], Step [290/391], Loss: 0.6995


 77%|███████▋  | 301/391 [00:53<00:15,  5.74it/s]

Epoch [29/40], Step [300/391], Loss: 0.6845


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [29/40], Step [310/391], Loss: 0.6775


 82%|████████▏ | 321/391 [00:56<00:12,  5.73it/s]

Epoch [29/40], Step [320/391], Loss: 0.6642


 85%|████████▍ | 331/391 [00:58<00:10,  5.74it/s]

Epoch [29/40], Step [330/391], Loss: 0.6794


 87%|████████▋ | 341/391 [01:00<00:08,  5.73it/s]

Epoch [29/40], Step [340/391], Loss: 0.6827


 90%|████████▉ | 351/391 [01:02<00:06,  5.74it/s]

Epoch [29/40], Step [350/391], Loss: 0.6721


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [29/40], Step [360/391], Loss: 0.6882


 95%|█████████▍| 371/391 [01:05<00:03,  5.75it/s]

Epoch [29/40], Step [370/391], Loss: 0.6855


 97%|█████████▋| 381/391 [01:07<00:01,  5.75it/s]

Epoch [29/40], Step [380/391], Loss: 0.6860


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [29/40], Step [390/391], Loss: 0.6946





Test Accuracy of the student model on the test images: 63.88 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.49it/s]

Epoch [30/40], Step [10/391], Loss: 0.6425


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [30/40], Step [20/391], Loss: 0.6261


  8%|▊         | 31/391 [00:06<01:02,  5.74it/s]

Epoch [30/40], Step [30/391], Loss: 0.6297


 10%|█         | 41/391 [00:08<01:00,  5.74it/s]

Epoch [30/40], Step [40/391], Loss: 0.6300


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [30/40], Step [50/391], Loss: 0.6250


 16%|█▌        | 61/391 [00:11<00:57,  5.70it/s]

Epoch [30/40], Step [60/391], Loss: 0.6186


 18%|█▊        | 71/391 [00:13<00:56,  5.71it/s]

Epoch [30/40], Step [70/391], Loss: 0.6208


 21%|██        | 81/391 [00:15<00:54,  5.74it/s]

Epoch [30/40], Step [80/391], Loss: 0.6292


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [30/40], Step [90/391], Loss: 0.6292


 26%|██▌       | 101/391 [00:18<00:50,  5.73it/s]

Epoch [30/40], Step [100/391], Loss: 0.6221


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [30/40], Step [110/391], Loss: 0.6194


 31%|███       | 121/391 [00:22<00:47,  5.72it/s]

Epoch [30/40], Step [120/391], Loss: 0.6330


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [30/40], Step [130/391], Loss: 0.6184


 36%|███▌      | 141/391 [00:25<00:43,  5.74it/s]

Epoch [30/40], Step [140/391], Loss: 0.6354


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [30/40], Step [150/391], Loss: 0.6346


 41%|████      | 161/391 [00:28<00:40,  5.71it/s]

Epoch [30/40], Step [160/391], Loss: 0.6384


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [30/40], Step [170/391], Loss: 0.6359


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [30/40], Step [180/391], Loss: 0.6455


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [30/40], Step [190/391], Loss: 0.6406


 51%|█████▏    | 201/391 [00:35<00:33,  5.74it/s]

Epoch [30/40], Step [200/391], Loss: 0.6328


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [30/40], Step [210/391], Loss: 0.6376


 57%|█████▋    | 221/391 [00:39<00:29,  5.73it/s]

Epoch [30/40], Step [220/391], Loss: 0.6329


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [30/40], Step [230/391], Loss: 0.6341


 62%|██████▏   | 241/391 [00:42<00:26,  5.72it/s]

Epoch [30/40], Step [240/391], Loss: 0.6360


 64%|██████▍   | 251/391 [00:44<00:24,  5.73it/s]

Epoch [30/40], Step [250/391], Loss: 0.6437


 67%|██████▋   | 261/391 [00:46<00:22,  5.72it/s]

Epoch [30/40], Step [260/391], Loss: 0.6490


 69%|██████▉   | 271/391 [00:48<00:20,  5.73it/s]

Epoch [30/40], Step [270/391], Loss: 0.6550


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [30/40], Step [280/391], Loss: 0.6564


 74%|███████▍  | 291/391 [00:51<00:17,  5.74it/s]

Epoch [30/40], Step [290/391], Loss: 0.6347


 77%|███████▋  | 301/391 [00:53<00:15,  5.72it/s]

Epoch [30/40], Step [300/391], Loss: 0.6534


 80%|███████▉  | 311/391 [00:55<00:13,  5.73it/s]

Epoch [30/40], Step [310/391], Loss: 0.6539


 82%|████████▏ | 321/391 [00:56<00:12,  5.73it/s]

Epoch [30/40], Step [320/391], Loss: 0.6464


 85%|████████▍ | 331/391 [00:58<00:10,  5.72it/s]

Epoch [30/40], Step [330/391], Loss: 0.6590


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [30/40], Step [340/391], Loss: 0.6521


 90%|████████▉ | 351/391 [01:02<00:06,  5.73it/s]

Epoch [30/40], Step [350/391], Loss: 0.6454


 92%|█████████▏| 361/391 [01:03<00:05,  5.75it/s]

Epoch [30/40], Step [360/391], Loss: 0.6494


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [30/40], Step [370/391], Loss: 0.6517


 97%|█████████▋| 381/391 [01:07<00:01,  5.74it/s]

Epoch [30/40], Step [380/391], Loss: 0.6428


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [30/40], Step [390/391], Loss: 0.6607





Test Accuracy of the student model on the test images: 64.37 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.52it/s]

Epoch [31/40], Step [10/391], Loss: 0.6127


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [31/40], Step [20/391], Loss: 0.6097


  8%|▊         | 31/391 [00:06<01:02,  5.73it/s]

Epoch [31/40], Step [30/391], Loss: 0.6064


 10%|█         | 41/391 [00:07<01:01,  5.73it/s]

Epoch [31/40], Step [40/391], Loss: 0.6124


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [31/40], Step [50/391], Loss: 0.6103


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [31/40], Step [60/391], Loss: 0.5925


 18%|█▊        | 71/391 [00:13<00:55,  5.73it/s]

Epoch [31/40], Step [70/391], Loss: 0.5995


 21%|██        | 81/391 [00:14<00:53,  5.74it/s]

Epoch [31/40], Step [80/391], Loss: 0.5867


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [31/40], Step [90/391], Loss: 0.5850


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [31/40], Step [100/391], Loss: 0.5952


 28%|██▊       | 111/391 [00:20<00:48,  5.72it/s]

Epoch [31/40], Step [110/391], Loss: 0.5967


 31%|███       | 121/391 [00:21<00:47,  5.72it/s]

Epoch [31/40], Step [120/391], Loss: 0.5993


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [31/40], Step [130/391], Loss: 0.5990


 36%|███▌      | 141/391 [00:25<00:43,  5.72it/s]

Epoch [31/40], Step [140/391], Loss: 0.5844


 39%|███▊      | 151/391 [00:27<00:42,  5.70it/s]

Epoch [31/40], Step [150/391], Loss: 0.5891


 41%|████      | 161/391 [00:28<00:40,  5.74it/s]

Epoch [31/40], Step [160/391], Loss: 0.6062


 44%|████▎     | 171/391 [00:30<00:38,  5.74it/s]

Epoch [31/40], Step [170/391], Loss: 0.5946


 46%|████▋     | 181/391 [00:32<00:36,  5.73it/s]

Epoch [31/40], Step [180/391], Loss: 0.6070


 49%|████▉     | 191/391 [00:34<00:34,  5.74it/s]

Epoch [31/40], Step [190/391], Loss: 0.6014


 51%|█████▏    | 201/391 [00:35<00:33,  5.74it/s]

Epoch [31/40], Step [200/391], Loss: 0.6076


 54%|█████▍    | 211/391 [00:37<00:31,  5.71it/s]

Epoch [31/40], Step [210/391], Loss: 0.6063


 57%|█████▋    | 221/391 [00:39<00:29,  5.73it/s]

Epoch [31/40], Step [220/391], Loss: 0.6003


 59%|█████▉    | 231/391 [00:41<00:28,  5.71it/s]

Epoch [31/40], Step [230/391], Loss: 0.6182


 62%|██████▏   | 241/391 [00:42<00:26,  5.74it/s]

Epoch [31/40], Step [240/391], Loss: 0.6217


 64%|██████▍   | 251/391 [00:44<00:24,  5.73it/s]

Epoch [31/40], Step [250/391], Loss: 0.6007


 67%|██████▋   | 261/391 [00:46<00:22,  5.73it/s]

Epoch [31/40], Step [260/391], Loss: 0.6108


 69%|██████▉   | 271/391 [00:48<00:20,  5.73it/s]

Epoch [31/40], Step [270/391], Loss: 0.6082


 72%|███████▏  | 281/391 [00:49<00:19,  5.74it/s]

Epoch [31/40], Step [280/391], Loss: 0.6102


 74%|███████▍  | 291/391 [00:51<00:17,  5.73it/s]

Epoch [31/40], Step [290/391], Loss: 0.6101


 77%|███████▋  | 301/391 [00:53<00:15,  5.71it/s]

Epoch [31/40], Step [300/391], Loss: 0.5974


 80%|███████▉  | 311/391 [00:55<00:13,  5.72it/s]

Epoch [31/40], Step [310/391], Loss: 0.6079


 82%|████████▏ | 321/391 [00:56<00:12,  5.73it/s]

Epoch [31/40], Step [320/391], Loss: 0.5939


 85%|████████▍ | 331/391 [00:58<00:10,  5.74it/s]

Epoch [31/40], Step [330/391], Loss: 0.6170


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [31/40], Step [340/391], Loss: 0.6138


 90%|████████▉ | 351/391 [01:02<00:06,  5.75it/s]

Epoch [31/40], Step [350/391], Loss: 0.6081


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [31/40], Step [360/391], Loss: 0.6173


 95%|█████████▍| 371/391 [01:05<00:03,  5.72it/s]

Epoch [31/40], Step [370/391], Loss: 0.6062


 97%|█████████▋| 381/391 [01:07<00:01,  5.74it/s]

Epoch [31/40], Step [380/391], Loss: 0.5981


100%|██████████| 391/391 [01:08<00:00,  5.67it/s]

Epoch [31/40], Step [390/391], Loss: 0.6106





Test Accuracy of the student model on the test images: 65.32 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.47it/s]

Epoch [32/40], Step [10/391], Loss: 0.5845


  5%|▌         | 21/391 [00:04<01:04,  5.74it/s]

Epoch [32/40], Step [20/391], Loss: 0.5792


  8%|▊         | 31/391 [00:06<01:02,  5.72it/s]

Epoch [32/40], Step [30/391], Loss: 0.5652


 10%|█         | 41/391 [00:08<01:00,  5.74it/s]

Epoch [32/40], Step [40/391], Loss: 0.5910


 13%|█▎        | 51/391 [00:09<00:59,  5.73it/s]

Epoch [32/40], Step [50/391], Loss: 0.5804


 16%|█▌        | 61/391 [00:11<00:57,  5.73it/s]

Epoch [32/40], Step [60/391], Loss: 0.5807


 18%|█▊        | 71/391 [00:13<00:55,  5.75it/s]

Epoch [32/40], Step [70/391], Loss: 0.5685


 21%|██        | 81/391 [00:15<00:54,  5.72it/s]

Epoch [32/40], Step [80/391], Loss: 0.5701


 23%|██▎       | 91/391 [00:16<00:52,  5.71it/s]

Epoch [32/40], Step [90/391], Loss: 0.5669


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [32/40], Step [100/391], Loss: 0.5671


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [32/40], Step [110/391], Loss: 0.5754


 31%|███       | 121/391 [00:22<00:47,  5.73it/s]

Epoch [32/40], Step [120/391], Loss: 0.5543


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [32/40], Step [130/391], Loss: 0.5786


 36%|███▌      | 141/391 [00:25<00:43,  5.73it/s]

Epoch [32/40], Step [140/391], Loss: 0.5583


 39%|███▊      | 151/391 [00:27<00:41,  5.72it/s]

Epoch [32/40], Step [150/391], Loss: 0.5649


 41%|████      | 161/391 [00:29<00:40,  5.73it/s]

Epoch [32/40], Step [160/391], Loss: 0.5713


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [32/40], Step [170/391], Loss: 0.5691


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [32/40], Step [180/391], Loss: 0.5716


 49%|████▉     | 191/391 [00:34<00:35,  5.71it/s]

Epoch [32/40], Step [190/391], Loss: 0.5793


 51%|█████▏    | 201/391 [00:36<00:33,  5.73it/s]

Epoch [32/40], Step [200/391], Loss: 0.5833


 54%|█████▍    | 211/391 [00:37<00:31,  5.75it/s]

Epoch [32/40], Step [210/391], Loss: 0.5623


 57%|█████▋    | 221/391 [00:39<00:29,  5.71it/s]

Epoch [32/40], Step [220/391], Loss: 0.5629


 59%|█████▉    | 231/391 [00:41<00:27,  5.74it/s]

Epoch [32/40], Step [230/391], Loss: 0.5651


 62%|██████▏   | 241/391 [00:43<00:26,  5.74it/s]

Epoch [32/40], Step [240/391], Loss: 0.5731


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [32/40], Step [250/391], Loss: 0.5843


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [32/40], Step [260/391], Loss: 0.5742


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [32/40], Step [270/391], Loss: 0.5684


 72%|███████▏  | 281/391 [00:49<00:19,  5.74it/s]

Epoch [32/40], Step [280/391], Loss: 0.5895


 74%|███████▍  | 291/391 [00:51<00:17,  5.72it/s]

Epoch [32/40], Step [290/391], Loss: 0.5728


 77%|███████▋  | 301/391 [00:53<00:15,  5.74it/s]

Epoch [32/40], Step [300/391], Loss: 0.5819


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [32/40], Step [310/391], Loss: 0.5740


 82%|████████▏ | 321/391 [00:56<00:12,  5.73it/s]

Epoch [32/40], Step [320/391], Loss: 0.5677


 85%|████████▍ | 331/391 [00:58<00:10,  5.74it/s]

Epoch [32/40], Step [330/391], Loss: 0.5754


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [32/40], Step [340/391], Loss: 0.5674


 90%|████████▉ | 351/391 [01:02<00:06,  5.74it/s]

Epoch [32/40], Step [350/391], Loss: 0.5759


 92%|█████████▏| 361/391 [01:03<00:05,  5.71it/s]

Epoch [32/40], Step [360/391], Loss: 0.5741


 95%|█████████▍| 371/391 [01:05<00:03,  5.75it/s]

Epoch [32/40], Step [370/391], Loss: 0.5866


 97%|█████████▋| 381/391 [01:07<00:01,  5.74it/s]

Epoch [32/40], Step [380/391], Loss: 0.5729


100%|██████████| 391/391 [01:09<00:00,  5.65it/s]

Epoch [32/40], Step [390/391], Loss: 0.5681





Test Accuracy of the student model on the test images: 65.47 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.52it/s]

Epoch [33/40], Step [10/391], Loss: 0.5574


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [33/40], Step [20/391], Loss: 0.5479


  8%|▊         | 31/391 [00:06<01:02,  5.74it/s]

Epoch [33/40], Step [30/391], Loss: 0.5388


 10%|█         | 41/391 [00:07<01:01,  5.74it/s]

Epoch [33/40], Step [40/391], Loss: 0.5340


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [33/40], Step [50/391], Loss: 0.5404


 16%|█▌        | 61/391 [00:11<00:57,  5.71it/s]

Epoch [33/40], Step [60/391], Loss: 0.5423


 18%|█▊        | 71/391 [00:13<00:56,  5.69it/s]

Epoch [33/40], Step [70/391], Loss: 0.5353


 21%|██        | 81/391 [00:14<00:54,  5.72it/s]

Epoch [33/40], Step [80/391], Loss: 0.5431


 23%|██▎       | 91/391 [00:16<00:52,  5.74it/s]

Epoch [33/40], Step [90/391], Loss: 0.5501


 26%|██▌       | 101/391 [00:18<00:50,  5.72it/s]

Epoch [33/40], Step [100/391], Loss: 0.5448


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [33/40], Step [110/391], Loss: 0.5466


 31%|███       | 121/391 [00:21<00:46,  5.75it/s]

Epoch [33/40], Step [120/391], Loss: 0.5468


 34%|███▎      | 131/391 [00:23<00:45,  5.73it/s]

Epoch [33/40], Step [130/391], Loss: 0.5547


 36%|███▌      | 141/391 [00:25<00:43,  5.74it/s]

Epoch [33/40], Step [140/391], Loss: 0.5452


 39%|███▊      | 151/391 [00:27<00:41,  5.73it/s]

Epoch [33/40], Step [150/391], Loss: 0.5397


 41%|████      | 161/391 [00:28<00:40,  5.74it/s]

Epoch [33/40], Step [160/391], Loss: 0.5459


 44%|████▎     | 171/391 [00:30<00:38,  5.71it/s]

Epoch [33/40], Step [170/391], Loss: 0.5621


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [33/40], Step [180/391], Loss: 0.5452


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [33/40], Step [190/391], Loss: 0.5513


 51%|█████▏    | 201/391 [00:35<00:33,  5.73it/s]

Epoch [33/40], Step [200/391], Loss: 0.5572


 54%|█████▍    | 211/391 [00:37<00:31,  5.72it/s]

Epoch [33/40], Step [210/391], Loss: 0.5422


 57%|█████▋    | 221/391 [00:39<00:29,  5.72it/s]

Epoch [33/40], Step [220/391], Loss: 0.5499


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [33/40], Step [230/391], Loss: 0.5631


 62%|██████▏   | 241/391 [00:42<00:26,  5.73it/s]

Epoch [33/40], Step [240/391], Loss: 0.5507


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [33/40], Step [250/391], Loss: 0.5548


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [33/40], Step [260/391], Loss: 0.5538


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [33/40], Step [270/391], Loss: 0.5534


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [33/40], Step [280/391], Loss: 0.5598


 74%|███████▍  | 291/391 [00:51<00:17,  5.75it/s]

Epoch [33/40], Step [290/391], Loss: 0.5607


 77%|███████▋  | 301/391 [00:53<00:15,  5.75it/s]

Epoch [33/40], Step [300/391], Loss: 0.5458


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [33/40], Step [310/391], Loss: 0.5322


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [33/40], Step [320/391], Loss: 0.5563


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [33/40], Step [330/391], Loss: 0.5403


 87%|████████▋ | 341/391 [01:00<00:08,  5.73it/s]

Epoch [33/40], Step [340/391], Loss: 0.5507


 90%|████████▉ | 351/391 [01:01<00:06,  5.72it/s]

Epoch [33/40], Step [350/391], Loss: 0.5422


 92%|█████████▏| 361/391 [01:03<00:05,  5.75it/s]

Epoch [33/40], Step [360/391], Loss: 0.5495


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [33/40], Step [370/391], Loss: 0.5584


 97%|█████████▋| 381/391 [01:07<00:01,  5.74it/s]

Epoch [33/40], Step [380/391], Loss: 0.5530


100%|██████████| 391/391 [01:08<00:00,  5.67it/s]

Epoch [33/40], Step [390/391], Loss: 0.5368





Test Accuracy of the student model on the test images: 65.71 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.50it/s]

Epoch [34/40], Step [10/391], Loss: 0.5279


  5%|▌         | 21/391 [00:04<01:04,  5.72it/s]

Epoch [34/40], Step [20/391], Loss: 0.5117


  8%|▊         | 31/391 [00:06<01:02,  5.72it/s]

Epoch [34/40], Step [30/391], Loss: 0.5143


 10%|█         | 41/391 [00:08<01:01,  5.72it/s]

Epoch [34/40], Step [40/391], Loss: 0.5193


 13%|█▎        | 51/391 [00:09<00:59,  5.75it/s]

Epoch [34/40], Step [50/391], Loss: 0.5302


 16%|█▌        | 61/391 [00:11<00:57,  5.73it/s]

Epoch [34/40], Step [60/391], Loss: 0.5326


 18%|█▊        | 71/391 [00:13<00:55,  5.73it/s]

Epoch [34/40], Step [70/391], Loss: 0.5191


 21%|██        | 81/391 [00:15<00:54,  5.66it/s]

Epoch [34/40], Step [80/391], Loss: 0.5220


 23%|██▎       | 91/391 [00:16<00:52,  5.74it/s]

Epoch [34/40], Step [90/391], Loss: 0.5201


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [34/40], Step [100/391], Loss: 0.5175


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [34/40], Step [110/391], Loss: 0.5037


 31%|███       | 121/391 [00:22<00:47,  5.72it/s]

Epoch [34/40], Step [120/391], Loss: 0.5253


 34%|███▎      | 131/391 [00:23<00:45,  5.73it/s]

Epoch [34/40], Step [130/391], Loss: 0.5138


 36%|███▌      | 141/391 [00:25<00:43,  5.73it/s]

Epoch [34/40], Step [140/391], Loss: 0.5186


 39%|███▊      | 151/391 [00:27<00:41,  5.74it/s]

Epoch [34/40], Step [150/391], Loss: 0.5110


 41%|████      | 161/391 [00:28<00:40,  5.74it/s]

Epoch [34/40], Step [160/391], Loss: 0.5338


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [34/40], Step [170/391], Loss: 0.5105


 46%|████▋     | 181/391 [00:32<00:36,  5.73it/s]

Epoch [34/40], Step [180/391], Loss: 0.5223


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [34/40], Step [190/391], Loss: 0.5183


 51%|█████▏    | 201/391 [00:35<00:33,  5.73it/s]

Epoch [34/40], Step [200/391], Loss: 0.5238


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [34/40], Step [210/391], Loss: 0.5249


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [34/40], Step [220/391], Loss: 0.5234


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [34/40], Step [230/391], Loss: 0.5229


 62%|██████▏   | 241/391 [00:42<00:26,  5.74it/s]

Epoch [34/40], Step [240/391], Loss: 0.5354


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [34/40], Step [250/391], Loss: 0.5321


 67%|██████▋   | 261/391 [00:46<00:22,  5.75it/s]

Epoch [34/40], Step [260/391], Loss: 0.5164


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [34/40], Step [270/391], Loss: 0.5223


 72%|███████▏  | 281/391 [00:49<00:19,  5.74it/s]

Epoch [34/40], Step [280/391], Loss: 0.5286


 74%|███████▍  | 291/391 [00:51<00:17,  5.73it/s]

Epoch [34/40], Step [290/391], Loss: 0.5161


 77%|███████▋  | 301/391 [00:53<00:15,  5.74it/s]

Epoch [34/40], Step [300/391], Loss: 0.5221


 80%|███████▉  | 311/391 [00:55<00:13,  5.73it/s]

Epoch [34/40], Step [310/391], Loss: 0.5160


 82%|████████▏ | 321/391 [00:56<00:12,  5.75it/s]

Epoch [34/40], Step [320/391], Loss: 0.5300


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [34/40], Step [330/391], Loss: 0.5362


 87%|████████▋ | 341/391 [01:00<00:08,  5.75it/s]

Epoch [34/40], Step [340/391], Loss: 0.5342


 90%|████████▉ | 351/391 [01:02<00:06,  5.73it/s]

Epoch [34/40], Step [350/391], Loss: 0.5250


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [34/40], Step [360/391], Loss: 0.5323


 95%|█████████▍| 371/391 [01:05<00:03,  5.73it/s]

Epoch [34/40], Step [370/391], Loss: 0.5181


 97%|█████████▋| 381/391 [01:07<00:01,  5.73it/s]

Epoch [34/40], Step [380/391], Loss: 0.5258


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [34/40], Step [390/391], Loss: 0.5447





Test Accuracy of the student model on the test images: 66.55 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.44it/s]

Epoch [35/40], Step [10/391], Loss: 0.5132


  5%|▌         | 21/391 [00:04<01:04,  5.72it/s]

Epoch [35/40], Step [20/391], Loss: 0.5073


  8%|▊         | 31/391 [00:06<01:02,  5.73it/s]

Epoch [35/40], Step [30/391], Loss: 0.4968


 10%|█         | 41/391 [00:08<01:01,  5.73it/s]

Epoch [35/40], Step [40/391], Loss: 0.4963


 13%|█▎        | 51/391 [00:09<00:59,  5.71it/s]

Epoch [35/40], Step [50/391], Loss: 0.4995


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [35/40], Step [60/391], Loss: 0.5095


 18%|█▊        | 71/391 [00:13<00:55,  5.74it/s]

Epoch [35/40], Step [70/391], Loss: 0.4904


 21%|██        | 81/391 [00:15<00:53,  5.74it/s]

Epoch [35/40], Step [80/391], Loss: 0.5000


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [35/40], Step [90/391], Loss: 0.4949


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [35/40], Step [100/391], Loss: 0.5018


 28%|██▊       | 111/391 [00:20<00:48,  5.72it/s]

Epoch [35/40], Step [110/391], Loss: 0.5068


 31%|███       | 121/391 [00:22<00:47,  5.74it/s]

Epoch [35/40], Step [120/391], Loss: 0.5051


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [35/40], Step [130/391], Loss: 0.5008


 36%|███▌      | 141/391 [00:25<00:43,  5.75it/s]

Epoch [35/40], Step [140/391], Loss: 0.4965


 39%|███▊      | 151/391 [00:27<00:41,  5.73it/s]

Epoch [35/40], Step [150/391], Loss: 0.4922


 41%|████      | 161/391 [00:29<00:40,  5.74it/s]

Epoch [35/40], Step [160/391], Loss: 0.4928


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [35/40], Step [170/391], Loss: 0.4955


 46%|████▋     | 181/391 [00:32<00:36,  5.73it/s]

Epoch [35/40], Step [180/391], Loss: 0.5039


 49%|████▉     | 191/391 [00:34<00:34,  5.74it/s]

Epoch [35/40], Step [190/391], Loss: 0.5004


 51%|█████▏    | 201/391 [00:36<00:33,  5.74it/s]

Epoch [35/40], Step [200/391], Loss: 0.4988


 54%|█████▍    | 211/391 [00:37<00:31,  5.73it/s]

Epoch [35/40], Step [210/391], Loss: 0.5054


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [35/40], Step [220/391], Loss: 0.5048


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [35/40], Step [230/391], Loss: 0.5013


 62%|██████▏   | 241/391 [00:43<00:26,  5.70it/s]

Epoch [35/40], Step [240/391], Loss: 0.5034


 64%|██████▍   | 251/391 [00:44<00:24,  5.73it/s]

Epoch [35/40], Step [250/391], Loss: 0.5148


 67%|██████▋   | 261/391 [00:46<00:22,  5.75it/s]

Epoch [35/40], Step [260/391], Loss: 0.5079


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [35/40], Step [270/391], Loss: 0.5003


 72%|███████▏  | 281/391 [00:49<00:19,  5.74it/s]

Epoch [35/40], Step [280/391], Loss: 0.4991


 74%|███████▍  | 291/391 [00:51<00:17,  5.73it/s]

Epoch [35/40], Step [290/391], Loss: 0.5042


 77%|███████▋  | 301/391 [00:53<00:15,  5.73it/s]

Epoch [35/40], Step [300/391], Loss: 0.5003


 80%|███████▉  | 311/391 [00:55<00:13,  5.72it/s]

Epoch [35/40], Step [310/391], Loss: 0.5045


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [35/40], Step [320/391], Loss: 0.5055


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [35/40], Step [330/391], Loss: 0.4961


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [35/40], Step [340/391], Loss: 0.5014


 90%|████████▉ | 351/391 [01:02<00:06,  5.74it/s]

Epoch [35/40], Step [350/391], Loss: 0.5028


 92%|█████████▏| 361/391 [01:03<00:05,  5.72it/s]

Epoch [35/40], Step [360/391], Loss: 0.5099


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [35/40], Step [370/391], Loss: 0.5080


 97%|█████████▋| 381/391 [01:07<00:01,  5.74it/s]

Epoch [35/40], Step [380/391], Loss: 0.5036


100%|██████████| 391/391 [01:09<00:00,  5.65it/s]

Epoch [35/40], Step [390/391], Loss: 0.4986





Test Accuracy of the student model on the test images: 67.21 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.46it/s]

Epoch [36/40], Step [10/391], Loss: 0.4870


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [36/40], Step [20/391], Loss: 0.4873


  8%|▊         | 31/391 [00:06<01:02,  5.72it/s]

Epoch [36/40], Step [30/391], Loss: 0.4813


 10%|█         | 41/391 [00:08<01:01,  5.73it/s]

Epoch [36/40], Step [40/391], Loss: 0.4813


 13%|█▎        | 51/391 [00:09<00:59,  5.73it/s]

Epoch [36/40], Step [50/391], Loss: 0.4784


 16%|█▌        | 61/391 [00:11<00:57,  5.73it/s]

Epoch [36/40], Step [60/391], Loss: 0.4797


 18%|█▊        | 71/391 [00:13<00:55,  5.74it/s]

Epoch [36/40], Step [70/391], Loss: 0.4855


 21%|██        | 81/391 [00:15<00:53,  5.74it/s]

Epoch [36/40], Step [80/391], Loss: 0.4813


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [36/40], Step [90/391], Loss: 0.4907


 26%|██▌       | 101/391 [00:18<00:50,  5.71it/s]

Epoch [36/40], Step [100/391], Loss: 0.4875


 28%|██▊       | 111/391 [00:20<00:49,  5.70it/s]

Epoch [36/40], Step [110/391], Loss: 0.4838


 31%|███       | 121/391 [00:22<00:47,  5.74it/s]

Epoch [36/40], Step [120/391], Loss: 0.4812


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [36/40], Step [130/391], Loss: 0.4818


 36%|███▌      | 141/391 [00:25<00:43,  5.73it/s]

Epoch [36/40], Step [140/391], Loss: 0.4848


 39%|███▊      | 151/391 [00:27<00:41,  5.73it/s]

Epoch [36/40], Step [150/391], Loss: 0.4989


 41%|████      | 161/391 [00:29<00:40,  5.73it/s]

Epoch [36/40], Step [160/391], Loss: 0.4952


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [36/40], Step [170/391], Loss: 0.4855


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [36/40], Step [180/391], Loss: 0.4957


 49%|████▉     | 191/391 [00:34<00:34,  5.74it/s]

Epoch [36/40], Step [190/391], Loss: 0.4896


 51%|█████▏    | 201/391 [00:36<00:33,  5.74it/s]

Epoch [36/40], Step [200/391], Loss: 0.4873


 54%|█████▍    | 211/391 [00:37<00:31,  5.72it/s]

Epoch [36/40], Step [210/391], Loss: 0.4822


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [36/40], Step [220/391], Loss: 0.4925


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [36/40], Step [230/391], Loss: 0.4814


 62%|██████▏   | 241/391 [00:43<00:26,  5.70it/s]

Epoch [36/40], Step [240/391], Loss: 0.4944


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [36/40], Step [250/391], Loss: 0.4872


 67%|██████▋   | 261/391 [00:46<00:22,  5.73it/s]

Epoch [36/40], Step [260/391], Loss: 0.4994


 69%|██████▉   | 271/391 [00:48<00:20,  5.75it/s]

Epoch [36/40], Step [270/391], Loss: 0.4969


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [36/40], Step [280/391], Loss: 0.4806


 74%|███████▍  | 291/391 [00:51<00:17,  5.73it/s]

Epoch [36/40], Step [290/391], Loss: 0.4958


 77%|███████▋  | 301/391 [00:53<00:15,  5.73it/s]

Epoch [36/40], Step [300/391], Loss: 0.4931


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [36/40], Step [310/391], Loss: 0.4791


 82%|████████▏ | 321/391 [00:56<00:12,  5.75it/s]

Epoch [36/40], Step [320/391], Loss: 0.4876


 85%|████████▍ | 331/391 [00:58<00:10,  5.74it/s]

Epoch [36/40], Step [330/391], Loss: 0.4808


 87%|████████▋ | 341/391 [01:00<00:08,  5.75it/s]

Epoch [36/40], Step [340/391], Loss: 0.4902


 90%|████████▉ | 351/391 [01:02<00:06,  5.75it/s]

Epoch [36/40], Step [350/391], Loss: 0.4960


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [36/40], Step [360/391], Loss: 0.4931


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [36/40], Step [370/391], Loss: 0.4849


 97%|█████████▋| 381/391 [01:07<00:01,  5.75it/s]

Epoch [36/40], Step [380/391], Loss: 0.4890


100%|██████████| 391/391 [01:09<00:00,  5.65it/s]

Epoch [36/40], Step [390/391], Loss: 0.4873





Test Accuracy of the student model on the test images: 66.99 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.48it/s]

Epoch [37/40], Step [10/391], Loss: 0.4786


  5%|▌         | 21/391 [00:04<01:04,  5.74it/s]

Epoch [37/40], Step [20/391], Loss: 0.4624


  8%|▊         | 31/391 [00:06<01:02,  5.72it/s]

Epoch [37/40], Step [30/391], Loss: 0.4748


 10%|█         | 41/391 [00:08<01:01,  5.73it/s]

Epoch [37/40], Step [40/391], Loss: 0.4717


 13%|█▎        | 51/391 [00:09<00:59,  5.72it/s]

Epoch [37/40], Step [50/391], Loss: 0.4720


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [37/40], Step [60/391], Loss: 0.4785


 18%|█▊        | 71/391 [00:13<00:55,  5.74it/s]

Epoch [37/40], Step [70/391], Loss: 0.4696


 21%|██        | 81/391 [00:15<00:53,  5.74it/s]

Epoch [37/40], Step [80/391], Loss: 0.4742


 23%|██▎       | 91/391 [00:16<00:52,  5.73it/s]

Epoch [37/40], Step [90/391], Loss: 0.4805


 26%|██▌       | 101/391 [00:18<00:50,  5.73it/s]

Epoch [37/40], Step [100/391], Loss: 0.4841


 28%|██▊       | 111/391 [00:20<00:48,  5.73it/s]

Epoch [37/40], Step [110/391], Loss: 0.4787


 31%|███       | 121/391 [00:21<00:47,  5.73it/s]

Epoch [37/40], Step [120/391], Loss: 0.4865


 34%|███▎      | 131/391 [00:23<00:45,  5.74it/s]

Epoch [37/40], Step [130/391], Loss: 0.4641


 36%|███▌      | 141/391 [00:25<00:43,  5.73it/s]

Epoch [37/40], Step [140/391], Loss: 0.4661


 39%|███▊      | 151/391 [00:27<00:41,  5.73it/s]

Epoch [37/40], Step [150/391], Loss: 0.4709


 41%|████      | 161/391 [00:28<00:40,  5.74it/s]

Epoch [37/40], Step [160/391], Loss: 0.4718


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [37/40], Step [170/391], Loss: 0.4839


 46%|████▋     | 181/391 [00:32<00:36,  5.75it/s]

Epoch [37/40], Step [180/391], Loss: 0.4689


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [37/40], Step [190/391], Loss: 0.4734


 51%|█████▏    | 201/391 [00:35<00:33,  5.75it/s]

Epoch [37/40], Step [200/391], Loss: 0.4712


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [37/40], Step [210/391], Loss: 0.4650


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [37/40], Step [220/391], Loss: 0.4838


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [37/40], Step [230/391], Loss: 0.4825


 62%|██████▏   | 241/391 [00:42<00:26,  5.75it/s]

Epoch [37/40], Step [240/391], Loss: 0.4689


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [37/40], Step [250/391], Loss: 0.4715


 67%|██████▋   | 261/391 [00:46<00:22,  5.72it/s]

Epoch [37/40], Step [260/391], Loss: 0.4693


 69%|██████▉   | 271/391 [00:48<00:20,  5.73it/s]

Epoch [37/40], Step [270/391], Loss: 0.4763


 72%|███████▏  | 281/391 [00:49<00:19,  5.72it/s]

Epoch [37/40], Step [280/391], Loss: 0.4694


 74%|███████▍  | 291/391 [00:51<00:17,  5.74it/s]

Epoch [37/40], Step [290/391], Loss: 0.4710


 77%|███████▋  | 301/391 [00:53<00:15,  5.73it/s]

Epoch [37/40], Step [300/391], Loss: 0.4678


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [37/40], Step [310/391], Loss: 0.4723


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [37/40], Step [320/391], Loss: 0.4794


 85%|████████▍ | 331/391 [00:58<00:10,  5.72it/s]

Epoch [37/40], Step [330/391], Loss: 0.4730


 87%|████████▋ | 341/391 [01:00<00:08,  5.73it/s]

Epoch [37/40], Step [340/391], Loss: 0.4726


 90%|████████▉ | 351/391 [01:02<00:06,  5.72it/s]

Epoch [37/40], Step [350/391], Loss: 0.4775


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [37/40], Step [360/391], Loss: 0.4746


 95%|█████████▍| 371/391 [01:05<00:03,  5.74it/s]

Epoch [37/40], Step [370/391], Loss: 0.4692


 97%|█████████▋| 381/391 [01:07<00:01,  5.74it/s]

Epoch [37/40], Step [380/391], Loss: 0.4737


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [37/40], Step [390/391], Loss: 0.4664





Test Accuracy of the student model on the test images: 67.78 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.48it/s]

Epoch [38/40], Step [10/391], Loss: 0.4733


  5%|▌         | 21/391 [00:04<01:04,  5.73it/s]

Epoch [38/40], Step [20/391], Loss: 0.4610


  8%|▊         | 31/391 [00:06<01:02,  5.73it/s]

Epoch [38/40], Step [30/391], Loss: 0.4672


 10%|█         | 41/391 [00:08<01:00,  5.74it/s]

Epoch [38/40], Step [40/391], Loss: 0.4630


 13%|█▎        | 51/391 [00:09<00:59,  5.74it/s]

Epoch [38/40], Step [50/391], Loss: 0.4595


 16%|█▌        | 61/391 [00:11<00:57,  5.71it/s]

Epoch [38/40], Step [60/391], Loss: 0.4578


 18%|█▊        | 71/391 [00:13<00:55,  5.74it/s]

Epoch [38/40], Step [70/391], Loss: 0.4642


 21%|██        | 81/391 [00:15<00:54,  5.72it/s]

Epoch [38/40], Step [80/391], Loss: 0.4527


 23%|██▎       | 91/391 [00:16<00:52,  5.71it/s]

Epoch [38/40], Step [90/391], Loss: 0.4732


 26%|██▌       | 101/391 [00:18<00:50,  5.73it/s]

Epoch [38/40], Step [100/391], Loss: 0.4614


 28%|██▊       | 111/391 [00:20<00:48,  5.72it/s]

Epoch [38/40], Step [110/391], Loss: 0.4618


 31%|███       | 121/391 [00:22<00:47,  5.73it/s]

Epoch [38/40], Step [120/391], Loss: 0.4600


 34%|███▎      | 131/391 [00:23<00:45,  5.73it/s]

Epoch [38/40], Step [130/391], Loss: 0.4668


 36%|███▌      | 141/391 [00:25<00:43,  5.72it/s]

Epoch [38/40], Step [140/391], Loss: 0.4629


 39%|███▊      | 151/391 [00:27<00:41,  5.73it/s]

Epoch [38/40], Step [150/391], Loss: 0.4605


 41%|████      | 161/391 [00:29<00:40,  5.74it/s]

Epoch [38/40], Step [160/391], Loss: 0.4581


 44%|████▎     | 171/391 [00:30<00:38,  5.74it/s]

Epoch [38/40], Step [170/391], Loss: 0.4750


 46%|████▋     | 181/391 [00:32<00:36,  5.74it/s]

Epoch [38/40], Step [180/391], Loss: 0.4612


 49%|████▉     | 191/391 [00:34<00:34,  5.72it/s]

Epoch [38/40], Step [190/391], Loss: 0.4681


 51%|█████▏    | 201/391 [00:36<00:33,  5.74it/s]

Epoch [38/40], Step [200/391], Loss: 0.4672


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [38/40], Step [210/391], Loss: 0.4562


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [38/40], Step [220/391], Loss: 0.4664


 59%|█████▉    | 231/391 [00:41<00:27,  5.75it/s]

Epoch [38/40], Step [230/391], Loss: 0.4649


 62%|██████▏   | 241/391 [00:42<00:26,  5.72it/s]

Epoch [38/40], Step [240/391], Loss: 0.4687


 64%|██████▍   | 251/391 [00:44<00:24,  5.73it/s]

Epoch [38/40], Step [250/391], Loss: 0.4626


 67%|██████▋   | 261/391 [00:46<00:22,  5.74it/s]

Epoch [38/40], Step [260/391], Loss: 0.4676


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [38/40], Step [270/391], Loss: 0.4670


 72%|███████▏  | 281/391 [00:49<00:19,  5.72it/s]

Epoch [38/40], Step [280/391], Loss: 0.4628


 74%|███████▍  | 291/391 [00:51<00:17,  5.74it/s]

Epoch [38/40], Step [290/391], Loss: 0.4677


 77%|███████▋  | 301/391 [00:53<00:15,  5.74it/s]

Epoch [38/40], Step [300/391], Loss: 0.4583


 80%|███████▉  | 311/391 [00:55<00:13,  5.73it/s]

Epoch [38/40], Step [310/391], Loss: 0.4619


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [38/40], Step [320/391], Loss: 0.4551


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [38/40], Step [330/391], Loss: 0.4628


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [38/40], Step [340/391], Loss: 0.4627


 90%|████████▉ | 351/391 [01:02<00:06,  5.73it/s]

Epoch [38/40], Step [350/391], Loss: 0.4643


 92%|█████████▏| 361/391 [01:03<00:05,  5.74it/s]

Epoch [38/40], Step [360/391], Loss: 0.4605


 95%|█████████▍| 371/391 [01:05<00:03,  5.75it/s]

Epoch [38/40], Step [370/391], Loss: 0.4611


 97%|█████████▋| 381/391 [01:07<00:01,  5.74it/s]

Epoch [38/40], Step [380/391], Loss: 0.4602


100%|██████████| 391/391 [01:09<00:00,  5.66it/s]

Epoch [38/40], Step [390/391], Loss: 0.4670





Test Accuracy of the student model on the test images: 67.73 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.46it/s]

Epoch [39/40], Step [10/391], Loss: 0.4497


  5%|▌         | 21/391 [00:04<01:04,  5.74it/s]

Epoch [39/40], Step [20/391], Loss: 0.4657


  8%|▊         | 31/391 [00:06<01:02,  5.73it/s]

Epoch [39/40], Step [30/391], Loss: 0.4530


 10%|█         | 41/391 [00:08<01:01,  5.72it/s]

Epoch [39/40], Step [40/391], Loss: 0.4548


 13%|█▎        | 51/391 [00:09<00:59,  5.73it/s]

Epoch [39/40], Step [50/391], Loss: 0.4465


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [39/40], Step [60/391], Loss: 0.4618


 18%|█▊        | 71/391 [00:13<00:56,  5.62it/s]

Epoch [39/40], Step [70/391], Loss: 0.4579


 21%|██        | 81/391 [00:15<00:54,  5.73it/s]

Epoch [39/40], Step [80/391], Loss: 0.4474


 23%|██▎       | 91/391 [00:16<00:52,  5.74it/s]

Epoch [39/40], Step [90/391], Loss: 0.4560


 26%|██▌       | 101/391 [00:18<00:50,  5.74it/s]

Epoch [39/40], Step [100/391], Loss: 0.4529


 28%|██▊       | 111/391 [00:20<00:48,  5.73it/s]

Epoch [39/40], Step [110/391], Loss: 0.4497


 31%|███       | 121/391 [00:22<00:47,  5.72it/s]

Epoch [39/40], Step [120/391], Loss: 0.4562


 34%|███▎      | 131/391 [00:23<00:45,  5.73it/s]

Epoch [39/40], Step [130/391], Loss: 0.4502


 36%|███▌      | 141/391 [00:25<00:43,  5.73it/s]

Epoch [39/40], Step [140/391], Loss: 0.4426


 39%|███▊      | 151/391 [00:27<00:41,  5.75it/s]

Epoch [39/40], Step [150/391], Loss: 0.4574


 41%|████      | 161/391 [00:29<00:40,  5.74it/s]

Epoch [39/40], Step [160/391], Loss: 0.4622


 44%|████▎     | 171/391 [00:30<00:38,  5.73it/s]

Epoch [39/40], Step [170/391], Loss: 0.4505


 46%|████▋     | 181/391 [00:32<00:36,  5.73it/s]

Epoch [39/40], Step [180/391], Loss: 0.4575


 49%|████▉     | 191/391 [00:34<00:34,  5.74it/s]

Epoch [39/40], Step [190/391], Loss: 0.4662


 51%|█████▏    | 201/391 [00:36<00:33,  5.73it/s]

Epoch [39/40], Step [200/391], Loss: 0.4547


 54%|█████▍    | 211/391 [00:37<00:31,  5.74it/s]

Epoch [39/40], Step [210/391], Loss: 0.4722


 57%|█████▋    | 221/391 [00:39<00:29,  5.74it/s]

Epoch [39/40], Step [220/391], Loss: 0.4525


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [39/40], Step [230/391], Loss: 0.4661


 62%|██████▏   | 241/391 [00:43<00:26,  5.73it/s]

Epoch [39/40], Step [240/391], Loss: 0.4592


 64%|██████▍   | 251/391 [00:44<00:24,  5.74it/s]

Epoch [39/40], Step [250/391], Loss: 0.4522


 67%|██████▋   | 261/391 [00:46<00:22,  5.73it/s]

Epoch [39/40], Step [260/391], Loss: 0.4633


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [39/40], Step [270/391], Loss: 0.4707


 72%|███████▏  | 281/391 [00:50<00:19,  5.73it/s]

Epoch [39/40], Step [280/391], Loss: 0.4481


 74%|███████▍  | 291/391 [00:51<00:17,  5.75it/s]

Epoch [39/40], Step [290/391], Loss: 0.4520


 77%|███████▋  | 301/391 [00:53<00:15,  5.73it/s]

Epoch [39/40], Step [300/391], Loss: 0.4648


 80%|███████▉  | 311/391 [00:55<00:13,  5.74it/s]

Epoch [39/40], Step [310/391], Loss: 0.4529


 82%|████████▏ | 321/391 [00:57<00:12,  5.73it/s]

Epoch [39/40], Step [320/391], Loss: 0.4511


 85%|████████▍ | 331/391 [00:58<00:10,  5.74it/s]

Epoch [39/40], Step [330/391], Loss: 0.4507


 87%|████████▋ | 341/391 [01:00<00:08,  5.72it/s]

Epoch [39/40], Step [340/391], Loss: 0.4505


 90%|████████▉ | 351/391 [01:02<00:06,  5.73it/s]

Epoch [39/40], Step [350/391], Loss: 0.4540


 92%|█████████▏| 361/391 [01:03<00:05,  5.73it/s]

Epoch [39/40], Step [360/391], Loss: 0.4451


 95%|█████████▍| 371/391 [01:05<00:03,  5.73it/s]

Epoch [39/40], Step [370/391], Loss: 0.4583


 97%|█████████▋| 381/391 [01:07<00:01,  5.72it/s]

Epoch [39/40], Step [380/391], Loss: 0.4620


100%|██████████| 391/391 [01:09<00:00,  5.65it/s]

Epoch [39/40], Step [390/391], Loss: 0.4572





Test Accuracy of the student model on the test images: 67.94 %
Saved best model to interim_ta1.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:09,  5.45it/s]

Epoch [40/40], Step [10/391], Loss: 0.4382


  5%|▌         | 21/391 [00:04<01:04,  5.72it/s]

Epoch [40/40], Step [20/391], Loss: 0.4528


  8%|▊         | 31/391 [00:06<01:03,  5.71it/s]

Epoch [40/40], Step [30/391], Loss: 0.4618


 10%|█         | 41/391 [00:08<01:00,  5.74it/s]

Epoch [40/40], Step [40/391], Loss: 0.4572


 13%|█▎        | 51/391 [00:09<00:59,  5.73it/s]

Epoch [40/40], Step [50/391], Loss: 0.4482


 16%|█▌        | 61/391 [00:11<00:57,  5.74it/s]

Epoch [40/40], Step [60/391], Loss: 0.4579


 18%|█▊        | 71/391 [00:13<00:55,  5.74it/s]

Epoch [40/40], Step [70/391], Loss: 0.4513


 21%|██        | 81/391 [00:15<00:54,  5.74it/s]

Epoch [40/40], Step [80/391], Loss: 0.4569


 23%|██▎       | 91/391 [00:16<00:52,  5.71it/s]

Epoch [40/40], Step [90/391], Loss: 0.4513


 26%|██▌       | 101/391 [00:18<00:50,  5.73it/s]

Epoch [40/40], Step [100/391], Loss: 0.4473


 28%|██▊       | 111/391 [00:20<00:48,  5.74it/s]

Epoch [40/40], Step [110/391], Loss: 0.4522


 31%|███       | 121/391 [00:22<00:47,  5.74it/s]

Epoch [40/40], Step [120/391], Loss: 0.4520


 34%|███▎      | 131/391 [00:23<00:45,  5.73it/s]

Epoch [40/40], Step [130/391], Loss: 0.4535


 36%|███▌      | 141/391 [00:25<00:43,  5.75it/s]

Epoch [40/40], Step [140/391], Loss: 0.4444


 39%|███▊      | 151/391 [00:27<00:41,  5.73it/s]

Epoch [40/40], Step [150/391], Loss: 0.4487


 41%|████      | 161/391 [00:29<00:40,  5.74it/s]

Epoch [40/40], Step [160/391], Loss: 0.4496


 44%|████▎     | 171/391 [00:30<00:38,  5.74it/s]

Epoch [40/40], Step [170/391], Loss: 0.4548


 46%|████▋     | 181/391 [00:32<00:36,  5.72it/s]

Epoch [40/40], Step [180/391], Loss: 0.4683


 49%|████▉     | 191/391 [00:34<00:34,  5.73it/s]

Epoch [40/40], Step [190/391], Loss: 0.4484


 51%|█████▏    | 201/391 [00:36<00:33,  5.74it/s]

Epoch [40/40], Step [200/391], Loss: 0.4550


 54%|█████▍    | 211/391 [00:37<00:31,  5.72it/s]

Epoch [40/40], Step [210/391], Loss: 0.4568


 57%|█████▋    | 221/391 [00:39<00:29,  5.73it/s]

Epoch [40/40], Step [220/391], Loss: 0.4471


 59%|█████▉    | 231/391 [00:41<00:27,  5.73it/s]

Epoch [40/40], Step [230/391], Loss: 0.4463


 62%|██████▏   | 241/391 [00:43<00:26,  5.73it/s]

Epoch [40/40], Step [240/391], Loss: 0.4523


 64%|██████▍   | 251/391 [00:44<00:24,  5.75it/s]

Epoch [40/40], Step [250/391], Loss: 0.4532


 67%|██████▋   | 261/391 [00:46<00:22,  5.73it/s]

Epoch [40/40], Step [260/391], Loss: 0.4530


 69%|██████▉   | 271/391 [00:48<00:20,  5.74it/s]

Epoch [40/40], Step [270/391], Loss: 0.4448


 72%|███████▏  | 281/391 [00:49<00:19,  5.73it/s]

Epoch [40/40], Step [280/391], Loss: 0.4426


 74%|███████▍  | 291/391 [00:51<00:17,  5.72it/s]

Epoch [40/40], Step [290/391], Loss: 0.4536


 77%|███████▋  | 301/391 [00:53<00:15,  5.72it/s]

Epoch [40/40], Step [300/391], Loss: 0.4468


 80%|███████▉  | 311/391 [00:55<00:13,  5.73it/s]

Epoch [40/40], Step [310/391], Loss: 0.4608


 82%|████████▏ | 321/391 [00:56<00:12,  5.74it/s]

Epoch [40/40], Step [320/391], Loss: 0.4652


 85%|████████▍ | 331/391 [00:58<00:10,  5.73it/s]

Epoch [40/40], Step [330/391], Loss: 0.4541


 87%|████████▋ | 341/391 [01:00<00:08,  5.74it/s]

Epoch [40/40], Step [340/391], Loss: 0.4644


 90%|████████▉ | 351/391 [01:02<00:07,  5.66it/s]

Epoch [40/40], Step [350/391], Loss: 0.4479


 92%|█████████▏| 361/391 [01:03<00:05,  5.73it/s]

Epoch [40/40], Step [360/391], Loss: 0.4486


 95%|█████████▍| 371/391 [01:05<00:03,  5.73it/s]

Epoch [40/40], Step [370/391], Loss: 0.4616


 97%|█████████▋| 381/391 [01:07<00:01,  5.74it/s]

Epoch [40/40], Step [380/391], Loss: 0.4595


100%|██████████| 391/391 [01:09<00:00,  5.65it/s]

Epoch [40/40], Step [390/391], Loss: 0.4682





Test Accuracy of the student model on the test images: 67.97 %
Saved best model to interim_ta1.pth
Best Accuracy: 67.97 %
TA2
Adjusted learning rate: 0.1


  3%|▎         | 11/391 [00:02<01:04,  5.86it/s]

Epoch [1/40], Step [10/391], Loss: 6.7583


  5%|▌         | 21/391 [00:04<01:00,  6.14it/s]

Epoch [1/40], Step [20/391], Loss: 6.7782


  8%|▊         | 31/391 [00:05<00:58,  6.15it/s]

Epoch [1/40], Step [30/391], Loss: 6.5883


 10%|█         | 41/391 [00:07<00:56,  6.16it/s]

Epoch [1/40], Step [40/391], Loss: 6.3520


 13%|█▎        | 51/391 [00:09<00:55,  6.16it/s]

Epoch [1/40], Step [50/391], Loss: 5.7623


 16%|█▌        | 61/391 [00:10<00:53,  6.16it/s]

Epoch [1/40], Step [60/391], Loss: 5.4477


 18%|█▊        | 71/391 [00:12<00:51,  6.16it/s]

Epoch [1/40], Step [70/391], Loss: 5.1585


 21%|██        | 81/391 [00:14<00:50,  6.13it/s]

Epoch [1/40], Step [80/391], Loss: 5.0743


 23%|██▎       | 91/391 [00:15<00:48,  6.16it/s]

Epoch [1/40], Step [90/391], Loss: 4.8076


 26%|██▌       | 101/391 [00:17<00:47,  6.16it/s]

Epoch [1/40], Step [100/391], Loss: 4.8986


 28%|██▊       | 111/391 [00:18<00:45,  6.15it/s]

Epoch [1/40], Step [110/391], Loss: 4.5386


 31%|███       | 121/391 [00:20<00:43,  6.16it/s]

Epoch [1/40], Step [120/391], Loss: 4.4845


 34%|███▎      | 131/391 [00:22<00:42,  6.16it/s]

Epoch [1/40], Step [130/391], Loss: 4.4366


 36%|███▌      | 141/391 [00:23<00:40,  6.16it/s]

Epoch [1/40], Step [140/391], Loss: 4.3381


 39%|███▊      | 151/391 [00:25<00:38,  6.16it/s]

Epoch [1/40], Step [150/391], Loss: 4.3731


 41%|████      | 161/391 [00:27<00:37,  6.16it/s]

Epoch [1/40], Step [160/391], Loss: 4.1687


 44%|████▎     | 171/391 [00:28<00:35,  6.16it/s]

Epoch [1/40], Step [170/391], Loss: 4.0795


 46%|████▋     | 181/391 [00:30<00:34,  6.15it/s]

Epoch [1/40], Step [180/391], Loss: 4.0379


 49%|████▉     | 191/391 [00:31<00:32,  6.15it/s]

Epoch [1/40], Step [190/391], Loss: 3.9239


 51%|█████▏    | 201/391 [00:33<00:30,  6.15it/s]

Epoch [1/40], Step [200/391], Loss: 3.8651


 54%|█████▍    | 211/391 [00:35<00:29,  6.16it/s]

Epoch [1/40], Step [210/391], Loss: 3.8832


 57%|█████▋    | 221/391 [00:36<00:27,  6.15it/s]

Epoch [1/40], Step [220/391], Loss: 3.8522


 59%|█████▉    | 231/391 [00:38<00:26,  6.15it/s]

Epoch [1/40], Step [230/391], Loss: 3.7865


 62%|██████▏   | 241/391 [00:40<00:24,  6.17it/s]

Epoch [1/40], Step [240/391], Loss: 3.7431


 64%|██████▍   | 251/391 [00:41<00:22,  6.16it/s]

Epoch [1/40], Step [250/391], Loss: 3.6519


 67%|██████▋   | 261/391 [00:43<00:21,  6.12it/s]

Epoch [1/40], Step [260/391], Loss: 3.7931


 69%|██████▉   | 271/391 [00:44<00:19,  6.16it/s]

Epoch [1/40], Step [270/391], Loss: 3.6812


 72%|███████▏  | 281/391 [00:46<00:17,  6.17it/s]

Epoch [1/40], Step [280/391], Loss: 3.7711


 74%|███████▍  | 291/391 [00:48<00:16,  6.17it/s]

Epoch [1/40], Step [290/391], Loss: 3.7857


 77%|███████▋  | 301/391 [00:49<00:14,  6.15it/s]

Epoch [1/40], Step [300/391], Loss: 3.5932


 80%|███████▉  | 311/391 [00:51<00:12,  6.17it/s]

Epoch [1/40], Step [310/391], Loss: 3.5664


 82%|████████▏ | 321/391 [00:53<00:11,  6.15it/s]

Epoch [1/40], Step [320/391], Loss: 3.6273


 85%|████████▍ | 331/391 [00:54<00:09,  6.16it/s]

Epoch [1/40], Step [330/391], Loss: 3.5220


 87%|████████▋ | 341/391 [00:56<00:08,  6.16it/s]

Epoch [1/40], Step [340/391], Loss: 3.4639


 90%|████████▉ | 351/391 [00:57<00:06,  6.15it/s]

Epoch [1/40], Step [350/391], Loss: 3.4241


 92%|█████████▏| 361/391 [00:59<00:04,  6.15it/s]

Epoch [1/40], Step [360/391], Loss: 3.6075


 95%|█████████▍| 371/391 [01:01<00:03,  6.16it/s]

Epoch [1/40], Step [370/391], Loss: 3.4254


 97%|█████████▋| 381/391 [01:02<00:01,  6.15it/s]

Epoch [1/40], Step [380/391], Loss: 3.4502


100%|██████████| 391/391 [01:04<00:00,  6.07it/s]

Epoch [1/40], Step [390/391], Loss: 3.3282





Test Accuracy of the student model on the test images: 21.56 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.59it/s]

Epoch [2/40], Step [10/391], Loss: 4.3675


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [2/40], Step [20/391], Loss: 4.6608


  8%|▊         | 31/391 [00:06<01:01,  5.85it/s]

Epoch [2/40], Step [30/391], Loss: 4.4068


 10%|█         | 41/391 [00:07<00:59,  5.86it/s]

Epoch [2/40], Step [40/391], Loss: 4.3030


 13%|█▎        | 51/391 [00:09<00:57,  5.86it/s]

Epoch [2/40], Step [50/391], Loss: 4.0765


 16%|█▌        | 61/391 [00:11<00:56,  5.87it/s]

Epoch [2/40], Step [60/391], Loss: 3.8948


 18%|█▊        | 71/391 [00:13<00:54,  5.87it/s]

Epoch [2/40], Step [70/391], Loss: 3.7584


 21%|██        | 81/391 [00:14<00:52,  5.85it/s]

Epoch [2/40], Step [80/391], Loss: 3.7666


 23%|██▎       | 91/391 [00:16<00:51,  5.85it/s]

Epoch [2/40], Step [90/391], Loss: 3.6090


 26%|██▌       | 101/391 [00:18<00:49,  5.85it/s]

Epoch [2/40], Step [100/391], Loss: 3.5862


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [2/40], Step [110/391], Loss: 3.6177


 31%|███       | 121/391 [00:21<00:46,  5.87it/s]

Epoch [2/40], Step [120/391], Loss: 3.5045


 34%|███▎      | 131/391 [00:23<00:44,  5.86it/s]

Epoch [2/40], Step [130/391], Loss: 3.3593


 36%|███▌      | 141/391 [00:24<00:42,  5.87it/s]

Epoch [2/40], Step [140/391], Loss: 3.3221


 39%|███▊      | 151/391 [00:26<00:41,  5.85it/s]

Epoch [2/40], Step [150/391], Loss: 3.3466


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [2/40], Step [160/391], Loss: 3.4517


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [2/40], Step [170/391], Loss: 3.3187


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [2/40], Step [180/391], Loss: 3.3149


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [2/40], Step [190/391], Loss: 3.3146


 51%|█████▏    | 201/391 [00:35<00:32,  5.86it/s]

Epoch [2/40], Step [200/391], Loss: 3.2693


 54%|█████▍    | 211/391 [00:36<00:30,  5.86it/s]

Epoch [2/40], Step [210/391], Loss: 3.1931


 57%|█████▋    | 221/391 [00:38<00:29,  5.83it/s]

Epoch [2/40], Step [220/391], Loss: 3.1514


 59%|█████▉    | 231/391 [00:40<00:27,  5.85it/s]

Epoch [2/40], Step [230/391], Loss: 3.1883


 62%|██████▏   | 241/391 [00:42<00:25,  5.84it/s]

Epoch [2/40], Step [240/391], Loss: 3.0986


 64%|██████▍   | 251/391 [00:43<00:23,  5.85it/s]

Epoch [2/40], Step [250/391], Loss: 3.1079


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [2/40], Step [260/391], Loss: 3.0458


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [2/40], Step [270/391], Loss: 3.0642


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [2/40], Step [280/391], Loss: 3.0211


 74%|███████▍  | 291/391 [00:50<00:17,  5.86it/s]

Epoch [2/40], Step [290/391], Loss: 2.9949


 77%|███████▋  | 301/391 [00:52<00:15,  5.86it/s]

Epoch [2/40], Step [300/391], Loss: 3.0277


 80%|███████▉  | 311/391 [00:53<00:13,  5.85it/s]

Epoch [2/40], Step [310/391], Loss: 2.9312


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [2/40], Step [320/391], Loss: 3.0371


 85%|████████▍ | 331/391 [00:57<00:10,  5.87it/s]

Epoch [2/40], Step [330/391], Loss: 2.9720


 87%|████████▋ | 341/391 [00:59<00:08,  5.87it/s]

Epoch [2/40], Step [340/391], Loss: 3.0168


 90%|████████▉ | 351/391 [01:00<00:06,  5.85it/s]

Epoch [2/40], Step [350/391], Loss: 2.8858


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [2/40], Step [360/391], Loss: 2.8737


 95%|█████████▍| 371/391 [01:04<00:03,  5.85it/s]

Epoch [2/40], Step [370/391], Loss: 2.9182


 97%|█████████▋| 381/391 [01:05<00:01,  5.87it/s]

Epoch [2/40], Step [380/391], Loss: 2.7627


100%|██████████| 391/391 [01:07<00:00,  5.78it/s]

Epoch [2/40], Step [390/391], Loss: 2.8732





Test Accuracy of the student model on the test images: 28.05 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.57it/s]

Epoch [3/40], Step [10/391], Loss: 2.8294


  5%|▌         | 21/391 [00:04<01:03,  5.83it/s]

Epoch [3/40], Step [20/391], Loss: 2.7942


  8%|▊         | 31/391 [00:06<01:01,  5.82it/s]

Epoch [3/40], Step [30/391], Loss: 2.7800


 10%|█         | 41/391 [00:08<00:59,  5.86it/s]

Epoch [3/40], Step [40/391], Loss: 2.7883


 13%|█▎        | 51/391 [00:09<00:58,  5.86it/s]

Epoch [3/40], Step [50/391], Loss: 2.6128


 16%|█▌        | 61/391 [00:11<00:56,  5.85it/s]

Epoch [3/40], Step [60/391], Loss: 2.6673


 18%|█▊        | 71/391 [00:13<00:54,  5.86it/s]

Epoch [3/40], Step [70/391], Loss: 2.6840


 21%|██        | 81/391 [00:14<00:52,  5.85it/s]

Epoch [3/40], Step [80/391], Loss: 2.6433


 23%|██▎       | 91/391 [00:16<00:51,  5.84it/s]

Epoch [3/40], Step [90/391], Loss: 2.6025


 26%|██▌       | 101/391 [00:18<00:49,  5.87it/s]

Epoch [3/40], Step [100/391], Loss: 2.7837


 28%|██▊       | 111/391 [00:19<00:47,  5.87it/s]

Epoch [3/40], Step [110/391], Loss: 2.7576


 31%|███       | 121/391 [00:21<00:46,  5.84it/s]

Epoch [3/40], Step [120/391], Loss: 2.6786


 34%|███▎      | 131/391 [00:23<00:44,  5.84it/s]

Epoch [3/40], Step [130/391], Loss: 2.6578


 36%|███▌      | 141/391 [00:25<00:42,  5.85it/s]

Epoch [3/40], Step [140/391], Loss: 2.5797


 39%|███▊      | 151/391 [00:26<00:41,  5.84it/s]

Epoch [3/40], Step [150/391], Loss: 2.6119


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [3/40], Step [160/391], Loss: 2.5760


 44%|████▎     | 171/391 [00:30<00:37,  5.82it/s]

Epoch [3/40], Step [170/391], Loss: 2.6585


 46%|████▋     | 181/391 [00:31<00:35,  5.85it/s]

Epoch [3/40], Step [180/391], Loss: 2.5907


 49%|████▉     | 191/391 [00:33<00:34,  5.83it/s]

Epoch [3/40], Step [190/391], Loss: 2.5912


 51%|█████▏    | 201/391 [00:35<00:32,  5.85it/s]

Epoch [3/40], Step [200/391], Loss: 2.5366


 54%|█████▍    | 211/391 [00:37<00:30,  5.85it/s]

Epoch [3/40], Step [210/391], Loss: 2.5680


 57%|█████▋    | 221/391 [00:38<00:29,  5.83it/s]

Epoch [3/40], Step [220/391], Loss: 2.6155


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [3/40], Step [230/391], Loss: 2.4498


 62%|██████▏   | 241/391 [00:42<00:25,  5.85it/s]

Epoch [3/40], Step [240/391], Loss: 2.4098


 64%|██████▍   | 251/391 [00:43<00:23,  5.85it/s]

Epoch [3/40], Step [250/391], Loss: 2.3931


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [3/40], Step [260/391], Loss: 2.4529


 69%|██████▉   | 271/391 [00:47<00:20,  5.85it/s]

Epoch [3/40], Step [270/391], Loss: 2.4993


 72%|███████▏  | 281/391 [00:48<00:18,  5.84it/s]

Epoch [3/40], Step [280/391], Loss: 2.4143


 74%|███████▍  | 291/391 [00:50<00:17,  5.85it/s]

Epoch [3/40], Step [290/391], Loss: 2.4565


 77%|███████▋  | 301/391 [00:52<00:15,  5.87it/s]

Epoch [3/40], Step [300/391], Loss: 2.4329


 80%|███████▉  | 311/391 [00:54<00:13,  5.87it/s]

Epoch [3/40], Step [310/391], Loss: 2.4824


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [3/40], Step [320/391], Loss: 2.3749


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [3/40], Step [330/391], Loss: 2.4424


 87%|████████▋ | 341/391 [00:59<00:08,  5.87it/s]

Epoch [3/40], Step [340/391], Loss: 2.4345


 90%|████████▉ | 351/391 [01:00<00:06,  5.85it/s]

Epoch [3/40], Step [350/391], Loss: 2.3830


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [3/40], Step [360/391], Loss: 2.3959


 95%|█████████▍| 371/391 [01:04<00:03,  5.84it/s]

Epoch [3/40], Step [370/391], Loss: 2.3983


 97%|█████████▋| 381/391 [01:06<00:01,  5.85it/s]

Epoch [3/40], Step [380/391], Loss: 2.3523


100%|██████████| 391/391 [01:07<00:00,  5.77it/s]

Epoch [3/40], Step [390/391], Loss: 2.4153





Test Accuracy of the student model on the test images: 34.17 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.60it/s]

Epoch [4/40], Step [10/391], Loss: 2.2788


  5%|▌         | 21/391 [00:04<01:03,  5.84it/s]

Epoch [4/40], Step [20/391], Loss: 2.3364


  8%|▊         | 31/391 [00:06<01:01,  5.85it/s]

Epoch [4/40], Step [30/391], Loss: 2.3404


 10%|█         | 41/391 [00:07<00:59,  5.86it/s]

Epoch [4/40], Step [40/391], Loss: 2.2380


 13%|█▎        | 51/391 [00:09<00:57,  5.86it/s]

Epoch [4/40], Step [50/391], Loss: 2.2505


 16%|█▌        | 61/391 [00:11<00:56,  5.87it/s]

Epoch [4/40], Step [60/391], Loss: 2.2696


 18%|█▊        | 71/391 [00:13<00:54,  5.86it/s]

Epoch [4/40], Step [70/391], Loss: 2.2789


 21%|██        | 81/391 [00:14<00:52,  5.85it/s]

Epoch [4/40], Step [80/391], Loss: 2.3089


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [4/40], Step [90/391], Loss: 2.1826


 26%|██▌       | 101/391 [00:18<00:49,  5.85it/s]

Epoch [4/40], Step [100/391], Loss: 2.2192


 28%|██▊       | 111/391 [00:19<00:47,  5.87it/s]

Epoch [4/40], Step [110/391], Loss: 2.2762


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [4/40], Step [120/391], Loss: 2.3109


 34%|███▎      | 131/391 [00:23<00:44,  5.85it/s]

Epoch [4/40], Step [130/391], Loss: 2.2866


 36%|███▌      | 141/391 [00:24<00:42,  5.84it/s]

Epoch [4/40], Step [140/391], Loss: 2.3324


 39%|███▊      | 151/391 [00:26<00:41,  5.83it/s]

Epoch [4/40], Step [150/391], Loss: 2.2518


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [4/40], Step [160/391], Loss: 2.2140


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [4/40], Step [170/391], Loss: 2.1961


 46%|████▋     | 181/391 [00:31<00:35,  5.85it/s]

Epoch [4/40], Step [180/391], Loss: 2.2362


 49%|████▉     | 191/391 [00:33<00:34,  5.87it/s]

Epoch [4/40], Step [190/391], Loss: 2.1897


 51%|█████▏    | 201/391 [00:35<00:32,  5.85it/s]

Epoch [4/40], Step [200/391], Loss: 2.2068


 54%|█████▍    | 211/391 [00:36<00:30,  5.85it/s]

Epoch [4/40], Step [210/391], Loss: 2.2396


 57%|█████▋    | 221/391 [00:38<00:29,  5.85it/s]

Epoch [4/40], Step [220/391], Loss: 2.2758


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [4/40], Step [230/391], Loss: 2.1980


 62%|██████▏   | 241/391 [00:42<00:25,  5.84it/s]

Epoch [4/40], Step [240/391], Loss: 2.2120


 64%|██████▍   | 251/391 [00:43<00:23,  5.85it/s]

Epoch [4/40], Step [250/391], Loss: 2.1602


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [4/40], Step [260/391], Loss: 2.1766


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [4/40], Step [270/391], Loss: 2.1918


 72%|███████▏  | 281/391 [00:48<00:18,  5.84it/s]

Epoch [4/40], Step [280/391], Loss: 2.1648


 74%|███████▍  | 291/391 [00:50<00:17,  5.84it/s]

Epoch [4/40], Step [290/391], Loss: 2.2046


 77%|███████▋  | 301/391 [00:52<00:15,  5.85it/s]

Epoch [4/40], Step [300/391], Loss: 2.1921


 80%|███████▉  | 311/391 [00:53<00:13,  5.87it/s]

Epoch [4/40], Step [310/391], Loss: 2.1813


 82%|████████▏ | 321/391 [00:55<00:11,  5.85it/s]

Epoch [4/40], Step [320/391], Loss: 2.1260


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [4/40], Step [330/391], Loss: 2.1149


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [4/40], Step [340/391], Loss: 2.1466


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [4/40], Step [350/391], Loss: 2.0780


 92%|█████████▏| 361/391 [01:02<00:05,  5.85it/s]

Epoch [4/40], Step [360/391], Loss: 2.0680


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [4/40], Step [370/391], Loss: 2.1228


 97%|█████████▋| 381/391 [01:05<00:01,  5.86it/s]

Epoch [4/40], Step [380/391], Loss: 2.1429


100%|██████████| 391/391 [01:07<00:00,  5.78it/s]

Epoch [4/40], Step [390/391], Loss: 2.2021





Test Accuracy of the student model on the test images: 36.99 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.60it/s]

Epoch [5/40], Step [10/391], Loss: 2.0837


  5%|▌         | 21/391 [00:04<01:03,  5.86it/s]

Epoch [5/40], Step [20/391], Loss: 2.0849


  8%|▊         | 31/391 [00:06<01:01,  5.86it/s]

Epoch [5/40], Step [30/391], Loss: 2.0594


 10%|█         | 41/391 [00:07<00:59,  5.86it/s]

Epoch [5/40], Step [40/391], Loss: 2.0903


 13%|█▎        | 51/391 [00:09<00:58,  5.86it/s]

Epoch [5/40], Step [50/391], Loss: 2.0384


 16%|█▌        | 61/391 [00:11<00:56,  5.87it/s]

Epoch [5/40], Step [60/391], Loss: 2.0571


 18%|█▊        | 71/391 [00:12<00:54,  5.83it/s]

Epoch [5/40], Step [70/391], Loss: 2.0582


 21%|██        | 81/391 [00:14<00:53,  5.83it/s]

Epoch [5/40], Step [80/391], Loss: 2.0425


 23%|██▎       | 91/391 [00:16<00:51,  5.85it/s]

Epoch [5/40], Step [90/391], Loss: 2.0163


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [5/40], Step [100/391], Loss: 1.9283


 28%|██▊       | 111/391 [00:19<00:47,  5.85it/s]

Epoch [5/40], Step [110/391], Loss: 2.0103


 31%|███       | 121/391 [00:21<00:46,  5.84it/s]

Epoch [5/40], Step [120/391], Loss: 1.9532


 34%|███▎      | 131/391 [00:23<00:44,  5.85it/s]

Epoch [5/40], Step [130/391], Loss: 1.9949


 36%|███▌      | 141/391 [00:24<00:42,  5.86it/s]

Epoch [5/40], Step [140/391], Loss: 1.9989


 39%|███▊      | 151/391 [00:26<00:40,  5.87it/s]

Epoch [5/40], Step [150/391], Loss: 1.9744


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [5/40], Step [160/391], Loss: 2.0114


 44%|████▎     | 171/391 [00:29<00:37,  5.85it/s]

Epoch [5/40], Step [170/391], Loss: 1.9736


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [5/40], Step [180/391], Loss: 1.9719


 49%|████▉     | 191/391 [00:33<00:34,  5.87it/s]

Epoch [5/40], Step [190/391], Loss: 2.0015


 51%|█████▏    | 201/391 [00:35<00:32,  5.87it/s]

Epoch [5/40], Step [200/391], Loss: 2.0349


 54%|█████▍    | 211/391 [00:36<00:30,  5.86it/s]

Epoch [5/40], Step [210/391], Loss: 2.0426


 57%|█████▋    | 221/391 [00:38<00:29,  5.86it/s]

Epoch [5/40], Step [220/391], Loss: 1.9470


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [5/40], Step [230/391], Loss: 2.0133


 62%|██████▏   | 241/391 [00:41<00:25,  5.85it/s]

Epoch [5/40], Step [240/391], Loss: 2.0131


 64%|██████▍   | 251/391 [00:43<00:23,  5.84it/s]

Epoch [5/40], Step [250/391], Loss: 1.9497


 67%|██████▋   | 261/391 [00:45<00:22,  5.84it/s]

Epoch [5/40], Step [260/391], Loss: 2.0461


 69%|██████▉   | 271/391 [00:47<00:20,  5.85it/s]

Epoch [5/40], Step [270/391], Loss: 2.0309


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [5/40], Step [280/391], Loss: 1.9669


 74%|███████▍  | 291/391 [00:50<00:17,  5.86it/s]

Epoch [5/40], Step [290/391], Loss: 1.9211


 77%|███████▋  | 301/391 [00:52<00:15,  5.87it/s]

Epoch [5/40], Step [300/391], Loss: 2.0239


 80%|███████▉  | 311/391 [00:53<00:13,  5.85it/s]

Epoch [5/40], Step [310/391], Loss: 1.9834


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [5/40], Step [320/391], Loss: 2.0063


 85%|████████▍ | 331/391 [00:57<00:10,  5.87it/s]

Epoch [5/40], Step [330/391], Loss: 1.9912


 87%|████████▋ | 341/391 [00:58<00:08,  5.86it/s]

Epoch [5/40], Step [340/391], Loss: 1.9253


 90%|████████▉ | 351/391 [01:00<00:06,  5.85it/s]

Epoch [5/40], Step [350/391], Loss: 1.9216


 92%|█████████▏| 361/391 [01:02<00:05,  5.87it/s]

Epoch [5/40], Step [360/391], Loss: 1.9668


 95%|█████████▍| 371/391 [01:04<00:03,  5.85it/s]

Epoch [5/40], Step [370/391], Loss: 1.9766


 97%|█████████▋| 381/391 [01:05<00:01,  5.87it/s]

Epoch [5/40], Step [380/391], Loss: 1.9335


100%|██████████| 391/391 [01:07<00:00,  5.79it/s]

Epoch [5/40], Step [390/391], Loss: 1.8949





Test Accuracy of the student model on the test images: 39.65 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.57it/s]

Epoch [6/40], Step [10/391], Loss: 1.8624


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [6/40], Step [20/391], Loss: 1.8582


  8%|▊         | 31/391 [00:06<01:01,  5.84it/s]

Epoch [6/40], Step [30/391], Loss: 1.8190


 10%|█         | 41/391 [00:08<00:59,  5.85it/s]

Epoch [6/40], Step [40/391], Loss: 1.8043


 13%|█▎        | 51/391 [00:09<00:58,  5.85it/s]

Epoch [6/40], Step [50/391], Loss: 1.7969


 16%|█▌        | 61/391 [00:11<00:56,  5.84it/s]

Epoch [6/40], Step [60/391], Loss: 1.7918


 18%|█▊        | 71/391 [00:13<00:54,  5.86it/s]

Epoch [6/40], Step [70/391], Loss: 1.8660


 21%|██        | 81/391 [00:14<00:52,  5.87it/s]

Epoch [6/40], Step [80/391], Loss: 1.8750


 23%|██▎       | 91/391 [00:16<00:51,  5.85it/s]

Epoch [6/40], Step [90/391], Loss: 1.8874


 26%|██▌       | 101/391 [00:18<00:49,  5.87it/s]

Epoch [6/40], Step [100/391], Loss: 1.8655


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [6/40], Step [110/391], Loss: 1.8543


 31%|███       | 121/391 [00:21<00:46,  5.85it/s]

Epoch [6/40], Step [120/391], Loss: 1.8271


 34%|███▎      | 131/391 [00:23<00:44,  5.85it/s]

Epoch [6/40], Step [130/391], Loss: 1.9003


 36%|███▌      | 141/391 [00:25<00:42,  5.86it/s]

Epoch [6/40], Step [140/391], Loss: 1.8795


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [6/40], Step [150/391], Loss: 1.8610


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [6/40], Step [160/391], Loss: 1.8788


 44%|████▎     | 171/391 [00:30<00:37,  5.85it/s]

Epoch [6/40], Step [170/391], Loss: 1.9055


 46%|████▋     | 181/391 [00:31<00:35,  5.87it/s]

Epoch [6/40], Step [180/391], Loss: 1.9060


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [6/40], Step [190/391], Loss: 1.8351


 51%|█████▏    | 201/391 [00:35<00:32,  5.85it/s]

Epoch [6/40], Step [200/391], Loss: 1.8771


 54%|█████▍    | 211/391 [00:37<00:30,  5.85it/s]

Epoch [6/40], Step [210/391], Loss: 1.8884


 57%|█████▋    | 221/391 [00:38<00:29,  5.86it/s]

Epoch [6/40], Step [220/391], Loss: 1.8621


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [6/40], Step [230/391], Loss: 1.8304


 62%|██████▏   | 241/391 [00:42<00:25,  5.86it/s]

Epoch [6/40], Step [240/391], Loss: 1.8694


 64%|██████▍   | 251/391 [00:43<00:23,  5.87it/s]

Epoch [6/40], Step [250/391], Loss: 1.8461


 67%|██████▋   | 261/391 [00:45<00:22,  5.85it/s]

Epoch [6/40], Step [260/391], Loss: 1.8776


 69%|██████▉   | 271/391 [00:47<00:20,  5.87it/s]

Epoch [6/40], Step [270/391], Loss: 1.9343


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [6/40], Step [280/391], Loss: 1.9042


 74%|███████▍  | 291/391 [00:50<00:17,  5.84it/s]

Epoch [6/40], Step [290/391], Loss: 1.8752


 77%|███████▋  | 301/391 [00:52<00:15,  5.86it/s]

Epoch [6/40], Step [300/391], Loss: 1.8449


 80%|███████▉  | 311/391 [00:54<00:13,  5.86it/s]

Epoch [6/40], Step [310/391], Loss: 1.8237


 82%|████████▏ | 321/391 [00:55<00:11,  5.85it/s]

Epoch [6/40], Step [320/391], Loss: 1.8247


 85%|████████▍ | 331/391 [00:57<00:10,  5.80it/s]

Epoch [6/40], Step [330/391], Loss: 1.8573


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [6/40], Step [340/391], Loss: 1.8201


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [6/40], Step [350/391], Loss: 1.8502


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [6/40], Step [360/391], Loss: 1.8466


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [6/40], Step [370/391], Loss: 1.8544


 97%|█████████▋| 381/391 [01:06<00:01,  5.86it/s]

Epoch [6/40], Step [380/391], Loss: 1.8376


100%|██████████| 391/391 [01:07<00:00,  5.77it/s]

Epoch [6/40], Step [390/391], Loss: 1.8578





Test Accuracy of the student model on the test images: 40.27 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.63it/s]

Epoch [7/40], Step [10/391], Loss: 1.8339


  5%|▌         | 21/391 [00:04<01:03,  5.84it/s]

Epoch [7/40], Step [20/391], Loss: 1.7494


  8%|▊         | 31/391 [00:06<01:01,  5.85it/s]

Epoch [7/40], Step [30/391], Loss: 1.7445


 10%|█         | 41/391 [00:07<00:59,  5.86it/s]

Epoch [7/40], Step [40/391], Loss: 1.7058


 13%|█▎        | 51/391 [00:09<00:57,  5.86it/s]

Epoch [7/40], Step [50/391], Loss: 1.7418


 16%|█▌        | 61/391 [00:11<00:56,  5.85it/s]

Epoch [7/40], Step [60/391], Loss: 1.7205


 18%|█▊        | 71/391 [00:12<00:54,  5.85it/s]

Epoch [7/40], Step [70/391], Loss: 1.7019


 21%|██        | 81/391 [00:14<00:52,  5.86it/s]

Epoch [7/40], Step [80/391], Loss: 1.7081


 23%|██▎       | 91/391 [00:16<00:51,  5.83it/s]

Epoch [7/40], Step [90/391], Loss: 1.7546


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [7/40], Step [100/391], Loss: 1.7185


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [7/40], Step [110/391], Loss: 1.7425


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [7/40], Step [120/391], Loss: 1.7006


 34%|███▎      | 131/391 [00:23<00:44,  5.85it/s]

Epoch [7/40], Step [130/391], Loss: 1.7382


 36%|███▌      | 141/391 [00:24<00:42,  5.86it/s]

Epoch [7/40], Step [140/391], Loss: 1.7498


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [7/40], Step [150/391], Loss: 1.7243


 41%|████      | 161/391 [00:28<00:39,  5.85it/s]

Epoch [7/40], Step [160/391], Loss: 1.6578


 44%|████▎     | 171/391 [00:29<00:37,  5.86it/s]

Epoch [7/40], Step [170/391], Loss: 1.6945


 46%|████▋     | 181/391 [00:31<00:35,  5.85it/s]

Epoch [7/40], Step [180/391], Loss: 1.6983


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [7/40], Step [190/391], Loss: 1.7336


 51%|█████▏    | 201/391 [00:35<00:32,  5.85it/s]

Epoch [7/40], Step [200/391], Loss: 1.7464


 54%|█████▍    | 211/391 [00:36<00:30,  5.84it/s]

Epoch [7/40], Step [210/391], Loss: 1.7416


 57%|█████▋    | 221/391 [00:38<00:29,  5.85it/s]

Epoch [7/40], Step [220/391], Loss: 1.7344


 59%|█████▉    | 231/391 [00:40<00:27,  5.85it/s]

Epoch [7/40], Step [230/391], Loss: 1.7942


 62%|██████▏   | 241/391 [00:41<00:25,  5.85it/s]

Epoch [7/40], Step [240/391], Loss: 1.7878


 64%|██████▍   | 251/391 [00:43<00:23,  5.86it/s]

Epoch [7/40], Step [250/391], Loss: 1.7537


 67%|██████▋   | 261/391 [00:45<00:22,  5.85it/s]

Epoch [7/40], Step [260/391], Loss: 1.7327


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [7/40], Step [270/391], Loss: 1.7381


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [7/40], Step [280/391], Loss: 1.7142


 74%|███████▍  | 291/391 [00:50<00:17,  5.87it/s]

Epoch [7/40], Step [290/391], Loss: 1.7231


 77%|███████▋  | 301/391 [00:52<00:15,  5.85it/s]

Epoch [7/40], Step [300/391], Loss: 1.7614


 80%|███████▉  | 311/391 [00:53<00:13,  5.87it/s]

Epoch [7/40], Step [310/391], Loss: 1.8066


 82%|████████▏ | 321/391 [00:55<00:11,  5.85it/s]

Epoch [7/40], Step [320/391], Loss: 1.7439


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [7/40], Step [330/391], Loss: 1.7126


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [7/40], Step [340/391], Loss: 1.7187


 90%|████████▉ | 351/391 [01:00<00:06,  5.87it/s]

Epoch [7/40], Step [350/391], Loss: 1.7545


 92%|█████████▏| 361/391 [01:02<00:05,  5.87it/s]

Epoch [7/40], Step [360/391], Loss: 1.6985


 95%|█████████▍| 371/391 [01:04<00:03,  5.87it/s]

Epoch [7/40], Step [370/391], Loss: 1.7785


 97%|█████████▋| 381/391 [01:05<00:01,  5.85it/s]

Epoch [7/40], Step [380/391], Loss: 1.7978


100%|██████████| 391/391 [01:07<00:00,  5.79it/s]

Epoch [7/40], Step [390/391], Loss: 1.7872





Test Accuracy of the student model on the test images: 41.29 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.57it/s]

Epoch [8/40], Step [10/391], Loss: 1.6910


  5%|▌         | 21/391 [00:04<01:03,  5.86it/s]

Epoch [8/40], Step [20/391], Loss: 1.6784


  8%|▊         | 31/391 [00:06<01:01,  5.85it/s]

Epoch [8/40], Step [30/391], Loss: 1.7020


 10%|█         | 41/391 [00:08<00:59,  5.85it/s]

Epoch [8/40], Step [40/391], Loss: 1.6465


 13%|█▎        | 51/391 [00:09<00:58,  5.85it/s]

Epoch [8/40], Step [50/391], Loss: 1.6585


 16%|█▌        | 61/391 [00:11<00:56,  5.85it/s]

Epoch [8/40], Step [60/391], Loss: 1.6401


 18%|█▊        | 71/391 [00:13<00:54,  5.86it/s]

Epoch [8/40], Step [70/391], Loss: 1.6486


 21%|██        | 81/391 [00:14<00:52,  5.86it/s]

Epoch [8/40], Step [80/391], Loss: 1.6660


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [8/40], Step [90/391], Loss: 1.7041


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [8/40], Step [100/391], Loss: 1.6870


 28%|██▊       | 111/391 [00:19<00:47,  5.85it/s]

Epoch [8/40], Step [110/391], Loss: 1.6301


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [8/40], Step [120/391], Loss: 1.6631


 34%|███▎      | 131/391 [00:23<00:44,  5.86it/s]

Epoch [8/40], Step [130/391], Loss: 1.7314


 36%|███▌      | 141/391 [00:25<00:42,  5.87it/s]

Epoch [8/40], Step [140/391], Loss: 1.7131


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [8/40], Step [150/391], Loss: 1.6807


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [8/40], Step [160/391], Loss: 1.6836


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [8/40], Step [170/391], Loss: 1.6872


 46%|████▋     | 181/391 [00:31<00:35,  5.87it/s]

Epoch [8/40], Step [180/391], Loss: 1.7636


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [8/40], Step [190/391], Loss: 1.6981


 51%|█████▏    | 201/391 [00:35<00:32,  5.87it/s]

Epoch [8/40], Step [200/391], Loss: 1.6808


 54%|█████▍    | 211/391 [00:37<00:30,  5.85it/s]

Epoch [8/40], Step [210/391], Loss: 1.6815


 57%|█████▋    | 221/391 [00:38<00:28,  5.87it/s]

Epoch [8/40], Step [220/391], Loss: 1.6705


 59%|█████▉    | 231/391 [00:40<00:27,  5.87it/s]

Epoch [8/40], Step [230/391], Loss: 1.6191


 62%|██████▏   | 241/391 [00:42<00:25,  5.84it/s]

Epoch [8/40], Step [240/391], Loss: 1.6365


 64%|██████▍   | 251/391 [00:43<00:23,  5.86it/s]

Epoch [8/40], Step [250/391], Loss: 1.6683


 67%|██████▋   | 261/391 [00:45<00:22,  5.85it/s]

Epoch [8/40], Step [260/391], Loss: 1.6979


 69%|██████▉   | 271/391 [00:47<00:20,  5.85it/s]

Epoch [8/40], Step [270/391], Loss: 1.6854


 72%|███████▏  | 281/391 [00:48<00:18,  5.85it/s]

Epoch [8/40], Step [280/391], Loss: 1.6723


 74%|███████▍  | 291/391 [00:50<00:17,  5.86it/s]

Epoch [8/40], Step [290/391], Loss: 1.6334


 77%|███████▋  | 301/391 [00:52<00:15,  5.84it/s]

Epoch [8/40], Step [300/391], Loss: 1.6414


 80%|███████▉  | 311/391 [00:54<00:13,  5.86it/s]

Epoch [8/40], Step [310/391], Loss: 1.5992


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [8/40], Step [320/391], Loss: 1.6252


 85%|████████▍ | 331/391 [00:57<00:10,  5.85it/s]

Epoch [8/40], Step [330/391], Loss: 1.6868


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [8/40], Step [340/391], Loss: 1.7188


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [8/40], Step [350/391], Loss: 1.7083


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [8/40], Step [360/391], Loss: 1.7317


 95%|█████████▍| 371/391 [01:04<00:03,  5.87it/s]

Epoch [8/40], Step [370/391], Loss: 1.6538


 97%|█████████▋| 381/391 [01:06<00:01,  5.86it/s]

Epoch [8/40], Step [380/391], Loss: 1.6394


100%|██████████| 391/391 [01:07<00:00,  5.77it/s]

Epoch [8/40], Step [390/391], Loss: 1.6029





Test Accuracy of the student model on the test images: 44.75 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.60it/s]

Epoch [9/40], Step [10/391], Loss: 1.5942


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [9/40], Step [20/391], Loss: 1.5817


  8%|▊         | 31/391 [00:06<01:01,  5.84it/s]

Epoch [9/40], Step [30/391], Loss: 1.5787


 10%|█         | 41/391 [00:07<00:59,  5.85it/s]

Epoch [9/40], Step [40/391], Loss: 1.5429


 13%|█▎        | 51/391 [00:09<00:58,  5.85it/s]

Epoch [9/40], Step [50/391], Loss: 1.5695


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [9/40], Step [60/391], Loss: 1.5880


 18%|█▊        | 71/391 [00:13<00:54,  5.87it/s]

Epoch [9/40], Step [70/391], Loss: 1.5747


 21%|██        | 81/391 [00:14<00:52,  5.86it/s]

Epoch [9/40], Step [80/391], Loss: 1.5843


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [9/40], Step [90/391], Loss: 1.6120


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [9/40], Step [100/391], Loss: 1.5556


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [9/40], Step [110/391], Loss: 1.6425


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [9/40], Step [120/391], Loss: 1.5359


 34%|███▎      | 131/391 [00:23<00:44,  5.85it/s]

Epoch [9/40], Step [130/391], Loss: 1.6020


 36%|███▌      | 141/391 [00:25<00:42,  5.84it/s]

Epoch [9/40], Step [140/391], Loss: 1.5779


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [9/40], Step [150/391], Loss: 1.5841


 41%|████      | 161/391 [00:28<00:39,  5.85it/s]

Epoch [9/40], Step [160/391], Loss: 1.5585


 44%|████▎     | 171/391 [00:30<00:37,  5.85it/s]

Epoch [9/40], Step [170/391], Loss: 1.6214


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [9/40], Step [180/391], Loss: 1.5878


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [9/40], Step [190/391], Loss: 1.5744


 51%|█████▏    | 201/391 [00:35<00:32,  5.86it/s]

Epoch [9/40], Step [200/391], Loss: 1.6159


 54%|█████▍    | 211/391 [00:36<00:30,  5.86it/s]

Epoch [9/40], Step [210/391], Loss: 1.6168


 57%|█████▋    | 221/391 [00:38<00:29,  5.85it/s]

Epoch [9/40], Step [220/391], Loss: 1.5952


 59%|█████▉    | 231/391 [00:40<00:27,  5.85it/s]

Epoch [9/40], Step [230/391], Loss: 1.5932


 62%|██████▏   | 241/391 [00:42<00:25,  5.85it/s]

Epoch [9/40], Step [240/391], Loss: 1.5809


 64%|██████▍   | 251/391 [00:43<00:23,  5.86it/s]

Epoch [9/40], Step [250/391], Loss: 1.5858


 67%|██████▋   | 261/391 [00:45<00:22,  5.87it/s]

Epoch [9/40], Step [260/391], Loss: 1.5559


 69%|██████▉   | 271/391 [00:47<00:20,  5.84it/s]

Epoch [9/40], Step [270/391], Loss: 1.5842


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [9/40], Step [280/391], Loss: 1.5488


 74%|███████▍  | 291/391 [00:50<00:17,  5.87it/s]

Epoch [9/40], Step [290/391], Loss: 1.6133


 77%|███████▋  | 301/391 [00:52<00:15,  5.85it/s]

Epoch [9/40], Step [300/391], Loss: 1.6100


 80%|███████▉  | 311/391 [00:54<00:13,  5.86it/s]

Epoch [9/40], Step [310/391], Loss: 1.6091


 82%|████████▏ | 321/391 [00:55<00:11,  5.85it/s]

Epoch [9/40], Step [320/391], Loss: 1.5848


 85%|████████▍ | 331/391 [00:57<00:10,  5.87it/s]

Epoch [9/40], Step [330/391], Loss: 1.5871


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [9/40], Step [340/391], Loss: 1.5883


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [9/40], Step [350/391], Loss: 1.5990


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [9/40], Step [360/391], Loss: 1.6003


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [9/40], Step [370/391], Loss: 1.6114


 97%|█████████▋| 381/391 [01:05<00:01,  5.86it/s]

Epoch [9/40], Step [380/391], Loss: 1.6290


100%|██████████| 391/391 [01:07<00:00,  5.78it/s]

Epoch [9/40], Step [390/391], Loss: 1.5904





Test Accuracy of the student model on the test images: 43.78 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.56it/s]

Epoch [10/40], Step [10/391], Loss: 1.5717


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [10/40], Step [20/391], Loss: 1.5131


  8%|▊         | 31/391 [00:06<01:01,  5.86it/s]

Epoch [10/40], Step [30/391], Loss: 1.5295


 10%|█         | 41/391 [00:07<00:59,  5.84it/s]

Epoch [10/40], Step [40/391], Loss: 1.5027


 13%|█▎        | 51/391 [00:09<00:58,  5.85it/s]

Epoch [10/40], Step [50/391], Loss: 1.4939


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [10/40], Step [60/391], Loss: 1.5030


 18%|█▊        | 71/391 [00:13<00:54,  5.85it/s]

Epoch [10/40], Step [70/391], Loss: 1.5621


 21%|██        | 81/391 [00:14<00:52,  5.85it/s]

Epoch [10/40], Step [80/391], Loss: 1.5474


 23%|██▎       | 91/391 [00:16<00:51,  5.85it/s]

Epoch [10/40], Step [90/391], Loss: 1.5124


 26%|██▌       | 101/391 [00:18<00:49,  5.87it/s]

Epoch [10/40], Step [100/391], Loss: 1.5040


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [10/40], Step [110/391], Loss: 1.5167


 31%|███       | 121/391 [00:21<00:45,  5.87it/s]

Epoch [10/40], Step [120/391], Loss: 1.5579


 34%|███▎      | 131/391 [00:23<00:44,  5.86it/s]

Epoch [10/40], Step [130/391], Loss: 1.5462


 36%|███▌      | 141/391 [00:25<00:42,  5.86it/s]

Epoch [10/40], Step [140/391], Loss: 1.5546


 39%|███▊      | 151/391 [00:26<00:41,  5.84it/s]

Epoch [10/40], Step [150/391], Loss: 1.5469


 41%|████      | 161/391 [00:28<00:39,  5.85it/s]

Epoch [10/40], Step [160/391], Loss: 1.5538


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [10/40], Step [170/391], Loss: 1.4836


 46%|████▋     | 181/391 [00:31<00:35,  5.84it/s]

Epoch [10/40], Step [180/391], Loss: 1.5092


 49%|████▉     | 191/391 [00:33<00:34,  5.87it/s]

Epoch [10/40], Step [190/391], Loss: 1.4800


 51%|█████▏    | 201/391 [00:35<00:32,  5.86it/s]

Epoch [10/40], Step [200/391], Loss: 1.5310


 54%|█████▍    | 211/391 [00:37<00:30,  5.86it/s]

Epoch [10/40], Step [210/391], Loss: 1.4961


 57%|█████▋    | 221/391 [00:38<00:29,  5.86it/s]

Epoch [10/40], Step [220/391], Loss: 1.5444


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [10/40], Step [230/391], Loss: 1.5047


 62%|██████▏   | 241/391 [00:42<00:25,  5.87it/s]

Epoch [10/40], Step [240/391], Loss: 1.5697


 64%|██████▍   | 251/391 [00:43<00:23,  5.85it/s]

Epoch [10/40], Step [250/391], Loss: 1.5794


 67%|██████▋   | 261/391 [00:45<00:22,  5.87it/s]

Epoch [10/40], Step [260/391], Loss: 1.5329


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [10/40], Step [270/391], Loss: 1.5328


 72%|███████▏  | 281/391 [00:48<00:18,  5.81it/s]

Epoch [10/40], Step [280/391], Loss: 1.5612


 74%|███████▍  | 291/391 [00:50<00:17,  5.84it/s]

Epoch [10/40], Step [290/391], Loss: 1.5544


 77%|███████▋  | 301/391 [00:52<00:15,  5.86it/s]

Epoch [10/40], Step [300/391], Loss: 1.5342


 80%|███████▉  | 311/391 [00:54<00:13,  5.86it/s]

Epoch [10/40], Step [310/391], Loss: 1.6038


 82%|████████▏ | 321/391 [00:55<00:11,  5.85it/s]

Epoch [10/40], Step [320/391], Loss: 1.6120


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [10/40], Step [330/391], Loss: 1.5751


 87%|████████▋ | 341/391 [00:59<00:08,  5.85it/s]

Epoch [10/40], Step [340/391], Loss: 1.5484


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [10/40], Step [350/391], Loss: 1.5550


 92%|█████████▏| 361/391 [01:02<00:05,  5.84it/s]

Epoch [10/40], Step [360/391], Loss: 1.5924


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [10/40], Step [370/391], Loss: 1.5758


 97%|█████████▋| 381/391 [01:06<00:01,  5.87it/s]

Epoch [10/40], Step [380/391], Loss: 1.5707


100%|██████████| 391/391 [01:07<00:00,  5.77it/s]

Epoch [10/40], Step [390/391], Loss: 1.5828





Test Accuracy of the student model on the test images: 41.45 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.56it/s]

Epoch [11/40], Step [10/391], Loss: 1.5200


  5%|▌         | 21/391 [00:04<01:03,  5.86it/s]

Epoch [11/40], Step [20/391], Loss: 1.4716


  8%|▊         | 31/391 [00:06<01:01,  5.85it/s]

Epoch [11/40], Step [30/391], Loss: 1.4756


 10%|█         | 41/391 [00:08<00:59,  5.86it/s]

Epoch [11/40], Step [40/391], Loss: 1.4844


 13%|█▎        | 51/391 [00:09<00:57,  5.87it/s]

Epoch [11/40], Step [50/391], Loss: 1.4954


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [11/40], Step [60/391], Loss: 1.4765


 18%|█▊        | 71/391 [00:13<00:54,  5.86it/s]

Epoch [11/40], Step [70/391], Loss: 1.4410


 21%|██        | 81/391 [00:14<00:52,  5.85it/s]

Epoch [11/40], Step [80/391], Loss: 1.4739


 23%|██▎       | 91/391 [00:16<00:51,  5.87it/s]

Epoch [11/40], Step [90/391], Loss: 1.4322


 26%|██▌       | 101/391 [00:18<00:49,  5.87it/s]

Epoch [11/40], Step [100/391], Loss: 1.4608


 28%|██▊       | 111/391 [00:19<00:47,  5.85it/s]

Epoch [11/40], Step [110/391], Loss: 1.4782


 31%|███       | 121/391 [00:21<00:46,  5.85it/s]

Epoch [11/40], Step [120/391], Loss: 1.4938


 34%|███▎      | 131/391 [00:23<00:44,  5.86it/s]

Epoch [11/40], Step [130/391], Loss: 1.5087


 36%|███▌      | 141/391 [00:25<00:42,  5.84it/s]

Epoch [11/40], Step [140/391], Loss: 1.5181


 39%|███▊      | 151/391 [00:26<00:40,  5.87it/s]

Epoch [11/40], Step [150/391], Loss: 1.5038


 41%|████      | 161/391 [00:28<00:39,  5.85it/s]

Epoch [11/40], Step [160/391], Loss: 1.4902


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [11/40], Step [170/391], Loss: 1.4765


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [11/40], Step [180/391], Loss: 1.4801


 49%|████▉     | 191/391 [00:33<00:34,  5.87it/s]

Epoch [11/40], Step [190/391], Loss: 1.4311


 51%|█████▏    | 201/391 [00:35<00:32,  5.86it/s]

Epoch [11/40], Step [200/391], Loss: 1.5039


 54%|█████▍    | 211/391 [00:37<00:30,  5.86it/s]

Epoch [11/40], Step [210/391], Loss: 1.5062


 57%|█████▋    | 221/391 [00:38<00:29,  5.85it/s]

Epoch [11/40], Step [220/391], Loss: 1.4870


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [11/40], Step [230/391], Loss: 1.4752


 62%|██████▏   | 241/391 [00:42<00:25,  5.87it/s]

Epoch [11/40], Step [240/391], Loss: 1.5223


 64%|██████▍   | 251/391 [00:43<00:23,  5.86it/s]

Epoch [11/40], Step [250/391], Loss: 1.5028


 67%|██████▋   | 261/391 [00:45<00:22,  5.84it/s]

Epoch [11/40], Step [260/391], Loss: 1.4985


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [11/40], Step [270/391], Loss: 1.4788


 72%|███████▏  | 281/391 [00:49<00:18,  5.86it/s]

Epoch [11/40], Step [280/391], Loss: 1.5013


 74%|███████▍  | 291/391 [00:50<00:17,  5.86it/s]

Epoch [11/40], Step [290/391], Loss: 1.5270


 77%|███████▋  | 301/391 [00:52<00:15,  5.85it/s]

Epoch [11/40], Step [300/391], Loss: 1.5124


 80%|███████▉  | 311/391 [00:54<00:13,  5.87it/s]

Epoch [11/40], Step [310/391], Loss: 1.4602


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [11/40], Step [320/391], Loss: 1.5405


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [11/40], Step [330/391], Loss: 1.5607


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [11/40], Step [340/391], Loss: 1.5722


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [11/40], Step [350/391], Loss: 1.5637


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [11/40], Step [360/391], Loss: 1.5172


 95%|█████████▍| 371/391 [01:04<00:03,  5.85it/s]

Epoch [11/40], Step [370/391], Loss: 1.5510


 97%|█████████▋| 381/391 [01:06<00:01,  5.87it/s]

Epoch [11/40], Step [380/391], Loss: 1.5164


100%|██████████| 391/391 [01:07<00:00,  5.77it/s]

Epoch [11/40], Step [390/391], Loss: 1.4934





Test Accuracy of the student model on the test images: 44.37 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.57it/s]

Epoch [12/40], Step [10/391], Loss: 1.4665


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [12/40], Step [20/391], Loss: 1.3809


  8%|▊         | 31/391 [00:06<01:01,  5.86it/s]

Epoch [12/40], Step [30/391], Loss: 1.3702


 10%|█         | 41/391 [00:08<00:59,  5.85it/s]

Epoch [12/40], Step [40/391], Loss: 1.4139


 13%|█▎        | 51/391 [00:09<00:58,  5.86it/s]

Epoch [12/40], Step [50/391], Loss: 1.4134


 16%|█▌        | 61/391 [00:11<00:56,  5.87it/s]

Epoch [12/40], Step [60/391], Loss: 1.4325


 18%|█▊        | 71/391 [00:13<00:54,  5.85it/s]

Epoch [12/40], Step [70/391], Loss: 1.4285


 21%|██        | 81/391 [00:14<00:52,  5.86it/s]

Epoch [12/40], Step [80/391], Loss: 1.4062


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [12/40], Step [90/391], Loss: 1.3986


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [12/40], Step [100/391], Loss: 1.4322


 28%|██▊       | 111/391 [00:19<00:47,  5.85it/s]

Epoch [12/40], Step [110/391], Loss: 1.4582


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [12/40], Step [120/391], Loss: 1.4468


 34%|███▎      | 131/391 [00:23<00:44,  5.85it/s]

Epoch [12/40], Step [130/391], Loss: 1.4417


 36%|███▌      | 141/391 [00:25<00:42,  5.86it/s]

Epoch [12/40], Step [140/391], Loss: 1.4425


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [12/40], Step [150/391], Loss: 1.4365


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [12/40], Step [160/391], Loss: 1.4622


 44%|████▎     | 171/391 [00:30<00:37,  5.85it/s]

Epoch [12/40], Step [170/391], Loss: 1.4856


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [12/40], Step [180/391], Loss: 1.4427


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [12/40], Step [190/391], Loss: 1.4525


 51%|█████▏    | 201/391 [00:35<00:32,  5.85it/s]

Epoch [12/40], Step [200/391], Loss: 1.4316


 54%|█████▍    | 211/391 [00:37<00:30,  5.86it/s]

Epoch [12/40], Step [210/391], Loss: 1.4709


 57%|█████▋    | 221/391 [00:38<00:28,  5.87it/s]

Epoch [12/40], Step [220/391], Loss: 1.4698


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [12/40], Step [230/391], Loss: 1.4807


 62%|██████▏   | 241/391 [00:42<00:25,  5.85it/s]

Epoch [12/40], Step [240/391], Loss: 1.4714


 64%|██████▍   | 251/391 [00:43<00:23,  5.87it/s]

Epoch [12/40], Step [250/391], Loss: 1.4842


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [12/40], Step [260/391], Loss: 1.4947


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [12/40], Step [270/391], Loss: 1.5127


 72%|███████▏  | 281/391 [00:48<00:18,  5.85it/s]

Epoch [12/40], Step [280/391], Loss: 1.5238


 74%|███████▍  | 291/391 [00:50<00:17,  5.87it/s]

Epoch [12/40], Step [290/391], Loss: 1.5137


 77%|███████▋  | 301/391 [00:52<00:15,  5.79it/s]

Epoch [12/40], Step [300/391], Loss: 1.4773


 80%|███████▉  | 311/391 [00:54<00:13,  5.86it/s]

Epoch [12/40], Step [310/391], Loss: 1.4975


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [12/40], Step [320/391], Loss: 1.5023


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [12/40], Step [330/391], Loss: 1.4835


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [12/40], Step [340/391], Loss: 1.4647


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [12/40], Step [350/391], Loss: 1.4564


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [12/40], Step [360/391], Loss: 1.4821


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [12/40], Step [370/391], Loss: 1.4934


 97%|█████████▋| 381/391 [01:06<00:01,  5.86it/s]

Epoch [12/40], Step [380/391], Loss: 1.4698


100%|██████████| 391/391 [01:07<00:00,  5.77it/s]

Epoch [12/40], Step [390/391], Loss: 1.4517





Test Accuracy of the student model on the test images: 49.18 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.60it/s]

Epoch [13/40], Step [10/391], Loss: 1.4189


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [13/40], Step [20/391], Loss: 1.3877


  8%|▊         | 31/391 [00:06<01:01,  5.87it/s]

Epoch [13/40], Step [30/391], Loss: 1.3823


 10%|█         | 41/391 [00:07<00:59,  5.85it/s]

Epoch [13/40], Step [40/391], Loss: 1.3824


 13%|█▎        | 51/391 [00:09<00:58,  5.86it/s]

Epoch [13/40], Step [50/391], Loss: 1.3892


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [13/40], Step [60/391], Loss: 1.3915


 18%|█▊        | 71/391 [00:13<00:54,  5.84it/s]

Epoch [13/40], Step [70/391], Loss: 1.4147


 21%|██        | 81/391 [00:14<00:52,  5.86it/s]

Epoch [13/40], Step [80/391], Loss: 1.4088


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [13/40], Step [90/391], Loss: 1.3703


 26%|██▌       | 101/391 [00:18<00:49,  5.85it/s]

Epoch [13/40], Step [100/391], Loss: 1.3735


 28%|██▊       | 111/391 [00:19<00:47,  5.85it/s]

Epoch [13/40], Step [110/391], Loss: 1.4205


 31%|███       | 121/391 [00:21<00:46,  5.84it/s]

Epoch [13/40], Step [120/391], Loss: 1.3955


 34%|███▎      | 131/391 [00:23<00:44,  5.84it/s]

Epoch [13/40], Step [130/391], Loss: 1.4223


 36%|███▌      | 141/391 [00:25<00:42,  5.86it/s]

Epoch [13/40], Step [140/391], Loss: 1.4483


 39%|███▊      | 151/391 [00:26<00:41,  5.83it/s]

Epoch [13/40], Step [150/391], Loss: 1.4321


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [13/40], Step [160/391], Loss: 1.4096


 44%|████▎     | 171/391 [00:30<00:37,  5.85it/s]

Epoch [13/40], Step [170/391], Loss: 1.4092


 46%|████▋     | 181/391 [00:31<00:35,  5.87it/s]

Epoch [13/40], Step [180/391], Loss: 1.4277


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [13/40], Step [190/391], Loss: 1.4132


 51%|█████▏    | 201/391 [00:35<00:32,  5.85it/s]

Epoch [13/40], Step [200/391], Loss: 1.3542


 54%|█████▍    | 211/391 [00:36<00:30,  5.86it/s]

Epoch [13/40], Step [210/391], Loss: 1.3677


 57%|█████▋    | 221/391 [00:38<00:28,  5.87it/s]

Epoch [13/40], Step [220/391], Loss: 1.4161


 59%|█████▉    | 231/391 [00:40<00:28,  5.53it/s]

Epoch [13/40], Step [230/391], Loss: 1.3586


 62%|██████▏   | 241/391 [00:42<00:25,  5.86it/s]

Epoch [13/40], Step [240/391], Loss: 1.4218


 64%|██████▍   | 251/391 [00:43<00:23,  5.87it/s]

Epoch [13/40], Step [250/391], Loss: 1.3982


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [13/40], Step [260/391], Loss: 1.3787


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [13/40], Step [270/391], Loss: 1.3809


 72%|███████▏  | 281/391 [00:48<00:18,  5.87it/s]

Epoch [13/40], Step [280/391], Loss: 1.4005


 74%|███████▍  | 291/391 [00:50<00:17,  5.87it/s]

Epoch [13/40], Step [290/391], Loss: 1.4254


 77%|███████▋  | 301/391 [00:52<00:15,  5.84it/s]

Epoch [13/40], Step [300/391], Loss: 1.4425


 80%|███████▉  | 311/391 [00:54<00:13,  5.86it/s]

Epoch [13/40], Step [310/391], Loss: 1.4004


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [13/40], Step [320/391], Loss: 1.4019


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [13/40], Step [330/391], Loss: 1.4118


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [13/40], Step [340/391], Loss: 1.4411


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [13/40], Step [350/391], Loss: 1.4274


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [13/40], Step [360/391], Loss: 1.4223


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [13/40], Step [370/391], Loss: 1.4832


 97%|█████████▋| 381/391 [01:06<00:01,  5.85it/s]

Epoch [13/40], Step [380/391], Loss: 1.4731


100%|██████████| 391/391 [01:07<00:00,  5.77it/s]

Epoch [13/40], Step [390/391], Loss: 1.4532





Test Accuracy of the student model on the test images: 45.74 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.57it/s]

Epoch [14/40], Step [10/391], Loss: 1.3683


  5%|▌         | 21/391 [00:04<01:03,  5.84it/s]

Epoch [14/40], Step [20/391], Loss: 1.3345


  8%|▊         | 31/391 [00:06<01:01,  5.87it/s]

Epoch [14/40], Step [30/391], Loss: 1.3316


 10%|█         | 41/391 [00:07<00:59,  5.87it/s]

Epoch [14/40], Step [40/391], Loss: 1.3514


 13%|█▎        | 51/391 [00:09<00:58,  5.85it/s]

Epoch [14/40], Step [50/391], Loss: 1.3415


 16%|█▌        | 61/391 [00:11<00:56,  5.85it/s]

Epoch [14/40], Step [60/391], Loss: 1.3383


 18%|█▊        | 71/391 [00:13<00:55,  5.81it/s]

Epoch [14/40], Step [70/391], Loss: 1.3510


 21%|██        | 81/391 [00:14<00:52,  5.86it/s]

Epoch [14/40], Step [80/391], Loss: 1.3759


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [14/40], Step [90/391], Loss: 1.3283


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [14/40], Step [100/391], Loss: 1.3340


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [14/40], Step [110/391], Loss: 1.3647


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [14/40], Step [120/391], Loss: 1.3152


 34%|███▎      | 131/391 [00:23<00:44,  5.83it/s]

Epoch [14/40], Step [130/391], Loss: 1.3144


 36%|███▌      | 141/391 [00:25<00:42,  5.86it/s]

Epoch [14/40], Step [140/391], Loss: 1.3736


 39%|███▊      | 151/391 [00:26<00:41,  5.84it/s]

Epoch [14/40], Step [150/391], Loss: 1.3597


 41%|████      | 161/391 [00:28<00:39,  5.87it/s]

Epoch [14/40], Step [160/391], Loss: 1.3617


 44%|████▎     | 171/391 [00:30<00:37,  5.84it/s]

Epoch [14/40], Step [170/391], Loss: 1.3554


 46%|████▋     | 181/391 [00:31<00:35,  5.85it/s]

Epoch [14/40], Step [180/391], Loss: 1.3502


 49%|████▉     | 191/391 [00:33<00:34,  5.84it/s]

Epoch [14/40], Step [190/391], Loss: 1.3890


 51%|█████▏    | 201/391 [00:35<00:32,  5.86it/s]

Epoch [14/40], Step [200/391], Loss: 1.3572


 54%|█████▍    | 211/391 [00:36<00:30,  5.85it/s]

Epoch [14/40], Step [210/391], Loss: 1.3666


 57%|█████▋    | 221/391 [00:38<00:28,  5.87it/s]

Epoch [14/40], Step [220/391], Loss: 1.3651


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [14/40], Step [230/391], Loss: 1.3333


 62%|██████▏   | 241/391 [00:42<00:25,  5.86it/s]

Epoch [14/40], Step [240/391], Loss: 1.3717


 64%|██████▍   | 251/391 [00:43<00:23,  5.87it/s]

Epoch [14/40], Step [250/391], Loss: 1.3669


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [14/40], Step [260/391], Loss: 1.3836


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [14/40], Step [270/391], Loss: 1.3784


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [14/40], Step [280/391], Loss: 1.3851


 74%|███████▍  | 291/391 [00:50<00:17,  5.86it/s]

Epoch [14/40], Step [290/391], Loss: 1.3386


 77%|███████▋  | 301/391 [00:52<00:15,  5.86it/s]

Epoch [14/40], Step [300/391], Loss: 1.3575


 80%|███████▉  | 311/391 [00:54<00:13,  5.87it/s]

Epoch [14/40], Step [310/391], Loss: 1.3838


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [14/40], Step [320/391], Loss: 1.4162


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [14/40], Step [330/391], Loss: 1.3956


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [14/40], Step [340/391], Loss: 1.3994


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [14/40], Step [350/391], Loss: 1.3778


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [14/40], Step [360/391], Loss: 1.3720


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [14/40], Step [370/391], Loss: 1.4197


 97%|█████████▋| 381/391 [01:05<00:01,  5.85it/s]

Epoch [14/40], Step [380/391], Loss: 1.4210


100%|██████████| 391/391 [01:07<00:00,  5.77it/s]

Epoch [14/40], Step [390/391], Loss: 1.4282





Test Accuracy of the student model on the test images: 48.18 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.57it/s]

Epoch [15/40], Step [10/391], Loss: 1.3501


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [15/40], Step [20/391], Loss: 1.3214


  8%|▊         | 31/391 [00:06<01:01,  5.86it/s]

Epoch [15/40], Step [30/391], Loss: 1.3174


 10%|█         | 41/391 [00:08<00:59,  5.86it/s]

Epoch [15/40], Step [40/391], Loss: 1.2892


 13%|█▎        | 51/391 [00:09<00:57,  5.87it/s]

Epoch [15/40], Step [50/391], Loss: 1.3089


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [15/40], Step [60/391], Loss: 1.3126


 18%|█▊        | 71/391 [00:13<00:54,  5.86it/s]

Epoch [15/40], Step [70/391], Loss: 1.2751


 21%|██        | 81/391 [00:14<00:52,  5.86it/s]

Epoch [15/40], Step [80/391], Loss: 1.2796


 23%|██▎       | 91/391 [00:16<00:51,  5.85it/s]

Epoch [15/40], Step [90/391], Loss: 1.2844


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [15/40], Step [100/391], Loss: 1.3414


 28%|██▊       | 111/391 [00:19<00:47,  5.84it/s]

Epoch [15/40], Step [110/391], Loss: 1.3065


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [15/40], Step [120/391], Loss: 1.3036


 34%|███▎      | 131/391 [00:23<00:44,  5.87it/s]

Epoch [15/40], Step [130/391], Loss: 1.3499


 36%|███▌      | 141/391 [00:25<00:42,  5.87it/s]

Epoch [15/40], Step [140/391], Loss: 1.3458


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [15/40], Step [150/391], Loss: 1.3382


 41%|████      | 161/391 [00:28<00:39,  5.84it/s]

Epoch [15/40], Step [160/391], Loss: 1.3354


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [15/40], Step [170/391], Loss: 1.3527


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [15/40], Step [180/391], Loss: 1.3491


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [15/40], Step [190/391], Loss: 1.3421


 51%|█████▏    | 201/391 [00:35<00:32,  5.86it/s]

Epoch [15/40], Step [200/391], Loss: 1.3048


 54%|█████▍    | 211/391 [00:36<00:30,  5.86it/s]

Epoch [15/40], Step [210/391], Loss: 1.3048


 57%|█████▋    | 221/391 [00:38<00:29,  5.86it/s]

Epoch [15/40], Step [220/391], Loss: 1.3072


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [15/40], Step [230/391], Loss: 1.3475


 62%|██████▏   | 241/391 [00:42<00:25,  5.86it/s]

Epoch [15/40], Step [240/391], Loss: 1.3664


 64%|██████▍   | 251/391 [00:43<00:23,  5.85it/s]

Epoch [15/40], Step [250/391], Loss: 1.3633


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [15/40], Step [260/391], Loss: 1.3113


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [15/40], Step [270/391], Loss: 1.3451


 72%|███████▏  | 281/391 [00:48<00:18,  5.87it/s]

Epoch [15/40], Step [280/391], Loss: 1.3400


 74%|███████▍  | 291/391 [00:50<00:17,  5.82it/s]

Epoch [15/40], Step [290/391], Loss: 1.3266


 77%|███████▋  | 301/391 [00:52<00:15,  5.86it/s]

Epoch [15/40], Step [300/391], Loss: 1.3396


 80%|███████▉  | 311/391 [00:54<00:13,  5.87it/s]

Epoch [15/40], Step [310/391], Loss: 1.3111


 82%|████████▏ | 321/391 [00:55<00:11,  5.85it/s]

Epoch [15/40], Step [320/391], Loss: 1.3411


 85%|████████▍ | 331/391 [00:57<00:10,  5.87it/s]

Epoch [15/40], Step [330/391], Loss: 1.3617


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [15/40], Step [340/391], Loss: 1.3546


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [15/40], Step [350/391], Loss: 1.3871


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [15/40], Step [360/391], Loss: 1.3576


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [15/40], Step [370/391], Loss: 1.3626


 97%|█████████▋| 381/391 [01:05<00:01,  5.86it/s]

Epoch [15/40], Step [380/391], Loss: 1.3406


100%|██████████| 391/391 [01:07<00:00,  5.78it/s]

Epoch [15/40], Step [390/391], Loss: 1.3687





Test Accuracy of the student model on the test images: 48.59 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.60it/s]

Epoch [16/40], Step [10/391], Loss: 1.2923


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [16/40], Step [20/391], Loss: 1.2800


  8%|▊         | 31/391 [00:06<01:01,  5.86it/s]

Epoch [16/40], Step [30/391], Loss: 1.2713


 10%|█         | 41/391 [00:07<00:59,  5.86it/s]

Epoch [16/40], Step [40/391], Loss: 1.2616


 13%|█▎        | 51/391 [00:09<00:57,  5.87it/s]

Epoch [16/40], Step [50/391], Loss: 1.2726


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [16/40], Step [60/391], Loss: 1.2673


 18%|█▊        | 71/391 [00:12<00:54,  5.86it/s]

Epoch [16/40], Step [70/391], Loss: 1.2826


 21%|██        | 81/391 [00:14<00:52,  5.87it/s]

Epoch [16/40], Step [80/391], Loss: 1.2574


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [16/40], Step [90/391], Loss: 1.2455


 26%|██▌       | 101/391 [00:18<00:49,  5.85it/s]

Epoch [16/40], Step [100/391], Loss: 1.2602


 28%|██▊       | 111/391 [00:19<00:47,  5.85it/s]

Epoch [16/40], Step [110/391], Loss: 1.2585


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [16/40], Step [120/391], Loss: 1.2442


 34%|███▎      | 131/391 [00:23<00:44,  5.86it/s]

Epoch [16/40], Step [130/391], Loss: 1.2711


 36%|███▌      | 141/391 [00:24<00:42,  5.86it/s]

Epoch [16/40], Step [140/391], Loss: 1.2787


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [16/40], Step [150/391], Loss: 1.2737


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [16/40], Step [160/391], Loss: 1.3107


 44%|████▎     | 171/391 [00:30<00:37,  5.87it/s]

Epoch [16/40], Step [170/391], Loss: 1.3304


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [16/40], Step [180/391], Loss: 1.3323


 49%|████▉     | 191/391 [00:33<00:34,  5.85it/s]

Epoch [16/40], Step [190/391], Loss: 1.2890


 51%|█████▏    | 201/391 [00:35<00:32,  5.87it/s]

Epoch [16/40], Step [200/391], Loss: 1.2809


 54%|█████▍    | 211/391 [00:36<00:30,  5.86it/s]

Epoch [16/40], Step [210/391], Loss: 1.3105


 57%|█████▋    | 221/391 [00:38<00:29,  5.84it/s]

Epoch [16/40], Step [220/391], Loss: 1.2866


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [16/40], Step [230/391], Loss: 1.2642


 62%|██████▏   | 241/391 [00:41<00:25,  5.86it/s]

Epoch [16/40], Step [240/391], Loss: 1.2734


 64%|██████▍   | 251/391 [00:43<00:23,  5.87it/s]

Epoch [16/40], Step [250/391], Loss: 1.2559


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [16/40], Step [260/391], Loss: 1.2867


 69%|██████▉   | 271/391 [00:47<00:20,  5.87it/s]

Epoch [16/40], Step [270/391], Loss: 1.2820


 72%|███████▏  | 281/391 [00:48<00:18,  5.85it/s]

Epoch [16/40], Step [280/391], Loss: 1.3118


 74%|███████▍  | 291/391 [00:50<00:17,  5.85it/s]

Epoch [16/40], Step [290/391], Loss: 1.3190


 77%|███████▋  | 301/391 [00:52<00:15,  5.84it/s]

Epoch [16/40], Step [300/391], Loss: 1.2785


 80%|███████▉  | 311/391 [00:53<00:13,  5.84it/s]

Epoch [16/40], Step [310/391], Loss: 1.3388


 82%|████████▏ | 321/391 [00:55<00:11,  5.85it/s]

Epoch [16/40], Step [320/391], Loss: 1.3927


 85%|████████▍ | 331/391 [00:57<00:10,  5.87it/s]

Epoch [16/40], Step [330/391], Loss: 1.3674


 87%|████████▋ | 341/391 [00:59<00:08,  5.85it/s]

Epoch [16/40], Step [340/391], Loss: 1.3553


 90%|████████▉ | 351/391 [01:00<00:06,  5.85it/s]

Epoch [16/40], Step [350/391], Loss: 1.3265


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [16/40], Step [360/391], Loss: 1.3150


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [16/40], Step [370/391], Loss: 1.3367


 97%|█████████▋| 381/391 [01:05<00:01,  5.86it/s]

Epoch [16/40], Step [380/391], Loss: 1.3561


100%|██████████| 391/391 [01:07<00:00,  5.79it/s]

Epoch [16/40], Step [390/391], Loss: 1.3571





Test Accuracy of the student model on the test images: 50.20 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.59it/s]

Epoch [17/40], Step [10/391], Loss: 1.2613


  5%|▌         | 21/391 [00:04<01:03,  5.82it/s]

Epoch [17/40], Step [20/391], Loss: 1.2406


  8%|▊         | 31/391 [00:06<01:01,  5.86it/s]

Epoch [17/40], Step [30/391], Loss: 1.2405


 10%|█         | 41/391 [00:07<00:59,  5.85it/s]

Epoch [17/40], Step [40/391], Loss: 1.2562


 13%|█▎        | 51/391 [00:09<00:57,  5.86it/s]

Epoch [17/40], Step [50/391], Loss: 1.2334


 16%|█▌        | 61/391 [00:11<00:56,  5.85it/s]

Epoch [17/40], Step [60/391], Loss: 1.2266


 18%|█▊        | 71/391 [00:13<00:54,  5.87it/s]

Epoch [17/40], Step [70/391], Loss: 1.2185


 21%|██        | 81/391 [00:14<00:52,  5.87it/s]

Epoch [17/40], Step [80/391], Loss: 1.2228


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [17/40], Step [90/391], Loss: 1.2399


 26%|██▌       | 101/391 [00:18<00:49,  5.85it/s]

Epoch [17/40], Step [100/391], Loss: 1.2437


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [17/40], Step [110/391], Loss: 1.2581


 31%|███       | 121/391 [00:21<00:46,  5.87it/s]

Epoch [17/40], Step [120/391], Loss: 1.2440


 34%|███▎      | 131/391 [00:23<00:44,  5.87it/s]

Epoch [17/40], Step [130/391], Loss: 1.2457


 36%|███▌      | 141/391 [00:25<00:42,  5.86it/s]

Epoch [17/40], Step [140/391], Loss: 1.2918


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [17/40], Step [150/391], Loss: 1.2481


 41%|████      | 161/391 [00:28<00:39,  5.85it/s]

Epoch [17/40], Step [160/391], Loss: 1.2488


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [17/40], Step [170/391], Loss: 1.2199


 46%|████▋     | 181/391 [00:31<00:35,  5.87it/s]

Epoch [17/40], Step [180/391], Loss: 1.2315


 49%|████▉     | 191/391 [00:33<00:34,  5.85it/s]

Epoch [17/40], Step [190/391], Loss: 1.2764


 51%|█████▏    | 201/391 [00:35<00:32,  5.85it/s]

Epoch [17/40], Step [200/391], Loss: 1.2630


 54%|█████▍    | 211/391 [00:36<00:30,  5.86it/s]

Epoch [17/40], Step [210/391], Loss: 1.2855


 57%|█████▋    | 221/391 [00:38<00:29,  5.86it/s]

Epoch [17/40], Step [220/391], Loss: 1.2850


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [17/40], Step [230/391], Loss: 1.2950


 62%|██████▏   | 241/391 [00:42<00:25,  5.87it/s]

Epoch [17/40], Step [240/391], Loss: 1.2507


 64%|██████▍   | 251/391 [00:43<00:23,  5.86it/s]

Epoch [17/40], Step [250/391], Loss: 1.2522


 67%|██████▋   | 261/391 [00:45<00:22,  5.87it/s]

Epoch [17/40], Step [260/391], Loss: 1.2537


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [17/40], Step [270/391], Loss: 1.2719


 72%|███████▏  | 281/391 [00:48<00:18,  5.84it/s]

Epoch [17/40], Step [280/391], Loss: 1.2965


 74%|███████▍  | 291/391 [00:50<00:17,  5.86it/s]

Epoch [17/40], Step [290/391], Loss: 1.2584


 77%|███████▋  | 301/391 [00:52<00:15,  5.85it/s]

Epoch [17/40], Step [300/391], Loss: 1.2745


 80%|███████▉  | 311/391 [00:54<00:13,  5.86it/s]

Epoch [17/40], Step [310/391], Loss: 1.2834


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [17/40], Step [320/391], Loss: 1.2670


 85%|████████▍ | 331/391 [00:57<00:10,  5.77it/s]

Epoch [17/40], Step [330/391], Loss: 1.2860


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [17/40], Step [340/391], Loss: 1.2899


 90%|████████▉ | 351/391 [01:00<00:06,  5.83it/s]

Epoch [17/40], Step [350/391], Loss: 1.2865


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [17/40], Step [360/391], Loss: 1.2901


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [17/40], Step [370/391], Loss: 1.2890


 97%|█████████▋| 381/391 [01:06<00:01,  5.87it/s]

Epoch [17/40], Step [380/391], Loss: 1.2615


100%|██████████| 391/391 [01:07<00:00,  5.77it/s]

Epoch [17/40], Step [390/391], Loss: 1.2839





Test Accuracy of the student model on the test images: 51.40 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.64it/s]

Epoch [18/40], Step [10/391], Loss: 1.2145


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [18/40], Step [20/391], Loss: 1.2373


  8%|▊         | 31/391 [00:06<01:01,  5.85it/s]

Epoch [18/40], Step [30/391], Loss: 1.1901


 10%|█         | 41/391 [00:07<00:59,  5.87it/s]

Epoch [18/40], Step [40/391], Loss: 1.1753


 13%|█▎        | 51/391 [00:09<00:58,  5.86it/s]

Epoch [18/40], Step [50/391], Loss: 1.1809


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [18/40], Step [60/391], Loss: 1.1729


 18%|█▊        | 71/391 [00:12<00:54,  5.87it/s]

Epoch [18/40], Step [70/391], Loss: 1.1930


 21%|██        | 81/391 [00:14<00:52,  5.85it/s]

Epoch [18/40], Step [80/391], Loss: 1.1851


 23%|██▎       | 91/391 [00:16<00:51,  5.84it/s]

Epoch [18/40], Step [90/391], Loss: 1.2115


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [18/40], Step [100/391], Loss: 1.1760


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [18/40], Step [110/391], Loss: 1.1900


 31%|███       | 121/391 [00:21<00:45,  5.87it/s]

Epoch [18/40], Step [120/391], Loss: 1.2038


 34%|███▎      | 131/391 [00:23<00:44,  5.86it/s]

Epoch [18/40], Step [130/391], Loss: 1.2059


 36%|███▌      | 141/391 [00:24<00:42,  5.86it/s]

Epoch [18/40], Step [140/391], Loss: 1.1927


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [18/40], Step [150/391], Loss: 1.2257


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [18/40], Step [160/391], Loss: 1.2358


 44%|████▎     | 171/391 [00:29<00:37,  5.85it/s]

Epoch [18/40], Step [170/391], Loss: 1.2522


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [18/40], Step [180/391], Loss: 1.2038


 49%|████▉     | 191/391 [00:33<00:34,  5.83it/s]

Epoch [18/40], Step [190/391], Loss: 1.2037


 51%|█████▏    | 201/391 [00:35<00:32,  5.87it/s]

Epoch [18/40], Step [200/391], Loss: 1.2524


 54%|█████▍    | 211/391 [00:36<00:30,  5.85it/s]

Epoch [18/40], Step [210/391], Loss: 1.2454


 57%|█████▋    | 221/391 [00:38<00:28,  5.86it/s]

Epoch [18/40], Step [220/391], Loss: 1.2495


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [18/40], Step [230/391], Loss: 1.2188


 62%|██████▏   | 241/391 [00:41<00:25,  5.84it/s]

Epoch [18/40], Step [240/391], Loss: 1.2481


 64%|██████▍   | 251/391 [00:43<00:23,  5.87it/s]

Epoch [18/40], Step [250/391], Loss: 1.2299


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [18/40], Step [260/391], Loss: 1.2094


 69%|██████▉   | 271/391 [00:47<00:20,  5.82it/s]

Epoch [18/40], Step [270/391], Loss: 1.2695


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [18/40], Step [280/391], Loss: 1.2187


 74%|███████▍  | 291/391 [00:50<00:17,  5.85it/s]

Epoch [18/40], Step [290/391], Loss: 1.2600


 77%|███████▋  | 301/391 [00:52<00:15,  5.86it/s]

Epoch [18/40], Step [300/391], Loss: 1.2494


 80%|███████▉  | 311/391 [00:53<00:13,  5.87it/s]

Epoch [18/40], Step [310/391], Loss: 1.2614


 82%|████████▏ | 321/391 [00:55<00:11,  5.85it/s]

Epoch [18/40], Step [320/391], Loss: 1.2801


 85%|████████▍ | 331/391 [00:57<00:10,  5.87it/s]

Epoch [18/40], Step [330/391], Loss: 1.3183


 87%|████████▋ | 341/391 [00:58<00:08,  5.87it/s]

Epoch [18/40], Step [340/391], Loss: 1.2711


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [18/40], Step [350/391], Loss: 1.2473


 92%|█████████▏| 361/391 [01:02<00:05,  5.83it/s]

Epoch [18/40], Step [360/391], Loss: 1.2183


 95%|█████████▍| 371/391 [01:04<00:03,  5.87it/s]

Epoch [18/40], Step [370/391], Loss: 1.2533


 97%|█████████▋| 381/391 [01:05<00:01,  5.87it/s]

Epoch [18/40], Step [380/391], Loss: 1.2355


100%|██████████| 391/391 [01:07<00:00,  5.79it/s]

Epoch [18/40], Step [390/391], Loss: 1.2619





Test Accuracy of the student model on the test images: 52.89 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.56it/s]

Epoch [19/40], Step [10/391], Loss: 1.1874


  5%|▌         | 21/391 [00:04<01:03,  5.86it/s]

Epoch [19/40], Step [20/391], Loss: 1.1939


  8%|▊         | 31/391 [00:06<01:01,  5.86it/s]

Epoch [19/40], Step [30/391], Loss: 1.1832


 10%|█         | 41/391 [00:08<00:59,  5.86it/s]

Epoch [19/40], Step [40/391], Loss: 1.1426


 13%|█▎        | 51/391 [00:09<00:57,  5.86it/s]

Epoch [19/40], Step [50/391], Loss: 1.1619


 16%|█▌        | 61/391 [00:11<00:56,  5.85it/s]

Epoch [19/40], Step [60/391], Loss: 1.1487


 18%|█▊        | 71/391 [00:13<00:54,  5.87it/s]

Epoch [19/40], Step [70/391], Loss: 1.1520


 21%|██        | 81/391 [00:14<00:52,  5.85it/s]

Epoch [19/40], Step [80/391], Loss: 1.1430


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [19/40], Step [90/391], Loss: 1.1246


 26%|██▌       | 101/391 [00:18<00:49,  5.87it/s]

Epoch [19/40], Step [100/391], Loss: 1.1448


 28%|██▊       | 111/391 [00:19<00:47,  5.87it/s]

Epoch [19/40], Step [110/391], Loss: 1.1609


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [19/40], Step [120/391], Loss: 1.1569


 34%|███▎      | 131/391 [00:23<00:44,  5.86it/s]

Epoch [19/40], Step [130/391], Loss: 1.1821


 36%|███▌      | 141/391 [00:25<00:42,  5.85it/s]

Epoch [19/40], Step [140/391], Loss: 1.2135


 39%|███▊      | 151/391 [00:26<00:41,  5.84it/s]

Epoch [19/40], Step [150/391], Loss: 1.1813


 41%|████      | 161/391 [00:28<00:39,  5.85it/s]

Epoch [19/40], Step [160/391], Loss: 1.1473


 44%|████▎     | 171/391 [00:30<00:37,  5.85it/s]

Epoch [19/40], Step [170/391], Loss: 1.1575


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [19/40], Step [180/391], Loss: 1.2011


 49%|████▉     | 191/391 [00:33<00:34,  5.87it/s]

Epoch [19/40], Step [190/391], Loss: 1.2031


 51%|█████▏    | 201/391 [00:35<00:33,  5.73it/s]

Epoch [19/40], Step [200/391], Loss: 1.1778


 54%|█████▍    | 211/391 [00:37<00:30,  5.85it/s]

Epoch [19/40], Step [210/391], Loss: 1.2124


 57%|█████▋    | 221/391 [00:38<00:29,  5.85it/s]

Epoch [19/40], Step [220/391], Loss: 1.2019


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [19/40], Step [230/391], Loss: 1.2210


 62%|██████▏   | 241/391 [00:42<00:25,  5.87it/s]

Epoch [19/40], Step [240/391], Loss: 1.1855


 64%|██████▍   | 251/391 [00:43<00:23,  5.86it/s]

Epoch [19/40], Step [250/391], Loss: 1.2305


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [19/40], Step [260/391], Loss: 1.1791


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [19/40], Step [270/391], Loss: 1.2009


 72%|███████▏  | 281/391 [00:49<00:18,  5.87it/s]

Epoch [19/40], Step [280/391], Loss: 1.2009


 74%|███████▍  | 291/391 [00:50<00:17,  5.85it/s]

Epoch [19/40], Step [290/391], Loss: 1.1872


 77%|███████▋  | 301/391 [00:52<00:15,  5.87it/s]

Epoch [19/40], Step [300/391], Loss: 1.2228


 80%|███████▉  | 311/391 [00:54<00:13,  5.85it/s]

Epoch [19/40], Step [310/391], Loss: 1.2059


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [19/40], Step [320/391], Loss: 1.1849


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [19/40], Step [330/391], Loss: 1.1736


 87%|████████▋ | 341/391 [00:59<00:08,  5.85it/s]

Epoch [19/40], Step [340/391], Loss: 1.1723


 90%|████████▉ | 351/391 [01:00<00:06,  5.85it/s]

Epoch [19/40], Step [350/391], Loss: 1.2324


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [19/40], Step [360/391], Loss: 1.2011


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [19/40], Step [370/391], Loss: 1.2350


 97%|█████████▋| 381/391 [01:06<00:01,  5.84it/s]

Epoch [19/40], Step [380/391], Loss: 1.1793


100%|██████████| 391/391 [01:07<00:00,  5.77it/s]

Epoch [19/40], Step [390/391], Loss: 1.2283





Test Accuracy of the student model on the test images: 50.33 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.55it/s]

Epoch [20/40], Step [10/391], Loss: 1.1230


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [20/40], Step [20/391], Loss: 1.1263


  8%|▊         | 31/391 [00:06<01:01,  5.87it/s]

Epoch [20/40], Step [30/391], Loss: 1.1150


 10%|█         | 41/391 [00:07<00:59,  5.87it/s]

Epoch [20/40], Step [40/391], Loss: 1.1256


 13%|█▎        | 51/391 [00:09<00:58,  5.85it/s]

Epoch [20/40], Step [50/391], Loss: 1.1039


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [20/40], Step [60/391], Loss: 1.1125


 18%|█▊        | 71/391 [00:12<00:54,  5.84it/s]

Epoch [20/40], Step [70/391], Loss: 1.1134


 21%|██        | 81/391 [00:14<00:52,  5.87it/s]

Epoch [20/40], Step [80/391], Loss: 1.1353


 23%|██▎       | 91/391 [00:16<00:51,  5.85it/s]

Epoch [20/40], Step [90/391], Loss: 1.1389


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [20/40], Step [100/391], Loss: 1.1506


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [20/40], Step [110/391], Loss: 1.1239


 31%|███       | 121/391 [00:21<00:46,  5.87it/s]

Epoch [20/40], Step [120/391], Loss: 1.1527


 34%|███▎      | 131/391 [00:23<00:44,  5.84it/s]

Epoch [20/40], Step [130/391], Loss: 1.1188


 36%|███▌      | 141/391 [00:24<00:42,  5.82it/s]

Epoch [20/40], Step [140/391], Loss: 1.1607


 39%|███▊      | 151/391 [00:26<00:41,  5.85it/s]

Epoch [20/40], Step [150/391], Loss: 1.1612


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [20/40], Step [160/391], Loss: 1.1394


 44%|████▎     | 171/391 [00:30<00:37,  5.84it/s]

Epoch [20/40], Step [170/391], Loss: 1.1314


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [20/40], Step [180/391], Loss: 1.1541


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [20/40], Step [190/391], Loss: 1.1468


 51%|█████▏    | 201/391 [00:35<00:32,  5.87it/s]

Epoch [20/40], Step [200/391], Loss: 1.1342


 54%|█████▍    | 211/391 [00:36<00:30,  5.85it/s]

Epoch [20/40], Step [210/391], Loss: 1.1333


 57%|█████▋    | 221/391 [00:38<00:28,  5.87it/s]

Epoch [20/40], Step [220/391], Loss: 1.1249


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [20/40], Step [230/391], Loss: 1.1431


 62%|██████▏   | 241/391 [00:42<00:25,  5.87it/s]

Epoch [20/40], Step [240/391], Loss: 1.1460


 64%|██████▍   | 251/391 [00:43<00:23,  5.86it/s]

Epoch [20/40], Step [250/391], Loss: 1.1787


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [20/40], Step [260/391], Loss: 1.1665


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [20/40], Step [270/391], Loss: 1.1541


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [20/40], Step [280/391], Loss: 1.1692


 74%|███████▍  | 291/391 [00:50<00:17,  5.86it/s]

Epoch [20/40], Step [290/391], Loss: 1.1837


 77%|███████▋  | 301/391 [00:52<00:15,  5.88it/s]

Epoch [20/40], Step [300/391], Loss: 1.1692


 80%|███████▉  | 311/391 [00:53<00:13,  5.86it/s]

Epoch [20/40], Step [310/391], Loss: 1.1847


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [20/40], Step [320/391], Loss: 1.1921


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [20/40], Step [330/391], Loss: 1.1754


 87%|████████▋ | 341/391 [00:59<00:08,  5.84it/s]

Epoch [20/40], Step [340/391], Loss: 1.1503


 90%|████████▉ | 351/391 [01:00<00:06,  5.84it/s]

Epoch [20/40], Step [350/391], Loss: 1.1817


 92%|█████████▏| 361/391 [01:02<00:05,  5.85it/s]

Epoch [20/40], Step [360/391], Loss: 1.1782


 95%|█████████▍| 371/391 [01:04<00:03,  5.85it/s]

Epoch [20/40], Step [370/391], Loss: 1.1979


 97%|█████████▋| 381/391 [01:05<00:01,  5.86it/s]

Epoch [20/40], Step [380/391], Loss: 1.1823


100%|██████████| 391/391 [01:07<00:00,  5.78it/s]

Epoch [20/40], Step [390/391], Loss: 1.1714





Test Accuracy of the student model on the test images: 53.88 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.64it/s]

Epoch [21/40], Step [10/391], Loss: 1.1036


  5%|▌         | 21/391 [00:04<01:03,  5.86it/s]

Epoch [21/40], Step [20/391], Loss: 1.0977


  8%|▊         | 31/391 [00:06<01:01,  5.86it/s]

Epoch [21/40], Step [30/391], Loss: 1.0668


 10%|█         | 41/391 [00:07<00:59,  5.84it/s]

Epoch [21/40], Step [40/391], Loss: 1.0726


 13%|█▎        | 51/391 [00:09<00:58,  5.86it/s]

Epoch [21/40], Step [50/391], Loss: 1.0697


 16%|█▌        | 61/391 [00:11<00:56,  5.85it/s]

Epoch [21/40], Step [60/391], Loss: 1.0493


 18%|█▊        | 71/391 [00:12<00:55,  5.82it/s]

Epoch [21/40], Step [70/391], Loss: 1.0747


 21%|██        | 81/391 [00:14<00:52,  5.85it/s]

Epoch [21/40], Step [80/391], Loss: 1.0904


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [21/40], Step [90/391], Loss: 1.0773


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [21/40], Step [100/391], Loss: 1.0722


 28%|██▊       | 111/391 [00:19<00:47,  5.84it/s]

Epoch [21/40], Step [110/391], Loss: 1.0733


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [21/40], Step [120/391], Loss: 1.0989


 34%|███▎      | 131/391 [00:23<00:44,  5.85it/s]

Epoch [21/40], Step [130/391], Loss: 1.0899


 36%|███▌      | 141/391 [00:24<00:42,  5.86it/s]

Epoch [21/40], Step [140/391], Loss: 1.1178


 39%|███▊      | 151/391 [00:26<00:40,  5.87it/s]

Epoch [21/40], Step [150/391], Loss: 1.0814


 41%|████      | 161/391 [00:28<00:39,  5.83it/s]

Epoch [21/40], Step [160/391], Loss: 1.1107


 44%|████▎     | 171/391 [00:29<00:37,  5.86it/s]

Epoch [21/40], Step [170/391], Loss: 1.1305


 46%|████▋     | 181/391 [00:31<00:35,  5.85it/s]

Epoch [21/40], Step [180/391], Loss: 1.1391


 49%|████▉     | 191/391 [00:33<00:34,  5.85it/s]

Epoch [21/40], Step [190/391], Loss: 1.1337


 51%|█████▏    | 201/391 [00:35<00:32,  5.87it/s]

Epoch [21/40], Step [200/391], Loss: 1.1184


 54%|█████▍    | 211/391 [00:36<00:30,  5.87it/s]

Epoch [21/40], Step [210/391], Loss: 1.1256


 57%|█████▋    | 221/391 [00:38<00:28,  5.87it/s]

Epoch [21/40], Step [220/391], Loss: 1.1110


 59%|█████▉    | 231/391 [00:40<00:27,  5.85it/s]

Epoch [21/40], Step [230/391], Loss: 1.1045


 62%|██████▏   | 241/391 [00:41<00:25,  5.86it/s]

Epoch [21/40], Step [240/391], Loss: 1.1297


 64%|██████▍   | 251/391 [00:43<00:23,  5.88it/s]

Epoch [21/40], Step [250/391], Loss: 1.0824


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [21/40], Step [260/391], Loss: 1.1127


 69%|██████▉   | 271/391 [00:47<00:20,  5.84it/s]

Epoch [21/40], Step [270/391], Loss: 1.1287


 72%|███████▏  | 281/391 [00:48<00:18,  5.85it/s]

Epoch [21/40], Step [280/391], Loss: 1.1468


 74%|███████▍  | 291/391 [00:50<00:17,  5.86it/s]

Epoch [21/40], Step [290/391], Loss: 1.1459


 77%|███████▋  | 301/391 [00:52<00:15,  5.85it/s]

Epoch [21/40], Step [300/391], Loss: 1.1475


 80%|███████▉  | 311/391 [00:53<00:13,  5.85it/s]

Epoch [21/40], Step [310/391], Loss: 1.1289


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [21/40], Step [320/391], Loss: 1.1570


 85%|████████▍ | 331/391 [00:57<00:10,  5.85it/s]

Epoch [21/40], Step [330/391], Loss: 1.1202


 87%|████████▋ | 341/391 [00:59<00:08,  5.85it/s]

Epoch [21/40], Step [340/391], Loss: 1.1545


 90%|████████▉ | 351/391 [01:00<00:06,  5.85it/s]

Epoch [21/40], Step [350/391], Loss: 1.1267


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [21/40], Step [360/391], Loss: 1.1316


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [21/40], Step [370/391], Loss: 1.1317


 97%|█████████▋| 381/391 [01:05<00:01,  5.85it/s]

Epoch [21/40], Step [380/391], Loss: 1.1466


100%|██████████| 391/391 [01:07<00:00,  5.79it/s]

Epoch [21/40], Step [390/391], Loss: 1.1622





Test Accuracy of the student model on the test images: 54.56 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.58it/s]

Epoch [22/40], Step [10/391], Loss: 1.0943


  5%|▌         | 21/391 [00:04<01:03,  5.86it/s]

Epoch [22/40], Step [20/391], Loss: 1.0958


  8%|▊         | 31/391 [00:06<01:01,  5.85it/s]

Epoch [22/40], Step [30/391], Loss: 1.0503


 10%|█         | 41/391 [00:08<00:59,  5.86it/s]

Epoch [22/40], Step [40/391], Loss: 1.0481


 13%|█▎        | 51/391 [00:09<00:58,  5.86it/s]

Epoch [22/40], Step [50/391], Loss: 1.0607


 16%|█▌        | 61/391 [00:11<00:56,  5.85it/s]

Epoch [22/40], Step [60/391], Loss: 1.0470


 18%|█▊        | 71/391 [00:13<00:54,  5.86it/s]

Epoch [22/40], Step [70/391], Loss: 1.0483


 21%|██        | 81/391 [00:14<00:52,  5.86it/s]

Epoch [22/40], Step [80/391], Loss: 1.0853


 23%|██▎       | 91/391 [00:16<00:51,  5.87it/s]

Epoch [22/40], Step [90/391], Loss: 1.0601


 26%|██▌       | 101/391 [00:18<00:49,  5.84it/s]

Epoch [22/40], Step [100/391], Loss: 1.0647


 28%|██▊       | 111/391 [00:19<00:47,  5.87it/s]

Epoch [22/40], Step [110/391], Loss: 1.0200


 31%|███       | 121/391 [00:21<00:46,  5.85it/s]

Epoch [22/40], Step [120/391], Loss: 1.0653


 34%|███▎      | 131/391 [00:23<00:44,  5.85it/s]

Epoch [22/40], Step [130/391], Loss: 1.0706


 36%|███▌      | 141/391 [00:25<00:42,  5.87it/s]

Epoch [22/40], Step [140/391], Loss: 1.0723


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [22/40], Step [150/391], Loss: 1.0846


 41%|████      | 161/391 [00:28<00:39,  5.87it/s]

Epoch [22/40], Step [160/391], Loss: 1.1099


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [22/40], Step [170/391], Loss: 1.0952


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [22/40], Step [180/391], Loss: 1.0814


 49%|████▉     | 191/391 [00:33<00:34,  5.85it/s]

Epoch [22/40], Step [190/391], Loss: 1.0847


 51%|█████▏    | 201/391 [00:35<00:32,  5.86it/s]

Epoch [22/40], Step [200/391], Loss: 1.0884


 54%|█████▍    | 211/391 [00:37<00:30,  5.85it/s]

Epoch [22/40], Step [210/391], Loss: 1.0712


 57%|█████▋    | 221/391 [00:38<00:29,  5.86it/s]

Epoch [22/40], Step [220/391], Loss: 1.0721


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [22/40], Step [230/391], Loss: 1.0542


 62%|██████▏   | 241/391 [00:42<00:25,  5.87it/s]

Epoch [22/40], Step [240/391], Loss: 1.0828


 64%|██████▍   | 251/391 [00:43<00:23,  5.86it/s]

Epoch [22/40], Step [250/391], Loss: 1.0998


 67%|██████▋   | 261/391 [00:45<00:22,  5.84it/s]

Epoch [22/40], Step [260/391], Loss: 1.0583


 69%|██████▉   | 271/391 [00:47<00:20,  5.85it/s]

Epoch [22/40], Step [270/391], Loss: 1.0674


 72%|███████▏  | 281/391 [00:48<00:18,  5.85it/s]

Epoch [22/40], Step [280/391], Loss: 1.0982


 74%|███████▍  | 291/391 [00:50<00:17,  5.86it/s]

Epoch [22/40], Step [290/391], Loss: 1.0587


 77%|███████▋  | 301/391 [00:52<00:15,  5.87it/s]

Epoch [22/40], Step [300/391], Loss: 1.0931


 80%|███████▉  | 311/391 [00:54<00:13,  5.87it/s]

Epoch [22/40], Step [310/391], Loss: 1.0835


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [22/40], Step [320/391], Loss: 1.0977


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [22/40], Step [330/391], Loss: 1.1267


 87%|████████▋ | 341/391 [00:59<00:08,  5.87it/s]

Epoch [22/40], Step [340/391], Loss: 1.0777


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [22/40], Step [350/391], Loss: 1.1001


 92%|█████████▏| 361/391 [01:02<00:05,  5.87it/s]

Epoch [22/40], Step [360/391], Loss: 1.0958


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [22/40], Step [370/391], Loss: 1.1112


 97%|█████████▋| 381/391 [01:06<00:01,  5.86it/s]

Epoch [22/40], Step [380/391], Loss: 1.0966


100%|██████████| 391/391 [01:07<00:00,  5.77it/s]

Epoch [22/40], Step [390/391], Loss: 1.1035





Test Accuracy of the student model on the test images: 55.75 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.64it/s]

Epoch [23/40], Step [10/391], Loss: 1.0459


  5%|▌         | 21/391 [00:04<01:03,  5.86it/s]

Epoch [23/40], Step [20/391], Loss: 1.0409


  8%|▊         | 31/391 [00:06<01:01,  5.83it/s]

Epoch [23/40], Step [30/391], Loss: 1.0212


 10%|█         | 41/391 [00:07<00:59,  5.86it/s]

Epoch [23/40], Step [40/391], Loss: 1.0138


 13%|█▎        | 51/391 [00:09<00:58,  5.84it/s]

Epoch [23/40], Step [50/391], Loss: 1.0050


 16%|█▌        | 61/391 [00:11<00:56,  5.85it/s]

Epoch [23/40], Step [60/391], Loss: 0.9982


 18%|█▊        | 71/391 [00:12<00:54,  5.85it/s]

Epoch [23/40], Step [70/391], Loss: 0.9838


 21%|██        | 81/391 [00:14<00:52,  5.86it/s]

Epoch [23/40], Step [80/391], Loss: 0.9869


 23%|██▎       | 91/391 [00:16<00:51,  5.85it/s]

Epoch [23/40], Step [90/391], Loss: 1.0184


 26%|██▌       | 101/391 [00:18<00:49,  5.82it/s]

Epoch [23/40], Step [100/391], Loss: 1.0323


 28%|██▊       | 111/391 [00:19<00:47,  5.85it/s]

Epoch [23/40], Step [110/391], Loss: 1.0541


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [23/40], Step [120/391], Loss: 1.0580


 34%|███▎      | 131/391 [00:23<00:44,  5.87it/s]

Epoch [23/40], Step [130/391], Loss: 1.0470


 36%|███▌      | 141/391 [00:24<00:42,  5.86it/s]

Epoch [23/40], Step [140/391], Loss: 1.0247


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [23/40], Step [150/391], Loss: 1.0447


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [23/40], Step [160/391], Loss: 1.0076


 44%|████▎     | 171/391 [00:29<00:37,  5.86it/s]

Epoch [23/40], Step [170/391], Loss: 1.0211


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [23/40], Step [180/391], Loss: 1.0249


 49%|████▉     | 191/391 [00:33<00:34,  5.87it/s]

Epoch [23/40], Step [190/391], Loss: 1.0440


 51%|█████▏    | 201/391 [00:35<00:32,  5.87it/s]

Epoch [23/40], Step [200/391], Loss: 1.0484


 54%|█████▍    | 211/391 [00:36<00:30,  5.85it/s]

Epoch [23/40], Step [210/391], Loss: 1.0623


 57%|█████▋    | 221/391 [00:38<00:29,  5.86it/s]

Epoch [23/40], Step [220/391], Loss: 1.0498


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [23/40], Step [230/391], Loss: 1.0337


 62%|██████▏   | 241/391 [00:41<00:25,  5.85it/s]

Epoch [23/40], Step [240/391], Loss: 1.0392


 64%|██████▍   | 251/391 [00:43<00:23,  5.84it/s]

Epoch [23/40], Step [250/391], Loss: 1.0462


 67%|██████▋   | 261/391 [00:45<00:22,  5.85it/s]

Epoch [23/40], Step [260/391], Loss: 1.0577


 69%|██████▉   | 271/391 [00:47<00:20,  5.88it/s]

Epoch [23/40], Step [270/391], Loss: 1.0574


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [23/40], Step [280/391], Loss: 1.0532


 74%|███████▍  | 291/391 [00:50<00:17,  5.87it/s]

Epoch [23/40], Step [290/391], Loss: 1.0320


 77%|███████▋  | 301/391 [00:52<00:15,  5.87it/s]

Epoch [23/40], Step [300/391], Loss: 1.0588


 80%|███████▉  | 311/391 [00:53<00:13,  5.86it/s]

Epoch [23/40], Step [310/391], Loss: 1.0793


 82%|████████▏ | 321/391 [00:55<00:11,  5.85it/s]

Epoch [23/40], Step [320/391], Loss: 1.0721


 85%|████████▍ | 331/391 [00:57<00:10,  5.85it/s]

Epoch [23/40], Step [330/391], Loss: 1.0819


 87%|████████▋ | 341/391 [00:58<00:08,  5.85it/s]

Epoch [23/40], Step [340/391], Loss: 1.0549


 90%|████████▉ | 351/391 [01:00<00:06,  5.87it/s]

Epoch [23/40], Step [350/391], Loss: 1.0734


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [23/40], Step [360/391], Loss: 1.0787


 95%|█████████▍| 371/391 [01:04<00:03,  5.83it/s]

Epoch [23/40], Step [370/391], Loss: 1.0471


 97%|█████████▋| 381/391 [01:05<00:01,  5.86it/s]

Epoch [23/40], Step [380/391], Loss: 1.0448


100%|██████████| 391/391 [01:07<00:00,  5.79it/s]

Epoch [23/40], Step [390/391], Loss: 1.0562





Test Accuracy of the student model on the test images: 54.51 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.62it/s]

Epoch [24/40], Step [10/391], Loss: 0.9564


  5%|▌         | 21/391 [00:04<01:03,  5.84it/s]

Epoch [24/40], Step [20/391], Loss: 0.9459


  8%|▊         | 31/391 [00:06<01:01,  5.86it/s]

Epoch [24/40], Step [30/391], Loss: 0.9733


 10%|█         | 41/391 [00:07<00:59,  5.87it/s]

Epoch [24/40], Step [40/391], Loss: 0.9705


 13%|█▎        | 51/391 [00:09<00:57,  5.86it/s]

Epoch [24/40], Step [50/391], Loss: 0.9610


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [24/40], Step [60/391], Loss: 0.9697


 18%|█▊        | 71/391 [00:12<00:54,  5.86it/s]

Epoch [24/40], Step [70/391], Loss: 0.9652


 21%|██        | 81/391 [00:14<00:53,  5.85it/s]

Epoch [24/40], Step [80/391], Loss: 0.9777


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [24/40], Step [90/391], Loss: 0.9829


 26%|██▌       | 101/391 [00:18<00:49,  5.85it/s]

Epoch [24/40], Step [100/391], Loss: 0.9783


 28%|██▊       | 111/391 [00:19<00:47,  5.85it/s]

Epoch [24/40], Step [110/391], Loss: 0.9676


 31%|███       | 121/391 [00:21<00:46,  5.84it/s]

Epoch [24/40], Step [120/391], Loss: 0.9929


 34%|███▎      | 131/391 [00:23<00:44,  5.85it/s]

Epoch [24/40], Step [130/391], Loss: 1.0174


 36%|███▌      | 141/391 [00:24<00:42,  5.86it/s]

Epoch [24/40], Step [140/391], Loss: 1.0019


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [24/40], Step [150/391], Loss: 1.0130


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [24/40], Step [160/391], Loss: 0.9994


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [24/40], Step [170/391], Loss: 0.9973


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [24/40], Step [180/391], Loss: 1.0305


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [24/40], Step [190/391], Loss: 0.9865


 51%|█████▏    | 201/391 [00:35<00:32,  5.86it/s]

Epoch [24/40], Step [200/391], Loss: 1.0255


 54%|█████▍    | 211/391 [00:36<00:30,  5.88it/s]

Epoch [24/40], Step [210/391], Loss: 1.0057


 57%|█████▋    | 221/391 [00:38<00:28,  5.87it/s]

Epoch [24/40], Step [220/391], Loss: 1.0172


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [24/40], Step [230/391], Loss: 1.0272


 62%|██████▏   | 241/391 [00:42<00:25,  5.84it/s]

Epoch [24/40], Step [240/391], Loss: 1.0058


 64%|██████▍   | 251/391 [00:43<00:23,  5.86it/s]

Epoch [24/40], Step [250/391], Loss: 0.9885


 67%|██████▋   | 261/391 [00:45<00:22,  5.84it/s]

Epoch [24/40], Step [260/391], Loss: 1.0049


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [24/40], Step [270/391], Loss: 1.0119


 72%|███████▏  | 281/391 [00:48<00:18,  5.87it/s]

Epoch [24/40], Step [280/391], Loss: 1.0393


 74%|███████▍  | 291/391 [00:50<00:17,  5.86it/s]

Epoch [24/40], Step [290/391], Loss: 1.0167


 77%|███████▋  | 301/391 [00:52<00:15,  5.86it/s]

Epoch [24/40], Step [300/391], Loss: 1.0307


 80%|███████▉  | 311/391 [00:53<00:13,  5.86it/s]

Epoch [24/40], Step [310/391], Loss: 1.0287


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [24/40], Step [320/391], Loss: 1.0078


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [24/40], Step [330/391], Loss: 1.0162


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [24/40], Step [340/391], Loss: 1.0499


 90%|████████▉ | 351/391 [01:00<00:06,  5.87it/s]

Epoch [24/40], Step [350/391], Loss: 1.0002


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [24/40], Step [360/391], Loss: 1.0179


 95%|█████████▍| 371/391 [01:04<00:03,  5.87it/s]

Epoch [24/40], Step [370/391], Loss: 1.0192


 97%|█████████▋| 381/391 [01:05<00:01,  5.85it/s]

Epoch [24/40], Step [380/391], Loss: 1.0145


100%|██████████| 391/391 [01:07<00:00,  5.78it/s]

Epoch [24/40], Step [390/391], Loss: 1.0135





Test Accuracy of the student model on the test images: 54.23 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.60it/s]

Epoch [25/40], Step [10/391], Loss: 0.9758


  5%|▌         | 21/391 [00:04<01:03,  5.84it/s]

Epoch [25/40], Step [20/391], Loss: 0.9692


  8%|▊         | 31/391 [00:06<01:01,  5.86it/s]

Epoch [25/40], Step [30/391], Loss: 0.9417


 10%|█         | 41/391 [00:07<00:59,  5.86it/s]

Epoch [25/40], Step [40/391], Loss: 0.9119


 13%|█▎        | 51/391 [00:09<00:58,  5.86it/s]

Epoch [25/40], Step [50/391], Loss: 0.9488


 16%|█▌        | 61/391 [00:11<00:56,  5.87it/s]

Epoch [25/40], Step [60/391], Loss: 0.9484


 18%|█▊        | 71/391 [00:13<00:54,  5.86it/s]

Epoch [25/40], Step [70/391], Loss: 0.9163


 21%|██        | 81/391 [00:14<00:52,  5.85it/s]

Epoch [25/40], Step [80/391], Loss: 0.9656


 23%|██▎       | 91/391 [00:16<00:51,  5.87it/s]

Epoch [25/40], Step [90/391], Loss: 0.9398


 26%|██▌       | 101/391 [00:18<00:49,  5.85it/s]

Epoch [25/40], Step [100/391], Loss: 0.9262


 28%|██▊       | 111/391 [00:19<00:47,  5.87it/s]

Epoch [25/40], Step [110/391], Loss: 0.9430


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [25/40], Step [120/391], Loss: 0.9793


 34%|███▎      | 131/391 [00:23<00:44,  5.87it/s]

Epoch [25/40], Step [130/391], Loss: 0.9587


 36%|███▌      | 141/391 [00:24<00:42,  5.87it/s]

Epoch [25/40], Step [140/391], Loss: 0.9773


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [25/40], Step [150/391], Loss: 0.9717


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [25/40], Step [160/391], Loss: 0.9781


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [25/40], Step [170/391], Loss: 0.9475


 46%|████▋     | 181/391 [00:31<00:35,  5.87it/s]

Epoch [25/40], Step [180/391], Loss: 0.9755


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [25/40], Step [190/391], Loss: 0.9788


 51%|█████▏    | 201/391 [00:35<00:32,  5.86it/s]

Epoch [25/40], Step [200/391], Loss: 0.9750


 54%|█████▍    | 211/391 [00:36<00:30,  5.87it/s]

Epoch [25/40], Step [210/391], Loss: 0.9539


 57%|█████▋    | 221/391 [00:38<00:28,  5.87it/s]

Epoch [25/40], Step [220/391], Loss: 0.9780


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [25/40], Step [230/391], Loss: 0.9585


 62%|██████▏   | 241/391 [00:42<00:25,  5.85it/s]

Epoch [25/40], Step [240/391], Loss: 0.9515


 64%|██████▍   | 251/391 [00:43<00:23,  5.87it/s]

Epoch [25/40], Step [250/391], Loss: 0.9649


 67%|██████▋   | 261/391 [00:45<00:22,  5.87it/s]

Epoch [25/40], Step [260/391], Loss: 0.9694


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [25/40], Step [270/391], Loss: 1.0072


 72%|███████▏  | 281/391 [00:48<00:18,  5.87it/s]

Epoch [25/40], Step [280/391], Loss: 0.9795


 74%|███████▍  | 291/391 [00:50<00:17,  5.86it/s]

Epoch [25/40], Step [290/391], Loss: 0.9792


 77%|███████▋  | 301/391 [00:52<00:15,  5.87it/s]

Epoch [25/40], Step [300/391], Loss: 1.0032


 80%|███████▉  | 311/391 [00:53<00:13,  5.86it/s]

Epoch [25/40], Step [310/391], Loss: 0.9646


 82%|████████▏ | 321/391 [00:55<00:11,  5.83it/s]

Epoch [25/40], Step [320/391], Loss: 0.9879


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [25/40], Step [330/391], Loss: 1.0022


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [25/40], Step [340/391], Loss: 1.0304


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [25/40], Step [350/391], Loss: 1.0007


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [25/40], Step [360/391], Loss: 1.0119


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [25/40], Step [370/391], Loss: 0.9719


 97%|█████████▋| 381/391 [01:05<00:01,  5.85it/s]

Epoch [25/40], Step [380/391], Loss: 1.0012


100%|██████████| 391/391 [01:07<00:00,  5.79it/s]

Epoch [25/40], Step [390/391], Loss: 0.9835





Test Accuracy of the student model on the test images: 56.29 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.61it/s]

Epoch [26/40], Step [10/391], Loss: 0.9400


  5%|▌         | 21/391 [00:04<01:03,  5.84it/s]

Epoch [26/40], Step [20/391], Loss: 0.9194


  8%|▊         | 31/391 [00:06<01:01,  5.87it/s]

Epoch [26/40], Step [30/391], Loss: 0.8962


 10%|█         | 41/391 [00:07<00:59,  5.86it/s]

Epoch [26/40], Step [40/391], Loss: 0.8990


 13%|█▎        | 51/391 [00:09<00:58,  5.86it/s]

Epoch [26/40], Step [50/391], Loss: 0.9016


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [26/40], Step [60/391], Loss: 0.8981


 18%|█▊        | 71/391 [00:12<00:54,  5.85it/s]

Epoch [26/40], Step [70/391], Loss: 0.8989


 21%|██        | 81/391 [00:14<00:52,  5.86it/s]

Epoch [26/40], Step [80/391], Loss: 0.8849


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [26/40], Step [90/391], Loss: 0.9087


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [26/40], Step [100/391], Loss: 0.9094


 28%|██▊       | 111/391 [00:19<00:47,  5.87it/s]

Epoch [26/40], Step [110/391], Loss: 0.9083


 31%|███       | 121/391 [00:21<00:46,  5.84it/s]

Epoch [26/40], Step [120/391], Loss: 0.8866


 34%|███▎      | 131/391 [00:23<00:44,  5.87it/s]

Epoch [26/40], Step [130/391], Loss: 0.9150


 36%|███▌      | 141/391 [00:24<00:42,  5.85it/s]

Epoch [26/40], Step [140/391], Loss: 0.9286


 39%|███▊      | 151/391 [00:26<00:40,  5.87it/s]

Epoch [26/40], Step [150/391], Loss: 0.9394


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [26/40], Step [160/391], Loss: 0.9200


 44%|████▎     | 171/391 [00:30<00:37,  5.87it/s]

Epoch [26/40], Step [170/391], Loss: 0.9618


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [26/40], Step [180/391], Loss: 0.9297


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [26/40], Step [190/391], Loss: 0.9288


 51%|█████▏    | 201/391 [00:35<00:32,  5.85it/s]

Epoch [26/40], Step [200/391], Loss: 0.9263


 54%|█████▍    | 211/391 [00:36<00:30,  5.87it/s]

Epoch [26/40], Step [210/391], Loss: 0.9505


 57%|█████▋    | 221/391 [00:38<00:29,  5.86it/s]

Epoch [26/40], Step [220/391], Loss: 0.9488


 59%|█████▉    | 231/391 [00:40<00:27,  5.85it/s]

Epoch [26/40], Step [230/391], Loss: 0.9211


 62%|██████▏   | 241/391 [00:41<00:25,  5.86it/s]

Epoch [26/40], Step [240/391], Loss: 0.9203


 64%|██████▍   | 251/391 [00:43<00:23,  5.89it/s]

Epoch [26/40], Step [250/391], Loss: 0.9461


 67%|██████▋   | 261/391 [00:45<00:22,  5.87it/s]

Epoch [26/40], Step [260/391], Loss: 0.9288


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [26/40], Step [270/391], Loss: 0.9664


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [26/40], Step [280/391], Loss: 0.9447


 74%|███████▍  | 291/391 [00:50<00:17,  5.85it/s]

Epoch [26/40], Step [290/391], Loss: 0.9352


 77%|███████▋  | 301/391 [00:52<00:15,  5.87it/s]

Epoch [26/40], Step [300/391], Loss: 0.9347


 80%|███████▉  | 311/391 [00:53<00:13,  5.86it/s]

Epoch [26/40], Step [310/391], Loss: 0.9222


 82%|████████▏ | 321/391 [00:55<00:11,  5.87it/s]

Epoch [26/40], Step [320/391], Loss: 0.9514


 85%|████████▍ | 331/391 [00:57<00:10,  5.87it/s]

Epoch [26/40], Step [330/391], Loss: 0.9263


 87%|████████▋ | 341/391 [00:58<00:08,  5.86it/s]

Epoch [26/40], Step [340/391], Loss: 0.9518


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [26/40], Step [350/391], Loss: 0.9617


 92%|█████████▏| 361/391 [01:02<00:05,  5.85it/s]

Epoch [26/40], Step [360/391], Loss: 0.9537


 95%|█████████▍| 371/391 [01:04<00:03,  5.85it/s]

Epoch [26/40], Step [370/391], Loss: 0.9600


 97%|█████████▋| 381/391 [01:05<00:01,  5.85it/s]

Epoch [26/40], Step [380/391], Loss: 0.9529


100%|██████████| 391/391 [01:07<00:00,  5.79it/s]

Epoch [26/40], Step [390/391], Loss: 0.9505





Test Accuracy of the student model on the test images: 57.92 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.58it/s]

Epoch [27/40], Step [10/391], Loss: 0.8797


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [27/40], Step [20/391], Loss: 0.8651


  8%|▊         | 31/391 [00:06<01:01,  5.85it/s]

Epoch [27/40], Step [30/391], Loss: 0.8569


 10%|█         | 41/391 [00:08<01:01,  5.71it/s]

Epoch [27/40], Step [40/391], Loss: 0.8609


 13%|█▎        | 51/391 [00:09<00:58,  5.85it/s]

Epoch [27/40], Step [50/391], Loss: 0.8964


 16%|█▌        | 61/391 [00:11<00:56,  5.87it/s]

Epoch [27/40], Step [60/391], Loss: 0.8778


 18%|█▊        | 71/391 [00:13<00:54,  5.85it/s]

Epoch [27/40], Step [70/391], Loss: 0.8742


 21%|██        | 81/391 [00:14<00:53,  5.84it/s]

Epoch [27/40], Step [80/391], Loss: 0.8621


 23%|██▎       | 91/391 [00:16<00:51,  5.84it/s]

Epoch [27/40], Step [90/391], Loss: 0.8649


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [27/40], Step [100/391], Loss: 0.8819


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [27/40], Step [110/391], Loss: 0.8648


 31%|███       | 121/391 [00:21<00:46,  5.85it/s]

Epoch [27/40], Step [120/391], Loss: 0.8623


 34%|███▎      | 131/391 [00:23<00:44,  5.86it/s]

Epoch [27/40], Step [130/391], Loss: 0.8753


 36%|███▌      | 141/391 [00:25<00:42,  5.85it/s]

Epoch [27/40], Step [140/391], Loss: 0.8998


 39%|███▊      | 151/391 [00:26<00:41,  5.85it/s]

Epoch [27/40], Step [150/391], Loss: 0.8883


 41%|████      | 161/391 [00:28<00:39,  5.85it/s]

Epoch [27/40], Step [160/391], Loss: 0.8850


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [27/40], Step [170/391], Loss: 0.8910


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [27/40], Step [180/391], Loss: 0.8914


 49%|████▉     | 191/391 [00:33<00:34,  5.85it/s]

Epoch [27/40], Step [190/391], Loss: 0.8888


 51%|█████▏    | 201/391 [00:35<00:32,  5.87it/s]

Epoch [27/40], Step [200/391], Loss: 0.9175


 54%|█████▍    | 211/391 [00:37<00:30,  5.87it/s]

Epoch [27/40], Step [210/391], Loss: 0.8954


 57%|█████▋    | 221/391 [00:38<00:29,  5.86it/s]

Epoch [27/40], Step [220/391], Loss: 0.8999


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [27/40], Step [230/391], Loss: 0.9189


 62%|██████▏   | 241/391 [00:42<00:25,  5.86it/s]

Epoch [27/40], Step [240/391], Loss: 0.8825


 64%|██████▍   | 251/391 [00:43<00:23,  5.85it/s]

Epoch [27/40], Step [250/391], Loss: 0.8998


 67%|██████▋   | 261/391 [00:45<00:22,  5.84it/s]

Epoch [27/40], Step [260/391], Loss: 0.9016


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [27/40], Step [270/391], Loss: 0.9168


 72%|███████▏  | 281/391 [00:48<00:18,  5.85it/s]

Epoch [27/40], Step [280/391], Loss: 0.8995


 74%|███████▍  | 291/391 [00:50<00:17,  5.85it/s]

Epoch [27/40], Step [290/391], Loss: 0.9069


 77%|███████▋  | 301/391 [00:52<00:15,  5.86it/s]

Epoch [27/40], Step [300/391], Loss: 0.9176


 80%|███████▉  | 311/391 [00:54<00:13,  5.87it/s]

Epoch [27/40], Step [310/391], Loss: 0.9082


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [27/40], Step [320/391], Loss: 0.9014


 85%|████████▍ | 331/391 [00:57<00:10,  5.84it/s]

Epoch [27/40], Step [330/391], Loss: 0.8924


 87%|████████▋ | 341/391 [00:59<00:08,  5.85it/s]

Epoch [27/40], Step [340/391], Loss: 0.9043


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [27/40], Step [350/391], Loss: 0.9131


 92%|█████████▏| 361/391 [01:02<00:05,  5.83it/s]

Epoch [27/40], Step [360/391], Loss: 0.8893


 95%|█████████▍| 371/391 [01:04<00:03,  5.87it/s]

Epoch [27/40], Step [370/391], Loss: 0.9020


 97%|█████████▋| 381/391 [01:06<00:01,  5.86it/s]

Epoch [27/40], Step [380/391], Loss: 0.8981


100%|██████████| 391/391 [01:07<00:00,  5.77it/s]

Epoch [27/40], Step [390/391], Loss: 0.8939





Test Accuracy of the student model on the test images: 59.41 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.61it/s]

Epoch [28/40], Step [10/391], Loss: 0.8678


  5%|▌         | 21/391 [00:04<01:03,  5.87it/s]

Epoch [28/40], Step [20/391], Loss: 0.8428


  8%|▊         | 31/391 [00:06<01:01,  5.86it/s]

Epoch [28/40], Step [30/391], Loss: 0.8320


 10%|█         | 41/391 [00:07<00:59,  5.87it/s]

Epoch [28/40], Step [40/391], Loss: 0.8453


 13%|█▎        | 51/391 [00:09<00:58,  5.85it/s]

Epoch [28/40], Step [50/391], Loss: 0.8446


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [28/40], Step [60/391], Loss: 0.8446


 18%|█▊        | 71/391 [00:12<00:54,  5.87it/s]

Epoch [28/40], Step [70/391], Loss: 0.8190


 21%|██        | 81/391 [00:14<00:53,  5.85it/s]

Epoch [28/40], Step [80/391], Loss: 0.8343


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [28/40], Step [90/391], Loss: 0.8424


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [28/40], Step [100/391], Loss: 0.8498


 28%|██▊       | 111/391 [00:19<00:47,  5.85it/s]

Epoch [28/40], Step [110/391], Loss: 0.8538


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [28/40], Step [120/391], Loss: 0.8544


 34%|███▎      | 131/391 [00:23<00:44,  5.86it/s]

Epoch [28/40], Step [130/391], Loss: 0.8399


 36%|███▌      | 141/391 [00:24<00:42,  5.85it/s]

Epoch [28/40], Step [140/391], Loss: 0.8466


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [28/40], Step [150/391], Loss: 0.8553


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [28/40], Step [160/391], Loss: 0.8436


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [28/40], Step [170/391], Loss: 0.8535


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [28/40], Step [180/391], Loss: 0.8443


 49%|████▉     | 191/391 [00:33<00:34,  5.85it/s]

Epoch [28/40], Step [190/391], Loss: 0.8762


 51%|█████▏    | 201/391 [00:35<00:32,  5.86it/s]

Epoch [28/40], Step [200/391], Loss: 0.8712


 54%|█████▍    | 211/391 [00:36<00:30,  5.87it/s]

Epoch [28/40], Step [210/391], Loss: 0.8774


 57%|█████▋    | 221/391 [00:38<00:28,  5.87it/s]

Epoch [28/40], Step [220/391], Loss: 0.8735


 59%|█████▉    | 231/391 [00:40<00:27,  5.87it/s]

Epoch [28/40], Step [230/391], Loss: 0.8508


 62%|██████▏   | 241/391 [00:41<00:25,  5.87it/s]

Epoch [28/40], Step [240/391], Loss: 0.8545


 64%|██████▍   | 251/391 [00:43<00:23,  5.84it/s]

Epoch [28/40], Step [250/391], Loss: 0.8484


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [28/40], Step [260/391], Loss: 0.8486


 69%|██████▉   | 271/391 [00:47<00:20,  5.87it/s]

Epoch [28/40], Step [270/391], Loss: 0.8575


 72%|███████▏  | 281/391 [00:48<00:18,  5.85it/s]

Epoch [28/40], Step [280/391], Loss: 0.8507


 74%|███████▍  | 291/391 [00:50<00:17,  5.87it/s]

Epoch [28/40], Step [290/391], Loss: 0.8521


 77%|███████▋  | 301/391 [00:52<00:15,  5.87it/s]

Epoch [28/40], Step [300/391], Loss: 0.8527


 80%|███████▉  | 311/391 [00:53<00:13,  5.83it/s]

Epoch [28/40], Step [310/391], Loss: 0.8478


 82%|████████▏ | 321/391 [00:55<00:11,  5.88it/s]

Epoch [28/40], Step [320/391], Loss: 0.8536


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [28/40], Step [330/391], Loss: 0.8703


 87%|████████▋ | 341/391 [00:59<00:08,  5.85it/s]

Epoch [28/40], Step [340/391], Loss: 0.8558


 90%|████████▉ | 351/391 [01:00<00:06,  5.85it/s]

Epoch [28/40], Step [350/391], Loss: 0.8820


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [28/40], Step [360/391], Loss: 0.8523


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [28/40], Step [370/391], Loss: 0.8759


 97%|█████████▋| 381/391 [01:05<00:01,  5.85it/s]

Epoch [28/40], Step [380/391], Loss: 0.8652


100%|██████████| 391/391 [01:07<00:00,  5.78it/s]

Epoch [28/40], Step [390/391], Loss: 0.8814





Test Accuracy of the student model on the test images: 60.16 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.59it/s]

Epoch [29/40], Step [10/391], Loss: 0.8310


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [29/40], Step [20/391], Loss: 0.8081


  8%|▊         | 31/391 [00:06<01:01,  5.86it/s]

Epoch [29/40], Step [30/391], Loss: 0.7931


 10%|█         | 41/391 [00:07<00:59,  5.85it/s]

Epoch [29/40], Step [40/391], Loss: 0.8015


 13%|█▎        | 51/391 [00:09<00:57,  5.87it/s]

Epoch [29/40], Step [50/391], Loss: 0.8032


 16%|█▌        | 61/391 [00:11<00:56,  5.85it/s]

Epoch [29/40], Step [60/391], Loss: 0.7995


 18%|█▊        | 71/391 [00:13<00:54,  5.86it/s]

Epoch [29/40], Step [70/391], Loss: 0.7908


 21%|██        | 81/391 [00:14<00:52,  5.86it/s]

Epoch [29/40], Step [80/391], Loss: 0.7889


 23%|██▎       | 91/391 [00:16<00:51,  5.85it/s]

Epoch [29/40], Step [90/391], Loss: 0.7842


 26%|██▌       | 101/391 [00:18<00:49,  5.85it/s]

Epoch [29/40], Step [100/391], Loss: 0.8045


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [29/40], Step [110/391], Loss: 0.7934


 31%|███       | 121/391 [00:21<00:46,  5.87it/s]

Epoch [29/40], Step [120/391], Loss: 0.7811


 34%|███▎      | 131/391 [00:23<00:44,  5.87it/s]

Epoch [29/40], Step [130/391], Loss: 0.7782


 36%|███▌      | 141/391 [00:24<00:42,  5.87it/s]

Epoch [29/40], Step [140/391], Loss: 0.8150


 39%|███▊      | 151/391 [00:26<00:41,  5.83it/s]

Epoch [29/40], Step [150/391], Loss: 0.8045


 41%|████      | 161/391 [00:28<00:39,  5.83it/s]

Epoch [29/40], Step [160/391], Loss: 0.8020


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [29/40], Step [170/391], Loss: 0.8150


 46%|████▋     | 181/391 [00:31<00:35,  5.87it/s]

Epoch [29/40], Step [180/391], Loss: 0.7809


 49%|████▉     | 191/391 [00:33<00:34,  5.87it/s]

Epoch [29/40], Step [190/391], Loss: 0.8184


 51%|█████▏    | 201/391 [00:35<00:32,  5.84it/s]

Epoch [29/40], Step [200/391], Loss: 0.8209


 54%|█████▍    | 211/391 [00:36<00:30,  5.86it/s]

Epoch [29/40], Step [210/391], Loss: 0.8301


 57%|█████▋    | 221/391 [00:38<00:29,  5.86it/s]

Epoch [29/40], Step [220/391], Loss: 0.8046


 59%|█████▉    | 231/391 [00:40<00:27,  5.87it/s]

Epoch [29/40], Step [230/391], Loss: 0.7962


 62%|██████▏   | 241/391 [00:42<00:25,  5.87it/s]

Epoch [29/40], Step [240/391], Loss: 0.8281


 64%|██████▍   | 251/391 [00:43<00:23,  5.85it/s]

Epoch [29/40], Step [250/391], Loss: 0.8162


 67%|██████▋   | 261/391 [00:45<00:22,  5.85it/s]

Epoch [29/40], Step [260/391], Loss: 0.8197


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [29/40], Step [270/391], Loss: 0.8379


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [29/40], Step [280/391], Loss: 0.8232


 74%|███████▍  | 291/391 [00:50<00:17,  5.86it/s]

Epoch [29/40], Step [290/391], Loss: 0.8076


 77%|███████▋  | 301/391 [00:52<00:15,  5.86it/s]

Epoch [29/40], Step [300/391], Loss: 0.8248


 80%|███████▉  | 311/391 [00:54<00:13,  5.87it/s]

Epoch [29/40], Step [310/391], Loss: 0.8264


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [29/40], Step [320/391], Loss: 0.8375


 85%|████████▍ | 331/391 [00:57<00:10,  5.87it/s]

Epoch [29/40], Step [330/391], Loss: 0.8248


 87%|████████▋ | 341/391 [00:59<00:08,  5.87it/s]

Epoch [29/40], Step [340/391], Loss: 0.8452


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [29/40], Step [350/391], Loss: 0.8404


 92%|█████████▏| 361/391 [01:02<00:05,  5.85it/s]

Epoch [29/40], Step [360/391], Loss: 0.8487


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [29/40], Step [370/391], Loss: 0.8193


 97%|█████████▋| 381/391 [01:05<00:01,  5.86it/s]

Epoch [29/40], Step [380/391], Loss: 0.8433


100%|██████████| 391/391 [01:07<00:00,  5.78it/s]

Epoch [29/40], Step [390/391], Loss: 0.8517





Test Accuracy of the student model on the test images: 60.43 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.62it/s]

Epoch [30/40], Step [10/391], Loss: 0.7665


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [30/40], Step [20/391], Loss: 0.7771


  8%|▊         | 31/391 [00:06<01:01,  5.83it/s]

Epoch [30/40], Step [30/391], Loss: 0.7865


 10%|█         | 41/391 [00:07<00:59,  5.86it/s]

Epoch [30/40], Step [40/391], Loss: 0.7861


 13%|█▎        | 51/391 [00:09<00:58,  5.86it/s]

Epoch [30/40], Step [50/391], Loss: 0.7603


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [30/40], Step [60/391], Loss: 0.7629


 18%|█▊        | 71/391 [00:12<00:54,  5.85it/s]

Epoch [30/40], Step [70/391], Loss: 0.7666


 21%|██        | 81/391 [00:14<00:52,  5.87it/s]

Epoch [30/40], Step [80/391], Loss: 0.7638


 23%|██▎       | 91/391 [00:16<00:51,  5.85it/s]

Epoch [30/40], Step [90/391], Loss: 0.7674


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [30/40], Step [100/391], Loss: 0.7560


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [30/40], Step [110/391], Loss: 0.7726


 31%|███       | 121/391 [00:21<00:46,  5.85it/s]

Epoch [30/40], Step [120/391], Loss: 0.7725


 34%|███▎      | 131/391 [00:23<00:44,  5.86it/s]

Epoch [30/40], Step [130/391], Loss: 0.7724


 36%|███▌      | 141/391 [00:24<00:42,  5.86it/s]

Epoch [30/40], Step [140/391], Loss: 0.7621


 39%|███▊      | 151/391 [00:26<00:41,  5.85it/s]

Epoch [30/40], Step [150/391], Loss: 0.7586


 41%|████      | 161/391 [00:28<00:39,  5.85it/s]

Epoch [30/40], Step [160/391], Loss: 0.7661


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [30/40], Step [170/391], Loss: 0.7826


 46%|████▋     | 181/391 [00:31<00:35,  5.84it/s]

Epoch [30/40], Step [180/391], Loss: 0.7830


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [30/40], Step [190/391], Loss: 0.7816


 51%|█████▏    | 201/391 [00:35<00:32,  5.84it/s]

Epoch [30/40], Step [200/391], Loss: 0.7866


 54%|█████▍    | 211/391 [00:36<00:30,  5.87it/s]

Epoch [30/40], Step [210/391], Loss: 0.7859


 57%|█████▋    | 221/391 [00:38<00:29,  5.85it/s]

Epoch [30/40], Step [220/391], Loss: 0.7644


 59%|█████▉    | 231/391 [00:40<00:27,  5.87it/s]

Epoch [30/40], Step [230/391], Loss: 0.7939


 62%|██████▏   | 241/391 [00:41<00:25,  5.87it/s]

Epoch [30/40], Step [240/391], Loss: 0.7928


 64%|██████▍   | 251/391 [00:43<00:23,  5.87it/s]

Epoch [30/40], Step [250/391], Loss: 0.7725


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [30/40], Step [260/391], Loss: 0.7976


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [30/40], Step [270/391], Loss: 0.7861


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [30/40], Step [280/391], Loss: 0.7905


 74%|███████▍  | 291/391 [00:50<00:17,  5.86it/s]

Epoch [30/40], Step [290/391], Loss: 0.7875


 77%|███████▋  | 301/391 [00:52<00:15,  5.85it/s]

Epoch [30/40], Step [300/391], Loss: 0.7973


 80%|███████▉  | 311/391 [00:53<00:13,  5.87it/s]

Epoch [30/40], Step [310/391], Loss: 0.8101


 82%|████████▏ | 321/391 [00:55<00:11,  5.85it/s]

Epoch [30/40], Step [320/391], Loss: 0.8109


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [30/40], Step [330/391], Loss: 0.7938


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [30/40], Step [340/391], Loss: 0.7787


 90%|████████▉ | 351/391 [01:00<00:06,  5.87it/s]

Epoch [30/40], Step [350/391], Loss: 0.7976


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [30/40], Step [360/391], Loss: 0.7877


 95%|█████████▍| 371/391 [01:04<00:03,  5.87it/s]

Epoch [30/40], Step [370/391], Loss: 0.7826


 97%|█████████▋| 381/391 [01:05<00:01,  5.86it/s]

Epoch [30/40], Step [380/391], Loss: 0.8034


100%|██████████| 391/391 [01:07<00:00,  5.79it/s]

Epoch [30/40], Step [390/391], Loss: 0.7859





Test Accuracy of the student model on the test images: 62.06 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.63it/s]

Epoch [31/40], Step [10/391], Loss: 0.7423


  5%|▌         | 21/391 [00:04<01:03,  5.87it/s]

Epoch [31/40], Step [20/391], Loss: 0.7227


  8%|▊         | 31/391 [00:06<01:01,  5.87it/s]

Epoch [31/40], Step [30/391], Loss: 0.7364


 10%|█         | 41/391 [00:07<00:59,  5.85it/s]

Epoch [31/40], Step [40/391], Loss: 0.7430


 13%|█▎        | 51/391 [00:09<00:58,  5.85it/s]

Epoch [31/40], Step [50/391], Loss: 0.7361


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [31/40], Step [60/391], Loss: 0.7404


 18%|█▊        | 71/391 [00:12<00:54,  5.86it/s]

Epoch [31/40], Step [70/391], Loss: 0.7408


 21%|██        | 81/391 [00:14<00:52,  5.87it/s]

Epoch [31/40], Step [80/391], Loss: 0.7365


 23%|██▎       | 91/391 [00:16<00:51,  5.84it/s]

Epoch [31/40], Step [90/391], Loss: 0.7417


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [31/40], Step [100/391], Loss: 0.7159


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [31/40], Step [110/391], Loss: 0.7476


 31%|███       | 121/391 [00:21<00:46,  5.85it/s]

Epoch [31/40], Step [120/391], Loss: 0.7349


 34%|███▎      | 131/391 [00:23<00:44,  5.87it/s]

Epoch [31/40], Step [130/391], Loss: 0.7317


 36%|███▌      | 141/391 [00:24<00:43,  5.71it/s]

Epoch [31/40], Step [140/391], Loss: 0.7407


 39%|███▊      | 151/391 [00:26<00:40,  5.87it/s]

Epoch [31/40], Step [150/391], Loss: 0.7338


 41%|████      | 161/391 [00:28<00:39,  5.85it/s]

Epoch [31/40], Step [160/391], Loss: 0.7396


 44%|████▎     | 171/391 [00:30<00:37,  5.85it/s]

Epoch [31/40], Step [170/391], Loss: 0.7197


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [31/40], Step [180/391], Loss: 0.7317


 49%|████▉     | 191/391 [00:33<00:34,  5.85it/s]

Epoch [31/40], Step [190/391], Loss: 0.7476


 51%|█████▏    | 201/391 [00:35<00:32,  5.86it/s]

Epoch [31/40], Step [200/391], Loss: 0.7373


 54%|█████▍    | 211/391 [00:36<00:30,  5.86it/s]

Epoch [31/40], Step [210/391], Loss: 0.7331


 57%|█████▋    | 221/391 [00:38<00:29,  5.84it/s]

Epoch [31/40], Step [220/391], Loss: 0.7248


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [31/40], Step [230/391], Loss: 0.7373


 62%|██████▏   | 241/391 [00:41<00:25,  5.85it/s]

Epoch [31/40], Step [240/391], Loss: 0.7637


 64%|██████▍   | 251/391 [00:43<00:23,  5.85it/s]

Epoch [31/40], Step [250/391], Loss: 0.7342


 67%|██████▋   | 261/391 [00:45<00:22,  5.85it/s]

Epoch [31/40], Step [260/391], Loss: 0.7333


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [31/40], Step [270/391], Loss: 0.7488


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [31/40], Step [280/391], Loss: 0.7498


 74%|███████▍  | 291/391 [00:50<00:17,  5.87it/s]

Epoch [31/40], Step [290/391], Loss: 0.7494


 77%|███████▋  | 301/391 [00:52<00:15,  5.85it/s]

Epoch [31/40], Step [300/391], Loss: 0.7486


 80%|███████▉  | 311/391 [00:53<00:13,  5.87it/s]

Epoch [31/40], Step [310/391], Loss: 0.7325


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [31/40], Step [320/391], Loss: 0.7394


 85%|████████▍ | 331/391 [00:57<00:10,  5.87it/s]

Epoch [31/40], Step [330/391], Loss: 0.7362


 87%|████████▋ | 341/391 [00:59<00:08,  5.87it/s]

Epoch [31/40], Step [340/391], Loss: 0.7516


 90%|████████▉ | 351/391 [01:00<00:06,  5.87it/s]

Epoch [31/40], Step [350/391], Loss: 0.7625


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [31/40], Step [360/391], Loss: 0.7685


 95%|█████████▍| 371/391 [01:04<00:03,  5.84it/s]

Epoch [31/40], Step [370/391], Loss: 0.7571


 97%|█████████▋| 381/391 [01:05<00:01,  5.87it/s]

Epoch [31/40], Step [380/391], Loss: 0.7583


100%|██████████| 391/391 [01:07<00:00,  5.79it/s]

Epoch [31/40], Step [390/391], Loss: 0.7469





Test Accuracy of the student model on the test images: 63.40 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.61it/s]

Epoch [32/40], Step [10/391], Loss: 0.7028


  5%|▌         | 21/391 [00:04<01:03,  5.87it/s]

Epoch [32/40], Step [20/391], Loss: 0.7068


  8%|▊         | 31/391 [00:06<01:01,  5.86it/s]

Epoch [32/40], Step [30/391], Loss: 0.7067


 10%|█         | 41/391 [00:07<00:59,  5.85it/s]

Epoch [32/40], Step [40/391], Loss: 0.7053


 13%|█▎        | 51/391 [00:09<00:58,  5.85it/s]

Epoch [32/40], Step [50/391], Loss: 0.7012


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [32/40], Step [60/391], Loss: 0.7136


 18%|█▊        | 71/391 [00:13<00:54,  5.87it/s]

Epoch [32/40], Step [70/391], Loss: 0.7181


 21%|██        | 81/391 [00:14<00:52,  5.87it/s]

Epoch [32/40], Step [80/391], Loss: 0.7107


 23%|██▎       | 91/391 [00:16<00:51,  5.84it/s]

Epoch [32/40], Step [90/391], Loss: 0.7047


 26%|██▌       | 101/391 [00:18<00:49,  5.85it/s]

Epoch [32/40], Step [100/391], Loss: 0.7093


 28%|██▊       | 111/391 [00:19<00:47,  5.85it/s]

Epoch [32/40], Step [110/391], Loss: 0.7035


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [32/40], Step [120/391], Loss: 0.7076


 34%|███▎      | 131/391 [00:23<00:44,  5.87it/s]

Epoch [32/40], Step [130/391], Loss: 0.7212


 36%|███▌      | 141/391 [00:24<00:42,  5.87it/s]

Epoch [32/40], Step [140/391], Loss: 0.7064


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [32/40], Step [150/391], Loss: 0.6870


 41%|████      | 161/391 [00:28<00:39,  5.85it/s]

Epoch [32/40], Step [160/391], Loss: 0.7097


 44%|████▎     | 171/391 [00:30<00:37,  5.87it/s]

Epoch [32/40], Step [170/391], Loss: 0.7054


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [32/40], Step [180/391], Loss: 0.7182


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [32/40], Step [190/391], Loss: 0.7081


 51%|█████▏    | 201/391 [00:35<00:32,  5.85it/s]

Epoch [32/40], Step [200/391], Loss: 0.7184


 54%|█████▍    | 211/391 [00:36<00:30,  5.86it/s]

Epoch [32/40], Step [210/391], Loss: 0.7100


 57%|█████▋    | 221/391 [00:38<00:28,  5.88it/s]

Epoch [32/40], Step [220/391], Loss: 0.7059


 59%|█████▉    | 231/391 [00:40<00:27,  5.87it/s]

Epoch [32/40], Step [230/391], Loss: 0.7033


 62%|██████▏   | 241/391 [00:42<00:25,  5.84it/s]

Epoch [32/40], Step [240/391], Loss: 0.7030


 64%|██████▍   | 251/391 [00:43<00:23,  5.85it/s]

Epoch [32/40], Step [250/391], Loss: 0.6882


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [32/40], Step [260/391], Loss: 0.7111


 69%|██████▉   | 271/391 [00:47<00:20,  5.87it/s]

Epoch [32/40], Step [270/391], Loss: 0.7168


 72%|███████▏  | 281/391 [00:48<00:18,  5.87it/s]

Epoch [32/40], Step [280/391], Loss: 0.7127


 74%|███████▍  | 291/391 [00:50<00:17,  5.86it/s]

Epoch [32/40], Step [290/391], Loss: 0.7076


 77%|███████▋  | 301/391 [00:52<00:15,  5.86it/s]

Epoch [32/40], Step [300/391], Loss: 0.6991


 80%|███████▉  | 311/391 [00:53<00:13,  5.87it/s]

Epoch [32/40], Step [310/391], Loss: 0.7065


 82%|████████▏ | 321/391 [00:55<00:11,  5.85it/s]

Epoch [32/40], Step [320/391], Loss: 0.7163


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [32/40], Step [330/391], Loss: 0.7202


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [32/40], Step [340/391], Loss: 0.7282


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [32/40], Step [350/391], Loss: 0.7239


 92%|█████████▏| 361/391 [01:02<00:05,  5.87it/s]

Epoch [32/40], Step [360/391], Loss: 0.7218


 95%|█████████▍| 371/391 [01:04<00:03,  5.87it/s]

Epoch [32/40], Step [370/391], Loss: 0.7330


 97%|█████████▋| 381/391 [01:05<00:01,  5.85it/s]

Epoch [32/40], Step [380/391], Loss: 0.7241


100%|██████████| 391/391 [01:07<00:00,  5.78it/s]

Epoch [32/40], Step [390/391], Loss: 0.7114





Test Accuracy of the student model on the test images: 63.44 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.58it/s]

Epoch [33/40], Step [10/391], Loss: 0.6975


  5%|▌         | 21/391 [00:04<01:03,  5.86it/s]

Epoch [33/40], Step [20/391], Loss: 0.6612


  8%|▊         | 31/391 [00:06<01:01,  5.84it/s]

Epoch [33/40], Step [30/391], Loss: 0.6760


 10%|█         | 41/391 [00:07<00:59,  5.87it/s]

Epoch [33/40], Step [40/391], Loss: 0.6785


 13%|█▎        | 51/391 [00:09<00:58,  5.83it/s]

Epoch [33/40], Step [50/391], Loss: 0.6862


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [33/40], Step [60/391], Loss: 0.6784


 18%|█▊        | 71/391 [00:13<00:54,  5.86it/s]

Epoch [33/40], Step [70/391], Loss: 0.6673


 21%|██        | 81/391 [00:14<00:52,  5.86it/s]

Epoch [33/40], Step [80/391], Loss: 0.6826


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [33/40], Step [90/391], Loss: 0.6849


 26%|██▌       | 101/391 [00:18<00:49,  5.85it/s]

Epoch [33/40], Step [100/391], Loss: 0.6698


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [33/40], Step [110/391], Loss: 0.6762


 31%|███       | 121/391 [00:21<00:45,  5.87it/s]

Epoch [33/40], Step [120/391], Loss: 0.6725


 34%|███▎      | 131/391 [00:23<00:44,  5.87it/s]

Epoch [33/40], Step [130/391], Loss: 0.6801


 36%|███▌      | 141/391 [00:25<00:42,  5.86it/s]

Epoch [33/40], Step [140/391], Loss: 0.6670


 39%|███▊      | 151/391 [00:26<00:40,  5.87it/s]

Epoch [33/40], Step [150/391], Loss: 0.6770


 41%|████      | 161/391 [00:28<00:39,  5.85it/s]

Epoch [33/40], Step [160/391], Loss: 0.6815


 44%|████▎     | 171/391 [00:30<00:37,  5.84it/s]

Epoch [33/40], Step [170/391], Loss: 0.6619


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [33/40], Step [180/391], Loss: 0.6812


 49%|████▉     | 191/391 [00:33<00:34,  5.87it/s]

Epoch [33/40], Step [190/391], Loss: 0.6912


 51%|█████▏    | 201/391 [00:35<00:32,  5.86it/s]

Epoch [33/40], Step [200/391], Loss: 0.6868


 54%|█████▍    | 211/391 [00:36<00:30,  5.85it/s]

Epoch [33/40], Step [210/391], Loss: 0.6824


 57%|█████▋    | 221/391 [00:38<00:29,  5.86it/s]

Epoch [33/40], Step [220/391], Loss: 0.6854


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [33/40], Step [230/391], Loss: 0.6987


 62%|██████▏   | 241/391 [00:42<00:25,  5.85it/s]

Epoch [33/40], Step [240/391], Loss: 0.6733


 64%|██████▍   | 251/391 [00:43<00:23,  5.86it/s]

Epoch [33/40], Step [250/391], Loss: 0.6828


 67%|██████▋   | 261/391 [00:45<00:22,  5.85it/s]

Epoch [33/40], Step [260/391], Loss: 0.6774


 69%|██████▉   | 271/391 [00:47<00:20,  5.85it/s]

Epoch [33/40], Step [270/391], Loss: 0.6964


 72%|███████▏  | 281/391 [00:48<00:18,  5.87it/s]

Epoch [33/40], Step [280/391], Loss: 0.6943


 74%|███████▍  | 291/391 [00:50<00:17,  5.87it/s]

Epoch [33/40], Step [290/391], Loss: 0.6960


 77%|███████▋  | 301/391 [00:52<00:15,  5.87it/s]

Epoch [33/40], Step [300/391], Loss: 0.6928


 80%|███████▉  | 311/391 [00:54<00:13,  5.86it/s]

Epoch [33/40], Step [310/391], Loss: 0.6773


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [33/40], Step [320/391], Loss: 0.6815


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [33/40], Step [330/391], Loss: 0.6800


 87%|████████▋ | 341/391 [00:59<00:08,  5.87it/s]

Epoch [33/40], Step [340/391], Loss: 0.6800


 90%|████████▉ | 351/391 [01:00<00:06,  5.85it/s]

Epoch [33/40], Step [350/391], Loss: 0.6844


 92%|█████████▏| 361/391 [01:02<00:05,  5.85it/s]

Epoch [33/40], Step [360/391], Loss: 0.6887


 95%|█████████▍| 371/391 [01:04<00:03,  5.87it/s]

Epoch [33/40], Step [370/391], Loss: 0.6948


 97%|█████████▋| 381/391 [01:05<00:01,  5.86it/s]

Epoch [33/40], Step [380/391], Loss: 0.6827


100%|██████████| 391/391 [01:07<00:00,  5.78it/s]

Epoch [33/40], Step [390/391], Loss: 0.6887





Test Accuracy of the student model on the test images: 63.76 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.60it/s]

Epoch [34/40], Step [10/391], Loss: 0.6686


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [34/40], Step [20/391], Loss: 0.6570


  8%|▊         | 31/391 [00:06<01:01,  5.81it/s]

Epoch [34/40], Step [30/391], Loss: 0.6493


 10%|█         | 41/391 [00:07<00:59,  5.86it/s]

Epoch [34/40], Step [40/391], Loss: 0.6490


 13%|█▎        | 51/391 [00:09<00:58,  5.86it/s]

Epoch [34/40], Step [50/391], Loss: 0.6446


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [34/40], Step [60/391], Loss: 0.6471


 18%|█▊        | 71/391 [00:13<00:54,  5.86it/s]

Epoch [34/40], Step [70/391], Loss: 0.6544


 21%|██        | 81/391 [00:14<00:52,  5.86it/s]

Epoch [34/40], Step [80/391], Loss: 0.6494


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [34/40], Step [90/391], Loss: 0.6460


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [34/40], Step [100/391], Loss: 0.6338


 28%|██▊       | 111/391 [00:19<00:47,  5.88it/s]

Epoch [34/40], Step [110/391], Loss: 0.6462


 31%|███       | 121/391 [00:21<00:46,  5.87it/s]

Epoch [34/40], Step [120/391], Loss: 0.6482


 34%|███▎      | 131/391 [00:23<00:44,  5.85it/s]

Epoch [34/40], Step [130/391], Loss: 0.6534


 36%|███▌      | 141/391 [00:24<00:42,  5.87it/s]

Epoch [34/40], Step [140/391], Loss: 0.6395


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [34/40], Step [150/391], Loss: 0.6426


 41%|████      | 161/391 [00:28<00:39,  5.83it/s]

Epoch [34/40], Step [160/391], Loss: 0.6581


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [34/40], Step [170/391], Loss: 0.6538


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [34/40], Step [180/391], Loss: 0.6614


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [34/40], Step [190/391], Loss: 0.6470


 51%|█████▏    | 201/391 [00:35<00:32,  5.84it/s]

Epoch [34/40], Step [200/391], Loss: 0.6484


 54%|█████▍    | 211/391 [00:36<00:30,  5.87it/s]

Epoch [34/40], Step [210/391], Loss: 0.6578


 57%|█████▋    | 221/391 [00:38<00:28,  5.87it/s]

Epoch [34/40], Step [220/391], Loss: 0.6556


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [34/40], Step [230/391], Loss: 0.6469


 62%|██████▏   | 241/391 [00:42<00:25,  5.86it/s]

Epoch [34/40], Step [240/391], Loss: 0.6560


 64%|██████▍   | 251/391 [00:43<00:23,  5.87it/s]

Epoch [34/40], Step [250/391], Loss: 0.6445


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [34/40], Step [260/391], Loss: 0.6641


 69%|██████▉   | 271/391 [00:47<00:20,  5.87it/s]

Epoch [34/40], Step [270/391], Loss: 0.6678


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [34/40], Step [280/391], Loss: 0.6399


 74%|███████▍  | 291/391 [00:50<00:17,  5.85it/s]

Epoch [34/40], Step [290/391], Loss: 0.6568


 77%|███████▋  | 301/391 [00:52<00:15,  5.85it/s]

Epoch [34/40], Step [300/391], Loss: 0.6578


 80%|███████▉  | 311/391 [00:53<00:13,  5.86it/s]

Epoch [34/40], Step [310/391], Loss: 0.6421


 82%|████████▏ | 321/391 [00:55<00:11,  5.85it/s]

Epoch [34/40], Step [320/391], Loss: 0.6563


 85%|████████▍ | 331/391 [00:57<00:10,  5.87it/s]

Epoch [34/40], Step [330/391], Loss: 0.6472


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [34/40], Step [340/391], Loss: 0.6462


 90%|████████▉ | 351/391 [01:00<00:06,  5.85it/s]

Epoch [34/40], Step [350/391], Loss: 0.6770


 92%|█████████▏| 361/391 [01:02<00:05,  5.87it/s]

Epoch [34/40], Step [360/391], Loss: 0.6598


 95%|█████████▍| 371/391 [01:04<00:03,  5.85it/s]

Epoch [34/40], Step [370/391], Loss: 0.6337


 97%|█████████▋| 381/391 [01:05<00:01,  5.86it/s]

Epoch [34/40], Step [380/391], Loss: 0.6644


100%|██████████| 391/391 [01:07<00:00,  5.78it/s]

Epoch [34/40], Step [390/391], Loss: 0.6725





Test Accuracy of the student model on the test images: 64.41 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.62it/s]

Epoch [35/40], Step [10/391], Loss: 0.6329


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [35/40], Step [20/391], Loss: 0.6220


  8%|▊         | 31/391 [00:06<01:01,  5.87it/s]

Epoch [35/40], Step [30/391], Loss: 0.6248


 10%|█         | 41/391 [00:07<00:59,  5.86it/s]

Epoch [35/40], Step [40/391], Loss: 0.6289


 13%|█▎        | 51/391 [00:09<00:58,  5.86it/s]

Epoch [35/40], Step [50/391], Loss: 0.6267


 16%|█▌        | 61/391 [00:11<00:56,  5.87it/s]

Epoch [35/40], Step [60/391], Loss: 0.6219


 18%|█▊        | 71/391 [00:12<00:54,  5.86it/s]

Epoch [35/40], Step [70/391], Loss: 0.6313


 21%|██        | 81/391 [00:14<00:52,  5.85it/s]

Epoch [35/40], Step [80/391], Loss: 0.6249


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [35/40], Step [90/391], Loss: 0.6404


 26%|██▌       | 101/391 [00:18<00:49,  5.87it/s]

Epoch [35/40], Step [100/391], Loss: 0.6180


 28%|██▊       | 111/391 [00:19<00:47,  5.87it/s]

Epoch [35/40], Step [110/391], Loss: 0.6213


 31%|███       | 121/391 [00:21<00:46,  5.85it/s]

Epoch [35/40], Step [120/391], Loss: 0.6129


 34%|███▎      | 131/391 [00:23<00:44,  5.86it/s]

Epoch [35/40], Step [130/391], Loss: 0.6347


 36%|███▌      | 141/391 [00:24<00:42,  5.86it/s]

Epoch [35/40], Step [140/391], Loss: 0.6093


 39%|███▊      | 151/391 [00:26<00:41,  5.85it/s]

Epoch [35/40], Step [150/391], Loss: 0.6391


 41%|████      | 161/391 [00:28<00:39,  5.85it/s]

Epoch [35/40], Step [160/391], Loss: 0.6413


 44%|████▎     | 171/391 [00:29<00:37,  5.85it/s]

Epoch [35/40], Step [170/391], Loss: 0.6327


 46%|████▋     | 181/391 [00:31<00:35,  5.87it/s]

Epoch [35/40], Step [180/391], Loss: 0.6332


 49%|████▉     | 191/391 [00:33<00:34,  5.85it/s]

Epoch [35/40], Step [190/391], Loss: 0.6234


 51%|█████▏    | 201/391 [00:35<00:32,  5.87it/s]

Epoch [35/40], Step [200/391], Loss: 0.6260


 54%|█████▍    | 211/391 [00:36<00:30,  5.86it/s]

Epoch [35/40], Step [210/391], Loss: 0.6350


 57%|█████▋    | 221/391 [00:38<00:29,  5.85it/s]

Epoch [35/40], Step [220/391], Loss: 0.6188


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [35/40], Step [230/391], Loss: 0.6369


 62%|██████▏   | 241/391 [00:41<00:25,  5.86it/s]

Epoch [35/40], Step [240/391], Loss: 0.6267


 64%|██████▍   | 251/391 [00:43<00:23,  5.86it/s]

Epoch [35/40], Step [250/391], Loss: 0.6394


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [35/40], Step [260/391], Loss: 0.6156


 69%|██████▉   | 271/391 [00:47<00:20,  5.87it/s]

Epoch [35/40], Step [270/391], Loss: 0.6229


 72%|███████▏  | 281/391 [00:48<00:18,  5.87it/s]

Epoch [35/40], Step [280/391], Loss: 0.6423


 74%|███████▍  | 291/391 [00:50<00:17,  5.87it/s]

Epoch [35/40], Step [290/391], Loss: 0.6235


 77%|███████▋  | 301/391 [00:52<00:15,  5.85it/s]

Epoch [35/40], Step [300/391], Loss: 0.6330


 80%|███████▉  | 311/391 [00:53<00:13,  5.86it/s]

Epoch [35/40], Step [310/391], Loss: 0.6333


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [35/40], Step [320/391], Loss: 0.6251


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [35/40], Step [330/391], Loss: 0.6321


 87%|████████▋ | 341/391 [00:58<00:08,  5.86it/s]

Epoch [35/40], Step [340/391], Loss: 0.6303


 90%|████████▉ | 351/391 [01:00<00:06,  5.84it/s]

Epoch [35/40], Step [350/391], Loss: 0.6387


 92%|█████████▏| 361/391 [01:02<00:05,  5.85it/s]

Epoch [35/40], Step [360/391], Loss: 0.6287


 95%|█████████▍| 371/391 [01:04<00:03,  5.85it/s]

Epoch [35/40], Step [370/391], Loss: 0.6298


 97%|█████████▋| 381/391 [01:05<00:01,  5.83it/s]

Epoch [35/40], Step [380/391], Loss: 0.6304


100%|██████████| 391/391 [01:07<00:00,  5.79it/s]

Epoch [35/40], Step [390/391], Loss: 0.6466





Test Accuracy of the student model on the test images: 65.24 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.60it/s]

Epoch [36/40], Step [10/391], Loss: 0.6231


  5%|▌         | 21/391 [00:04<01:03,  5.86it/s]

Epoch [36/40], Step [20/391], Loss: 0.6006


  8%|▊         | 31/391 [00:06<01:01,  5.86it/s]

Epoch [36/40], Step [30/391], Loss: 0.6240


 10%|█         | 41/391 [00:07<00:59,  5.87it/s]

Epoch [36/40], Step [40/391], Loss: 0.6135


 13%|█▎        | 51/391 [00:09<00:58,  5.84it/s]

Epoch [36/40], Step [50/391], Loss: 0.6077


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [36/40], Step [60/391], Loss: 0.6119


 18%|█▊        | 71/391 [00:12<00:54,  5.86it/s]

Epoch [36/40], Step [70/391], Loss: 0.5960


 21%|██        | 81/391 [00:14<00:53,  5.85it/s]

Epoch [36/40], Step [80/391], Loss: 0.6012


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [36/40], Step [90/391], Loss: 0.6180


 26%|██▌       | 101/391 [00:18<00:49,  5.87it/s]

Epoch [36/40], Step [100/391], Loss: 0.6091


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [36/40], Step [110/391], Loss: 0.6099


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [36/40], Step [120/391], Loss: 0.5969


 34%|███▎      | 131/391 [00:23<00:44,  5.88it/s]

Epoch [36/40], Step [130/391], Loss: 0.6064


 36%|███▌      | 141/391 [00:24<00:42,  5.87it/s]

Epoch [36/40], Step [140/391], Loss: 0.6217


 39%|███▊      | 151/391 [00:26<00:41,  5.84it/s]

Epoch [36/40], Step [150/391], Loss: 0.6103


 41%|████      | 161/391 [00:28<00:39,  5.87it/s]

Epoch [36/40], Step [160/391], Loss: 0.6114


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [36/40], Step [170/391], Loss: 0.6110


 46%|████▋     | 181/391 [00:31<00:35,  5.84it/s]

Epoch [36/40], Step [180/391], Loss: 0.6022


 49%|████▉     | 191/391 [00:33<00:34,  5.87it/s]

Epoch [36/40], Step [190/391], Loss: 0.6136


 51%|█████▏    | 201/391 [00:35<00:32,  5.86it/s]

Epoch [36/40], Step [200/391], Loss: 0.6097


 54%|█████▍    | 211/391 [00:36<00:30,  5.85it/s]

Epoch [36/40], Step [210/391], Loss: 0.6026


 57%|█████▋    | 221/391 [00:38<00:29,  5.84it/s]

Epoch [36/40], Step [220/391], Loss: 0.6197


 59%|█████▉    | 231/391 [00:40<00:27,  5.85it/s]

Epoch [36/40], Step [230/391], Loss: 0.6211


 62%|██████▏   | 241/391 [00:41<00:25,  5.86it/s]

Epoch [36/40], Step [240/391], Loss: 0.6192


 64%|██████▍   | 251/391 [00:43<00:23,  5.86it/s]

Epoch [36/40], Step [250/391], Loss: 0.6063


 67%|██████▋   | 261/391 [00:45<00:22,  5.85it/s]

Epoch [36/40], Step [260/391], Loss: 0.6085


 69%|██████▉   | 271/391 [00:47<00:20,  5.82it/s]

Epoch [36/40], Step [270/391], Loss: 0.6093


 72%|███████▏  | 281/391 [00:48<00:18,  5.85it/s]

Epoch [36/40], Step [280/391], Loss: 0.6247


 74%|███████▍  | 291/391 [00:50<00:17,  5.86it/s]

Epoch [36/40], Step [290/391], Loss: 0.6059


 77%|███████▋  | 301/391 [00:52<00:15,  5.86it/s]

Epoch [36/40], Step [300/391], Loss: 0.6078


 80%|███████▉  | 311/391 [00:53<00:13,  5.87it/s]

Epoch [36/40], Step [310/391], Loss: 0.6173


 82%|████████▏ | 321/391 [00:55<00:11,  5.87it/s]

Epoch [36/40], Step [320/391], Loss: 0.6082


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [36/40], Step [330/391], Loss: 0.6135


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [36/40], Step [340/391], Loss: 0.6089


 90%|████████▉ | 351/391 [01:00<00:06,  5.85it/s]

Epoch [36/40], Step [350/391], Loss: 0.6164


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [36/40], Step [360/391], Loss: 0.6115


 95%|█████████▍| 371/391 [01:04<00:03,  5.84it/s]

Epoch [36/40], Step [370/391], Loss: 0.6073


 97%|█████████▋| 381/391 [01:05<00:01,  5.86it/s]

Epoch [36/40], Step [380/391], Loss: 0.6119


100%|██████████| 391/391 [01:07<00:00,  5.79it/s]

Epoch [36/40], Step [390/391], Loss: 0.6290





Test Accuracy of the student model on the test images: 65.67 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.61it/s]

Epoch [37/40], Step [10/391], Loss: 0.6109


  5%|▌         | 21/391 [00:04<01:03,  5.86it/s]

Epoch [37/40], Step [20/391], Loss: 0.5960


  8%|▊         | 31/391 [00:06<01:02,  5.79it/s]

Epoch [37/40], Step [30/391], Loss: 0.5930


 10%|█         | 41/391 [00:07<00:59,  5.85it/s]

Epoch [37/40], Step [40/391], Loss: 0.5850


 13%|█▎        | 51/391 [00:09<00:58,  5.86it/s]

Epoch [37/40], Step [50/391], Loss: 0.5995


 16%|█▌        | 61/391 [00:11<00:56,  5.86it/s]

Epoch [37/40], Step [60/391], Loss: 0.5958


 18%|█▊        | 71/391 [00:13<00:54,  5.85it/s]

Epoch [37/40], Step [70/391], Loss: 0.6039


 21%|██        | 81/391 [00:14<00:52,  5.85it/s]

Epoch [37/40], Step [80/391], Loss: 0.6030


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [37/40], Step [90/391], Loss: 0.5949


 26%|██▌       | 101/391 [00:18<00:49,  5.85it/s]

Epoch [37/40], Step [100/391], Loss: 0.5983


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [37/40], Step [110/391], Loss: 0.6133


 31%|███       | 121/391 [00:21<00:45,  5.87it/s]

Epoch [37/40], Step [120/391], Loss: 0.5793


 34%|███▎      | 131/391 [00:23<00:44,  5.86it/s]

Epoch [37/40], Step [130/391], Loss: 0.5965


 36%|███▌      | 141/391 [00:24<00:42,  5.86it/s]

Epoch [37/40], Step [140/391], Loss: 0.5913


 39%|███▊      | 151/391 [00:26<00:40,  5.87it/s]

Epoch [37/40], Step [150/391], Loss: 0.5993


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [37/40], Step [160/391], Loss: 0.5874


 44%|████▎     | 171/391 [00:30<00:37,  5.87it/s]

Epoch [37/40], Step [170/391], Loss: 0.5902


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [37/40], Step [180/391], Loss: 0.5758


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [37/40], Step [190/391], Loss: 0.5949


 51%|█████▏    | 201/391 [00:35<00:32,  5.86it/s]

Epoch [37/40], Step [200/391], Loss: 0.6041


 54%|█████▍    | 211/391 [00:36<00:30,  5.84it/s]

Epoch [37/40], Step [210/391], Loss: 0.6016


 57%|█████▋    | 221/391 [00:38<00:28,  5.87it/s]

Epoch [37/40], Step [220/391], Loss: 0.5903


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [37/40], Step [230/391], Loss: 0.5925


 62%|██████▏   | 241/391 [00:42<00:25,  5.87it/s]

Epoch [37/40], Step [240/391], Loss: 0.5900


 64%|██████▍   | 251/391 [00:43<00:23,  5.86it/s]

Epoch [37/40], Step [250/391], Loss: 0.5921


 67%|██████▋   | 261/391 [00:45<00:22,  5.83it/s]

Epoch [37/40], Step [260/391], Loss: 0.6024


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [37/40], Step [270/391], Loss: 0.6016


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [37/40], Step [280/391], Loss: 0.5877


 74%|███████▍  | 291/391 [00:50<00:17,  5.87it/s]

Epoch [37/40], Step [290/391], Loss: 0.6171


 77%|███████▋  | 301/391 [00:52<00:15,  5.88it/s]

Epoch [37/40], Step [300/391], Loss: 0.5997


 80%|███████▉  | 311/391 [00:53<00:13,  5.87it/s]

Epoch [37/40], Step [310/391], Loss: 0.5986


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [37/40], Step [320/391], Loss: 0.5914


 85%|████████▍ | 331/391 [00:57<00:10,  5.86it/s]

Epoch [37/40], Step [330/391], Loss: 0.6108


 87%|████████▋ | 341/391 [00:59<00:08,  5.86it/s]

Epoch [37/40], Step [340/391], Loss: 0.5981


 90%|████████▉ | 351/391 [01:00<00:06,  5.85it/s]

Epoch [37/40], Step [350/391], Loss: 0.5907


 92%|█████████▏| 361/391 [01:02<00:05,  5.85it/s]

Epoch [37/40], Step [360/391], Loss: 0.6024


 95%|█████████▍| 371/391 [01:04<00:03,  5.87it/s]

Epoch [37/40], Step [370/391], Loss: 0.5969


 97%|█████████▋| 381/391 [01:05<00:01,  5.87it/s]

Epoch [37/40], Step [380/391], Loss: 0.5968


100%|██████████| 391/391 [01:07<00:00,  5.78it/s]

Epoch [37/40], Step [390/391], Loss: 0.6016





Test Accuracy of the student model on the test images: 66.07 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.61it/s]

Epoch [38/40], Step [10/391], Loss: 0.5909


  5%|▌         | 21/391 [00:04<01:03,  5.85it/s]

Epoch [38/40], Step [20/391], Loss: 0.5929


  8%|▊         | 31/391 [00:06<01:01,  5.87it/s]

Epoch [38/40], Step [30/391], Loss: 0.5884


 10%|█         | 41/391 [00:07<00:59,  5.85it/s]

Epoch [38/40], Step [40/391], Loss: 0.5834


 13%|█▎        | 51/391 [00:09<00:58,  5.85it/s]

Epoch [38/40], Step [50/391], Loss: 0.5747


 16%|█▌        | 61/391 [00:11<00:56,  5.84it/s]

Epoch [38/40], Step [60/391], Loss: 0.5841


 18%|█▊        | 71/391 [00:12<00:54,  5.83it/s]

Epoch [38/40], Step [70/391], Loss: 0.5875


 21%|██        | 81/391 [00:14<00:52,  5.87it/s]

Epoch [38/40], Step [80/391], Loss: 0.6018


 23%|██▎       | 91/391 [00:16<00:51,  5.87it/s]

Epoch [38/40], Step [90/391], Loss: 0.5858


 26%|██▌       | 101/391 [00:18<00:49,  5.85it/s]

Epoch [38/40], Step [100/391], Loss: 0.5873


 28%|██▊       | 111/391 [00:19<00:47,  5.84it/s]

Epoch [38/40], Step [110/391], Loss: 0.5895


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [38/40], Step [120/391], Loss: 0.5765


 34%|███▎      | 131/391 [00:23<00:44,  5.85it/s]

Epoch [38/40], Step [130/391], Loss: 0.5776


 36%|███▌      | 141/391 [00:24<00:42,  5.86it/s]

Epoch [38/40], Step [140/391], Loss: 0.5799


 39%|███▊      | 151/391 [00:26<00:41,  5.85it/s]

Epoch [38/40], Step [150/391], Loss: 0.5800


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [38/40], Step [160/391], Loss: 0.5909


 44%|████▎     | 171/391 [00:30<00:37,  5.87it/s]

Epoch [38/40], Step [170/391], Loss: 0.5862


 46%|████▋     | 181/391 [00:31<00:35,  5.87it/s]

Epoch [38/40], Step [180/391], Loss: 0.5806


 49%|████▉     | 191/391 [00:33<00:34,  5.87it/s]

Epoch [38/40], Step [190/391], Loss: 0.5823


 51%|█████▏    | 201/391 [00:35<00:32,  5.85it/s]

Epoch [38/40], Step [200/391], Loss: 0.5750


 54%|█████▍    | 211/391 [00:36<00:30,  5.87it/s]

Epoch [38/40], Step [210/391], Loss: 0.5910


 57%|█████▋    | 221/391 [00:38<00:29,  5.86it/s]

Epoch [38/40], Step [220/391], Loss: 0.5840


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [38/40], Step [230/391], Loss: 0.5815


 62%|██████▏   | 241/391 [00:41<00:25,  5.86it/s]

Epoch [38/40], Step [240/391], Loss: 0.5840


 64%|██████▍   | 251/391 [00:43<00:23,  5.85it/s]

Epoch [38/40], Step [250/391], Loss: 0.5761


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [38/40], Step [260/391], Loss: 0.5855


 69%|██████▉   | 271/391 [00:47<00:20,  5.85it/s]

Epoch [38/40], Step [270/391], Loss: 0.5870


 72%|███████▏  | 281/391 [00:48<00:18,  5.85it/s]

Epoch [38/40], Step [280/391], Loss: 0.5799


 74%|███████▍  | 291/391 [00:50<00:17,  5.85it/s]

Epoch [38/40], Step [290/391], Loss: 0.5780


 77%|███████▋  | 301/391 [00:52<00:15,  5.86it/s]

Epoch [38/40], Step [300/391], Loss: 0.5796


 80%|███████▉  | 311/391 [00:53<00:13,  5.85it/s]

Epoch [38/40], Step [310/391], Loss: 0.5838


 82%|████████▏ | 321/391 [00:55<00:11,  5.84it/s]

Epoch [38/40], Step [320/391], Loss: 0.5860


 85%|████████▍ | 331/391 [00:57<00:10,  5.87it/s]

Epoch [38/40], Step [330/391], Loss: 0.5857


 87%|████████▋ | 341/391 [00:59<00:08,  5.85it/s]

Epoch [38/40], Step [340/391], Loss: 0.5767


 90%|████████▉ | 351/391 [01:00<00:06,  5.86it/s]

Epoch [38/40], Step [350/391], Loss: 0.5641


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [38/40], Step [360/391], Loss: 0.5944


 95%|█████████▍| 371/391 [01:04<00:03,  5.86it/s]

Epoch [38/40], Step [370/391], Loss: 0.5892


 97%|█████████▋| 381/391 [01:05<00:01,  5.87it/s]

Epoch [38/40], Step [380/391], Loss: 0.5837


100%|██████████| 391/391 [01:07<00:00,  5.79it/s]

Epoch [38/40], Step [390/391], Loss: 0.5822





Test Accuracy of the student model on the test images: 65.98 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:07,  5.61it/s]

Epoch [39/40], Step [10/391], Loss: 0.5851


  5%|▌         | 21/391 [00:04<01:03,  5.86it/s]

Epoch [39/40], Step [20/391], Loss: 0.5753


  8%|▊         | 31/391 [00:06<01:01,  5.85it/s]

Epoch [39/40], Step [30/391], Loss: 0.5756


 10%|█         | 41/391 [00:07<00:59,  5.85it/s]

Epoch [39/40], Step [40/391], Loss: 0.5742


 13%|█▎        | 51/391 [00:09<00:58,  5.84it/s]

Epoch [39/40], Step [50/391], Loss: 0.5767


 16%|█▌        | 61/391 [00:11<00:56,  5.85it/s]

Epoch [39/40], Step [60/391], Loss: 0.5613


 18%|█▊        | 71/391 [00:13<00:54,  5.86it/s]

Epoch [39/40], Step [70/391], Loss: 0.5801


 21%|██        | 81/391 [00:14<00:52,  5.85it/s]

Epoch [39/40], Step [80/391], Loss: 0.5735


 23%|██▎       | 91/391 [00:16<00:51,  5.86it/s]

Epoch [39/40], Step [90/391], Loss: 0.5840


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [39/40], Step [100/391], Loss: 0.5857


 28%|██▊       | 111/391 [00:19<00:47,  5.86it/s]

Epoch [39/40], Step [110/391], Loss: 0.5706


 31%|███       | 121/391 [00:21<00:46,  5.82it/s]

Epoch [39/40], Step [120/391], Loss: 0.5768


 34%|███▎      | 131/391 [00:23<00:44,  5.83it/s]

Epoch [39/40], Step [130/391], Loss: 0.5775


 36%|███▌      | 141/391 [00:24<00:42,  5.87it/s]

Epoch [39/40], Step [140/391], Loss: 0.5758


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [39/40], Step [150/391], Loss: 0.5729


 41%|████      | 161/391 [00:28<00:39,  5.86it/s]

Epoch [39/40], Step [160/391], Loss: 0.5748


 44%|████▎     | 171/391 [00:30<00:37,  5.82it/s]

Epoch [39/40], Step [170/391], Loss: 0.5717


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [39/40], Step [180/391], Loss: 0.5735


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [39/40], Step [190/391], Loss: 0.5912


 51%|█████▏    | 201/391 [00:35<00:32,  5.86it/s]

Epoch [39/40], Step [200/391], Loss: 0.5893


 54%|█████▍    | 211/391 [00:36<00:30,  5.85it/s]

Epoch [39/40], Step [210/391], Loss: 0.5786


 57%|█████▋    | 221/391 [00:38<00:28,  5.87it/s]

Epoch [39/40], Step [220/391], Loss: 0.5852


 59%|█████▉    | 231/391 [00:40<00:27,  5.86it/s]

Epoch [39/40], Step [230/391], Loss: 0.5567


 62%|██████▏   | 241/391 [00:42<00:25,  5.86it/s]

Epoch [39/40], Step [240/391], Loss: 0.5778


 64%|██████▍   | 251/391 [00:43<00:23,  5.84it/s]

Epoch [39/40], Step [250/391], Loss: 0.5902


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [39/40], Step [260/391], Loss: 0.5845


 69%|██████▉   | 271/391 [00:47<00:20,  5.86it/s]

Epoch [39/40], Step [270/391], Loss: 0.5797


 72%|███████▏  | 281/391 [00:48<00:18,  5.86it/s]

Epoch [39/40], Step [280/391], Loss: 0.5927


 74%|███████▍  | 291/391 [00:50<00:17,  5.87it/s]

Epoch [39/40], Step [290/391], Loss: 0.5852


 77%|███████▋  | 301/391 [00:52<00:15,  5.87it/s]

Epoch [39/40], Step [300/391], Loss: 0.5775


 80%|███████▉  | 311/391 [00:53<00:13,  5.85it/s]

Epoch [39/40], Step [310/391], Loss: 0.5823


 82%|████████▏ | 321/391 [00:55<00:11,  5.86it/s]

Epoch [39/40], Step [320/391], Loss: 0.5732


 85%|████████▍ | 331/391 [00:57<00:10,  5.84it/s]

Epoch [39/40], Step [330/391], Loss: 0.5849


 87%|████████▋ | 341/391 [00:59<00:08,  5.79it/s]

Epoch [39/40], Step [340/391], Loss: 0.5767


 90%|████████▉ | 351/391 [01:00<00:06,  5.83it/s]

Epoch [39/40], Step [350/391], Loss: 0.5808


 92%|█████████▏| 361/391 [01:02<00:05,  5.86it/s]

Epoch [39/40], Step [360/391], Loss: 0.5846


 95%|█████████▍| 371/391 [01:04<00:03,  5.87it/s]

Epoch [39/40], Step [370/391], Loss: 0.5770


 97%|█████████▋| 381/391 [01:05<00:01,  5.87it/s]

Epoch [39/40], Step [380/391], Loss: 0.5706


100%|██████████| 391/391 [01:07<00:00,  5.78it/s]

Epoch [39/40], Step [390/391], Loss: 0.5816





Test Accuracy of the student model on the test images: 66.12 %
Saved best model to interim_ta2.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:08,  5.58it/s]

Epoch [40/40], Step [10/391], Loss: 0.5801


  5%|▌         | 21/391 [00:04<01:03,  5.86it/s]

Epoch [40/40], Step [20/391], Loss: 0.5723


  8%|▊         | 31/391 [00:06<01:01,  5.85it/s]

Epoch [40/40], Step [30/391], Loss: 0.5770


 10%|█         | 41/391 [00:07<00:59,  5.86it/s]

Epoch [40/40], Step [40/391], Loss: 0.5625


 13%|█▎        | 51/391 [00:09<00:58,  5.85it/s]

Epoch [40/40], Step [50/391], Loss: 0.5793


 16%|█▌        | 61/391 [00:11<00:56,  5.87it/s]

Epoch [40/40], Step [60/391], Loss: 0.5632


 18%|█▊        | 71/391 [00:13<00:54,  5.87it/s]

Epoch [40/40], Step [70/391], Loss: 0.5844


 21%|██        | 81/391 [00:14<00:52,  5.86it/s]

Epoch [40/40], Step [80/391], Loss: 0.5708


 23%|██▎       | 91/391 [00:16<00:51,  5.85it/s]

Epoch [40/40], Step [90/391], Loss: 0.5783


 26%|██▌       | 101/391 [00:18<00:49,  5.86it/s]

Epoch [40/40], Step [100/391], Loss: 0.5755


 28%|██▊       | 111/391 [00:19<00:47,  5.85it/s]

Epoch [40/40], Step [110/391], Loss: 0.5741


 31%|███       | 121/391 [00:21<00:46,  5.86it/s]

Epoch [40/40], Step [120/391], Loss: 0.5731


 34%|███▎      | 131/391 [00:23<00:44,  5.87it/s]

Epoch [40/40], Step [130/391], Loss: 0.5747


 36%|███▌      | 141/391 [00:25<00:42,  5.86it/s]

Epoch [40/40], Step [140/391], Loss: 0.5654


 39%|███▊      | 151/391 [00:26<00:40,  5.86it/s]

Epoch [40/40], Step [150/391], Loss: 0.5702


 41%|████      | 161/391 [00:28<00:39,  5.85it/s]

Epoch [40/40], Step [160/391], Loss: 0.5702


 44%|████▎     | 171/391 [00:30<00:37,  5.86it/s]

Epoch [40/40], Step [170/391], Loss: 0.5728


 46%|████▋     | 181/391 [00:31<00:35,  5.86it/s]

Epoch [40/40], Step [180/391], Loss: 0.5740


 49%|████▉     | 191/391 [00:33<00:34,  5.86it/s]

Epoch [40/40], Step [190/391], Loss: 0.5858


 51%|█████▏    | 201/391 [00:35<00:32,  5.85it/s]

Epoch [40/40], Step [200/391], Loss: 0.5754


 54%|█████▍    | 211/391 [00:36<00:30,  5.86it/s]

Epoch [40/40], Step [210/391], Loss: 0.5694


 57%|█████▋    | 221/391 [00:38<00:29,  5.86it/s]

Epoch [40/40], Step [220/391], Loss: 0.5685


 59%|█████▉    | 231/391 [00:40<00:27,  5.87it/s]

Epoch [40/40], Step [230/391], Loss: 0.5623


 62%|██████▏   | 241/391 [00:42<00:25,  5.86it/s]

Epoch [40/40], Step [240/391], Loss: 0.5798


 64%|██████▍   | 251/391 [00:43<00:23,  5.86it/s]

Epoch [40/40], Step [250/391], Loss: 0.5784


 67%|██████▋   | 261/391 [00:45<00:22,  5.86it/s]

Epoch [40/40], Step [260/391], Loss: 0.5775


 69%|██████▉   | 271/391 [00:47<00:20,  5.87it/s]

Epoch [40/40], Step [270/391], Loss: 0.5663


 72%|███████▏  | 281/391 [00:48<00:18,  5.85it/s]

Epoch [40/40], Step [280/391], Loss: 0.5699


 74%|███████▍  | 291/391 [00:50<00:17,  5.87it/s]

Epoch [40/40], Step [290/391], Loss: 0.5784


 77%|███████▋  | 301/391 [00:52<00:15,  5.86it/s]

Epoch [40/40], Step [300/391], Loss: 0.5670


 80%|███████▉  | 311/391 [00:54<00:13,  5.86it/s]

Epoch [40/40], Step [310/391], Loss: 0.5620


 82%|████████▏ | 321/391 [00:55<00:12,  5.82it/s]

Epoch [40/40], Step [320/391], Loss: 0.5630


 85%|████████▍ | 331/391 [00:57<00:10,  5.87it/s]

Epoch [40/40], Step [330/391], Loss: 0.5674


 87%|████████▋ | 341/391 [00:59<00:08,  5.84it/s]

Epoch [40/40], Step [340/391], Loss: 0.5756


 90%|████████▉ | 351/391 [01:00<00:06,  5.88it/s]

Epoch [40/40], Step [350/391], Loss: 0.5700


 92%|█████████▏| 361/391 [01:02<00:05,  5.87it/s]

Epoch [40/40], Step [360/391], Loss: 0.5695


 95%|█████████▍| 371/391 [01:04<00:03,  5.87it/s]

Epoch [40/40], Step [370/391], Loss: 0.5808


 97%|█████████▋| 381/391 [01:05<00:01,  5.85it/s]

Epoch [40/40], Step [380/391], Loss: 0.5837


100%|██████████| 391/391 [01:07<00:00,  5.78it/s]

Epoch [40/40], Step [390/391], Loss: 0.5797





Test Accuracy of the student model on the test images: 66.09 %
Best Accuracy: 66.12 %
TA3
Adjusted learning rate: 0.1


  3%|▎         | 11/391 [00:02<01:03,  6.01it/s]

Epoch [1/40], Step [10/391], Loss: 6.7865


  5%|▌         | 21/391 [00:04<00:58,  6.31it/s]

Epoch [1/40], Step [20/391], Loss: 6.7238


  8%|▊         | 31/391 [00:05<00:56,  6.32it/s]

Epoch [1/40], Step [30/391], Loss: 6.4367


 10%|█         | 41/391 [00:07<00:55,  6.32it/s]

Epoch [1/40], Step [40/391], Loss: 6.3017


 13%|█▎        | 51/391 [00:08<00:53,  6.34it/s]

Epoch [1/40], Step [50/391], Loss: 5.8149


 16%|█▌        | 61/391 [00:10<00:52,  6.33it/s]

Epoch [1/40], Step [60/391], Loss: 5.4756


 18%|█▊        | 71/391 [00:12<00:50,  6.33it/s]

Epoch [1/40], Step [70/391], Loss: 5.3212


 21%|██        | 81/391 [00:13<00:49,  6.32it/s]

Epoch [1/40], Step [80/391], Loss: 5.1246


 23%|██▎       | 91/391 [00:15<00:47,  6.33it/s]

Epoch [1/40], Step [90/391], Loss: 5.1334


 26%|██▌       | 101/391 [00:16<00:45,  6.33it/s]

Epoch [1/40], Step [100/391], Loss: 4.9601


 28%|██▊       | 111/391 [00:18<00:44,  6.33it/s]

Epoch [1/40], Step [110/391], Loss: 4.9241


 31%|███       | 121/391 [00:20<00:42,  6.33it/s]

Epoch [1/40], Step [120/391], Loss: 4.7220


 34%|███▎      | 131/391 [00:21<00:41,  6.33it/s]

Epoch [1/40], Step [130/391], Loss: 4.8263


 36%|███▌      | 141/391 [00:23<00:39,  6.32it/s]

Epoch [1/40], Step [140/391], Loss: 4.5875


 39%|███▊      | 151/391 [00:24<00:37,  6.33it/s]

Epoch [1/40], Step [150/391], Loss: 4.5995


 41%|████      | 161/391 [00:26<00:36,  6.32it/s]

Epoch [1/40], Step [160/391], Loss: 4.5070


 44%|████▎     | 171/391 [00:27<00:34,  6.33it/s]

Epoch [1/40], Step [170/391], Loss: 4.6180


 46%|████▋     | 181/391 [00:29<00:33,  6.32it/s]

Epoch [1/40], Step [180/391], Loss: 4.4456


 49%|████▉     | 191/391 [00:31<00:31,  6.31it/s]

Epoch [1/40], Step [190/391], Loss: 4.4244


 51%|█████▏    | 201/391 [00:32<00:30,  6.33it/s]

Epoch [1/40], Step [200/391], Loss: 4.4812


 54%|█████▍    | 211/391 [00:34<00:28,  6.31it/s]

Epoch [1/40], Step [210/391], Loss: 4.4662


 57%|█████▋    | 221/391 [00:35<00:26,  6.32it/s]

Epoch [1/40], Step [220/391], Loss: 4.3124


 59%|█████▉    | 231/391 [00:37<00:25,  6.30it/s]

Epoch [1/40], Step [230/391], Loss: 4.2995


 62%|██████▏   | 241/391 [00:39<00:23,  6.33it/s]

Epoch [1/40], Step [240/391], Loss: 4.3815


 64%|██████▍   | 251/391 [00:40<00:22,  6.33it/s]

Epoch [1/40], Step [250/391], Loss: 4.1554


 67%|██████▋   | 261/391 [00:42<00:20,  6.32it/s]

Epoch [1/40], Step [260/391], Loss: 4.2582


 69%|██████▉   | 271/391 [00:43<00:19,  6.31it/s]

Epoch [1/40], Step [270/391], Loss: 4.3879


 72%|███████▏  | 281/391 [00:45<00:17,  6.33it/s]

Epoch [1/40], Step [280/391], Loss: 4.2305


 74%|███████▍  | 291/391 [00:46<00:15,  6.33it/s]

Epoch [1/40], Step [290/391], Loss: 4.2373


 77%|███████▋  | 301/391 [00:48<00:14,  6.33it/s]

Epoch [1/40], Step [300/391], Loss: 4.1613


 80%|███████▉  | 311/391 [00:50<00:12,  6.32it/s]

Epoch [1/40], Step [310/391], Loss: 4.0792


 82%|████████▏ | 321/391 [00:51<00:11,  6.30it/s]

Epoch [1/40], Step [320/391], Loss: 4.1129


 85%|████████▍ | 331/391 [00:53<00:09,  6.34it/s]

Epoch [1/40], Step [330/391], Loss: 4.1865


 87%|████████▋ | 341/391 [00:54<00:07,  6.32it/s]

Epoch [1/40], Step [340/391], Loss: 4.1278


 90%|████████▉ | 351/391 [00:56<00:06,  6.33it/s]

Epoch [1/40], Step [350/391], Loss: 4.1565


 92%|█████████▏| 361/391 [00:57<00:04,  6.31it/s]

Epoch [1/40], Step [360/391], Loss: 4.0401


 95%|█████████▍| 371/391 [00:59<00:03,  6.34it/s]

Epoch [1/40], Step [370/391], Loss: 4.0932


 97%|█████████▋| 381/391 [01:01<00:01,  6.33it/s]

Epoch [1/40], Step [380/391], Loss: 3.9800


100%|██████████| 391/391 [01:02<00:00,  6.23it/s]

Epoch [1/40], Step [390/391], Loss: 3.9923





Test Accuracy of the student model on the test images: 11.47 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.79it/s]

Epoch [2/40], Step [10/391], Loss: 5.6392


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [2/40], Step [20/391], Loss: 5.5853


  8%|▊         | 31/391 [00:06<00:59,  6.07it/s]

Epoch [2/40], Step [30/391], Loss: 5.3265


 10%|█         | 41/391 [00:07<00:58,  6.03it/s]

Epoch [2/40], Step [40/391], Loss: 5.0656


 13%|█▎        | 51/391 [00:09<00:56,  6.05it/s]

Epoch [2/40], Step [50/391], Loss: 4.7750


 16%|█▌        | 61/391 [00:11<00:54,  6.05it/s]

Epoch [2/40], Step [60/391], Loss: 4.7428


 18%|█▊        | 71/391 [00:12<00:52,  6.04it/s]

Epoch [2/40], Step [70/391], Loss: 4.5988


 21%|██        | 81/391 [00:14<00:51,  6.05it/s]

Epoch [2/40], Step [80/391], Loss: 4.5849


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [2/40], Step [90/391], Loss: 4.4992


 26%|██▌       | 101/391 [00:17<00:48,  6.02it/s]

Epoch [2/40], Step [100/391], Loss: 4.4931


 28%|██▊       | 111/391 [00:19<00:46,  6.07it/s]

Epoch [2/40], Step [110/391], Loss: 4.2690


 31%|███       | 121/391 [00:20<00:44,  6.07it/s]

Epoch [2/40], Step [120/391], Loss: 4.1595


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [2/40], Step [130/391], Loss: 4.1835


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [2/40], Step [140/391], Loss: 4.1689


 39%|███▊      | 151/391 [00:25<00:39,  6.07it/s]

Epoch [2/40], Step [150/391], Loss: 4.1305


 41%|████      | 161/391 [00:27<00:37,  6.07it/s]

Epoch [2/40], Step [160/391], Loss: 4.1239


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [2/40], Step [170/391], Loss: 4.0577


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [2/40], Step [180/391], Loss: 3.9004


 49%|████▉     | 191/391 [00:32<00:32,  6.08it/s]

Epoch [2/40], Step [190/391], Loss: 4.0178


 51%|█████▏    | 201/391 [00:34<00:31,  6.06it/s]

Epoch [2/40], Step [200/391], Loss: 3.8520


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [2/40], Step [210/391], Loss: 3.8466


 57%|█████▋    | 221/391 [00:37<00:28,  6.07it/s]

Epoch [2/40], Step [220/391], Loss: 3.8828


 59%|█████▉    | 231/391 [00:39<00:26,  6.06it/s]

Epoch [2/40], Step [230/391], Loss: 3.8014


 62%|██████▏   | 241/391 [00:40<00:24,  6.05it/s]

Epoch [2/40], Step [240/391], Loss: 3.7302


 64%|██████▍   | 251/391 [00:42<00:23,  6.07it/s]

Epoch [2/40], Step [250/391], Loss: 3.7603


 67%|██████▋   | 261/391 [00:43<00:21,  6.07it/s]

Epoch [2/40], Step [260/391], Loss: 3.7660


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [2/40], Step [270/391], Loss: 3.7204


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [2/40], Step [280/391], Loss: 3.5797


 74%|███████▍  | 291/391 [00:48<00:16,  6.07it/s]

Epoch [2/40], Step [290/391], Loss: 3.6360


 77%|███████▋  | 301/391 [00:50<00:14,  6.05it/s]

Epoch [2/40], Step [300/391], Loss: 3.6802


 80%|███████▉  | 311/391 [00:52<00:13,  6.07it/s]

Epoch [2/40], Step [310/391], Loss: 3.6328


 82%|████████▏ | 321/391 [00:53<00:11,  6.06it/s]

Epoch [2/40], Step [320/391], Loss: 3.5893


 85%|████████▍ | 331/391 [00:55<00:09,  6.05it/s]

Epoch [2/40], Step [330/391], Loss: 3.6165


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [2/40], Step [340/391], Loss: 3.5389


 90%|████████▉ | 351/391 [00:58<00:06,  6.05it/s]

Epoch [2/40], Step [350/391], Loss: 3.4204


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [2/40], Step [360/391], Loss: 3.4786


 95%|█████████▍| 371/391 [01:02<00:03,  6.07it/s]

Epoch [2/40], Step [370/391], Loss: 3.4647


 97%|█████████▋| 381/391 [01:03<00:01,  6.05it/s]

Epoch [2/40], Step [380/391], Loss: 3.4599


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [2/40], Step [390/391], Loss: 3.5345





Test Accuracy of the student model on the test images: 17.79 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.78it/s]

Epoch [3/40], Step [10/391], Loss: 3.4512


  5%|▌         | 21/391 [00:04<01:01,  6.04it/s]

Epoch [3/40], Step [20/391], Loss: 3.3901


  8%|▊         | 31/391 [00:05<00:59,  6.06it/s]

Epoch [3/40], Step [30/391], Loss: 3.3431


 10%|█         | 41/391 [00:07<00:57,  6.07it/s]

Epoch [3/40], Step [40/391], Loss: 3.2419


 13%|█▎        | 51/391 [00:09<00:55,  6.07it/s]

Epoch [3/40], Step [50/391], Loss: 3.3808


 16%|█▌        | 61/391 [00:10<00:54,  6.06it/s]

Epoch [3/40], Step [60/391], Loss: 3.2756


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [3/40], Step [70/391], Loss: 3.2987


 21%|██        | 81/391 [00:14<00:51,  6.07it/s]

Epoch [3/40], Step [80/391], Loss: 3.2569


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [3/40], Step [90/391], Loss: 3.2060


 26%|██▌       | 101/391 [00:17<00:47,  6.07it/s]

Epoch [3/40], Step [100/391], Loss: 3.2934


 28%|██▊       | 111/391 [00:19<00:46,  6.05it/s]

Epoch [3/40], Step [110/391], Loss: 3.3553


 31%|███       | 121/391 [00:20<00:44,  6.07it/s]

Epoch [3/40], Step [120/391], Loss: 3.1764


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [3/40], Step [130/391], Loss: 3.2000


 36%|███▌      | 141/391 [00:24<00:41,  6.07it/s]

Epoch [3/40], Step [140/391], Loss: 3.2251


 39%|███▊      | 151/391 [00:25<00:39,  6.07it/s]

Epoch [3/40], Step [150/391], Loss: 3.2063


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [3/40], Step [160/391], Loss: 3.0376


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [3/40], Step [170/391], Loss: 3.1253


 46%|████▋     | 181/391 [00:30<00:34,  6.05it/s]

Epoch [3/40], Step [180/391], Loss: 3.0773


 49%|████▉     | 191/391 [00:32<00:32,  6.07it/s]

Epoch [3/40], Step [190/391], Loss: 3.0914


 51%|█████▏    | 201/391 [00:34<00:31,  6.05it/s]

Epoch [3/40], Step [200/391], Loss: 3.0745


 54%|█████▍    | 211/391 [00:35<00:29,  6.05it/s]

Epoch [3/40], Step [210/391], Loss: 3.0722


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [3/40], Step [220/391], Loss: 2.9748


 59%|█████▉    | 231/391 [00:38<00:26,  6.07it/s]

Epoch [3/40], Step [230/391], Loss: 3.0936


 62%|██████▏   | 241/391 [00:40<00:24,  6.07it/s]

Epoch [3/40], Step [240/391], Loss: 2.9344


 64%|██████▍   | 251/391 [00:42<00:23,  6.07it/s]

Epoch [3/40], Step [250/391], Loss: 3.0215


 67%|██████▋   | 261/391 [00:43<00:21,  6.06it/s]

Epoch [3/40], Step [260/391], Loss: 2.9348


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [3/40], Step [270/391], Loss: 2.9966


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [3/40], Step [280/391], Loss: 2.9743


 74%|███████▍  | 291/391 [00:48<00:16,  6.07it/s]

Epoch [3/40], Step [290/391], Loss: 3.0778


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [3/40], Step [300/391], Loss: 3.0391


 80%|███████▉  | 311/391 [00:52<00:13,  6.06it/s]

Epoch [3/40], Step [310/391], Loss: 2.9725


 82%|████████▏ | 321/391 [00:53<00:11,  6.06it/s]

Epoch [3/40], Step [320/391], Loss: 2.9199


 85%|████████▍ | 331/391 [00:55<00:09,  6.07it/s]

Epoch [3/40], Step [330/391], Loss: 2.9296


 87%|████████▋ | 341/391 [00:57<00:08,  6.05it/s]

Epoch [3/40], Step [340/391], Loss: 2.9407


 90%|████████▉ | 351/391 [00:58<00:06,  6.07it/s]

Epoch [3/40], Step [350/391], Loss: 2.9582


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [3/40], Step [360/391], Loss: 2.8822


 95%|█████████▍| 371/391 [01:02<00:03,  6.03it/s]

Epoch [3/40], Step [370/391], Loss: 2.8594


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [3/40], Step [380/391], Loss: 2.9075


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [3/40], Step [390/391], Loss: 2.8725





Test Accuracy of the student model on the test images: 27.98 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.76it/s]

Epoch [4/40], Step [10/391], Loss: 2.7720


  5%|▌         | 21/391 [00:04<01:01,  6.06it/s]

Epoch [4/40], Step [20/391], Loss: 2.7109


  8%|▊         | 31/391 [00:06<00:59,  6.06it/s]

Epoch [4/40], Step [30/391], Loss: 2.8061


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [4/40], Step [40/391], Loss: 2.7590


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [4/40], Step [50/391], Loss: 2.7452


 16%|█▌        | 61/391 [00:11<00:54,  6.06it/s]

Epoch [4/40], Step [60/391], Loss: 2.7902


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [4/40], Step [70/391], Loss: 2.7861


 21%|██        | 81/391 [00:14<00:51,  6.08it/s]

Epoch [4/40], Step [80/391], Loss: 2.7210


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [4/40], Step [90/391], Loss: 2.7652


 26%|██▌       | 101/391 [00:17<00:47,  6.05it/s]

Epoch [4/40], Step [100/391], Loss: 2.6319


 28%|██▊       | 111/391 [00:19<00:46,  6.06it/s]

Epoch [4/40], Step [110/391], Loss: 2.7104


 31%|███       | 121/391 [00:20<00:44,  6.08it/s]

Epoch [4/40], Step [120/391], Loss: 2.7071


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [4/40], Step [130/391], Loss: 2.6629


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [4/40], Step [140/391], Loss: 2.6282


 39%|███▊      | 151/391 [00:25<00:39,  6.07it/s]

Epoch [4/40], Step [150/391], Loss: 2.6492


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [4/40], Step [160/391], Loss: 2.6571


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [4/40], Step [170/391], Loss: 2.6785


 46%|████▋     | 181/391 [00:30<00:34,  6.07it/s]

Epoch [4/40], Step [180/391], Loss: 2.5766


 49%|████▉     | 191/391 [00:32<00:33,  6.05it/s]

Epoch [4/40], Step [190/391], Loss: 2.5403


 51%|█████▏    | 201/391 [00:34<00:31,  6.06it/s]

Epoch [4/40], Step [200/391], Loss: 2.6480


 54%|█████▍    | 211/391 [00:35<00:29,  6.05it/s]

Epoch [4/40], Step [210/391], Loss: 2.6116


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [4/40], Step [220/391], Loss: 2.5057


 59%|█████▉    | 231/391 [00:39<00:26,  6.07it/s]

Epoch [4/40], Step [230/391], Loss: 2.5685


 62%|██████▏   | 241/391 [00:40<00:24,  6.07it/s]

Epoch [4/40], Step [240/391], Loss: 2.5656


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [4/40], Step [250/391], Loss: 2.5865


 67%|██████▋   | 261/391 [00:43<00:21,  6.06it/s]

Epoch [4/40], Step [260/391], Loss: 2.4979


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [4/40], Step [270/391], Loss: 2.5138


 72%|███████▏  | 281/391 [00:47<00:18,  6.04it/s]

Epoch [4/40], Step [280/391], Loss: 2.5021


 74%|███████▍  | 291/391 [00:48<00:16,  6.06it/s]

Epoch [4/40], Step [290/391], Loss: 2.5520


 77%|███████▋  | 301/391 [00:50<00:14,  6.08it/s]

Epoch [4/40], Step [300/391], Loss: 2.6247


 80%|███████▉  | 311/391 [00:52<00:13,  6.07it/s]

Epoch [4/40], Step [310/391], Loss: 2.4543


 82%|████████▏ | 321/391 [00:53<00:11,  6.06it/s]

Epoch [4/40], Step [320/391], Loss: 2.5426


 85%|████████▍ | 331/391 [00:55<00:09,  6.06it/s]

Epoch [4/40], Step [330/391], Loss: 2.5537


 87%|████████▋ | 341/391 [00:57<00:08,  6.05it/s]

Epoch [4/40], Step [340/391], Loss: 2.4796


 90%|████████▉ | 351/391 [00:58<00:06,  6.07it/s]

Epoch [4/40], Step [350/391], Loss: 2.4296


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [4/40], Step [360/391], Loss: 2.5406


 95%|█████████▍| 371/391 [01:02<00:03,  6.07it/s]

Epoch [4/40], Step [370/391], Loss: 2.4980


 97%|█████████▋| 381/391 [01:03<00:01,  6.07it/s]

Epoch [4/40], Step [380/391], Loss: 2.5349


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [4/40], Step [390/391], Loss: 2.4971





Test Accuracy of the student model on the test images: 29.96 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:06,  5.76it/s]

Epoch [5/40], Step [10/391], Loss: 2.4294


  5%|▌         | 21/391 [00:04<01:01,  6.06it/s]

Epoch [5/40], Step [20/391], Loss: 2.4353


  8%|▊         | 31/391 [00:06<00:59,  6.05it/s]

Epoch [5/40], Step [30/391], Loss: 2.3703


 10%|█         | 41/391 [00:07<00:57,  6.07it/s]

Epoch [5/40], Step [40/391], Loss: 2.3706


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [5/40], Step [50/391], Loss: 2.3454


 16%|█▌        | 61/391 [00:11<00:54,  6.07it/s]

Epoch [5/40], Step [60/391], Loss: 2.3565


 18%|█▊        | 71/391 [00:12<00:53,  6.04it/s]

Epoch [5/40], Step [70/391], Loss: 2.3639


 21%|██        | 81/391 [00:14<00:51,  6.07it/s]

Epoch [5/40], Step [80/391], Loss: 2.3811


 23%|██▎       | 91/391 [00:16<00:49,  6.06it/s]

Epoch [5/40], Step [90/391], Loss: 2.4318


 26%|██▌       | 101/391 [00:17<00:47,  6.06it/s]

Epoch [5/40], Step [100/391], Loss: 2.3253


 28%|██▊       | 111/391 [00:19<00:46,  6.07it/s]

Epoch [5/40], Step [110/391], Loss: 2.2647


 31%|███       | 121/391 [00:20<00:44,  6.04it/s]

Epoch [5/40], Step [120/391], Loss: 2.3740


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [5/40], Step [130/391], Loss: 2.3826


 36%|███▌      | 141/391 [00:24<00:41,  6.05it/s]

Epoch [5/40], Step [140/391], Loss: 2.3167


 39%|███▊      | 151/391 [00:25<00:39,  6.04it/s]

Epoch [5/40], Step [150/391], Loss: 2.3175


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [5/40], Step [160/391], Loss: 2.3472


 44%|████▎     | 171/391 [00:29<00:36,  6.07it/s]

Epoch [5/40], Step [170/391], Loss: 2.3684


 46%|████▋     | 181/391 [00:30<00:34,  6.07it/s]

Epoch [5/40], Step [180/391], Loss: 2.3555


 49%|████▉     | 191/391 [00:32<00:32,  6.06it/s]

Epoch [5/40], Step [190/391], Loss: 2.3036


 51%|█████▏    | 201/391 [00:34<00:31,  6.05it/s]

Epoch [5/40], Step [200/391], Loss: 2.3811


 54%|█████▍    | 211/391 [00:35<00:29,  6.05it/s]

Epoch [5/40], Step [210/391], Loss: 2.3667


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [5/40], Step [220/391], Loss: 2.2914


 59%|█████▉    | 231/391 [00:39<00:26,  6.06it/s]

Epoch [5/40], Step [230/391], Loss: 2.3383


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [5/40], Step [240/391], Loss: 2.2548


 64%|██████▍   | 251/391 [00:42<00:23,  6.04it/s]

Epoch [5/40], Step [250/391], Loss: 2.2957


 67%|██████▋   | 261/391 [00:44<00:21,  6.05it/s]

Epoch [5/40], Step [260/391], Loss: 2.2568


 69%|██████▉   | 271/391 [00:45<00:19,  6.07it/s]

Epoch [5/40], Step [270/391], Loss: 2.2899


 72%|███████▏  | 281/391 [00:47<00:18,  6.07it/s]

Epoch [5/40], Step [280/391], Loss: 2.3255


 74%|███████▍  | 291/391 [00:49<00:16,  6.06it/s]

Epoch [5/40], Step [290/391], Loss: 2.2494


 77%|███████▋  | 301/391 [00:50<00:14,  6.07it/s]

Epoch [5/40], Step [300/391], Loss: 2.2055


 80%|███████▉  | 311/391 [00:52<00:13,  6.06it/s]

Epoch [5/40], Step [310/391], Loss: 2.2349


 82%|████████▏ | 321/391 [00:53<00:11,  6.07it/s]

Epoch [5/40], Step [320/391], Loss: 2.2005


 85%|████████▍ | 331/391 [00:55<00:09,  6.07it/s]

Epoch [5/40], Step [330/391], Loss: 2.2131


 87%|████████▋ | 341/391 [00:57<00:08,  6.08it/s]

Epoch [5/40], Step [340/391], Loss: 2.2216


 90%|████████▉ | 351/391 [00:58<00:06,  6.04it/s]

Epoch [5/40], Step [350/391], Loss: 2.2334


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [5/40], Step [360/391], Loss: 2.1626


 95%|█████████▍| 371/391 [01:02<00:03,  6.05it/s]

Epoch [5/40], Step [370/391], Loss: 2.2438


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [5/40], Step [380/391], Loss: 2.1961


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [5/40], Step [390/391], Loss: 2.3197





Test Accuracy of the student model on the test images: 30.79 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.81it/s]

Epoch [6/40], Step [10/391], Loss: 2.1344


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [6/40], Step [20/391], Loss: 2.1199


  8%|▊         | 31/391 [00:05<00:59,  6.07it/s]

Epoch [6/40], Step [30/391], Loss: 2.1032


 10%|█         | 41/391 [00:07<00:57,  6.05it/s]

Epoch [6/40], Step [40/391], Loss: 2.1007


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [6/40], Step [50/391], Loss: 2.1530


 16%|█▌        | 61/391 [00:10<00:54,  6.05it/s]

Epoch [6/40], Step [60/391], Loss: 2.1027


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [6/40], Step [70/391], Loss: 2.2116


 21%|██        | 81/391 [00:14<00:51,  6.07it/s]

Epoch [6/40], Step [80/391], Loss: 2.1446


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [6/40], Step [90/391], Loss: 2.1046


 26%|██▌       | 101/391 [00:17<00:47,  6.07it/s]

Epoch [6/40], Step [100/391], Loss: 2.0531


 28%|██▊       | 111/391 [00:19<00:46,  6.07it/s]

Epoch [6/40], Step [110/391], Loss: 2.0972


 31%|███       | 121/391 [00:20<00:44,  6.07it/s]

Epoch [6/40], Step [120/391], Loss: 2.0709


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [6/40], Step [130/391], Loss: 2.1275


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [6/40], Step [140/391], Loss: 2.1117


 39%|███▊      | 151/391 [00:25<00:39,  6.06it/s]

Epoch [6/40], Step [150/391], Loss: 2.0973


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [6/40], Step [160/391], Loss: 2.0940


 44%|████▎     | 171/391 [00:28<00:36,  6.05it/s]

Epoch [6/40], Step [170/391], Loss: 2.1443


 46%|████▋     | 181/391 [00:30<00:34,  6.07it/s]

Epoch [6/40], Step [180/391], Loss: 2.1558


 49%|████▉     | 191/391 [00:32<00:32,  6.06it/s]

Epoch [6/40], Step [190/391], Loss: 2.0700


 51%|█████▏    | 201/391 [00:33<00:31,  6.06it/s]

Epoch [6/40], Step [200/391], Loss: 2.1236


 54%|█████▍    | 211/391 [00:35<00:29,  6.08it/s]

Epoch [6/40], Step [210/391], Loss: 2.0767


 57%|█████▋    | 221/391 [00:37<00:28,  6.07it/s]

Epoch [6/40], Step [220/391], Loss: 2.0965


 59%|█████▉    | 231/391 [00:38<00:26,  6.06it/s]

Epoch [6/40], Step [230/391], Loss: 2.0761


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [6/40], Step [240/391], Loss: 2.1320


 64%|██████▍   | 251/391 [00:42<00:23,  6.07it/s]

Epoch [6/40], Step [250/391], Loss: 2.0480


 67%|██████▋   | 261/391 [00:43<00:21,  6.06it/s]

Epoch [6/40], Step [260/391], Loss: 2.0705


 69%|██████▉   | 271/391 [00:45<00:19,  6.07it/s]

Epoch [6/40], Step [270/391], Loss: 2.1021


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [6/40], Step [280/391], Loss: 2.1105


 74%|███████▍  | 291/391 [00:48<00:16,  6.06it/s]

Epoch [6/40], Step [290/391], Loss: 2.0783


 77%|███████▋  | 301/391 [00:50<00:14,  6.05it/s]

Epoch [6/40], Step [300/391], Loss: 2.1006


 80%|███████▉  | 311/391 [00:52<00:13,  6.07it/s]

Epoch [6/40], Step [310/391], Loss: 2.0667


 82%|████████▏ | 321/391 [00:53<00:11,  6.05it/s]

Epoch [6/40], Step [320/391], Loss: 2.0640


 85%|████████▍ | 331/391 [00:55<00:09,  6.07it/s]

Epoch [6/40], Step [330/391], Loss: 2.0319


 87%|████████▋ | 341/391 [00:57<00:08,  6.03it/s]

Epoch [6/40], Step [340/391], Loss: 2.0775


 90%|████████▉ | 351/391 [00:58<00:06,  6.06it/s]

Epoch [6/40], Step [350/391], Loss: 2.0921


 92%|█████████▏| 361/391 [01:00<00:04,  6.07it/s]

Epoch [6/40], Step [360/391], Loss: 2.0504


 95%|█████████▍| 371/391 [01:01<00:03,  6.07it/s]

Epoch [6/40], Step [370/391], Loss: 2.0633


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [6/40], Step [380/391], Loss: 2.0365


100%|██████████| 391/391 [01:05<00:00,  5.99it/s]

Epoch [6/40], Step [390/391], Loss: 2.0568





Test Accuracy of the student model on the test images: 32.05 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.79it/s]

Epoch [7/40], Step [10/391], Loss: 1.9781


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [7/40], Step [20/391], Loss: 1.9706


  8%|▊         | 31/391 [00:05<00:59,  6.06it/s]

Epoch [7/40], Step [30/391], Loss: 1.9960


 10%|█         | 41/391 [00:07<00:57,  6.07it/s]

Epoch [7/40], Step [40/391], Loss: 1.9776


 13%|█▎        | 51/391 [00:09<00:56,  6.07it/s]

Epoch [7/40], Step [50/391], Loss: 1.9372


 16%|█▌        | 61/391 [00:10<00:54,  6.05it/s]

Epoch [7/40], Step [60/391], Loss: 1.9292


 18%|█▊        | 71/391 [00:12<00:52,  6.05it/s]

Epoch [7/40], Step [70/391], Loss: 1.9271


 21%|██        | 81/391 [00:14<00:51,  6.05it/s]

Epoch [7/40], Step [80/391], Loss: 2.0134


 23%|██▎       | 91/391 [00:15<00:49,  6.05it/s]

Epoch [7/40], Step [90/391], Loss: 1.9948


 26%|██▌       | 101/391 [00:17<00:47,  6.06it/s]

Epoch [7/40], Step [100/391], Loss: 1.9809


 28%|██▊       | 111/391 [00:19<00:46,  6.05it/s]

Epoch [7/40], Step [110/391], Loss: 1.9850


 31%|███       | 121/391 [00:20<00:44,  6.07it/s]

Epoch [7/40], Step [120/391], Loss: 1.9829


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [7/40], Step [130/391], Loss: 1.9980


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [7/40], Step [140/391], Loss: 1.9810


 39%|███▊      | 151/391 [00:25<00:39,  6.06it/s]

Epoch [7/40], Step [150/391], Loss: 1.9727


 41%|████      | 161/391 [00:27<00:37,  6.07it/s]

Epoch [7/40], Step [160/391], Loss: 1.9657


 44%|████▎     | 171/391 [00:29<00:36,  6.07it/s]

Epoch [7/40], Step [170/391], Loss: 1.9643


 46%|████▋     | 181/391 [00:30<00:34,  6.05it/s]

Epoch [7/40], Step [180/391], Loss: 1.9941


 49%|████▉     | 191/391 [00:32<00:32,  6.07it/s]

Epoch [7/40], Step [190/391], Loss: 1.9179


 51%|█████▏    | 201/391 [00:33<00:31,  6.05it/s]

Epoch [7/40], Step [200/391], Loss: 1.9708


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [7/40], Step [210/391], Loss: 1.9617


 57%|█████▋    | 221/391 [00:37<00:27,  6.07it/s]

Epoch [7/40], Step [220/391], Loss: 1.9518


 59%|█████▉    | 231/391 [00:38<00:26,  6.05it/s]

Epoch [7/40], Step [230/391], Loss: 1.9250


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [7/40], Step [240/391], Loss: 1.8706


 64%|██████▍   | 251/391 [00:42<00:23,  6.07it/s]

Epoch [7/40], Step [250/391], Loss: 1.9046


 67%|██████▋   | 261/391 [00:43<00:21,  6.06it/s]

Epoch [7/40], Step [260/391], Loss: 1.9710


 69%|██████▉   | 271/391 [00:45<00:19,  6.07it/s]

Epoch [7/40], Step [270/391], Loss: 1.9370


 72%|███████▏  | 281/391 [00:47<00:18,  6.07it/s]

Epoch [7/40], Step [280/391], Loss: 2.0059


 74%|███████▍  | 291/391 [00:48<00:16,  6.05it/s]

Epoch [7/40], Step [290/391], Loss: 1.9626


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [7/40], Step [300/391], Loss: 1.9654


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [7/40], Step [310/391], Loss: 1.9454


 82%|████████▏ | 321/391 [00:53<00:11,  6.08it/s]

Epoch [7/40], Step [320/391], Loss: 1.9346


 85%|████████▍ | 331/391 [00:55<00:09,  6.06it/s]

Epoch [7/40], Step [330/391], Loss: 1.8973


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [7/40], Step [340/391], Loss: 1.9902


 90%|████████▉ | 351/391 [00:58<00:06,  6.07it/s]

Epoch [7/40], Step [350/391], Loss: 1.9303


 92%|█████████▏| 361/391 [01:00<00:04,  6.07it/s]

Epoch [7/40], Step [360/391], Loss: 1.9181


 95%|█████████▍| 371/391 [01:02<00:03,  6.07it/s]

Epoch [7/40], Step [370/391], Loss: 1.8870


 97%|█████████▋| 381/391 [01:03<00:01,  6.04it/s]

Epoch [7/40], Step [380/391], Loss: 1.9248


100%|██████████| 391/391 [01:05<00:00,  5.99it/s]

Epoch [7/40], Step [390/391], Loss: 1.9044





Test Accuracy of the student model on the test images: 36.46 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.79it/s]

Epoch [8/40], Step [10/391], Loss: 1.8588


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [8/40], Step [20/391], Loss: 1.9056


  8%|▊         | 31/391 [00:06<00:59,  6.06it/s]

Epoch [8/40], Step [30/391], Loss: 1.8786


 10%|█         | 41/391 [00:07<00:57,  6.07it/s]

Epoch [8/40], Step [40/391], Loss: 1.7966


 13%|█▎        | 51/391 [00:09<00:56,  6.07it/s]

Epoch [8/40], Step [50/391], Loss: 1.7991


 16%|█▌        | 61/391 [00:10<00:54,  6.07it/s]

Epoch [8/40], Step [60/391], Loss: 1.8159


 18%|█▊        | 71/391 [00:12<00:52,  6.07it/s]

Epoch [8/40], Step [70/391], Loss: 1.8587


 21%|██        | 81/391 [00:14<00:51,  6.07it/s]

Epoch [8/40], Step [80/391], Loss: 1.8494


 23%|██▎       | 91/391 [00:15<00:49,  6.05it/s]

Epoch [8/40], Step [90/391], Loss: 1.8415


 26%|██▌       | 101/391 [00:17<00:47,  6.06it/s]

Epoch [8/40], Step [100/391], Loss: 1.9183


 28%|██▊       | 111/391 [00:19<00:46,  6.07it/s]

Epoch [8/40], Step [110/391], Loss: 1.8882


 31%|███       | 121/391 [00:20<00:44,  6.06it/s]

Epoch [8/40], Step [120/391], Loss: 1.9074


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [8/40], Step [130/391], Loss: 1.8645


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [8/40], Step [140/391], Loss: 1.8251


 39%|███▊      | 151/391 [00:25<00:39,  6.05it/s]

Epoch [8/40], Step [150/391], Loss: 1.8382


 41%|████      | 161/391 [00:27<00:38,  6.05it/s]

Epoch [8/40], Step [160/391], Loss: 1.8482


 44%|████▎     | 171/391 [00:29<00:36,  6.05it/s]

Epoch [8/40], Step [170/391], Loss: 1.8034


 46%|████▋     | 181/391 [00:30<00:34,  6.07it/s]

Epoch [8/40], Step [180/391], Loss: 1.8250


 49%|████▉     | 191/391 [00:32<00:32,  6.07it/s]

Epoch [8/40], Step [190/391], Loss: 1.8787


 51%|█████▏    | 201/391 [00:34<00:31,  6.05it/s]

Epoch [8/40], Step [200/391], Loss: 1.8885


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [8/40], Step [210/391], Loss: 1.9278


 57%|█████▋    | 221/391 [00:37<00:28,  6.07it/s]

Epoch [8/40], Step [220/391], Loss: 1.9000


 59%|█████▉    | 231/391 [00:38<00:26,  6.08it/s]

Epoch [8/40], Step [230/391], Loss: 1.8874


 62%|██████▏   | 241/391 [00:40<00:24,  6.05it/s]

Epoch [8/40], Step [240/391], Loss: 1.9669


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [8/40], Step [250/391], Loss: 1.8877


 67%|██████▋   | 261/391 [00:43<00:21,  6.07it/s]

Epoch [8/40], Step [260/391], Loss: 1.9110


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [8/40], Step [270/391], Loss: 1.8456


 72%|███████▏  | 281/391 [00:47<00:18,  6.08it/s]

Epoch [8/40], Step [280/391], Loss: 1.8962


 74%|███████▍  | 291/391 [00:48<00:16,  6.05it/s]

Epoch [8/40], Step [290/391], Loss: 1.8706


 77%|███████▋  | 301/391 [00:50<00:14,  6.07it/s]

Epoch [8/40], Step [300/391], Loss: 1.7718


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [8/40], Step [310/391], Loss: 1.8248


 82%|████████▏ | 321/391 [00:53<00:11,  6.06it/s]

Epoch [8/40], Step [320/391], Loss: 1.7639


 85%|████████▍ | 331/391 [00:55<00:09,  6.04it/s]

Epoch [8/40], Step [330/391], Loss: 1.7794


 87%|████████▋ | 341/391 [00:57<00:08,  6.05it/s]

Epoch [8/40], Step [340/391], Loss: 1.8475


 90%|████████▉ | 351/391 [00:58<00:06,  6.06it/s]

Epoch [8/40], Step [350/391], Loss: 1.8616


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [8/40], Step [360/391], Loss: 1.8685


 95%|█████████▍| 371/391 [01:02<00:03,  6.08it/s]

Epoch [8/40], Step [370/391], Loss: 1.7903


 97%|█████████▋| 381/391 [01:03<00:01,  6.05it/s]

Epoch [8/40], Step [380/391], Loss: 1.8282


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [8/40], Step [390/391], Loss: 1.7515





Test Accuracy of the student model on the test images: 37.97 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.79it/s]

Epoch [9/40], Step [10/391], Loss: 1.7579


  5%|▌         | 21/391 [00:04<01:01,  6.06it/s]

Epoch [9/40], Step [20/391], Loss: 1.7338


  8%|▊         | 31/391 [00:05<00:59,  6.04it/s]

Epoch [9/40], Step [30/391], Loss: 1.7474


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [9/40], Step [40/391], Loss: 1.7366


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [9/40], Step [50/391], Loss: 1.7621


 16%|█▌        | 61/391 [00:10<00:54,  6.07it/s]

Epoch [9/40], Step [60/391], Loss: 1.8209


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [9/40], Step [70/391], Loss: 1.7796


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [9/40], Step [80/391], Loss: 1.7492


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [9/40], Step [90/391], Loss: 1.7903


 26%|██▌       | 101/391 [00:17<00:47,  6.05it/s]

Epoch [9/40], Step [100/391], Loss: 1.8008


 28%|██▊       | 111/391 [00:19<00:46,  6.06it/s]

Epoch [9/40], Step [110/391], Loss: 1.7851


 31%|███       | 121/391 [00:20<00:44,  6.06it/s]

Epoch [9/40], Step [120/391], Loss: 1.7836


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [9/40], Step [130/391], Loss: 1.7601


 36%|███▌      | 141/391 [00:24<00:41,  6.07it/s]

Epoch [9/40], Step [140/391], Loss: 1.7291


 39%|███▊      | 151/391 [00:25<00:39,  6.06it/s]

Epoch [9/40], Step [150/391], Loss: 1.7341


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [9/40], Step [160/391], Loss: 1.7534


 44%|████▎     | 171/391 [00:29<00:36,  6.04it/s]

Epoch [9/40], Step [170/391], Loss: 1.7595


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [9/40], Step [180/391], Loss: 1.6996


 49%|████▉     | 191/391 [00:32<00:32,  6.06it/s]

Epoch [9/40], Step [190/391], Loss: 1.8283


 51%|█████▏    | 201/391 [00:34<00:31,  6.07it/s]

Epoch [9/40], Step [200/391], Loss: 1.7861


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [9/40], Step [210/391], Loss: 1.8328


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [9/40], Step [220/391], Loss: 1.7952


 59%|█████▉    | 231/391 [00:38<00:26,  6.05it/s]

Epoch [9/40], Step [230/391], Loss: 1.7580


 62%|██████▏   | 241/391 [00:40<00:24,  6.05it/s]

Epoch [9/40], Step [240/391], Loss: 1.7529


 64%|██████▍   | 251/391 [00:42<00:23,  6.05it/s]

Epoch [9/40], Step [250/391], Loss: 1.8011


 67%|██████▋   | 261/391 [00:43<00:21,  6.05it/s]

Epoch [9/40], Step [260/391], Loss: 1.7564


 69%|██████▉   | 271/391 [00:45<00:19,  6.05it/s]

Epoch [9/40], Step [270/391], Loss: 1.7120


 72%|███████▏  | 281/391 [00:47<00:18,  6.07it/s]

Epoch [9/40], Step [280/391], Loss: 1.7325


 74%|███████▍  | 291/391 [00:48<00:16,  6.06it/s]

Epoch [9/40], Step [290/391], Loss: 1.7642


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [9/40], Step [300/391], Loss: 1.7634


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [9/40], Step [310/391], Loss: 1.7481


 82%|████████▏ | 321/391 [00:53<00:11,  6.06it/s]

Epoch [9/40], Step [320/391], Loss: 1.7307


 85%|████████▍ | 331/391 [00:55<00:09,  6.05it/s]

Epoch [9/40], Step [330/391], Loss: 1.7080


 87%|████████▋ | 341/391 [00:57<00:08,  6.05it/s]

Epoch [9/40], Step [340/391], Loss: 1.7296


 90%|████████▉ | 351/391 [00:58<00:06,  6.07it/s]

Epoch [9/40], Step [350/391], Loss: 1.7878


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [9/40], Step [360/391], Loss: 1.8110


 95%|█████████▍| 371/391 [01:02<00:03,  6.06it/s]

Epoch [9/40], Step [370/391], Loss: 1.8139


 97%|█████████▋| 381/391 [01:03<00:01,  6.05it/s]

Epoch [9/40], Step [380/391], Loss: 1.7537


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [9/40], Step [390/391], Loss: 1.8081





Test Accuracy of the student model on the test images: 36.33 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.81it/s]

Epoch [10/40], Step [10/391], Loss: 1.7095


  5%|▌         | 21/391 [00:04<01:01,  6.03it/s]

Epoch [10/40], Step [20/391], Loss: 1.6925


  8%|▊         | 31/391 [00:05<00:59,  6.05it/s]

Epoch [10/40], Step [30/391], Loss: 1.6725


 10%|█         | 41/391 [00:07<00:57,  6.05it/s]

Epoch [10/40], Step [40/391], Loss: 1.7069


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [10/40], Step [50/391], Loss: 1.6427


 16%|█▌        | 61/391 [00:10<00:54,  6.06it/s]

Epoch [10/40], Step [60/391], Loss: 1.6967


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [10/40], Step [70/391], Loss: 1.6747


 21%|██        | 81/391 [00:14<00:51,  6.05it/s]

Epoch [10/40], Step [80/391], Loss: 1.6491


 23%|██▎       | 91/391 [00:15<00:49,  6.07it/s]

Epoch [10/40], Step [90/391], Loss: 1.6398


 26%|██▌       | 101/391 [00:17<00:47,  6.05it/s]

Epoch [10/40], Step [100/391], Loss: 1.6711


 28%|██▊       | 111/391 [00:19<00:46,  6.06it/s]

Epoch [10/40], Step [110/391], Loss: 1.6750


 31%|███       | 121/391 [00:20<00:44,  6.07it/s]

Epoch [10/40], Step [120/391], Loss: 1.6380


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [10/40], Step [130/391], Loss: 1.6674


 36%|███▌      | 141/391 [00:24<00:41,  6.07it/s]

Epoch [10/40], Step [140/391], Loss: 1.6878


 39%|███▊      | 151/391 [00:25<00:39,  6.07it/s]

Epoch [10/40], Step [150/391], Loss: 1.6830


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [10/40], Step [160/391], Loss: 1.6551


 44%|████▎     | 171/391 [00:29<00:36,  6.05it/s]

Epoch [10/40], Step [170/391], Loss: 1.6449


 46%|████▋     | 181/391 [00:30<00:34,  6.05it/s]

Epoch [10/40], Step [180/391], Loss: 1.7422


 49%|████▉     | 191/391 [00:32<00:33,  6.06it/s]

Epoch [10/40], Step [190/391], Loss: 1.7289


 51%|█████▏    | 201/391 [00:33<00:31,  6.07it/s]

Epoch [10/40], Step [200/391], Loss: 1.7229


 54%|█████▍    | 211/391 [00:35<00:29,  6.07it/s]

Epoch [10/40], Step [210/391], Loss: 1.6967


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [10/40], Step [220/391], Loss: 1.6690


 59%|█████▉    | 231/391 [00:38<00:26,  6.07it/s]

Epoch [10/40], Step [230/391], Loss: 1.6576


 62%|██████▏   | 241/391 [00:40<00:24,  6.05it/s]

Epoch [10/40], Step [240/391], Loss: 1.6782


 64%|██████▍   | 251/391 [00:42<00:23,  6.05it/s]

Epoch [10/40], Step [250/391], Loss: 1.6587


 67%|██████▋   | 261/391 [00:43<00:21,  6.07it/s]

Epoch [10/40], Step [260/391], Loss: 1.6455


 69%|██████▉   | 271/391 [00:45<00:19,  6.07it/s]

Epoch [10/40], Step [270/391], Loss: 1.6548


 72%|███████▏  | 281/391 [00:47<00:18,  6.05it/s]

Epoch [10/40], Step [280/391], Loss: 1.6744


 74%|███████▍  | 291/391 [00:48<00:16,  6.07it/s]

Epoch [10/40], Step [290/391], Loss: 1.6623


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [10/40], Step [300/391], Loss: 1.7059


 80%|███████▉  | 311/391 [00:52<00:13,  6.02it/s]

Epoch [10/40], Step [310/391], Loss: 1.6694


 82%|████████▏ | 321/391 [00:53<00:11,  6.05it/s]

Epoch [10/40], Step [320/391], Loss: 1.7130


 85%|████████▍ | 331/391 [00:55<00:09,  6.07it/s]

Epoch [10/40], Step [330/391], Loss: 1.6864


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [10/40], Step [340/391], Loss: 1.7425


 90%|████████▉ | 351/391 [00:58<00:06,  6.05it/s]

Epoch [10/40], Step [350/391], Loss: 1.7869


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [10/40], Step [360/391], Loss: 1.6597


 95%|█████████▍| 371/391 [01:02<00:03,  6.05it/s]

Epoch [10/40], Step [370/391], Loss: 1.6845


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [10/40], Step [380/391], Loss: 1.7053


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [10/40], Step [390/391], Loss: 1.6780





Test Accuracy of the student model on the test images: 43.50 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.78it/s]

Epoch [11/40], Step [10/391], Loss: 1.6282


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [11/40], Step [20/391], Loss: 1.5754


  8%|▊         | 31/391 [00:06<00:59,  6.05it/s]

Epoch [11/40], Step [30/391], Loss: 1.5701


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [11/40], Step [40/391], Loss: 1.6370


 13%|█▎        | 51/391 [00:09<00:55,  6.08it/s]

Epoch [11/40], Step [50/391], Loss: 1.6676


 16%|█▌        | 61/391 [00:10<00:54,  6.04it/s]

Epoch [11/40], Step [60/391], Loss: 1.6098


 18%|█▊        | 71/391 [00:12<00:53,  6.03it/s]

Epoch [11/40], Step [70/391], Loss: 1.6219


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [11/40], Step [80/391], Loss: 1.6080


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [11/40], Step [90/391], Loss: 1.6024


 26%|██▌       | 101/391 [00:17<00:47,  6.08it/s]

Epoch [11/40], Step [100/391], Loss: 1.6556


 28%|██▊       | 111/391 [00:19<00:46,  6.06it/s]

Epoch [11/40], Step [110/391], Loss: 1.6433


 31%|███       | 121/391 [00:20<00:44,  6.06it/s]

Epoch [11/40], Step [120/391], Loss: 1.5934


 34%|███▎      | 131/391 [00:22<00:42,  6.05it/s]

Epoch [11/40], Step [130/391], Loss: 1.6465


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [11/40], Step [140/391], Loss: 1.6089


 39%|███▊      | 151/391 [00:25<00:39,  6.06it/s]

Epoch [11/40], Step [150/391], Loss: 1.6134


 41%|████      | 161/391 [00:27<00:37,  6.07it/s]

Epoch [11/40], Step [160/391], Loss: 1.6704


 44%|████▎     | 171/391 [00:29<00:36,  6.05it/s]

Epoch [11/40], Step [170/391], Loss: 1.5849


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [11/40], Step [180/391], Loss: 1.6244


 49%|████▉     | 191/391 [00:32<00:33,  6.06it/s]

Epoch [11/40], Step [190/391], Loss: 1.5989


 51%|█████▏    | 201/391 [00:34<00:31,  6.06it/s]

Epoch [11/40], Step [200/391], Loss: 1.6123


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [11/40], Step [210/391], Loss: 1.6263


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [11/40], Step [220/391], Loss: 1.6061


 59%|█████▉    | 231/391 [00:39<00:26,  6.06it/s]

Epoch [11/40], Step [230/391], Loss: 1.6726


 62%|██████▏   | 241/391 [00:40<00:24,  6.07it/s]

Epoch [11/40], Step [240/391], Loss: 1.5810


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [11/40], Step [250/391], Loss: 1.6520


 67%|██████▋   | 261/391 [00:43<00:21,  6.05it/s]

Epoch [11/40], Step [260/391], Loss: 1.6321


 69%|██████▉   | 271/391 [00:45<00:19,  6.07it/s]

Epoch [11/40], Step [270/391], Loss: 1.6265


 72%|███████▏  | 281/391 [00:47<00:18,  6.07it/s]

Epoch [11/40], Step [280/391], Loss: 1.6408


 74%|███████▍  | 291/391 [00:48<00:16,  6.04it/s]

Epoch [11/40], Step [290/391], Loss: 1.6543


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [11/40], Step [300/391], Loss: 1.6144


 80%|███████▉  | 311/391 [00:52<00:13,  6.07it/s]

Epoch [11/40], Step [310/391], Loss: 1.6112


 82%|████████▏ | 321/391 [00:53<00:11,  6.05it/s]

Epoch [11/40], Step [320/391], Loss: 1.6568


 85%|████████▍ | 331/391 [00:55<00:09,  6.04it/s]

Epoch [11/40], Step [330/391], Loss: 1.6637


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [11/40], Step [340/391], Loss: 1.6149


 90%|████████▉ | 351/391 [00:58<00:06,  6.06it/s]

Epoch [11/40], Step [350/391], Loss: 1.6122


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [11/40], Step [360/391], Loss: 1.6413


 95%|█████████▍| 371/391 [01:02<00:03,  6.07it/s]

Epoch [11/40], Step [370/391], Loss: 1.6179


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [11/40], Step [380/391], Loss: 1.6481


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [11/40], Step [390/391], Loss: 1.6671





Test Accuracy of the student model on the test images: 43.80 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.77it/s]

Epoch [12/40], Step [10/391], Loss: 1.5821


  5%|▌         | 21/391 [00:04<01:00,  6.07it/s]

Epoch [12/40], Step [20/391], Loss: 1.5873


  8%|▊         | 31/391 [00:06<00:59,  6.05it/s]

Epoch [12/40], Step [30/391], Loss: 1.5858


 10%|█         | 41/391 [00:07<00:57,  6.07it/s]

Epoch [12/40], Step [40/391], Loss: 1.5858


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [12/40], Step [50/391], Loss: 1.5692


 16%|█▌        | 61/391 [00:11<00:54,  6.06it/s]

Epoch [12/40], Step [60/391], Loss: 1.5346


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [12/40], Step [70/391], Loss: 1.5726


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [12/40], Step [80/391], Loss: 1.5254


 23%|██▎       | 91/391 [00:15<00:49,  6.02it/s]

Epoch [12/40], Step [90/391], Loss: 1.5001


 26%|██▌       | 101/391 [00:17<00:47,  6.05it/s]

Epoch [12/40], Step [100/391], Loss: 1.5680


 28%|██▊       | 111/391 [00:19<00:46,  6.05it/s]

Epoch [12/40], Step [110/391], Loss: 1.5855


 31%|███       | 121/391 [00:20<00:44,  6.05it/s]

Epoch [12/40], Step [120/391], Loss: 1.5890


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [12/40], Step [130/391], Loss: 1.5622


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [12/40], Step [140/391], Loss: 1.5780


 39%|███▊      | 151/391 [00:25<00:39,  6.06it/s]

Epoch [12/40], Step [150/391], Loss: 1.5504


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [12/40], Step [160/391], Loss: 1.5471


 44%|████▎     | 171/391 [00:29<00:36,  6.07it/s]

Epoch [12/40], Step [170/391], Loss: 1.5760


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [12/40], Step [180/391], Loss: 1.5975


 49%|████▉     | 191/391 [00:32<00:33,  6.06it/s]

Epoch [12/40], Step [190/391], Loss: 1.5335


 51%|█████▏    | 201/391 [00:34<00:31,  6.05it/s]

Epoch [12/40], Step [200/391], Loss: 1.5763


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [12/40], Step [210/391], Loss: 1.5764


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [12/40], Step [220/391], Loss: 1.6177


 59%|█████▉    | 231/391 [00:39<00:26,  6.05it/s]

Epoch [12/40], Step [230/391], Loss: 1.6332


 62%|██████▏   | 241/391 [00:40<00:24,  6.04it/s]

Epoch [12/40], Step [240/391], Loss: 1.6027


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [12/40], Step [250/391], Loss: 1.6137


 67%|██████▋   | 261/391 [00:44<00:21,  6.05it/s]

Epoch [12/40], Step [260/391], Loss: 1.5948


 69%|██████▉   | 271/391 [00:45<00:19,  6.05it/s]

Epoch [12/40], Step [270/391], Loss: 1.5429


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [12/40], Step [280/391], Loss: 1.5661


 74%|███████▍  | 291/391 [00:48<00:16,  6.06it/s]

Epoch [12/40], Step [290/391], Loss: 1.5648


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [12/40], Step [300/391], Loss: 1.5777


 80%|███████▉  | 311/391 [00:52<00:13,  6.06it/s]

Epoch [12/40], Step [310/391], Loss: 1.5354


 82%|████████▏ | 321/391 [00:53<00:11,  6.06it/s]

Epoch [12/40], Step [320/391], Loss: 1.5600


 85%|████████▍ | 331/391 [00:55<00:09,  6.07it/s]

Epoch [12/40], Step [330/391], Loss: 1.6470


 87%|████████▋ | 341/391 [00:57<00:08,  6.07it/s]

Epoch [12/40], Step [340/391], Loss: 1.6194


 90%|████████▉ | 351/391 [00:58<00:06,  6.05it/s]

Epoch [12/40], Step [350/391], Loss: 1.6062


 92%|█████████▏| 361/391 [01:00<00:04,  6.05it/s]

Epoch [12/40], Step [360/391], Loss: 1.5976


 95%|█████████▍| 371/391 [01:02<00:03,  6.07it/s]

Epoch [12/40], Step [370/391], Loss: 1.5677


 97%|█████████▋| 381/391 [01:03<00:01,  6.07it/s]

Epoch [12/40], Step [380/391], Loss: 1.6103


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [12/40], Step [390/391], Loss: 1.5659





Test Accuracy of the student model on the test images: 43.18 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.78it/s]

Epoch [13/40], Step [10/391], Loss: 1.5485


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [13/40], Step [20/391], Loss: 1.5544


  8%|▊         | 31/391 [00:06<00:59,  6.05it/s]

Epoch [13/40], Step [30/391], Loss: 1.5078


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [13/40], Step [40/391], Loss: 1.4750


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [13/40], Step [50/391], Loss: 1.5620


 16%|█▌        | 61/391 [00:10<00:54,  6.07it/s]

Epoch [13/40], Step [60/391], Loss: 1.4979


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [13/40], Step [70/391], Loss: 1.4798


 21%|██        | 81/391 [00:14<00:51,  6.07it/s]

Epoch [13/40], Step [80/391], Loss: 1.5241


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [13/40], Step [90/391], Loss: 1.5093


 26%|██▌       | 101/391 [00:17<00:47,  6.06it/s]

Epoch [13/40], Step [100/391], Loss: 1.5203


 28%|██▊       | 111/391 [00:19<00:46,  6.06it/s]

Epoch [13/40], Step [110/391], Loss: 1.5185


 31%|███       | 121/391 [00:20<00:44,  6.05it/s]

Epoch [13/40], Step [120/391], Loss: 1.5231


 34%|███▎      | 131/391 [00:22<00:43,  6.04it/s]

Epoch [13/40], Step [130/391], Loss: 1.5248


 36%|███▌      | 141/391 [00:24<00:41,  6.05it/s]

Epoch [13/40], Step [140/391], Loss: 1.4803


 39%|███▊      | 151/391 [00:25<00:39,  6.06it/s]

Epoch [13/40], Step [150/391], Loss: 1.5024


 41%|████      | 161/391 [00:27<00:37,  6.05it/s]

Epoch [13/40], Step [160/391], Loss: 1.5664


 44%|████▎     | 171/391 [00:29<00:36,  6.07it/s]

Epoch [13/40], Step [170/391], Loss: 1.5187


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [13/40], Step [180/391], Loss: 1.5571


 49%|████▉     | 191/391 [00:32<00:33,  6.06it/s]

Epoch [13/40], Step [190/391], Loss: 1.5472


 51%|█████▏    | 201/391 [00:34<00:31,  6.05it/s]

Epoch [13/40], Step [200/391], Loss: 1.5101


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [13/40], Step [210/391], Loss: 1.5472


 57%|█████▋    | 221/391 [00:37<00:28,  6.07it/s]

Epoch [13/40], Step [220/391], Loss: 1.5673


 59%|█████▉    | 231/391 [00:39<00:26,  6.07it/s]

Epoch [13/40], Step [230/391], Loss: 1.5424


 62%|██████▏   | 241/391 [00:40<00:24,  6.04it/s]

Epoch [13/40], Step [240/391], Loss: 1.5575


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [13/40], Step [250/391], Loss: 1.5640


 67%|██████▋   | 261/391 [00:43<00:21,  6.07it/s]

Epoch [13/40], Step [260/391], Loss: 1.5440


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [13/40], Step [270/391], Loss: 1.5540


 72%|███████▏  | 281/391 [00:47<00:18,  6.07it/s]

Epoch [13/40], Step [280/391], Loss: 1.5504


 74%|███████▍  | 291/391 [00:48<00:16,  6.05it/s]

Epoch [13/40], Step [290/391], Loss: 1.5350


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [13/40], Step [300/391], Loss: 1.5136


 80%|███████▉  | 311/391 [00:52<00:13,  6.07it/s]

Epoch [13/40], Step [310/391], Loss: 1.5414


 82%|████████▏ | 321/391 [00:53<00:11,  6.07it/s]

Epoch [13/40], Step [320/391], Loss: 1.5340


 84%|████████▍ | 330/391 [00:55<00:10,  6.07it/s]

Epoch [13/40], Step [330/391], Loss: 1.5063


 87%|████████▋ | 341/391 [00:57<00:08,  6.05it/s]

Epoch [13/40], Step [340/391], Loss: 1.5393


 90%|████████▉ | 351/391 [00:58<00:06,  6.06it/s]

Epoch [13/40], Step [350/391], Loss: 1.5237


 92%|█████████▏| 361/391 [01:00<00:04,  6.05it/s]

Epoch [13/40], Step [360/391], Loss: 1.5308


 95%|█████████▍| 371/391 [01:02<00:03,  6.06it/s]

Epoch [13/40], Step [370/391], Loss: 1.5358


 97%|█████████▋| 381/391 [01:03<00:01,  6.07it/s]

Epoch [13/40], Step [380/391], Loss: 1.5350


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [13/40], Step [390/391], Loss: 1.5292





Test Accuracy of the student model on the test images: 43.17 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:06,  5.75it/s]

Epoch [14/40], Step [10/391], Loss: 1.4740


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [14/40], Step [20/391], Loss: 1.4903


  8%|▊         | 31/391 [00:06<00:59,  6.05it/s]

Epoch [14/40], Step [30/391], Loss: 1.4535


 10%|█         | 41/391 [00:07<00:57,  6.05it/s]

Epoch [14/40], Step [40/391], Loss: 1.4499


 13%|█▎        | 51/391 [00:09<00:56,  6.04it/s]

Epoch [14/40], Step [50/391], Loss: 1.4270


 16%|█▌        | 61/391 [00:11<00:54,  6.06it/s]

Epoch [14/40], Step [60/391], Loss: 1.4279


 18%|█▊        | 71/391 [00:12<00:52,  6.07it/s]

Epoch [14/40], Step [70/391], Loss: 1.4080


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [14/40], Step [80/391], Loss: 1.4421


 23%|██▎       | 91/391 [00:16<00:49,  6.03it/s]

Epoch [14/40], Step [90/391], Loss: 1.4343


 26%|██▌       | 101/391 [00:17<00:47,  6.05it/s]

Epoch [14/40], Step [100/391], Loss: 1.4551


 28%|██▊       | 111/391 [00:19<00:46,  6.05it/s]

Epoch [14/40], Step [110/391], Loss: 1.4569


 31%|███       | 121/391 [00:20<00:44,  6.07it/s]

Epoch [14/40], Step [120/391], Loss: 1.4537


 34%|███▎      | 131/391 [00:22<00:42,  6.05it/s]

Epoch [14/40], Step [130/391], Loss: 1.4635


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [14/40], Step [140/391], Loss: 1.4715


 39%|███▊      | 151/391 [00:25<00:39,  6.06it/s]

Epoch [14/40], Step [150/391], Loss: 1.4213


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [14/40], Step [160/391], Loss: 1.4397


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [14/40], Step [170/391], Loss: 1.4294


 46%|████▋     | 181/391 [00:30<00:34,  6.07it/s]

Epoch [14/40], Step [180/391], Loss: 1.4286


 49%|████▉     | 191/391 [00:32<00:33,  6.06it/s]

Epoch [14/40], Step [190/391], Loss: 1.4730


 51%|█████▏    | 201/391 [00:34<00:31,  6.05it/s]

Epoch [14/40], Step [200/391], Loss: 1.4684


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [14/40], Step [210/391], Loss: 1.4880


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [14/40], Step [220/391], Loss: 1.4873


 59%|█████▉    | 231/391 [00:39<00:26,  6.06it/s]

Epoch [14/40], Step [230/391], Loss: 1.4895


 62%|██████▏   | 241/391 [00:40<00:24,  6.07it/s]

Epoch [14/40], Step [240/391], Loss: 1.4869


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [14/40], Step [250/391], Loss: 1.4767


 67%|██████▋   | 261/391 [00:44<00:21,  6.03it/s]

Epoch [14/40], Step [260/391], Loss: 1.4925


 69%|██████▉   | 271/391 [00:45<00:19,  6.04it/s]

Epoch [14/40], Step [270/391], Loss: 1.4708


 72%|███████▏  | 281/391 [00:47<00:18,  5.97it/s]

Epoch [14/40], Step [280/391], Loss: 1.5256


 74%|███████▍  | 291/391 [00:49<00:16,  6.05it/s]

Epoch [14/40], Step [290/391], Loss: 1.5445


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [14/40], Step [300/391], Loss: 1.5449


 80%|███████▉  | 311/391 [00:52<00:13,  6.07it/s]

Epoch [14/40], Step [310/391], Loss: 1.5187


 82%|████████▏ | 321/391 [00:54<00:11,  6.05it/s]

Epoch [14/40], Step [320/391], Loss: 1.5788


 85%|████████▍ | 331/391 [00:55<00:09,  6.07it/s]

Epoch [14/40], Step [330/391], Loss: 1.5315


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [14/40], Step [340/391], Loss: 1.5220


 90%|████████▉ | 351/391 [00:58<00:06,  6.05it/s]

Epoch [14/40], Step [350/391], Loss: 1.5234


 92%|█████████▏| 361/391 [01:00<00:04,  6.07it/s]

Epoch [14/40], Step [360/391], Loss: 1.4902


 95%|█████████▍| 371/391 [01:02<00:03,  6.07it/s]

Epoch [14/40], Step [370/391], Loss: 1.5750


 97%|█████████▋| 381/391 [01:03<00:01,  6.05it/s]

Epoch [14/40], Step [380/391], Loss: 1.5255


100%|██████████| 391/391 [01:05<00:00,  5.96it/s]

Epoch [14/40], Step [390/391], Loss: 1.4955





Test Accuracy of the student model on the test images: 43.77 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.77it/s]

Epoch [15/40], Step [10/391], Loss: 1.4298


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [15/40], Step [20/391], Loss: 1.3764


  8%|▊         | 31/391 [00:06<00:59,  6.04it/s]

Epoch [15/40], Step [30/391], Loss: 1.3837


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [15/40], Step [40/391], Loss: 1.4206


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [15/40], Step [50/391], Loss: 1.3854


 16%|█▌        | 61/391 [00:11<00:54,  6.06it/s]

Epoch [15/40], Step [60/391], Loss: 1.4226


 18%|█▊        | 71/391 [00:12<00:52,  6.07it/s]

Epoch [15/40], Step [70/391], Loss: 1.3791


 21%|██        | 81/391 [00:14<00:51,  6.05it/s]

Epoch [15/40], Step [80/391], Loss: 1.3948


 23%|██▎       | 91/391 [00:15<00:49,  6.07it/s]

Epoch [15/40], Step [90/391], Loss: 1.4061


 26%|██▌       | 101/391 [00:17<00:47,  6.05it/s]

Epoch [15/40], Step [100/391], Loss: 1.3804


 28%|██▊       | 111/391 [00:19<00:46,  6.05it/s]

Epoch [15/40], Step [110/391], Loss: 1.4442


 31%|███       | 121/391 [00:20<00:44,  6.06it/s]

Epoch [15/40], Step [120/391], Loss: 1.4341


 34%|███▎      | 131/391 [00:22<00:42,  6.05it/s]

Epoch [15/40], Step [130/391], Loss: 1.4441


 36%|███▌      | 141/391 [00:24<00:41,  6.05it/s]

Epoch [15/40], Step [140/391], Loss: 1.4133


 39%|███▊      | 151/391 [00:25<00:39,  6.05it/s]

Epoch [15/40], Step [150/391], Loss: 1.4158


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [15/40], Step [160/391], Loss: 1.4261


 44%|████▎     | 171/391 [00:29<00:36,  6.05it/s]

Epoch [15/40], Step [170/391], Loss: 1.4705


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [15/40], Step [180/391], Loss: 1.4530


 49%|████▉     | 191/391 [00:32<00:33,  6.04it/s]

Epoch [15/40], Step [190/391], Loss: 1.4203


 51%|█████▏    | 201/391 [00:34<00:31,  6.04it/s]

Epoch [15/40], Step [200/391], Loss: 1.4293


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [15/40], Step [210/391], Loss: 1.3906


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [15/40], Step [220/391], Loss: 1.3857


 59%|█████▉    | 231/391 [00:39<00:26,  6.06it/s]

Epoch [15/40], Step [230/391], Loss: 1.4378


 62%|██████▏   | 241/391 [00:40<00:24,  6.04it/s]

Epoch [15/40], Step [240/391], Loss: 1.4240


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [15/40], Step [250/391], Loss: 1.4107


 67%|██████▋   | 261/391 [00:44<00:21,  6.05it/s]

Epoch [15/40], Step [260/391], Loss: 1.4029


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [15/40], Step [270/391], Loss: 1.4171


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [15/40], Step [280/391], Loss: 1.4663


 74%|███████▍  | 291/391 [00:48<00:16,  6.04it/s]

Epoch [15/40], Step [290/391], Loss: 1.4415


 77%|███████▋  | 301/391 [00:50<00:14,  6.07it/s]

Epoch [15/40], Step [300/391], Loss: 1.4538


 80%|███████▉  | 311/391 [00:52<00:13,  6.07it/s]

Epoch [15/40], Step [310/391], Loss: 1.4415


 82%|████████▏ | 321/391 [00:53<00:11,  6.05it/s]

Epoch [15/40], Step [320/391], Loss: 1.4244


 85%|████████▍ | 331/391 [00:55<00:09,  6.06it/s]

Epoch [15/40], Step [330/391], Loss: 1.4399


 87%|████████▋ | 341/391 [00:57<00:08,  6.07it/s]

Epoch [15/40], Step [340/391], Loss: 1.4513


 90%|████████▉ | 351/391 [00:58<00:06,  6.06it/s]

Epoch [15/40], Step [350/391], Loss: 1.4634


 92%|█████████▏| 361/391 [01:00<00:04,  6.07it/s]

Epoch [15/40], Step [360/391], Loss: 1.4835


 95%|█████████▍| 371/391 [01:02<00:03,  6.07it/s]

Epoch [15/40], Step [370/391], Loss: 1.4623


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [15/40], Step [380/391], Loss: 1.4874


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [15/40], Step [390/391], Loss: 1.4200





Test Accuracy of the student model on the test images: 40.48 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.81it/s]

Epoch [16/40], Step [10/391], Loss: 1.3889


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [16/40], Step [20/391], Loss: 1.3570


  8%|▊         | 31/391 [00:05<00:59,  6.05it/s]

Epoch [16/40], Step [30/391], Loss: 1.3142


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [16/40], Step [40/391], Loss: 1.3450


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [16/40], Step [50/391], Loss: 1.3657


 16%|█▌        | 61/391 [00:10<00:54,  6.07it/s]

Epoch [16/40], Step [60/391], Loss: 1.3627


 18%|█▊        | 71/391 [00:12<00:52,  6.04it/s]

Epoch [16/40], Step [70/391], Loss: 1.3274


 21%|██        | 81/391 [00:14<00:51,  6.05it/s]

Epoch [16/40], Step [80/391], Loss: 1.3530


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [16/40], Step [90/391], Loss: 1.3617


 26%|██▌       | 101/391 [00:17<00:47,  6.05it/s]

Epoch [16/40], Step [100/391], Loss: 1.3314


 28%|██▊       | 111/391 [00:19<00:46,  6.05it/s]

Epoch [16/40], Step [110/391], Loss: 1.3589


 31%|███       | 121/391 [00:20<00:44,  6.07it/s]

Epoch [16/40], Step [120/391], Loss: 1.3740


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [16/40], Step [130/391], Loss: 1.3467


 36%|███▌      | 141/391 [00:24<00:41,  6.04it/s]

Epoch [16/40], Step [140/391], Loss: 1.3368


 39%|███▊      | 151/391 [00:25<00:39,  6.03it/s]

Epoch [16/40], Step [150/391], Loss: 1.3816


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [16/40], Step [160/391], Loss: 1.4081


 44%|████▎     | 171/391 [00:29<00:36,  6.07it/s]

Epoch [16/40], Step [170/391], Loss: 1.3732


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [16/40], Step [180/391], Loss: 1.3900


 49%|████▉     | 191/391 [00:32<00:33,  6.05it/s]

Epoch [16/40], Step [190/391], Loss: 1.3791


 51%|█████▏    | 201/391 [00:33<00:31,  6.07it/s]

Epoch [16/40], Step [200/391], Loss: 1.4150


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [16/40], Step [210/391], Loss: 1.4122


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [16/40], Step [220/391], Loss: 1.3825


 59%|█████▉    | 231/391 [00:38<00:26,  6.06it/s]

Epoch [16/40], Step [230/391], Loss: 1.3959


 62%|██████▏   | 241/391 [00:40<00:24,  6.07it/s]

Epoch [16/40], Step [240/391], Loss: 1.4045


 64%|██████▍   | 251/391 [00:42<00:23,  6.05it/s]

Epoch [16/40], Step [250/391], Loss: 1.4186


 67%|██████▋   | 261/391 [00:43<00:21,  6.07it/s]

Epoch [16/40], Step [260/391], Loss: 1.4226


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [16/40], Step [270/391], Loss: 1.4375


 72%|███████▏  | 281/391 [00:47<00:18,  6.05it/s]

Epoch [16/40], Step [280/391], Loss: 1.4509


 74%|███████▍  | 291/391 [00:48<00:16,  6.06it/s]

Epoch [16/40], Step [290/391], Loss: 1.4435


 77%|███████▋  | 301/391 [00:50<00:14,  6.07it/s]

Epoch [16/40], Step [300/391], Loss: 1.4284


 80%|███████▉  | 311/391 [00:52<00:13,  6.07it/s]

Epoch [16/40], Step [310/391], Loss: 1.4148


 82%|████████▏ | 321/391 [00:53<00:11,  6.07it/s]

Epoch [16/40], Step [320/391], Loss: 1.4592


 85%|████████▍ | 331/391 [00:55<00:09,  6.07it/s]

Epoch [16/40], Step [330/391], Loss: 1.4513


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [16/40], Step [340/391], Loss: 1.4949


 90%|████████▉ | 351/391 [00:58<00:06,  6.06it/s]

Epoch [16/40], Step [350/391], Loss: 1.4654


 92%|█████████▏| 361/391 [01:00<00:04,  6.05it/s]

Epoch [16/40], Step [360/391], Loss: 1.4281


 95%|█████████▍| 371/391 [01:02<00:03,  6.06it/s]

Epoch [16/40], Step [370/391], Loss: 1.4191


 97%|█████████▋| 381/391 [01:03<00:01,  6.07it/s]

Epoch [16/40], Step [380/391], Loss: 1.4301


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [16/40], Step [390/391], Loss: 1.4429





Test Accuracy of the student model on the test images: 43.25 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:06,  5.75it/s]

Epoch [17/40], Step [10/391], Loss: 1.3660


  5%|▌         | 21/391 [00:04<01:01,  6.04it/s]

Epoch [17/40], Step [20/391], Loss: 1.3210


  8%|▊         | 31/391 [00:06<00:59,  6.06it/s]

Epoch [17/40], Step [30/391], Loss: 1.3464


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [17/40], Step [40/391], Loss: 1.3051


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [17/40], Step [50/391], Loss: 1.3478


 16%|█▌        | 61/391 [00:11<00:54,  6.07it/s]

Epoch [17/40], Step [60/391], Loss: 1.3372


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [17/40], Step [70/391], Loss: 1.3252


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [17/40], Step [80/391], Loss: 1.3294


 23%|██▎       | 91/391 [00:15<00:49,  6.07it/s]

Epoch [17/40], Step [90/391], Loss: 1.3376


 26%|██▌       | 101/391 [00:17<00:47,  6.05it/s]

Epoch [17/40], Step [100/391], Loss: 1.3537


 28%|██▊       | 111/391 [00:19<00:46,  6.05it/s]

Epoch [17/40], Step [110/391], Loss: 1.3269


 31%|███       | 121/391 [00:20<00:44,  6.05it/s]

Epoch [17/40], Step [120/391], Loss: 1.3529


 34%|███▎      | 131/391 [00:22<00:42,  6.05it/s]

Epoch [17/40], Step [130/391], Loss: 1.3530


 36%|███▌      | 141/391 [00:24<00:41,  6.07it/s]

Epoch [17/40], Step [140/391], Loss: 1.3514


 39%|███▊      | 151/391 [00:25<00:39,  6.05it/s]

Epoch [17/40], Step [150/391], Loss: 1.3423


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [17/40], Step [160/391], Loss: 1.3343


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [17/40], Step [170/391], Loss: 1.3547


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [17/40], Step [180/391], Loss: 1.3176


 49%|████▉     | 191/391 [00:32<00:32,  6.07it/s]

Epoch [17/40], Step [190/391], Loss: 1.3336


 51%|█████▏    | 201/391 [00:34<00:31,  6.05it/s]

Epoch [17/40], Step [200/391], Loss: 1.3300


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [17/40], Step [210/391], Loss: 1.3456


 57%|█████▋    | 221/391 [00:37<00:28,  6.07it/s]

Epoch [17/40], Step [220/391], Loss: 1.3273


 59%|█████▉    | 231/391 [00:39<00:26,  6.05it/s]

Epoch [17/40], Step [230/391], Loss: 1.4068


 62%|██████▏   | 241/391 [00:40<00:24,  6.02it/s]

Epoch [17/40], Step [240/391], Loss: 1.3728


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [17/40], Step [250/391], Loss: 1.3400


 67%|██████▋   | 261/391 [00:44<00:21,  6.06it/s]

Epoch [17/40], Step [260/391], Loss: 1.3541


 69%|██████▉   | 271/391 [00:45<00:19,  6.07it/s]

Epoch [17/40], Step [270/391], Loss: 1.3925


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [17/40], Step [280/391], Loss: 1.3625


 74%|███████▍  | 291/391 [00:48<00:16,  6.05it/s]

Epoch [17/40], Step [290/391], Loss: 1.3646


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [17/40], Step [300/391], Loss: 1.3406


 80%|███████▉  | 311/391 [00:52<00:13,  6.07it/s]

Epoch [17/40], Step [310/391], Loss: 1.3439


 82%|████████▏ | 321/391 [00:53<00:11,  6.06it/s]

Epoch [17/40], Step [320/391], Loss: 1.3593


 85%|████████▍ | 331/391 [00:55<00:09,  6.06it/s]

Epoch [17/40], Step [330/391], Loss: 1.3749


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [17/40], Step [340/391], Loss: 1.4321


 90%|████████▉ | 351/391 [00:58<00:06,  6.07it/s]

Epoch [17/40], Step [350/391], Loss: 1.4091


 92%|█████████▏| 361/391 [01:00<00:04,  6.04it/s]

Epoch [17/40], Step [360/391], Loss: 1.3680


 95%|█████████▍| 371/391 [01:02<00:03,  6.06it/s]

Epoch [17/40], Step [370/391], Loss: 1.4041


 97%|█████████▋| 381/391 [01:03<00:01,  6.05it/s]

Epoch [17/40], Step [380/391], Loss: 1.4130


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [17/40], Step [390/391], Loss: 1.3909





Test Accuracy of the student model on the test images: 47.18 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.77it/s]

Epoch [18/40], Step [10/391], Loss: 1.3227


  5%|▌         | 21/391 [00:04<01:01,  6.06it/s]

Epoch [18/40], Step [20/391], Loss: 1.2600


  8%|▊         | 31/391 [00:06<00:59,  6.07it/s]

Epoch [18/40], Step [30/391], Loss: 1.3012


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [18/40], Step [40/391], Loss: 1.2993


 13%|█▎        | 51/391 [00:09<00:55,  6.07it/s]

Epoch [18/40], Step [50/391], Loss: 1.3008


 16%|█▌        | 61/391 [00:10<00:54,  6.06it/s]

Epoch [18/40], Step [60/391], Loss: 1.2511


 18%|█▊        | 71/391 [00:12<00:52,  6.08it/s]

Epoch [18/40], Step [70/391], Loss: 1.3142


 21%|██        | 81/391 [00:14<00:51,  6.04it/s]

Epoch [18/40], Step [80/391], Loss: 1.3353


 23%|██▎       | 91/391 [00:15<00:49,  6.05it/s]

Epoch [18/40], Step [90/391], Loss: 1.2844


 26%|██▌       | 101/391 [00:17<00:47,  6.07it/s]

Epoch [18/40], Step [100/391], Loss: 1.3013


 28%|██▊       | 111/391 [00:19<00:46,  6.07it/s]

Epoch [18/40], Step [110/391], Loss: 1.3383


 31%|███       | 121/391 [00:20<00:44,  6.06it/s]

Epoch [18/40], Step [120/391], Loss: 1.3125


 34%|███▎      | 131/391 [00:22<00:42,  6.07it/s]

Epoch [18/40], Step [130/391], Loss: 1.2832


 36%|███▌      | 141/391 [00:24<00:41,  6.07it/s]

Epoch [18/40], Step [140/391], Loss: 1.3049


 39%|███▊      | 151/391 [00:25<00:39,  6.05it/s]

Epoch [18/40], Step [150/391], Loss: 1.2924


 41%|████      | 161/391 [00:27<00:38,  6.05it/s]

Epoch [18/40], Step [160/391], Loss: 1.2800


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [18/40], Step [170/391], Loss: 1.2787


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [18/40], Step [180/391], Loss: 1.3028


 49%|████▉     | 191/391 [00:32<00:32,  6.06it/s]

Epoch [18/40], Step [190/391], Loss: 1.3132


 51%|█████▏    | 201/391 [00:34<00:31,  6.05it/s]

Epoch [18/40], Step [200/391], Loss: 1.3077


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [18/40], Step [210/391], Loss: 1.3322


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [18/40], Step [220/391], Loss: 1.2946


 59%|█████▉    | 231/391 [00:39<00:26,  6.04it/s]

Epoch [18/40], Step [230/391], Loss: 1.2942


 62%|██████▏   | 241/391 [00:40<00:24,  6.05it/s]

Epoch [18/40], Step [240/391], Loss: 1.3041


 64%|██████▍   | 251/391 [00:42<00:23,  6.08it/s]

Epoch [18/40], Step [250/391], Loss: 1.3075


 67%|██████▋   | 261/391 [00:43<00:21,  6.07it/s]

Epoch [18/40], Step [260/391], Loss: 1.3027


 69%|██████▉   | 271/391 [00:45<00:19,  6.04it/s]

Epoch [18/40], Step [270/391], Loss: 1.2932


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [18/40], Step [280/391], Loss: 1.3274


 74%|███████▍  | 291/391 [00:48<00:16,  6.07it/s]

Epoch [18/40], Step [290/391], Loss: 1.3352


 77%|███████▋  | 301/391 [00:50<00:14,  6.05it/s]

Epoch [18/40], Step [300/391], Loss: 1.3564


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [18/40], Step [310/391], Loss: 1.3552


 82%|████████▏ | 321/391 [00:53<00:11,  6.07it/s]

Epoch [18/40], Step [320/391], Loss: 1.3439


 85%|████████▍ | 331/391 [00:55<00:09,  6.06it/s]

Epoch [18/40], Step [330/391], Loss: 1.3408


 87%|████████▋ | 341/391 [00:57<00:08,  6.07it/s]

Epoch [18/40], Step [340/391], Loss: 1.3302


 90%|████████▉ | 351/391 [00:58<00:06,  6.07it/s]

Epoch [18/40], Step [350/391], Loss: 1.3186


 92%|█████████▏| 361/391 [01:00<00:04,  6.05it/s]

Epoch [18/40], Step [360/391], Loss: 1.3293


 95%|█████████▍| 371/391 [01:02<00:03,  6.05it/s]

Epoch [18/40], Step [370/391], Loss: 1.3600


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [18/40], Step [380/391], Loss: 1.3438


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [18/40], Step [390/391], Loss: 1.3269





Test Accuracy of the student model on the test images: 43.49 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:06,  5.75it/s]

Epoch [19/40], Step [10/391], Loss: 1.2876


  5%|▌         | 21/391 [00:04<01:01,  6.04it/s]

Epoch [19/40], Step [20/391], Loss: 1.2803


  8%|▊         | 31/391 [00:06<00:59,  6.05it/s]

Epoch [19/40], Step [30/391], Loss: 1.2768


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [19/40], Step [40/391], Loss: 1.2461


 13%|█▎        | 51/391 [00:09<00:56,  6.07it/s]

Epoch [19/40], Step [50/391], Loss: 1.2368


 16%|█▌        | 61/391 [00:11<00:54,  6.07it/s]

Epoch [19/40], Step [60/391], Loss: 1.2536


 18%|█▊        | 71/391 [00:12<00:52,  6.05it/s]

Epoch [19/40], Step [70/391], Loss: 1.2270


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [19/40], Step [80/391], Loss: 1.2998


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [19/40], Step [90/391], Loss: 1.2838


 26%|██▌       | 101/391 [00:17<00:48,  6.03it/s]

Epoch [19/40], Step [100/391], Loss: 1.2339


 28%|██▊       | 111/391 [00:19<00:46,  6.06it/s]

Epoch [19/40], Step [110/391], Loss: 1.2124


 31%|███       | 121/391 [00:20<00:44,  6.05it/s]

Epoch [19/40], Step [120/391], Loss: 1.2633


 34%|███▎      | 131/391 [00:22<00:43,  6.04it/s]

Epoch [19/40], Step [130/391], Loss: 1.2204


 36%|███▌      | 141/391 [00:24<00:41,  6.05it/s]

Epoch [19/40], Step [140/391], Loss: 1.2625


 39%|███▊      | 151/391 [00:25<00:39,  6.07it/s]

Epoch [19/40], Step [150/391], Loss: 1.2492


 41%|████      | 161/391 [00:27<00:38,  6.04it/s]

Epoch [19/40], Step [160/391], Loss: 1.2732


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [19/40], Step [170/391], Loss: 1.2973


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [19/40], Step [180/391], Loss: 1.3150


 49%|████▉     | 191/391 [00:32<00:32,  6.07it/s]

Epoch [19/40], Step [190/391], Loss: 1.2633


 51%|█████▏    | 201/391 [00:34<00:31,  6.07it/s]

Epoch [19/40], Step [200/391], Loss: 1.2853


 54%|█████▍    | 211/391 [00:35<00:29,  6.07it/s]

Epoch [19/40], Step [210/391], Loss: 1.2597


 57%|█████▋    | 221/391 [00:37<00:28,  6.05it/s]

Epoch [19/40], Step [220/391], Loss: 1.2752


 59%|█████▉    | 231/391 [00:39<00:26,  6.04it/s]

Epoch [19/40], Step [230/391], Loss: 1.2778


 62%|██████▏   | 241/391 [00:40<00:24,  6.05it/s]

Epoch [19/40], Step [240/391], Loss: 1.2745


 64%|██████▍   | 251/391 [00:42<00:23,  6.07it/s]

Epoch [19/40], Step [250/391], Loss: 1.2988


 67%|██████▋   | 261/391 [00:44<00:21,  6.06it/s]

Epoch [19/40], Step [260/391], Loss: 1.2864


 69%|██████▉   | 271/391 [00:45<00:19,  6.08it/s]

Epoch [19/40], Step [270/391], Loss: 1.2946


 72%|███████▏  | 281/391 [00:47<00:18,  6.05it/s]

Epoch [19/40], Step [280/391], Loss: 1.3196


 74%|███████▍  | 291/391 [00:48<00:16,  6.06it/s]

Epoch [19/40], Step [290/391], Loss: 1.3348


 77%|███████▋  | 301/391 [00:50<00:14,  6.05it/s]

Epoch [19/40], Step [300/391], Loss: 1.3009


 80%|███████▉  | 311/391 [00:52<00:13,  6.06it/s]

Epoch [19/40], Step [310/391], Loss: 1.2934


 82%|████████▏ | 321/391 [00:53<00:11,  6.07it/s]

Epoch [19/40], Step [320/391], Loss: 1.2652


 85%|████████▍ | 331/391 [00:55<00:09,  6.06it/s]

Epoch [19/40], Step [330/391], Loss: 1.3000


 87%|████████▋ | 341/391 [00:57<00:08,  6.05it/s]

Epoch [19/40], Step [340/391], Loss: 1.3179


 90%|████████▉ | 351/391 [00:58<00:06,  6.04it/s]

Epoch [19/40], Step [350/391], Loss: 1.2685


 92%|█████████▏| 361/391 [01:00<00:04,  6.07it/s]

Epoch [19/40], Step [360/391], Loss: 1.3049


 95%|█████████▍| 371/391 [01:02<00:03,  6.05it/s]

Epoch [19/40], Step [370/391], Loss: 1.3131


 97%|█████████▋| 381/391 [01:03<00:01,  5.93it/s]

Epoch [19/40], Step [380/391], Loss: 1.3214


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [19/40], Step [390/391], Loss: 1.3117





Test Accuracy of the student model on the test images: 48.50 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.77it/s]

Epoch [20/40], Step [10/391], Loss: 1.2370


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [20/40], Step [20/391], Loss: 1.2429


  8%|▊         | 31/391 [00:06<00:59,  6.05it/s]

Epoch [20/40], Step [30/391], Loss: 1.1940


 10%|█         | 41/391 [00:07<00:57,  6.05it/s]

Epoch [20/40], Step [40/391], Loss: 1.2054


 13%|█▎        | 51/391 [00:09<00:56,  6.05it/s]

Epoch [20/40], Step [50/391], Loss: 1.2297


 16%|█▌        | 61/391 [00:10<00:54,  6.07it/s]

Epoch [20/40], Step [60/391], Loss: 1.2005


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [20/40], Step [70/391], Loss: 1.2305


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [20/40], Step [80/391], Loss: 1.2289


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [20/40], Step [90/391], Loss: 1.2458


 26%|██▌       | 101/391 [00:17<00:47,  6.06it/s]

Epoch [20/40], Step [100/391], Loss: 1.2626


 28%|██▊       | 111/391 [00:19<00:46,  6.06it/s]

Epoch [20/40], Step [110/391], Loss: 1.2287


 31%|███       | 121/391 [00:20<00:44,  6.06it/s]

Epoch [20/40], Step [120/391], Loss: 1.2280


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [20/40], Step [130/391], Loss: 1.2110


 36%|███▌      | 141/391 [00:24<00:41,  6.07it/s]

Epoch [20/40], Step [140/391], Loss: 1.2451


 39%|███▊      | 151/391 [00:25<00:39,  6.05it/s]

Epoch [20/40], Step [150/391], Loss: 1.2537


 41%|████      | 161/391 [00:27<00:38,  6.05it/s]

Epoch [20/40], Step [160/391], Loss: 1.2452


 44%|████▎     | 171/391 [00:29<00:36,  6.05it/s]

Epoch [20/40], Step [170/391], Loss: 1.2302


 46%|████▋     | 181/391 [00:30<00:34,  6.07it/s]

Epoch [20/40], Step [180/391], Loss: 1.2312


 49%|████▉     | 191/391 [00:32<00:33,  6.04it/s]

Epoch [20/40], Step [190/391], Loss: 1.2228


 51%|█████▏    | 201/391 [00:34<00:31,  6.06it/s]

Epoch [20/40], Step [200/391], Loss: 1.2505


 54%|█████▍    | 211/391 [00:35<00:29,  6.05it/s]

Epoch [20/40], Step [210/391], Loss: 1.2217


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [20/40], Step [220/391], Loss: 1.2268


 59%|█████▉    | 231/391 [00:38<00:26,  6.08it/s]

Epoch [20/40], Step [230/391], Loss: 1.2363


 62%|██████▏   | 241/391 [00:40<00:24,  6.05it/s]

Epoch [20/40], Step [240/391], Loss: 1.2778


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [20/40], Step [250/391], Loss: 1.2179


 67%|██████▋   | 261/391 [00:43<00:21,  6.08it/s]

Epoch [20/40], Step [260/391], Loss: 1.2717


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [20/40], Step [270/391], Loss: 1.2690


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [20/40], Step [280/391], Loss: 1.2795


 74%|███████▍  | 291/391 [00:48<00:16,  6.05it/s]

Epoch [20/40], Step [290/391], Loss: 1.2297


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [20/40], Step [300/391], Loss: 1.2547


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [20/40], Step [310/391], Loss: 1.2358


 82%|████████▏ | 321/391 [00:53<00:11,  6.06it/s]

Epoch [20/40], Step [320/391], Loss: 1.2355


 85%|████████▍ | 331/391 [00:55<00:09,  6.03it/s]

Epoch [20/40], Step [330/391], Loss: 1.2592


 87%|████████▋ | 341/391 [00:57<00:08,  6.07it/s]

Epoch [20/40], Step [340/391], Loss: 1.2516


 90%|████████▉ | 351/391 [00:58<00:06,  6.05it/s]

Epoch [20/40], Step [350/391], Loss: 1.2618


 92%|█████████▏| 361/391 [01:00<00:04,  6.07it/s]

Epoch [20/40], Step [360/391], Loss: 1.2569


 95%|█████████▍| 371/391 [01:02<00:03,  6.05it/s]

Epoch [20/40], Step [370/391], Loss: 1.2294


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [20/40], Step [380/391], Loss: 1.2364


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [20/40], Step [390/391], Loss: 1.2423





Test Accuracy of the student model on the test images: 49.50 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.76it/s]

Epoch [21/40], Step [10/391], Loss: 1.1807


  5%|▌         | 21/391 [00:04<01:01,  6.06it/s]

Epoch [21/40], Step [20/391], Loss: 1.1780


  8%|▊         | 31/391 [00:06<00:59,  6.06it/s]

Epoch [21/40], Step [30/391], Loss: 1.1685


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [21/40], Step [40/391], Loss: 1.1755


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [21/40], Step [50/391], Loss: 1.1651


 16%|█▌        | 61/391 [00:11<00:54,  6.05it/s]

Epoch [21/40], Step [60/391], Loss: 1.1764


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [21/40], Step [70/391], Loss: 1.1300


 21%|██        | 81/391 [00:14<00:51,  6.07it/s]

Epoch [21/40], Step [80/391], Loss: 1.1740


 23%|██▎       | 91/391 [00:16<00:49,  6.05it/s]

Epoch [21/40], Step [90/391], Loss: 1.1504


 26%|██▌       | 101/391 [00:17<00:47,  6.06it/s]

Epoch [21/40], Step [100/391], Loss: 1.1591


 28%|██▊       | 111/391 [00:19<00:46,  6.07it/s]

Epoch [21/40], Step [110/391], Loss: 1.1498


 31%|███       | 121/391 [00:20<00:44,  6.07it/s]

Epoch [21/40], Step [120/391], Loss: 1.1631


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [21/40], Step [130/391], Loss: 1.1879


 36%|███▌      | 141/391 [00:24<00:41,  6.05it/s]

Epoch [21/40], Step [140/391], Loss: 1.1797


 39%|███▊      | 151/391 [00:25<00:39,  6.06it/s]

Epoch [21/40], Step [150/391], Loss: 1.2001


 41%|████      | 161/391 [00:27<00:37,  6.07it/s]

Epoch [21/40], Step [160/391], Loss: 1.2279


 44%|████▎     | 171/391 [00:29<00:36,  6.07it/s]

Epoch [21/40], Step [170/391], Loss: 1.1725


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [21/40], Step [180/391], Loss: 1.1925


 49%|████▉     | 191/391 [00:32<00:33,  6.06it/s]

Epoch [21/40], Step [190/391], Loss: 1.1845


 51%|█████▏    | 201/391 [00:34<00:31,  6.02it/s]

Epoch [21/40], Step [200/391], Loss: 1.1922


 54%|█████▍    | 211/391 [00:35<00:29,  6.05it/s]

Epoch [21/40], Step [210/391], Loss: 1.1952


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [21/40], Step [220/391], Loss: 1.2063


 59%|█████▉    | 231/391 [00:39<00:26,  6.04it/s]

Epoch [21/40], Step [230/391], Loss: 1.1913


 62%|██████▏   | 241/391 [00:40<00:24,  6.05it/s]

Epoch [21/40], Step [240/391], Loss: 1.2201


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [21/40], Step [250/391], Loss: 1.2608


 67%|██████▋   | 261/391 [00:44<00:21,  6.06it/s]

Epoch [21/40], Step [260/391], Loss: 1.2019


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [21/40], Step [270/391], Loss: 1.2130


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [21/40], Step [280/391], Loss: 1.2299


 74%|███████▍  | 291/391 [00:49<00:16,  6.06it/s]

Epoch [21/40], Step [290/391], Loss: 1.2229


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [21/40], Step [300/391], Loss: 1.2637


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [21/40], Step [310/391], Loss: 1.2116


 82%|████████▏ | 321/391 [00:53<00:11,  6.03it/s]

Epoch [21/40], Step [320/391], Loss: 1.2003


 85%|████████▍ | 331/391 [00:55<00:09,  6.05it/s]

Epoch [21/40], Step [330/391], Loss: 1.2077


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [21/40], Step [340/391], Loss: 1.2307


 90%|████████▉ | 351/391 [00:58<00:06,  6.06it/s]

Epoch [21/40], Step [350/391], Loss: 1.2090


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [21/40], Step [360/391], Loss: 1.2461


 95%|█████████▍| 371/391 [01:02<00:03,  6.06it/s]

Epoch [21/40], Step [370/391], Loss: 1.2481


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [21/40], Step [380/391], Loss: 1.2308


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [21/40], Step [390/391], Loss: 1.2370





Test Accuracy of the student model on the test images: 52.89 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.78it/s]

Epoch [22/40], Step [10/391], Loss: 1.1545


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [22/40], Step [20/391], Loss: 1.1576


  8%|▊         | 31/391 [00:06<00:59,  6.06it/s]

Epoch [22/40], Step [30/391], Loss: 1.1810


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [22/40], Step [40/391], Loss: 1.1885


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [22/40], Step [50/391], Loss: 1.1409


 16%|█▌        | 61/391 [00:10<00:54,  6.05it/s]

Epoch [22/40], Step [60/391], Loss: 1.1056


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [22/40], Step [70/391], Loss: 1.1418


 21%|██        | 81/391 [00:14<00:51,  6.07it/s]

Epoch [22/40], Step [80/391], Loss: 1.1295


 23%|██▎       | 91/391 [00:15<00:49,  6.07it/s]

Epoch [22/40], Step [90/391], Loss: 1.1365


 26%|██▌       | 101/391 [00:17<00:47,  6.05it/s]

Epoch [22/40], Step [100/391], Loss: 1.1459


 28%|██▊       | 111/391 [00:19<00:46,  6.06it/s]

Epoch [22/40], Step [110/391], Loss: 1.1258


 31%|███       | 121/391 [00:20<00:44,  6.06it/s]

Epoch [22/40], Step [120/391], Loss: 1.1346


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [22/40], Step [130/391], Loss: 1.1119


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [22/40], Step [140/391], Loss: 1.1138


 39%|███▊      | 151/391 [00:25<00:39,  6.05it/s]

Epoch [22/40], Step [150/391], Loss: 1.1519


 41%|████      | 161/391 [00:27<00:38,  6.05it/s]

Epoch [22/40], Step [160/391], Loss: 1.1361


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [22/40], Step [170/391], Loss: 1.1306


 46%|████▋     | 181/391 [00:30<00:34,  6.05it/s]

Epoch [22/40], Step [180/391], Loss: 1.1626


 49%|████▉     | 191/391 [00:32<00:33,  6.06it/s]

Epoch [22/40], Step [190/391], Loss: 1.1424


 51%|█████▏    | 201/391 [00:34<00:31,  6.07it/s]

Epoch [22/40], Step [200/391], Loss: 1.1955


 54%|█████▍    | 211/391 [00:35<00:29,  6.07it/s]

Epoch [22/40], Step [210/391], Loss: 1.1619


 57%|█████▋    | 221/391 [00:37<00:28,  6.04it/s]

Epoch [22/40], Step [220/391], Loss: 1.1999


 59%|█████▉    | 231/391 [00:39<00:26,  6.05it/s]

Epoch [22/40], Step [230/391], Loss: 1.1806


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [22/40], Step [240/391], Loss: 1.1749


 64%|██████▍   | 251/391 [00:42<00:23,  6.07it/s]

Epoch [22/40], Step [250/391], Loss: 1.1667


 67%|██████▋   | 261/391 [00:43<00:21,  6.05it/s]

Epoch [22/40], Step [260/391], Loss: 1.1775


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [22/40], Step [270/391], Loss: 1.1513


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [22/40], Step [280/391], Loss: 1.1470


 74%|███████▍  | 291/391 [00:48<00:16,  6.06it/s]

Epoch [22/40], Step [290/391], Loss: 1.1545


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [22/40], Step [300/391], Loss: 1.1642


 80%|███████▉  | 311/391 [00:52<00:13,  6.07it/s]

Epoch [22/40], Step [310/391], Loss: 1.1659


 82%|████████▏ | 321/391 [00:53<00:11,  6.07it/s]

Epoch [22/40], Step [320/391], Loss: 1.1911


 85%|████████▍ | 331/391 [00:55<00:09,  6.06it/s]

Epoch [22/40], Step [330/391], Loss: 1.1668


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [22/40], Step [340/391], Loss: 1.1951


 90%|████████▉ | 351/391 [00:58<00:06,  6.06it/s]

Epoch [22/40], Step [350/391], Loss: 1.1608


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [22/40], Step [360/391], Loss: 1.2029


 95%|█████████▍| 371/391 [01:02<00:03,  6.08it/s]

Epoch [22/40], Step [370/391], Loss: 1.1862


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [22/40], Step [380/391], Loss: 1.1586


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [22/40], Step [390/391], Loss: 1.2049





Test Accuracy of the student model on the test images: 50.04 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.78it/s]

Epoch [23/40], Step [10/391], Loss: 1.1300


  5%|▌         | 21/391 [00:04<01:01,  6.06it/s]

Epoch [23/40], Step [20/391], Loss: 1.1181


  8%|▊         | 31/391 [00:06<00:59,  6.07it/s]

Epoch [23/40], Step [30/391], Loss: 1.1219


 10%|█         | 41/391 [00:07<00:57,  6.05it/s]

Epoch [23/40], Step [40/391], Loss: 1.1390


 13%|█▎        | 51/391 [00:09<00:56,  6.05it/s]

Epoch [23/40], Step [50/391], Loss: 1.0987


 16%|█▌        | 61/391 [00:10<00:54,  6.06it/s]

Epoch [23/40], Step [60/391], Loss: 1.1064


 18%|█▊        | 71/391 [00:12<00:52,  6.05it/s]

Epoch [23/40], Step [70/391], Loss: 1.1068


 21%|██        | 81/391 [00:14<00:51,  6.05it/s]

Epoch [23/40], Step [80/391], Loss: 1.1168


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [23/40], Step [90/391], Loss: 1.1072


 26%|██▌       | 101/391 [00:17<00:47,  6.06it/s]

Epoch [23/40], Step [100/391], Loss: 1.0909


 28%|██▊       | 111/391 [00:19<00:46,  6.05it/s]

Epoch [23/40], Step [110/391], Loss: 1.0854


 31%|███       | 121/391 [00:20<00:44,  6.06it/s]

Epoch [23/40], Step [120/391], Loss: 1.0929


 34%|███▎      | 131/391 [00:22<00:42,  6.07it/s]

Epoch [23/40], Step [130/391], Loss: 1.0614


 36%|███▌      | 141/391 [00:24<00:41,  6.05it/s]

Epoch [23/40], Step [140/391], Loss: 1.0895


 39%|███▊      | 151/391 [00:25<00:39,  6.07it/s]

Epoch [23/40], Step [150/391], Loss: 1.0955


 41%|████      | 161/391 [00:27<00:37,  6.05it/s]

Epoch [23/40], Step [160/391], Loss: 1.1180


 44%|████▎     | 171/391 [00:29<00:36,  6.04it/s]

Epoch [23/40], Step [170/391], Loss: 1.0731


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [23/40], Step [180/391], Loss: 1.1138


 49%|████▉     | 191/391 [00:32<00:33,  6.06it/s]

Epoch [23/40], Step [190/391], Loss: 1.1142


 51%|█████▏    | 201/391 [00:34<00:31,  6.06it/s]

Epoch [23/40], Step [200/391], Loss: 1.1367


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [23/40], Step [210/391], Loss: 1.1074


 57%|█████▋    | 221/391 [00:37<00:28,  6.05it/s]

Epoch [23/40], Step [220/391], Loss: 1.1320


 59%|█████▉    | 231/391 [00:39<00:26,  6.05it/s]

Epoch [23/40], Step [230/391], Loss: 1.1368


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [23/40], Step [240/391], Loss: 1.1244


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [23/40], Step [250/391], Loss: 1.1427


 67%|██████▋   | 261/391 [00:43<00:21,  6.05it/s]

Epoch [23/40], Step [260/391], Loss: 1.1548


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [23/40], Step [270/391], Loss: 1.1562


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [23/40], Step [280/391], Loss: 1.1728


 74%|███████▍  | 291/391 [00:48<00:16,  6.05it/s]

Epoch [23/40], Step [290/391], Loss: 1.1592


 77%|███████▋  | 301/391 [00:50<00:14,  6.04it/s]

Epoch [23/40], Step [300/391], Loss: 1.1328


 80%|███████▉  | 311/391 [00:52<00:13,  6.06it/s]

Epoch [23/40], Step [310/391], Loss: 1.1858


 82%|████████▏ | 321/391 [00:53<00:11,  6.06it/s]

Epoch [23/40], Step [320/391], Loss: 1.1368


 85%|████████▍ | 331/391 [00:55<00:09,  6.05it/s]

Epoch [23/40], Step [330/391], Loss: 1.1424


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [23/40], Step [340/391], Loss: 1.1294


 90%|████████▉ | 351/391 [00:58<00:06,  6.06it/s]

Epoch [23/40], Step [350/391], Loss: 1.1399


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [23/40], Step [360/391], Loss: 1.1388


 95%|█████████▍| 371/391 [01:02<00:03,  6.07it/s]

Epoch [23/40], Step [370/391], Loss: 1.1574


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [23/40], Step [380/391], Loss: 1.1398


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [23/40], Step [390/391], Loss: 1.1159





Test Accuracy of the student model on the test images: 54.07 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:06,  5.73it/s]

Epoch [24/40], Step [10/391], Loss: 1.0996


  5%|▌         | 21/391 [00:04<01:01,  6.06it/s]

Epoch [24/40], Step [20/391], Loss: 1.0518


  8%|▊         | 31/391 [00:06<00:59,  6.06it/s]

Epoch [24/40], Step [30/391], Loss: 1.0760


 10%|█         | 41/391 [00:07<00:57,  6.05it/s]

Epoch [24/40], Step [40/391], Loss: 1.0741


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [24/40], Step [50/391], Loss: 1.0597


 16%|█▌        | 61/391 [00:11<00:54,  6.06it/s]

Epoch [24/40], Step [60/391], Loss: 1.0547


 18%|█▊        | 71/391 [00:12<00:52,  6.07it/s]

Epoch [24/40], Step [70/391], Loss: 1.0761


 21%|██        | 81/391 [00:14<00:51,  6.05it/s]

Epoch [24/40], Step [80/391], Loss: 1.0671


 23%|██▎       | 91/391 [00:16<00:49,  6.06it/s]

Epoch [24/40], Step [90/391], Loss: 1.0929


 26%|██▌       | 101/391 [00:17<00:47,  6.06it/s]

Epoch [24/40], Step [100/391], Loss: 1.0675


 28%|██▊       | 111/391 [00:19<00:46,  6.05it/s]

Epoch [24/40], Step [110/391], Loss: 1.0959


 31%|███       | 121/391 [00:20<00:44,  6.06it/s]

Epoch [24/40], Step [120/391], Loss: 1.0880


 34%|███▎      | 131/391 [00:22<00:42,  6.08it/s]

Epoch [24/40], Step [130/391], Loss: 1.0870


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [24/40], Step [140/391], Loss: 1.0440


 39%|███▊      | 151/391 [00:25<00:39,  6.05it/s]

Epoch [24/40], Step [150/391], Loss: 1.0932


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [24/40], Step [160/391], Loss: 1.0847


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [24/40], Step [170/391], Loss: 1.0373


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [24/40], Step [180/391], Loss: 1.0816


 49%|████▉     | 191/391 [00:32<00:33,  6.05it/s]

Epoch [24/40], Step [190/391], Loss: 1.0844


 51%|█████▏    | 201/391 [00:34<00:31,  6.06it/s]

Epoch [24/40], Step [200/391], Loss: 1.1016


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [24/40], Step [210/391], Loss: 1.0824


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [24/40], Step [220/391], Loss: 1.0701


 59%|█████▉    | 231/391 [00:39<00:26,  6.06it/s]

Epoch [24/40], Step [230/391], Loss: 1.1089


 62%|██████▏   | 241/391 [00:40<00:24,  6.07it/s]

Epoch [24/40], Step [240/391], Loss: 1.0892


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [24/40], Step [250/391], Loss: 1.1047


 67%|██████▋   | 261/391 [00:44<00:21,  6.06it/s]

Epoch [24/40], Step [260/391], Loss: 1.1019


 69%|██████▉   | 271/391 [00:45<00:19,  6.07it/s]

Epoch [24/40], Step [270/391], Loss: 1.0968


 72%|███████▏  | 281/391 [00:47<00:18,  6.07it/s]

Epoch [24/40], Step [280/391], Loss: 1.0772


 74%|███████▍  | 291/391 [00:49<00:16,  6.06it/s]

Epoch [24/40], Step [290/391], Loss: 1.0966


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [24/40], Step [300/391], Loss: 1.1068


 80%|███████▉  | 311/391 [00:52<00:13,  6.07it/s]

Epoch [24/40], Step [310/391], Loss: 1.0798


 82%|████████▏ | 321/391 [00:53<00:11,  6.08it/s]

Epoch [24/40], Step [320/391], Loss: 1.0963


 85%|████████▍ | 331/391 [00:55<00:09,  6.06it/s]

Epoch [24/40], Step [330/391], Loss: 1.0930


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [24/40], Step [340/391], Loss: 1.1209


 90%|████████▉ | 351/391 [00:58<00:06,  6.05it/s]

Epoch [24/40], Step [350/391], Loss: 1.0946


 92%|█████████▏| 361/391 [01:00<00:04,  6.07it/s]

Epoch [24/40], Step [360/391], Loss: 1.0976


 95%|█████████▍| 371/391 [01:02<00:03,  6.06it/s]

Epoch [24/40], Step [370/391], Loss: 1.1066


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [24/40], Step [380/391], Loss: 1.1116


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [24/40], Step [390/391], Loss: 1.0923





Test Accuracy of the student model on the test images: 52.48 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:06,  5.75it/s]

Epoch [25/40], Step [10/391], Loss: 1.0755


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [25/40], Step [20/391], Loss: 1.0593


  8%|▊         | 31/391 [00:06<00:59,  6.07it/s]

Epoch [25/40], Step [30/391], Loss: 1.0226


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [25/40], Step [40/391], Loss: 1.0515


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [25/40], Step [50/391], Loss: 1.0342


 16%|█▌        | 61/391 [00:11<00:54,  6.05it/s]

Epoch [25/40], Step [60/391], Loss: 1.0359


 18%|█▊        | 71/391 [00:12<00:52,  6.07it/s]

Epoch [25/40], Step [70/391], Loss: 1.0031


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [25/40], Step [80/391], Loss: 1.0141


 23%|██▎       | 91/391 [00:16<00:49,  6.06it/s]

Epoch [25/40], Step [90/391], Loss: 1.0157


 26%|██▌       | 101/391 [00:17<00:47,  6.05it/s]

Epoch [25/40], Step [100/391], Loss: 1.0430


 28%|██▊       | 111/391 [00:19<00:46,  6.05it/s]

Epoch [25/40], Step [110/391], Loss: 1.0111


 31%|███       | 121/391 [00:20<00:44,  6.06it/s]

Epoch [25/40], Step [120/391], Loss: 1.0265


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [25/40], Step [130/391], Loss: 1.0285


 36%|███▌      | 141/391 [00:24<00:41,  6.07it/s]

Epoch [25/40], Step [140/391], Loss: 1.0359


 39%|███▊      | 151/391 [00:25<00:39,  6.07it/s]

Epoch [25/40], Step [150/391], Loss: 1.0435


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [25/40], Step [160/391], Loss: 1.0255


 44%|████▎     | 171/391 [00:29<00:36,  6.07it/s]

Epoch [25/40], Step [170/391], Loss: 1.0494


 46%|████▋     | 181/391 [00:30<00:34,  6.07it/s]

Epoch [25/40], Step [180/391], Loss: 1.0188


 49%|████▉     | 191/391 [00:32<00:33,  6.06it/s]

Epoch [25/40], Step [190/391], Loss: 1.0124


 51%|█████▏    | 201/391 [00:34<00:31,  6.05it/s]

Epoch [25/40], Step [200/391], Loss: 1.0181


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [25/40], Step [210/391], Loss: 1.0230


 57%|█████▋    | 221/391 [00:37<00:28,  6.05it/s]

Epoch [25/40], Step [220/391], Loss: 1.0210


 59%|█████▉    | 231/391 [00:39<00:26,  6.07it/s]

Epoch [25/40], Step [230/391], Loss: 1.0511


 62%|██████▏   | 241/391 [00:40<00:24,  6.08it/s]

Epoch [25/40], Step [240/391], Loss: 1.0362


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [25/40], Step [250/391], Loss: 1.0610


 67%|██████▋   | 261/391 [00:44<00:21,  6.06it/s]

Epoch [25/40], Step [260/391], Loss: 1.0270


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [25/40], Step [270/391], Loss: 1.0663


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [25/40], Step [280/391], Loss: 1.0500


 74%|███████▍  | 291/391 [00:49<00:16,  6.05it/s]

Epoch [25/40], Step [290/391], Loss: 1.0301


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [25/40], Step [300/391], Loss: 1.0554


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [25/40], Step [310/391], Loss: 1.0796


 82%|████████▏ | 321/391 [00:53<00:11,  6.06it/s]

Epoch [25/40], Step [320/391], Loss: 1.0862


 85%|████████▍ | 331/391 [00:55<00:09,  6.05it/s]

Epoch [25/40], Step [330/391], Loss: 1.0456


 87%|████████▋ | 341/391 [00:57<00:08,  6.05it/s]

Epoch [25/40], Step [340/391], Loss: 1.0836


 90%|████████▉ | 351/391 [00:58<00:06,  6.07it/s]

Epoch [25/40], Step [350/391], Loss: 1.0943


 92%|█████████▏| 361/391 [01:00<00:04,  6.07it/s]

Epoch [25/40], Step [360/391], Loss: 1.0973


 95%|█████████▍| 371/391 [01:02<00:03,  6.07it/s]

Epoch [25/40], Step [370/391], Loss: 1.0866


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [25/40], Step [380/391], Loss: 1.0554


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [25/40], Step [390/391], Loss: 1.0612





Test Accuracy of the student model on the test images: 52.97 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:06,  5.75it/s]

Epoch [26/40], Step [10/391], Loss: 1.0423


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [26/40], Step [20/391], Loss: 1.0011


  8%|▊         | 31/391 [00:06<00:59,  6.06it/s]

Epoch [26/40], Step [30/391], Loss: 1.0044


 10%|█         | 41/391 [00:07<00:57,  6.04it/s]

Epoch [26/40], Step [40/391], Loss: 0.9902


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [26/40], Step [50/391], Loss: 0.9656


 16%|█▌        | 61/391 [00:11<00:54,  6.07it/s]

Epoch [26/40], Step [60/391], Loss: 0.9690


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [26/40], Step [70/391], Loss: 0.9649


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [26/40], Step [80/391], Loss: 0.9879


 23%|██▎       | 91/391 [00:15<00:49,  6.07it/s]

Epoch [26/40], Step [90/391], Loss: 0.9852


 26%|██▌       | 101/391 [00:17<00:47,  6.05it/s]

Epoch [26/40], Step [100/391], Loss: 0.9893


 28%|██▊       | 111/391 [00:19<00:46,  6.05it/s]

Epoch [26/40], Step [110/391], Loss: 1.0266


 31%|███       | 121/391 [00:20<00:44,  6.05it/s]

Epoch [26/40], Step [120/391], Loss: 0.9893


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [26/40], Step [130/391], Loss: 0.9873


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [26/40], Step [140/391], Loss: 1.0029


 39%|███▊      | 151/391 [00:25<00:39,  6.07it/s]

Epoch [26/40], Step [150/391], Loss: 1.0035


 41%|████      | 161/391 [00:27<00:37,  6.07it/s]

Epoch [26/40], Step [160/391], Loss: 1.0085


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [26/40], Step [170/391], Loss: 0.9817


 46%|████▋     | 181/391 [00:30<00:34,  6.07it/s]

Epoch [26/40], Step [180/391], Loss: 0.9845


 49%|████▉     | 191/391 [00:32<00:32,  6.07it/s]

Epoch [26/40], Step [190/391], Loss: 0.9948


 51%|█████▏    | 201/391 [00:34<00:31,  6.06it/s]

Epoch [26/40], Step [200/391], Loss: 1.0030


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [26/40], Step [210/391], Loss: 1.0213


 57%|█████▋    | 221/391 [00:37<00:28,  6.05it/s]

Epoch [26/40], Step [220/391], Loss: 1.0232


 59%|█████▉    | 231/391 [00:39<00:26,  6.06it/s]

Epoch [26/40], Step [230/391], Loss: 1.0272


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [26/40], Step [240/391], Loss: 0.9816


 64%|██████▍   | 251/391 [00:42<00:23,  6.07it/s]

Epoch [26/40], Step [250/391], Loss: 1.0168


 67%|██████▋   | 261/391 [00:44<00:21,  6.05it/s]

Epoch [26/40], Step [260/391], Loss: 1.0020


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [26/40], Step [270/391], Loss: 1.0246


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [26/40], Step [280/391], Loss: 1.0083


 74%|███████▍  | 291/391 [00:48<00:16,  6.06it/s]

Epoch [26/40], Step [290/391], Loss: 0.9882


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [26/40], Step [300/391], Loss: 1.0064


 80%|███████▉  | 311/391 [00:52<00:13,  6.07it/s]

Epoch [26/40], Step [310/391], Loss: 1.0171


 82%|████████▏ | 321/391 [00:53<00:11,  6.06it/s]

Epoch [26/40], Step [320/391], Loss: 1.0055


 85%|████████▍ | 331/391 [00:55<00:09,  6.04it/s]

Epoch [26/40], Step [330/391], Loss: 1.0266


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [26/40], Step [340/391], Loss: 1.0095


 90%|████████▉ | 351/391 [00:58<00:06,  6.05it/s]

Epoch [26/40], Step [350/391], Loss: 1.0098


 92%|█████████▏| 361/391 [01:00<00:04,  6.08it/s]

Epoch [26/40], Step [360/391], Loss: 1.0116


 95%|█████████▍| 371/391 [01:02<00:03,  6.05it/s]

Epoch [26/40], Step [370/391], Loss: 1.0123


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [26/40], Step [380/391], Loss: 1.0383


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [26/40], Step [390/391], Loss: 1.0103





Test Accuracy of the student model on the test images: 55.53 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.78it/s]

Epoch [27/40], Step [10/391], Loss: 0.9765


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [27/40], Step [20/391], Loss: 0.9577


  8%|▊         | 31/391 [00:05<00:59,  6.06it/s]

Epoch [27/40], Step [30/391], Loss: 0.9483


 10%|█         | 41/391 [00:07<00:57,  6.07it/s]

Epoch [27/40], Step [40/391], Loss: 0.9730


 13%|█▎        | 51/391 [00:09<00:56,  6.05it/s]

Epoch [27/40], Step [50/391], Loss: 0.9504


 16%|█▌        | 61/391 [00:10<00:54,  6.05it/s]

Epoch [27/40], Step [60/391], Loss: 0.9227


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [27/40], Step [70/391], Loss: 0.9524


 21%|██        | 81/391 [00:14<00:51,  6.07it/s]

Epoch [27/40], Step [80/391], Loss: 0.9600


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [27/40], Step [90/391], Loss: 0.9408


 26%|██▌       | 101/391 [00:17<00:47,  6.07it/s]

Epoch [27/40], Step [100/391], Loss: 0.9458


 28%|██▊       | 111/391 [00:19<00:46,  6.06it/s]

Epoch [27/40], Step [110/391], Loss: 0.9492


 31%|███       | 121/391 [00:20<00:44,  6.05it/s]

Epoch [27/40], Step [120/391], Loss: 0.9562


 34%|███▎      | 131/391 [00:22<00:42,  6.05it/s]

Epoch [27/40], Step [130/391], Loss: 0.9623


 36%|███▌      | 141/391 [00:24<00:41,  6.07it/s]

Epoch [27/40], Step [140/391], Loss: 0.9560


 39%|███▊      | 151/391 [00:25<00:39,  6.03it/s]

Epoch [27/40], Step [150/391], Loss: 0.9496


 41%|████      | 161/391 [00:27<00:38,  6.05it/s]

Epoch [27/40], Step [160/391], Loss: 0.9885


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [27/40], Step [170/391], Loss: 0.9691


 46%|████▋     | 181/391 [00:30<00:34,  6.05it/s]

Epoch [27/40], Step [180/391], Loss: 0.9867


 49%|████▉     | 191/391 [00:32<00:32,  6.06it/s]

Epoch [27/40], Step [190/391], Loss: 0.9587


 51%|█████▏    | 201/391 [00:34<00:31,  6.05it/s]

Epoch [27/40], Step [200/391], Loss: 0.9895


 54%|█████▍    | 211/391 [00:35<00:29,  6.05it/s]

Epoch [27/40], Step [210/391], Loss: 0.9620


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [27/40], Step [220/391], Loss: 0.9695


 59%|█████▉    | 231/391 [00:38<00:26,  6.07it/s]

Epoch [27/40], Step [230/391], Loss: 0.9597


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [27/40], Step [240/391], Loss: 0.9649


 64%|██████▍   | 251/391 [00:42<00:23,  6.05it/s]

Epoch [27/40], Step [250/391], Loss: 0.9842


 67%|██████▋   | 261/391 [00:43<00:21,  6.06it/s]

Epoch [27/40], Step [260/391], Loss: 0.9837


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [27/40], Step [270/391], Loss: 0.9887


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [27/40], Step [280/391], Loss: 0.9800


 74%|███████▍  | 291/391 [00:48<00:16,  6.06it/s]

Epoch [27/40], Step [290/391], Loss: 0.9955


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [27/40], Step [300/391], Loss: 0.9828


 80%|███████▉  | 311/391 [00:52<00:13,  6.07it/s]

Epoch [27/40], Step [310/391], Loss: 0.9921


 82%|████████▏ | 321/391 [00:53<00:11,  6.05it/s]

Epoch [27/40], Step [320/391], Loss: 0.9819


 85%|████████▍ | 331/391 [00:55<00:09,  6.08it/s]

Epoch [27/40], Step [330/391], Loss: 0.9781


 87%|████████▋ | 341/391 [00:57<00:08,  6.07it/s]

Epoch [27/40], Step [340/391], Loss: 0.9839


 90%|████████▉ | 351/391 [00:58<00:06,  6.05it/s]

Epoch [27/40], Step [350/391], Loss: 0.9832


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [27/40], Step [360/391], Loss: 0.9844


 95%|█████████▍| 371/391 [01:02<00:03,  6.08it/s]

Epoch [27/40], Step [370/391], Loss: 0.9694


 97%|█████████▋| 381/391 [01:03<00:01,  6.07it/s]

Epoch [27/40], Step [380/391], Loss: 0.9740


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [27/40], Step [390/391], Loss: 0.9727





Test Accuracy of the student model on the test images: 57.09 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.78it/s]

Epoch [28/40], Step [10/391], Loss: 0.9367


  5%|▌         | 21/391 [00:04<01:01,  6.04it/s]

Epoch [28/40], Step [20/391], Loss: 0.9446


  8%|▊         | 31/391 [00:06<00:59,  6.05it/s]

Epoch [28/40], Step [30/391], Loss: 0.9290


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [28/40], Step [40/391], Loss: 0.9350


 13%|█▎        | 51/391 [00:09<00:56,  6.07it/s]

Epoch [28/40], Step [50/391], Loss: 0.8979


 16%|█▌        | 61/391 [00:10<00:54,  6.07it/s]

Epoch [28/40], Step [60/391], Loss: 0.9244


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [28/40], Step [70/391], Loss: 0.9222


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [28/40], Step [80/391], Loss: 0.9226


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [28/40], Step [90/391], Loss: 0.9104


 26%|██▌       | 101/391 [00:17<00:47,  6.06it/s]

Epoch [28/40], Step [100/391], Loss: 0.9219


 28%|██▊       | 111/391 [00:19<00:46,  6.07it/s]

Epoch [28/40], Step [110/391], Loss: 0.9071


 31%|███       | 121/391 [00:20<00:44,  6.05it/s]

Epoch [28/40], Step [120/391], Loss: 0.9444


 34%|███▎      | 131/391 [00:22<00:43,  6.04it/s]

Epoch [28/40], Step [130/391], Loss: 0.9241


 36%|███▌      | 141/391 [00:24<00:41,  6.04it/s]

Epoch [28/40], Step [140/391], Loss: 0.9469


 39%|███▊      | 151/391 [00:25<00:39,  6.06it/s]

Epoch [28/40], Step [150/391], Loss: 0.9242


 41%|████      | 161/391 [00:27<00:37,  6.05it/s]

Epoch [28/40], Step [160/391], Loss: 0.9220


 44%|████▎     | 171/391 [00:29<00:36,  6.07it/s]

Epoch [28/40], Step [170/391], Loss: 0.9242


 46%|████▋     | 181/391 [00:30<00:34,  6.08it/s]

Epoch [28/40], Step [180/391], Loss: 0.9397


 49%|████▉     | 191/391 [00:32<00:32,  6.06it/s]

Epoch [28/40], Step [190/391], Loss: 0.9113


 51%|█████▏    | 201/391 [00:34<00:31,  6.07it/s]

Epoch [28/40], Step [200/391], Loss: 0.9224


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [28/40], Step [210/391], Loss: 0.9414


 57%|█████▋    | 221/391 [00:37<00:28,  6.07it/s]

Epoch [28/40], Step [220/391], Loss: 0.9394


 59%|█████▉    | 231/391 [00:38<00:26,  6.06it/s]

Epoch [28/40], Step [230/391], Loss: 0.9335


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [28/40], Step [240/391], Loss: 0.9380


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [28/40], Step [250/391], Loss: 0.9515


 67%|██████▋   | 261/391 [00:43<00:21,  6.05it/s]

Epoch [28/40], Step [260/391], Loss: 0.9460


 69%|██████▉   | 271/391 [00:45<00:19,  6.05it/s]

Epoch [28/40], Step [270/391], Loss: 0.9352


 72%|███████▏  | 281/391 [00:47<00:18,  6.05it/s]

Epoch [28/40], Step [280/391], Loss: 0.9266


 74%|███████▍  | 291/391 [00:48<00:16,  6.06it/s]

Epoch [28/40], Step [290/391], Loss: 0.9340


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [28/40], Step [300/391], Loss: 0.9433


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [28/40], Step [310/391], Loss: 0.9196


 82%|████████▏ | 321/391 [00:53<00:11,  6.06it/s]

Epoch [28/40], Step [320/391], Loss: 0.9566


 85%|████████▍ | 331/391 [00:55<00:09,  6.08it/s]

Epoch [28/40], Step [330/391], Loss: 0.9249


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [28/40], Step [340/391], Loss: 0.9524


 90%|████████▉ | 351/391 [00:58<00:06,  6.05it/s]

Epoch [28/40], Step [350/391], Loss: 0.9328


 92%|█████████▏| 361/391 [01:00<00:04,  6.05it/s]

Epoch [28/40], Step [360/391], Loss: 0.9412


 95%|█████████▍| 371/391 [01:02<00:03,  6.06it/s]

Epoch [28/40], Step [370/391], Loss: 0.9479


 97%|█████████▋| 381/391 [01:03<00:01,  6.05it/s]

Epoch [28/40], Step [380/391], Loss: 0.9620


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [28/40], Step [390/391], Loss: 0.9340





Test Accuracy of the student model on the test images: 57.15 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.81it/s]

Epoch [29/40], Step [10/391], Loss: 0.9003


  5%|▌         | 21/391 [00:04<01:01,  6.04it/s]

Epoch [29/40], Step [20/391], Loss: 0.9088


  8%|▊         | 31/391 [00:05<00:59,  6.06it/s]

Epoch [29/40], Step [30/391], Loss: 0.8890


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [29/40], Step [40/391], Loss: 0.8979


 13%|█▎        | 51/391 [00:09<00:55,  6.07it/s]

Epoch [29/40], Step [50/391], Loss: 0.8911


 16%|█▌        | 61/391 [00:10<00:54,  6.05it/s]

Epoch [29/40], Step [60/391], Loss: 0.9150


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [29/40], Step [70/391], Loss: 0.8907


 21%|██        | 81/391 [00:14<00:51,  6.07it/s]

Epoch [29/40], Step [80/391], Loss: 0.8833


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [29/40], Step [90/391], Loss: 0.8697


 26%|██▌       | 101/391 [00:17<00:47,  6.06it/s]

Epoch [29/40], Step [100/391], Loss: 0.8795


 28%|██▊       | 111/391 [00:19<00:46,  6.05it/s]

Epoch [29/40], Step [110/391], Loss: 0.8916


 31%|███       | 121/391 [00:20<00:44,  6.05it/s]

Epoch [29/40], Step [120/391], Loss: 0.8995


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [29/40], Step [130/391], Loss: 0.8620


 36%|███▌      | 141/391 [00:24<00:41,  6.05it/s]

Epoch [29/40], Step [140/391], Loss: 0.8712


 39%|███▊      | 151/391 [00:25<00:39,  6.05it/s]

Epoch [29/40], Step [150/391], Loss: 0.8718


 41%|████      | 161/391 [00:27<00:37,  6.05it/s]

Epoch [29/40], Step [160/391], Loss: 0.8906


 44%|████▎     | 171/391 [00:29<00:36,  6.04it/s]

Epoch [29/40], Step [170/391], Loss: 0.8899


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [29/40], Step [180/391], Loss: 0.9066


 49%|████▉     | 191/391 [00:32<00:33,  6.06it/s]

Epoch [29/40], Step [190/391], Loss: 0.8893


 51%|█████▏    | 201/391 [00:33<00:31,  6.06it/s]

Epoch [29/40], Step [200/391], Loss: 0.8702


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [29/40], Step [210/391], Loss: 0.8986


 57%|█████▋    | 221/391 [00:37<00:28,  6.05it/s]

Epoch [29/40], Step [220/391], Loss: 0.8820


 59%|█████▉    | 231/391 [00:38<00:26,  6.06it/s]

Epoch [29/40], Step [230/391], Loss: 0.9032


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [29/40], Step [240/391], Loss: 0.8948


 64%|██████▍   | 251/391 [00:42<00:23,  6.07it/s]

Epoch [29/40], Step [250/391], Loss: 0.9010


 67%|██████▋   | 261/391 [00:43<00:21,  6.05it/s]

Epoch [29/40], Step [260/391], Loss: 0.9200


 69%|██████▉   | 271/391 [00:45<00:19,  6.07it/s]

Epoch [29/40], Step [270/391], Loss: 0.9027


 72%|███████▏  | 281/391 [00:47<00:18,  6.08it/s]

Epoch [29/40], Step [280/391], Loss: 0.9000


 74%|███████▍  | 291/391 [00:48<00:16,  6.05it/s]

Epoch [29/40], Step [290/391], Loss: 0.9150


 77%|███████▋  | 301/391 [00:50<00:14,  6.07it/s]

Epoch [29/40], Step [300/391], Loss: 0.9020


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [29/40], Step [310/391], Loss: 0.8808


 82%|████████▏ | 321/391 [00:53<00:11,  6.05it/s]

Epoch [29/40], Step [320/391], Loss: 0.9045


 85%|████████▍ | 331/391 [00:55<00:09,  6.06it/s]

Epoch [29/40], Step [330/391], Loss: 0.8904


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [29/40], Step [340/391], Loss: 0.9093


 90%|████████▉ | 351/391 [00:58<00:06,  6.06it/s]

Epoch [29/40], Step [350/391], Loss: 0.9143


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [29/40], Step [360/391], Loss: 0.8985


 95%|█████████▍| 371/391 [01:02<00:03,  6.06it/s]

Epoch [29/40], Step [370/391], Loss: 0.9150


 97%|█████████▋| 381/391 [01:03<00:01,  6.08it/s]

Epoch [29/40], Step [380/391], Loss: 0.9036


100%|██████████| 391/391 [01:05<00:00,  5.99it/s]

Epoch [29/40], Step [390/391], Loss: 0.8946





Test Accuracy of the student model on the test images: 57.18 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:06,  5.74it/s]

Epoch [30/40], Step [10/391], Loss: 0.8775


  5%|▌         | 21/391 [00:04<01:01,  6.04it/s]

Epoch [30/40], Step [20/391], Loss: 0.8519


  8%|▊         | 31/391 [00:06<00:59,  6.06it/s]

Epoch [30/40], Step [30/391], Loss: 0.8473


 10%|█         | 41/391 [00:07<00:57,  6.07it/s]

Epoch [30/40], Step [40/391], Loss: 0.8427


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [30/40], Step [50/391], Loss: 0.8288


 16%|█▌        | 61/391 [00:11<00:54,  6.06it/s]

Epoch [30/40], Step [60/391], Loss: 0.8426


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [30/40], Step [70/391], Loss: 0.8533


 21%|██        | 81/391 [00:14<00:51,  6.04it/s]

Epoch [30/40], Step [80/391], Loss: 0.8501


 23%|██▎       | 91/391 [00:16<00:49,  6.05it/s]

Epoch [30/40], Step [90/391], Loss: 0.8596


 26%|██▌       | 101/391 [00:17<00:47,  6.07it/s]

Epoch [30/40], Step [100/391], Loss: 0.8341


 28%|██▊       | 111/391 [00:19<00:46,  6.08it/s]

Epoch [30/40], Step [110/391], Loss: 0.8468


 31%|███       | 121/391 [00:20<00:44,  6.05it/s]

Epoch [30/40], Step [120/391], Loss: 0.8586


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [30/40], Step [130/391], Loss: 0.8475


 36%|███▌      | 141/391 [00:24<00:41,  6.07it/s]

Epoch [30/40], Step [140/391], Loss: 0.8487


 39%|███▊      | 151/391 [00:25<00:39,  6.04it/s]

Epoch [30/40], Step [150/391], Loss: 0.8376


 41%|████      | 161/391 [00:27<00:38,  6.02it/s]

Epoch [30/40], Step [160/391], Loss: 0.8502


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [30/40], Step [170/391], Loss: 0.8631


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [30/40], Step [180/391], Loss: 0.8705


 49%|████▉     | 191/391 [00:32<00:33,  6.05it/s]

Epoch [30/40], Step [190/391], Loss: 0.8618


 51%|█████▏    | 201/391 [00:34<00:31,  6.07it/s]

Epoch [30/40], Step [200/391], Loss: 0.8563


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [30/40], Step [210/391], Loss: 0.8759


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [30/40], Step [220/391], Loss: 0.8643


 59%|█████▉    | 231/391 [00:39<00:26,  6.05it/s]

Epoch [30/40], Step [230/391], Loss: 0.8458


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [30/40], Step [240/391], Loss: 0.8621


 64%|██████▍   | 251/391 [00:42<00:23,  6.05it/s]

Epoch [30/40], Step [250/391], Loss: 0.8557


 67%|██████▋   | 261/391 [00:44<00:21,  6.06it/s]

Epoch [30/40], Step [260/391], Loss: 0.8621


 69%|██████▉   | 271/391 [00:45<00:19,  6.07it/s]

Epoch [30/40], Step [270/391], Loss: 0.8467


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [30/40], Step [280/391], Loss: 0.8700


 74%|███████▍  | 291/391 [00:49<00:16,  6.06it/s]

Epoch [30/40], Step [290/391], Loss: 0.8702


 77%|███████▋  | 301/391 [00:50<00:14,  6.07it/s]

Epoch [30/40], Step [300/391], Loss: 0.8658


 80%|███████▉  | 311/391 [00:52<00:13,  6.06it/s]

Epoch [30/40], Step [310/391], Loss: 0.8604


 82%|████████▏ | 321/391 [00:54<00:11,  6.06it/s]

Epoch [30/40], Step [320/391], Loss: 0.8557


 85%|████████▍ | 331/391 [00:55<00:09,  6.05it/s]

Epoch [30/40], Step [330/391], Loss: 0.8693


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [30/40], Step [340/391], Loss: 0.8699


 90%|████████▉ | 351/391 [00:58<00:06,  6.06it/s]

Epoch [30/40], Step [350/391], Loss: 0.8570


 92%|█████████▏| 361/391 [01:00<00:04,  6.05it/s]

Epoch [30/40], Step [360/391], Loss: 0.8599


 95%|█████████▍| 371/391 [01:02<00:03,  6.05it/s]

Epoch [30/40], Step [370/391], Loss: 0.8573


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [30/40], Step [380/391], Loss: 0.8798


100%|██████████| 391/391 [01:05<00:00,  5.96it/s]

Epoch [30/40], Step [390/391], Loss: 0.8754





Test Accuracy of the student model on the test images: 58.33 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.76it/s]

Epoch [31/40], Step [10/391], Loss: 0.8339


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [31/40], Step [20/391], Loss: 0.8273


  8%|▊         | 31/391 [00:06<00:59,  6.07it/s]

Epoch [31/40], Step [30/391], Loss: 0.8120


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [31/40], Step [40/391], Loss: 0.8200


 13%|█▎        | 51/391 [00:09<00:56,  6.07it/s]

Epoch [31/40], Step [50/391], Loss: 0.8124


 16%|█▌        | 61/391 [00:11<00:54,  6.04it/s]

Epoch [31/40], Step [60/391], Loss: 0.8430


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [31/40], Step [70/391], Loss: 0.8198


 21%|██        | 81/391 [00:14<00:51,  6.05it/s]

Epoch [31/40], Step [80/391], Loss: 0.8139


 23%|██▎       | 91/391 [00:16<00:49,  6.06it/s]

Epoch [31/40], Step [90/391], Loss: 0.8325


 26%|██▌       | 101/391 [00:17<00:49,  5.86it/s]

Epoch [31/40], Step [100/391], Loss: 0.8120


 28%|██▊       | 111/391 [00:19<00:46,  6.05it/s]

Epoch [31/40], Step [110/391], Loss: 0.8385


 31%|███       | 121/391 [00:21<00:44,  6.07it/s]

Epoch [31/40], Step [120/391], Loss: 0.8269


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [31/40], Step [130/391], Loss: 0.8432


 36%|███▌      | 141/391 [00:24<00:41,  6.05it/s]

Epoch [31/40], Step [140/391], Loss: 0.8108


 39%|███▊      | 151/391 [00:25<00:39,  6.06it/s]

Epoch [31/40], Step [150/391], Loss: 0.8341


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [31/40], Step [160/391], Loss: 0.8226


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [31/40], Step [170/391], Loss: 0.8220


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [31/40], Step [180/391], Loss: 0.8356


 49%|████▉     | 191/391 [00:32<00:33,  6.05it/s]

Epoch [31/40], Step [190/391], Loss: 0.8148


 51%|█████▏    | 201/391 [00:34<00:31,  6.06it/s]

Epoch [31/40], Step [200/391], Loss: 0.8296


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [31/40], Step [210/391], Loss: 0.8313


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [31/40], Step [220/391], Loss: 0.8430


 59%|█████▉    | 231/391 [00:39<00:26,  6.05it/s]

Epoch [31/40], Step [230/391], Loss: 0.8244


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [31/40], Step [240/391], Loss: 0.8423


 64%|██████▍   | 251/391 [00:42<00:23,  6.05it/s]

Epoch [31/40], Step [250/391], Loss: 0.8372


 67%|██████▋   | 261/391 [00:44<00:21,  6.05it/s]

Epoch [31/40], Step [260/391], Loss: 0.8100


 69%|██████▉   | 271/391 [00:45<00:19,  6.07it/s]

Epoch [31/40], Step [270/391], Loss: 0.8191


 72%|███████▏  | 281/391 [00:47<00:18,  6.04it/s]

Epoch [31/40], Step [280/391], Loss: 0.8271


 74%|███████▍  | 291/391 [00:49<00:16,  6.05it/s]

Epoch [31/40], Step [290/391], Loss: 0.8198


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [31/40], Step [300/391], Loss: 0.8387


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [31/40], Step [310/391], Loss: 0.8235


 82%|████████▏ | 321/391 [00:54<00:11,  6.06it/s]

Epoch [31/40], Step [320/391], Loss: 0.8308


 85%|████████▍ | 331/391 [00:55<00:09,  6.07it/s]

Epoch [31/40], Step [330/391], Loss: 0.8272


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [31/40], Step [340/391], Loss: 0.8174


 90%|████████▉ | 351/391 [00:58<00:06,  6.07it/s]

Epoch [31/40], Step [350/391], Loss: 0.8388


 92%|█████████▏| 361/391 [01:00<00:04,  6.07it/s]

Epoch [31/40], Step [360/391], Loss: 0.8412


 95%|█████████▍| 371/391 [01:02<00:03,  6.06it/s]

Epoch [31/40], Step [370/391], Loss: 0.8263


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [31/40], Step [380/391], Loss: 0.8398


100%|██████████| 391/391 [01:05<00:00,  5.96it/s]

Epoch [31/40], Step [390/391], Loss: 0.8319





Test Accuracy of the student model on the test images: 60.41 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.79it/s]

Epoch [32/40], Step [10/391], Loss: 0.7867


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [32/40], Step [20/391], Loss: 0.8014


  8%|▊         | 31/391 [00:05<00:59,  6.06it/s]

Epoch [32/40], Step [30/391], Loss: 0.7866


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [32/40], Step [40/391], Loss: 0.7836


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [32/40], Step [50/391], Loss: 0.7926


 16%|█▌        | 61/391 [00:10<00:54,  6.06it/s]

Epoch [32/40], Step [60/391], Loss: 0.7740


 18%|█▊        | 71/391 [00:12<00:52,  6.05it/s]

Epoch [32/40], Step [70/391], Loss: 0.7774


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [32/40], Step [80/391], Loss: 0.7728


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [32/40], Step [90/391], Loss: 0.7850


 26%|██▌       | 101/391 [00:17<00:47,  6.05it/s]

Epoch [32/40], Step [100/391], Loss: 0.7809


 28%|██▊       | 111/391 [00:19<00:46,  6.06it/s]

Epoch [32/40], Step [110/391], Loss: 0.7856


 31%|███       | 121/391 [00:20<00:44,  6.05it/s]

Epoch [32/40], Step [120/391], Loss: 0.7950


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [32/40], Step [130/391], Loss: 0.7749


 36%|███▌      | 141/391 [00:24<00:41,  6.07it/s]

Epoch [32/40], Step [140/391], Loss: 0.7879


 39%|███▊      | 151/391 [00:25<00:39,  6.05it/s]

Epoch [32/40], Step [150/391], Loss: 0.8095


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [32/40], Step [160/391], Loss: 0.7904


 44%|████▎     | 171/391 [00:29<00:36,  6.05it/s]

Epoch [32/40], Step [170/391], Loss: 0.7974


 46%|████▋     | 181/391 [00:30<00:34,  6.07it/s]

Epoch [32/40], Step [180/391], Loss: 0.7774


 49%|████▉     | 191/391 [00:32<00:33,  6.05it/s]

Epoch [32/40], Step [190/391], Loss: 0.7969


 51%|█████▏    | 201/391 [00:33<00:31,  6.07it/s]

Epoch [32/40], Step [200/391], Loss: 0.7911


 54%|█████▍    | 211/391 [00:35<00:29,  6.05it/s]

Epoch [32/40], Step [210/391], Loss: 0.8018


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [32/40], Step [220/391], Loss: 0.7863


 59%|█████▉    | 231/391 [00:38<00:26,  6.06it/s]

Epoch [32/40], Step [230/391], Loss: 0.8013


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [32/40], Step [240/391], Loss: 0.8018


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [32/40], Step [250/391], Loss: 0.7926


 67%|██████▋   | 261/391 [00:43<00:21,  6.06it/s]

Epoch [32/40], Step [260/391], Loss: 0.7961


 69%|██████▉   | 271/391 [00:45<00:19,  6.04it/s]

Epoch [32/40], Step [270/391], Loss: 0.7883


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [32/40], Step [280/391], Loss: 0.8056


 74%|███████▍  | 291/391 [00:48<00:16,  6.06it/s]

Epoch [32/40], Step [290/391], Loss: 0.7891


 77%|███████▋  | 301/391 [00:50<00:14,  6.05it/s]

Epoch [32/40], Step [300/391], Loss: 0.8072


 80%|███████▉  | 311/391 [00:52<00:13,  6.06it/s]

Epoch [32/40], Step [310/391], Loss: 0.8044


 82%|████████▏ | 321/391 [00:53<00:11,  6.05it/s]

Epoch [32/40], Step [320/391], Loss: 0.8099


 85%|████████▍ | 331/391 [00:55<00:09,  6.06it/s]

Epoch [32/40], Step [330/391], Loss: 0.8085


 87%|████████▋ | 341/391 [00:57<00:08,  6.05it/s]

Epoch [32/40], Step [340/391], Loss: 0.7972


 90%|████████▉ | 351/391 [00:58<00:06,  6.07it/s]

Epoch [32/40], Step [350/391], Loss: 0.7714


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [32/40], Step [360/391], Loss: 0.8086


 95%|█████████▍| 371/391 [01:02<00:03,  6.05it/s]

Epoch [32/40], Step [370/391], Loss: 0.7984


 97%|█████████▋| 381/391 [01:03<00:01,  6.05it/s]

Epoch [32/40], Step [380/391], Loss: 0.8241


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [32/40], Step [390/391], Loss: 0.7987





Test Accuracy of the student model on the test images: 61.14 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.80it/s]

Epoch [33/40], Step [10/391], Loss: 0.7523


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [33/40], Step [20/391], Loss: 0.7361


  8%|▊         | 31/391 [00:05<00:59,  6.07it/s]

Epoch [33/40], Step [30/391], Loss: 0.7485


 10%|█         | 41/391 [00:07<00:57,  6.07it/s]

Epoch [33/40], Step [40/391], Loss: 0.7460


 13%|█▎        | 51/391 [00:09<00:56,  6.05it/s]

Epoch [33/40], Step [50/391], Loss: 0.7573


 16%|█▌        | 61/391 [00:10<00:54,  6.06it/s]

Epoch [33/40], Step [60/391], Loss: 0.7640


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [33/40], Step [70/391], Loss: 0.7657


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [33/40], Step [80/391], Loss: 0.7539


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [33/40], Step [90/391], Loss: 0.7507


 26%|██▌       | 101/391 [00:17<00:47,  6.07it/s]

Epoch [33/40], Step [100/391], Loss: 0.7512


 28%|██▊       | 111/391 [00:19<00:46,  6.07it/s]

Epoch [33/40], Step [110/391], Loss: 0.7575


 31%|███       | 121/391 [00:20<00:44,  6.05it/s]

Epoch [33/40], Step [120/391], Loss: 0.7582


 34%|███▎      | 131/391 [00:22<00:42,  6.05it/s]

Epoch [33/40], Step [130/391], Loss: 0.7751


 36%|███▌      | 141/391 [00:24<00:41,  6.07it/s]

Epoch [33/40], Step [140/391], Loss: 0.7587


 39%|███▊      | 151/391 [00:25<00:39,  6.07it/s]

Epoch [33/40], Step [150/391], Loss: 0.7613


 41%|████      | 161/391 [00:27<00:37,  6.05it/s]

Epoch [33/40], Step [160/391], Loss: 0.7411


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [33/40], Step [170/391], Loss: 0.7525


 46%|████▋     | 181/391 [00:30<00:34,  6.07it/s]

Epoch [33/40], Step [180/391], Loss: 0.7721


 49%|████▉     | 191/391 [00:32<00:32,  6.07it/s]

Epoch [33/40], Step [190/391], Loss: 0.7532


 51%|█████▏    | 201/391 [00:33<00:31,  6.06it/s]

Epoch [33/40], Step [200/391], Loss: 0.7565


 54%|█████▍    | 211/391 [00:35<00:29,  6.07it/s]

Epoch [33/40], Step [210/391], Loss: 0.7605


 57%|█████▋    | 221/391 [00:37<00:28,  6.07it/s]

Epoch [33/40], Step [220/391], Loss: 0.7510


 59%|█████▉    | 231/391 [00:38<00:26,  6.05it/s]

Epoch [33/40], Step [230/391], Loss: 0.7789


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [33/40], Step [240/391], Loss: 0.7480


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [33/40], Step [250/391], Loss: 0.7677


 67%|██████▋   | 261/391 [00:43<00:21,  6.04it/s]

Epoch [33/40], Step [260/391], Loss: 0.7695


 69%|██████▉   | 271/391 [00:45<00:19,  6.05it/s]

Epoch [33/40], Step [270/391], Loss: 0.7826


 72%|███████▏  | 281/391 [00:47<00:18,  6.05it/s]

Epoch [33/40], Step [280/391], Loss: 0.7642


 74%|███████▍  | 291/391 [00:48<00:16,  6.07it/s]

Epoch [33/40], Step [290/391], Loss: 0.7590


 77%|███████▋  | 301/391 [00:50<00:14,  6.07it/s]

Epoch [33/40], Step [300/391], Loss: 0.7722


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [33/40], Step [310/391], Loss: 0.7776


 82%|████████▏ | 321/391 [00:53<00:11,  6.06it/s]

Epoch [33/40], Step [320/391], Loss: 0.7682


 85%|████████▍ | 331/391 [00:55<00:09,  6.07it/s]

Epoch [33/40], Step [330/391], Loss: 0.7730


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [33/40], Step [340/391], Loss: 0.7802


 90%|████████▉ | 351/391 [00:58<00:06,  6.05it/s]

Epoch [33/40], Step [350/391], Loss: 0.7773


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [33/40], Step [360/391], Loss: 0.7567


 95%|█████████▍| 371/391 [01:02<00:03,  6.05it/s]

Epoch [33/40], Step [370/391], Loss: 0.7768


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [33/40], Step [380/391], Loss: 0.7783


100%|██████████| 391/391 [01:05<00:00,  5.99it/s]

Epoch [33/40], Step [390/391], Loss: 0.7492





Test Accuracy of the student model on the test images: 61.74 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.77it/s]

Epoch [34/40], Step [10/391], Loss: 0.7518


  5%|▌         | 21/391 [00:04<01:01,  6.06it/s]

Epoch [34/40], Step [20/391], Loss: 0.7343


  8%|▊         | 31/391 [00:06<00:59,  6.04it/s]

Epoch [34/40], Step [30/391], Loss: 0.7452


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [34/40], Step [40/391], Loss: 0.7414


 13%|█▎        | 51/391 [00:09<00:56,  6.04it/s]

Epoch [34/40], Step [50/391], Loss: 0.7323


 16%|█▌        | 61/391 [00:11<00:54,  6.06it/s]

Epoch [34/40], Step [60/391], Loss: 0.7509


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [34/40], Step [70/391], Loss: 0.7348


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [34/40], Step [80/391], Loss: 0.7289


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [34/40], Step [90/391], Loss: 0.7487


 26%|██▌       | 101/391 [00:17<00:48,  6.04it/s]

Epoch [34/40], Step [100/391], Loss: 0.7265


 28%|██▊       | 111/391 [00:19<00:46,  6.05it/s]

Epoch [34/40], Step [110/391], Loss: 0.7237


 31%|███       | 121/391 [00:20<00:44,  6.05it/s]

Epoch [34/40], Step [120/391], Loss: 0.7199


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [34/40], Step [130/391], Loss: 0.7329


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [34/40], Step [140/391], Loss: 0.7335


 39%|███▊      | 151/391 [00:25<00:39,  6.07it/s]

Epoch [34/40], Step [150/391], Loss: 0.7396


 41%|████      | 161/391 [00:27<00:37,  6.05it/s]

Epoch [34/40], Step [160/391], Loss: 0.7403


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [34/40], Step [170/391], Loss: 0.7468


 46%|████▋     | 181/391 [00:30<00:34,  6.04it/s]

Epoch [34/40], Step [180/391], Loss: 0.7266


 49%|████▉     | 191/391 [00:32<00:33,  6.06it/s]

Epoch [34/40], Step [190/391], Loss: 0.7405


 51%|█████▏    | 201/391 [00:34<00:31,  6.06it/s]

Epoch [34/40], Step [200/391], Loss: 0.7496


 54%|█████▍    | 211/391 [00:35<00:29,  6.07it/s]

Epoch [34/40], Step [210/391], Loss: 0.7347


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [34/40], Step [220/391], Loss: 0.7310


 59%|█████▉    | 231/391 [00:39<00:26,  6.07it/s]

Epoch [34/40], Step [230/391], Loss: 0.7361


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [34/40], Step [240/391], Loss: 0.7266


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [34/40], Step [250/391], Loss: 0.7409


 67%|██████▋   | 261/391 [00:44<00:21,  6.06it/s]

Epoch [34/40], Step [260/391], Loss: 0.7403


 69%|██████▉   | 271/391 [00:45<00:19,  6.07it/s]

Epoch [34/40], Step [270/391], Loss: 0.7327


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [34/40], Step [280/391], Loss: 0.7540


 74%|███████▍  | 291/391 [00:48<00:16,  6.07it/s]

Epoch [34/40], Step [290/391], Loss: 0.7399


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [34/40], Step [300/391], Loss: 0.7318


 80%|███████▉  | 311/391 [00:52<00:13,  6.06it/s]

Epoch [34/40], Step [310/391], Loss: 0.7352


 82%|████████▏ | 321/391 [00:53<00:11,  6.05it/s]

Epoch [34/40], Step [320/391], Loss: 0.7493


 85%|████████▍ | 331/391 [00:55<00:09,  6.05it/s]

Epoch [34/40], Step [330/391], Loss: 0.7380


 87%|████████▋ | 341/391 [00:57<00:08,  6.07it/s]

Epoch [34/40], Step [340/391], Loss: 0.7465


 90%|████████▉ | 351/391 [00:58<00:06,  6.07it/s]

Epoch [34/40], Step [350/391], Loss: 0.7290


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [34/40], Step [360/391], Loss: 0.7483


 95%|█████████▍| 371/391 [01:02<00:03,  6.06it/s]

Epoch [34/40], Step [370/391], Loss: 0.7478


 97%|█████████▋| 381/391 [01:03<00:01,  6.07it/s]

Epoch [34/40], Step [380/391], Loss: 0.7278


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [34/40], Step [390/391], Loss: 0.7220





Test Accuracy of the student model on the test images: 62.01 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:06,  5.75it/s]

Epoch [35/40], Step [10/391], Loss: 0.7264


  5%|▌         | 21/391 [00:04<01:01,  6.05it/s]

Epoch [35/40], Step [20/391], Loss: 0.7076


  8%|▊         | 31/391 [00:06<00:59,  6.06it/s]

Epoch [35/40], Step [30/391], Loss: 0.7083


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [35/40], Step [40/391], Loss: 0.7080


 13%|█▎        | 51/391 [00:09<00:56,  6.07it/s]

Epoch [35/40], Step [50/391], Loss: 0.7036


 16%|█▌        | 61/391 [00:11<00:54,  6.06it/s]

Epoch [35/40], Step [60/391], Loss: 0.7095


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [35/40], Step [70/391], Loss: 0.7111


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [35/40], Step [80/391], Loss: 0.7068


 23%|██▎       | 91/391 [00:16<00:49,  6.07it/s]

Epoch [35/40], Step [90/391], Loss: 0.7239


 26%|██▌       | 101/391 [00:17<00:47,  6.05it/s]

Epoch [35/40], Step [100/391], Loss: 0.7185


 28%|██▊       | 111/391 [00:19<00:46,  6.06it/s]

Epoch [35/40], Step [110/391], Loss: 0.7315


 31%|███       | 121/391 [00:20<00:44,  6.05it/s]

Epoch [35/40], Step [120/391], Loss: 0.7088


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [35/40], Step [130/391], Loss: 0.7143


 36%|███▌      | 141/391 [00:24<00:41,  6.05it/s]

Epoch [35/40], Step [140/391], Loss: 0.7072


 39%|███▊      | 151/391 [00:25<00:39,  6.06it/s]

Epoch [35/40], Step [150/391], Loss: 0.7151


 41%|████      | 161/391 [00:27<00:37,  6.07it/s]

Epoch [35/40], Step [160/391], Loss: 0.7105


 44%|████▎     | 171/391 [00:29<00:36,  6.04it/s]

Epoch [35/40], Step [170/391], Loss: 0.7129


 46%|████▋     | 181/391 [00:30<00:34,  6.05it/s]

Epoch [35/40], Step [180/391], Loss: 0.7070


 49%|████▉     | 191/391 [00:32<00:32,  6.08it/s]

Epoch [35/40], Step [190/391], Loss: 0.7089


 51%|█████▏    | 201/391 [00:34<00:31,  6.06it/s]

Epoch [35/40], Step [200/391], Loss: 0.7156


 54%|█████▍    | 211/391 [00:35<00:29,  6.05it/s]

Epoch [35/40], Step [210/391], Loss: 0.6944


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [35/40], Step [220/391], Loss: 0.7063


 59%|█████▉    | 231/391 [00:39<00:26,  6.06it/s]

Epoch [35/40], Step [230/391], Loss: 0.7197


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [35/40], Step [240/391], Loss: 0.7040


 64%|██████▍   | 251/391 [00:42<00:23,  6.05it/s]

Epoch [35/40], Step [250/391], Loss: 0.7353


 67%|██████▋   | 261/391 [00:44<00:21,  6.07it/s]

Epoch [35/40], Step [260/391], Loss: 0.7217


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [35/40], Step [270/391], Loss: 0.7216


 72%|███████▏  | 281/391 [00:47<00:18,  6.05it/s]

Epoch [35/40], Step [280/391], Loss: 0.7194


 74%|███████▍  | 291/391 [00:49<00:16,  6.05it/s]

Epoch [35/40], Step [290/391], Loss: 0.7190


 77%|███████▋  | 301/391 [00:50<00:14,  6.04it/s]

Epoch [35/40], Step [300/391], Loss: 0.7152


 80%|███████▉  | 311/391 [00:52<00:13,  6.06it/s]

Epoch [35/40], Step [310/391], Loss: 0.7180


 82%|████████▏ | 321/391 [00:53<00:11,  6.05it/s]

Epoch [35/40], Step [320/391], Loss: 0.7059


 85%|████████▍ | 331/391 [00:55<00:09,  6.06it/s]

Epoch [35/40], Step [330/391], Loss: 0.7108


 87%|████████▋ | 341/391 [00:57<00:08,  6.07it/s]

Epoch [35/40], Step [340/391], Loss: 0.7281


 90%|████████▉ | 351/391 [00:58<00:06,  6.07it/s]

Epoch [35/40], Step [350/391], Loss: 0.7160


 92%|█████████▏| 361/391 [01:00<00:04,  6.07it/s]

Epoch [35/40], Step [360/391], Loss: 0.7168


 95%|█████████▍| 371/391 [01:02<00:03,  6.05it/s]

Epoch [35/40], Step [370/391], Loss: 0.7145


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [35/40], Step [380/391], Loss: 0.7054


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [35/40], Step [390/391], Loss: 0.7040





Test Accuracy of the student model on the test images: 62.58 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.78it/s]

Epoch [36/40], Step [10/391], Loss: 0.6893


  5%|▌         | 21/391 [00:04<01:01,  6.06it/s]

Epoch [36/40], Step [20/391], Loss: 0.6898


  8%|▊         | 31/391 [00:06<00:59,  6.07it/s]

Epoch [36/40], Step [30/391], Loss: 0.7048


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [36/40], Step [40/391], Loss: 0.6861


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [36/40], Step [50/391], Loss: 0.6966


 16%|█▌        | 61/391 [00:11<00:54,  6.05it/s]

Epoch [36/40], Step [60/391], Loss: 0.6807


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [36/40], Step [70/391], Loss: 0.6946


 21%|██        | 81/391 [00:14<00:51,  6.05it/s]

Epoch [36/40], Step [80/391], Loss: 0.6928


 23%|██▎       | 91/391 [00:15<00:49,  6.04it/s]

Epoch [36/40], Step [90/391], Loss: 0.6924


 26%|██▌       | 101/391 [00:17<00:47,  6.06it/s]

Epoch [36/40], Step [100/391], Loss: 0.6955


 28%|██▊       | 111/391 [00:19<00:46,  6.07it/s]

Epoch [36/40], Step [110/391], Loss: 0.6937


 31%|███       | 121/391 [00:20<00:44,  6.07it/s]

Epoch [36/40], Step [120/391], Loss: 0.6908


 34%|███▎      | 131/391 [00:22<00:42,  6.07it/s]

Epoch [36/40], Step [130/391], Loss: 0.7088


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [36/40], Step [140/391], Loss: 0.6739


 39%|███▊      | 151/391 [00:25<00:39,  6.05it/s]

Epoch [36/40], Step [150/391], Loss: 0.6942


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [36/40], Step [160/391], Loss: 0.6980


 44%|████▎     | 171/391 [00:29<00:36,  6.07it/s]

Epoch [36/40], Step [170/391], Loss: 0.6858


 46%|████▋     | 181/391 [00:30<00:34,  6.04it/s]

Epoch [36/40], Step [180/391], Loss: 0.6941


 49%|████▉     | 191/391 [00:32<00:33,  6.06it/s]

Epoch [36/40], Step [190/391], Loss: 0.7032


 51%|█████▏    | 201/391 [00:34<00:32,  5.88it/s]

Epoch [36/40], Step [200/391], Loss: 0.6817


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [36/40], Step [210/391], Loss: 0.6949


 57%|█████▋    | 221/391 [00:37<00:28,  6.05it/s]

Epoch [36/40], Step [220/391], Loss: 0.7023


 59%|█████▉    | 231/391 [00:39<00:26,  6.05it/s]

Epoch [36/40], Step [230/391], Loss: 0.6788


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [36/40], Step [240/391], Loss: 0.6833


 64%|██████▍   | 251/391 [00:42<00:23,  6.05it/s]

Epoch [36/40], Step [250/391], Loss: 0.7049


 67%|██████▋   | 261/391 [00:44<00:21,  6.06it/s]

Epoch [36/40], Step [260/391], Loss: 0.6963


 69%|██████▉   | 271/391 [00:45<00:19,  6.07it/s]

Epoch [36/40], Step [270/391], Loss: 0.7006


 72%|███████▏  | 281/391 [00:47<00:18,  6.07it/s]

Epoch [36/40], Step [280/391], Loss: 0.6851


 74%|███████▍  | 291/391 [00:48<00:16,  6.06it/s]

Epoch [36/40], Step [290/391], Loss: 0.6914


 77%|███████▋  | 301/391 [00:50<00:14,  6.05it/s]

Epoch [36/40], Step [300/391], Loss: 0.7000


 80%|███████▉  | 311/391 [00:52<00:13,  6.06it/s]

Epoch [36/40], Step [310/391], Loss: 0.7030


 82%|████████▏ | 321/391 [00:53<00:11,  6.07it/s]

Epoch [36/40], Step [320/391], Loss: 0.7014


 85%|████████▍ | 331/391 [00:55<00:09,  6.04it/s]

Epoch [36/40], Step [330/391], Loss: 0.6980


 87%|████████▋ | 341/391 [00:57<00:08,  6.05it/s]

Epoch [36/40], Step [340/391], Loss: 0.6863


 90%|████████▉ | 351/391 [00:58<00:06,  6.06it/s]

Epoch [36/40], Step [350/391], Loss: 0.7049


 92%|█████████▏| 361/391 [01:00<00:04,  6.07it/s]

Epoch [36/40], Step [360/391], Loss: 0.6853


 95%|█████████▍| 371/391 [01:02<00:03,  6.05it/s]

Epoch [36/40], Step [370/391], Loss: 0.6806


 97%|█████████▋| 381/391 [01:03<00:01,  6.05it/s]

Epoch [36/40], Step [380/391], Loss: 0.6960


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [36/40], Step [390/391], Loss: 0.6864





Test Accuracy of the student model on the test images: 62.72 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.79it/s]

Epoch [37/40], Step [10/391], Loss: 0.6886


  5%|▌         | 21/391 [00:04<01:01,  6.06it/s]

Epoch [37/40], Step [20/391], Loss: 0.6843


  8%|▊         | 31/391 [00:06<00:59,  6.07it/s]

Epoch [37/40], Step [30/391], Loss: 0.6801


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [37/40], Step [40/391], Loss: 0.6665


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [37/40], Step [50/391], Loss: 0.6738


 16%|█▌        | 61/391 [00:10<00:54,  6.06it/s]

Epoch [37/40], Step [60/391], Loss: 0.6665


 18%|█▊        | 71/391 [00:12<00:52,  6.05it/s]

Epoch [37/40], Step [70/391], Loss: 0.6693


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [37/40], Step [80/391], Loss: 0.6883


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [37/40], Step [90/391], Loss: 0.6646


 26%|██▌       | 101/391 [00:17<00:47,  6.06it/s]

Epoch [37/40], Step [100/391], Loss: 0.6852


 28%|██▊       | 111/391 [00:19<00:46,  6.06it/s]

Epoch [37/40], Step [110/391], Loss: 0.6810


 31%|███       | 121/391 [00:20<00:44,  6.02it/s]

Epoch [37/40], Step [120/391], Loss: 0.6788


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [37/40], Step [130/391], Loss: 0.6693


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [37/40], Step [140/391], Loss: 0.6846


 39%|███▊      | 151/391 [00:25<00:39,  6.03it/s]

Epoch [37/40], Step [150/391], Loss: 0.6946


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [37/40], Step [160/391], Loss: 0.6743


 44%|████▎     | 171/391 [00:29<00:36,  6.04it/s]

Epoch [37/40], Step [170/391], Loss: 0.6730


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [37/40], Step [180/391], Loss: 0.6771


 49%|████▉     | 191/391 [00:32<00:33,  6.04it/s]

Epoch [37/40], Step [190/391], Loss: 0.6738


 51%|█████▏    | 201/391 [00:34<00:31,  6.06it/s]

Epoch [37/40], Step [200/391], Loss: 0.6874


 54%|█████▍    | 211/391 [00:35<00:29,  6.05it/s]

Epoch [37/40], Step [210/391], Loss: 0.6961


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [37/40], Step [220/391], Loss: 0.6779


 59%|█████▉    | 231/391 [00:39<00:26,  6.06it/s]

Epoch [37/40], Step [230/391], Loss: 0.6778


 62%|██████▏   | 241/391 [00:40<00:24,  6.05it/s]

Epoch [37/40], Step [240/391], Loss: 0.6772


 64%|██████▍   | 251/391 [00:42<00:23,  6.07it/s]

Epoch [37/40], Step [250/391], Loss: 0.6634


 67%|██████▋   | 261/391 [00:43<00:21,  6.05it/s]

Epoch [37/40], Step [260/391], Loss: 0.6826


 69%|██████▉   | 271/391 [00:45<00:19,  6.07it/s]

Epoch [37/40], Step [270/391], Loss: 0.6924


 72%|███████▏  | 281/391 [00:47<00:18,  6.07it/s]

Epoch [37/40], Step [280/391], Loss: 0.6891


 74%|███████▍  | 291/391 [00:48<00:16,  6.05it/s]

Epoch [37/40], Step [290/391], Loss: 0.6707


 77%|███████▋  | 301/391 [00:50<00:14,  6.05it/s]

Epoch [37/40], Step [300/391], Loss: 0.6802


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [37/40], Step [310/391], Loss: 0.6974


 82%|████████▏ | 321/391 [00:53<00:11,  6.04it/s]

Epoch [37/40], Step [320/391], Loss: 0.6640


 85%|████████▍ | 331/391 [00:55<00:09,  6.06it/s]

Epoch [37/40], Step [330/391], Loss: 0.6806


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [37/40], Step [340/391], Loss: 0.6914


 90%|████████▉ | 351/391 [00:58<00:06,  6.06it/s]

Epoch [37/40], Step [350/391], Loss: 0.6771


 92%|█████████▏| 361/391 [01:00<00:04,  6.07it/s]

Epoch [37/40], Step [360/391], Loss: 0.6757


 95%|█████████▍| 371/391 [01:02<00:03,  6.05it/s]

Epoch [37/40], Step [370/391], Loss: 0.6731


 97%|█████████▋| 381/391 [01:03<00:01,  6.05it/s]

Epoch [37/40], Step [380/391], Loss: 0.6648


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [37/40], Step [390/391], Loss: 0.6786





Test Accuracy of the student model on the test images: 63.30 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:06,  5.75it/s]

Epoch [38/40], Step [10/391], Loss: 0.6533


  5%|▌         | 21/391 [00:04<01:01,  6.06it/s]

Epoch [38/40], Step [20/391], Loss: 0.6671


  8%|▊         | 31/391 [00:06<00:59,  6.05it/s]

Epoch [38/40], Step [30/391], Loss: 0.6585


 10%|█         | 41/391 [00:07<00:57,  6.07it/s]

Epoch [38/40], Step [40/391], Loss: 0.6513


 13%|█▎        | 51/391 [00:09<00:56,  6.05it/s]

Epoch [38/40], Step [50/391], Loss: 0.6626


 16%|█▌        | 61/391 [00:11<00:54,  6.06it/s]

Epoch [38/40], Step [60/391], Loss: 0.6716


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [38/40], Step [70/391], Loss: 0.6646


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [38/40], Step [80/391], Loss: 0.6605


 23%|██▎       | 91/391 [00:16<00:49,  6.04it/s]

Epoch [38/40], Step [90/391], Loss: 0.6432


 26%|██▌       | 101/391 [00:17<00:47,  6.05it/s]

Epoch [38/40], Step [100/391], Loss: 0.6747


 28%|██▊       | 111/391 [00:19<00:46,  6.07it/s]

Epoch [38/40], Step [110/391], Loss: 0.6676


 31%|███       | 121/391 [00:20<00:44,  6.06it/s]

Epoch [38/40], Step [120/391], Loss: 0.6653


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [38/40], Step [130/391], Loss: 0.6784


 36%|███▌      | 141/391 [00:24<00:41,  6.06it/s]

Epoch [38/40], Step [140/391], Loss: 0.6577


 39%|███▊      | 151/391 [00:25<00:39,  6.06it/s]

Epoch [38/40], Step [150/391], Loss: 0.6651


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [38/40], Step [160/391], Loss: 0.6697


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [38/40], Step [170/391], Loss: 0.6735


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [38/40], Step [180/391], Loss: 0.6797


 49%|████▉     | 191/391 [00:32<00:33,  6.04it/s]

Epoch [38/40], Step [190/391], Loss: 0.6570


 51%|█████▏    | 201/391 [00:34<00:31,  6.05it/s]

Epoch [38/40], Step [200/391], Loss: 0.6598


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [38/40], Step [210/391], Loss: 0.6666


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [38/40], Step [220/391], Loss: 0.6738


 59%|█████▉    | 231/391 [00:39<00:26,  6.05it/s]

Epoch [38/40], Step [230/391], Loss: 0.6774


 62%|██████▏   | 241/391 [00:40<00:24,  6.07it/s]

Epoch [38/40], Step [240/391], Loss: 0.6598


 64%|██████▍   | 251/391 [00:42<00:23,  6.07it/s]

Epoch [38/40], Step [250/391], Loss: 0.6780


 67%|██████▋   | 261/391 [00:44<00:21,  6.06it/s]

Epoch [38/40], Step [260/391], Loss: 0.6632


 69%|██████▉   | 271/391 [00:45<00:19,  6.05it/s]

Epoch [38/40], Step [270/391], Loss: 0.6666


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [38/40], Step [280/391], Loss: 0.6611


 74%|███████▍  | 291/391 [00:49<00:16,  6.04it/s]

Epoch [38/40], Step [290/391], Loss: 0.6732


 77%|███████▋  | 301/391 [00:50<00:14,  6.04it/s]

Epoch [38/40], Step [300/391], Loss: 0.6758


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [38/40], Step [310/391], Loss: 0.6611


 82%|████████▏ | 321/391 [00:53<00:11,  6.05it/s]

Epoch [38/40], Step [320/391], Loss: 0.6678


 85%|████████▍ | 331/391 [00:55<00:09,  6.06it/s]

Epoch [38/40], Step [330/391], Loss: 0.6611


 87%|████████▋ | 341/391 [00:57<00:08,  6.04it/s]

Epoch [38/40], Step [340/391], Loss: 0.6632


 90%|████████▉ | 351/391 [00:58<00:06,  6.05it/s]

Epoch [38/40], Step [350/391], Loss: 0.6738


 92%|█████████▏| 361/391 [01:00<00:04,  6.06it/s]

Epoch [38/40], Step [360/391], Loss: 0.6617


 95%|█████████▍| 371/391 [01:02<00:03,  6.05it/s]

Epoch [38/40], Step [370/391], Loss: 0.6674


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [38/40], Step [380/391], Loss: 0.6828


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [38/40], Step [390/391], Loss: 0.6696





Test Accuracy of the student model on the test images: 63.39 %
Saved best model to interim_ta3.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.76it/s]

Epoch [39/40], Step [10/391], Loss: 0.6569


  5%|▌         | 21/391 [00:04<01:01,  6.04it/s]

Epoch [39/40], Step [20/391], Loss: 0.6708


  8%|▊         | 31/391 [00:06<00:59,  6.06it/s]

Epoch [39/40], Step [30/391], Loss: 0.6464


 10%|█         | 41/391 [00:07<00:57,  6.04it/s]

Epoch [39/40], Step [40/391], Loss: 0.6550


 13%|█▎        | 51/391 [00:09<00:56,  6.06it/s]

Epoch [39/40], Step [50/391], Loss: 0.6625


 16%|█▌        | 61/391 [00:11<00:54,  6.07it/s]

Epoch [39/40], Step [60/391], Loss: 0.6527


 18%|█▊        | 71/391 [00:12<00:52,  6.05it/s]

Epoch [39/40], Step [70/391], Loss: 0.6478


 21%|██        | 81/391 [00:14<00:51,  6.05it/s]

Epoch [39/40], Step [80/391], Loss: 0.6665


 23%|██▎       | 91/391 [00:16<00:49,  6.05it/s]

Epoch [39/40], Step [90/391], Loss: 0.6566


 26%|██▌       | 101/391 [00:17<00:47,  6.07it/s]

Epoch [39/40], Step [100/391], Loss: 0.6599


 28%|██▊       | 111/391 [00:19<00:46,  6.06it/s]

Epoch [39/40], Step [110/391], Loss: 0.6628


 31%|███       | 121/391 [00:20<00:44,  6.06it/s]

Epoch [39/40], Step [120/391], Loss: 0.6617


 34%|███▎      | 131/391 [00:22<00:42,  6.06it/s]

Epoch [39/40], Step [130/391], Loss: 0.6668


 36%|███▌      | 141/391 [00:24<00:41,  6.07it/s]

Epoch [39/40], Step [140/391], Loss: 0.6487


 39%|███▊      | 151/391 [00:25<00:39,  6.06it/s]

Epoch [39/40], Step [150/391], Loss: 0.6628


 41%|████      | 161/391 [00:27<00:38,  6.05it/s]

Epoch [39/40], Step [160/391], Loss: 0.6559


 44%|████▎     | 171/391 [00:29<00:36,  6.07it/s]

Epoch [39/40], Step [170/391], Loss: 0.6687


 46%|████▋     | 181/391 [00:30<00:34,  6.02it/s]

Epoch [39/40], Step [180/391], Loss: 0.6628


 49%|████▉     | 191/391 [00:32<00:32,  6.07it/s]

Epoch [39/40], Step [190/391], Loss: 0.6684


 51%|█████▏    | 201/391 [00:34<00:31,  6.07it/s]

Epoch [39/40], Step [200/391], Loss: 0.6512


 54%|█████▍    | 211/391 [00:35<00:29,  6.05it/s]

Epoch [39/40], Step [210/391], Loss: 0.6538


 57%|█████▋    | 221/391 [00:37<00:28,  6.07it/s]

Epoch [39/40], Step [220/391], Loss: 0.6605


 59%|█████▉    | 231/391 [00:39<00:26,  6.06it/s]

Epoch [39/40], Step [230/391], Loss: 0.6666


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [39/40], Step [240/391], Loss: 0.6541


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [39/40], Step [250/391], Loss: 0.6672


 67%|██████▋   | 261/391 [00:44<00:21,  6.07it/s]

Epoch [39/40], Step [260/391], Loss: 0.6669


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [39/40], Step [270/391], Loss: 0.6647


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [39/40], Step [280/391], Loss: 0.6388


 74%|███████▍  | 291/391 [00:48<00:16,  6.06it/s]

Epoch [39/40], Step [290/391], Loss: 0.6527


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [39/40], Step [300/391], Loss: 0.6615


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [39/40], Step [310/391], Loss: 0.6563


 82%|████████▏ | 321/391 [00:53<00:11,  6.06it/s]

Epoch [39/40], Step [320/391], Loss: 0.6513


 85%|████████▍ | 331/391 [00:55<00:09,  6.05it/s]

Epoch [39/40], Step [330/391], Loss: 0.6556


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [39/40], Step [340/391], Loss: 0.6572


 90%|████████▉ | 351/391 [00:58<00:06,  6.06it/s]

Epoch [39/40], Step [350/391], Loss: 0.6676


 92%|█████████▏| 361/391 [01:00<00:04,  6.04it/s]

Epoch [39/40], Step [360/391], Loss: 0.6512


 95%|█████████▍| 371/391 [01:02<00:03,  6.06it/s]

Epoch [39/40], Step [370/391], Loss: 0.6741


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [39/40], Step [380/391], Loss: 0.6670


100%|██████████| 391/391 [01:05<00:00,  5.97it/s]

Epoch [39/40], Step [390/391], Loss: 0.6647





Test Accuracy of the student model on the test images: 63.27 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:05,  5.79it/s]

Epoch [40/40], Step [10/391], Loss: 0.6584


  5%|▌         | 21/391 [00:04<01:01,  6.04it/s]

Epoch [40/40], Step [20/391], Loss: 0.6436


  8%|▊         | 31/391 [00:05<00:59,  6.04it/s]

Epoch [40/40], Step [30/391], Loss: 0.6634


 10%|█         | 41/391 [00:07<00:57,  6.06it/s]

Epoch [40/40], Step [40/391], Loss: 0.6614


 13%|█▎        | 51/391 [00:09<00:56,  6.07it/s]

Epoch [40/40], Step [50/391], Loss: 0.6500


 16%|█▌        | 61/391 [00:10<00:54,  6.06it/s]

Epoch [40/40], Step [60/391], Loss: 0.6460


 18%|█▊        | 71/391 [00:12<00:52,  6.06it/s]

Epoch [40/40], Step [70/391], Loss: 0.6470


 21%|██        | 81/391 [00:14<00:51,  6.06it/s]

Epoch [40/40], Step [80/391], Loss: 0.6513


 23%|██▎       | 91/391 [00:15<00:49,  6.06it/s]

Epoch [40/40], Step [90/391], Loss: 0.6543


 26%|██▌       | 101/391 [00:17<00:47,  6.05it/s]

Epoch [40/40], Step [100/391], Loss: 0.6495


 28%|██▊       | 111/391 [00:19<00:46,  6.06it/s]

Epoch [40/40], Step [110/391], Loss: 0.6544


 31%|███       | 121/391 [00:20<00:44,  6.05it/s]

Epoch [40/40], Step [120/391], Loss: 0.6670


 34%|███▎      | 131/391 [00:22<00:42,  6.05it/s]

Epoch [40/40], Step [130/391], Loss: 0.6483


 36%|███▌      | 141/391 [00:24<00:41,  6.05it/s]

Epoch [40/40], Step [140/391], Loss: 0.6543


 39%|███▊      | 151/391 [00:25<00:39,  6.07it/s]

Epoch [40/40], Step [150/391], Loss: 0.6559


 41%|████      | 161/391 [00:27<00:37,  6.06it/s]

Epoch [40/40], Step [160/391], Loss: 0.6570


 44%|████▎     | 171/391 [00:29<00:36,  6.06it/s]

Epoch [40/40], Step [170/391], Loss: 0.6530


 46%|████▋     | 181/391 [00:30<00:34,  6.06it/s]

Epoch [40/40], Step [180/391], Loss: 0.6515


 49%|████▉     | 191/391 [00:32<00:32,  6.07it/s]

Epoch [40/40], Step [190/391], Loss: 0.6499


 51%|█████▏    | 201/391 [00:33<00:31,  6.06it/s]

Epoch [40/40], Step [200/391], Loss: 0.6424


 54%|█████▍    | 211/391 [00:35<00:29,  6.06it/s]

Epoch [40/40], Step [210/391], Loss: 0.6622


 57%|█████▋    | 221/391 [00:37<00:28,  6.06it/s]

Epoch [40/40], Step [220/391], Loss: 0.6554


 59%|█████▉    | 231/391 [00:38<00:26,  6.07it/s]

Epoch [40/40], Step [230/391], Loss: 0.6627


 62%|██████▏   | 241/391 [00:40<00:24,  6.06it/s]

Epoch [40/40], Step [240/391], Loss: 0.6719


 64%|██████▍   | 251/391 [00:42<00:23,  6.06it/s]

Epoch [40/40], Step [250/391], Loss: 0.6606


 67%|██████▋   | 261/391 [00:43<00:21,  6.07it/s]

Epoch [40/40], Step [260/391], Loss: 0.6559


 69%|██████▉   | 271/391 [00:45<00:19,  6.06it/s]

Epoch [40/40], Step [270/391], Loss: 0.6556


 72%|███████▏  | 281/391 [00:47<00:18,  6.06it/s]

Epoch [40/40], Step [280/391], Loss: 0.6446


 74%|███████▍  | 291/391 [00:48<00:16,  6.05it/s]

Epoch [40/40], Step [290/391], Loss: 0.6550


 77%|███████▋  | 301/391 [00:50<00:14,  6.06it/s]

Epoch [40/40], Step [300/391], Loss: 0.6601


 80%|███████▉  | 311/391 [00:52<00:13,  6.05it/s]

Epoch [40/40], Step [310/391], Loss: 0.6634


 82%|████████▏ | 321/391 [00:53<00:11,  6.06it/s]

Epoch [40/40], Step [320/391], Loss: 0.6533


 85%|████████▍ | 331/391 [00:55<00:09,  6.05it/s]

Epoch [40/40], Step [330/391], Loss: 0.6267


 87%|████████▋ | 341/391 [00:57<00:08,  6.06it/s]

Epoch [40/40], Step [340/391], Loss: 0.6536


 90%|████████▉ | 351/391 [00:58<00:06,  6.06it/s]

Epoch [40/40], Step [350/391], Loss: 0.6545


 92%|█████████▏| 361/391 [01:00<00:04,  6.04it/s]

Epoch [40/40], Step [360/391], Loss: 0.6496


 95%|█████████▍| 371/391 [01:02<00:03,  6.05it/s]

Epoch [40/40], Step [370/391], Loss: 0.6761


 97%|█████████▋| 381/391 [01:03<00:01,  6.06it/s]

Epoch [40/40], Step [380/391], Loss: 0.6505


100%|██████████| 391/391 [01:05<00:00,  5.98it/s]

Epoch [40/40], Step [390/391], Loss: 0.6646





Test Accuracy of the student model on the test images: 63.42 %
Saved best model to interim_ta3.pth
Best Accuracy: 63.42 %
Final
Adjusted learning rate: 0.1


  3%|▎         | 11/391 [00:02<00:59,  6.37it/s]

Epoch [1/40], Step [10/391], Loss: 6.7884


  5%|▌         | 21/391 [00:04<00:54,  6.73it/s]

Epoch [1/40], Step [20/391], Loss: 6.5052


  8%|▊         | 31/391 [00:05<00:53,  6.75it/s]

Epoch [1/40], Step [30/391], Loss: 6.2861


 10%|█         | 41/391 [00:06<00:51,  6.74it/s]

Epoch [1/40], Step [40/391], Loss: 6.2800


 13%|█▎        | 51/391 [00:08<00:50,  6.75it/s]

Epoch [1/40], Step [50/391], Loss: 5.8833


 16%|█▌        | 61/391 [00:09<00:48,  6.74it/s]

Epoch [1/40], Step [60/391], Loss: 5.5238


 18%|█▊        | 71/391 [00:11<00:47,  6.74it/s]

Epoch [1/40], Step [70/391], Loss: 5.5053


 21%|██        | 81/391 [00:12<00:46,  6.72it/s]

Epoch [1/40], Step [80/391], Loss: 5.2306


 23%|██▎       | 91/391 [00:14<00:44,  6.73it/s]

Epoch [1/40], Step [90/391], Loss: 5.1952


 26%|██▌       | 101/391 [00:15<00:42,  6.75it/s]

Epoch [1/40], Step [100/391], Loss: 5.1866


 28%|██▊       | 111/391 [00:17<00:41,  6.73it/s]

Epoch [1/40], Step [110/391], Loss: 5.1553


 31%|███       | 121/391 [00:18<00:40,  6.74it/s]

Epoch [1/40], Step [120/391], Loss: 5.0413


 34%|███▎      | 131/391 [00:20<00:38,  6.74it/s]

Epoch [1/40], Step [130/391], Loss: 4.9459


 36%|███▌      | 141/391 [00:21<00:37,  6.73it/s]

Epoch [1/40], Step [140/391], Loss: 4.9501


 39%|███▊      | 151/391 [00:23<00:35,  6.74it/s]

Epoch [1/40], Step [150/391], Loss: 4.8874


 41%|████      | 161/391 [00:24<00:34,  6.73it/s]

Epoch [1/40], Step [160/391], Loss: 4.8332


 44%|████▎     | 171/391 [00:26<00:32,  6.74it/s]

Epoch [1/40], Step [170/391], Loss: 4.8741


 46%|████▋     | 181/391 [00:27<00:31,  6.76it/s]

Epoch [1/40], Step [180/391], Loss: 4.6936


 49%|████▉     | 191/391 [00:29<00:29,  6.76it/s]

Epoch [1/40], Step [190/391], Loss: 4.6596


 51%|█████▏    | 201/391 [00:30<00:28,  6.75it/s]

Epoch [1/40], Step [200/391], Loss: 4.7769


 54%|█████▍    | 211/391 [00:32<00:26,  6.71it/s]

Epoch [1/40], Step [210/391], Loss: 4.7324


 57%|█████▋    | 221/391 [00:33<00:25,  6.75it/s]

Epoch [1/40], Step [220/391], Loss: 4.6769


 59%|█████▉    | 231/391 [00:35<00:23,  6.73it/s]

Epoch [1/40], Step [230/391], Loss: 4.6617


 62%|██████▏   | 241/391 [00:36<00:22,  6.73it/s]

Epoch [1/40], Step [240/391], Loss: 4.6597


 64%|██████▍   | 251/391 [00:38<00:20,  6.71it/s]

Epoch [1/40], Step [250/391], Loss: 4.5734


 67%|██████▋   | 261/391 [00:39<00:19,  6.74it/s]

Epoch [1/40], Step [260/391], Loss: 4.5046


 69%|██████▉   | 271/391 [00:41<00:17,  6.72it/s]

Epoch [1/40], Step [270/391], Loss: 4.5446


 72%|███████▏  | 281/391 [00:42<00:16,  6.75it/s]

Epoch [1/40], Step [280/391], Loss: 4.3925


 74%|███████▍  | 291/391 [00:44<00:14,  6.74it/s]

Epoch [1/40], Step [290/391], Loss: 4.5073


 77%|███████▋  | 301/391 [00:45<00:13,  6.75it/s]

Epoch [1/40], Step [300/391], Loss: 4.6120


 80%|███████▉  | 311/391 [00:47<00:11,  6.74it/s]

Epoch [1/40], Step [310/391], Loss: 4.5031


 82%|████████▏ | 321/391 [00:48<00:10,  6.73it/s]

Epoch [1/40], Step [320/391], Loss: 4.4551


 85%|████████▍ | 331/391 [00:49<00:08,  6.72it/s]

Epoch [1/40], Step [330/391], Loss: 4.4064


 87%|████████▋ | 341/391 [00:51<00:07,  6.75it/s]

Epoch [1/40], Step [340/391], Loss: 4.6266


 90%|████████▉ | 351/391 [00:52<00:05,  6.74it/s]

Epoch [1/40], Step [350/391], Loss: 4.4253


 92%|█████████▏| 361/391 [00:54<00:04,  6.75it/s]

Epoch [1/40], Step [360/391], Loss: 4.3589


 95%|█████████▍| 371/391 [00:55<00:02,  6.73it/s]

Epoch [1/40], Step [370/391], Loss: 4.3575


 97%|█████████▋| 381/391 [00:57<00:01,  6.74it/s]

Epoch [1/40], Step [380/391], Loss: 4.4063


100%|██████████| 391/391 [00:58<00:00,  6.64it/s]

Epoch [1/40], Step [390/391], Loss: 4.4410





Test Accuracy of the student model on the test images: 5.63 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.23it/s]

Epoch [2/40], Step [10/391], Loss: 5.8061


  5%|▌         | 21/391 [00:04<00:56,  6.51it/s]

Epoch [2/40], Step [20/391], Loss: 5.4140


  8%|▊         | 31/391 [00:05<00:55,  6.52it/s]

Epoch [2/40], Step [30/391], Loss: 5.2154


 10%|█         | 41/391 [00:07<00:53,  6.53it/s]

Epoch [2/40], Step [40/391], Loss: 5.3246


 13%|█▎        | 51/391 [00:08<00:52,  6.50it/s]

Epoch [2/40], Step [50/391], Loss: 5.0526


 16%|█▌        | 61/391 [00:10<00:50,  6.52it/s]

Epoch [2/40], Step [60/391], Loss: 4.7217


 18%|█▊        | 71/391 [00:11<00:49,  6.52it/s]

Epoch [2/40], Step [70/391], Loss: 4.7581


 21%|██        | 81/391 [00:13<00:47,  6.52it/s]

Epoch [2/40], Step [80/391], Loss: 4.6918


 23%|██▎       | 91/391 [00:14<00:45,  6.53it/s]

Epoch [2/40], Step [90/391], Loss: 4.7350


 26%|██▌       | 101/391 [00:16<00:44,  6.53it/s]

Epoch [2/40], Step [100/391], Loss: 4.4692


 28%|██▊       | 111/391 [00:17<00:42,  6.52it/s]

Epoch [2/40], Step [110/391], Loss: 4.4757


 31%|███       | 121/391 [00:19<00:41,  6.49it/s]

Epoch [2/40], Step [120/391], Loss: 4.4404


 34%|███▎      | 131/391 [00:20<00:39,  6.51it/s]

Epoch [2/40], Step [130/391], Loss: 4.2942


 36%|███▌      | 141/391 [00:22<00:38,  6.50it/s]

Epoch [2/40], Step [140/391], Loss: 4.3756


 39%|███▊      | 151/391 [00:23<00:36,  6.52it/s]

Epoch [2/40], Step [150/391], Loss: 4.2517


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [2/40], Step [160/391], Loss: 4.2797


 44%|████▎     | 171/391 [00:27<00:33,  6.53it/s]

Epoch [2/40], Step [170/391], Loss: 4.1423


 46%|████▋     | 181/391 [00:28<00:32,  6.53it/s]

Epoch [2/40], Step [180/391], Loss: 4.1644


 49%|████▉     | 191/391 [00:30<00:30,  6.52it/s]

Epoch [2/40], Step [190/391], Loss: 4.0715


 51%|█████▏    | 201/391 [00:31<00:29,  6.50it/s]

Epoch [2/40], Step [200/391], Loss: 3.9261


 54%|█████▍    | 211/391 [00:33<00:27,  6.50it/s]

Epoch [2/40], Step [210/391], Loss: 3.9894


 57%|█████▋    | 221/391 [00:34<00:26,  6.53it/s]

Epoch [2/40], Step [220/391], Loss: 3.9103


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [2/40], Step [230/391], Loss: 3.9125


 62%|██████▏   | 241/391 [00:37<00:23,  6.51it/s]

Epoch [2/40], Step [240/391], Loss: 3.8281


 64%|██████▍   | 251/391 [00:39<00:21,  6.51it/s]

Epoch [2/40], Step [250/391], Loss: 3.7955


 67%|██████▋   | 261/391 [00:40<00:19,  6.52it/s]

Epoch [2/40], Step [260/391], Loss: 3.8073


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [2/40], Step [270/391], Loss: 3.7177


 72%|███████▏  | 281/391 [00:43<00:16,  6.51it/s]

Epoch [2/40], Step [280/391], Loss: 3.8540


 74%|███████▍  | 291/391 [00:45<00:15,  6.52it/s]

Epoch [2/40], Step [290/391], Loss: 3.6947


 77%|███████▋  | 301/391 [00:46<00:13,  6.52it/s]

Epoch [2/40], Step [300/391], Loss: 3.5604


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [2/40], Step [310/391], Loss: 3.4798


 82%|████████▏ | 321/391 [00:50<00:10,  6.51it/s]

Epoch [2/40], Step [320/391], Loss: 3.5265


 85%|████████▍ | 331/391 [00:51<00:09,  6.53it/s]

Epoch [2/40], Step [330/391], Loss: 3.5556


 87%|████████▋ | 341/391 [00:53<00:07,  6.52it/s]

Epoch [2/40], Step [340/391], Loss: 3.4982


 90%|████████▉ | 351/391 [00:54<00:06,  6.52it/s]

Epoch [2/40], Step [350/391], Loss: 3.4009


 92%|█████████▏| 361/391 [00:56<00:04,  6.51it/s]

Epoch [2/40], Step [360/391], Loss: 3.3282


 95%|█████████▍| 371/391 [00:57<00:03,  6.51it/s]

Epoch [2/40], Step [370/391], Loss: 3.3255


 97%|█████████▋| 381/391 [00:59<00:01,  6.51it/s]

Epoch [2/40], Step [380/391], Loss: 3.2720


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [2/40], Step [390/391], Loss: 3.3543





Test Accuracy of the student model on the test images: 25.18 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.16it/s]

Epoch [3/40], Step [10/391], Loss: 3.2784


  5%|▌         | 21/391 [00:04<00:56,  6.51it/s]

Epoch [3/40], Step [20/391], Loss: 3.2164


  8%|▊         | 31/391 [00:05<00:55,  6.53it/s]

Epoch [3/40], Step [30/391], Loss: 3.2772


 10%|█         | 41/391 [00:07<00:53,  6.53it/s]

Epoch [3/40], Step [40/391], Loss: 3.2185


 13%|█▎        | 51/391 [00:08<00:52,  6.53it/s]

Epoch [3/40], Step [50/391], Loss: 3.1906


 16%|█▌        | 61/391 [00:10<00:50,  6.51it/s]

Epoch [3/40], Step [60/391], Loss: 3.1709


 18%|█▊        | 71/391 [00:11<00:49,  6.52it/s]

Epoch [3/40], Step [70/391], Loss: 3.0766


 21%|██        | 81/391 [00:13<00:47,  6.51it/s]

Epoch [3/40], Step [80/391], Loss: 3.0526


 23%|██▎       | 91/391 [00:14<00:46,  6.52it/s]

Epoch [3/40], Step [90/391], Loss: 3.0796


 26%|██▌       | 101/391 [00:16<00:44,  6.51it/s]

Epoch [3/40], Step [100/391], Loss: 3.1479


 28%|██▊       | 111/391 [00:18<00:42,  6.52it/s]

Epoch [3/40], Step [110/391], Loss: 3.0222


 31%|███       | 121/391 [00:19<00:41,  6.52it/s]

Epoch [3/40], Step [120/391], Loss: 2.9388


 34%|███▎      | 131/391 [00:21<00:39,  6.53it/s]

Epoch [3/40], Step [130/391], Loss: 2.9219


 36%|███▌      | 141/391 [00:22<00:38,  6.53it/s]

Epoch [3/40], Step [140/391], Loss: 2.9503


 39%|███▊      | 151/391 [00:24<00:36,  6.51it/s]

Epoch [3/40], Step [150/391], Loss: 2.8076


 41%|████      | 161/391 [00:25<00:35,  6.51it/s]

Epoch [3/40], Step [160/391], Loss: 2.8310


 44%|████▎     | 171/391 [00:27<00:33,  6.49it/s]

Epoch [3/40], Step [170/391], Loss: 2.9005


 46%|████▋     | 181/391 [00:28<00:32,  6.52it/s]

Epoch [3/40], Step [180/391], Loss: 2.8026


 49%|████▉     | 191/391 [00:30<00:30,  6.51it/s]

Epoch [3/40], Step [190/391], Loss: 2.7662


 51%|█████▏    | 201/391 [00:31<00:29,  6.52it/s]

Epoch [3/40], Step [200/391], Loss: 2.8217


 54%|█████▍    | 211/391 [00:33<00:27,  6.53it/s]

Epoch [3/40], Step [210/391], Loss: 2.8397


 57%|█████▋    | 221/391 [00:34<00:26,  6.53it/s]

Epoch [3/40], Step [220/391], Loss: 2.7299


 59%|█████▉    | 231/391 [00:36<00:24,  6.53it/s]

Epoch [3/40], Step [230/391], Loss: 2.6721


 62%|██████▏   | 241/391 [00:37<00:23,  6.51it/s]

Epoch [3/40], Step [240/391], Loss: 2.7646


 64%|██████▍   | 251/391 [00:39<00:21,  6.50it/s]

Epoch [3/40], Step [250/391], Loss: 2.7144


 67%|██████▋   | 261/391 [00:41<00:19,  6.51it/s]

Epoch [3/40], Step [260/391], Loss: 2.6614


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [3/40], Step [270/391], Loss: 2.7123


 72%|███████▏  | 281/391 [00:44<00:16,  6.51it/s]

Epoch [3/40], Step [280/391], Loss: 2.6118


 74%|███████▍  | 291/391 [00:45<00:15,  6.51it/s]

Epoch [3/40], Step [290/391], Loss: 2.6560


 77%|███████▋  | 301/391 [00:47<00:13,  6.53it/s]

Epoch [3/40], Step [300/391], Loss: 2.6283


 80%|███████▉  | 311/391 [00:48<00:12,  6.53it/s]

Epoch [3/40], Step [310/391], Loss: 2.6503


 82%|████████▏ | 321/391 [00:50<00:10,  6.52it/s]

Epoch [3/40], Step [320/391], Loss: 2.5532


 85%|████████▍ | 331/391 [00:51<00:09,  6.53it/s]

Epoch [3/40], Step [330/391], Loss: 2.6316


 87%|████████▋ | 341/391 [00:53<00:07,  6.51it/s]

Epoch [3/40], Step [340/391], Loss: 2.5282


 90%|████████▉ | 351/391 [00:54<00:06,  6.53it/s]

Epoch [3/40], Step [350/391], Loss: 2.5213


 92%|█████████▏| 361/391 [00:56<00:04,  6.53it/s]

Epoch [3/40], Step [360/391], Loss: 2.5139


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [3/40], Step [370/391], Loss: 2.5157


 97%|█████████▋| 381/391 [00:59<00:01,  6.51it/s]

Epoch [3/40], Step [380/391], Loss: 2.5579


100%|██████████| 391/391 [01:00<00:00,  6.42it/s]

Epoch [3/40], Step [390/391], Loss: 2.6173





Test Accuracy of the student model on the test images: 23.11 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.17it/s]

Epoch [4/40], Step [10/391], Loss: 2.4476


  5%|▌         | 21/391 [00:04<00:56,  6.50it/s]

Epoch [4/40], Step [20/391], Loss: 2.4616


  8%|▊         | 31/391 [00:05<00:55,  6.53it/s]

Epoch [4/40], Step [30/391], Loss: 2.3747


 10%|█         | 41/391 [00:07<00:53,  6.52it/s]

Epoch [4/40], Step [40/391], Loss: 2.3654


 13%|█▎        | 51/391 [00:08<00:52,  6.51it/s]

Epoch [4/40], Step [50/391], Loss: 2.3981


 16%|█▌        | 61/391 [00:10<00:50,  6.52it/s]

Epoch [4/40], Step [60/391], Loss: 2.3727


 18%|█▊        | 71/391 [00:11<00:49,  6.51it/s]

Epoch [4/40], Step [70/391], Loss: 2.4175


 21%|██        | 81/391 [00:13<00:47,  6.53it/s]

Epoch [4/40], Step [80/391], Loss: 2.4131


 23%|██▎       | 91/391 [00:14<00:46,  6.52it/s]

Epoch [4/40], Step [90/391], Loss: 2.3670


 26%|██▌       | 101/391 [00:16<00:44,  6.53it/s]

Epoch [4/40], Step [100/391], Loss: 2.3327


 28%|██▊       | 111/391 [00:17<00:42,  6.52it/s]

Epoch [4/40], Step [110/391], Loss: 2.3731


 31%|███       | 121/391 [00:19<00:41,  6.51it/s]

Epoch [4/40], Step [120/391], Loss: 2.3101


 34%|███▎      | 131/391 [00:20<00:39,  6.50it/s]

Epoch [4/40], Step [130/391], Loss: 2.3438


 36%|███▌      | 141/391 [00:22<00:38,  6.51it/s]

Epoch [4/40], Step [140/391], Loss: 2.3190


 39%|███▊      | 151/391 [00:23<00:36,  6.51it/s]

Epoch [4/40], Step [150/391], Loss: 2.3233


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [4/40], Step [160/391], Loss: 2.2704


 44%|████▎     | 171/391 [00:27<00:33,  6.51it/s]

Epoch [4/40], Step [170/391], Loss: 2.3814


 46%|████▋     | 181/391 [00:28<00:32,  6.49it/s]

Epoch [4/40], Step [180/391], Loss: 2.2785


 49%|████▉     | 191/391 [00:30<00:30,  6.51it/s]

Epoch [4/40], Step [190/391], Loss: 2.2330


 51%|█████▏    | 201/391 [00:31<00:29,  6.51it/s]

Epoch [4/40], Step [200/391], Loss: 2.2061


 54%|█████▍    | 211/391 [00:33<00:27,  6.52it/s]

Epoch [4/40], Step [210/391], Loss: 2.2117


 57%|█████▋    | 221/391 [00:34<00:26,  6.51it/s]

Epoch [4/40], Step [220/391], Loss: 2.2186


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [4/40], Step [230/391], Loss: 2.2342


 62%|██████▏   | 241/391 [00:37<00:23,  6.52it/s]

Epoch [4/40], Step [240/391], Loss: 2.2042


 64%|██████▍   | 251/391 [00:39<00:21,  6.53it/s]

Epoch [4/40], Step [250/391], Loss: 2.2683


 67%|██████▋   | 261/391 [00:40<00:19,  6.52it/s]

Epoch [4/40], Step [260/391], Loss: 2.2080


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [4/40], Step [270/391], Loss: 2.1976


 72%|███████▏  | 281/391 [00:43<00:16,  6.51it/s]

Epoch [4/40], Step [280/391], Loss: 2.1675


 74%|███████▍  | 291/391 [00:45<00:15,  6.53it/s]

Epoch [4/40], Step [290/391], Loss: 2.2037


 77%|███████▋  | 301/391 [00:47<00:13,  6.51it/s]

Epoch [4/40], Step [300/391], Loss: 2.2030


 80%|███████▉  | 311/391 [00:48<00:12,  6.51it/s]

Epoch [4/40], Step [310/391], Loss: 2.1368


 82%|████████▏ | 321/391 [00:50<00:10,  6.52it/s]

Epoch [4/40], Step [320/391], Loss: 2.1848


 85%|████████▍ | 331/391 [00:51<00:09,  6.50it/s]

Epoch [4/40], Step [330/391], Loss: 2.0933


 87%|████████▋ | 341/391 [00:53<00:07,  6.50it/s]

Epoch [4/40], Step [340/391], Loss: 2.1722


 90%|████████▉ | 351/391 [00:54<00:06,  6.51it/s]

Epoch [4/40], Step [350/391], Loss: 2.1215


 92%|█████████▏| 361/391 [00:56<00:04,  6.50it/s]

Epoch [4/40], Step [360/391], Loss: 2.1073


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [4/40], Step [370/391], Loss: 2.1234


 97%|█████████▋| 381/391 [00:59<00:01,  6.54it/s]

Epoch [4/40], Step [380/391], Loss: 2.0831


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [4/40], Step [390/391], Loss: 2.0637





Test Accuracy of the student model on the test images: 37.33 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:02,  6.12it/s]

Epoch [5/40], Step [10/391], Loss: 2.0067


  5%|▌         | 21/391 [00:04<00:56,  6.50it/s]

Epoch [5/40], Step [20/391], Loss: 2.0372


  8%|▊         | 31/391 [00:05<00:55,  6.52it/s]

Epoch [5/40], Step [30/391], Loss: 2.0335


 10%|█         | 41/391 [00:07<00:53,  6.53it/s]

Epoch [5/40], Step [40/391], Loss: 1.9796


 13%|█▎        | 51/391 [00:08<00:51,  6.54it/s]

Epoch [5/40], Step [50/391], Loss: 1.9761


 16%|█▌        | 61/391 [00:10<00:50,  6.53it/s]

Epoch [5/40], Step [60/391], Loss: 2.0614


 18%|█▊        | 71/391 [00:11<00:48,  6.54it/s]

Epoch [5/40], Step [70/391], Loss: 1.9740


 21%|██        | 81/391 [00:13<00:47,  6.52it/s]

Epoch [5/40], Step [80/391], Loss: 1.9629


 23%|██▎       | 91/391 [00:15<00:46,  6.50it/s]

Epoch [5/40], Step [90/391], Loss: 1.9487


 26%|██▌       | 101/391 [00:16<00:44,  6.53it/s]

Epoch [5/40], Step [100/391], Loss: 1.9836


 28%|██▊       | 111/391 [00:18<00:42,  6.52it/s]

Epoch [5/40], Step [110/391], Loss: 2.0303


 31%|███       | 121/391 [00:19<00:41,  6.51it/s]

Epoch [5/40], Step [120/391], Loss: 1.9920


 34%|███▎      | 131/391 [00:21<00:39,  6.51it/s]

Epoch [5/40], Step [130/391], Loss: 2.0408


 36%|███▌      | 141/391 [00:22<00:38,  6.51it/s]

Epoch [5/40], Step [140/391], Loss: 1.9957


 39%|███▊      | 151/391 [00:24<00:36,  6.52it/s]

Epoch [5/40], Step [150/391], Loss: 1.9549


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [5/40], Step [160/391], Loss: 2.0359


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [5/40], Step [170/391], Loss: 2.0309


 46%|████▋     | 181/391 [00:28<00:32,  6.52it/s]

Epoch [5/40], Step [180/391], Loss: 1.9848


 49%|████▉     | 191/391 [00:30<00:30,  6.51it/s]

Epoch [5/40], Step [190/391], Loss: 1.9491


 51%|█████▏    | 201/391 [00:31<00:29,  6.53it/s]

Epoch [5/40], Step [200/391], Loss: 1.9464


 54%|█████▍    | 211/391 [00:33<00:27,  6.52it/s]

Epoch [5/40], Step [210/391], Loss: 1.9490


 57%|█████▋    | 221/391 [00:34<00:26,  6.51it/s]

Epoch [5/40], Step [220/391], Loss: 1.9633


 59%|█████▉    | 231/391 [00:36<00:24,  6.50it/s]

Epoch [5/40], Step [230/391], Loss: 1.9394


 62%|██████▏   | 241/391 [00:38<00:23,  6.51it/s]

Epoch [5/40], Step [240/391], Loss: 1.9486


 64%|██████▍   | 251/391 [00:39<00:21,  6.53it/s]

Epoch [5/40], Step [250/391], Loss: 1.9269


 67%|██████▋   | 261/391 [00:41<00:19,  6.53it/s]

Epoch [5/40], Step [260/391], Loss: 1.9244


 69%|██████▉   | 271/391 [00:42<00:18,  6.52it/s]

Epoch [5/40], Step [270/391], Loss: 1.9431


 72%|███████▏  | 281/391 [00:44<00:16,  6.53it/s]

Epoch [5/40], Step [280/391], Loss: 1.9352


 74%|███████▍  | 291/391 [00:45<00:15,  6.51it/s]

Epoch [5/40], Step [290/391], Loss: 1.8315


 77%|███████▋  | 301/391 [00:47<00:13,  6.53it/s]

Epoch [5/40], Step [300/391], Loss: 1.9606


 80%|███████▉  | 311/391 [00:48<00:12,  6.51it/s]

Epoch [5/40], Step [310/391], Loss: 1.9971


 82%|████████▏ | 321/391 [00:50<00:10,  6.51it/s]

Epoch [5/40], Step [320/391], Loss: 1.9240


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [5/40], Step [330/391], Loss: 2.0020


 87%|████████▋ | 341/391 [00:53<00:07,  6.53it/s]

Epoch [5/40], Step [340/391], Loss: 2.0352


 90%|████████▉ | 351/391 [00:54<00:06,  6.52it/s]

Epoch [5/40], Step [350/391], Loss: 1.9578


 92%|█████████▏| 361/391 [00:56<00:04,  6.51it/s]

Epoch [5/40], Step [360/391], Loss: 1.9376


 95%|█████████▍| 371/391 [00:57<00:03,  6.53it/s]

Epoch [5/40], Step [370/391], Loss: 1.9643


 97%|█████████▋| 381/391 [00:59<00:01,  6.52it/s]

Epoch [5/40], Step [380/391], Loss: 1.9506


100%|██████████| 391/391 [01:01<00:00,  6.41it/s]

Epoch [5/40], Step [390/391], Loss: 1.9344





Test Accuracy of the student model on the test images: 37.77 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.16it/s]

Epoch [6/40], Step [10/391], Loss: 1.8812


  5%|▌         | 21/391 [00:04<00:56,  6.51it/s]

Epoch [6/40], Step [20/391], Loss: 1.8457


  8%|▊         | 31/391 [00:05<00:55,  6.52it/s]

Epoch [6/40], Step [30/391], Loss: 1.8738


 10%|█         | 41/391 [00:07<00:53,  6.52it/s]

Epoch [6/40], Step [40/391], Loss: 1.8068


 13%|█▎        | 51/391 [00:08<00:52,  6.52it/s]

Epoch [6/40], Step [50/391], Loss: 1.7814


 16%|█▌        | 61/391 [00:10<00:50,  6.53it/s]

Epoch [6/40], Step [60/391], Loss: 1.8039


 18%|█▊        | 71/391 [00:11<00:48,  6.54it/s]

Epoch [6/40], Step [70/391], Loss: 1.7931


 21%|██        | 81/391 [00:13<00:47,  6.53it/s]

Epoch [6/40], Step [80/391], Loss: 1.8573


 23%|██▎       | 91/391 [00:14<00:45,  6.53it/s]

Epoch [6/40], Step [90/391], Loss: 1.8189


 26%|██▌       | 101/391 [00:16<00:44,  6.53it/s]

Epoch [6/40], Step [100/391], Loss: 1.8163


 28%|██▊       | 111/391 [00:18<00:42,  6.52it/s]

Epoch [6/40], Step [110/391], Loss: 1.8062


 31%|███       | 121/391 [00:19<00:41,  6.50it/s]

Epoch [6/40], Step [120/391], Loss: 1.7973


 34%|███▎      | 131/391 [00:21<00:39,  6.53it/s]

Epoch [6/40], Step [130/391], Loss: 1.7460


 36%|███▌      | 141/391 [00:22<00:38,  6.51it/s]

Epoch [6/40], Step [140/391], Loss: 1.7360


 39%|███▊      | 151/391 [00:24<00:36,  6.51it/s]

Epoch [6/40], Step [150/391], Loss: 1.8032


 41%|████      | 161/391 [00:25<00:35,  6.53it/s]

Epoch [6/40], Step [160/391], Loss: 1.7523


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [6/40], Step [170/391], Loss: 1.7488


 46%|████▋     | 181/391 [00:28<00:32,  6.52it/s]

Epoch [6/40], Step [180/391], Loss: 1.7909


 49%|████▉     | 191/391 [00:30<00:30,  6.53it/s]

Epoch [6/40], Step [190/391], Loss: 1.7479


 51%|█████▏    | 201/391 [00:31<00:29,  6.52it/s]

Epoch [6/40], Step [200/391], Loss: 1.7606


 54%|█████▍    | 211/391 [00:33<00:27,  6.53it/s]

Epoch [6/40], Step [210/391], Loss: 1.7442


 57%|█████▋    | 221/391 [00:34<00:26,  6.52it/s]

Epoch [6/40], Step [220/391], Loss: 1.7698


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [6/40], Step [230/391], Loss: 1.7462


 62%|██████▏   | 241/391 [00:37<00:22,  6.53it/s]

Epoch [6/40], Step [240/391], Loss: 1.7887


 64%|██████▍   | 251/391 [00:39<00:21,  6.53it/s]

Epoch [6/40], Step [250/391], Loss: 1.8179


 67%|██████▋   | 261/391 [00:40<00:19,  6.51it/s]

Epoch [6/40], Step [260/391], Loss: 1.8269


 69%|██████▉   | 271/391 [00:42<00:18,  6.52it/s]

Epoch [6/40], Step [270/391], Loss: 1.7514


 72%|███████▏  | 281/391 [00:44<00:16,  6.52it/s]

Epoch [6/40], Step [280/391], Loss: 1.7998


 74%|███████▍  | 291/391 [00:45<00:15,  6.51it/s]

Epoch [6/40], Step [290/391], Loss: 1.7614


 77%|███████▋  | 301/391 [00:47<00:13,  6.51it/s]

Epoch [6/40], Step [300/391], Loss: 1.8032


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [6/40], Step [310/391], Loss: 1.8741


 82%|████████▏ | 321/391 [00:50<00:10,  6.53it/s]

Epoch [6/40], Step [320/391], Loss: 1.8097


 85%|████████▍ | 331/391 [00:51<00:09,  6.53it/s]

Epoch [6/40], Step [330/391], Loss: 1.8201


 87%|████████▋ | 341/391 [00:53<00:07,  6.51it/s]

Epoch [6/40], Step [340/391], Loss: 1.7672


 90%|████████▉ | 351/391 [00:54<00:06,  6.51it/s]

Epoch [6/40], Step [350/391], Loss: 1.7330


 92%|█████████▏| 361/391 [00:56<00:04,  6.52it/s]

Epoch [6/40], Step [360/391], Loss: 1.7839


 95%|█████████▍| 371/391 [00:57<00:03,  6.53it/s]

Epoch [6/40], Step [370/391], Loss: 1.7723


 97%|█████████▋| 381/391 [00:59<00:01,  6.52it/s]

Epoch [6/40], Step [380/391], Loss: 1.7290


100%|██████████| 391/391 [01:00<00:00,  6.42it/s]

Epoch [6/40], Step [390/391], Loss: 1.7581





Test Accuracy of the student model on the test images: 37.40 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.22it/s]

Epoch [7/40], Step [10/391], Loss: 1.6963


  5%|▌         | 21/391 [00:04<00:56,  6.51it/s]

Epoch [7/40], Step [20/391], Loss: 1.6157


  8%|▊         | 31/391 [00:05<00:54,  6.55it/s]

Epoch [7/40], Step [30/391], Loss: 1.6631


 10%|█         | 41/391 [00:07<00:53,  6.52it/s]

Epoch [7/40], Step [40/391], Loss: 1.6311


 13%|█▎        | 51/391 [00:08<00:52,  6.53it/s]

Epoch [7/40], Step [50/391], Loss: 1.6602


 16%|█▌        | 61/391 [00:10<00:50,  6.52it/s]

Epoch [7/40], Step [60/391], Loss: 1.6686


 18%|█▊        | 71/391 [00:11<00:49,  6.50it/s]

Epoch [7/40], Step [70/391], Loss: 1.6621


 21%|██        | 81/391 [00:13<00:47,  6.50it/s]

Epoch [7/40], Step [80/391], Loss: 1.6782


 23%|██▎       | 91/391 [00:14<00:46,  6.50it/s]

Epoch [7/40], Step [90/391], Loss: 1.6270


 26%|██▌       | 101/391 [00:16<00:44,  6.52it/s]

Epoch [7/40], Step [100/391], Loss: 1.5975


 28%|██▊       | 111/391 [00:17<00:42,  6.52it/s]

Epoch [7/40], Step [110/391], Loss: 1.6776


 31%|███       | 121/391 [00:19<00:41,  6.51it/s]

Epoch [7/40], Step [120/391], Loss: 1.7081


 34%|███▎      | 131/391 [00:20<00:39,  6.53it/s]

Epoch [7/40], Step [130/391], Loss: 1.6786


 36%|███▌      | 141/391 [00:22<00:38,  6.53it/s]

Epoch [7/40], Step [140/391], Loss: 1.6954


 39%|███▊      | 151/391 [00:23<00:36,  6.52it/s]

Epoch [7/40], Step [150/391], Loss: 1.6429


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [7/40], Step [160/391], Loss: 1.6710


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [7/40], Step [170/391], Loss: 1.7693


 46%|████▋     | 181/391 [00:28<00:32,  6.53it/s]

Epoch [7/40], Step [180/391], Loss: 1.6962


 49%|████▉     | 191/391 [00:30<00:30,  6.52it/s]

Epoch [7/40], Step [190/391], Loss: 1.6854


 51%|█████▏    | 201/391 [00:31<00:29,  6.53it/s]

Epoch [7/40], Step [200/391], Loss: 1.6661


 54%|█████▍    | 211/391 [00:33<00:27,  6.51it/s]

Epoch [7/40], Step [210/391], Loss: 1.6217


 57%|█████▋    | 221/391 [00:34<00:26,  6.51it/s]

Epoch [7/40], Step [220/391], Loss: 1.6312


 59%|█████▉    | 231/391 [00:36<00:24,  6.53it/s]

Epoch [7/40], Step [230/391], Loss: 1.6417


 62%|██████▏   | 241/391 [00:37<00:23,  6.46it/s]

Epoch [7/40], Step [240/391], Loss: 1.6616


 64%|██████▍   | 251/391 [00:39<00:21,  6.53it/s]

Epoch [7/40], Step [250/391], Loss: 1.6274


 67%|██████▋   | 261/391 [00:40<00:19,  6.53it/s]

Epoch [7/40], Step [260/391], Loss: 1.6404


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [7/40], Step [270/391], Loss: 1.6360


 72%|███████▏  | 281/391 [00:43<00:16,  6.52it/s]

Epoch [7/40], Step [280/391], Loss: 1.6981


 74%|███████▍  | 291/391 [00:45<00:15,  6.52it/s]

Epoch [7/40], Step [290/391], Loss: 1.7125


 77%|███████▋  | 301/391 [00:46<00:13,  6.54it/s]

Epoch [7/40], Step [300/391], Loss: 1.7032


 80%|███████▉  | 311/391 [00:48<00:12,  6.50it/s]

Epoch [7/40], Step [310/391], Loss: 1.7206


 82%|████████▏ | 321/391 [00:50<00:10,  6.52it/s]

Epoch [7/40], Step [320/391], Loss: 1.7426


 85%|████████▍ | 331/391 [00:51<00:09,  6.51it/s]

Epoch [7/40], Step [330/391], Loss: 1.6734


 87%|████████▋ | 341/391 [00:53<00:07,  6.53it/s]

Epoch [7/40], Step [340/391], Loss: 1.6952


 90%|████████▉ | 351/391 [00:54<00:06,  6.53it/s]

Epoch [7/40], Step [350/391], Loss: 1.6858


 92%|█████████▏| 361/391 [00:56<00:04,  6.53it/s]

Epoch [7/40], Step [360/391], Loss: 1.6175


 95%|█████████▍| 371/391 [00:57<00:03,  6.53it/s]

Epoch [7/40], Step [370/391], Loss: 1.6858


 97%|█████████▋| 381/391 [00:59<00:01,  6.53it/s]

Epoch [7/40], Step [380/391], Loss: 1.7528


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [7/40], Step [390/391], Loss: 1.7228





Test Accuracy of the student model on the test images: 40.24 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.21it/s]

Epoch [8/40], Step [10/391], Loss: 1.5992


  5%|▌         | 21/391 [00:04<00:56,  6.52it/s]

Epoch [8/40], Step [20/391], Loss: 1.5814


  8%|▊         | 31/391 [00:05<00:55,  6.53it/s]

Epoch [8/40], Step [30/391], Loss: 1.5920


 10%|█         | 41/391 [00:07<00:53,  6.51it/s]

Epoch [8/40], Step [40/391], Loss: 1.5804


 13%|█▎        | 51/391 [00:08<00:52,  6.51it/s]

Epoch [8/40], Step [50/391], Loss: 1.5555


 16%|█▌        | 61/391 [00:10<00:50,  6.53it/s]

Epoch [8/40], Step [60/391], Loss: 1.5925


 18%|█▊        | 71/391 [00:11<00:49,  6.51it/s]

Epoch [8/40], Step [70/391], Loss: 1.6123


 21%|██        | 81/391 [00:13<00:47,  6.52it/s]

Epoch [8/40], Step [80/391], Loss: 1.5959


 23%|██▎       | 91/391 [00:14<00:46,  6.52it/s]

Epoch [8/40], Step [90/391], Loss: 1.5507


 26%|██▌       | 101/391 [00:16<00:44,  6.51it/s]

Epoch [8/40], Step [100/391], Loss: 1.5748


 28%|██▊       | 111/391 [00:17<00:43,  6.51it/s]

Epoch [8/40], Step [110/391], Loss: 1.5540


 31%|███       | 121/391 [00:19<00:41,  6.50it/s]

Epoch [8/40], Step [120/391], Loss: 1.6279


 34%|███▎      | 131/391 [00:20<00:39,  6.53it/s]

Epoch [8/40], Step [130/391], Loss: 1.6018


 36%|███▌      | 141/391 [00:22<00:38,  6.50it/s]

Epoch [8/40], Step [140/391], Loss: 1.5935


 39%|███▊      | 151/391 [00:24<00:36,  6.53it/s]

Epoch [8/40], Step [150/391], Loss: 1.5780


 41%|████      | 161/391 [00:25<00:35,  6.53it/s]

Epoch [8/40], Step [160/391], Loss: 1.5889


 44%|████▎     | 171/391 [00:27<00:33,  6.53it/s]

Epoch [8/40], Step [170/391], Loss: 1.5573


 46%|████▋     | 181/391 [00:28<00:32,  6.54it/s]

Epoch [8/40], Step [180/391], Loss: 1.5484


 49%|████▉     | 191/391 [00:30<00:30,  6.51it/s]

Epoch [8/40], Step [190/391], Loss: 1.5479


 51%|█████▏    | 201/391 [00:31<00:29,  6.52it/s]

Epoch [8/40], Step [200/391], Loss: 1.5754


 54%|█████▍    | 211/391 [00:33<00:27,  6.53it/s]

Epoch [8/40], Step [210/391], Loss: 1.5486


 57%|█████▋    | 221/391 [00:34<00:26,  6.51it/s]

Epoch [8/40], Step [220/391], Loss: 1.6247


 59%|█████▉    | 231/391 [00:36<00:24,  6.53it/s]

Epoch [8/40], Step [230/391], Loss: 1.5647


 62%|██████▏   | 241/391 [00:37<00:23,  6.52it/s]

Epoch [8/40], Step [240/391], Loss: 1.6075


 64%|██████▍   | 251/391 [00:39<00:21,  6.54it/s]

Epoch [8/40], Step [250/391], Loss: 1.5914


 67%|██████▋   | 261/391 [00:40<00:19,  6.51it/s]

Epoch [8/40], Step [260/391], Loss: 1.6148


 69%|██████▉   | 271/391 [00:42<00:18,  6.51it/s]

Epoch [8/40], Step [270/391], Loss: 1.6139


 72%|███████▏  | 281/391 [00:43<00:16,  6.53it/s]

Epoch [8/40], Step [280/391], Loss: 1.6085


 74%|███████▍  | 291/391 [00:45<00:15,  6.53it/s]

Epoch [8/40], Step [290/391], Loss: 1.5616


 77%|███████▋  | 301/391 [00:47<00:13,  6.53it/s]

Epoch [8/40], Step [300/391], Loss: 1.5905


 80%|███████▉  | 311/391 [00:48<00:12,  6.51it/s]

Epoch [8/40], Step [310/391], Loss: 1.5843


 82%|████████▏ | 321/391 [00:50<00:10,  6.53it/s]

Epoch [8/40], Step [320/391], Loss: 1.6130


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [8/40], Step [330/391], Loss: 1.6753


 87%|████████▋ | 341/391 [00:53<00:07,  6.52it/s]

Epoch [8/40], Step [340/391], Loss: 1.6470


 90%|████████▉ | 351/391 [00:54<00:06,  6.50it/s]

Epoch [8/40], Step [350/391], Loss: 1.6363


 92%|█████████▏| 361/391 [00:56<00:04,  6.53it/s]

Epoch [8/40], Step [360/391], Loss: 1.5676


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [8/40], Step [370/391], Loss: 1.6137


 97%|█████████▋| 381/391 [00:59<00:01,  6.52it/s]

Epoch [8/40], Step [380/391], Loss: 1.5509


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [8/40], Step [390/391], Loss: 1.5887





Test Accuracy of the student model on the test images: 43.86 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.20it/s]

Epoch [9/40], Step [10/391], Loss: 1.5662


  5%|▌         | 21/391 [00:04<00:56,  6.51it/s]

Epoch [9/40], Step [20/391], Loss: 1.5163


  8%|▊         | 31/391 [00:05<00:55,  6.53it/s]

Epoch [9/40], Step [30/391], Loss: 1.5434


 10%|█         | 41/391 [00:07<00:53,  6.52it/s]

Epoch [9/40], Step [40/391], Loss: 1.5351


 13%|█▎        | 51/391 [00:08<00:52,  6.51it/s]

Epoch [9/40], Step [50/391], Loss: 1.5118


 16%|█▌        | 61/391 [00:10<00:50,  6.52it/s]

Epoch [9/40], Step [60/391], Loss: 1.4517


 18%|█▊        | 71/391 [00:11<00:49,  6.50it/s]

Epoch [9/40], Step [70/391], Loss: 1.4764


 21%|██        | 81/391 [00:13<00:47,  6.50it/s]

Epoch [9/40], Step [80/391], Loss: 1.5300


 23%|██▎       | 91/391 [00:14<00:46,  6.52it/s]

Epoch [9/40], Step [90/391], Loss: 1.5236


 26%|██▌       | 101/391 [00:16<00:44,  6.52it/s]

Epoch [9/40], Step [100/391], Loss: 1.5100


 28%|██▊       | 111/391 [00:17<00:42,  6.52it/s]

Epoch [9/40], Step [110/391], Loss: 1.5017


 31%|███       | 121/391 [00:19<00:41,  6.52it/s]

Epoch [9/40], Step [120/391], Loss: 1.5295


 34%|███▎      | 131/391 [00:20<00:39,  6.52it/s]

Epoch [9/40], Step [130/391], Loss: 1.5146


 36%|███▌      | 141/391 [00:22<00:38,  6.51it/s]

Epoch [9/40], Step [140/391], Loss: 1.5175


 39%|███▊      | 151/391 [00:24<00:36,  6.51it/s]

Epoch [9/40], Step [150/391], Loss: 1.5633


 41%|████      | 161/391 [00:25<00:35,  6.50it/s]

Epoch [9/40], Step [160/391], Loss: 1.5800


 44%|████▎     | 171/391 [00:27<00:33,  6.51it/s]

Epoch [9/40], Step [170/391], Loss: 1.5441


 46%|████▋     | 181/391 [00:28<00:32,  6.53it/s]

Epoch [9/40], Step [180/391], Loss: 1.5086


 49%|████▉     | 191/391 [00:30<00:30,  6.53it/s]

Epoch [9/40], Step [190/391], Loss: 1.5088


 51%|█████▏    | 201/391 [00:31<00:29,  6.52it/s]

Epoch [9/40], Step [200/391], Loss: 1.5250


 54%|█████▍    | 211/391 [00:33<00:27,  6.54it/s]

Epoch [9/40], Step [210/391], Loss: 1.5858


 57%|█████▋    | 221/391 [00:34<00:26,  6.53it/s]

Epoch [9/40], Step [220/391], Loss: 1.5855


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [9/40], Step [230/391], Loss: 1.5566


 62%|██████▏   | 241/391 [00:37<00:22,  6.54it/s]

Epoch [9/40], Step [240/391], Loss: 1.5368


 64%|██████▍   | 251/391 [00:39<00:21,  6.53it/s]

Epoch [9/40], Step [250/391], Loss: 1.5444


 67%|██████▋   | 261/391 [00:40<00:19,  6.52it/s]

Epoch [9/40], Step [260/391], Loss: 1.5473


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [9/40], Step [270/391], Loss: 1.5295


 72%|███████▏  | 281/391 [00:43<00:16,  6.51it/s]

Epoch [9/40], Step [280/391], Loss: 1.5440


 74%|███████▍  | 291/391 [00:45<00:15,  6.52it/s]

Epoch [9/40], Step [290/391], Loss: 1.5588


 77%|███████▋  | 301/391 [00:47<00:13,  6.53it/s]

Epoch [9/40], Step [300/391], Loss: 1.5575


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [9/40], Step [310/391], Loss: 1.4852


 82%|████████▏ | 321/391 [00:50<00:10,  6.51it/s]

Epoch [9/40], Step [320/391], Loss: 1.5485


 85%|████████▍ | 331/391 [00:51<00:09,  6.50it/s]

Epoch [9/40], Step [330/391], Loss: 1.5369


 87%|████████▋ | 341/391 [00:53<00:07,  6.51it/s]

Epoch [9/40], Step [340/391], Loss: 1.5106


 90%|████████▉ | 351/391 [00:54<00:06,  6.51it/s]

Epoch [9/40], Step [350/391], Loss: 1.4980


 92%|█████████▏| 361/391 [00:56<00:04,  6.54it/s]

Epoch [9/40], Step [360/391], Loss: 1.5145


 95%|█████████▍| 371/391 [00:57<00:03,  6.54it/s]

Epoch [9/40], Step [370/391], Loss: 1.4882


 97%|█████████▋| 381/391 [00:59<00:01,  6.52it/s]

Epoch [9/40], Step [380/391], Loss: 1.5631


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [9/40], Step [390/391], Loss: 1.5378





Test Accuracy of the student model on the test images: 39.24 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.20it/s]

Epoch [10/40], Step [10/391], Loss: 1.4924


  5%|▌         | 21/391 [00:04<00:56,  6.52it/s]

Epoch [10/40], Step [20/391], Loss: 1.4854


  8%|▊         | 31/391 [00:05<00:55,  6.51it/s]

Epoch [10/40], Step [30/391], Loss: 1.4941


 10%|█         | 41/391 [00:07<00:53,  6.50it/s]

Epoch [10/40], Step [40/391], Loss: 1.4842


 13%|█▎        | 51/391 [00:08<00:52,  6.52it/s]

Epoch [10/40], Step [50/391], Loss: 1.4874


 16%|█▌        | 61/391 [00:10<00:50,  6.53it/s]

Epoch [10/40], Step [60/391], Loss: 1.4993


 18%|█▊        | 71/391 [00:11<00:48,  6.53it/s]

Epoch [10/40], Step [70/391], Loss: 1.4413


 21%|██        | 81/391 [00:13<00:49,  6.22it/s]

Epoch [10/40], Step [80/391], Loss: 1.4643


 23%|██▎       | 91/391 [00:14<00:46,  6.50it/s]

Epoch [10/40], Step [90/391], Loss: 1.4167


 26%|██▌       | 101/391 [00:16<00:44,  6.53it/s]

Epoch [10/40], Step [100/391], Loss: 1.4538


 28%|██▊       | 111/391 [00:17<00:43,  6.51it/s]

Epoch [10/40], Step [110/391], Loss: 1.4540


 31%|███       | 121/391 [00:19<00:41,  6.51it/s]

Epoch [10/40], Step [120/391], Loss: 1.4745


 34%|███▎      | 131/391 [00:21<00:39,  6.52it/s]

Epoch [10/40], Step [130/391], Loss: 1.4683


 36%|███▌      | 141/391 [00:22<00:38,  6.52it/s]

Epoch [10/40], Step [140/391], Loss: 1.4790


 39%|███▊      | 151/391 [00:24<00:36,  6.52it/s]

Epoch [10/40], Step [150/391], Loss: 1.4346


 41%|████      | 161/391 [00:25<00:35,  6.51it/s]

Epoch [10/40], Step [160/391], Loss: 1.4634


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [10/40], Step [170/391], Loss: 1.4518


 46%|████▋     | 181/391 [00:28<00:32,  6.52it/s]

Epoch [10/40], Step [180/391], Loss: 1.5050


 49%|████▉     | 191/391 [00:30<00:30,  6.50it/s]

Epoch [10/40], Step [190/391], Loss: 1.4737


 51%|█████▏    | 201/391 [00:31<00:29,  6.52it/s]

Epoch [10/40], Step [200/391], Loss: 1.4435


 54%|█████▍    | 211/391 [00:33<00:27,  6.53it/s]

Epoch [10/40], Step [210/391], Loss: 1.4975


 57%|█████▋    | 221/391 [00:34<00:26,  6.52it/s]

Epoch [10/40], Step [220/391], Loss: 1.5190


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [10/40], Step [230/391], Loss: 1.4835


 62%|██████▏   | 241/391 [00:37<00:23,  6.52it/s]

Epoch [10/40], Step [240/391], Loss: 1.4256


 64%|██████▍   | 251/391 [00:39<00:21,  6.53it/s]

Epoch [10/40], Step [250/391], Loss: 1.4809


 67%|██████▋   | 261/391 [00:40<00:19,  6.51it/s]

Epoch [10/40], Step [260/391], Loss: 1.4609


 69%|██████▉   | 271/391 [00:42<00:18,  6.50it/s]

Epoch [10/40], Step [270/391], Loss: 1.4874


 72%|███████▏  | 281/391 [00:44<00:16,  6.52it/s]

Epoch [10/40], Step [280/391], Loss: 1.5305


 74%|███████▍  | 291/391 [00:45<00:15,  6.51it/s]

Epoch [10/40], Step [290/391], Loss: 1.4492


 77%|███████▋  | 301/391 [00:47<00:13,  6.51it/s]

Epoch [10/40], Step [300/391], Loss: 1.5060


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [10/40], Step [310/391], Loss: 1.5176


 82%|████████▏ | 321/391 [00:50<00:10,  6.52it/s]

Epoch [10/40], Step [320/391], Loss: 1.5182


 85%|████████▍ | 331/391 [00:51<00:09,  6.54it/s]

Epoch [10/40], Step [330/391], Loss: 1.5180


 87%|████████▋ | 341/391 [00:53<00:07,  6.53it/s]

Epoch [10/40], Step [340/391], Loss: 1.5025


 90%|████████▉ | 351/391 [00:54<00:06,  6.53it/s]

Epoch [10/40], Step [350/391], Loss: 1.4925


 92%|█████████▏| 361/391 [00:56<00:04,  6.51it/s]

Epoch [10/40], Step [360/391], Loss: 1.5161


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [10/40], Step [370/391], Loss: 1.4328


 97%|█████████▋| 381/391 [00:59<00:01,  6.53it/s]

Epoch [10/40], Step [380/391], Loss: 1.4793


100%|██████████| 391/391 [01:00<00:00,  6.42it/s]

Epoch [10/40], Step [390/391], Loss: 1.4734





Test Accuracy of the student model on the test images: 45.08 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.18it/s]

Epoch [11/40], Step [10/391], Loss: 1.4245


  5%|▌         | 21/391 [00:04<00:56,  6.50it/s]

Epoch [11/40], Step [20/391], Loss: 1.3944


  8%|▊         | 31/391 [00:05<00:55,  6.53it/s]

Epoch [11/40], Step [30/391], Loss: 1.4301


 10%|█         | 41/391 [00:07<00:53,  6.52it/s]

Epoch [11/40], Step [40/391], Loss: 1.4436


 13%|█▎        | 51/391 [00:08<00:52,  6.52it/s]

Epoch [11/40], Step [50/391], Loss: 1.4215


 16%|█▌        | 61/391 [00:10<00:50,  6.49it/s]

Epoch [11/40], Step [60/391], Loss: 1.4316


 18%|█▊        | 71/391 [00:11<00:49,  6.52it/s]

Epoch [11/40], Step [70/391], Loss: 1.4282


 21%|██        | 81/391 [00:13<00:47,  6.51it/s]

Epoch [11/40], Step [80/391], Loss: 1.4062


 23%|██▎       | 91/391 [00:14<00:45,  6.53it/s]

Epoch [11/40], Step [90/391], Loss: 1.4187


 26%|██▌       | 101/391 [00:16<00:44,  6.51it/s]

Epoch [11/40], Step [100/391], Loss: 1.3977


 28%|██▊       | 111/391 [00:17<00:43,  6.51it/s]

Epoch [11/40], Step [110/391], Loss: 1.4409


 31%|███       | 121/391 [00:19<00:41,  6.49it/s]

Epoch [11/40], Step [120/391], Loss: 1.4294


 34%|███▎      | 131/391 [00:20<00:39,  6.51it/s]

Epoch [11/40], Step [130/391], Loss: 1.3837


 36%|███▌      | 141/391 [00:22<00:38,  6.49it/s]

Epoch [11/40], Step [140/391], Loss: 1.3854


 39%|███▊      | 151/391 [00:24<00:36,  6.51it/s]

Epoch [11/40], Step [150/391], Loss: 1.3869


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [11/40], Step [160/391], Loss: 1.4388


 44%|████▎     | 171/391 [00:27<00:33,  6.53it/s]

Epoch [11/40], Step [170/391], Loss: 1.4472


 46%|████▋     | 181/391 [00:28<00:32,  6.53it/s]

Epoch [11/40], Step [180/391], Loss: 1.3906


 49%|████▉     | 191/391 [00:30<00:30,  6.53it/s]

Epoch [11/40], Step [190/391], Loss: 1.4086


 51%|█████▏    | 201/391 [00:31<00:29,  6.53it/s]

Epoch [11/40], Step [200/391], Loss: 1.4142


 54%|█████▍    | 211/391 [00:33<00:27,  6.53it/s]

Epoch [11/40], Step [210/391], Loss: 1.4275


 57%|█████▋    | 221/391 [00:34<00:26,  6.51it/s]

Epoch [11/40], Step [220/391], Loss: 1.4169


 59%|█████▉    | 231/391 [00:36<00:24,  6.50it/s]

Epoch [11/40], Step [230/391], Loss: 1.4269


 62%|██████▏   | 241/391 [00:37<00:22,  6.52it/s]

Epoch [11/40], Step [240/391], Loss: 1.4230


 64%|██████▍   | 251/391 [00:39<00:21,  6.52it/s]

Epoch [11/40], Step [250/391], Loss: 1.4347


 67%|██████▋   | 261/391 [00:40<00:19,  6.52it/s]

Epoch [11/40], Step [260/391], Loss: 1.4932


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [11/40], Step [270/391], Loss: 1.4417


 72%|███████▏  | 281/391 [00:43<00:16,  6.53it/s]

Epoch [11/40], Step [280/391], Loss: 1.4548


 74%|███████▍  | 291/391 [00:45<00:15,  6.52it/s]

Epoch [11/40], Step [290/391], Loss: 1.4482


 77%|███████▋  | 301/391 [00:47<00:13,  6.51it/s]

Epoch [11/40], Step [300/391], Loss: 1.4247


 80%|███████▉  | 311/391 [00:48<00:12,  6.51it/s]

Epoch [11/40], Step [310/391], Loss: 1.4591


 82%|████████▏ | 321/391 [00:50<00:10,  6.51it/s]

Epoch [11/40], Step [320/391], Loss: 1.4036


 85%|████████▍ | 331/391 [00:51<00:09,  6.53it/s]

Epoch [11/40], Step [330/391], Loss: 1.4513


 87%|████████▋ | 341/391 [00:53<00:07,  6.51it/s]

Epoch [11/40], Step [340/391], Loss: 1.4615


 90%|████████▉ | 351/391 [00:54<00:06,  6.52it/s]

Epoch [11/40], Step [350/391], Loss: 1.4355


 92%|█████████▏| 361/391 [00:56<00:04,  6.50it/s]

Epoch [11/40], Step [360/391], Loss: 1.4802


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [11/40], Step [370/391], Loss: 1.4376


 97%|█████████▋| 381/391 [00:59<00:01,  6.51it/s]

Epoch [11/40], Step [380/391], Loss: 1.4653


100%|██████████| 391/391 [01:00<00:00,  6.42it/s]

Epoch [11/40], Step [390/391], Loss: 1.4608





Test Accuracy of the student model on the test images: 44.96 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.20it/s]

Epoch [12/40], Step [10/391], Loss: 1.3785


  5%|▌         | 21/391 [00:04<00:56,  6.51it/s]

Epoch [12/40], Step [20/391], Loss: 1.3816


  8%|▊         | 31/391 [00:05<00:55,  6.51it/s]

Epoch [12/40], Step [30/391], Loss: 1.3620


 10%|█         | 41/391 [00:07<00:53,  6.52it/s]

Epoch [12/40], Step [40/391], Loss: 1.3339


 13%|█▎        | 51/391 [00:08<00:52,  6.51it/s]

Epoch [12/40], Step [50/391], Loss: 1.3664


 16%|█▌        | 61/391 [00:10<00:50,  6.52it/s]

Epoch [12/40], Step [60/391], Loss: 1.3635


 18%|█▊        | 71/391 [00:11<00:49,  6.53it/s]

Epoch [12/40], Step [70/391], Loss: 1.3586


 21%|██        | 81/391 [00:13<00:47,  6.53it/s]

Epoch [12/40], Step [80/391], Loss: 1.3759


 23%|██▎       | 91/391 [00:14<00:45,  6.54it/s]

Epoch [12/40], Step [90/391], Loss: 1.3720


 26%|██▌       | 101/391 [00:16<00:44,  6.54it/s]

Epoch [12/40], Step [100/391], Loss: 1.3497


 28%|██▊       | 111/391 [00:17<00:43,  6.51it/s]

Epoch [12/40], Step [110/391], Loss: 1.3645


 31%|███       | 121/391 [00:19<00:41,  6.51it/s]

Epoch [12/40], Step [120/391], Loss: 1.3297


 34%|███▎      | 131/391 [00:21<00:39,  6.53it/s]

Epoch [12/40], Step [130/391], Loss: 1.3793


 36%|███▌      | 141/391 [00:22<00:38,  6.52it/s]

Epoch [12/40], Step [140/391], Loss: 1.3588


 39%|███▊      | 151/391 [00:24<00:36,  6.52it/s]

Epoch [12/40], Step [150/391], Loss: 1.4435


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [12/40], Step [160/391], Loss: 1.3639


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [12/40], Step [170/391], Loss: 1.4201


 46%|████▋     | 181/391 [00:28<00:32,  6.50it/s]

Epoch [12/40], Step [180/391], Loss: 1.3785


 49%|████▉     | 191/391 [00:30<00:30,  6.50it/s]

Epoch [12/40], Step [190/391], Loss: 1.3659


 51%|█████▏    | 201/391 [00:31<00:29,  6.51it/s]

Epoch [12/40], Step [200/391], Loss: 1.3925


 54%|█████▍    | 211/391 [00:33<00:27,  6.53it/s]

Epoch [12/40], Step [210/391], Loss: 1.3845


 57%|█████▋    | 221/391 [00:34<00:25,  6.54it/s]

Epoch [12/40], Step [220/391], Loss: 1.3910


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [12/40], Step [230/391], Loss: 1.3955


 62%|██████▏   | 241/391 [00:37<00:23,  6.52it/s]

Epoch [12/40], Step [240/391], Loss: 1.4226


 64%|██████▍   | 251/391 [00:39<00:21,  6.53it/s]

Epoch [12/40], Step [250/391], Loss: 1.3914


 67%|██████▋   | 261/391 [00:40<00:19,  6.51it/s]

Epoch [12/40], Step [260/391], Loss: 1.4030


 69%|██████▉   | 271/391 [00:42<00:18,  6.51it/s]

Epoch [12/40], Step [270/391], Loss: 1.4136


 72%|███████▏  | 281/391 [00:44<00:16,  6.52it/s]

Epoch [12/40], Step [280/391], Loss: 1.4384


 74%|███████▍  | 291/391 [00:45<00:15,  6.52it/s]

Epoch [12/40], Step [290/391], Loss: 1.4031


 77%|███████▋  | 301/391 [00:47<00:13,  6.52it/s]

Epoch [12/40], Step [300/391], Loss: 1.4320


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [12/40], Step [310/391], Loss: 1.4731


 82%|████████▏ | 321/391 [00:50<00:10,  6.50it/s]

Epoch [12/40], Step [320/391], Loss: 1.4590


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [12/40], Step [330/391], Loss: 1.4102


 87%|████████▋ | 341/391 [00:53<00:07,  6.51it/s]

Epoch [12/40], Step [340/391], Loss: 1.4427


 90%|████████▉ | 351/391 [00:54<00:06,  6.51it/s]

Epoch [12/40], Step [350/391], Loss: 1.4260


 92%|█████████▏| 361/391 [00:56<00:04,  6.52it/s]

Epoch [12/40], Step [360/391], Loss: 1.3857


 95%|█████████▍| 371/391 [00:57<00:03,  6.53it/s]

Epoch [12/40], Step [370/391], Loss: 1.4081


 97%|█████████▋| 381/391 [00:59<00:01,  6.52it/s]

Epoch [12/40], Step [380/391], Loss: 1.4157


100%|██████████| 391/391 [01:00<00:00,  6.42it/s]

Epoch [12/40], Step [390/391], Loss: 1.4136





Test Accuracy of the student model on the test images: 46.63 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.14it/s]

Epoch [13/40], Step [10/391], Loss: 1.3742


  5%|▌         | 21/391 [00:04<00:56,  6.50it/s]

Epoch [13/40], Step [20/391], Loss: 1.3680


  8%|▊         | 31/391 [00:05<00:55,  6.51it/s]

Epoch [13/40], Step [30/391], Loss: 1.3432


 10%|█         | 41/391 [00:07<00:53,  6.50it/s]

Epoch [13/40], Step [40/391], Loss: 1.3520


 13%|█▎        | 51/391 [00:08<00:52,  6.52it/s]

Epoch [13/40], Step [50/391], Loss: 1.3120


 16%|█▌        | 61/391 [00:10<00:50,  6.52it/s]

Epoch [13/40], Step [60/391], Loss: 1.3899


 18%|█▊        | 71/391 [00:11<00:49,  6.51it/s]

Epoch [13/40], Step [70/391], Loss: 1.3968


 21%|██        | 81/391 [00:13<00:47,  6.52it/s]

Epoch [13/40], Step [80/391], Loss: 1.3802


 23%|██▎       | 91/391 [00:14<00:46,  6.51it/s]

Epoch [13/40], Step [90/391], Loss: 1.3566


 26%|██▌       | 101/391 [00:16<00:44,  6.53it/s]

Epoch [13/40], Step [100/391], Loss: 1.3721


 28%|██▊       | 111/391 [00:18<00:42,  6.53it/s]

Epoch [13/40], Step [110/391], Loss: 1.3999


 31%|███       | 121/391 [00:19<00:41,  6.52it/s]

Epoch [13/40], Step [120/391], Loss: 1.3733


 34%|███▎      | 131/391 [00:21<00:39,  6.52it/s]

Epoch [13/40], Step [130/391], Loss: 1.3797


 36%|███▌      | 141/391 [00:22<00:38,  6.51it/s]

Epoch [13/40], Step [140/391], Loss: 1.3818


 39%|███▊      | 151/391 [00:24<00:36,  6.51it/s]

Epoch [13/40], Step [150/391], Loss: 1.3555


 41%|████      | 161/391 [00:25<00:35,  6.54it/s]

Epoch [13/40], Step [160/391], Loss: 1.3806


 44%|████▎     | 171/391 [00:27<00:33,  6.50it/s]

Epoch [13/40], Step [170/391], Loss: 1.3455


 46%|████▋     | 181/391 [00:28<00:32,  6.51it/s]

Epoch [13/40], Step [180/391], Loss: 1.3220


 49%|████▉     | 191/391 [00:30<00:30,  6.51it/s]

Epoch [13/40], Step [190/391], Loss: 1.3343


 51%|█████▏    | 201/391 [00:31<00:29,  6.50it/s]

Epoch [13/40], Step [200/391], Loss: 1.3417


 54%|█████▍    | 211/391 [00:33<00:27,  6.51it/s]

Epoch [13/40], Step [210/391], Loss: 1.3416


 57%|█████▋    | 221/391 [00:34<00:26,  6.52it/s]

Epoch [13/40], Step [220/391], Loss: 1.3291


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [13/40], Step [230/391], Loss: 1.3390


 62%|██████▏   | 241/391 [00:37<00:23,  6.52it/s]

Epoch [13/40], Step [240/391], Loss: 1.3794


 64%|██████▍   | 251/391 [00:39<00:21,  6.53it/s]

Epoch [13/40], Step [250/391], Loss: 1.3629


 67%|██████▋   | 261/391 [00:41<00:19,  6.53it/s]

Epoch [13/40], Step [260/391], Loss: 1.3752


 69%|██████▉   | 271/391 [00:42<00:18,  6.52it/s]

Epoch [13/40], Step [270/391], Loss: 1.3589


 72%|███████▏  | 281/391 [00:44<00:16,  6.51it/s]

Epoch [13/40], Step [280/391], Loss: 1.3994


 74%|███████▍  | 291/391 [00:45<00:15,  6.52it/s]

Epoch [13/40], Step [290/391], Loss: 1.3994


 77%|███████▋  | 301/391 [00:47<00:13,  6.53it/s]

Epoch [13/40], Step [300/391], Loss: 1.4070


 80%|███████▉  | 311/391 [00:48<00:12,  6.53it/s]

Epoch [13/40], Step [310/391], Loss: 1.3593


 82%|████████▏ | 321/391 [00:50<00:10,  6.52it/s]

Epoch [13/40], Step [320/391], Loss: 1.3506


 85%|████████▍ | 331/391 [00:51<00:09,  6.51it/s]

Epoch [13/40], Step [330/391], Loss: 1.4218


 87%|████████▋ | 341/391 [00:53<00:07,  6.52it/s]

Epoch [13/40], Step [340/391], Loss: 1.3886


 90%|████████▉ | 351/391 [00:54<00:06,  6.51it/s]

Epoch [13/40], Step [350/391], Loss: 1.3781


 92%|█████████▏| 361/391 [00:56<00:04,  6.51it/s]

Epoch [13/40], Step [360/391], Loss: 1.4216


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [13/40], Step [370/391], Loss: 1.3908


 97%|█████████▋| 381/391 [00:59<00:01,  6.50it/s]

Epoch [13/40], Step [380/391], Loss: 1.4188


100%|██████████| 391/391 [01:00<00:00,  6.41it/s]

Epoch [13/40], Step [390/391], Loss: 1.4023





Test Accuracy of the student model on the test images: 46.29 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.22it/s]

Epoch [14/40], Step [10/391], Loss: 1.3219


  5%|▌         | 21/391 [00:04<00:56,  6.50it/s]

Epoch [14/40], Step [20/391], Loss: 1.3048


  8%|▊         | 31/391 [00:05<00:55,  6.52it/s]

Epoch [14/40], Step [30/391], Loss: 1.3250


 10%|█         | 41/391 [00:07<00:53,  6.51it/s]

Epoch [14/40], Step [40/391], Loss: 1.2758


 13%|█▎        | 51/391 [00:08<00:52,  6.52it/s]

Epoch [14/40], Step [50/391], Loss: 1.3035


 16%|█▌        | 61/391 [00:10<00:50,  6.53it/s]

Epoch [14/40], Step [60/391], Loss: 1.2999


 18%|█▊        | 71/391 [00:11<00:49,  6.51it/s]

Epoch [14/40], Step [70/391], Loss: 1.3147


 21%|██        | 81/391 [00:13<00:47,  6.53it/s]

Epoch [14/40], Step [80/391], Loss: 1.3131


 23%|██▎       | 91/391 [00:14<00:46,  6.52it/s]

Epoch [14/40], Step [90/391], Loss: 1.2903


 26%|██▌       | 101/391 [00:16<00:44,  6.51it/s]

Epoch [14/40], Step [100/391], Loss: 1.2861


 28%|██▊       | 111/391 [00:17<00:43,  6.51it/s]

Epoch [14/40], Step [110/391], Loss: 1.2946


 31%|███       | 121/391 [00:19<00:41,  6.52it/s]

Epoch [14/40], Step [120/391], Loss: 1.2974


 34%|███▎      | 131/391 [00:20<00:39,  6.52it/s]

Epoch [14/40], Step [130/391], Loss: 1.3136


 36%|███▌      | 141/391 [00:22<00:38,  6.52it/s]

Epoch [14/40], Step [140/391], Loss: 1.3466


 39%|███▊      | 151/391 [00:24<00:36,  6.51it/s]

Epoch [14/40], Step [150/391], Loss: 1.3529


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [14/40], Step [160/391], Loss: 1.3244


 44%|████▎     | 171/391 [00:27<00:33,  6.51it/s]

Epoch [14/40], Step [170/391], Loss: 1.3310


 46%|████▋     | 181/391 [00:28<00:32,  6.51it/s]

Epoch [14/40], Step [180/391], Loss: 1.3377


 49%|████▉     | 191/391 [00:30<00:30,  6.52it/s]

Epoch [14/40], Step [190/391], Loss: 1.3370


 51%|█████▏    | 201/391 [00:31<00:29,  6.52it/s]

Epoch [14/40], Step [200/391], Loss: 1.2748


 54%|█████▍    | 211/391 [00:33<00:27,  6.52it/s]

Epoch [14/40], Step [210/391], Loss: 1.2992


 57%|█████▋    | 221/391 [00:34<00:26,  6.52it/s]

Epoch [14/40], Step [220/391], Loss: 1.3074


 59%|█████▉    | 231/391 [00:36<00:24,  6.53it/s]

Epoch [14/40], Step [230/391], Loss: 1.2909


 62%|██████▏   | 241/391 [00:37<00:23,  6.51it/s]

Epoch [14/40], Step [240/391], Loss: 1.3501


 64%|██████▍   | 251/391 [00:39<00:21,  6.53it/s]

Epoch [14/40], Step [250/391], Loss: 1.3626


 67%|██████▋   | 261/391 [00:40<00:19,  6.51it/s]

Epoch [14/40], Step [260/391], Loss: 1.4139


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [14/40], Step [270/391], Loss: 1.3894


 72%|███████▏  | 281/391 [00:43<00:16,  6.51it/s]

Epoch [14/40], Step [280/391], Loss: 1.3504


 74%|███████▍  | 291/391 [00:45<00:15,  6.50it/s]

Epoch [14/40], Step [290/391], Loss: 1.3313


 77%|███████▋  | 301/391 [00:47<00:13,  6.52it/s]

Epoch [14/40], Step [300/391], Loss: 1.3647


 80%|███████▉  | 311/391 [00:48<00:12,  6.53it/s]

Epoch [14/40], Step [310/391], Loss: 1.3128


 82%|████████▏ | 321/391 [00:50<00:10,  6.53it/s]

Epoch [14/40], Step [320/391], Loss: 1.3273


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [14/40], Step [330/391], Loss: 1.3347


 87%|████████▋ | 341/391 [00:53<00:07,  6.50it/s]

Epoch [14/40], Step [340/391], Loss: 1.3222


 90%|████████▉ | 351/391 [00:54<00:06,  6.51it/s]

Epoch [14/40], Step [350/391], Loss: 1.3155


 92%|█████████▏| 361/391 [00:56<00:04,  6.54it/s]

Epoch [14/40], Step [360/391], Loss: 1.3358


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [14/40], Step [370/391], Loss: 1.3424


 97%|█████████▋| 381/391 [00:59<00:01,  6.51it/s]

Epoch [14/40], Step [380/391], Loss: 1.3723


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [14/40], Step [390/391], Loss: 1.3554





Test Accuracy of the student model on the test images: 48.68 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.21it/s]

Epoch [15/40], Step [10/391], Loss: 1.2763


  5%|▌         | 21/391 [00:04<00:56,  6.52it/s]

Epoch [15/40], Step [20/391], Loss: 1.2712


  8%|▊         | 31/391 [00:05<00:55,  6.52it/s]

Epoch [15/40], Step [30/391], Loss: 1.2929


 10%|█         | 41/391 [00:07<00:53,  6.52it/s]

Epoch [15/40], Step [40/391], Loss: 1.2484


 13%|█▎        | 51/391 [00:08<00:52,  6.53it/s]

Epoch [15/40], Step [50/391], Loss: 1.2853


 16%|█▌        | 61/391 [00:10<00:50,  6.51it/s]

Epoch [15/40], Step [60/391], Loss: 1.2480


 18%|█▊        | 71/391 [00:11<00:49,  6.51it/s]

Epoch [15/40], Step [70/391], Loss: 1.2722


 21%|██        | 81/391 [00:13<00:47,  6.52it/s]

Epoch [15/40], Step [80/391], Loss: 1.2931


 23%|██▎       | 91/391 [00:14<00:45,  6.52it/s]

Epoch [15/40], Step [90/391], Loss: 1.2841


 26%|██▌       | 101/391 [00:16<00:44,  6.52it/s]

Epoch [15/40], Step [100/391], Loss: 1.2445


 28%|██▊       | 111/391 [00:17<00:42,  6.52it/s]

Epoch [15/40], Step [110/391], Loss: 1.2593


 31%|███       | 121/391 [00:19<00:41,  6.52it/s]

Epoch [15/40], Step [120/391], Loss: 1.2934


 34%|███▎      | 131/391 [00:20<00:39,  6.51it/s]

Epoch [15/40], Step [130/391], Loss: 1.2808


 36%|███▌      | 141/391 [00:22<00:38,  6.52it/s]

Epoch [15/40], Step [140/391], Loss: 1.2470


 39%|███▊      | 151/391 [00:23<00:36,  6.51it/s]

Epoch [15/40], Step [150/391], Loss: 1.2794


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [15/40], Step [160/391], Loss: 1.2696


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [15/40], Step [170/391], Loss: 1.3130


 46%|████▋     | 181/391 [00:28<00:32,  6.51it/s]

Epoch [15/40], Step [180/391], Loss: 1.3033


 49%|████▉     | 191/391 [00:30<00:30,  6.51it/s]

Epoch [15/40], Step [190/391], Loss: 1.2899


 51%|█████▏    | 201/391 [00:31<00:29,  6.52it/s]

Epoch [15/40], Step [200/391], Loss: 1.3008


 54%|█████▍    | 211/391 [00:33<00:27,  6.53it/s]

Epoch [15/40], Step [210/391], Loss: 1.2793


 57%|█████▋    | 221/391 [00:34<00:26,  6.51it/s]

Epoch [15/40], Step [220/391], Loss: 1.3161


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [15/40], Step [230/391], Loss: 1.3015


 62%|██████▏   | 241/391 [00:37<00:22,  6.52it/s]

Epoch [15/40], Step [240/391], Loss: 1.3016


 64%|██████▍   | 251/391 [00:39<00:21,  6.52it/s]

Epoch [15/40], Step [250/391], Loss: 1.3312


 67%|██████▋   | 261/391 [00:40<00:19,  6.53it/s]

Epoch [15/40], Step [260/391], Loss: 1.3517


 69%|██████▉   | 271/391 [00:42<00:18,  6.52it/s]

Epoch [15/40], Step [270/391], Loss: 1.3052


 72%|███████▏  | 281/391 [00:43<00:16,  6.51it/s]

Epoch [15/40], Step [280/391], Loss: 1.2907


 74%|███████▍  | 291/391 [00:45<00:15,  6.51it/s]

Epoch [15/40], Step [290/391], Loss: 1.3088


 77%|███████▋  | 301/391 [00:46<00:13,  6.53it/s]

Epoch [15/40], Step [300/391], Loss: 1.2951


 80%|███████▉  | 311/391 [00:48<00:12,  6.53it/s]

Epoch [15/40], Step [310/391], Loss: 1.3199


 82%|████████▏ | 321/391 [00:50<00:10,  6.52it/s]

Epoch [15/40], Step [320/391], Loss: 1.3292


 85%|████████▍ | 331/391 [00:51<00:09,  6.51it/s]

Epoch [15/40], Step [330/391], Loss: 1.3151


 87%|████████▋ | 341/391 [00:53<00:07,  6.53it/s]

Epoch [15/40], Step [340/391], Loss: 1.3029


 90%|████████▉ | 351/391 [00:54<00:06,  6.53it/s]

Epoch [15/40], Step [350/391], Loss: 1.3424


 92%|█████████▏| 361/391 [00:56<00:04,  6.53it/s]

Epoch [15/40], Step [360/391], Loss: 1.3204


 95%|█████████▍| 371/391 [00:57<00:03,  6.51it/s]

Epoch [15/40], Step [370/391], Loss: 1.3635


 97%|█████████▋| 381/391 [00:59<00:01,  6.51it/s]

Epoch [15/40], Step [380/391], Loss: 1.3421


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [15/40], Step [390/391], Loss: 1.3384





Test Accuracy of the student model on the test images: 45.44 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.19it/s]

Epoch [16/40], Step [10/391], Loss: 1.2748


  5%|▌         | 21/391 [00:04<00:56,  6.52it/s]

Epoch [16/40], Step [20/391], Loss: 1.2309


  8%|▊         | 31/391 [00:05<00:55,  6.53it/s]

Epoch [16/40], Step [30/391], Loss: 1.2512


 10%|█         | 41/391 [00:07<00:53,  6.53it/s]

Epoch [16/40], Step [40/391], Loss: 1.2220


 13%|█▎        | 51/391 [00:08<00:52,  6.53it/s]

Epoch [16/40], Step [50/391], Loss: 1.2131


 16%|█▌        | 61/391 [00:10<00:50,  6.52it/s]

Epoch [16/40], Step [60/391], Loss: 1.2253


 18%|█▊        | 71/391 [00:11<00:49,  6.51it/s]

Epoch [16/40], Step [70/391], Loss: 1.2087


 21%|██        | 81/391 [00:13<00:47,  6.53it/s]

Epoch [16/40], Step [80/391], Loss: 1.2067


 23%|██▎       | 91/391 [00:14<00:46,  6.52it/s]

Epoch [16/40], Step [90/391], Loss: 1.2036


 26%|██▌       | 101/391 [00:16<00:44,  6.52it/s]

Epoch [16/40], Step [100/391], Loss: 1.2351


 28%|██▊       | 111/391 [00:17<00:42,  6.52it/s]

Epoch [16/40], Step [110/391], Loss: 1.2506


 31%|███       | 121/391 [00:19<00:41,  6.53it/s]

Epoch [16/40], Step [120/391], Loss: 1.2193


 34%|███▎      | 131/391 [00:20<00:40,  6.47it/s]

Epoch [16/40], Step [130/391], Loss: 1.2370


 36%|███▌      | 141/391 [00:22<00:38,  6.52it/s]

Epoch [16/40], Step [140/391], Loss: 1.2237


 39%|███▊      | 151/391 [00:23<00:36,  6.52it/s]

Epoch [16/40], Step [150/391], Loss: 1.2461


 41%|████      | 161/391 [00:25<00:35,  6.53it/s]

Epoch [16/40], Step [160/391], Loss: 1.2654


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [16/40], Step [170/391], Loss: 1.2502


 46%|████▋     | 181/391 [00:28<00:32,  6.52it/s]

Epoch [16/40], Step [180/391], Loss: 1.2571


 49%|████▉     | 191/391 [00:30<00:30,  6.52it/s]

Epoch [16/40], Step [190/391], Loss: 1.2398


 51%|█████▏    | 201/391 [00:31<00:29,  6.52it/s]

Epoch [16/40], Step [200/391], Loss: 1.2275


 54%|█████▍    | 211/391 [00:33<00:27,  6.53it/s]

Epoch [16/40], Step [210/391], Loss: 1.2335


 57%|█████▋    | 221/391 [00:34<00:26,  6.53it/s]

Epoch [16/40], Step [220/391], Loss: 1.2493


 59%|█████▉    | 231/391 [00:36<00:24,  6.53it/s]

Epoch [16/40], Step [230/391], Loss: 1.2979


 62%|██████▏   | 241/391 [00:37<00:22,  6.52it/s]

Epoch [16/40], Step [240/391], Loss: 1.2633


 64%|██████▍   | 251/391 [00:39<00:21,  6.52it/s]

Epoch [16/40], Step [250/391], Loss: 1.2697


 67%|██████▋   | 261/391 [00:40<00:19,  6.51it/s]

Epoch [16/40], Step [260/391], Loss: 1.2441


 69%|██████▉   | 271/391 [00:42<00:18,  6.51it/s]

Epoch [16/40], Step [270/391], Loss: 1.2521


 72%|███████▏  | 281/391 [00:43<00:16,  6.53it/s]

Epoch [16/40], Step [280/391], Loss: 1.2696


 74%|███████▍  | 291/391 [00:45<00:15,  6.50it/s]

Epoch [16/40], Step [290/391], Loss: 1.2648


 77%|███████▋  | 301/391 [00:46<00:13,  6.52it/s]

Epoch [16/40], Step [300/391], Loss: 1.2942


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [16/40], Step [310/391], Loss: 1.2857


 82%|████████▏ | 321/391 [00:50<00:10,  6.53it/s]

Epoch [16/40], Step [320/391], Loss: 1.2929


 85%|████████▍ | 331/391 [00:51<00:09,  6.53it/s]

Epoch [16/40], Step [330/391], Loss: 1.2882


 87%|████████▋ | 341/391 [00:53<00:07,  6.50it/s]

Epoch [16/40], Step [340/391], Loss: 1.3071


 90%|████████▉ | 351/391 [00:54<00:06,  6.51it/s]

Epoch [16/40], Step [350/391], Loss: 1.3061


 92%|█████████▏| 361/391 [00:56<00:04,  6.52it/s]

Epoch [16/40], Step [360/391], Loss: 1.2789


 95%|█████████▍| 371/391 [00:57<00:03,  6.53it/s]

Epoch [16/40], Step [370/391], Loss: 1.2538


 97%|█████████▋| 381/391 [00:59<00:01,  6.53it/s]

Epoch [16/40], Step [380/391], Loss: 1.2852


100%|██████████| 391/391 [01:00<00:00,  6.44it/s]

Epoch [16/40], Step [390/391], Loss: 1.2695





Test Accuracy of the student model on the test images: 49.70 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.17it/s]

Epoch [17/40], Step [10/391], Loss: 1.2593


  5%|▌         | 21/391 [00:04<00:56,  6.52it/s]

Epoch [17/40], Step [20/391], Loss: 1.2367


  8%|▊         | 31/391 [00:05<00:55,  6.51it/s]

Epoch [17/40], Step [30/391], Loss: 1.1871


 10%|█         | 41/391 [00:07<00:53,  6.51it/s]

Epoch [17/40], Step [40/391], Loss: 1.1950


 13%|█▎        | 51/391 [00:08<00:52,  6.53it/s]

Epoch [17/40], Step [50/391], Loss: 1.1813


 16%|█▌        | 61/391 [00:10<00:50,  6.54it/s]

Epoch [17/40], Step [60/391], Loss: 1.1862


 18%|█▊        | 71/391 [00:11<00:49,  6.52it/s]

Epoch [17/40], Step [70/391], Loss: 1.1870


 21%|██        | 81/391 [00:13<00:47,  6.52it/s]

Epoch [17/40], Step [80/391], Loss: 1.1875


 23%|██▎       | 91/391 [00:14<00:46,  6.52it/s]

Epoch [17/40], Step [90/391], Loss: 1.1848


 26%|██▌       | 101/391 [00:16<00:44,  6.51it/s]

Epoch [17/40], Step [100/391], Loss: 1.1924


 28%|██▊       | 111/391 [00:18<00:42,  6.52it/s]

Epoch [17/40], Step [110/391], Loss: 1.2166


 31%|███       | 121/391 [00:19<00:41,  6.53it/s]

Epoch [17/40], Step [120/391], Loss: 1.1783


 34%|███▎      | 131/391 [00:21<00:39,  6.52it/s]

Epoch [17/40], Step [130/391], Loss: 1.2209


 36%|███▌      | 141/391 [00:22<00:38,  6.51it/s]

Epoch [17/40], Step [140/391], Loss: 1.2458


 39%|███▊      | 151/391 [00:24<00:36,  6.52it/s]

Epoch [17/40], Step [150/391], Loss: 1.2505


 41%|████      | 161/391 [00:25<00:35,  6.51it/s]

Epoch [17/40], Step [160/391], Loss: 1.2271


 44%|████▎     | 171/391 [00:27<00:33,  6.51it/s]

Epoch [17/40], Step [170/391], Loss: 1.2278


 46%|████▋     | 181/391 [00:28<00:32,  6.53it/s]

Epoch [17/40], Step [180/391], Loss: 1.2592


 49%|████▉     | 191/391 [00:30<00:30,  6.52it/s]

Epoch [17/40], Step [190/391], Loss: 1.2451


 51%|█████▏    | 201/391 [00:31<00:29,  6.50it/s]

Epoch [17/40], Step [200/391], Loss: 1.2324


 54%|█████▍    | 211/391 [00:33<00:27,  6.52it/s]

Epoch [17/40], Step [210/391], Loss: 1.2398


 57%|█████▋    | 221/391 [00:34<00:26,  6.51it/s]

Epoch [17/40], Step [220/391], Loss: 1.2248


 59%|█████▉    | 231/391 [00:36<00:24,  6.54it/s]

Epoch [17/40], Step [230/391], Loss: 1.2351


 62%|██████▏   | 241/391 [00:37<00:22,  6.53it/s]

Epoch [17/40], Step [240/391], Loss: 1.2202


 64%|██████▍   | 251/391 [00:39<00:21,  6.52it/s]

Epoch [17/40], Step [250/391], Loss: 1.2082


 67%|██████▋   | 261/391 [00:41<00:19,  6.52it/s]

Epoch [17/40], Step [260/391], Loss: 1.2093


 69%|██████▉   | 271/391 [00:42<00:18,  6.52it/s]

Epoch [17/40], Step [270/391], Loss: 1.2377


 72%|███████▏  | 281/391 [00:44<00:16,  6.51it/s]

Epoch [17/40], Step [280/391], Loss: 1.2477


 74%|███████▍  | 291/391 [00:45<00:15,  6.51it/s]

Epoch [17/40], Step [290/391], Loss: 1.2727


 77%|███████▋  | 301/391 [00:47<00:13,  6.51it/s]

Epoch [17/40], Step [300/391], Loss: 1.2706


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [17/40], Step [310/391], Loss: 1.2588


 82%|████████▏ | 321/391 [00:50<00:10,  6.52it/s]

Epoch [17/40], Step [320/391], Loss: 1.2857


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [17/40], Step [330/391], Loss: 1.2579


 87%|████████▋ | 341/391 [00:53<00:07,  6.50it/s]

Epoch [17/40], Step [340/391], Loss: 1.2754


 90%|████████▉ | 351/391 [00:54<00:06,  6.50it/s]

Epoch [17/40], Step [350/391], Loss: 1.2563


 92%|█████████▏| 361/391 [00:56<00:04,  6.52it/s]

Epoch [17/40], Step [360/391], Loss: 1.2184


 95%|█████████▍| 371/391 [00:57<00:03,  6.51it/s]

Epoch [17/40], Step [370/391], Loss: 1.2053


 97%|█████████▋| 381/391 [00:59<00:01,  6.51it/s]

Epoch [17/40], Step [380/391], Loss: 1.2490


100%|██████████| 391/391 [01:00<00:00,  6.41it/s]

Epoch [17/40], Step [390/391], Loss: 1.2835





Test Accuracy of the student model on the test images: 45.76 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.17it/s]

Epoch [18/40], Step [10/391], Loss: 1.2288


  5%|▌         | 21/391 [00:04<00:56,  6.50it/s]

Epoch [18/40], Step [20/391], Loss: 1.1827


  8%|▊         | 31/391 [00:05<00:55,  6.52it/s]

Epoch [18/40], Step [30/391], Loss: 1.1311


 10%|█         | 41/391 [00:07<00:53,  6.53it/s]

Epoch [18/40], Step [40/391], Loss: 1.1643


 13%|█▎        | 51/391 [00:08<00:52,  6.53it/s]

Epoch [18/40], Step [50/391], Loss: 1.1567


 16%|█▌        | 61/391 [00:10<00:50,  6.51it/s]

Epoch [18/40], Step [60/391], Loss: 1.1521


 18%|█▊        | 71/391 [00:11<00:49,  6.51it/s]

Epoch [18/40], Step [70/391], Loss: 1.1828


 21%|██        | 81/391 [00:13<00:47,  6.51it/s]

Epoch [18/40], Step [80/391], Loss: 1.1563


 23%|██▎       | 91/391 [00:14<00:45,  6.53it/s]

Epoch [18/40], Step [90/391], Loss: 1.1597


 26%|██▌       | 101/391 [00:16<00:44,  6.51it/s]

Epoch [18/40], Step [100/391], Loss: 1.1583


 28%|██▊       | 111/391 [00:17<00:43,  6.51it/s]

Epoch [18/40], Step [110/391], Loss: 1.1666


 31%|███       | 121/391 [00:19<00:41,  6.53it/s]

Epoch [18/40], Step [120/391], Loss: 1.1661


 34%|███▎      | 131/391 [00:21<00:39,  6.53it/s]

Epoch [18/40], Step [130/391], Loss: 1.2087


 36%|███▌      | 141/391 [00:22<00:38,  6.51it/s]

Epoch [18/40], Step [140/391], Loss: 1.2055


 39%|███▊      | 151/391 [00:24<00:36,  6.51it/s]

Epoch [18/40], Step [150/391], Loss: 1.1861


 41%|████      | 161/391 [00:25<00:35,  6.54it/s]

Epoch [18/40], Step [160/391], Loss: 1.1784


 44%|████▎     | 171/391 [00:27<00:33,  6.50it/s]

Epoch [18/40], Step [170/391], Loss: 1.1922


 46%|████▋     | 181/391 [00:28<00:32,  6.51it/s]

Epoch [18/40], Step [180/391], Loss: 1.2335


 49%|████▉     | 191/391 [00:30<00:30,  6.51it/s]

Epoch [18/40], Step [190/391], Loss: 1.1914


 51%|█████▏    | 201/391 [00:31<00:29,  6.52it/s]

Epoch [18/40], Step [200/391], Loss: 1.2238


 54%|█████▍    | 211/391 [00:33<00:27,  6.53it/s]

Epoch [18/40], Step [210/391], Loss: 1.2131


 57%|█████▋    | 221/391 [00:34<00:26,  6.52it/s]

Epoch [18/40], Step [220/391], Loss: 1.1830


 59%|█████▉    | 231/391 [00:36<00:24,  6.51it/s]

Epoch [18/40], Step [230/391], Loss: 1.2241


 62%|██████▏   | 241/391 [00:37<00:22,  6.53it/s]

Epoch [18/40], Step [240/391], Loss: 1.2009


 64%|██████▍   | 251/391 [00:39<00:21,  6.51it/s]

Epoch [18/40], Step [250/391], Loss: 1.2055


 67%|██████▋   | 261/391 [00:40<00:20,  6.48it/s]

Epoch [18/40], Step [260/391], Loss: 1.2533


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [18/40], Step [270/391], Loss: 1.2438


 72%|███████▏  | 281/391 [00:44<00:16,  6.53it/s]

Epoch [18/40], Step [280/391], Loss: 1.2258


 74%|███████▍  | 291/391 [00:45<00:15,  6.53it/s]

Epoch [18/40], Step [290/391], Loss: 1.2463


 77%|███████▋  | 301/391 [00:47<00:13,  6.52it/s]

Epoch [18/40], Step [300/391], Loss: 1.2567


 80%|███████▉  | 311/391 [00:48<00:12,  6.51it/s]

Epoch [18/40], Step [310/391], Loss: 1.2150


 82%|████████▏ | 321/391 [00:50<00:10,  6.54it/s]

Epoch [18/40], Step [320/391], Loss: 1.2136


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [18/40], Step [330/391], Loss: 1.2222


 87%|████████▋ | 341/391 [00:53<00:07,  6.51it/s]

Epoch [18/40], Step [340/391], Loss: 1.1940


 90%|████████▉ | 351/391 [00:54<00:06,  6.54it/s]

Epoch [18/40], Step [350/391], Loss: 1.2132


 92%|█████████▏| 361/391 [00:56<00:04,  6.51it/s]

Epoch [18/40], Step [360/391], Loss: 1.2533


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [18/40], Step [370/391], Loss: 1.2041


 97%|█████████▋| 381/391 [00:59<00:01,  6.53it/s]

Epoch [18/40], Step [380/391], Loss: 1.1978


100%|██████████| 391/391 [01:00<00:00,  6.42it/s]

Epoch [18/40], Step [390/391], Loss: 1.2412





Test Accuracy of the student model on the test images: 47.65 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.20it/s]

Epoch [19/40], Step [10/391], Loss: 1.1331


  5%|▌         | 21/391 [00:04<00:56,  6.51it/s]

Epoch [19/40], Step [20/391], Loss: 1.1376


  8%|▊         | 31/391 [00:05<00:55,  6.53it/s]

Epoch [19/40], Step [30/391], Loss: 1.1111


 10%|█         | 41/391 [00:07<00:53,  6.52it/s]

Epoch [19/40], Step [40/391], Loss: 1.1358


 13%|█▎        | 51/391 [00:08<00:52,  6.51it/s]

Epoch [19/40], Step [50/391], Loss: 1.1554


 16%|█▌        | 61/391 [00:10<00:50,  6.51it/s]

Epoch [19/40], Step [60/391], Loss: 1.1320


 18%|█▊        | 71/391 [00:11<00:49,  6.52it/s]

Epoch [19/40], Step [70/391], Loss: 1.1207


 21%|██        | 81/391 [00:13<00:47,  6.52it/s]

Epoch [19/40], Step [80/391], Loss: 1.1493


 23%|██▎       | 91/391 [00:14<00:46,  6.52it/s]

Epoch [19/40], Step [90/391], Loss: 1.1316


 26%|██▌       | 101/391 [00:16<00:44,  6.51it/s]

Epoch [19/40], Step [100/391], Loss: 1.1563


 28%|██▊       | 111/391 [00:17<00:42,  6.53it/s]

Epoch [19/40], Step [110/391], Loss: 1.1438


 31%|███       | 121/391 [00:19<00:41,  6.52it/s]

Epoch [19/40], Step [120/391], Loss: 1.1638


 34%|███▎      | 131/391 [00:20<00:39,  6.52it/s]

Epoch [19/40], Step [130/391], Loss: 1.1549


 36%|███▌      | 141/391 [00:22<00:38,  6.52it/s]

Epoch [19/40], Step [140/391], Loss: 1.1651


 39%|███▊      | 151/391 [00:24<00:36,  6.52it/s]

Epoch [19/40], Step [150/391], Loss: 1.1724


 41%|████      | 161/391 [00:25<00:35,  6.51it/s]

Epoch [19/40], Step [160/391], Loss: 1.1621


 44%|████▎     | 171/391 [00:27<00:33,  6.53it/s]

Epoch [19/40], Step [170/391], Loss: 1.1616


 46%|████▋     | 181/391 [00:28<00:32,  6.52it/s]

Epoch [19/40], Step [180/391], Loss: 1.1655


 49%|████▉     | 191/391 [00:30<00:30,  6.51it/s]

Epoch [19/40], Step [190/391], Loss: 1.1421


 51%|█████▏    | 201/391 [00:31<00:29,  6.51it/s]

Epoch [19/40], Step [200/391], Loss: 1.1570


 54%|█████▍    | 211/391 [00:33<00:27,  6.51it/s]

Epoch [19/40], Step [210/391], Loss: 1.1702


 57%|█████▋    | 221/391 [00:34<00:26,  6.51it/s]

Epoch [19/40], Step [220/391], Loss: 1.1794


 59%|█████▉    | 231/391 [00:36<00:24,  6.51it/s]

Epoch [19/40], Step [230/391], Loss: 1.1564


 62%|██████▏   | 241/391 [00:37<00:22,  6.52it/s]

Epoch [19/40], Step [240/391], Loss: 1.1477


 64%|██████▍   | 251/391 [00:39<00:21,  6.50it/s]

Epoch [19/40], Step [250/391], Loss: 1.1592


 67%|██████▋   | 261/391 [00:40<00:19,  6.52it/s]

Epoch [19/40], Step [260/391], Loss: 1.1968


 69%|██████▉   | 271/391 [00:42<00:18,  6.52it/s]

Epoch [19/40], Step [270/391], Loss: 1.1511


 72%|███████▏  | 281/391 [00:43<00:16,  6.52it/s]

Epoch [19/40], Step [280/391], Loss: 1.1392


 74%|███████▍  | 291/391 [00:45<00:15,  6.52it/s]

Epoch [19/40], Step [290/391], Loss: 1.1815


 77%|███████▋  | 301/391 [00:47<00:13,  6.53it/s]

Epoch [19/40], Step [300/391], Loss: 1.1841


 80%|███████▉  | 311/391 [00:48<00:12,  6.50it/s]

Epoch [19/40], Step [310/391], Loss: 1.1763


 82%|████████▏ | 321/391 [00:50<00:10,  6.52it/s]

Epoch [19/40], Step [320/391], Loss: 1.1530


 85%|████████▍ | 331/391 [00:51<00:09,  6.53it/s]

Epoch [19/40], Step [330/391], Loss: 1.1473


 87%|████████▋ | 341/391 [00:53<00:07,  6.51it/s]

Epoch [19/40], Step [340/391], Loss: 1.1565


 90%|████████▉ | 351/391 [00:54<00:06,  6.53it/s]

Epoch [19/40], Step [350/391], Loss: 1.2225


 92%|█████████▏| 361/391 [00:56<00:04,  6.52it/s]

Epoch [19/40], Step [360/391], Loss: 1.1957


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [19/40], Step [370/391], Loss: 1.2117


 97%|█████████▋| 381/391 [00:59<00:01,  6.50it/s]

Epoch [19/40], Step [380/391], Loss: 1.2125


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [19/40], Step [390/391], Loss: 1.2224





Test Accuracy of the student model on the test images: 51.15 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.21it/s]

Epoch [20/40], Step [10/391], Loss: 1.1405


  5%|▌         | 21/391 [00:04<00:56,  6.50it/s]

Epoch [20/40], Step [20/391], Loss: 1.1286


  8%|▊         | 31/391 [00:05<00:55,  6.53it/s]

Epoch [20/40], Step [30/391], Loss: 1.1137


 10%|█         | 41/391 [00:07<00:53,  6.53it/s]

Epoch [20/40], Step [40/391], Loss: 1.1260


 13%|█▎        | 51/391 [00:08<00:52,  6.53it/s]

Epoch [20/40], Step [50/391], Loss: 1.1322


 16%|█▌        | 61/391 [00:10<00:50,  6.51it/s]

Epoch [20/40], Step [60/391], Loss: 1.0981


 18%|█▊        | 71/391 [00:11<00:49,  6.50it/s]

Epoch [20/40], Step [70/391], Loss: 1.0836


 21%|██        | 81/391 [00:13<00:47,  6.52it/s]

Epoch [20/40], Step [80/391], Loss: 1.1269


 23%|██▎       | 91/391 [00:14<00:46,  6.52it/s]

Epoch [20/40], Step [90/391], Loss: 1.1125


 26%|██▌       | 101/391 [00:16<00:44,  6.50it/s]

Epoch [20/40], Step [100/391], Loss: 1.1094


 28%|██▊       | 111/391 [00:17<00:42,  6.53it/s]

Epoch [20/40], Step [110/391], Loss: 1.1112


 31%|███       | 121/391 [00:19<00:41,  6.53it/s]

Epoch [20/40], Step [120/391], Loss: 1.1327


 34%|███▎      | 131/391 [00:20<00:39,  6.53it/s]

Epoch [20/40], Step [130/391], Loss: 1.1095


 36%|███▌      | 141/391 [00:22<00:38,  6.51it/s]

Epoch [20/40], Step [140/391], Loss: 1.1045


 39%|███▊      | 151/391 [00:24<00:36,  6.51it/s]

Epoch [20/40], Step [150/391], Loss: 1.1167


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [20/40], Step [160/391], Loss: 1.1055


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [20/40], Step [170/391], Loss: 1.1180


 46%|████▋     | 181/391 [00:28<00:32,  6.52it/s]

Epoch [20/40], Step [180/391], Loss: 1.1292


 49%|████▉     | 191/391 [00:30<00:30,  6.53it/s]

Epoch [20/40], Step [190/391], Loss: 1.0849


 51%|█████▏    | 201/391 [00:31<00:29,  6.51it/s]

Epoch [20/40], Step [200/391], Loss: 1.1225


 54%|█████▍    | 211/391 [00:33<00:27,  6.51it/s]

Epoch [20/40], Step [210/391], Loss: 1.1308


 57%|█████▋    | 221/391 [00:34<00:26,  6.51it/s]

Epoch [20/40], Step [220/391], Loss: 1.1677


 59%|█████▉    | 231/391 [00:36<00:24,  6.51it/s]

Epoch [20/40], Step [230/391], Loss: 1.1316


 62%|██████▏   | 241/391 [00:37<00:23,  6.51it/s]

Epoch [20/40], Step [240/391], Loss: 1.1235


 64%|██████▍   | 251/391 [00:39<00:21,  6.50it/s]

Epoch [20/40], Step [250/391], Loss: 1.1568


 67%|██████▋   | 261/391 [00:40<00:19,  6.50it/s]

Epoch [20/40], Step [260/391], Loss: 1.1566


 69%|██████▉   | 271/391 [00:42<00:18,  6.51it/s]

Epoch [20/40], Step [270/391], Loss: 1.1353


 72%|███████▏  | 281/391 [00:43<00:16,  6.53it/s]

Epoch [20/40], Step [280/391], Loss: 1.1411


 74%|███████▍  | 291/391 [00:45<00:15,  6.53it/s]

Epoch [20/40], Step [290/391], Loss: 1.1171


 77%|███████▋  | 301/391 [00:47<00:13,  6.54it/s]

Epoch [20/40], Step [300/391], Loss: 1.1379


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [20/40], Step [310/391], Loss: 1.1504


 82%|████████▏ | 321/391 [00:50<00:10,  6.53it/s]

Epoch [20/40], Step [320/391], Loss: 1.1455


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [20/40], Step [330/391], Loss: 1.1411


 87%|████████▋ | 341/391 [00:53<00:07,  6.52it/s]

Epoch [20/40], Step [340/391], Loss: 1.1740


 90%|████████▉ | 351/391 [00:54<00:06,  6.53it/s]

Epoch [20/40], Step [350/391], Loss: 1.1476


 92%|█████████▏| 361/391 [00:56<00:04,  6.51it/s]

Epoch [20/40], Step [360/391], Loss: 1.1689


 95%|█████████▍| 371/391 [00:57<00:03,  6.51it/s]

Epoch [20/40], Step [370/391], Loss: 1.1223


 97%|█████████▋| 381/391 [00:59<00:01,  6.52it/s]

Epoch [20/40], Step [380/391], Loss: 1.1871


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [20/40], Step [390/391], Loss: 1.1848





Test Accuracy of the student model on the test images: 50.79 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.16it/s]

Epoch [21/40], Step [10/391], Loss: 1.1001


  5%|▌         | 21/391 [00:04<00:56,  6.50it/s]

Epoch [21/40], Step [20/391], Loss: 1.1039


  8%|▊         | 31/391 [00:05<00:55,  6.51it/s]

Epoch [21/40], Step [30/391], Loss: 1.0831


 10%|█         | 41/391 [00:07<00:53,  6.52it/s]

Epoch [21/40], Step [40/391], Loss: 1.0876


 13%|█▎        | 51/391 [00:08<00:52,  6.51it/s]

Epoch [21/40], Step [50/391], Loss: 1.0768


 16%|█▌        | 61/391 [00:10<00:50,  6.51it/s]

Epoch [21/40], Step [60/391], Loss: 1.0896


 18%|█▊        | 71/391 [00:11<00:49,  6.52it/s]

Epoch [21/40], Step [70/391], Loss: 1.0742


 21%|██        | 81/391 [00:13<00:47,  6.53it/s]

Epoch [21/40], Step [80/391], Loss: 1.0863


 23%|██▎       | 91/391 [00:14<00:46,  6.52it/s]

Epoch [21/40], Step [90/391], Loss: 1.0803


 26%|██▌       | 101/391 [00:16<00:44,  6.52it/s]

Epoch [21/40], Step [100/391], Loss: 1.1036


 28%|██▊       | 111/391 [00:18<00:42,  6.52it/s]

Epoch [21/40], Step [110/391], Loss: 1.0975


 31%|███       | 121/391 [00:19<00:41,  6.51it/s]

Epoch [21/40], Step [120/391], Loss: 1.0822


 34%|███▎      | 131/391 [00:21<00:39,  6.51it/s]

Epoch [21/40], Step [130/391], Loss: 1.1259


 36%|███▌      | 141/391 [00:22<00:38,  6.52it/s]

Epoch [21/40], Step [140/391], Loss: 1.1003


 39%|███▊      | 151/391 [00:24<00:36,  6.52it/s]

Epoch [21/40], Step [150/391], Loss: 1.0744


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [21/40], Step [160/391], Loss: 1.0740


 44%|████▎     | 171/391 [00:27<00:33,  6.51it/s]

Epoch [21/40], Step [170/391], Loss: 1.0904


 46%|████▋     | 181/391 [00:28<00:32,  6.52it/s]

Epoch [21/40], Step [180/391], Loss: 1.0868


 49%|████▉     | 191/391 [00:30<00:30,  6.52it/s]

Epoch [21/40], Step [190/391], Loss: 1.0855


 51%|█████▏    | 201/391 [00:31<00:29,  6.53it/s]

Epoch [21/40], Step [200/391], Loss: 1.1095


 54%|█████▍    | 211/391 [00:33<00:27,  6.53it/s]

Epoch [21/40], Step [210/391], Loss: 1.0802


 57%|█████▋    | 221/391 [00:34<00:26,  6.51it/s]

Epoch [21/40], Step [220/391], Loss: 1.1009


 59%|█████▉    | 231/391 [00:36<00:24,  6.50it/s]

Epoch [21/40], Step [230/391], Loss: 1.0928


 62%|██████▏   | 241/391 [00:37<00:22,  6.52it/s]

Epoch [21/40], Step [240/391], Loss: 1.0981


 64%|██████▍   | 251/391 [00:39<00:21,  6.52it/s]

Epoch [21/40], Step [250/391], Loss: 1.0770


 67%|██████▋   | 261/391 [00:41<00:19,  6.54it/s]

Epoch [21/40], Step [260/391], Loss: 1.0931


 69%|██████▉   | 271/391 [00:42<00:18,  6.52it/s]

Epoch [21/40], Step [270/391], Loss: 1.1047


 72%|███████▏  | 281/391 [00:44<00:16,  6.52it/s]

Epoch [21/40], Step [280/391], Loss: 1.0939


 74%|███████▍  | 291/391 [00:45<00:15,  6.51it/s]

Epoch [21/40], Step [290/391], Loss: 1.1345


 77%|███████▋  | 301/391 [00:47<00:13,  6.51it/s]

Epoch [21/40], Step [300/391], Loss: 1.1092


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [21/40], Step [310/391], Loss: 1.1140


 82%|████████▏ | 321/391 [00:50<00:10,  6.53it/s]

Epoch [21/40], Step [320/391], Loss: 1.1310


 85%|████████▍ | 331/391 [00:51<00:09,  6.51it/s]

Epoch [21/40], Step [330/391], Loss: 1.1055


 87%|████████▋ | 341/391 [00:53<00:07,  6.52it/s]

Epoch [21/40], Step [340/391], Loss: 1.1429


 90%|████████▉ | 351/391 [00:54<00:06,  6.52it/s]

Epoch [21/40], Step [350/391], Loss: 1.1290


 92%|█████████▏| 361/391 [00:56<00:04,  6.52it/s]

Epoch [21/40], Step [360/391], Loss: 1.1206


 95%|█████████▍| 371/391 [00:57<00:03,  6.51it/s]

Epoch [21/40], Step [370/391], Loss: 1.1376


 97%|█████████▋| 381/391 [00:59<00:01,  6.52it/s]

Epoch [21/40], Step [380/391], Loss: 1.1481


100%|██████████| 391/391 [01:00<00:00,  6.41it/s]

Epoch [21/40], Step [390/391], Loss: 1.1523





Test Accuracy of the student model on the test images: 50.51 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.18it/s]

Epoch [22/40], Step [10/391], Loss: 1.1044


  5%|▌         | 21/391 [00:04<00:56,  6.50it/s]

Epoch [22/40], Step [20/391], Loss: 1.0659


  8%|▊         | 31/391 [00:05<00:55,  6.54it/s]

Epoch [22/40], Step [30/391], Loss: 1.0735


 10%|█         | 41/391 [00:07<00:53,  6.53it/s]

Epoch [22/40], Step [40/391], Loss: 1.1002


 13%|█▎        | 51/391 [00:08<00:52,  6.53it/s]

Epoch [22/40], Step [50/391], Loss: 1.0740


 16%|█▌        | 61/391 [00:10<00:50,  6.53it/s]

Epoch [22/40], Step [60/391], Loss: 1.0533


 18%|█▊        | 71/391 [00:11<00:49,  6.52it/s]

Epoch [22/40], Step [70/391], Loss: 1.0571


 21%|██        | 81/391 [00:13<00:47,  6.53it/s]

Epoch [22/40], Step [80/391], Loss: 1.0400


 23%|██▎       | 91/391 [00:14<00:46,  6.52it/s]

Epoch [22/40], Step [90/391], Loss: 1.0533


 26%|██▌       | 101/391 [00:16<00:44,  6.52it/s]

Epoch [22/40], Step [100/391], Loss: 1.0426


 28%|██▊       | 111/391 [00:17<00:42,  6.51it/s]

Epoch [22/40], Step [110/391], Loss: 1.0568


 31%|███       | 121/391 [00:19<00:41,  6.52it/s]

Epoch [22/40], Step [120/391], Loss: 1.0322


 34%|███▎      | 131/391 [00:21<00:39,  6.52it/s]

Epoch [22/40], Step [130/391], Loss: 1.0492


 36%|███▌      | 141/391 [00:22<00:38,  6.51it/s]

Epoch [22/40], Step [140/391], Loss: 1.0283


 39%|███▊      | 151/391 [00:24<00:36,  6.52it/s]

Epoch [22/40], Step [150/391], Loss: 1.0607


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [22/40], Step [160/391], Loss: 1.0360


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [22/40], Step [170/391], Loss: 1.0509


 46%|████▋     | 181/391 [00:28<00:32,  6.53it/s]

Epoch [22/40], Step [180/391], Loss: 1.0753


 49%|████▉     | 191/391 [00:30<00:30,  6.52it/s]

Epoch [22/40], Step [190/391], Loss: 1.0894


 51%|█████▏    | 201/391 [00:31<00:29,  6.51it/s]

Epoch [22/40], Step [200/391], Loss: 1.0916


 54%|█████▍    | 211/391 [00:33<00:27,  6.50it/s]

Epoch [22/40], Step [210/391], Loss: 1.0849


 57%|█████▋    | 221/391 [00:34<00:26,  6.52it/s]

Epoch [22/40], Step [220/391], Loss: 1.0680


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [22/40], Step [230/391], Loss: 1.0662


 62%|██████▏   | 241/391 [00:37<00:23,  6.51it/s]

Epoch [22/40], Step [240/391], Loss: 1.0744


 64%|██████▍   | 251/391 [00:39<00:21,  6.51it/s]

Epoch [22/40], Step [250/391], Loss: 1.0442


 67%|██████▋   | 261/391 [00:40<00:19,  6.51it/s]

Epoch [22/40], Step [260/391], Loss: 1.0305


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [22/40], Step [270/391], Loss: 1.0675


 72%|███████▏  | 281/391 [00:44<00:16,  6.53it/s]

Epoch [22/40], Step [280/391], Loss: 1.0650


 74%|███████▍  | 291/391 [00:45<00:15,  6.52it/s]

Epoch [22/40], Step [290/391], Loss: 1.0718


 77%|███████▋  | 301/391 [00:47<00:13,  6.53it/s]

Epoch [22/40], Step [300/391], Loss: 1.0799


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [22/40], Step [310/391], Loss: 1.0751


 82%|████████▏ | 321/391 [00:50<00:10,  6.50it/s]

Epoch [22/40], Step [320/391], Loss: 1.0876


 85%|████████▍ | 331/391 [00:51<00:09,  6.51it/s]

Epoch [22/40], Step [330/391], Loss: 1.1082


 87%|████████▋ | 341/391 [00:53<00:07,  6.52it/s]

Epoch [22/40], Step [340/391], Loss: 1.0969


 90%|████████▉ | 351/391 [00:54<00:06,  6.54it/s]

Epoch [22/40], Step [350/391], Loss: 1.0885


 92%|█████████▏| 361/391 [00:56<00:04,  6.53it/s]

Epoch [22/40], Step [360/391], Loss: 1.0990


 95%|█████████▍| 371/391 [00:57<00:03,  6.50it/s]

Epoch [22/40], Step [370/391], Loss: 1.1012


 97%|█████████▋| 381/391 [00:59<00:01,  6.52it/s]

Epoch [22/40], Step [380/391], Loss: 1.0751


100%|██████████| 391/391 [01:00<00:00,  6.42it/s]

Epoch [22/40], Step [390/391], Loss: 1.0887





Test Accuracy of the student model on the test images: 54.19 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.19it/s]

Epoch [23/40], Step [10/391], Loss: 1.0420


  5%|▌         | 21/391 [00:04<00:56,  6.53it/s]

Epoch [23/40], Step [20/391], Loss: 1.0233


  8%|▊         | 31/391 [00:05<00:55,  6.52it/s]

Epoch [23/40], Step [30/391], Loss: 1.0018


 10%|█         | 41/391 [00:07<00:53,  6.54it/s]

Epoch [23/40], Step [40/391], Loss: 0.9997


 13%|█▎        | 51/391 [00:08<00:52,  6.51it/s]

Epoch [23/40], Step [50/391], Loss: 1.0136


 16%|█▌        | 61/391 [00:10<00:50,  6.50it/s]

Epoch [23/40], Step [60/391], Loss: 1.0026


 18%|█▊        | 71/391 [00:11<00:49,  6.53it/s]

Epoch [23/40], Step [70/391], Loss: 1.0306


 21%|██        | 81/391 [00:13<00:47,  6.51it/s]

Epoch [23/40], Step [80/391], Loss: 1.0180


 23%|██▎       | 91/391 [00:14<00:46,  6.51it/s]

Epoch [23/40], Step [90/391], Loss: 1.0064


 26%|██▌       | 101/391 [00:16<00:44,  6.53it/s]

Epoch [23/40], Step [100/391], Loss: 1.0190


 28%|██▊       | 111/391 [00:17<00:42,  6.51it/s]

Epoch [23/40], Step [110/391], Loss: 1.0422


 31%|███       | 121/391 [00:19<00:41,  6.50it/s]

Epoch [23/40], Step [120/391], Loss: 1.0171


 34%|███▎      | 131/391 [00:21<00:39,  6.52it/s]

Epoch [23/40], Step [130/391], Loss: 1.0185


 36%|███▌      | 141/391 [00:22<00:38,  6.51it/s]

Epoch [23/40], Step [140/391], Loss: 1.0067


 39%|███▊      | 151/391 [00:24<00:36,  6.50it/s]

Epoch [23/40], Step [150/391], Loss: 1.0116


 41%|████      | 161/391 [00:25<00:35,  6.51it/s]

Epoch [23/40], Step [160/391], Loss: 1.0238


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [23/40], Step [170/391], Loss: 1.0251


 46%|████▋     | 181/391 [00:28<00:32,  6.52it/s]

Epoch [23/40], Step [180/391], Loss: 1.0538


 49%|████▉     | 191/391 [00:30<00:30,  6.51it/s]

Epoch [23/40], Step [190/391], Loss: 1.0452


 51%|█████▏    | 201/391 [00:31<00:29,  6.51it/s]

Epoch [23/40], Step [200/391], Loss: 1.0232


 54%|█████▍    | 211/391 [00:33<00:27,  6.52it/s]

Epoch [23/40], Step [210/391], Loss: 1.0261


 57%|█████▋    | 221/391 [00:34<00:26,  6.51it/s]

Epoch [23/40], Step [220/391], Loss: 1.0527


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [23/40], Step [230/391], Loss: 1.0559


 62%|██████▏   | 241/391 [00:37<00:23,  6.52it/s]

Epoch [23/40], Step [240/391], Loss: 1.0381


 64%|██████▍   | 251/391 [00:39<00:21,  6.51it/s]

Epoch [23/40], Step [250/391], Loss: 1.0618


 67%|██████▋   | 261/391 [00:40<00:19,  6.52it/s]

Epoch [23/40], Step [260/391], Loss: 1.0470


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [23/40], Step [270/391], Loss: 1.0400


 72%|███████▏  | 281/391 [00:44<00:16,  6.51it/s]

Epoch [23/40], Step [280/391], Loss: 1.0262


 74%|███████▍  | 291/391 [00:45<00:15,  6.49it/s]

Epoch [23/40], Step [290/391], Loss: 1.0609


 77%|███████▋  | 301/391 [00:47<00:13,  6.53it/s]

Epoch [23/40], Step [300/391], Loss: 1.0095


 80%|███████▉  | 311/391 [00:48<00:12,  6.53it/s]

Epoch [23/40], Step [310/391], Loss: 1.0536


 82%|████████▏ | 321/391 [00:50<00:10,  6.51it/s]

Epoch [23/40], Step [320/391], Loss: 1.0214


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [23/40], Step [330/391], Loss: 1.0437


 87%|████████▋ | 341/391 [00:53<00:07,  6.53it/s]

Epoch [23/40], Step [340/391], Loss: 1.0420


 90%|████████▉ | 351/391 [00:54<00:06,  6.53it/s]

Epoch [23/40], Step [350/391], Loss: 1.0558


 92%|█████████▏| 361/391 [00:56<00:04,  6.52it/s]

Epoch [23/40], Step [360/391], Loss: 1.0449


 95%|█████████▍| 371/391 [00:57<00:03,  6.51it/s]

Epoch [23/40], Step [370/391], Loss: 1.0636


 97%|█████████▋| 381/391 [00:59<00:01,  6.51it/s]

Epoch [23/40], Step [380/391], Loss: 1.0498


100%|██████████| 391/391 [01:00<00:00,  6.42it/s]

Epoch [23/40], Step [390/391], Loss: 1.0789





Test Accuracy of the student model on the test images: 52.79 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.16it/s]

Epoch [24/40], Step [10/391], Loss: 1.0455


  5%|▌         | 21/391 [00:04<00:57,  6.48it/s]

Epoch [24/40], Step [20/391], Loss: 1.0262


  8%|▊         | 31/391 [00:05<00:55,  6.50it/s]

Epoch [24/40], Step [30/391], Loss: 0.9717


 10%|█         | 41/391 [00:07<00:53,  6.53it/s]

Epoch [24/40], Step [40/391], Loss: 0.9624


 13%|█▎        | 51/391 [00:08<00:52,  6.51it/s]

Epoch [24/40], Step [50/391], Loss: 0.9686


 16%|█▌        | 61/391 [00:10<00:50,  6.53it/s]

Epoch [24/40], Step [60/391], Loss: 0.9805


 18%|█▊        | 71/391 [00:11<00:49,  6.53it/s]

Epoch [24/40], Step [70/391], Loss: 0.9675


 21%|██        | 81/391 [00:13<00:47,  6.51it/s]

Epoch [24/40], Step [80/391], Loss: 1.0106


 23%|██▎       | 91/391 [00:14<00:46,  6.50it/s]

Epoch [24/40], Step [90/391], Loss: 0.9876


 26%|██▌       | 101/391 [00:16<00:44,  6.52it/s]

Epoch [24/40], Step [100/391], Loss: 0.9909


 28%|██▊       | 111/391 [00:18<00:43,  6.51it/s]

Epoch [24/40], Step [110/391], Loss: 0.9960


 31%|███       | 121/391 [00:19<00:41,  6.53it/s]

Epoch [24/40], Step [120/391], Loss: 0.9753


 34%|███▎      | 131/391 [00:21<00:39,  6.53it/s]

Epoch [24/40], Step [130/391], Loss: 0.9828


 36%|███▌      | 141/391 [00:22<00:38,  6.54it/s]

Epoch [24/40], Step [140/391], Loss: 1.0132


 39%|███▊      | 151/391 [00:24<00:36,  6.51it/s]

Epoch [24/40], Step [150/391], Loss: 0.9922


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [24/40], Step [160/391], Loss: 0.9770


 44%|████▎     | 171/391 [00:27<00:33,  6.51it/s]

Epoch [24/40], Step [170/391], Loss: 0.9813


 46%|████▋     | 181/391 [00:28<00:32,  6.52it/s]

Epoch [24/40], Step [180/391], Loss: 0.9840


 49%|████▉     | 191/391 [00:30<00:30,  6.53it/s]

Epoch [24/40], Step [190/391], Loss: 1.0072


 51%|█████▏    | 201/391 [00:31<00:29,  6.51it/s]

Epoch [24/40], Step [200/391], Loss: 0.9892


 54%|█████▍    | 211/391 [00:33<00:27,  6.49it/s]

Epoch [24/40], Step [210/391], Loss: 0.9970


 57%|█████▋    | 221/391 [00:34<00:26,  6.51it/s]

Epoch [24/40], Step [220/391], Loss: 0.9722


 59%|█████▉    | 231/391 [00:36<00:24,  6.51it/s]

Epoch [24/40], Step [230/391], Loss: 0.9988


 62%|██████▏   | 241/391 [00:37<00:22,  6.53it/s]

Epoch [24/40], Step [240/391], Loss: 1.0015


 64%|██████▍   | 251/391 [00:39<00:21,  6.51it/s]

Epoch [24/40], Step [250/391], Loss: 1.0099


 67%|██████▋   | 261/391 [00:41<00:19,  6.52it/s]

Epoch [24/40], Step [260/391], Loss: 1.0216


 69%|██████▉   | 271/391 [00:42<00:18,  6.54it/s]

Epoch [24/40], Step [270/391], Loss: 1.0016


 72%|███████▏  | 281/391 [00:44<00:16,  6.51it/s]

Epoch [24/40], Step [280/391], Loss: 0.9911


 74%|███████▍  | 291/391 [00:45<00:15,  6.53it/s]

Epoch [24/40], Step [290/391], Loss: 0.9941


 77%|███████▋  | 301/391 [00:47<00:13,  6.51it/s]

Epoch [24/40], Step [300/391], Loss: 1.0243


 80%|███████▉  | 311/391 [00:48<00:12,  6.53it/s]

Epoch [24/40], Step [310/391], Loss: 1.0151


 82%|████████▏ | 321/391 [00:50<00:10,  6.53it/s]

Epoch [24/40], Step [320/391], Loss: 1.0451


 85%|████████▍ | 331/391 [00:51<00:09,  6.51it/s]

Epoch [24/40], Step [330/391], Loss: 1.0385


 87%|████████▋ | 341/391 [00:53<00:07,  6.53it/s]

Epoch [24/40], Step [340/391], Loss: 1.0178


 90%|████████▉ | 351/391 [00:54<00:06,  6.53it/s]

Epoch [24/40], Step [350/391], Loss: 1.0185


 92%|█████████▏| 361/391 [00:56<00:04,  6.53it/s]

Epoch [24/40], Step [360/391], Loss: 1.0128


 95%|█████████▍| 371/391 [00:57<00:03,  6.51it/s]

Epoch [24/40], Step [370/391], Loss: 1.0488


 97%|█████████▋| 381/391 [00:59<00:01,  6.52it/s]

Epoch [24/40], Step [380/391], Loss: 1.0450


100%|██████████| 391/391 [01:00<00:00,  6.41it/s]

Epoch [24/40], Step [390/391], Loss: 1.0359





Test Accuracy of the student model on the test images: 54.39 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.21it/s]

Epoch [25/40], Step [10/391], Loss: 0.9782


  5%|▌         | 21/391 [00:04<00:56,  6.50it/s]

Epoch [25/40], Step [20/391], Loss: 0.9598


  8%|▊         | 31/391 [00:05<00:55,  6.53it/s]

Epoch [25/40], Step [30/391], Loss: 0.9365


 10%|█         | 41/391 [00:07<00:53,  6.51it/s]

Epoch [25/40], Step [40/391], Loss: 0.9440


 13%|█▎        | 51/391 [00:08<00:52,  6.51it/s]

Epoch [25/40], Step [50/391], Loss: 0.9278


 16%|█▌        | 61/391 [00:10<00:51,  6.43it/s]

Epoch [25/40], Step [60/391], Loss: 0.9160


 18%|█▊        | 71/391 [00:11<00:49,  6.51it/s]

Epoch [25/40], Step [70/391], Loss: 0.9178


 21%|██        | 81/391 [00:13<00:47,  6.52it/s]

Epoch [25/40], Step [80/391], Loss: 0.9165


 23%|██▎       | 91/391 [00:14<00:46,  6.52it/s]

Epoch [25/40], Step [90/391], Loss: 0.9242


 26%|██▌       | 101/391 [00:16<00:44,  6.51it/s]

Epoch [25/40], Step [100/391], Loss: 0.9425


 28%|██▊       | 111/391 [00:17<00:42,  6.52it/s]

Epoch [25/40], Step [110/391], Loss: 0.9299


 31%|███       | 121/391 [00:19<00:41,  6.52it/s]

Epoch [25/40], Step [120/391], Loss: 0.9217


 34%|███▎      | 131/391 [00:21<00:39,  6.50it/s]

Epoch [25/40], Step [130/391], Loss: 0.9388


 36%|███▌      | 141/391 [00:22<00:38,  6.53it/s]

Epoch [25/40], Step [140/391], Loss: 0.9622


 39%|███▊      | 151/391 [00:24<00:36,  6.51it/s]

Epoch [25/40], Step [150/391], Loss: 0.9508


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [25/40], Step [160/391], Loss: 0.9538


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [25/40], Step [170/391], Loss: 0.9695


 46%|████▋     | 181/391 [00:28<00:32,  6.53it/s]

Epoch [25/40], Step [180/391], Loss: 0.9602


 49%|████▉     | 191/391 [00:30<00:30,  6.53it/s]

Epoch [25/40], Step [190/391], Loss: 0.9673


 51%|█████▏    | 201/391 [00:31<00:29,  6.53it/s]

Epoch [25/40], Step [200/391], Loss: 0.9691


 54%|█████▍    | 211/391 [00:33<00:27,  6.50it/s]

Epoch [25/40], Step [210/391], Loss: 0.9848


 57%|█████▋    | 221/391 [00:34<00:25,  6.54it/s]

Epoch [25/40], Step [220/391], Loss: 0.9468


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [25/40], Step [230/391], Loss: 0.9827


 62%|██████▏   | 241/391 [00:37<00:23,  6.52it/s]

Epoch [25/40], Step [240/391], Loss: 0.9496


 64%|██████▍   | 251/391 [00:39<00:21,  6.52it/s]

Epoch [25/40], Step [250/391], Loss: 0.9517


 67%|██████▋   | 261/391 [00:40<00:19,  6.52it/s]

Epoch [25/40], Step [260/391], Loss: 0.9500


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [25/40], Step [270/391], Loss: 0.9796


 72%|███████▏  | 281/391 [00:43<00:16,  6.52it/s]

Epoch [25/40], Step [280/391], Loss: 0.9897


 74%|███████▍  | 291/391 [00:45<00:15,  6.51it/s]

Epoch [25/40], Step [290/391], Loss: 0.9878


 77%|███████▋  | 301/391 [00:47<00:13,  6.51it/s]

Epoch [25/40], Step [300/391], Loss: 0.9960


 80%|███████▉  | 311/391 [00:48<00:12,  6.53it/s]

Epoch [25/40], Step [310/391], Loss: 1.0109


 82%|████████▏ | 321/391 [00:50<00:10,  6.53it/s]

Epoch [25/40], Step [320/391], Loss: 0.9737


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [25/40], Step [330/391], Loss: 0.9729


 87%|████████▋ | 341/391 [00:53<00:07,  6.52it/s]

Epoch [25/40], Step [340/391], Loss: 0.9754


 90%|████████▉ | 351/391 [00:54<00:06,  6.52it/s]

Epoch [25/40], Step [350/391], Loss: 0.9850


 92%|█████████▏| 361/391 [00:56<00:04,  6.52it/s]

Epoch [25/40], Step [360/391], Loss: 0.9882


 95%|█████████▍| 371/391 [00:57<00:03,  6.53it/s]

Epoch [25/40], Step [370/391], Loss: 0.9669


 97%|█████████▋| 381/391 [00:59<00:01,  6.54it/s]

Epoch [25/40], Step [380/391], Loss: 1.0004


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [25/40], Step [390/391], Loss: 0.9798





Test Accuracy of the student model on the test images: 53.00 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.20it/s]

Epoch [26/40], Step [10/391], Loss: 0.9627


  5%|▌         | 21/391 [00:04<00:56,  6.50it/s]

Epoch [26/40], Step [20/391], Loss: 0.9223


  8%|▊         | 31/391 [00:05<00:55,  6.53it/s]

Epoch [26/40], Step [30/391], Loss: 0.9253


 10%|█         | 41/391 [00:07<00:53,  6.53it/s]

Epoch [26/40], Step [40/391], Loss: 0.9260


 13%|█▎        | 51/391 [00:08<00:52,  6.52it/s]

Epoch [26/40], Step [50/391], Loss: 0.9323


 16%|█▌        | 61/391 [00:10<00:50,  6.51it/s]

Epoch [26/40], Step [60/391], Loss: 0.9247


 18%|█▊        | 71/391 [00:11<00:48,  6.54it/s]

Epoch [26/40], Step [70/391], Loss: 0.9101


 21%|██        | 81/391 [00:13<00:47,  6.52it/s]

Epoch [26/40], Step [80/391], Loss: 0.9300


 23%|██▎       | 91/391 [00:14<00:46,  6.50it/s]

Epoch [26/40], Step [90/391], Loss: 0.9189


 26%|██▌       | 101/391 [00:16<00:44,  6.52it/s]

Epoch [26/40], Step [100/391], Loss: 0.9117


 28%|██▊       | 111/391 [00:17<00:42,  6.52it/s]

Epoch [26/40], Step [110/391], Loss: 0.9234


 31%|███       | 121/391 [00:19<00:41,  6.52it/s]

Epoch [26/40], Step [120/391], Loss: 0.9351


 34%|███▎      | 131/391 [00:20<00:39,  6.53it/s]

Epoch [26/40], Step [130/391], Loss: 0.9277


 36%|███▌      | 141/391 [00:22<00:38,  6.53it/s]

Epoch [26/40], Step [140/391], Loss: 0.9363


 39%|███▊      | 151/391 [00:24<00:36,  6.52it/s]

Epoch [26/40], Step [150/391], Loss: 0.8910


 41%|████      | 161/391 [00:25<00:35,  6.54it/s]

Epoch [26/40], Step [160/391], Loss: 0.9267


 44%|████▎     | 171/391 [00:27<00:33,  6.51it/s]

Epoch [26/40], Step [170/391], Loss: 0.9193


 46%|████▋     | 181/391 [00:28<00:32,  6.52it/s]

Epoch [26/40], Step [180/391], Loss: 0.9464


 49%|████▉     | 191/391 [00:30<00:30,  6.52it/s]

Epoch [26/40], Step [190/391], Loss: 0.9315


 51%|█████▏    | 201/391 [00:31<00:29,  6.51it/s]

Epoch [26/40], Step [200/391], Loss: 0.9179


 54%|█████▍    | 211/391 [00:33<00:27,  6.54it/s]

Epoch [26/40], Step [210/391], Loss: 0.9422


 57%|█████▋    | 221/391 [00:34<00:26,  6.53it/s]

Epoch [26/40], Step [220/391], Loss: 0.9225


 59%|█████▉    | 231/391 [00:36<00:24,  6.53it/s]

Epoch [26/40], Step [230/391], Loss: 0.9325


 62%|██████▏   | 241/391 [00:37<00:22,  6.53it/s]

Epoch [26/40], Step [240/391], Loss: 0.9485


 64%|██████▍   | 251/391 [00:39<00:21,  6.51it/s]

Epoch [26/40], Step [250/391], Loss: 0.9518


 67%|██████▋   | 261/391 [00:40<00:19,  6.51it/s]

Epoch [26/40], Step [260/391], Loss: 0.9281


 69%|██████▉   | 271/391 [00:42<00:18,  6.54it/s]

Epoch [26/40], Step [270/391], Loss: 0.9383


 72%|███████▏  | 281/391 [00:43<00:16,  6.52it/s]

Epoch [26/40], Step [280/391], Loss: 0.9398


 74%|███████▍  | 291/391 [00:45<00:15,  6.53it/s]

Epoch [26/40], Step [290/391], Loss: 0.9169


 77%|███████▋  | 301/391 [00:47<00:13,  6.53it/s]

Epoch [26/40], Step [300/391], Loss: 0.9534


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [26/40], Step [310/391], Loss: 0.9473


 82%|████████▏ | 321/391 [00:50<00:10,  6.52it/s]

Epoch [26/40], Step [320/391], Loss: 0.9209


 85%|████████▍ | 331/391 [00:51<00:09,  6.51it/s]

Epoch [26/40], Step [330/391], Loss: 0.9210


 87%|████████▋ | 341/391 [00:53<00:07,  6.52it/s]

Epoch [26/40], Step [340/391], Loss: 0.9258


 90%|████████▉ | 351/391 [00:54<00:06,  6.53it/s]

Epoch [26/40], Step [350/391], Loss: 0.9426


 92%|█████████▏| 361/391 [00:56<00:04,  6.53it/s]

Epoch [26/40], Step [360/391], Loss: 0.9681


 95%|█████████▍| 371/391 [00:57<00:03,  6.51it/s]

Epoch [26/40], Step [370/391], Loss: 0.9423


 97%|█████████▋| 381/391 [00:59<00:01,  6.52it/s]

Epoch [26/40], Step [380/391], Loss: 0.9631


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [26/40], Step [390/391], Loss: 0.9434





Test Accuracy of the student model on the test images: 55.64 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.17it/s]

Epoch [27/40], Step [10/391], Loss: 0.9137


  5%|▌         | 21/391 [00:04<00:56,  6.53it/s]

Epoch [27/40], Step [20/391], Loss: 0.9048


  8%|▊         | 31/391 [00:05<00:55,  6.52it/s]

Epoch [27/40], Step [30/391], Loss: 0.8770


 10%|█         | 41/391 [00:07<00:53,  6.54it/s]

Epoch [27/40], Step [40/391], Loss: 0.8812


 13%|█▎        | 51/391 [00:08<00:52,  6.48it/s]

Epoch [27/40], Step [50/391], Loss: 0.8711


 16%|█▌        | 61/391 [00:10<00:50,  6.52it/s]

Epoch [27/40], Step [60/391], Loss: 0.8799


 18%|█▊        | 71/391 [00:11<00:49,  6.49it/s]

Epoch [27/40], Step [70/391], Loss: 0.8872


 21%|██        | 81/391 [00:13<00:47,  6.52it/s]

Epoch [27/40], Step [80/391], Loss: 0.8807


 23%|██▎       | 91/391 [00:14<00:46,  6.51it/s]

Epoch [27/40], Step [90/391], Loss: 0.8774


 26%|██▌       | 101/391 [00:16<00:44,  6.51it/s]

Epoch [27/40], Step [100/391], Loss: 0.8944


 28%|██▊       | 111/391 [00:17<00:43,  6.50it/s]

Epoch [27/40], Step [110/391], Loss: 0.9083


 31%|███       | 121/391 [00:19<00:41,  6.51it/s]

Epoch [27/40], Step [120/391], Loss: 0.8890


 34%|███▎      | 131/391 [00:20<00:39,  6.52it/s]

Epoch [27/40], Step [130/391], Loss: 0.8790


 36%|███▌      | 141/391 [00:22<00:38,  6.53it/s]

Epoch [27/40], Step [140/391], Loss: 0.8922


 39%|███▊      | 151/391 [00:24<00:36,  6.53it/s]

Epoch [27/40], Step [150/391], Loss: 0.8869


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [27/40], Step [160/391], Loss: 0.9097


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [27/40], Step [170/391], Loss: 0.9140


 46%|████▋     | 181/391 [00:28<00:32,  6.51it/s]

Epoch [27/40], Step [180/391], Loss: 0.9240


 49%|████▉     | 191/391 [00:30<00:30,  6.52it/s]

Epoch [27/40], Step [190/391], Loss: 0.8936


 51%|█████▏    | 201/391 [00:31<00:29,  6.52it/s]

Epoch [27/40], Step [200/391], Loss: 0.8997


 54%|█████▍    | 211/391 [00:33<00:27,  6.51it/s]

Epoch [27/40], Step [210/391], Loss: 0.8937


 57%|█████▋    | 221/391 [00:34<00:26,  6.51it/s]

Epoch [27/40], Step [220/391], Loss: 0.8745


 59%|█████▉    | 231/391 [00:36<00:24,  6.53it/s]

Epoch [27/40], Step [230/391], Loss: 0.8918


 62%|██████▏   | 241/391 [00:37<00:23,  6.51it/s]

Epoch [27/40], Step [240/391], Loss: 0.8990


 64%|██████▍   | 251/391 [00:39<00:21,  6.53it/s]

Epoch [27/40], Step [250/391], Loss: 0.9053


 67%|██████▋   | 261/391 [00:40<00:19,  6.53it/s]

Epoch [27/40], Step [260/391], Loss: 0.9020


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [27/40], Step [270/391], Loss: 0.8976


 72%|███████▏  | 281/391 [00:43<00:16,  6.51it/s]

Epoch [27/40], Step [280/391], Loss: 0.9391


 74%|███████▍  | 291/391 [00:45<00:15,  6.51it/s]

Epoch [27/40], Step [290/391], Loss: 0.9046


 77%|███████▋  | 301/391 [00:47<00:13,  6.52it/s]

Epoch [27/40], Step [300/391], Loss: 0.9010


 80%|███████▉  | 311/391 [00:48<00:12,  6.53it/s]

Epoch [27/40], Step [310/391], Loss: 0.9158


 82%|████████▏ | 321/391 [00:50<00:10,  6.54it/s]

Epoch [27/40], Step [320/391], Loss: 0.9218


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [27/40], Step [330/391], Loss: 0.9068


 87%|████████▋ | 341/391 [00:53<00:07,  6.52it/s]

Epoch [27/40], Step [340/391], Loss: 0.8737


 90%|████████▉ | 351/391 [00:54<00:06,  6.54it/s]

Epoch [27/40], Step [350/391], Loss: 0.9025


 92%|█████████▏| 361/391 [00:56<00:04,  6.53it/s]

Epoch [27/40], Step [360/391], Loss: 0.9215


 95%|█████████▍| 371/391 [00:57<00:03,  6.51it/s]

Epoch [27/40], Step [370/391], Loss: 0.9174


 97%|█████████▋| 381/391 [00:59<00:01,  6.52it/s]

Epoch [27/40], Step [380/391], Loss: 0.9139


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [27/40], Step [390/391], Loss: 0.8982





Test Accuracy of the student model on the test images: 57.51 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.16it/s]

Epoch [28/40], Step [10/391], Loss: 0.8714


  5%|▌         | 21/391 [00:04<00:56,  6.52it/s]

Epoch [28/40], Step [20/391], Loss: 0.8574


  8%|▊         | 31/391 [00:05<00:55,  6.49it/s]

Epoch [28/40], Step [30/391], Loss: 0.8367


 10%|█         | 41/391 [00:07<00:53,  6.53it/s]

Epoch [28/40], Step [40/391], Loss: 0.8475


 13%|█▎        | 51/391 [00:08<00:52,  6.51it/s]

Epoch [28/40], Step [50/391], Loss: 0.8414


 16%|█▌        | 61/391 [00:10<00:50,  6.51it/s]

Epoch [28/40], Step [60/391], Loss: 0.8603


 18%|█▊        | 71/391 [00:11<00:48,  6.53it/s]

Epoch [28/40], Step [70/391], Loss: 0.8535


 21%|██        | 81/391 [00:13<00:47,  6.53it/s]

Epoch [28/40], Step [80/391], Loss: 0.8420


 23%|██▎       | 91/391 [00:14<00:46,  6.52it/s]

Epoch [28/40], Step [90/391], Loss: 0.8330


 26%|██▌       | 101/391 [00:16<00:44,  6.52it/s]

Epoch [28/40], Step [100/391], Loss: 0.8452


 28%|██▊       | 111/391 [00:18<00:42,  6.53it/s]

Epoch [28/40], Step [110/391], Loss: 0.8642


 31%|███       | 121/391 [00:19<00:41,  6.53it/s]

Epoch [28/40], Step [120/391], Loss: 0.8765


 34%|███▎      | 131/391 [00:21<00:39,  6.52it/s]

Epoch [28/40], Step [130/391], Loss: 0.8533


 36%|███▌      | 141/391 [00:22<00:38,  6.52it/s]

Epoch [28/40], Step [140/391], Loss: 0.8561


 39%|███▊      | 151/391 [00:24<00:36,  6.51it/s]

Epoch [28/40], Step [150/391], Loss: 0.8520


 41%|████      | 161/391 [00:25<00:35,  6.54it/s]

Epoch [28/40], Step [160/391], Loss: 0.8651


 44%|████▎     | 171/391 [00:27<00:33,  6.51it/s]

Epoch [28/40], Step [170/391], Loss: 0.8408


 46%|████▋     | 181/391 [00:28<00:32,  6.49it/s]

Epoch [28/40], Step [180/391], Loss: 0.8599


 49%|████▉     | 191/391 [00:30<00:30,  6.53it/s]

Epoch [28/40], Step [190/391], Loss: 0.8640


 51%|█████▏    | 201/391 [00:31<00:29,  6.52it/s]

Epoch [28/40], Step [200/391], Loss: 0.8659


 54%|█████▍    | 211/391 [00:33<00:27,  6.51it/s]

Epoch [28/40], Step [210/391], Loss: 0.8606


 57%|█████▋    | 221/391 [00:34<00:26,  6.53it/s]

Epoch [28/40], Step [220/391], Loss: 0.8619


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [28/40], Step [230/391], Loss: 0.8452


 62%|██████▏   | 241/391 [00:37<00:23,  6.50it/s]

Epoch [28/40], Step [240/391], Loss: 0.8740


 64%|██████▍   | 251/391 [00:39<00:21,  6.50it/s]

Epoch [28/40], Step [250/391], Loss: 0.8677


 67%|██████▋   | 261/391 [00:41<00:19,  6.51it/s]

Epoch [28/40], Step [260/391], Loss: 0.8601


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [28/40], Step [270/391], Loss: 0.8596


 72%|███████▏  | 281/391 [00:44<00:16,  6.53it/s]

Epoch [28/40], Step [280/391], Loss: 0.8866


 74%|███████▍  | 291/391 [00:45<00:15,  6.53it/s]

Epoch [28/40], Step [290/391], Loss: 0.8687


 77%|███████▋  | 301/391 [00:47<00:13,  6.52it/s]

Epoch [28/40], Step [300/391], Loss: 0.8521


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [28/40], Step [310/391], Loss: 0.8689


 82%|████████▏ | 321/391 [00:50<00:10,  6.51it/s]

Epoch [28/40], Step [320/391], Loss: 0.8722


 85%|████████▍ | 331/391 [00:51<00:09,  6.51it/s]

Epoch [28/40], Step [330/391], Loss: 0.8601


 87%|████████▋ | 341/391 [00:53<00:07,  6.52it/s]

Epoch [28/40], Step [340/391], Loss: 0.8816


 90%|████████▉ | 351/391 [00:54<00:06,  6.53it/s]

Epoch [28/40], Step [350/391], Loss: 0.8547


 92%|█████████▏| 361/391 [00:56<00:04,  6.51it/s]

Epoch [28/40], Step [360/391], Loss: 0.8881


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [28/40], Step [370/391], Loss: 0.8869


 97%|█████████▋| 381/391 [00:59<00:01,  6.53it/s]

Epoch [28/40], Step [380/391], Loss: 0.8669


100%|██████████| 391/391 [01:00<00:00,  6.42it/s]

Epoch [28/40], Step [390/391], Loss: 0.8676





Test Accuracy of the student model on the test images: 58.76 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.21it/s]

Epoch [29/40], Step [10/391], Loss: 0.8437


  5%|▌         | 21/391 [00:04<00:56,  6.52it/s]

Epoch [29/40], Step [20/391], Loss: 0.8502


  8%|▊         | 31/391 [00:05<00:55,  6.54it/s]

Epoch [29/40], Step [30/391], Loss: 0.8441


 10%|█         | 41/391 [00:07<00:53,  6.52it/s]

Epoch [29/40], Step [40/391], Loss: 0.8113


 13%|█▎        | 51/391 [00:08<00:52,  6.50it/s]

Epoch [29/40], Step [50/391], Loss: 0.8315


 16%|█▌        | 61/391 [00:10<00:50,  6.52it/s]

Epoch [29/40], Step [60/391], Loss: 0.8243


 18%|█▊        | 71/391 [00:11<00:48,  6.53it/s]

Epoch [29/40], Step [70/391], Loss: 0.8119


 21%|██        | 81/391 [00:13<00:47,  6.52it/s]

Epoch [29/40], Step [80/391], Loss: 0.8178


 23%|██▎       | 91/391 [00:14<00:45,  6.53it/s]

Epoch [29/40], Step [90/391], Loss: 0.8333


 26%|██▌       | 101/391 [00:16<00:44,  6.53it/s]

Epoch [29/40], Step [100/391], Loss: 0.8281


 28%|██▊       | 111/391 [00:17<00:42,  6.53it/s]

Epoch [29/40], Step [110/391], Loss: 0.8230


 31%|███       | 121/391 [00:19<00:41,  6.53it/s]

Epoch [29/40], Step [120/391], Loss: 0.8479


 34%|███▎      | 131/391 [00:20<00:39,  6.50it/s]

Epoch [29/40], Step [130/391], Loss: 0.8269


 36%|███▌      | 141/391 [00:22<00:38,  6.52it/s]

Epoch [29/40], Step [140/391], Loss: 0.8408


 39%|███▊      | 151/391 [00:24<00:36,  6.52it/s]

Epoch [29/40], Step [150/391], Loss: 0.8237


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [29/40], Step [160/391], Loss: 0.8220


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [29/40], Step [170/391], Loss: 0.8186


 46%|████▋     | 181/391 [00:28<00:32,  6.51it/s]

Epoch [29/40], Step [180/391], Loss: 0.8299


 49%|████▉     | 191/391 [00:30<00:30,  6.52it/s]

Epoch [29/40], Step [190/391], Loss: 0.8079


 51%|█████▏    | 201/391 [00:31<00:29,  6.51it/s]

Epoch [29/40], Step [200/391], Loss: 0.8384


 54%|█████▍    | 211/391 [00:33<00:27,  6.52it/s]

Epoch [29/40], Step [210/391], Loss: 0.8224


 57%|█████▋    | 221/391 [00:34<00:26,  6.52it/s]

Epoch [29/40], Step [220/391], Loss: 0.8328


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [29/40], Step [230/391], Loss: 0.8274


 62%|██████▏   | 241/391 [00:37<00:22,  6.53it/s]

Epoch [29/40], Step [240/391], Loss: 0.8355


 64%|██████▍   | 251/391 [00:39<00:21,  6.51it/s]

Epoch [29/40], Step [250/391], Loss: 0.8161


 67%|██████▋   | 261/391 [00:40<00:20,  6.50it/s]

Epoch [29/40], Step [260/391], Loss: 0.8317


 69%|██████▉   | 271/391 [00:42<00:18,  6.51it/s]

Epoch [29/40], Step [270/391], Loss: 0.8410


 72%|███████▏  | 281/391 [00:43<00:16,  6.50it/s]

Epoch [29/40], Step [280/391], Loss: 0.8317


 74%|███████▍  | 291/391 [00:45<00:15,  6.51it/s]

Epoch [29/40], Step [290/391], Loss: 0.8560


 77%|███████▋  | 301/391 [00:47<00:13,  6.51it/s]

Epoch [29/40], Step [300/391], Loss: 0.8497


 80%|███████▉  | 311/391 [00:48<00:12,  6.53it/s]

Epoch [29/40], Step [310/391], Loss: 0.8375


 82%|████████▏ | 321/391 [00:50<00:10,  6.51it/s]

Epoch [29/40], Step [320/391], Loss: 0.8404


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [29/40], Step [330/391], Loss: 0.8504


 87%|████████▋ | 341/391 [00:53<00:07,  6.51it/s]

Epoch [29/40], Step [340/391], Loss: 0.8366


 90%|████████▉ | 351/391 [00:54<00:06,  6.52it/s]

Epoch [29/40], Step [350/391], Loss: 0.8337


 92%|█████████▏| 361/391 [00:56<00:04,  6.51it/s]

Epoch [29/40], Step [360/391], Loss: 0.8658


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [29/40], Step [370/391], Loss: 0.8367


 97%|█████████▋| 381/391 [00:59<00:01,  6.53it/s]

Epoch [29/40], Step [380/391], Loss: 0.8439


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [29/40], Step [390/391], Loss: 0.8414





Test Accuracy of the student model on the test images: 60.15 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.15it/s]

Epoch [30/40], Step [10/391], Loss: 0.8005


  5%|▌         | 21/391 [00:04<00:56,  6.50it/s]

Epoch [30/40], Step [20/391], Loss: 0.8168


  8%|▊         | 31/391 [00:05<00:55,  6.53it/s]

Epoch [30/40], Step [30/391], Loss: 0.7980


 10%|█         | 41/391 [00:07<00:53,  6.55it/s]

Epoch [30/40], Step [40/391], Loss: 0.7626


 13%|█▎        | 51/391 [00:08<00:52,  6.53it/s]

Epoch [30/40], Step [50/391], Loss: 0.7973


 16%|█▌        | 61/391 [00:10<00:50,  6.53it/s]

Epoch [30/40], Step [60/391], Loss: 0.8136


 18%|█▊        | 71/391 [00:11<00:49,  6.52it/s]

Epoch [30/40], Step [70/391], Loss: 0.8195


 21%|██        | 81/391 [00:13<00:47,  6.52it/s]

Epoch [30/40], Step [80/391], Loss: 0.8077


 23%|██▎       | 91/391 [00:14<00:46,  6.51it/s]

Epoch [30/40], Step [90/391], Loss: 0.8095


 26%|██▌       | 101/391 [00:16<00:44,  6.53it/s]

Epoch [30/40], Step [100/391], Loss: 0.7806


 28%|██▊       | 111/391 [00:18<00:42,  6.52it/s]

Epoch [30/40], Step [110/391], Loss: 0.7952


 31%|███       | 121/391 [00:19<00:41,  6.52it/s]

Epoch [30/40], Step [120/391], Loss: 0.8060


 34%|███▎      | 131/391 [00:21<00:39,  6.51it/s]

Epoch [30/40], Step [130/391], Loss: 0.7860


 36%|███▌      | 141/391 [00:22<00:38,  6.52it/s]

Epoch [30/40], Step [140/391], Loss: 0.7947


 39%|███▊      | 151/391 [00:24<00:36,  6.53it/s]

Epoch [30/40], Step [150/391], Loss: 0.7884


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [30/40], Step [160/391], Loss: 0.7986


 44%|████▎     | 171/391 [00:27<00:33,  6.51it/s]

Epoch [30/40], Step [170/391], Loss: 0.8099


 46%|████▋     | 181/391 [00:28<00:32,  6.51it/s]

Epoch [30/40], Step [180/391], Loss: 0.7867


 49%|████▉     | 191/391 [00:30<00:30,  6.51it/s]

Epoch [30/40], Step [190/391], Loss: 0.7992


 51%|█████▏    | 201/391 [00:31<00:29,  6.52it/s]

Epoch [30/40], Step [200/391], Loss: 0.8142


 54%|█████▍    | 211/391 [00:33<00:27,  6.49it/s]

Epoch [30/40], Step [210/391], Loss: 0.8009


 57%|█████▋    | 221/391 [00:34<00:26,  6.52it/s]

Epoch [30/40], Step [220/391], Loss: 0.7996


 59%|█████▉    | 231/391 [00:36<00:24,  6.51it/s]

Epoch [30/40], Step [230/391], Loss: 0.8010


 62%|██████▏   | 241/391 [00:37<00:23,  6.51it/s]

Epoch [30/40], Step [240/391], Loss: 0.8115


 64%|██████▍   | 251/391 [00:39<00:21,  6.49it/s]

Epoch [30/40], Step [250/391], Loss: 0.7921


 67%|██████▋   | 261/391 [00:41<00:19,  6.53it/s]

Epoch [30/40], Step [260/391], Loss: 0.8038


 69%|██████▉   | 271/391 [00:42<00:18,  6.52it/s]

Epoch [30/40], Step [270/391], Loss: 0.7993


 72%|███████▏  | 281/391 [00:44<00:16,  6.52it/s]

Epoch [30/40], Step [280/391], Loss: 0.8092


 74%|███████▍  | 291/391 [00:45<00:15,  6.53it/s]

Epoch [30/40], Step [290/391], Loss: 0.8102


 77%|███████▋  | 301/391 [00:47<00:13,  6.52it/s]

Epoch [30/40], Step [300/391], Loss: 0.8115


 80%|███████▉  | 311/391 [00:48<00:12,  6.51it/s]

Epoch [30/40], Step [310/391], Loss: 0.7955


 82%|████████▏ | 321/391 [00:50<00:10,  6.50it/s]

Epoch [30/40], Step [320/391], Loss: 0.8248


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [30/40], Step [330/391], Loss: 0.8082


 87%|████████▋ | 341/391 [00:53<00:07,  6.53it/s]

Epoch [30/40], Step [340/391], Loss: 0.8014


 90%|████████▉ | 351/391 [00:54<00:06,  6.52it/s]

Epoch [30/40], Step [350/391], Loss: 0.8123


 92%|█████████▏| 361/391 [00:56<00:04,  6.52it/s]

Epoch [30/40], Step [360/391], Loss: 0.8072


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [30/40], Step [370/391], Loss: 0.8016


 97%|█████████▋| 381/391 [00:59<00:01,  6.51it/s]

Epoch [30/40], Step [380/391], Loss: 0.7917


100%|██████████| 391/391 [01:00<00:00,  6.42it/s]

Epoch [30/40], Step [390/391], Loss: 0.8135





Test Accuracy of the student model on the test images: 60.98 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.22it/s]

Epoch [31/40], Step [10/391], Loss: 0.7983


  5%|▌         | 21/391 [00:04<00:56,  6.50it/s]

Epoch [31/40], Step [20/391], Loss: 0.7769


  8%|▊         | 31/391 [00:05<00:55,  6.50it/s]

Epoch [31/40], Step [30/391], Loss: 0.7534


 10%|█         | 41/391 [00:07<00:53,  6.53it/s]

Epoch [31/40], Step [40/391], Loss: 0.7563


 13%|█▎        | 51/391 [00:08<00:52,  6.50it/s]

Epoch [31/40], Step [50/391], Loss: 0.7529


 16%|█▌        | 61/391 [00:10<00:50,  6.53it/s]

Epoch [31/40], Step [60/391], Loss: 0.7672


 18%|█▊        | 71/391 [00:11<00:49,  6.53it/s]

Epoch [31/40], Step [70/391], Loss: 0.7457


 21%|██        | 81/391 [00:13<00:47,  6.51it/s]

Epoch [31/40], Step [80/391], Loss: 0.7548


 23%|██▎       | 91/391 [00:14<00:45,  6.52it/s]

Epoch [31/40], Step [90/391], Loss: 0.7523


 26%|██▌       | 101/391 [00:16<00:44,  6.54it/s]

Epoch [31/40], Step [100/391], Loss: 0.7522


 28%|██▊       | 111/391 [00:17<00:42,  6.54it/s]

Epoch [31/40], Step [110/391], Loss: 0.7432


 31%|███       | 121/391 [00:19<00:41,  6.51it/s]

Epoch [31/40], Step [120/391], Loss: 0.7547


 34%|███▎      | 131/391 [00:20<00:39,  6.50it/s]

Epoch [31/40], Step [130/391], Loss: 0.7711


 36%|███▌      | 141/391 [00:22<00:38,  6.54it/s]

Epoch [31/40], Step [140/391], Loss: 0.7508


 39%|███▊      | 151/391 [00:23<00:36,  6.52it/s]

Epoch [31/40], Step [150/391], Loss: 0.7896


 41%|████      | 161/391 [00:25<00:35,  6.52it/s]

Epoch [31/40], Step [160/391], Loss: 0.7686


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [31/40], Step [170/391], Loss: 0.7798


 46%|████▋     | 181/391 [00:28<00:32,  6.52it/s]

Epoch [31/40], Step [180/391], Loss: 0.7781


 49%|████▉     | 191/391 [00:30<00:30,  6.52it/s]

Epoch [31/40], Step [190/391], Loss: 0.7849


 51%|█████▏    | 201/391 [00:31<00:29,  6.51it/s]

Epoch [31/40], Step [200/391], Loss: 0.7850


 54%|█████▍    | 211/391 [00:33<00:27,  6.52it/s]

Epoch [31/40], Step [210/391], Loss: 0.7645


 57%|█████▋    | 221/391 [00:34<00:26,  6.51it/s]

Epoch [31/40], Step [220/391], Loss: 0.7606


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [31/40], Step [230/391], Loss: 0.7504


 62%|██████▏   | 241/391 [00:37<00:22,  6.53it/s]

Epoch [31/40], Step [240/391], Loss: 0.7691


 64%|██████▍   | 251/391 [00:39<00:21,  6.52it/s]

Epoch [31/40], Step [250/391], Loss: 0.7775


 67%|██████▋   | 261/391 [00:40<00:19,  6.54it/s]

Epoch [31/40], Step [260/391], Loss: 0.7763


 69%|██████▉   | 271/391 [00:42<00:18,  6.51it/s]

Epoch [31/40], Step [270/391], Loss: 0.7802


 72%|███████▏  | 281/391 [00:43<00:16,  6.51it/s]

Epoch [31/40], Step [280/391], Loss: 0.7841


 74%|███████▍  | 291/391 [00:45<00:15,  6.41it/s]

Epoch [31/40], Step [290/391], Loss: 0.7577


 77%|███████▋  | 301/391 [00:47<00:13,  6.51it/s]

Epoch [31/40], Step [300/391], Loss: 0.7705


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [31/40], Step [310/391], Loss: 0.7702


 82%|████████▏ | 321/391 [00:50<00:10,  6.51it/s]

Epoch [31/40], Step [320/391], Loss: 0.7805


 85%|████████▍ | 331/391 [00:51<00:09,  6.53it/s]

Epoch [31/40], Step [330/391], Loss: 0.7707


 87%|████████▋ | 341/391 [00:53<00:07,  6.53it/s]

Epoch [31/40], Step [340/391], Loss: 0.7718


 90%|████████▉ | 351/391 [00:54<00:06,  6.53it/s]

Epoch [31/40], Step [350/391], Loss: 0.7860


 92%|█████████▏| 361/391 [00:56<00:04,  6.51it/s]

Epoch [31/40], Step [360/391], Loss: 0.7928


 95%|█████████▍| 371/391 [00:57<00:03,  6.51it/s]

Epoch [31/40], Step [370/391], Loss: 0.7873


 97%|█████████▋| 381/391 [00:59<00:01,  6.50it/s]

Epoch [31/40], Step [380/391], Loss: 0.7947


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [31/40], Step [390/391], Loss: 0.7827





Test Accuracy of the student model on the test images: 60.51 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.17it/s]

Epoch [32/40], Step [10/391], Loss: 0.7633


  5%|▌         | 21/391 [00:04<00:56,  6.53it/s]

Epoch [32/40], Step [20/391], Loss: 0.7580


  8%|▊         | 31/391 [00:05<00:55,  6.52it/s]

Epoch [32/40], Step [30/391], Loss: 0.7414


 10%|█         | 41/391 [00:07<00:53,  6.53it/s]

Epoch [32/40], Step [40/391], Loss: 0.7195


 13%|█▎        | 51/391 [00:08<00:52,  6.53it/s]

Epoch [32/40], Step [50/391], Loss: 0.7195


 16%|█▌        | 61/391 [00:10<00:50,  6.52it/s]

Epoch [32/40], Step [60/391], Loss: 0.7448


 18%|█▊        | 71/391 [00:11<00:49,  6.52it/s]

Epoch [32/40], Step [70/391], Loss: 0.7291


 21%|██        | 81/391 [00:13<00:47,  6.52it/s]

Epoch [32/40], Step [80/391], Loss: 0.7300


 23%|██▎       | 91/391 [00:14<00:46,  6.49it/s]

Epoch [32/40], Step [90/391], Loss: 0.7301


 26%|██▌       | 101/391 [00:16<00:44,  6.51it/s]

Epoch [32/40], Step [100/391], Loss: 0.7291


 28%|██▊       | 111/391 [00:17<00:42,  6.52it/s]

Epoch [32/40], Step [110/391], Loss: 0.7517


 31%|███       | 121/391 [00:19<00:41,  6.51it/s]

Epoch [32/40], Step [120/391], Loss: 0.7439


 34%|███▎      | 131/391 [00:21<00:39,  6.51it/s]

Epoch [32/40], Step [130/391], Loss: 0.7436


 36%|███▌      | 141/391 [00:22<00:38,  6.53it/s]

Epoch [32/40], Step [140/391], Loss: 0.7364


 39%|███▊      | 151/391 [00:24<00:36,  6.53it/s]

Epoch [32/40], Step [150/391], Loss: 0.7377


 41%|████      | 161/391 [00:25<00:35,  6.51it/s]

Epoch [32/40], Step [160/391], Loss: 0.7267


 44%|████▎     | 171/391 [00:27<00:33,  6.51it/s]

Epoch [32/40], Step [170/391], Loss: 0.7368


 46%|████▋     | 181/391 [00:28<00:32,  6.51it/s]

Epoch [32/40], Step [180/391], Loss: 0.7397


 49%|████▉     | 191/391 [00:30<00:30,  6.51it/s]

Epoch [32/40], Step [190/391], Loss: 0.7385


 51%|█████▏    | 201/391 [00:31<00:29,  6.53it/s]

Epoch [32/40], Step [200/391], Loss: 0.7226


 54%|█████▍    | 211/391 [00:33<00:27,  6.53it/s]

Epoch [32/40], Step [210/391], Loss: 0.7355


 57%|█████▋    | 221/391 [00:34<00:26,  6.53it/s]

Epoch [32/40], Step [220/391], Loss: 0.7596


 59%|█████▉    | 231/391 [00:36<00:24,  6.53it/s]

Epoch [32/40], Step [230/391], Loss: 0.7258


 62%|██████▏   | 241/391 [00:37<00:23,  6.52it/s]

Epoch [32/40], Step [240/391], Loss: 0.7496


 64%|██████▍   | 251/391 [00:39<00:21,  6.51it/s]

Epoch [32/40], Step [250/391], Loss: 0.7488


 67%|██████▋   | 261/391 [00:40<00:19,  6.51it/s]

Epoch [32/40], Step [260/391], Loss: 0.7568


 69%|██████▉   | 271/391 [00:42<00:18,  6.54it/s]

Epoch [32/40], Step [270/391], Loss: 0.7477


 72%|███████▏  | 281/391 [00:44<00:16,  6.52it/s]

Epoch [32/40], Step [280/391], Loss: 0.7439


 74%|███████▍  | 291/391 [00:45<00:15,  6.53it/s]

Epoch [32/40], Step [290/391], Loss: 0.7417


 77%|███████▋  | 301/391 [00:47<00:13,  6.52it/s]

Epoch [32/40], Step [300/391], Loss: 0.7479


 80%|███████▉  | 311/391 [00:48<00:12,  6.53it/s]

Epoch [32/40], Step [310/391], Loss: 0.7488


 82%|████████▏ | 321/391 [00:50<00:10,  6.50it/s]

Epoch [32/40], Step [320/391], Loss: 0.7526


 85%|████████▍ | 331/391 [00:51<00:09,  6.51it/s]

Epoch [32/40], Step [330/391], Loss: 0.7429


 87%|████████▋ | 341/391 [00:53<00:07,  6.53it/s]

Epoch [32/40], Step [340/391], Loss: 0.7459


 90%|████████▉ | 351/391 [00:54<00:06,  6.51it/s]

Epoch [32/40], Step [350/391], Loss: 0.7389


 92%|█████████▏| 361/391 [00:56<00:04,  6.53it/s]

Epoch [32/40], Step [360/391], Loss: 0.7587


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [32/40], Step [370/391], Loss: 0.7513


 97%|█████████▋| 381/391 [00:59<00:01,  6.53it/s]

Epoch [32/40], Step [380/391], Loss: 0.7488


100%|██████████| 391/391 [01:00<00:00,  6.42it/s]

Epoch [32/40], Step [390/391], Loss: 0.7415





Test Accuracy of the student model on the test images: 61.34 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.18it/s]

Epoch [33/40], Step [10/391], Loss: 0.7338


  5%|▌         | 21/391 [00:04<00:56,  6.51it/s]

Epoch [33/40], Step [20/391], Loss: 0.7140


  8%|▊         | 31/391 [00:05<00:55,  6.49it/s]

Epoch [33/40], Step [30/391], Loss: 0.7062


 10%|█         | 41/391 [00:07<00:53,  6.51it/s]

Epoch [33/40], Step [40/391], Loss: 0.7175


 13%|█▎        | 51/391 [00:08<00:52,  6.50it/s]

Epoch [33/40], Step [50/391], Loss: 0.7066


 16%|█▌        | 61/391 [00:10<00:50,  6.52it/s]

Epoch [33/40], Step [60/391], Loss: 0.7135


 18%|█▊        | 71/391 [00:11<00:49,  6.53it/s]

Epoch [33/40], Step [70/391], Loss: 0.7252


 21%|██        | 81/391 [00:13<00:47,  6.53it/s]

Epoch [33/40], Step [80/391], Loss: 0.7150


 23%|██▎       | 91/391 [00:14<00:46,  6.52it/s]

Epoch [33/40], Step [90/391], Loss: 0.7227


 26%|██▌       | 101/391 [00:16<00:44,  6.52it/s]

Epoch [33/40], Step [100/391], Loss: 0.7054


 28%|██▊       | 111/391 [00:17<00:42,  6.53it/s]

Epoch [33/40], Step [110/391], Loss: 0.7219


 31%|███       | 121/391 [00:19<00:41,  6.49it/s]

Epoch [33/40], Step [120/391], Loss: 0.7119


 34%|███▎      | 131/391 [00:20<00:39,  6.51it/s]

Epoch [33/40], Step [130/391], Loss: 0.7068


 36%|███▌      | 141/391 [00:22<00:38,  6.51it/s]

Epoch [33/40], Step [140/391], Loss: 0.7127


 39%|███▊      | 151/391 [00:24<00:36,  6.51it/s]

Epoch [33/40], Step [150/391], Loss: 0.7039


 41%|████      | 161/391 [00:25<00:35,  6.51it/s]

Epoch [33/40], Step [160/391], Loss: 0.7050


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [33/40], Step [170/391], Loss: 0.7306


 46%|████▋     | 181/391 [00:28<00:32,  6.51it/s]

Epoch [33/40], Step [180/391], Loss: 0.7285


 49%|████▉     | 191/391 [00:30<00:30,  6.53it/s]

Epoch [33/40], Step [190/391], Loss: 0.7188


 51%|█████▏    | 201/391 [00:31<00:29,  6.51it/s]

Epoch [33/40], Step [200/391], Loss: 0.7116


 54%|█████▍    | 211/391 [00:33<00:27,  6.51it/s]

Epoch [33/40], Step [210/391], Loss: 0.7173


 57%|█████▋    | 221/391 [00:34<00:26,  6.52it/s]

Epoch [33/40], Step [220/391], Loss: 0.7059


 59%|█████▉    | 231/391 [00:36<00:25,  6.27it/s]

Epoch [33/40], Step [230/391], Loss: 0.7236


 62%|██████▏   | 241/391 [00:37<00:23,  6.50it/s]

Epoch [33/40], Step [240/391], Loss: 0.7099


 64%|██████▍   | 251/391 [00:39<00:21,  6.52it/s]

Epoch [33/40], Step [250/391], Loss: 0.7159


 67%|██████▋   | 261/391 [00:40<00:19,  6.52it/s]

Epoch [33/40], Step [260/391], Loss: 0.7334


 69%|██████▉   | 271/391 [00:42<00:18,  6.51it/s]

Epoch [33/40], Step [270/391], Loss: 0.7035


 72%|███████▏  | 281/391 [00:43<00:16,  6.51it/s]

Epoch [33/40], Step [280/391], Loss: 0.6995


 74%|███████▍  | 291/391 [00:45<00:15,  6.52it/s]

Epoch [33/40], Step [290/391], Loss: 0.7030


 77%|███████▋  | 301/391 [00:47<00:13,  6.52it/s]

Epoch [33/40], Step [300/391], Loss: 0.7138


 80%|███████▉  | 311/391 [00:48<00:12,  6.53it/s]

Epoch [33/40], Step [310/391], Loss: 0.7235


 82%|████████▏ | 321/391 [00:50<00:10,  6.53it/s]

Epoch [33/40], Step [320/391], Loss: 0.7201


 85%|████████▍ | 331/391 [00:51<00:09,  6.51it/s]

Epoch [33/40], Step [330/391], Loss: 0.7430


 87%|████████▋ | 341/391 [00:53<00:07,  6.51it/s]

Epoch [33/40], Step [340/391], Loss: 0.7142


 90%|████████▉ | 351/391 [00:54<00:06,  6.53it/s]

Epoch [33/40], Step [350/391], Loss: 0.7216


 92%|█████████▏| 361/391 [00:56<00:04,  6.51it/s]

Epoch [33/40], Step [360/391], Loss: 0.7202


 95%|█████████▍| 371/391 [00:57<00:03,  6.51it/s]

Epoch [33/40], Step [370/391], Loss: 0.7338


 97%|█████████▋| 381/391 [00:59<00:01,  6.53it/s]

Epoch [33/40], Step [380/391], Loss: 0.7396


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [33/40], Step [390/391], Loss: 0.7376





Test Accuracy of the student model on the test images: 61.69 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.17it/s]

Epoch [34/40], Step [10/391], Loss: 0.6909


  5%|▌         | 21/391 [00:04<00:56,  6.51it/s]

Epoch [34/40], Step [20/391], Loss: 0.6826


  8%|▊         | 31/391 [00:05<00:55,  6.52it/s]

Epoch [34/40], Step [30/391], Loss: 0.6887


 10%|█         | 41/391 [00:07<00:53,  6.54it/s]

Epoch [34/40], Step [40/391], Loss: 0.6813


 13%|█▎        | 51/391 [00:08<00:52,  6.52it/s]

Epoch [34/40], Step [50/391], Loss: 0.6975


 16%|█▌        | 61/391 [00:10<00:50,  6.53it/s]

Epoch [34/40], Step [60/391], Loss: 0.7026


 18%|█▊        | 71/391 [00:11<00:48,  6.53it/s]

Epoch [34/40], Step [70/391], Loss: 0.6940


 21%|██        | 81/391 [00:13<00:47,  6.51it/s]

Epoch [34/40], Step [80/391], Loss: 0.6866


 23%|██▎       | 91/391 [00:14<00:46,  6.51it/s]

Epoch [34/40], Step [90/391], Loss: 0.6967


 26%|██▌       | 101/391 [00:16<00:44,  6.52it/s]

Epoch [34/40], Step [100/391], Loss: 0.6919


 28%|██▊       | 111/391 [00:17<00:42,  6.52it/s]

Epoch [34/40], Step [110/391], Loss: 0.6854


 31%|███       | 121/391 [00:19<00:41,  6.53it/s]

Epoch [34/40], Step [120/391], Loss: 0.6953


 34%|███▎      | 131/391 [00:20<00:39,  6.52it/s]

Epoch [34/40], Step [130/391], Loss: 0.6838


 36%|███▌      | 141/391 [00:22<00:38,  6.52it/s]

Epoch [34/40], Step [140/391], Loss: 0.6739


 39%|███▊      | 151/391 [00:24<00:36,  6.52it/s]

Epoch [34/40], Step [150/391], Loss: 0.6998


 41%|████      | 161/391 [00:25<00:35,  6.51it/s]

Epoch [34/40], Step [160/391], Loss: 0.6894


 44%|████▎     | 171/391 [00:27<00:33,  6.51it/s]

Epoch [34/40], Step [170/391], Loss: 0.6886


 46%|████▋     | 181/391 [00:28<00:32,  6.53it/s]

Epoch [34/40], Step [180/391], Loss: 0.6951


 49%|████▉     | 191/391 [00:30<00:30,  6.51it/s]

Epoch [34/40], Step [190/391], Loss: 0.6905


 51%|█████▏    | 201/391 [00:31<00:29,  6.53it/s]

Epoch [34/40], Step [200/391], Loss: 0.6772


 54%|█████▍    | 211/391 [00:33<00:27,  6.52it/s]

Epoch [34/40], Step [210/391], Loss: 0.6886


 57%|█████▋    | 221/391 [00:34<00:26,  6.53it/s]

Epoch [34/40], Step [220/391], Loss: 0.6944


 59%|█████▉    | 231/391 [00:36<00:24,  6.49it/s]

Epoch [34/40], Step [230/391], Loss: 0.6927


 62%|██████▏   | 241/391 [00:37<00:23,  6.50it/s]

Epoch [34/40], Step [240/391], Loss: 0.7005


 64%|██████▍   | 251/391 [00:39<00:21,  6.52it/s]

Epoch [34/40], Step [250/391], Loss: 0.7202


 67%|██████▋   | 261/391 [00:40<00:19,  6.51it/s]

Epoch [34/40], Step [260/391], Loss: 0.6794


 69%|██████▉   | 271/391 [00:42<00:18,  6.51it/s]

Epoch [34/40], Step [270/391], Loss: 0.6889


 72%|███████▏  | 281/391 [00:43<00:16,  6.53it/s]

Epoch [34/40], Step [280/391], Loss: 0.6945


 74%|███████▍  | 291/391 [00:45<00:15,  6.52it/s]

Epoch [34/40], Step [290/391], Loss: 0.6934


 77%|███████▋  | 301/391 [00:47<00:13,  6.53it/s]

Epoch [34/40], Step [300/391], Loss: 0.6857


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [34/40], Step [310/391], Loss: 0.6731


 82%|████████▏ | 321/391 [00:50<00:10,  6.50it/s]

Epoch [34/40], Step [320/391], Loss: 0.6911


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [34/40], Step [330/391], Loss: 0.6955


 87%|████████▋ | 341/391 [00:53<00:07,  6.51it/s]

Epoch [34/40], Step [340/391], Loss: 0.6964


 90%|████████▉ | 351/391 [00:54<00:06,  6.51it/s]

Epoch [34/40], Step [350/391], Loss: 0.6951


 92%|█████████▏| 361/391 [00:56<00:04,  6.53it/s]

Epoch [34/40], Step [360/391], Loss: 0.6970


 95%|█████████▍| 371/391 [00:57<00:03,  6.53it/s]

Epoch [34/40], Step [370/391], Loss: 0.6884


 97%|█████████▋| 381/391 [00:59<00:01,  6.54it/s]

Epoch [34/40], Step [380/391], Loss: 0.6751


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [34/40], Step [390/391], Loss: 0.7068





Test Accuracy of the student model on the test images: 62.43 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.23it/s]

Epoch [35/40], Step [10/391], Loss: 0.6926


  5%|▌         | 21/391 [00:04<00:56,  6.52it/s]

Epoch [35/40], Step [20/391], Loss: 0.6775


  8%|▊         | 31/391 [00:05<00:55,  6.52it/s]

Epoch [35/40], Step [30/391], Loss: 0.6795


 10%|█         | 41/391 [00:07<00:53,  6.53it/s]

Epoch [35/40], Step [40/391], Loss: 0.6574


 13%|█▎        | 51/391 [00:08<00:52,  6.51it/s]

Epoch [35/40], Step [50/391], Loss: 0.6825


 16%|█▌        | 61/391 [00:10<00:50,  6.52it/s]

Epoch [35/40], Step [60/391], Loss: 0.6479


 18%|█▊        | 71/391 [00:11<00:49,  6.51it/s]

Epoch [35/40], Step [70/391], Loss: 0.6777


 21%|██        | 81/391 [00:13<00:47,  6.51it/s]

Epoch [35/40], Step [80/391], Loss: 0.6714


 23%|██▎       | 91/391 [00:14<00:46,  6.52it/s]

Epoch [35/40], Step [90/391], Loss: 0.6776


 26%|██▌       | 101/391 [00:16<00:44,  6.52it/s]

Epoch [35/40], Step [100/391], Loss: 0.6833


 28%|██▊       | 111/391 [00:17<00:42,  6.53it/s]

Epoch [35/40], Step [110/391], Loss: 0.6725


 31%|███       | 121/391 [00:19<00:41,  6.50it/s]

Epoch [35/40], Step [120/391], Loss: 0.6661


 34%|███▎      | 131/391 [00:20<00:39,  6.51it/s]

Epoch [35/40], Step [130/391], Loss: 0.6560


 36%|███▌      | 141/391 [00:22<00:38,  6.53it/s]

Epoch [35/40], Step [140/391], Loss: 0.6500


 39%|███▊      | 151/391 [00:23<00:36,  6.52it/s]

Epoch [35/40], Step [150/391], Loss: 0.6618


 41%|████      | 161/391 [00:25<00:35,  6.51it/s]

Epoch [35/40], Step [160/391], Loss: 0.6776


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [35/40], Step [170/391], Loss: 0.6720


 46%|████▋     | 181/391 [00:28<00:32,  6.53it/s]

Epoch [35/40], Step [180/391], Loss: 0.6803


 49%|████▉     | 191/391 [00:30<00:30,  6.51it/s]

Epoch [35/40], Step [190/391], Loss: 0.6814


 51%|█████▏    | 201/391 [00:31<00:29,  6.51it/s]

Epoch [35/40], Step [200/391], Loss: 0.6803


 54%|█████▍    | 211/391 [00:33<00:27,  6.51it/s]

Epoch [35/40], Step [210/391], Loss: 0.6814


 57%|█████▋    | 221/391 [00:34<00:26,  6.53it/s]

Epoch [35/40], Step [220/391], Loss: 0.6779


 59%|█████▉    | 231/391 [00:36<00:24,  6.53it/s]

Epoch [35/40], Step [230/391], Loss: 0.6877


 62%|██████▏   | 241/391 [00:37<00:23,  6.51it/s]

Epoch [35/40], Step [240/391], Loss: 0.6675


 64%|██████▍   | 251/391 [00:39<00:21,  6.51it/s]

Epoch [35/40], Step [250/391], Loss: 0.6668


 67%|██████▋   | 261/391 [00:40<00:19,  6.51it/s]

Epoch [35/40], Step [260/391], Loss: 0.6727


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [35/40], Step [270/391], Loss: 0.6758


 72%|███████▏  | 281/391 [00:43<00:16,  6.52it/s]

Epoch [35/40], Step [280/391], Loss: 0.6637


 74%|███████▍  | 291/391 [00:45<00:15,  6.51it/s]

Epoch [35/40], Step [290/391], Loss: 0.6747


 77%|███████▋  | 301/391 [00:46<00:13,  6.52it/s]

Epoch [35/40], Step [300/391], Loss: 0.6948


 80%|███████▉  | 311/391 [00:48<00:12,  6.53it/s]

Epoch [35/40], Step [310/391], Loss: 0.6848


 82%|████████▏ | 321/391 [00:50<00:10,  6.51it/s]

Epoch [35/40], Step [320/391], Loss: 0.6712


 85%|████████▍ | 331/391 [00:51<00:09,  6.53it/s]

Epoch [35/40], Step [330/391], Loss: 0.6601


 87%|████████▋ | 341/391 [00:53<00:07,  6.54it/s]

Epoch [35/40], Step [340/391], Loss: 0.6685


 90%|████████▉ | 351/391 [00:54<00:06,  6.51it/s]

Epoch [35/40], Step [350/391], Loss: 0.6677


 92%|█████████▏| 361/391 [00:56<00:04,  6.50it/s]

Epoch [35/40], Step [360/391], Loss: 0.7008


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [35/40], Step [370/391], Loss: 0.6726


 97%|█████████▋| 381/391 [00:59<00:01,  6.52it/s]

Epoch [35/40], Step [380/391], Loss: 0.6860


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [35/40], Step [390/391], Loss: 0.6726





Test Accuracy of the student model on the test images: 62.87 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.15it/s]

Epoch [36/40], Step [10/391], Loss: 0.6570


  5%|▌         | 21/391 [00:04<00:56,  6.51it/s]

Epoch [36/40], Step [20/391], Loss: 0.6656


  8%|▊         | 31/391 [00:05<00:55,  6.52it/s]

Epoch [36/40], Step [30/391], Loss: 0.6436


 10%|█         | 41/391 [00:07<00:53,  6.52it/s]

Epoch [36/40], Step [40/391], Loss: 0.6574


 13%|█▎        | 51/391 [00:08<00:52,  6.51it/s]

Epoch [36/40], Step [50/391], Loss: 0.6602


 16%|█▌        | 61/391 [00:10<00:50,  6.53it/s]

Epoch [36/40], Step [60/391], Loss: 0.6602


 18%|█▊        | 71/391 [00:11<00:49,  6.51it/s]

Epoch [36/40], Step [70/391], Loss: 0.6570


 21%|██        | 81/391 [00:13<00:47,  6.50it/s]

Epoch [36/40], Step [80/391], Loss: 0.6440


 23%|██▎       | 91/391 [00:14<00:46,  6.50it/s]

Epoch [36/40], Step [90/391], Loss: 0.6490


 26%|██▌       | 101/391 [00:16<00:44,  6.51it/s]

Epoch [36/40], Step [100/391], Loss: 0.6566


 28%|██▊       | 111/391 [00:18<00:42,  6.52it/s]

Epoch [36/40], Step [110/391], Loss: 0.6488


 31%|███       | 121/391 [00:19<00:41,  6.54it/s]

Epoch [36/40], Step [120/391], Loss: 0.6544


 34%|███▎      | 131/391 [00:21<00:39,  6.53it/s]

Epoch [36/40], Step [130/391], Loss: 0.6573


 36%|███▌      | 141/391 [00:22<00:38,  6.53it/s]

Epoch [36/40], Step [140/391], Loss: 0.6549


 39%|███▊      | 151/391 [00:24<00:37,  6.48it/s]

Epoch [36/40], Step [150/391], Loss: 0.6401


 41%|████      | 161/391 [00:25<00:35,  6.49it/s]

Epoch [36/40], Step [160/391], Loss: 0.6712


 44%|████▎     | 171/391 [00:27<00:33,  6.50it/s]

Epoch [36/40], Step [170/391], Loss: 0.6329


 46%|████▋     | 181/391 [00:28<00:32,  6.52it/s]

Epoch [36/40], Step [180/391], Loss: 0.6690


 49%|████▉     | 191/391 [00:30<00:30,  6.52it/s]

Epoch [36/40], Step [190/391], Loss: 0.6506


 51%|█████▏    | 201/391 [00:31<00:29,  6.52it/s]

Epoch [36/40], Step [200/391], Loss: 0.6481


 54%|█████▍    | 211/391 [00:33<00:27,  6.52it/s]

Epoch [36/40], Step [210/391], Loss: 0.6652


 57%|█████▋    | 221/391 [00:34<00:26,  6.52it/s]

Epoch [36/40], Step [220/391], Loss: 0.6437


 59%|█████▉    | 231/391 [00:36<00:24,  6.53it/s]

Epoch [36/40], Step [230/391], Loss: 0.6490


 62%|██████▏   | 241/391 [00:37<00:23,  6.50it/s]

Epoch [36/40], Step [240/391], Loss: 0.6478


 64%|██████▍   | 251/391 [00:39<00:21,  6.51it/s]

Epoch [36/40], Step [250/391], Loss: 0.6553


 67%|██████▋   | 261/391 [00:41<00:19,  6.53it/s]

Epoch [36/40], Step [260/391], Loss: 0.6605


 69%|██████▉   | 271/391 [00:42<00:18,  6.52it/s]

Epoch [36/40], Step [270/391], Loss: 0.6689


 72%|███████▏  | 281/391 [00:44<00:16,  6.51it/s]

Epoch [36/40], Step [280/391], Loss: 0.6566


 74%|███████▍  | 291/391 [00:45<00:15,  6.52it/s]

Epoch [36/40], Step [290/391], Loss: 0.6489


 77%|███████▋  | 301/391 [00:47<00:13,  6.52it/s]

Epoch [36/40], Step [300/391], Loss: 0.6521


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [36/40], Step [310/391], Loss: 0.6594


 82%|████████▏ | 321/391 [00:50<00:10,  6.50it/s]

Epoch [36/40], Step [320/391], Loss: 0.6639


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [36/40], Step [330/391], Loss: 0.6576


 87%|████████▋ | 341/391 [00:53<00:07,  6.52it/s]

Epoch [36/40], Step [340/391], Loss: 0.6576


 90%|████████▉ | 351/391 [00:54<00:06,  6.51it/s]

Epoch [36/40], Step [350/391], Loss: 0.6665


 92%|█████████▏| 361/391 [00:56<00:04,  6.53it/s]

Epoch [36/40], Step [360/391], Loss: 0.6509


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [36/40], Step [370/391], Loss: 0.6619


 97%|█████████▋| 381/391 [00:59<00:01,  6.52it/s]

Epoch [36/40], Step [380/391], Loss: 0.6637


100%|██████████| 391/391 [01:01<00:00,  6.41it/s]

Epoch [36/40], Step [390/391], Loss: 0.6472





Test Accuracy of the student model on the test images: 63.40 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.21it/s]

Epoch [37/40], Step [10/391], Loss: 0.6289


  5%|▌         | 21/391 [00:04<00:56,  6.50it/s]

Epoch [37/40], Step [20/391], Loss: 0.6524


  8%|▊         | 31/391 [00:05<00:55,  6.54it/s]

Epoch [37/40], Step [30/391], Loss: 0.6347


 10%|█         | 41/391 [00:07<00:53,  6.51it/s]

Epoch [37/40], Step [40/391], Loss: 0.6490


 13%|█▎        | 51/391 [00:08<00:52,  6.52it/s]

Epoch [37/40], Step [50/391], Loss: 0.6412


 16%|█▌        | 61/391 [00:10<00:50,  6.53it/s]

Epoch [37/40], Step [60/391], Loss: 0.6263


 18%|█▊        | 71/391 [00:11<00:49,  6.51it/s]

Epoch [37/40], Step [70/391], Loss: 0.6346


 21%|██        | 81/391 [00:13<00:47,  6.50it/s]

Epoch [37/40], Step [80/391], Loss: 0.6441


 23%|██▎       | 91/391 [00:14<00:46,  6.51it/s]

Epoch [37/40], Step [90/391], Loss: 0.6272


 26%|██▌       | 101/391 [00:16<00:44,  6.51it/s]

Epoch [37/40], Step [100/391], Loss: 0.6499


 28%|██▊       | 111/391 [00:17<00:42,  6.52it/s]

Epoch [37/40], Step [110/391], Loss: 0.6310


 31%|███       | 121/391 [00:19<00:41,  6.44it/s]

Epoch [37/40], Step [120/391], Loss: 0.6488


 34%|███▎      | 131/391 [00:20<00:39,  6.52it/s]

Epoch [37/40], Step [130/391], Loss: 0.6308


 36%|███▌      | 141/391 [00:22<00:38,  6.53it/s]

Epoch [37/40], Step [140/391], Loss: 0.6295


 39%|███▊      | 151/391 [00:24<00:36,  6.53it/s]

Epoch [37/40], Step [150/391], Loss: 0.6370


 41%|████      | 161/391 [00:25<00:35,  6.51it/s]

Epoch [37/40], Step [160/391], Loss: 0.6515


 44%|████▎     | 171/391 [00:27<00:33,  6.53it/s]

Epoch [37/40], Step [170/391], Loss: 0.6386


 46%|████▋     | 181/391 [00:28<00:32,  6.51it/s]

Epoch [37/40], Step [180/391], Loss: 0.6444


 49%|████▉     | 191/391 [00:30<00:30,  6.52it/s]

Epoch [37/40], Step [190/391], Loss: 0.6383


 51%|█████▏    | 201/391 [00:31<00:29,  6.53it/s]

Epoch [37/40], Step [200/391], Loss: 0.6393


 54%|█████▍    | 211/391 [00:33<00:27,  6.52it/s]

Epoch [37/40], Step [210/391], Loss: 0.6612


 57%|█████▋    | 221/391 [00:34<00:26,  6.52it/s]

Epoch [37/40], Step [220/391], Loss: 0.6477


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [37/40], Step [230/391], Loss: 0.6210


 62%|██████▏   | 241/391 [00:37<00:22,  6.53it/s]

Epoch [37/40], Step [240/391], Loss: 0.6542


 64%|██████▍   | 251/391 [00:39<00:21,  6.52it/s]

Epoch [37/40], Step [250/391], Loss: 0.6433


 67%|██████▋   | 261/391 [00:40<00:19,  6.52it/s]

Epoch [37/40], Step [260/391], Loss: 0.6476


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [37/40], Step [270/391], Loss: 0.6451


 72%|███████▏  | 281/391 [00:43<00:16,  6.53it/s]

Epoch [37/40], Step [280/391], Loss: 0.6356


 74%|███████▍  | 291/391 [00:45<00:15,  6.52it/s]

Epoch [37/40], Step [290/391], Loss: 0.6412


 77%|███████▋  | 301/391 [00:47<00:13,  6.51it/s]

Epoch [37/40], Step [300/391], Loss: 0.6324


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [37/40], Step [310/391], Loss: 0.6431


 82%|████████▏ | 321/391 [00:50<00:10,  6.52it/s]

Epoch [37/40], Step [320/391], Loss: 0.6466


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [37/40], Step [330/391], Loss: 0.6403


 87%|████████▋ | 341/391 [00:53<00:07,  6.52it/s]

Epoch [37/40], Step [340/391], Loss: 0.6528


 90%|████████▉ | 351/391 [00:54<00:06,  6.51it/s]

Epoch [37/40], Step [350/391], Loss: 0.6415


 92%|█████████▏| 361/391 [00:56<00:04,  6.49it/s]

Epoch [37/40], Step [360/391], Loss: 0.6410


 95%|█████████▍| 371/391 [00:57<00:03,  6.51it/s]

Epoch [37/40], Step [370/391], Loss: 0.6429


 97%|█████████▋| 381/391 [00:59<00:01,  6.53it/s]

Epoch [37/40], Step [380/391], Loss: 0.6352


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [37/40], Step [390/391], Loss: 0.6346





Test Accuracy of the student model on the test images: 63.35 %
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.17it/s]

Epoch [38/40], Step [10/391], Loss: 0.6197


  5%|▌         | 21/391 [00:04<00:56,  6.50it/s]

Epoch [38/40], Step [20/391], Loss: 0.6264


  8%|▊         | 31/391 [00:05<00:55,  6.52it/s]

Epoch [38/40], Step [30/391], Loss: 0.6219


 10%|█         | 41/391 [00:07<00:53,  6.53it/s]

Epoch [38/40], Step [40/391], Loss: 0.6236


 13%|█▎        | 51/391 [00:08<00:52,  6.53it/s]

Epoch [38/40], Step [50/391], Loss: 0.6571


 16%|█▌        | 61/391 [00:10<00:50,  6.51it/s]

Epoch [38/40], Step [60/391], Loss: 0.6244


 18%|█▊        | 71/391 [00:11<00:49,  6.52it/s]

Epoch [38/40], Step [70/391], Loss: 0.6478


 21%|██        | 81/391 [00:13<00:47,  6.51it/s]

Epoch [38/40], Step [80/391], Loss: 0.6178


 23%|██▎       | 91/391 [00:14<00:47,  6.38it/s]

Epoch [38/40], Step [90/391], Loss: 0.6436


 26%|██▌       | 101/391 [00:16<00:44,  6.51it/s]

Epoch [38/40], Step [100/391], Loss: 0.6426


 28%|██▊       | 111/391 [00:18<00:42,  6.52it/s]

Epoch [38/40], Step [110/391], Loss: 0.6247


 31%|███       | 121/391 [00:19<00:41,  6.52it/s]

Epoch [38/40], Step [120/391], Loss: 0.6368


 34%|███▎      | 131/391 [00:21<00:39,  6.51it/s]

Epoch [38/40], Step [130/391], Loss: 0.6452


 36%|███▌      | 141/391 [00:22<00:38,  6.52it/s]

Epoch [38/40], Step [140/391], Loss: 0.6394


 39%|███▊      | 151/391 [00:24<00:36,  6.53it/s]

Epoch [38/40], Step [150/391], Loss: 0.6292


 41%|████      | 161/391 [00:25<00:35,  6.53it/s]

Epoch [38/40], Step [160/391], Loss: 0.6242


 44%|████▎     | 171/391 [00:27<00:33,  6.50it/s]

Epoch [38/40], Step [170/391], Loss: 0.6210


 46%|████▋     | 181/391 [00:28<00:32,  6.52it/s]

Epoch [38/40], Step [180/391], Loss: 0.6210


 49%|████▉     | 191/391 [00:30<00:30,  6.53it/s]

Epoch [38/40], Step [190/391], Loss: 0.6300


 51%|█████▏    | 201/391 [00:31<00:29,  6.52it/s]

Epoch [38/40], Step [200/391], Loss: 0.6230


 54%|█████▍    | 211/391 [00:33<00:27,  6.52it/s]

Epoch [38/40], Step [210/391], Loss: 0.6557


 57%|█████▋    | 221/391 [00:34<00:26,  6.52it/s]

Epoch [38/40], Step [220/391], Loss: 0.6506


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [38/40], Step [230/391], Loss: 0.6246


 62%|██████▏   | 241/391 [00:37<00:23,  6.50it/s]

Epoch [38/40], Step [240/391], Loss: 0.6228


 64%|██████▍   | 251/391 [00:39<00:21,  6.50it/s]

Epoch [38/40], Step [250/391], Loss: 0.6247


 67%|██████▋   | 261/391 [00:41<00:19,  6.52it/s]

Epoch [38/40], Step [260/391], Loss: 0.6282


 69%|██████▉   | 271/391 [00:42<00:18,  6.51it/s]

Epoch [38/40], Step [270/391], Loss: 0.6390


 72%|███████▏  | 281/391 [00:44<00:16,  6.51it/s]

Epoch [38/40], Step [280/391], Loss: 0.6463


 74%|███████▍  | 291/391 [00:45<00:15,  6.51it/s]

Epoch [38/40], Step [290/391], Loss: 0.6417


 77%|███████▋  | 301/391 [00:47<00:13,  6.51it/s]

Epoch [38/40], Step [300/391], Loss: 0.6295


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [38/40], Step [310/391], Loss: 0.6401


 82%|████████▏ | 321/391 [00:50<00:10,  6.50it/s]

Epoch [38/40], Step [320/391], Loss: 0.6235


 85%|████████▍ | 331/391 [00:51<00:09,  6.52it/s]

Epoch [38/40], Step [330/391], Loss: 0.6369


 87%|████████▋ | 341/391 [00:53<00:07,  6.50it/s]

Epoch [38/40], Step [340/391], Loss: 0.6298


 90%|████████▉ | 351/391 [00:54<00:06,  6.52it/s]

Epoch [38/40], Step [350/391], Loss: 0.6346


 92%|█████████▏| 361/391 [00:56<00:04,  6.53it/s]

Epoch [38/40], Step [360/391], Loss: 0.6334


 95%|█████████▍| 371/391 [00:57<00:03,  6.54it/s]

Epoch [38/40], Step [370/391], Loss: 0.6307


 97%|█████████▋| 381/391 [00:59<00:01,  6.52it/s]

Epoch [38/40], Step [380/391], Loss: 0.6428


100%|██████████| 391/391 [01:00<00:00,  6.41it/s]

Epoch [38/40], Step [390/391], Loss: 0.6342





Test Accuracy of the student model on the test images: 63.75 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:00,  6.23it/s]

Epoch [39/40], Step [10/391], Loss: 0.6226


  5%|▌         | 21/391 [00:04<00:56,  6.49it/s]

Epoch [39/40], Step [20/391], Loss: 0.6210


  8%|▊         | 31/391 [00:05<00:55,  6.51it/s]

Epoch [39/40], Step [30/391], Loss: 0.6212


 10%|█         | 41/391 [00:07<00:53,  6.51it/s]

Epoch [39/40], Step [40/391], Loss: 0.6214


 13%|█▎        | 51/391 [00:08<00:52,  6.51it/s]

Epoch [39/40], Step [50/391], Loss: 0.6158


 16%|█▌        | 61/391 [00:10<00:50,  6.51it/s]

Epoch [39/40], Step [60/391], Loss: 0.6396


 18%|█▊        | 71/391 [00:11<00:49,  6.51it/s]

Epoch [39/40], Step [70/391], Loss: 0.6082


 21%|██        | 81/391 [00:13<00:47,  6.52it/s]

Epoch [39/40], Step [80/391], Loss: 0.6094


 23%|██▎       | 91/391 [00:14<00:45,  6.52it/s]

Epoch [39/40], Step [90/391], Loss: 0.6376


 26%|██▌       | 101/391 [00:16<00:44,  6.52it/s]

Epoch [39/40], Step [100/391], Loss: 0.6272


 28%|██▊       | 111/391 [00:17<00:42,  6.53it/s]

Epoch [39/40], Step [110/391], Loss: 0.6148


 31%|███       | 121/391 [00:19<00:41,  6.50it/s]

Epoch [39/40], Step [120/391], Loss: 0.6238


 34%|███▎      | 131/391 [00:20<00:39,  6.52it/s]

Epoch [39/40], Step [130/391], Loss: 0.6220


 36%|███▌      | 141/391 [00:22<00:38,  6.51it/s]

Epoch [39/40], Step [140/391], Loss: 0.6165


 39%|███▊      | 151/391 [00:23<00:36,  6.52it/s]

Epoch [39/40], Step [150/391], Loss: 0.6375


 41%|████      | 161/391 [00:25<00:35,  6.51it/s]

Epoch [39/40], Step [160/391], Loss: 0.6188


 44%|████▎     | 171/391 [00:27<00:33,  6.52it/s]

Epoch [39/40], Step [170/391], Loss: 0.6220


 46%|████▋     | 181/391 [00:28<00:32,  6.51it/s]

Epoch [39/40], Step [180/391], Loss: 0.6414


 49%|████▉     | 191/391 [00:30<00:30,  6.51it/s]

Epoch [39/40], Step [190/391], Loss: 0.6206


 51%|█████▏    | 201/391 [00:31<00:29,  6.50it/s]

Epoch [39/40], Step [200/391], Loss: 0.6332


 54%|█████▍    | 211/391 [00:33<00:27,  6.52it/s]

Epoch [39/40], Step [210/391], Loss: 0.6167


 57%|█████▋    | 221/391 [00:34<00:26,  6.53it/s]

Epoch [39/40], Step [220/391], Loss: 0.6157


 59%|█████▉    | 231/391 [00:36<00:24,  6.51it/s]

Epoch [39/40], Step [230/391], Loss: 0.6311


 62%|██████▏   | 241/391 [00:37<00:22,  6.53it/s]

Epoch [39/40], Step [240/391], Loss: 0.6249


 64%|██████▍   | 251/391 [00:39<00:21,  6.51it/s]

Epoch [39/40], Step [250/391], Loss: 0.6475


 67%|██████▋   | 261/391 [00:40<00:19,  6.52it/s]

Epoch [39/40], Step [260/391], Loss: 0.6298


 69%|██████▉   | 271/391 [00:42<00:18,  6.53it/s]

Epoch [39/40], Step [270/391], Loss: 0.6256


 72%|███████▏  | 281/391 [00:43<00:16,  6.51it/s]

Epoch [39/40], Step [280/391], Loss: 0.6332


 74%|███████▍  | 291/391 [00:45<00:15,  6.51it/s]

Epoch [39/40], Step [290/391], Loss: 0.6282


 77%|███████▋  | 301/391 [00:46<00:13,  6.53it/s]

Epoch [39/40], Step [300/391], Loss: 0.6452


 80%|███████▉  | 311/391 [00:48<00:12,  6.52it/s]

Epoch [39/40], Step [310/391], Loss: 0.6218


 82%|████████▏ | 321/391 [00:50<00:10,  6.52it/s]

Epoch [39/40], Step [320/391], Loss: 0.6281


 85%|████████▍ | 331/391 [00:51<00:09,  6.53it/s]

Epoch [39/40], Step [330/391], Loss: 0.6290


 87%|████████▋ | 341/391 [00:53<00:07,  6.52it/s]

Epoch [39/40], Step [340/391], Loss: 0.6308


 90%|████████▉ | 351/391 [00:54<00:06,  6.49it/s]

Epoch [39/40], Step [350/391], Loss: 0.6270


 92%|█████████▏| 361/391 [00:56<00:04,  6.51it/s]

Epoch [39/40], Step [360/391], Loss: 0.6322


 95%|█████████▍| 371/391 [00:57<00:03,  6.52it/s]

Epoch [39/40], Step [370/391], Loss: 0.6209


 97%|█████████▋| 381/391 [00:59<00:01,  6.51it/s]

Epoch [39/40], Step [380/391], Loss: 0.6255


100%|██████████| 391/391 [01:00<00:00,  6.43it/s]

Epoch [39/40], Step [390/391], Loss: 0.6242





Test Accuracy of the student model on the test images: 63.81 %
Saved best model to final_model.pth
UNFREEZING


  3%|▎         | 11/391 [00:02<01:01,  6.18it/s]

Epoch [40/40], Step [10/391], Loss: 0.6182


  5%|▌         | 21/391 [00:04<00:56,  6.51it/s]

Epoch [40/40], Step [20/391], Loss: 0.6229


  8%|▊         | 31/391 [00:05<00:55,  6.53it/s]

Epoch [40/40], Step [30/391], Loss: 0.6149


 10%|█         | 41/391 [00:07<00:53,  6.51it/s]

Epoch [40/40], Step [40/391], Loss: 0.6231


 13%|█▎        | 51/391 [00:08<00:52,  6.52it/s]

Epoch [40/40], Step [50/391], Loss: 0.6261


 16%|█▌        | 61/391 [00:10<00:50,  6.53it/s]

Epoch [40/40], Step [60/391], Loss: 0.6266


 18%|█▊        | 71/391 [00:11<00:49,  6.51it/s]

Epoch [40/40], Step [70/391], Loss: 0.6185


 21%|██        | 81/391 [00:13<00:47,  6.51it/s]

Epoch [40/40], Step [80/391], Loss: 0.6170


 23%|██▎       | 91/391 [00:14<00:45,  6.52it/s]

Epoch [40/40], Step [90/391], Loss: 0.6131


 26%|██▌       | 101/391 [00:16<00:44,  6.52it/s]

Epoch [40/40], Step [100/391], Loss: 0.6265


 28%|██▊       | 111/391 [00:17<00:42,  6.53it/s]

Epoch [40/40], Step [110/391], Loss: 0.6214


 31%|███       | 121/391 [00:19<00:41,  6.52it/s]

Epoch [40/40], Step [120/391], Loss: 0.6160


 34%|███▎      | 131/391 [00:20<00:39,  6.51it/s]

Epoch [40/40], Step [130/391], Loss: 0.6247


 36%|███▌      | 141/391 [00:22<00:38,  6.51it/s]

Epoch [40/40], Step [140/391], Loss: 0.6245


 39%|███▊      | 151/391 [00:24<00:36,  6.54it/s]

Epoch [40/40], Step [150/391], Loss: 0.6128


 41%|████      | 161/391 [00:25<00:35,  6.51it/s]

Epoch [40/40], Step [160/391], Loss: 0.6175


 44%|████▎     | 171/391 [00:27<00:33,  6.53it/s]

Epoch [40/40], Step [170/391], Loss: 0.6230


 46%|████▋     | 181/391 [00:28<00:32,  6.51it/s]

Epoch [40/40], Step [180/391], Loss: 0.6236


 49%|████▉     | 191/391 [00:30<00:30,  6.52it/s]

Epoch [40/40], Step [190/391], Loss: 0.6182


 51%|█████▏    | 201/391 [00:31<00:29,  6.52it/s]

Epoch [40/40], Step [200/391], Loss: 0.6286


 54%|█████▍    | 211/391 [00:33<00:27,  6.53it/s]

Epoch [40/40], Step [210/391], Loss: 0.6381


 57%|█████▋    | 221/391 [00:34<00:26,  6.51it/s]

Epoch [40/40], Step [220/391], Loss: 0.6166


 59%|█████▉    | 231/391 [00:36<00:24,  6.52it/s]

Epoch [40/40], Step [230/391], Loss: 0.6252


 62%|██████▏   | 241/391 [00:37<00:23,  6.51it/s]

Epoch [40/40], Step [240/391], Loss: 0.6288


 64%|██████▍   | 251/391 [00:39<00:21,  6.52it/s]

Epoch [40/40], Step [250/391], Loss: 0.6106


 67%|██████▋   | 261/391 [00:40<00:19,  6.51it/s]

Epoch [40/40], Step [260/391], Loss: 0.6139


 69%|██████▉   | 271/391 [00:42<00:18,  6.54it/s]

Epoch [40/40], Step [270/391], Loss: 0.6102


 72%|███████▏  | 281/391 [00:43<00:16,  6.53it/s]

Epoch [40/40], Step [280/391], Loss: 0.6252


 74%|███████▍  | 291/391 [00:45<00:15,  6.51it/s]

Epoch [40/40], Step [290/391], Loss: 0.6344


 77%|███████▋  | 301/391 [00:47<00:13,  6.52it/s]

Epoch [40/40], Step [300/391], Loss: 0.6165


 80%|███████▉  | 311/391 [00:48<00:12,  6.51it/s]

Epoch [40/40], Step [310/391], Loss: 0.6220


 82%|████████▏ | 321/391 [00:50<00:10,  6.52it/s]

Epoch [40/40], Step [320/391], Loss: 0.6313


 85%|████████▍ | 331/391 [00:51<00:09,  6.51it/s]

Epoch [40/40], Step [330/391], Loss: 0.6259


 87%|████████▋ | 341/391 [00:53<00:07,  6.51it/s]

Epoch [40/40], Step [340/391], Loss: 0.6219


 90%|████████▉ | 351/391 [00:54<00:06,  6.51it/s]

Epoch [40/40], Step [350/391], Loss: 0.6262


 92%|█████████▏| 361/391 [00:56<00:04,  6.48it/s]

Epoch [40/40], Step [360/391], Loss: 0.6302


 95%|█████████▍| 371/391 [00:57<00:03,  6.50it/s]

Epoch [40/40], Step [370/391], Loss: 0.6185


 97%|█████████▋| 381/391 [00:59<00:01,  6.52it/s]

Epoch [40/40], Step [380/391], Loss: 0.6280


100%|██████████| 391/391 [01:00<00:00,  6.42it/s]

Epoch [40/40], Step [390/391], Loss: 0.6182





Test Accuracy of the student model on the test images: 63.86 %
Saved best model to final_model.pth
Best Accuracy: 63.86 %


In [None]:
!ls -lh

total 54M
-rw-r--r-- 1 root root  27M Dec  9 03:25 best_model.pth
-rw-r--r-- 1 root root  26M Dec  9 03:25 resnet_34_tf.pth
drwxr-xr-x 1 root root 4.0K Dec  5 14:24 sample_data
drwxr-xr-x 5 root root 4.0K Dec  9 03:27 tiny-imagenet-200
