In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

from torch.utils.data import DataLoader, Dataset
from torchvision.datasets.utils import download_url
from torchvision.datasets.folder import default_loader
import os
import zipfile
import random
from tqdm import tqdm

# Seed Python's and PyTorch's random number generators with a fixed value.
random.seed(0)
torch.manual_seed(0)

# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print('Using device:', device)

# Dataset root and its train/val split directories.
data_dir = './tiny-imagenet-200'
train_dir, val_dir = (os.path.join(data_dir, split) for split in ('train', 'val'))

# Download and extract Tiny ImageNet dataset
def download_and_extract_tiny_imagenet():
    """Download and unpack Tiny ImageNet unless it is already extracted.

    The archive is fetched into the current working directory, extracted
    there, and then deleted. The archive is assumed to unpack into a
    top-level ``tiny-imagenet-200`` folder, which is what ``data_dir``
    points at.

    The "already present" check looks for the extracted ``train`` split
    rather than the bare ``data_dir`` folder, and the folder is no longer
    created up front. Otherwise a failed download or extraction would leave
    an empty directory behind and every later call would skip the download.
    """
    if os.path.isdir(os.path.join(data_dir, 'train')):
        print('Tiny ImageNet dataset already exists.')
        return

    url = 'http://cs231n.stanford.edu/tiny-imagenet-200.zip'
    filename = 'tiny-imagenet-200.zip'
    zip_path = os.path.join('./', filename)
    print('Downloading Tiny ImageNet dataset...')
    download_url(url, root='./', filename=filename)
    print('Extracting Tiny ImageNet dataset...')
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall('./')
    # The archive is only needed for extraction; drop it to save disk space.
    os.remove(zip_path)
    print('Dataset downloaded and extracted.')

download_and_extract_tiny_imagenet()

# Prepare validation data
def prepare_val_folder():
    """Rearrange the validation split into per-class folders.

    Reads ``val_annotations.txt`` (tab-separated; column 0 is the image file
    name, column 1 the class id) and moves every listed image from
    ``<val_dir>/images/<img>`` to ``<val_dir>/<class>/images/<img>``, which
    is the same layout the training split uses.

    Returns immediately when ``<val_dir>/images`` does not exist, which is
    the state this function leaves behind after a successful run.

    Raises:
        OSError: If ``<val_dir>/images`` still contains files after all
            annotated images have been moved (``os.rmdir`` refuses to remove
            a non-empty directory).
    """
    val_img_dir = os.path.join(val_dir, 'images')
    if not os.path.exists(val_img_dir):
        return

    # Map image file name -> class id from the annotations file.
    val_annotations_file = os.path.join(val_dir, 'val_annotations.txt')
    val_img_dict = {}
    with open(val_annotations_file, 'r') as f:
        for line in f:
            parts = line.strip().split('\t')
            # Skip blank or malformed lines instead of failing on parts[1].
            if len(parts) < 2:
                continue
            val_img_dict[parts[0]] = parts[1]

    print('Organizing validation images...')
    for img, cls in tqdm(val_img_dict.items()):
        cls_img_dir = os.path.join(val_dir, cls, 'images')
        # makedirs(exist_ok=True) also repairs a class folder left without
        # its 'images' subfolder by an interrupted earlier run.
        os.makedirs(cls_img_dir, exist_ok=True)
        img_src = os.path.join(val_img_dir, img)
        # Images already moved by an earlier, interrupted run are skipped.
        if os.path.exists(img_src):
            os.rename(img_src, os.path.join(cls_img_dir, img))
    os.rmdir(val_img_dir)
    print('Validation images organized.')

prepare_val_folder()

# Define data transformations
# Training pipeline: random rotation of up to 20 degrees and a random
# horizontal flip, then resize to 224x224 and convert to a tensor.
transform_train = transforms.Compose([
    transforms.RandomRotation(degrees=20),
    transforms.RandomHorizontalFlip(),
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
])

# Evaluation pipeline: no augmentation, only resize and tensor conversion.
transform_test = transforms.Compose([
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
])

# Create custom dataset class
class TinyImageNetDataset(Dataset):
    """Tiny ImageNet split stored as ``<root>/<split>/<class>/images/<file>``.

    Args:
        root: Dataset root containing the ``train`` and ``val`` folders.
        train: Read the ``train`` split when true, otherwise ``val``.
        transform: Optional callable applied to each loaded image.

    After construction:
        images: List of image file paths.
        labels: Class index for each entry of ``images``.
        classes: Sorted list of class folder names.
        class_to_idx: Mapping from class folder name to class index.
    """

    def __init__(self, root, train=True, transform=None):
        self.root = root
        self.transform = transform
        self.images = []
        self.labels = []
        self.train = train
        self._load_data()

    def _load_data(self):
        """Scan the split directory and fill ``images`` and ``labels``."""
        split_dir = os.path.join(self.root, 'train' if self.train else 'val')
        # Only entries that contain an 'images' subfolder are classes. Stray
        # files (e.g. val_annotations.txt, .DS_Store) must not receive an
        # index, otherwise train and val label numbering can diverge.
        classes = sorted(
            entry for entry in os.listdir(split_dir)
            if os.path.isdir(os.path.join(split_dir, entry, 'images'))
        )
        class_to_idx = {cls_name: idx for idx, cls_name in enumerate(classes)}
        for cls_name in classes:
            cls_dir = os.path.join(split_dir, cls_name, 'images')
            # os.listdir order is arbitrary; sort so sample order is
            # reproducible across machines and runs.
            for img_name in sorted(os.listdir(cls_dir)):
                self.images.append(os.path.join(cls_dir, img_name))
                self.labels.append(class_to_idx[cls_name])
        self.classes = classes
        self.class_to_idx = class_to_idx

    def __len__(self):
        """Return the number of images in the split."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load image ``idx``, apply the transform if any, return ``(image, label)``."""
        image = default_loader(self.images[idx])
        if self.transform is not None:
            image = self.transform(image)
        return image, self.labels[idx]

# Load datasets
train_dataset = TinyImageNetDataset(data_dir, train=True, transform=transform_train)
test_dataset = TinyImageNetDataset(data_dir, train=False, transform=transform_test)

# Samples per optimisation step; lower this if the GPU runs out of memory.
batch_size = 512
print('Batch size:', batch_size)

# Both loaders share batch size, worker count and pinned memory; only the
# training loader shuffles.
train_loader, test_loader = (
    DataLoader(split, batch_size=batch_size, shuffle=reshuffle, num_workers=4, pin_memory=True)
    for split, reshuffle in ((train_dataset, True), (test_dataset, False))
)

# Define function to create and modify ResNet models
def create_resnet_model(name, num_classes=200, pretrained=True):
    """Build a torchvision ResNet whose classifier outputs ``num_classes`` logits.

    Args:
        name: One of ``'resnet18'``, ``'resnet34'`` or ``'resnet101'``.
        num_classes: Output size of the replacement ``fc`` layer.
        pretrained: Forwarded to the torchvision constructor.

    Returns:
        The model with its final fully connected layer replaced.

    Raises:
        ValueError: If ``name`` is not one of the supported architectures.
    """
    supported = ('resnet18', 'resnet34', 'resnet101')
    if name not in supported:
        raise ValueError('Invalid model name')

    # The supported names match the torchvision constructor names exactly.
    builder = getattr(torchvision.models, name)
    model = builder(pretrained=pretrained)

    # Replace the classification head, keeping its input width.
    model.fc = nn.Linear(model.fc.in_features, num_classes)
    return model

from torch.cuda.amp import autocast, GradScaler

# Function to train a model normally (used for teacher model)
def train_model(model, train_loader, test_loader, num_epochs=10, base_lr=0.1, device='cuda', save_path='best_model.pth'):
    """Train ``model`` with cross-entropy only and checkpoint the best epoch.

    Uses SGD (momentum 0.9, weight decay 5e-4) with a per-epoch cosine
    learning-rate schedule and mixed-precision training.

    Args:
        model: Network to optimise; moved to ``device``.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        test_loader: Iterable of ``(inputs, targets)`` evaluation batches.
        num_epochs: Number of epochs; also the cosine schedule's ``T_max``.
        base_lr: Learning rate for a batch size of 256; scaled linearly with
            the actual batch size.
        device: Device the model and batches are moved to.
        save_path: File the best ``state_dict`` is written to.

    Returns:
        The best evaluation accuracy (in percent) over all epochs.
    """
    criterion = nn.CrossEntropyLoss()
    # Scale the LR by the batch size of the loader actually passed in; fall
    # back to the module-level ``batch_size`` only when the loader does not
    # expose one (e.g. it was built from a batch_sampler).
    effective_batch_size = getattr(train_loader, 'batch_size', None) or batch_size
    adjusted_lr = base_lr * (effective_batch_size / 256)
    print('Adjusted learning rate:', adjusted_lr)
    optimizer = optim.SGD(model.parameters(), lr=adjusted_lr,
                          momentum=0.9, weight_decay=5e-4)
    # CUDA autocast and loss scaling only apply on a CUDA device; disable
    # both elsewhere so CPU runs work without AMP warnings or errors.
    use_amp = torch.device(device).type == 'cuda'
    scaler = GradScaler(enabled=use_amp)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    model.to(device)

    best_acc = 0

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for step, (inputs, targets) in enumerate(tqdm(train_loader), start=1):
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)

            optimizer.zero_grad()
            with autocast(enabled=use_amp):
                outputs = model(inputs)
                loss = criterion(outputs, targets)

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()

            # Report the mean loss of the last 10 steps, then reset.
            if step % 10 == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                      % (epoch+1, num_epochs, step, len(train_loader), running_loss/10))
                running_loss = 0.0

        # Validation
        model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs = inputs.to(device, non_blocking=True)
                targets = targets.to(device, non_blocking=True)
                with autocast(enabled=use_amp):
                    outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        # An empty evaluation loader yields 0% instead of a ZeroDivisionError.
        acc = 100 * correct / total if total else 0.0
        print('Test Accuracy of the model on the test images: {:.2f} %'.format(acc))

        if acc > best_acc:
            best_acc = acc
            # Save the best model
            torch.save(model.state_dict(), save_path)
            print(f"Saved best model to {save_path}")

        scheduler.step()

    print('Best Accuracy: {:.2f} %'.format(best_acc))
    return best_acc

# Function for knowledge distillation from teacher to student
def train_kd(student_model, teacher_model, train_loader, test_loader, num_epochs=10, base_lr=0.1, temperature=4, alpha=0.9, device='cuda', save_path='best_student_model.pth'):
    """Distil a single frozen teacher into ``student_model``.

    The loss is ``alpha * KD + (1 - alpha) * CE``. KD is the batch-mean KL
    divergence between the temperature-softened teacher and student
    distributions, multiplied by ``temperature ** 2``. CE is the
    cross-entropy against the ground-truth labels.

    Args:
        student_model: Network to optimise; moved to ``device``.
        teacher_model: Network providing soft targets; put in eval mode and
            only run under ``torch.no_grad()``.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        test_loader: Iterable of ``(inputs, targets)`` evaluation batches.
        num_epochs: Number of epochs; also the cosine schedule's ``T_max``.
        base_lr: Learning rate for a batch size of 256; scaled linearly with
            the actual batch size.
        temperature: Softmax temperature applied to both sets of logits.
        alpha: Weight of the distillation term.
        device: Device the models and batches are moved to.
        save_path: File the best student ``state_dict`` is written to.

    Returns:
        The best student evaluation accuracy (in percent) over all epochs.
    """
    criterion = nn.CrossEntropyLoss()
    soft_loss_fn = nn.KLDivLoss(reduction='batchmean')

    # Scale the LR by the batch size of the loader actually passed in; fall
    # back to the module-level ``batch_size`` only when the loader does not
    # expose one (e.g. it was built from a batch_sampler).
    effective_batch_size = getattr(train_loader, 'batch_size', None) or batch_size
    adjusted_lr = base_lr * (effective_batch_size / 256)
    print('Adjusted learning rate:', adjusted_lr)

    optimizer = optim.SGD(student_model.parameters(), lr=adjusted_lr,
                          momentum=0.9, weight_decay=5e-4)
    # CUDA autocast and loss scaling only apply on a CUDA device; disable
    # both elsewhere so CPU runs work without AMP warnings or errors.
    use_amp = torch.device(device).type == 'cuda'
    scaler = GradScaler(enabled=use_amp)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    student_model.to(device)
    teacher_model.to(device)
    teacher_model.eval()

    best_acc = 0

    for epoch in range(num_epochs):
        student_model.train()
        running_loss = 0.0

        for step, (inputs, targets) in enumerate(tqdm(train_loader), start=1):
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)

            optimizer.zero_grad()

            with autocast(enabled=use_amp):
                outputs = student_model(inputs)
                with torch.no_grad():
                    teacher_outputs = teacher_model(inputs)

                loss_ce = criterion(outputs, targets)
                # KLDivLoss expects log-probabilities as input and
                # probabilities as target.
                loss_kd = soft_loss_fn(F.log_softmax(outputs/temperature, dim=1),
                                       F.softmax(teacher_outputs/temperature, dim=1)) * (temperature ** 2)

                loss = alpha * loss_kd + (1 - alpha) * loss_ce

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()

            # Report the mean loss of the last 10 steps, then reset.
            if step % 10 == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                      % (epoch+1, num_epochs, step, len(train_loader), running_loss/10))
                running_loss = 0.0

        # Validation
        student_model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs = inputs.to(device, non_blocking=True)
                targets = targets.to(device, non_blocking=True)
                with autocast(enabled=use_amp):
                    outputs = student_model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        # An empty evaluation loader yields 0% instead of a ZeroDivisionError.
        acc = 100 * correct / total if total else 0.0
        print('Test Accuracy of the student model on the test images: {:.2f} %'.format(acc))

        if acc > best_acc:
            best_acc = acc
            # Save the best model
            torch.save(student_model.state_dict(), save_path)
            print(f"Saved best model to {save_path}")

        scheduler.step()

    print('Best Accuracy: {:.2f} %'.format(best_acc))
    return best_acc

# Function for knowledge distillation with both teacher and TA (simple average)
def train_kd_with_ta(student_model, teacher_model, ta_model, train_loader, test_loader, num_epochs=10, base_lr=0.1, temperature=4, alpha=0.9, device='cuda', save_path='best_student_model.pth'):
    """Distil the plain average of a teacher and a TA into ``student_model``.

    The soft target is the mean of the teacher's and the TA's
    temperature-softened probabilities. The loss is
    ``alpha * KD + (1 - alpha) * CE``, where KD is the batch-mean KL
    divergence against that averaged target, multiplied by
    ``temperature ** 2``.

    Args:
        student_model: Network to optimise; moved to ``device``.
        teacher_model: First soft-target network; put in eval mode and only
            run under ``torch.no_grad()``.
        ta_model: Second soft-target network, handled like the teacher.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        test_loader: Iterable of ``(inputs, targets)`` evaluation batches.
        num_epochs: Number of epochs; also the cosine schedule's ``T_max``.
        base_lr: Learning rate for a batch size of 256; scaled linearly with
            the actual batch size.
        temperature: Softmax temperature applied to all logits.
        alpha: Weight of the distillation term.
        device: Device the models and batches are moved to.
        save_path: File the best student ``state_dict`` is written to.

    Returns:
        The best student evaluation accuracy (in percent) over all epochs.
    """
    criterion = nn.CrossEntropyLoss()
    soft_loss_fn = nn.KLDivLoss(reduction='batchmean')

    # Scale the LR by the batch size of the loader actually passed in; fall
    # back to the module-level ``batch_size`` only when the loader does not
    # expose one (e.g. it was built from a batch_sampler).
    effective_batch_size = getattr(train_loader, 'batch_size', None) or batch_size
    adjusted_lr = base_lr * (effective_batch_size / 256)
    print('Adjusted learning rate:', adjusted_lr)

    optimizer = optim.SGD(student_model.parameters(), lr=adjusted_lr,
                          momentum=0.9, weight_decay=5e-4)
    # CUDA autocast and loss scaling only apply on a CUDA device; disable
    # both elsewhere so CPU runs work without AMP warnings or errors.
    use_amp = torch.device(device).type == 'cuda'
    scaler = GradScaler(enabled=use_amp)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    student_model.to(device)
    teacher_model.to(device)
    teacher_model.eval()
    ta_model.to(device)
    ta_model.eval()

    best_acc = 0

    for epoch in range(num_epochs):
        student_model.train()
        running_loss = 0.0

        for step, (inputs, targets) in enumerate(tqdm(train_loader), start=1):
            inputs = inputs.to(device, non_blocking=True)
            targets = targets.to(device, non_blocking=True)

            optimizer.zero_grad()

            with autocast(enabled=use_amp):
                outputs = student_model(inputs)
                with torch.no_grad():
                    teacher_outputs = teacher_model(inputs)
                    ta_outputs = ta_model(inputs)
                    # Average the softened probabilities (not the logits).
                    avg_outputs = (F.softmax(teacher_outputs/temperature, dim=1) + F.softmax(ta_outputs/temperature, dim=1)) / 2

                loss_ce = criterion(outputs, targets)
                loss_kd = soft_loss_fn(F.log_softmax(outputs/temperature, dim=1),
                                       avg_outputs) * (temperature ** 2)

                loss = alpha * loss_kd + (1 - alpha) * loss_ce

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()

            # Report the mean loss of the last 10 steps, then reset.
            if step % 10 == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                      % (epoch+1, num_epochs, step, len(train_loader), running_loss/10))
                running_loss = 0.0

        # Validation
        student_model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs = inputs.to(device, non_blocking=True)
                targets = targets.to(device, non_blocking=True)
                with autocast(enabled=use_amp):
                    outputs = student_model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        # An empty evaluation loader yields 0% instead of a ZeroDivisionError.
        acc = 100 * correct / total if total else 0.0
        print('Test Accuracy of the student model on the test images: {:.2f} %'.format(acc))

        if acc > best_acc:
            best_acc = acc
            # Save the best model
            torch.save(student_model.state_dict(), save_path)
            print(f"Saved best model to {save_path}")

        scheduler.step()

    print('Best Accuracy: {:.2f} %'.format(best_acc))
    return best_acc

# Function for the new distillation algorithm
def train_kd_new_algorithm(student_model, teacher_model, ta_model, train_loader, test_loader, num_epochs=10, base_lr=0.1, temp=5, alpha=0.9, device='cuda', save_path='best_student_model.pth'):
    """Distil a teacher and a TA into the student with per-sample weights.

    For every sample:

    1. Confidence scores for teacher and TA are the softmax over their
       negated cross-entropy losses on the true label, so the network with
       the lower loss gets the higher score.
    2. ``kl_factor`` is the sigmoid of the KL divergence between the
       softened TA and teacher distributions.
    3. Each weight interpolates between a uniform 0.5 and the confidence
       score: ``w = (1 - kl_factor) * 0.5 + kl_factor * conf``.
    4. The soft loss is the weighted sum of the student's KL divergences to
       the teacher and to the TA, each multiplied by ``temp ** 2``.

    The per-sample total is ``alpha * soft + (1 - alpha) * CE``, averaged
    over the batch.

    Args:
        student_model: Network to optimise; moved to ``device``.
        teacher_model: First soft-target network; put in eval mode and only
            run under ``torch.no_grad()``.
        ta_model: Second soft-target network, handled like the teacher.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        test_loader: Iterable of ``(inputs, targets)`` evaluation batches.
        num_epochs: Number of epochs; also the cosine schedule's ``T_max``.
        base_lr: Learning rate for a batch size of 256; scaled linearly with
            the actual batch size.
        temp: Softmax temperature applied to all logits in the KL terms.
        alpha: Weight of the distillation term.
        device: Device the models and batches are moved to.
        save_path: File the best student ``state_dict`` is written to.

    Returns:
        The best student evaluation accuracy (in percent) over all epochs.
    """
    criterion = nn.CrossEntropyLoss(reduction='none')  # per-sample loss
    kl_criterion = nn.KLDivLoss(reduction='none')  # per-element; summed over classes below

    # Scale the LR by the batch size of the loader actually passed in; fall
    # back to the module-level ``batch_size`` only when the loader does not
    # expose one (e.g. it was built from a batch_sampler).
    effective_batch_size = getattr(train_loader, 'batch_size', None) or batch_size
    adjusted_lr = base_lr * (effective_batch_size / 256)
    print('Adjusted learning rate:', adjusted_lr)

    optimizer = optim.SGD(student_model.parameters(), lr=adjusted_lr,
                          momentum=0.9, weight_decay=5e-4)
    # CUDA autocast and loss scaling only apply on a CUDA device; disable
    # both elsewhere so CPU runs work without AMP warnings or errors.
    use_amp = torch.device(device).type == 'cuda'
    scaler = GradScaler(enabled=use_amp)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=num_epochs)
    student_model.to(device)
    teacher_model.to(device)
    teacher_model.eval()
    ta_model.to(device)
    ta_model.eval()

    best_acc = 0

    for epoch in range(num_epochs):
        student_model.train()
        running_loss = 0.0

        for step, (data, target) in enumerate(tqdm(train_loader), start=1):
            data = data.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)

            optimizer.zero_grad()

            with autocast(enabled=use_amp):
                output = student_model(data)
                with torch.no_grad():
                    teacher_outputs = teacher_model(data)
                    ta_outputs = ta_model(data)

                # Hard-label loss of the student, one value per sample.
                hard_loss = criterion(output, target)  # shape: [batch_size]

                # Per-sample cross-entropy of teacher and TA on the labels.
                ce_teacher = criterion(teacher_outputs, target)  # shape: [batch_size]
                ce_ta = criterion(ta_outputs, target)  # shape: [batch_size]

                # Softmax over the negated losses: lower loss, higher score.
                neg_ce = torch.stack([-ce_teacher, -ce_ta], dim=1)  # shape: [batch_size, 2]
                conf_scores = F.softmax(neg_ce, dim=1)  # shape: [batch_size, 2]
                conf_teacher = conf_scores[:, 0]  # shape: [batch_size]
                conf_ta = conf_scores[:, 1]  # shape: [batch_size]

                # Temperature-softened target distributions.
                teacher_pred = F.softmax(teacher_outputs / temp, dim=1)  # shape: [batch_size, num_classes]
                ta_pred = F.softmax(ta_outputs / temp, dim=1)

                # Teacher/TA disagreement: KL(TA || teacher) per sample.
                kl_teacher_ta = kl_criterion(
                    F.log_softmax(teacher_outputs / temp, dim=1),
                    ta_pred
                ).sum(dim=1)  # shape: [batch_size]

                # Squash the disagreement into a mixing factor.
                kl_factor = torch.sigmoid(kl_teacher_ta)  # shape: [batch_size]

                # Blend a uniform 0.5 weight with the confidence score.
                w_teacher = (1 - kl_factor) * 0.5 + kl_factor * conf_teacher  # shape: [batch_size]
                w_ta = (1 - kl_factor) * 0.5 + kl_factor * conf_ta  # shape: [batch_size]

                # The student's softened log-probabilities feed both KL
                # terms, so compute them once.
                student_log_pred = F.log_softmax(output / temp, dim=1)

                kl_student_teacher = kl_criterion(student_log_pred, teacher_pred).sum(dim=1)  # shape: [batch_size]
                kl_student_ta = kl_criterion(student_log_pred, ta_pred).sum(dim=1)  # shape: [batch_size]

                # Weighted soft losses, scaled by temp ** 2.
                soft_loss_teacher = w_teacher * kl_student_teacher * (temp ** 2)  # shape: [batch_size]
                soft_loss_ta = w_ta * kl_student_ta * (temp ** 2)  # shape: [batch_size]

                # Per-sample total, then mean over the batch.
                total_loss = alpha * (soft_loss_teacher + soft_loss_ta) + (1 - alpha) * hard_loss  # shape: [batch_size]
                loss = total_loss.mean()

            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()

            running_loss += loss.item()

            # Report the mean loss of the last 10 steps, then reset.
            if step % 10 == 0:
                print('Epoch [%d/%d], Step [%d/%d], Loss: %.4f'
                      % (epoch+1, num_epochs, step, len(train_loader), running_loss/10))
                running_loss = 0.0

        # Validation
        student_model.eval()
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, targets in test_loader:
                inputs = inputs.to(device, non_blocking=True)
                targets = targets.to(device, non_blocking=True)
                with autocast(enabled=use_amp):
                    outputs = student_model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()

        # An empty evaluation loader yields 0% instead of a ZeroDivisionError.
        acc = 100 * correct / total if total else 0.0
        print('Test Accuracy of the student model on the test images: {:.2f} %'.format(acc))

        if acc > best_acc:
            best_acc = acc
            # Save the best model
            torch.save(student_model.state_dict(), save_path)
            print(f"Saved best model to {save_path}")

        scheduler.step()

    print('Best Accuracy: {:.2f} %'.format(best_acc))
    return best_acc

# Set up device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)

# Release cached GPU memory before the experiments start.
torch.cuda.empty_cache()

# Load the frozen teacher (ResNet-101) from its checkpoint.
# map_location lets a checkpoint saved on a GPU load on a CPU-only host too.
print('Loading Teacher Model (ResNet-101)')
teacher_model = create_resnet_model('resnet101', num_classes=200, pretrained=False)
teacher_model.load_state_dict(torch.load('/content/best_model.pth', map_location=device))
teacher_model = teacher_model.to(device)
teacher_model.eval()

# Load the frozen teaching assistant (ResNet-34) from its checkpoint.
print('Loading TA Model (ResNet-34)')
ta_model = create_resnet_model('resnet34', num_classes=200, pretrained=False)
ta_model.load_state_dict(torch.load('/content/resnet_34_tf.pth', map_location=device))
ta_model = ta_model.to(device)
ta_model.eval()

# Baseline: ResNet-18 trained without any distillation.
print('Loading No KD Model (ResNet-18)')
no_kd_18 = create_resnet_model('resnet18', num_classes=200, pretrained=False)
no_kd_18 = no_kd_18.to(device)
no_kd_18_best_acc = train_model(no_kd_18, train_loader, test_loader, num_epochs=40, base_lr=0.1, device=device, save_path='no_kd_18.pth')

# Standard KD: ResNet-18 student distilled directly from the teacher.
print('Training standard KD')
standard_kd_18 = create_resnet_model('resnet18', num_classes=200, pretrained=False)
standard_kd_18 = standard_kd_18.to(device)
standard_kd_18_best_acc = train_kd(standard_kd_18, teacher_model, train_loader, test_loader, num_epochs=40, base_lr=0.1, temperature=4, alpha=0.9, device=device, save_path='standard_kd_18.pth')
# This run trains the standard-KD student, not the TA, so the message says so.
print(f"Standard KD student model saved to standard_kd_18.pth with best accuracy: {standard_kd_18_best_acc:.2f}%")

# Algorithm 3: ResNet-18 student distilled from the TA only.
print('Training Student Model (ResNet-18) with TA Model (Algorithm 3)')
student_model_alg3 = create_resnet_model('resnet18', num_classes=200, pretrained=False)
student_model_alg3 = student_model_alg3.to(device)
student_best_acc_alg3 = train_kd(student_model_alg3, ta_model, train_loader, test_loader, num_epochs=40, base_lr=0.1, temperature=4, alpha=0.9, device=device, save_path='student_model_alg3.pth')
print(f"Student model (Algorithm 3) saved to student_model_alg3.pth with best accuracy: {student_best_acc_alg3:.2f}%")

# Algorithm 2: student distilled from the simple average of teacher and TA.
print('Training Student Model (ResNet-18) with both Teacher and TA Models (Simple Average, Algorithm 2)')
student_model_alg2 = create_resnet_model('resnet18', num_classes=200, pretrained=False)
student_model_alg2 = student_model_alg2.to(device)
student_best_acc_alg2 = train_kd_with_ta(student_model_alg2, teacher_model, ta_model, train_loader, test_loader, num_epochs=40, base_lr=0.1, temperature=4, alpha=0.9, device=device, save_path='student_model_alg2.pth')
print(f"Student model (Algorithm 2) saved to student_model_alg2.pth with best accuracy: {student_best_acc_alg2:.2f}%")

# Algorithm 1: student distilled with the per-sample weighted teacher/TA loss.
print('Training Student Model (ResNet-18) with both Teacher and TA Models (New Distillation Algorithm, Algorithm 1)')
student_model_alg1 = create_resnet_model('resnet18', num_classes=200, pretrained=False)
student_model_alg1 = student_model_alg1.to(device)
student_best_acc_alg1 = train_kd_new_algorithm(student_model_alg1, teacher_model, ta_model, train_loader, test_loader, num_epochs=40, base_lr=0.1, temp=5, alpha=0.9, device=device, save_path='student_model_alg1.pth')
print(f"Student model (Algorithm 1) saved to student_model_alg1.pth with best accuracy: {student_best_acc_alg1:.2f}%")

Using device: cuda
Downloading Tiny ImageNet dataset...
Downloading https://cs231n.stanford.edu/tiny-imagenet-200.zip to ./tiny-imagenet-200.zip


100%|██████████| 248M/248M [00:10<00:00, 22.7MB/s]


Extracting Tiny ImageNet dataset...
Dataset downloaded and extracted.
Organizing validation images...


100%|██████████| 10000/10000 [00:00<00:00, 33437.75it/s]


Validation images organized.
Batch size: 512
Using device: cuda
Loading Teacher Model (ResNet-101)


  teacher_model.load_state_dict(torch.load('/content/best_model.pth'))


Loading TA Model (ResNet-34)


  ta_model.load_state_dict(torch.load('/content/resnet_34_tf.pth'))
  scaler = GradScaler()


Training Student Model (ResNet-18) with TA Model (Algorithm 3)
Adjusted learning rate: 0.2


  with autocast():
  5%|▌         | 10/196 [00:22<03:27,  1.12s/it]

Epoch [1/40], Step [10/196], Loss: 9.0277


 10%|█         | 20/196 [00:32<02:51,  1.02it/s]

Epoch [1/40], Step [20/196], Loss: 8.5376


 15%|█▌        | 30/196 [00:42<02:58,  1.07s/it]

Epoch [1/40], Step [30/196], Loss: 8.2089


 20%|██        | 40/196 [00:54<02:51,  1.10s/it]

Epoch [1/40], Step [40/196], Loss: 8.1514


 26%|██▌       | 50/196 [01:05<03:04,  1.26s/it]

Epoch [1/40], Step [50/196], Loss: 7.7703


 31%|███       | 60/196 [01:16<02:30,  1.11s/it]

Epoch [1/40], Step [60/196], Loss: 7.7817


 36%|███▌      | 70/196 [01:28<02:22,  1.13s/it]

Epoch [1/40], Step [70/196], Loss: 7.5886


 41%|████      | 80/196 [01:40<02:07,  1.10s/it]

Epoch [1/40], Step [80/196], Loss: 7.3727


 46%|████▌     | 90/196 [01:52<02:01,  1.14s/it]

Epoch [1/40], Step [90/196], Loss: 7.3008


 51%|█████     | 100/196 [02:03<01:51,  1.16s/it]

Epoch [1/40], Step [100/196], Loss: 7.2648


 56%|█████▌    | 110/196 [02:15<01:50,  1.29s/it]

Epoch [1/40], Step [110/196], Loss: 6.9170


 61%|██████    | 120/196 [02:26<01:20,  1.06s/it]

Epoch [1/40], Step [120/196], Loss: 7.0434


 66%|██████▋   | 130/196 [02:37<01:10,  1.06s/it]

Epoch [1/40], Step [130/196], Loss: 6.7128


 71%|███████▏  | 140/196 [02:49<01:00,  1.07s/it]

Epoch [1/40], Step [140/196], Loss: 6.6959


 77%|███████▋  | 150/196 [03:01<00:51,  1.11s/it]

Epoch [1/40], Step [150/196], Loss: 6.7124


 82%|████████▏ | 160/196 [03:12<00:43,  1.20s/it]

Epoch [1/40], Step [160/196], Loss: 6.5790


 87%|████████▋ | 170/196 [03:24<00:34,  1.33s/it]

Epoch [1/40], Step [170/196], Loss: 6.3894


 92%|█████████▏| 180/196 [03:35<00:17,  1.09s/it]

Epoch [1/40], Step [180/196], Loss: 6.3935


 97%|█████████▋| 190/196 [03:48<00:06,  1.10s/it]

Epoch [1/40], Step [190/196], Loss: 6.3294


100%|██████████| 196/196 [03:53<00:00,  1.19s/it]
  with autocast():


Test Accuracy of the student model on the test images: 8.53 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:25<08:41,  2.81s/it]

Epoch [2/40], Step [10/196], Loss: 6.4055


 10%|█         | 20/196 [00:38<03:40,  1.25s/it]

Epoch [2/40], Step [20/196], Loss: 6.1244


 15%|█▌        | 30/196 [00:53<03:48,  1.38s/it]

Epoch [2/40], Step [30/196], Loss: 6.0068


 20%|██        | 40/196 [01:06<03:10,  1.22s/it]

Epoch [2/40], Step [40/196], Loss: 5.9087


 26%|██▌       | 50/196 [01:21<03:12,  1.32s/it]

Epoch [2/40], Step [50/196], Loss: 5.8475


 31%|███       | 60/196 [01:34<02:40,  1.18s/it]

Epoch [2/40], Step [60/196], Loss: 5.9131


 36%|███▌      | 70/196 [01:49<02:39,  1.27s/it]

Epoch [2/40], Step [70/196], Loss: 5.8781


 41%|████      | 80/196 [02:03<02:29,  1.29s/it]

Epoch [2/40], Step [80/196], Loss: 5.7488


 46%|████▌     | 90/196 [02:16<02:07,  1.20s/it]

Epoch [2/40], Step [90/196], Loss: 5.6561


 51%|█████     | 100/196 [02:30<02:07,  1.33s/it]

Epoch [2/40], Step [100/196], Loss: 5.3765


 56%|█████▌    | 110/196 [02:44<01:44,  1.21s/it]

Epoch [2/40], Step [110/196], Loss: 5.5598


 61%|██████    | 120/196 [02:59<01:48,  1.42s/it]

Epoch [2/40], Step [120/196], Loss: 5.4630


 66%|██████▋   | 130/196 [03:12<01:22,  1.26s/it]

Epoch [2/40], Step [130/196], Loss: 5.3986


 71%|███████▏  | 140/196 [03:27<01:23,  1.49s/it]

Epoch [2/40], Step [140/196], Loss: 5.2077


 77%|███████▋  | 150/196 [03:40<00:58,  1.27s/it]

Epoch [2/40], Step [150/196], Loss: 5.2773


 82%|████████▏ | 160/196 [03:55<00:59,  1.66s/it]

Epoch [2/40], Step [160/196], Loss: 5.2209


 87%|████████▋ | 170/196 [04:07<00:32,  1.26s/it]

Epoch [2/40], Step [170/196], Loss: 5.1460


 92%|█████████▏| 180/196 [04:22<00:26,  1.65s/it]

Epoch [2/40], Step [180/196], Loss: 5.2034


 97%|█████████▋| 190/196 [04:35<00:07,  1.24s/it]

Epoch [2/40], Step [190/196], Loss: 5.0593


100%|██████████| 196/196 [04:42<00:00,  1.44s/it]


Test Accuracy of the student model on the test images: 16.36 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:18<04:43,  1.53s/it]

Epoch [3/40], Step [10/196], Loss: 5.0736


 10%|█         | 20/196 [00:32<04:17,  1.46s/it]

Epoch [3/40], Step [20/196], Loss: 4.8246


 15%|█▌        | 30/196 [00:48<04:59,  1.80s/it]

Epoch [3/40], Step [30/196], Loss: 4.8716


 20%|██        | 40/196 [01:02<03:38,  1.40s/it]

Epoch [3/40], Step [40/196], Loss: 4.9154


 26%|██▌       | 50/196 [01:16<04:30,  1.85s/it]

Epoch [3/40], Step [50/196], Loss: 4.9024


 31%|███       | 60/196 [01:32<03:31,  1.56s/it]

Epoch [3/40], Step [60/196], Loss: 4.7149


 36%|███▌      | 70/196 [01:46<03:22,  1.61s/it]

Epoch [3/40], Step [70/196], Loss: 4.6588


 41%|████      | 80/196 [01:59<02:43,  1.41s/it]

Epoch [3/40], Step [80/196], Loss: 4.6066


 46%|████▌     | 90/196 [02:15<02:33,  1.45s/it]

Epoch [3/40], Step [90/196], Loss: 4.7015


 51%|█████     | 100/196 [02:31<02:06,  1.32s/it]

Epoch [3/40], Step [100/196], Loss: 4.6182


 56%|█████▌    | 110/196 [02:45<02:06,  1.47s/it]

Epoch [3/40], Step [110/196], Loss: 4.4713


 61%|██████    | 120/196 [02:58<01:35,  1.26s/it]

Epoch [3/40], Step [120/196], Loss: 4.5074


 66%|██████▋   | 130/196 [03:12<01:35,  1.45s/it]

Epoch [3/40], Step [130/196], Loss: 4.5803


 71%|███████▏  | 140/196 [03:26<01:10,  1.27s/it]

Epoch [3/40], Step [140/196], Loss: 4.4471


 77%|███████▋  | 150/196 [03:40<01:06,  1.45s/it]

Epoch [3/40], Step [150/196], Loss: 4.4604


 82%|████████▏ | 160/196 [03:53<00:45,  1.25s/it]

Epoch [3/40], Step [160/196], Loss: 4.2885


 87%|████████▋ | 170/196 [04:08<00:38,  1.48s/it]

Epoch [3/40], Step [170/196], Loss: 4.2670


 92%|█████████▏| 180/196 [04:21<00:20,  1.25s/it]

Epoch [3/40], Step [180/196], Loss: 4.2692


 97%|█████████▋| 190/196 [04:35<00:07,  1.29s/it]

Epoch [3/40], Step [190/196], Loss: 4.4053


100%|██████████| 196/196 [04:41<00:00,  1.43s/it]


Test Accuracy of the student model on the test images: 21.83 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:21<04:16,  1.38s/it]

Epoch [4/40], Step [10/196], Loss: 4.1855


 10%|█         | 20/196 [00:34<03:53,  1.32s/it]

Epoch [4/40], Step [20/196], Loss: 4.1126


 15%|█▌        | 30/196 [00:49<04:03,  1.47s/it]

Epoch [4/40], Step [30/196], Loss: 4.0839


 20%|██        | 40/196 [01:02<03:19,  1.28s/it]

Epoch [4/40], Step [40/196], Loss: 4.0200


 26%|██▌       | 50/196 [01:17<03:28,  1.43s/it]

Epoch [4/40], Step [50/196], Loss: 4.0108


 31%|███       | 60/196 [01:30<02:53,  1.27s/it]

Epoch [4/40], Step [60/196], Loss: 3.8488


 36%|███▌      | 70/196 [01:44<02:50,  1.35s/it]

Epoch [4/40], Step [70/196], Loss: 3.8796


 41%|████      | 80/196 [01:57<02:26,  1.27s/it]

Epoch [4/40], Step [80/196], Loss: 3.8647


 46%|████▌     | 90/196 [02:12<02:30,  1.42s/it]

Epoch [4/40], Step [90/196], Loss: 3.8965


 51%|█████     | 100/196 [02:25<02:00,  1.26s/it]

Epoch [4/40], Step [100/196], Loss: 3.8138


 56%|█████▌    | 110/196 [02:39<01:52,  1.31s/it]

Epoch [4/40], Step [110/196], Loss: 3.8333


 61%|██████    | 120/196 [02:52<01:35,  1.26s/it]

Epoch [4/40], Step [120/196], Loss: 3.8348


 66%|██████▋   | 130/196 [03:06<01:27,  1.33s/it]

Epoch [4/40], Step [130/196], Loss: 3.7821


 71%|███████▏  | 140/196 [03:19<01:11,  1.27s/it]

Epoch [4/40], Step [140/196], Loss: 3.7711


 77%|███████▋  | 150/196 [03:34<01:02,  1.36s/it]

Epoch [4/40], Step [150/196], Loss: 3.8017


 82%|████████▏ | 160/196 [03:47<00:45,  1.25s/it]

Epoch [4/40], Step [160/196], Loss: 3.6852


 87%|████████▋ | 170/196 [04:01<00:34,  1.31s/it]

Epoch [4/40], Step [170/196], Loss: 3.7150


 92%|█████████▏| 180/196 [04:14<00:19,  1.23s/it]

Epoch [4/40], Step [180/196], Loss: 3.6711


 97%|█████████▋| 190/196 [04:28<00:07,  1.31s/it]

Epoch [4/40], Step [190/196], Loss: 3.4883


100%|██████████| 196/196 [04:34<00:00,  1.40s/it]


Test Accuracy of the student model on the test images: 27.75 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:21<05:12,  1.68s/it]

Epoch [5/40], Step [10/196], Loss: 3.5496


 10%|█         | 20/196 [00:33<03:54,  1.33s/it]

Epoch [5/40], Step [20/196], Loss: 3.4140


 15%|█▌        | 30/196 [00:47<04:21,  1.57s/it]

Epoch [5/40], Step [30/196], Loss: 3.3866


 20%|██        | 40/196 [01:00<03:23,  1.30s/it]

Epoch [5/40], Step [40/196], Loss: 3.4075


 26%|██▌       | 50/196 [01:15<04:12,  1.73s/it]

Epoch [5/40], Step [50/196], Loss: 3.4284


 31%|███       | 60/196 [01:26<02:46,  1.23s/it]

Epoch [5/40], Step [60/196], Loss: 3.3747


 36%|███▌      | 70/196 [01:41<03:19,  1.58s/it]

Epoch [5/40], Step [70/196], Loss: 3.4052


 41%|████      | 80/196 [01:52<02:16,  1.18s/it]

Epoch [5/40], Step [80/196], Loss: 3.3712


 46%|████▌     | 90/196 [02:07<02:57,  1.68s/it]

Epoch [5/40], Step [90/196], Loss: 3.3122


 51%|█████     | 100/196 [02:19<01:55,  1.21s/it]

Epoch [5/40], Step [100/196], Loss: 3.2492


 56%|█████▌    | 110/196 [02:34<02:08,  1.49s/it]

Epoch [5/40], Step [110/196], Loss: 3.2260


 61%|██████    | 120/196 [02:46<01:27,  1.15s/it]

Epoch [5/40], Step [120/196], Loss: 3.2057


 66%|██████▋   | 130/196 [03:00<01:32,  1.39s/it]

Epoch [5/40], Step [130/196], Loss: 3.2241


 71%|███████▏  | 140/196 [03:13<01:08,  1.22s/it]

Epoch [5/40], Step [140/196], Loss: 3.2792


 77%|███████▋  | 150/196 [03:26<00:59,  1.29s/it]

Epoch [5/40], Step [150/196], Loss: 3.2176


 82%|████████▏ | 160/196 [03:40<00:43,  1.20s/it]

Epoch [5/40], Step [160/196], Loss: 3.2565


 87%|████████▋ | 170/196 [03:53<00:32,  1.26s/it]

Epoch [5/40], Step [170/196], Loss: 3.2031


 92%|█████████▏| 180/196 [04:06<00:18,  1.16s/it]

Epoch [5/40], Step [180/196], Loss: 3.1469


 97%|█████████▋| 190/196 [04:19<00:07,  1.19s/it]

Epoch [5/40], Step [190/196], Loss: 3.1030


100%|██████████| 196/196 [04:26<00:00,  1.36s/it]


Test Accuracy of the student model on the test images: 35.61 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:17<04:50,  1.56s/it]

Epoch [6/40], Step [10/196], Loss: 3.1280


 10%|█         | 20/196 [00:32<04:03,  1.38s/it]

Epoch [6/40], Step [20/196], Loss: 2.9977


 15%|█▌        | 30/196 [00:46<03:59,  1.44s/it]

Epoch [6/40], Step [30/196], Loss: 2.9604


 20%|██        | 40/196 [01:00<03:25,  1.32s/it]

Epoch [6/40], Step [40/196], Loss: 2.9590


 26%|██▌       | 50/196 [01:14<03:35,  1.47s/it]

Epoch [6/40], Step [50/196], Loss: 2.9130


 31%|███       | 60/196 [01:28<02:56,  1.30s/it]

Epoch [6/40], Step [60/196], Loss: 2.9087


 36%|███▌      | 70/196 [01:42<02:52,  1.37s/it]

Epoch [6/40], Step [70/196], Loss: 2.8714


 41%|████      | 80/196 [01:55<02:31,  1.31s/it]

Epoch [6/40], Step [80/196], Loss: 2.9555


 46%|████▌     | 90/196 [02:09<02:20,  1.33s/it]

Epoch [6/40], Step [90/196], Loss: 2.9527


 51%|█████     | 100/196 [02:23<02:05,  1.31s/it]

Epoch [6/40], Step [100/196], Loss: 2.9441


 56%|█████▌    | 110/196 [02:36<01:50,  1.29s/it]

Epoch [6/40], Step [110/196], Loss: 2.9440


 61%|██████    | 120/196 [02:50<01:35,  1.25s/it]

Epoch [6/40], Step [120/196], Loss: 2.9794


 66%|██████▋   | 130/196 [03:04<01:29,  1.36s/it]

Epoch [6/40], Step [130/196], Loss: 2.8735


 71%|███████▏  | 140/196 [03:18<01:11,  1.27s/it]

Epoch [6/40], Step [140/196], Loss: 2.9224


 77%|███████▋  | 150/196 [03:32<01:01,  1.35s/it]

Epoch [6/40], Step [150/196], Loss: 2.8365


 82%|████████▏ | 160/196 [03:45<00:46,  1.28s/it]

Epoch [6/40], Step [160/196], Loss: 2.8605


 87%|████████▋ | 170/196 [04:00<00:35,  1.38s/it]

Epoch [6/40], Step [170/196], Loss: 2.8723


 92%|█████████▏| 180/196 [04:13<00:20,  1.26s/it]

Epoch [6/40], Step [180/196], Loss: 2.7453


 97%|█████████▋| 190/196 [04:28<00:08,  1.34s/it]

Epoch [6/40], Step [190/196], Loss: 2.7716


100%|██████████| 196/196 [04:34<00:00,  1.40s/it]


Test Accuracy of the student model on the test images: 39.83 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:27<07:55,  2.56s/it]

Epoch [7/40], Step [10/196], Loss: 2.7254


 10%|█         | 20/196 [00:39<03:44,  1.28s/it]

Epoch [7/40], Step [20/196], Loss: 2.7385


 15%|█▌        | 30/196 [00:53<03:50,  1.39s/it]

Epoch [7/40], Step [30/196], Loss: 2.6474


 20%|██        | 40/196 [01:07<03:15,  1.25s/it]

Epoch [7/40], Step [40/196], Loss: 2.5986


 26%|██▌       | 50/196 [01:22<03:25,  1.41s/it]

Epoch [7/40], Step [50/196], Loss: 2.6691


 31%|███       | 60/196 [01:35<02:42,  1.20s/it]

Epoch [7/40], Step [60/196], Loss: 2.6091


 36%|███▌      | 70/196 [01:50<02:53,  1.38s/it]

Epoch [7/40], Step [70/196], Loss: 2.6527


 41%|████      | 80/196 [02:03<02:24,  1.25s/it]

Epoch [7/40], Step [80/196], Loss: 2.5911


 46%|████▌     | 90/196 [02:18<02:20,  1.33s/it]

Epoch [7/40], Step [90/196], Loss: 2.6119


 51%|█████     | 100/196 [02:31<01:56,  1.21s/it]

Epoch [7/40], Step [100/196], Loss: 2.6165


 56%|█████▌    | 110/196 [02:44<01:50,  1.28s/it]

Epoch [7/40], Step [110/196], Loss: 2.6198


 61%|██████    | 120/196 [02:58<01:32,  1.21s/it]

Epoch [7/40], Step [120/196], Loss: 2.6519


 66%|██████▋   | 130/196 [03:13<01:36,  1.47s/it]

Epoch [7/40], Step [130/196], Loss: 2.6257


 71%|███████▏  | 140/196 [03:26<01:06,  1.20s/it]

Epoch [7/40], Step [140/196], Loss: 2.6395


 77%|███████▋  | 150/196 [03:39<00:57,  1.25s/it]

Epoch [7/40], Step [150/196], Loss: 2.6480


 82%|████████▏ | 160/196 [03:53<00:42,  1.18s/it]

Epoch [7/40], Step [160/196], Loss: 2.6089


 87%|████████▋ | 170/196 [04:06<00:32,  1.24s/it]

Epoch [7/40], Step [170/196], Loss: 2.6072


 92%|█████████▏| 180/196 [04:19<00:18,  1.19s/it]

Epoch [7/40], Step [180/196], Loss: 2.6591


 97%|█████████▋| 190/196 [04:34<00:07,  1.31s/it]

Epoch [7/40], Step [190/196], Loss: 2.6132


100%|██████████| 196/196 [04:40<00:00,  1.43s/it]


Test Accuracy of the student model on the test images: 39.92 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:21<05:18,  1.71s/it]

Epoch [8/40], Step [10/196], Loss: 2.5180


 10%|█         | 20/196 [00:34<03:41,  1.26s/it]

Epoch [8/40], Step [20/196], Loss: 2.4070


 15%|█▌        | 30/196 [00:48<03:27,  1.25s/it]

Epoch [8/40], Step [30/196], Loss: 2.4915


 20%|██        | 40/196 [01:02<03:07,  1.20s/it]

Epoch [8/40], Step [40/196], Loss: 2.4178


 26%|██▌       | 50/196 [01:16<03:08,  1.29s/it]

Epoch [8/40], Step [50/196], Loss: 2.4550


 31%|███       | 60/196 [01:29<02:40,  1.18s/it]

Epoch [8/40], Step [60/196], Loss: 2.4099


 36%|███▌      | 70/196 [01:43<02:46,  1.32s/it]

Epoch [8/40], Step [70/196], Loss: 2.4858


 41%|████      | 80/196 [01:57<02:18,  1.20s/it]

Epoch [8/40], Step [80/196], Loss: 2.3670


 46%|████▌     | 90/196 [02:12<02:34,  1.46s/it]

Epoch [8/40], Step [90/196], Loss: 2.3900


 51%|█████     | 100/196 [02:24<01:51,  1.17s/it]

Epoch [8/40], Step [100/196], Loss: 2.4704


 56%|█████▌    | 110/196 [02:39<01:45,  1.23s/it]

Epoch [8/40], Step [110/196], Loss: 2.3892


 61%|██████    | 120/196 [02:52<01:30,  1.19s/it]

Epoch [8/40], Step [120/196], Loss: 2.3936


 66%|██████▋   | 130/196 [03:06<01:29,  1.35s/it]

Epoch [8/40], Step [130/196], Loss: 2.4429


 71%|███████▏  | 140/196 [03:20<01:07,  1.21s/it]

Epoch [8/40], Step [140/196], Loss: 2.3850


 77%|███████▋  | 150/196 [03:34<01:00,  1.31s/it]

Epoch [8/40], Step [150/196], Loss: 2.3919


 82%|████████▏ | 160/196 [03:47<00:43,  1.20s/it]

Epoch [8/40], Step [160/196], Loss: 2.3633


 87%|████████▋ | 170/196 [04:01<00:35,  1.37s/it]

Epoch [8/40], Step [170/196], Loss: 2.4369


 92%|█████████▏| 180/196 [04:15<00:20,  1.30s/it]

Epoch [8/40], Step [180/196], Loss: 2.4179


 97%|█████████▋| 190/196 [04:30<00:09,  1.62s/it]

Epoch [8/40], Step [190/196], Loss: 2.4222


100%|██████████| 196/196 [04:36<00:00,  1.41s/it]


Test Accuracy of the student model on the test images: 40.11 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:23<06:05,  1.97s/it]

Epoch [9/40], Step [10/196], Loss: 2.3593


 10%|█         | 20/196 [00:40<04:32,  1.55s/it]

Epoch [9/40], Step [20/196], Loss: 2.2789


 15%|█▌        | 30/196 [00:55<04:42,  1.70s/it]

Epoch [9/40], Step [30/196], Loss: 2.2568


 20%|██        | 40/196 [01:08<03:23,  1.30s/it]

Epoch [9/40], Step [40/196], Loss: 2.2420


 26%|██▌       | 50/196 [01:24<04:21,  1.79s/it]

Epoch [9/40], Step [50/196], Loss: 2.2363


 31%|███       | 60/196 [01:43<03:43,  1.64s/it]

Epoch [9/40], Step [60/196], Loss: 2.2368


 36%|███▌      | 70/196 [01:57<02:57,  1.41s/it]

Epoch [9/40], Step [70/196], Loss: 2.2256


 41%|████      | 80/196 [02:11<02:18,  1.20s/it]

Epoch [9/40], Step [80/196], Loss: 2.2484


 46%|████▌     | 90/196 [02:25<02:18,  1.31s/it]

Epoch [9/40], Step [90/196], Loss: 2.2278


 51%|█████     | 100/196 [02:38<01:58,  1.23s/it]

Epoch [9/40], Step [100/196], Loss: 2.2668


 56%|█████▌    | 110/196 [02:53<02:03,  1.44s/it]

Epoch [9/40], Step [110/196], Loss: 2.1812


 61%|██████    | 120/196 [03:06<01:34,  1.24s/it]

Epoch [9/40], Step [120/196], Loss: 2.2283


 66%|██████▋   | 130/196 [03:20<01:33,  1.41s/it]

Epoch [9/40], Step [130/196], Loss: 2.2146


 71%|███████▏  | 140/196 [03:33<01:03,  1.14s/it]

Epoch [9/40], Step [140/196], Loss: 2.2884


 77%|███████▋  | 150/196 [03:47<01:04,  1.39s/it]

Epoch [9/40], Step [150/196], Loss: 2.2399


 82%|████████▏ | 160/196 [04:00<00:45,  1.26s/it]

Epoch [9/40], Step [160/196], Loss: 2.2218


 87%|████████▋ | 170/196 [04:14<00:32,  1.26s/it]

Epoch [9/40], Step [170/196], Loss: 2.2586


 92%|█████████▏| 180/196 [04:27<00:19,  1.21s/it]

Epoch [9/40], Step [180/196], Loss: 2.2644


 97%|█████████▋| 190/196 [04:41<00:07,  1.23s/it]

Epoch [9/40], Step [190/196], Loss: 2.2476


100%|██████████| 196/196 [04:48<00:00,  1.47s/it]


Test Accuracy of the student model on the test images: 40.34 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:22<04:09,  1.34s/it]

Epoch [10/40], Step [10/196], Loss: 2.1617


 10%|█         | 20/196 [00:35<03:41,  1.26s/it]

Epoch [10/40], Step [20/196], Loss: 2.1212


 15%|█▌        | 30/196 [00:50<03:41,  1.33s/it]

Epoch [10/40], Step [30/196], Loss: 2.1104


 20%|██        | 40/196 [01:04<03:32,  1.36s/it]

Epoch [10/40], Step [40/196], Loss: 2.0793


 26%|██▌       | 50/196 [01:18<02:59,  1.23s/it]

Epoch [10/40], Step [50/196], Loss: 2.1080


 31%|███       | 60/196 [01:31<02:42,  1.20s/it]

Epoch [10/40], Step [60/196], Loss: 2.0705


 36%|███▌      | 70/196 [01:45<02:48,  1.34s/it]

Epoch [10/40], Step [70/196], Loss: 2.0883


 41%|████      | 80/196 [01:59<02:19,  1.20s/it]

Epoch [10/40], Step [80/196], Loss: 2.1048


 46%|████▌     | 90/196 [02:13<02:26,  1.38s/it]

Epoch [10/40], Step [90/196], Loss: 2.0729


 51%|█████     | 100/196 [02:26<01:52,  1.17s/it]

Epoch [10/40], Step [100/196], Loss: 2.1274


 56%|█████▌    | 110/196 [02:40<01:52,  1.30s/it]

Epoch [10/40], Step [110/196], Loss: 2.1292


 61%|██████    | 120/196 [02:53<01:28,  1.17s/it]

Epoch [10/40], Step [120/196], Loss: 2.1431


 66%|██████▋   | 130/196 [03:07<01:25,  1.29s/it]

Epoch [10/40], Step [130/196], Loss: 2.0901


 71%|███████▏  | 140/196 [03:20<01:10,  1.25s/it]

Epoch [10/40], Step [140/196], Loss: 2.0621


 77%|███████▋  | 150/196 [03:34<00:58,  1.26s/it]

Epoch [10/40], Step [150/196], Loss: 2.1241


 82%|████████▏ | 160/196 [03:48<00:45,  1.27s/it]

Epoch [10/40], Step [160/196], Loss: 2.0410


 87%|████████▋ | 170/196 [04:02<00:32,  1.25s/it]

Epoch [10/40], Step [170/196], Loss: 2.0730


 92%|█████████▏| 180/196 [04:16<00:19,  1.19s/it]

Epoch [10/40], Step [180/196], Loss: 2.0728


 97%|█████████▋| 190/196 [04:30<00:07,  1.26s/it]

Epoch [10/40], Step [190/196], Loss: 2.0695


100%|██████████| 196/196 [04:37<00:00,  1.42s/it]


Test Accuracy of the student model on the test images: 41.64 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:18<04:13,  1.36s/it]

Epoch [11/40], Step [10/196], Loss: 2.0094


 10%|█         | 20/196 [00:32<03:46,  1.29s/it]

Epoch [11/40], Step [20/196], Loss: 1.9855


 15%|█▌        | 30/196 [00:48<03:26,  1.25s/it]

Epoch [11/40], Step [30/196], Loss: 1.9256


 20%|██        | 40/196 [01:01<03:06,  1.20s/it]

Epoch [11/40], Step [40/196], Loss: 1.9501


 26%|██▌       | 50/196 [01:15<03:03,  1.26s/it]

Epoch [11/40], Step [50/196], Loss: 1.9338


 31%|███       | 60/196 [01:28<02:44,  1.21s/it]

Epoch [11/40], Step [60/196], Loss: 1.9611


 36%|███▌      | 70/196 [01:43<02:43,  1.30s/it]

Epoch [11/40], Step [70/196], Loss: 2.0198


 41%|████      | 80/196 [01:56<02:19,  1.20s/it]

Epoch [11/40], Step [80/196], Loss: 2.0166


 46%|████▌     | 90/196 [02:09<02:03,  1.17s/it]

Epoch [11/40], Step [90/196], Loss: 1.9553


 51%|█████     | 100/196 [02:23<01:53,  1.18s/it]

Epoch [11/40], Step [100/196], Loss: 1.9657


 56%|█████▌    | 110/196 [02:37<01:53,  1.32s/it]

Epoch [11/40], Step [110/196], Loss: 1.9514


 61%|██████    | 120/196 [02:51<01:31,  1.20s/it]

Epoch [11/40], Step [120/196], Loss: 1.9750


 66%|██████▋   | 130/196 [03:04<01:31,  1.39s/it]

Epoch [11/40], Step [130/196], Loss: 1.9763


 71%|███████▏  | 140/196 [03:17<01:07,  1.21s/it]

Epoch [11/40], Step [140/196], Loss: 1.9811


 77%|███████▋  | 150/196 [03:32<00:59,  1.30s/it]

Epoch [11/40], Step [150/196], Loss: 1.9974


 82%|████████▏ | 160/196 [03:45<00:44,  1.24s/it]

Epoch [11/40], Step [160/196], Loss: 2.0225


 87%|████████▋ | 170/196 [03:59<00:32,  1.24s/it]

Epoch [11/40], Step [170/196], Loss: 1.9255


 92%|█████████▏| 180/196 [04:12<00:19,  1.19s/it]

Epoch [11/40], Step [180/196], Loss: 1.9758


 97%|█████████▋| 190/196 [04:27<00:08,  1.36s/it]

Epoch [11/40], Step [190/196], Loss: 1.9651


100%|██████████| 196/196 [04:32<00:00,  1.39s/it]


Test Accuracy of the student model on the test images: 45.87 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:18<04:10,  1.35s/it]

Epoch [12/40], Step [10/196], Loss: 1.8475


 10%|█         | 20/196 [00:32<03:32,  1.20s/it]

Epoch [12/40], Step [20/196], Loss: 1.8532


 15%|█▌        | 30/196 [00:46<03:33,  1.29s/it]

Epoch [12/40], Step [30/196], Loss: 1.8511


 20%|██        | 40/196 [00:59<03:06,  1.20s/it]

Epoch [12/40], Step [40/196], Loss: 1.7867


 26%|██▌       | 50/196 [01:13<03:05,  1.27s/it]

Epoch [12/40], Step [50/196], Loss: 1.8291


 31%|███       | 60/196 [01:27<02:40,  1.18s/it]

Epoch [12/40], Step [60/196], Loss: 1.8365


 36%|███▌      | 70/196 [01:40<02:44,  1.31s/it]

Epoch [12/40], Step [70/196], Loss: 1.8500


 41%|████      | 80/196 [01:53<02:17,  1.18s/it]

Epoch [12/40], Step [80/196], Loss: 1.8711


 46%|████▌     | 90/196 [02:08<02:17,  1.30s/it]

Epoch [12/40], Step [90/196], Loss: 1.8766


 51%|█████     | 100/196 [02:21<01:57,  1.22s/it]

Epoch [12/40], Step [100/196], Loss: 1.8512


 56%|█████▌    | 110/196 [02:36<01:57,  1.37s/it]

Epoch [12/40], Step [110/196], Loss: 1.8856


 61%|██████    | 120/196 [02:49<01:30,  1.19s/it]

Epoch [12/40], Step [120/196], Loss: 1.8316


 66%|██████▋   | 130/196 [03:03<01:31,  1.39s/it]

Epoch [12/40], Step [130/196], Loss: 1.8821


 71%|███████▏  | 140/196 [03:16<01:06,  1.19s/it]

Epoch [12/40], Step [140/196], Loss: 1.8702


 77%|███████▋  | 150/196 [03:31<01:04,  1.41s/it]

Epoch [12/40], Step [150/196], Loss: 1.8909


 82%|████████▏ | 160/196 [03:44<00:43,  1.19s/it]

Epoch [12/40], Step [160/196], Loss: 1.9198


 87%|████████▋ | 170/196 [03:58<00:36,  1.40s/it]

Epoch [12/40], Step [170/196], Loss: 1.8890


 92%|█████████▏| 180/196 [04:11<00:18,  1.18s/it]

Epoch [12/40], Step [180/196], Loss: 1.8866


 97%|█████████▋| 190/196 [04:26<00:08,  1.50s/it]

Epoch [12/40], Step [190/196], Loss: 1.9215


100%|██████████| 196/196 [04:32<00:00,  1.39s/it]


Test Accuracy of the student model on the test images: 46.74 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:20<05:58,  1.93s/it]

Epoch [13/40], Step [10/196], Loss: 1.8628


 10%|█         | 20/196 [00:35<04:11,  1.43s/it]

Epoch [13/40], Step [20/196], Loss: 1.7993


 15%|█▌        | 30/196 [00:57<04:39,  1.68s/it]

Epoch [13/40], Step [30/196], Loss: 1.7171


 20%|██        | 40/196 [01:10<03:17,  1.27s/it]

Epoch [13/40], Step [40/196], Loss: 1.6984


 26%|██▌       | 50/196 [01:24<03:19,  1.37s/it]

Epoch [13/40], Step [50/196], Loss: 1.7079


 31%|███       | 60/196 [01:37<02:52,  1.27s/it]

Epoch [13/40], Step [60/196], Loss: 1.7586


 36%|███▌      | 70/196 [01:51<03:04,  1.47s/it]

Epoch [13/40], Step [70/196], Loss: 1.7694


 41%|████      | 80/196 [02:03<02:20,  1.21s/it]

Epoch [13/40], Step [80/196], Loss: 1.7418


 46%|████▌     | 90/196 [02:18<02:42,  1.53s/it]

Epoch [13/40], Step [90/196], Loss: 1.7284


 51%|█████     | 100/196 [02:31<02:03,  1.29s/it]

Epoch [13/40], Step [100/196], Loss: 1.6939


 56%|█████▌    | 110/196 [02:46<01:47,  1.25s/it]

Epoch [13/40], Step [110/196], Loss: 1.7754


 61%|██████    | 120/196 [02:59<01:36,  1.27s/it]

Epoch [13/40], Step [120/196], Loss: 1.7285


 66%|██████▋   | 130/196 [03:13<01:38,  1.50s/it]

Epoch [13/40], Step [130/196], Loss: 1.7976


 71%|███████▏  | 140/196 [03:26<01:12,  1.29s/it]

Epoch [13/40], Step [140/196], Loss: 1.8036


 77%|███████▋  | 150/196 [03:39<01:03,  1.39s/it]

Epoch [13/40], Step [150/196], Loss: 1.7901


 82%|████████▏ | 160/196 [03:52<00:45,  1.26s/it]

Epoch [13/40], Step [160/196], Loss: 1.7712


 87%|████████▋ | 170/196 [04:07<00:35,  1.38s/it]

Epoch [13/40], Step [170/196], Loss: 1.8271


 92%|█████████▏| 180/196 [04:20<00:20,  1.29s/it]

Epoch [13/40], Step [180/196], Loss: 1.7937


 97%|█████████▋| 190/196 [04:34<00:08,  1.45s/it]

Epoch [13/40], Step [190/196], Loss: 1.8283


100%|██████████| 196/196 [04:40<00:00,  1.43s/it]


Test Accuracy of the student model on the test images: 45.39 %


  5%|▌         | 10/196 [00:24<06:10,  1.99s/it]

Epoch [14/40], Step [10/196], Loss: 1.7805


 10%|█         | 20/196 [00:36<03:38,  1.24s/it]

Epoch [14/40], Step [20/196], Loss: 1.6958


 15%|█▌        | 30/196 [00:50<04:13,  1.52s/it]

Epoch [14/40], Step [30/196], Loss: 1.6393


 20%|██        | 40/196 [01:01<02:58,  1.14s/it]

Epoch [14/40], Step [40/196], Loss: 1.6637


 26%|██▌       | 50/196 [01:15<03:24,  1.40s/it]

Epoch [14/40], Step [50/196], Loss: 1.5994


 31%|███       | 60/196 [01:27<02:28,  1.09s/it]

Epoch [14/40], Step [60/196], Loss: 1.6710


 36%|███▌      | 70/196 [01:41<02:55,  1.39s/it]

Epoch [14/40], Step [70/196], Loss: 1.6135


 41%|████      | 80/196 [01:53<02:10,  1.12s/it]

Epoch [14/40], Step [80/196], Loss: 1.6501


 46%|████▌     | 90/196 [02:07<02:11,  1.24s/it]

Epoch [14/40], Step [90/196], Loss: 1.6540


 51%|█████     | 100/196 [02:21<02:00,  1.26s/it]

Epoch [14/40], Step [100/196], Loss: 1.6188


 56%|█████▌    | 110/196 [02:37<02:10,  1.51s/it]

Epoch [14/40], Step [110/196], Loss: 1.6337


 61%|██████    | 120/196 [02:49<01:25,  1.12s/it]

Epoch [14/40], Step [120/196], Loss: 1.6539


 66%|██████▋   | 130/196 [03:03<01:31,  1.39s/it]

Epoch [14/40], Step [130/196], Loss: 1.6774


 71%|███████▏  | 140/196 [03:16<01:04,  1.16s/it]

Epoch [14/40], Step [140/196], Loss: 1.7318


 77%|███████▋  | 150/196 [03:32<01:10,  1.52s/it]

Epoch [14/40], Step [150/196], Loss: 1.7089


 82%|████████▏ | 160/196 [03:44<00:42,  1.18s/it]

Epoch [14/40], Step [160/196], Loss: 1.7042


 87%|████████▋ | 170/196 [03:59<00:38,  1.48s/it]

Epoch [14/40], Step [170/196], Loss: 1.7347


 92%|█████████▏| 180/196 [04:11<00:19,  1.20s/it]

Epoch [14/40], Step [180/196], Loss: 1.7331


 97%|█████████▋| 190/196 [04:26<00:08,  1.48s/it]

Epoch [14/40], Step [190/196], Loss: 1.6709


100%|██████████| 196/196 [04:32<00:00,  1.39s/it]


Test Accuracy of the student model on the test images: 33.78 %


  5%|▌         | 10/196 [00:18<04:15,  1.37s/it]

Epoch [15/40], Step [10/196], Loss: 1.6910


 10%|█         | 20/196 [00:31<03:25,  1.17s/it]

Epoch [15/40], Step [20/196], Loss: 1.6112


 15%|█▌        | 30/196 [00:43<03:11,  1.15s/it]

Epoch [15/40], Step [30/196], Loss: 1.5971


 20%|██        | 40/196 [00:56<03:10,  1.22s/it]

Epoch [15/40], Step [40/196], Loss: 1.5633


 26%|██▌       | 50/196 [01:09<02:58,  1.22s/it]

Epoch [15/40], Step [50/196], Loss: 1.5533


 31%|███       | 60/196 [01:22<02:46,  1.23s/it]

Epoch [15/40], Step [60/196], Loss: 1.5455


 36%|███▌      | 70/196 [01:34<02:49,  1.35s/it]

Epoch [15/40], Step [70/196], Loss: 1.5919


 41%|████      | 80/196 [01:47<02:21,  1.22s/it]

Epoch [15/40], Step [80/196], Loss: 1.5562


 46%|████▌     | 90/196 [02:00<02:29,  1.41s/it]

Epoch [15/40], Step [90/196], Loss: 1.5642


 51%|█████     | 100/196 [02:11<01:44,  1.09s/it]

Epoch [15/40], Step [100/196], Loss: 1.6050


 56%|█████▌    | 110/196 [02:24<01:37,  1.13s/it]

Epoch [15/40], Step [110/196], Loss: 1.6173


 61%|██████    | 120/196 [02:35<01:23,  1.10s/it]

Epoch [15/40], Step [120/196], Loss: 1.5748


 66%|██████▋   | 130/196 [02:48<01:19,  1.20s/it]

Epoch [15/40], Step [130/196], Loss: 1.5872


 71%|███████▏  | 140/196 [03:01<01:05,  1.18s/it]

Epoch [15/40], Step [140/196], Loss: 1.5742


 77%|███████▋  | 150/196 [03:14<00:57,  1.24s/it]

Epoch [15/40], Step [150/196], Loss: 1.6410


 82%|████████▏ | 160/196 [03:26<00:43,  1.21s/it]

Epoch [15/40], Step [160/196], Loss: 1.6231


 87%|████████▋ | 170/196 [03:39<00:33,  1.28s/it]

Epoch [15/40], Step [170/196], Loss: 1.6399


 92%|█████████▏| 180/196 [03:52<00:22,  1.38s/it]

Epoch [15/40], Step [180/196], Loss: 1.6784


 97%|█████████▋| 190/196 [04:05<00:07,  1.27s/it]

Epoch [15/40], Step [190/196], Loss: 1.6270


100%|██████████| 196/196 [04:10<00:00,  1.28s/it]


Test Accuracy of the student model on the test images: 33.55 %


  5%|▌         | 10/196 [00:22<06:34,  2.12s/it]

Epoch [16/40], Step [10/196], Loss: 1.5505


 10%|█         | 20/196 [00:36<03:41,  1.26s/it]

Epoch [16/40], Step [20/196], Loss: 1.4767


 15%|█▌        | 30/196 [00:51<04:10,  1.51s/it]

Epoch [16/40], Step [30/196], Loss: 1.4387


 20%|██        | 40/196 [01:03<03:00,  1.15s/it]

Epoch [16/40], Step [40/196], Loss: 1.4266


 26%|██▌       | 50/196 [01:17<03:16,  1.34s/it]

Epoch [16/40], Step [50/196], Loss: 1.4674


 31%|███       | 60/196 [01:30<02:43,  1.20s/it]

Epoch [16/40], Step [60/196], Loss: 1.5035


 36%|███▌      | 70/196 [01:46<03:26,  1.64s/it]

Epoch [16/40], Step [70/196], Loss: 1.4892


 41%|████      | 80/196 [01:57<02:14,  1.16s/it]

Epoch [16/40], Step [80/196], Loss: 1.4831


 46%|████▌     | 90/196 [02:12<02:47,  1.58s/it]

Epoch [16/40], Step [90/196], Loss: 1.5299


 51%|█████     | 100/196 [02:25<01:57,  1.23s/it]

Epoch [16/40], Step [100/196], Loss: 1.5182


 56%|█████▌    | 110/196 [02:41<02:18,  1.61s/it]

Epoch [16/40], Step [110/196], Loss: 1.4922


 61%|██████    | 120/196 [02:53<01:33,  1.24s/it]

Epoch [16/40], Step [120/196], Loss: 1.5742


 66%|██████▋   | 130/196 [03:08<01:44,  1.58s/it]

Epoch [16/40], Step [130/196], Loss: 1.5300


 71%|███████▏  | 140/196 [03:21<01:09,  1.24s/it]

Epoch [16/40], Step [140/196], Loss: 1.5043


 77%|███████▋  | 150/196 [03:36<01:13,  1.60s/it]

Epoch [16/40], Step [150/196], Loss: 1.5400


 82%|████████▏ | 160/196 [03:49<00:46,  1.30s/it]

Epoch [16/40], Step [160/196], Loss: 1.5698


 87%|████████▋ | 170/196 [04:03<00:41,  1.58s/it]

Epoch [16/40], Step [170/196], Loss: 1.5610


 92%|█████████▏| 180/196 [04:17<00:21,  1.37s/it]

Epoch [16/40], Step [180/196], Loss: 1.5688


 97%|█████████▋| 190/196 [04:31<00:09,  1.55s/it]

Epoch [16/40], Step [190/196], Loss: 1.5614


100%|██████████| 196/196 [04:36<00:00,  1.41s/it]


Test Accuracy of the student model on the test images: 48.71 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:21<05:10,  1.67s/it]

Epoch [17/40], Step [10/196], Loss: 1.4668


 10%|█         | 20/196 [00:33<03:21,  1.15s/it]

Epoch [17/40], Step [20/196], Loss: 1.4752


 15%|█▌        | 30/196 [00:47<04:06,  1.48s/it]

Epoch [17/40], Step [30/196], Loss: 1.4442


 20%|██        | 40/196 [00:59<02:58,  1.14s/it]

Epoch [17/40], Step [40/196], Loss: 1.3979


 26%|██▌       | 50/196 [01:11<02:40,  1.10s/it]

Epoch [17/40], Step [50/196], Loss: 1.4235


 31%|███       | 60/196 [01:23<02:29,  1.10s/it]

Epoch [17/40], Step [60/196], Loss: 1.4117


 36%|███▌      | 70/196 [01:37<02:29,  1.18s/it]

Epoch [17/40], Step [70/196], Loss: 1.3884


 41%|████      | 80/196 [01:50<02:09,  1.12s/it]

Epoch [17/40], Step [80/196], Loss: 1.4033


 46%|████▌     | 90/196 [02:03<02:06,  1.20s/it]

Epoch [17/40], Step [90/196], Loss: 1.3973


 51%|█████     | 100/196 [02:16<01:50,  1.15s/it]

Epoch [17/40], Step [100/196], Loss: 1.4023


 56%|█████▌    | 110/196 [02:29<01:47,  1.25s/it]

Epoch [17/40], Step [110/196], Loss: 1.4414


 61%|██████    | 120/196 [02:42<01:31,  1.21s/it]

Epoch [17/40], Step [120/196], Loss: 1.4434


 66%|██████▋   | 130/196 [02:55<01:19,  1.21s/it]

Epoch [17/40], Step [130/196], Loss: 1.4544


 71%|███████▏  | 140/196 [03:08<01:11,  1.27s/it]

Epoch [17/40], Step [140/196], Loss: 1.4495


 77%|███████▋  | 150/196 [03:21<01:02,  1.36s/it]

Epoch [17/40], Step [150/196], Loss: 1.4752


 82%|████████▏ | 160/196 [03:34<00:46,  1.28s/it]

Epoch [17/40], Step [160/196], Loss: 1.4508


 87%|████████▋ | 170/196 [03:48<00:38,  1.48s/it]

Epoch [17/40], Step [170/196], Loss: 1.4647


 92%|█████████▏| 180/196 [04:00<00:19,  1.22s/it]

Epoch [17/40], Step [180/196], Loss: 1.4779


 97%|█████████▋| 190/196 [04:14<00:08,  1.47s/it]

Epoch [17/40], Step [190/196], Loss: 1.5103


100%|██████████| 196/196 [04:19<00:00,  1.33s/it]


Test Accuracy of the student model on the test images: 49.52 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:26<06:20,  2.05s/it]

Epoch [18/40], Step [10/196], Loss: 1.4144


 10%|█         | 20/196 [00:40<04:03,  1.38s/it]

Epoch [18/40], Step [20/196], Loss: 1.3764


 15%|█▌        | 30/196 [00:54<03:50,  1.39s/it]

Epoch [18/40], Step [30/196], Loss: 1.3460


 20%|██        | 40/196 [01:07<03:23,  1.30s/it]

Epoch [18/40], Step [40/196], Loss: 1.3715


 26%|██▌       | 50/196 [01:22<03:28,  1.43s/it]

Epoch [18/40], Step [50/196], Loss: 1.3370


 31%|███       | 60/196 [01:35<02:52,  1.27s/it]

Epoch [18/40], Step [60/196], Loss: 1.3053


 36%|███▌      | 70/196 [01:49<02:52,  1.37s/it]

Epoch [18/40], Step [70/196], Loss: 1.3221


 41%|████      | 80/196 [02:02<02:30,  1.30s/it]

Epoch [18/40], Step [80/196], Loss: 1.3228


 46%|████▌     | 90/196 [02:17<02:32,  1.44s/it]

Epoch [18/40], Step [90/196], Loss: 1.3480


 51%|█████     | 100/196 [02:29<02:00,  1.25s/it]

Epoch [18/40], Step [100/196], Loss: 1.3852


 56%|█████▌    | 110/196 [02:44<01:56,  1.35s/it]

Epoch [18/40], Step [110/196], Loss: 1.3429


 61%|██████    | 120/196 [02:56<01:32,  1.22s/it]

Epoch [18/40], Step [120/196], Loss: 1.3832


 66%|██████▋   | 130/196 [03:10<01:32,  1.41s/it]

Epoch [18/40], Step [130/196], Loss: 1.3483


 71%|███████▏  | 140/196 [03:24<01:13,  1.32s/it]

Epoch [18/40], Step [140/196], Loss: 1.3503


 77%|███████▋  | 150/196 [03:38<01:02,  1.36s/it]

Epoch [18/40], Step [150/196], Loss: 1.3410


 82%|████████▏ | 160/196 [03:51<00:46,  1.30s/it]

Epoch [18/40], Step [160/196], Loss: 1.3650


 87%|████████▋ | 170/196 [04:05<00:36,  1.42s/it]

Epoch [18/40], Step [170/196], Loss: 1.3747


 92%|█████████▏| 180/196 [04:18<00:21,  1.33s/it]

Epoch [18/40], Step [180/196], Loss: 1.3933


 97%|█████████▋| 190/196 [04:32<00:08,  1.49s/it]

Epoch [18/40], Step [190/196], Loss: 1.3988


100%|██████████| 196/196 [04:38<00:00,  1.42s/it]


Test Accuracy of the student model on the test images: 51.60 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:18<04:31,  1.46s/it]

Epoch [19/40], Step [10/196], Loss: 1.3157


 10%|█         | 20/196 [00:36<04:00,  1.36s/it]

Epoch [19/40], Step [20/196], Loss: 1.2914


 15%|█▌        | 30/196 [00:53<04:13,  1.53s/it]

Epoch [19/40], Step [30/196], Loss: 1.2470


 20%|██        | 40/196 [01:05<03:23,  1.31s/it]

Epoch [19/40], Step [40/196], Loss: 1.2568


 26%|██▌       | 50/196 [01:20<03:42,  1.53s/it]

Epoch [19/40], Step [50/196], Loss: 1.2497


 31%|███       | 60/196 [01:33<02:46,  1.23s/it]

Epoch [19/40], Step [60/196], Loss: 1.2591


 36%|███▌      | 70/196 [01:47<03:07,  1.49s/it]

Epoch [19/40], Step [70/196], Loss: 1.2333


 41%|████      | 80/196 [02:01<02:29,  1.29s/it]

Epoch [19/40], Step [80/196], Loss: 1.2707


 46%|████▌     | 90/196 [02:15<02:45,  1.56s/it]

Epoch [19/40], Step [90/196], Loss: 1.2971


 51%|█████     | 100/196 [02:28<02:01,  1.27s/it]

Epoch [19/40], Step [100/196], Loss: 1.2657


 56%|█████▌    | 110/196 [02:43<02:18,  1.61s/it]

Epoch [19/40], Step [110/196], Loss: 1.2453


 61%|██████    | 120/196 [02:55<01:42,  1.34s/it]

Epoch [19/40], Step [120/196], Loss: 1.2587


 66%|██████▋   | 130/196 [03:10<01:54,  1.73s/it]

Epoch [19/40], Step [130/196], Loss: 1.2747


 71%|███████▏  | 140/196 [03:24<01:19,  1.42s/it]

Epoch [19/40], Step [140/196], Loss: 1.2682


 77%|███████▋  | 150/196 [03:38<01:16,  1.66s/it]

Epoch [19/40], Step [150/196], Loss: 1.3015


 82%|████████▏ | 160/196 [03:50<00:46,  1.30s/it]

Epoch [19/40], Step [160/196], Loss: 1.3047


 87%|████████▋ | 170/196 [04:04<00:39,  1.53s/it]

Epoch [19/40], Step [170/196], Loss: 1.3163


 92%|█████████▏| 180/196 [04:18<00:23,  1.45s/it]

Epoch [19/40], Step [180/196], Loss: 1.3258


 97%|█████████▋| 190/196 [04:32<00:09,  1.64s/it]

Epoch [19/40], Step [190/196], Loss: 1.3722


100%|██████████| 196/196 [04:38<00:00,  1.42s/it]


Test Accuracy of the student model on the test images: 51.24 %


  5%|▌         | 10/196 [00:24<06:51,  2.21s/it]

Epoch [20/40], Step [10/196], Loss: 1.2542


 10%|█         | 20/196 [00:35<03:35,  1.22s/it]

Epoch [20/40], Step [20/196], Loss: 1.1970


 15%|█▌        | 30/196 [00:51<04:28,  1.62s/it]

Epoch [20/40], Step [30/196], Loss: 1.1618


 20%|██        | 40/196 [01:03<03:06,  1.20s/it]

Epoch [20/40], Step [40/196], Loss: 1.1778


 26%|██▌       | 50/196 [01:19<03:52,  1.59s/it]

Epoch [20/40], Step [50/196], Loss: 1.1778


 31%|███       | 60/196 [01:32<03:05,  1.36s/it]

Epoch [20/40], Step [60/196], Loss: 1.1693


 36%|███▌      | 70/196 [01:47<03:26,  1.64s/it]

Epoch [20/40], Step [70/196], Loss: 1.1914


 41%|████      | 80/196 [02:00<02:26,  1.26s/it]

Epoch [20/40], Step [80/196], Loss: 1.1530


 46%|████▌     | 90/196 [02:14<02:42,  1.53s/it]

Epoch [20/40], Step [90/196], Loss: 1.1911


 51%|█████     | 100/196 [02:27<02:11,  1.37s/it]

Epoch [20/40], Step [100/196], Loss: 1.2021


 56%|█████▌    | 110/196 [02:42<02:18,  1.61s/it]

Epoch [20/40], Step [110/196], Loss: 1.2293


 61%|██████    | 120/196 [02:55<01:43,  1.36s/it]

Epoch [20/40], Step [120/196], Loss: 1.2426


 66%|██████▋   | 130/196 [03:09<01:42,  1.56s/it]

Epoch [20/40], Step [130/196], Loss: 1.2431


 71%|███████▏  | 140/196 [03:22<01:15,  1.35s/it]

Epoch [20/40], Step [140/196], Loss: 1.1839


 77%|███████▋  | 150/196 [03:36<01:09,  1.51s/it]

Epoch [20/40], Step [150/196], Loss: 1.2315


 82%|████████▏ | 160/196 [03:49<00:47,  1.32s/it]

Epoch [20/40], Step [160/196], Loss: 1.2455


 87%|████████▋ | 170/196 [04:03<00:38,  1.49s/it]

Epoch [20/40], Step [170/196], Loss: 1.2577


 92%|█████████▏| 180/196 [04:17<00:20,  1.29s/it]

Epoch [20/40], Step [180/196], Loss: 1.2567


 97%|█████████▋| 190/196 [04:31<00:08,  1.42s/it]

Epoch [20/40], Step [190/196], Loss: 1.2493


100%|██████████| 196/196 [04:36<00:00,  1.41s/it]


Test Accuracy of the student model on the test images: 56.03 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:18<04:21,  1.41s/it]

Epoch [21/40], Step [10/196], Loss: 1.2121


 10%|█         | 20/196 [00:31<03:38,  1.24s/it]

Epoch [21/40], Step [20/196], Loss: 1.1562


 15%|█▌        | 30/196 [00:45<03:27,  1.25s/it]

Epoch [21/40], Step [30/196], Loss: 1.1678


 20%|██        | 40/196 [01:03<03:55,  1.51s/it]

Epoch [21/40], Step [40/196], Loss: 1.1165


 26%|██▌       | 50/196 [01:17<03:41,  1.52s/it]

Epoch [21/40], Step [50/196], Loss: 1.1197


 31%|███       | 60/196 [01:30<02:49,  1.25s/it]

Epoch [21/40], Step [60/196], Loss: 1.1480


 36%|███▌      | 70/196 [01:44<02:58,  1.42s/it]

Epoch [21/40], Step [70/196], Loss: 1.1352


 41%|████      | 80/196 [01:57<02:21,  1.22s/it]

Epoch [21/40], Step [80/196], Loss: 1.1271


 46%|████▌     | 90/196 [02:11<02:30,  1.42s/it]

Epoch [21/40], Step [90/196], Loss: 1.1153


 51%|█████     | 100/196 [02:24<02:03,  1.29s/it]

Epoch [21/40], Step [100/196], Loss: 1.1311


 56%|█████▌    | 110/196 [02:38<02:13,  1.55s/it]

Epoch [21/40], Step [110/196], Loss: 1.1277


 61%|██████    | 120/196 [02:51<01:39,  1.31s/it]

Epoch [21/40], Step [120/196], Loss: 1.1490


 66%|██████▋   | 130/196 [03:06<01:44,  1.58s/it]

Epoch [21/40], Step [130/196], Loss: 1.1731


 71%|███████▏  | 140/196 [03:19<01:16,  1.37s/it]

Epoch [21/40], Step [140/196], Loss: 1.1660


 77%|███████▋  | 150/196 [03:34<01:11,  1.54s/it]

Epoch [21/40], Step [150/196], Loss: 1.2002


 82%|████████▏ | 160/196 [03:47<00:48,  1.34s/it]

Epoch [21/40], Step [160/196], Loss: 1.1824


 87%|████████▋ | 170/196 [04:01<00:38,  1.49s/it]

Epoch [21/40], Step [170/196], Loss: 1.2002


 92%|█████████▏| 180/196 [04:14<00:21,  1.34s/it]

Epoch [21/40], Step [180/196], Loss: 1.1871


 97%|█████████▋| 190/196 [04:28<00:09,  1.55s/it]

Epoch [21/40], Step [190/196], Loss: 1.1860


100%|██████████| 196/196 [04:34<00:00,  1.40s/it]


Test Accuracy of the student model on the test images: 54.19 %


  5%|▌         | 10/196 [00:19<04:36,  1.49s/it]

Epoch [22/40], Step [10/196], Loss: 1.1540


 10%|█         | 20/196 [00:32<03:34,  1.22s/it]

Epoch [22/40], Step [20/196], Loss: 1.1031


 15%|█▌        | 30/196 [00:46<03:52,  1.40s/it]

Epoch [22/40], Step [30/196], Loss: 1.0806


 20%|██        | 40/196 [00:59<03:05,  1.19s/it]

Epoch [22/40], Step [40/196], Loss: 1.0674


 26%|██▌       | 50/196 [01:14<03:49,  1.57s/it]

Epoch [22/40], Step [50/196], Loss: 1.0694


 31%|███       | 60/196 [01:26<02:40,  1.18s/it]

Epoch [22/40], Step [60/196], Loss: 1.0834


 36%|███▌      | 70/196 [01:42<03:17,  1.56s/it]

Epoch [22/40], Step [70/196], Loss: 1.0844


 41%|████      | 80/196 [01:54<02:17,  1.18s/it]

Epoch [22/40], Step [80/196], Loss: 1.0687


 46%|████▌     | 90/196 [02:10<02:45,  1.56s/it]

Epoch [22/40], Step [90/196], Loss: 1.0808


 51%|█████     | 100/196 [02:23<02:12,  1.38s/it]

Epoch [22/40], Step [100/196], Loss: 1.0649


 56%|█████▌    | 110/196 [02:38<02:20,  1.63s/it]

Epoch [22/40], Step [110/196], Loss: 1.0591


 61%|██████    | 120/196 [02:50<01:30,  1.19s/it]

Epoch [22/40], Step [120/196], Loss: 1.0274


 66%|██████▋   | 130/196 [03:06<01:44,  1.59s/it]

Epoch [22/40], Step [130/196], Loss: 1.0924


 71%|███████▏  | 140/196 [03:19<01:12,  1.30s/it]

Epoch [22/40], Step [140/196], Loss: 1.0819


 77%|███████▋  | 150/196 [03:33<01:09,  1.50s/it]

Epoch [22/40], Step [150/196], Loss: 1.0804


 82%|████████▏ | 160/196 [03:47<00:46,  1.29s/it]

Epoch [22/40], Step [160/196], Loss: 1.0940


 87%|████████▋ | 170/196 [04:01<00:36,  1.42s/it]

Epoch [22/40], Step [170/196], Loss: 1.0967


 92%|█████████▏| 180/196 [04:14<00:20,  1.28s/it]

Epoch [22/40], Step [180/196], Loss: 1.0882


 97%|█████████▋| 190/196 [04:28<00:09,  1.50s/it]

Epoch [22/40], Step [190/196], Loss: 1.0769


100%|██████████| 196/196 [04:34<00:00,  1.40s/it]


Test Accuracy of the student model on the test images: 54.24 %


  5%|▌         | 10/196 [00:18<04:41,  1.51s/it]

Epoch [23/40], Step [10/196], Loss: 1.0541


 10%|█         | 20/196 [00:31<03:33,  1.21s/it]

Epoch [23/40], Step [20/196], Loss: 1.0186


 15%|█▌        | 30/196 [00:46<03:55,  1.42s/it]

Epoch [23/40], Step [30/196], Loss: 0.9847


 20%|██        | 40/196 [00:59<03:09,  1.22s/it]

Epoch [23/40], Step [40/196], Loss: 0.9931


 26%|██▌       | 50/196 [01:14<03:25,  1.41s/it]

Epoch [23/40], Step [50/196], Loss: 0.9877


 31%|███       | 60/196 [01:27<02:43,  1.21s/it]

Epoch [23/40], Step [60/196], Loss: 0.9765


 36%|███▌      | 70/196 [01:42<03:11,  1.52s/it]

Epoch [23/40], Step [70/196], Loss: 0.9975


 41%|████      | 80/196 [01:55<02:19,  1.20s/it]

Epoch [23/40], Step [80/196], Loss: 1.0038


 46%|████▌     | 90/196 [02:10<02:45,  1.56s/it]

Epoch [23/40], Step [90/196], Loss: 0.9743


 51%|█████     | 100/196 [02:22<01:49,  1.14s/it]

Epoch [23/40], Step [100/196], Loss: 1.0046


 56%|█████▌    | 110/196 [02:37<02:10,  1.52s/it]

Epoch [23/40], Step [110/196], Loss: 0.9949


 61%|██████    | 120/196 [02:49<01:31,  1.20s/it]

Epoch [23/40], Step [120/196], Loss: 1.0129


 66%|██████▋   | 130/196 [03:05<01:41,  1.54s/it]

Epoch [23/40], Step [130/196], Loss: 1.0126


 71%|███████▏  | 140/196 [03:18<01:08,  1.23s/it]

Epoch [23/40], Step [140/196], Loss: 1.0132


 77%|███████▋  | 150/196 [03:32<01:10,  1.53s/it]

Epoch [23/40], Step [150/196], Loss: 1.0237


 82%|████████▏ | 160/196 [03:45<00:44,  1.25s/it]

Epoch [23/40], Step [160/196], Loss: 1.0255


 87%|████████▋ | 170/196 [04:00<00:40,  1.55s/it]

Epoch [23/40], Step [170/196], Loss: 1.0363


 92%|█████████▏| 180/196 [04:13<00:20,  1.29s/it]

Epoch [23/40], Step [180/196], Loss: 1.0378


 97%|█████████▋| 190/196 [04:28<00:09,  1.52s/it]

Epoch [23/40], Step [190/196], Loss: 1.0676


100%|██████████| 196/196 [04:33<00:00,  1.40s/it]


Test Accuracy of the student model on the test images: 56.18 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:23<05:12,  1.68s/it]

Epoch [24/40], Step [10/196], Loss: 0.9882


 10%|█         | 20/196 [00:37<04:00,  1.37s/it]

Epoch [24/40], Step [20/196], Loss: 0.9359


 15%|█▌        | 30/196 [00:51<04:22,  1.58s/it]

Epoch [24/40], Step [30/196], Loss: 0.9341


 20%|██        | 40/196 [01:04<03:22,  1.30s/it]

Epoch [24/40], Step [40/196], Loss: 0.9615


 26%|██▌       | 50/196 [01:18<03:41,  1.52s/it]

Epoch [24/40], Step [50/196], Loss: 0.9501


 31%|███       | 60/196 [01:31<03:03,  1.35s/it]

Epoch [24/40], Step [60/196], Loss: 0.9394


 36%|███▌      | 70/196 [01:45<03:29,  1.66s/it]

Epoch [24/40], Step [70/196], Loss: 0.9053


 41%|████      | 80/196 [01:59<02:42,  1.40s/it]

Epoch [24/40], Step [80/196], Loss: 0.9227


 46%|████▌     | 90/196 [02:13<02:45,  1.56s/it]

Epoch [24/40], Step [90/196], Loss: 0.9361


 51%|█████     | 100/196 [02:26<02:15,  1.41s/it]

Epoch [24/40], Step [100/196], Loss: 0.9469


 56%|█████▌    | 110/196 [02:40<02:12,  1.54s/it]

Epoch [24/40], Step [110/196], Loss: 0.9348


 61%|██████    | 120/196 [02:54<01:44,  1.37s/it]

Epoch [24/40], Step [120/196], Loss: 0.9399


 66%|██████▋   | 130/196 [03:08<01:39,  1.51s/it]

Epoch [24/40], Step [130/196], Loss: 0.9362


 71%|███████▏  | 140/196 [03:21<01:18,  1.39s/it]

Epoch [24/40], Step [140/196], Loss: 0.9428


 77%|███████▋  | 150/196 [03:35<01:11,  1.56s/it]

Epoch [24/40], Step [150/196], Loss: 0.9571


 82%|████████▏ | 160/196 [03:49<00:50,  1.41s/it]

Epoch [24/40], Step [160/196], Loss: 0.9613


 87%|████████▋ | 170/196 [04:03<00:40,  1.57s/it]

Epoch [24/40], Step [170/196], Loss: 0.9678


 92%|█████████▏| 180/196 [04:17<00:22,  1.43s/it]

Epoch [24/40], Step [180/196], Loss: 0.9728


 97%|█████████▋| 190/196 [04:31<00:08,  1.49s/it]

Epoch [24/40], Step [190/196], Loss: 0.9810


100%|██████████| 196/196 [04:37<00:00,  1.41s/it]


Test Accuracy of the student model on the test images: 57.97 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:21<04:52,  1.57s/it]

Epoch [25/40], Step [10/196], Loss: 0.8929


 10%|█         | 20/196 [00:35<04:08,  1.41s/it]

Epoch [25/40], Step [20/196], Loss: 0.8800


 15%|█▌        | 30/196 [00:49<04:37,  1.67s/it]

Epoch [25/40], Step [30/196], Loss: 0.8728


 20%|██        | 40/196 [01:02<03:22,  1.30s/it]

Epoch [25/40], Step [40/196], Loss: 0.8508


 26%|██▌       | 50/196 [01:17<03:46,  1.55s/it]

Epoch [25/40], Step [50/196], Loss: 0.8589


 31%|███       | 60/196 [01:30<02:53,  1.27s/it]

Epoch [25/40], Step [60/196], Loss: 0.8489


 36%|███▌      | 70/196 [01:44<03:06,  1.48s/it]

Epoch [25/40], Step [70/196], Loss: 0.8504


 41%|████      | 80/196 [01:57<02:30,  1.30s/it]

Epoch [25/40], Step [80/196], Loss: 0.8618


 46%|████▌     | 90/196 [02:11<02:42,  1.54s/it]

Epoch [25/40], Step [90/196], Loss: 0.8711


 51%|█████     | 100/196 [02:25<02:12,  1.38s/it]

Epoch [25/40], Step [100/196], Loss: 0.8440


 56%|█████▌    | 110/196 [02:39<02:03,  1.43s/it]

Epoch [25/40], Step [110/196], Loss: 0.8641


 61%|██████    | 120/196 [02:52<01:38,  1.30s/it]

Epoch [25/40], Step [120/196], Loss: 0.8678


 66%|██████▋   | 130/196 [03:07<01:35,  1.44s/it]

Epoch [25/40], Step [130/196], Loss: 0.8790


 71%|███████▏  | 140/196 [03:20<01:14,  1.33s/it]

Epoch [25/40], Step [140/196], Loss: 0.8900


 77%|███████▋  | 150/196 [03:34<01:02,  1.36s/it]

Epoch [25/40], Step [150/196], Loss: 0.8961


 82%|████████▏ | 160/196 [03:47<00:47,  1.32s/it]

Epoch [25/40], Step [160/196], Loss: 0.8945


 87%|████████▋ | 170/196 [04:01<00:35,  1.37s/it]

Epoch [25/40], Step [170/196], Loss: 0.9196


 92%|█████████▏| 180/196 [04:14<00:21,  1.36s/it]

Epoch [25/40], Step [180/196], Loss: 0.8983


 97%|█████████▋| 190/196 [04:29<00:08,  1.43s/it]

Epoch [25/40], Step [190/196], Loss: 0.9090


100%|██████████| 196/196 [04:34<00:00,  1.40s/it]


Test Accuracy of the student model on the test images: 59.37 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:20<05:34,  1.80s/it]

Epoch [26/40], Step [10/196], Loss: 0.8582


 10%|█         | 20/196 [00:36<04:05,  1.40s/it]

Epoch [26/40], Step [20/196], Loss: 0.8284


 15%|█▌        | 30/196 [00:50<04:03,  1.47s/it]

Epoch [26/40], Step [30/196], Loss: 0.8133


 20%|██        | 40/196 [01:06<03:52,  1.49s/it]

Epoch [26/40], Step [40/196], Loss: 0.7990


 26%|██▌       | 50/196 [01:21<03:38,  1.49s/it]

Epoch [26/40], Step [50/196], Loss: 0.8116


 31%|███       | 60/196 [01:33<02:55,  1.29s/it]

Epoch [26/40], Step [60/196], Loss: 0.8067


 36%|███▌      | 70/196 [01:47<03:12,  1.53s/it]

Epoch [26/40], Step [70/196], Loss: 0.7916


 41%|████      | 80/196 [02:01<02:32,  1.31s/it]

Epoch [26/40], Step [80/196], Loss: 0.7986


 46%|████▌     | 90/196 [02:16<02:35,  1.47s/it]

Epoch [26/40], Step [90/196], Loss: 0.7834


 51%|█████     | 100/196 [02:28<02:02,  1.28s/it]

Epoch [26/40], Step [100/196], Loss: 0.7982


 56%|█████▌    | 110/196 [02:43<01:58,  1.37s/it]

Epoch [26/40], Step [110/196], Loss: 0.7953


 61%|██████    | 120/196 [02:56<01:33,  1.23s/it]

Epoch [26/40], Step [120/196], Loss: 0.8210


 66%|██████▋   | 130/196 [03:11<01:27,  1.33s/it]

Epoch [26/40], Step [130/196], Loss: 0.8253


 71%|███████▏  | 140/196 [03:24<01:10,  1.26s/it]

Epoch [26/40], Step [140/196], Loss: 0.8240


 77%|███████▋  | 150/196 [03:39<01:01,  1.34s/it]

Epoch [26/40], Step [150/196], Loss: 0.8217


 82%|████████▏ | 160/196 [03:51<00:44,  1.23s/it]

Epoch [26/40], Step [160/196], Loss: 0.8105


 87%|████████▋ | 170/196 [04:05<00:37,  1.43s/it]

Epoch [26/40], Step [170/196], Loss: 0.8272


 92%|█████████▏| 180/196 [04:18<00:20,  1.29s/it]

Epoch [26/40], Step [180/196], Loss: 0.8198


 97%|█████████▋| 190/196 [04:32<00:08,  1.37s/it]

Epoch [26/40], Step [190/196], Loss: 0.8290


100%|██████████| 196/196 [04:38<00:00,  1.42s/it]


Test Accuracy of the student model on the test images: 62.33 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:23<05:07,  1.65s/it]

Epoch [27/40], Step [10/196], Loss: 0.7618


 10%|█         | 20/196 [00:35<03:35,  1.22s/it]

Epoch [27/40], Step [20/196], Loss: 0.7582


 15%|█▌        | 30/196 [00:48<03:34,  1.29s/it]

Epoch [27/40], Step [30/196], Loss: 0.7438


 20%|██        | 40/196 [01:02<03:18,  1.27s/it]

Epoch [27/40], Step [40/196], Loss: 0.7291


 26%|██▌       | 50/196 [01:15<03:20,  1.37s/it]

Epoch [27/40], Step [50/196], Loss: 0.7212


 31%|███       | 60/196 [01:27<02:47,  1.23s/it]

Epoch [27/40], Step [60/196], Loss: 0.7399


 36%|███▌      | 70/196 [01:38<02:19,  1.10s/it]

Epoch [27/40], Step [70/196], Loss: 0.7427


 41%|████      | 80/196 [01:51<02:11,  1.13s/it]

Epoch [27/40], Step [80/196], Loss: 0.7418


 46%|████▌     | 90/196 [02:03<02:02,  1.16s/it]

Epoch [27/40], Step [90/196], Loss: 0.7335


 51%|█████     | 100/196 [02:16<01:49,  1.14s/it]

Epoch [27/40], Step [100/196], Loss: 0.7268


 56%|█████▌    | 110/196 [02:28<01:38,  1.15s/it]

Epoch [27/40], Step [110/196], Loss: 0.7309


 61%|██████    | 120/196 [02:41<01:41,  1.33s/it]

Epoch [27/40], Step [120/196], Loss: 0.7473


 66%|██████▋   | 130/196 [02:55<01:17,  1.18s/it]

Epoch [27/40], Step [130/196], Loss: 0.7444


 71%|███████▏  | 140/196 [03:08<01:10,  1.27s/it]

Epoch [27/40], Step [140/196], Loss: 0.7668


 77%|███████▋  | 150/196 [03:22<01:01,  1.33s/it]

Epoch [27/40], Step [150/196], Loss: 0.7625


 82%|████████▏ | 160/196 [03:35<00:46,  1.29s/it]

Epoch [27/40], Step [160/196], Loss: 0.7342


 87%|████████▋ | 170/196 [03:48<00:35,  1.38s/it]

Epoch [27/40], Step [170/196], Loss: 0.7420


 92%|█████████▏| 180/196 [04:01<00:20,  1.26s/it]

Epoch [27/40], Step [180/196], Loss: 0.7555


 97%|█████████▋| 190/196 [04:15<00:08,  1.48s/it]

Epoch [27/40], Step [190/196], Loss: 0.7690


100%|██████████| 196/196 [04:20<00:00,  1.33s/it]


Test Accuracy of the student model on the test images: 61.20 %


  5%|▌         | 10/196 [00:18<04:06,  1.33s/it]

Epoch [28/40], Step [10/196], Loss: 0.7053


 10%|█         | 20/196 [00:31<03:36,  1.23s/it]

Epoch [28/40], Step [20/196], Loss: 0.6862


 15%|█▌        | 30/196 [00:45<03:50,  1.39s/it]

Epoch [28/40], Step [30/196], Loss: 0.6619


 20%|██        | 40/196 [00:58<03:02,  1.17s/it]

Epoch [28/40], Step [40/196], Loss: 0.6724


 26%|██▌       | 50/196 [01:11<02:47,  1.15s/it]

Epoch [28/40], Step [50/196], Loss: 0.6711


 31%|███       | 60/196 [01:24<02:46,  1.22s/it]

Epoch [28/40], Step [60/196], Loss: 0.6696


 36%|███▌      | 70/196 [01:38<02:57,  1.41s/it]

Epoch [28/40], Step [70/196], Loss: 0.6641


 41%|████      | 80/196 [01:52<02:21,  1.22s/it]

Epoch [28/40], Step [80/196], Loss: 0.6744


 46%|████▌     | 90/196 [02:05<02:24,  1.36s/it]

Epoch [28/40], Step [90/196], Loss: 0.6569


 51%|█████     | 100/196 [02:19<02:05,  1.31s/it]

Epoch [28/40], Step [100/196], Loss: 0.6717


 56%|█████▌    | 110/196 [02:34<02:09,  1.51s/it]

Epoch [28/40], Step [110/196], Loss: 0.6736


 61%|██████    | 120/196 [02:46<01:31,  1.21s/it]

Epoch [28/40], Step [120/196], Loss: 0.6748


 66%|██████▋   | 130/196 [03:00<01:27,  1.33s/it]

Epoch [28/40], Step [130/196], Loss: 0.6811


 71%|███████▏  | 140/196 [03:13<01:09,  1.23s/it]

Epoch [28/40], Step [140/196], Loss: 0.6797


 77%|███████▋  | 150/196 [03:27<00:58,  1.27s/it]

Epoch [28/40], Step [150/196], Loss: 0.6698


 82%|████████▏ | 160/196 [03:40<00:41,  1.15s/it]

Epoch [28/40], Step [160/196], Loss: 0.6751


 87%|████████▋ | 170/196 [03:54<00:36,  1.41s/it]

Epoch [28/40], Step [170/196], Loss: 0.6862


 92%|█████████▏| 180/196 [04:07<00:20,  1.26s/it]

Epoch [28/40], Step [180/196], Loss: 0.6858


 97%|█████████▋| 190/196 [04:20<00:07,  1.26s/it]

Epoch [28/40], Step [190/196], Loss: 0.6738


100%|██████████| 196/196 [04:26<00:00,  1.36s/it]


Test Accuracy of the student model on the test images: 63.19 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:22<06:45,  2.18s/it]

Epoch [29/40], Step [10/196], Loss: 0.6649


 10%|█         | 20/196 [00:38<04:17,  1.46s/it]

Epoch [29/40], Step [20/196], Loss: 0.6462


 15%|█▌        | 30/196 [00:51<04:01,  1.46s/it]

Epoch [29/40], Step [30/196], Loss: 0.6470


 20%|██        | 40/196 [01:05<03:31,  1.35s/it]

Epoch [29/40], Step [40/196], Loss: 0.6357


 26%|██▌       | 50/196 [01:21<03:39,  1.50s/it]

Epoch [29/40], Step [50/196], Loss: 0.6265


 31%|███       | 60/196 [01:35<02:56,  1.30s/it]

Epoch [29/40], Step [60/196], Loss: 0.6270


 36%|███▌      | 70/196 [01:49<02:59,  1.43s/it]

Epoch [29/40], Step [70/196], Loss: 0.6254


 41%|████      | 80/196 [02:02<02:27,  1.27s/it]

Epoch [29/40], Step [80/196], Loss: 0.6173


 46%|████▌     | 90/196 [02:16<02:24,  1.37s/it]

Epoch [29/40], Step [90/196], Loss: 0.6121


 51%|█████     | 100/196 [02:29<02:00,  1.25s/it]

Epoch [29/40], Step [100/196], Loss: 0.6076


 56%|█████▌    | 110/196 [02:43<01:55,  1.34s/it]

Epoch [29/40], Step [110/196], Loss: 0.6215


 61%|██████    | 120/196 [02:57<01:34,  1.25s/it]

Epoch [29/40], Step [120/196], Loss: 0.6223


 66%|██████▋   | 130/196 [03:11<01:27,  1.33s/it]

Epoch [29/40], Step [130/196], Loss: 0.6127


 71%|███████▏  | 140/196 [03:24<01:12,  1.30s/it]

Epoch [29/40], Step [140/196], Loss: 0.6277


 77%|███████▋  | 150/196 [03:38<00:59,  1.30s/it]

Epoch [29/40], Step [150/196], Loss: 0.6240


 82%|████████▏ | 160/196 [03:51<00:44,  1.25s/it]

Epoch [29/40], Step [160/196], Loss: 0.6242


 87%|████████▋ | 170/196 [04:05<00:34,  1.33s/it]

Epoch [29/40], Step [170/196], Loss: 0.6203


 92%|█████████▏| 180/196 [04:18<00:19,  1.23s/it]

Epoch [29/40], Step [180/196], Loss: 0.6276


 97%|█████████▋| 190/196 [04:32<00:07,  1.31s/it]

Epoch [29/40], Step [190/196], Loss: 0.6420


100%|██████████| 196/196 [04:38<00:00,  1.42s/it]


Test Accuracy of the student model on the test images: 63.84 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:26<08:40,  2.80s/it]

Epoch [30/40], Step [10/196], Loss: 0.5967


 10%|█         | 20/196 [00:38<03:36,  1.23s/it]

Epoch [30/40], Step [20/196], Loss: 0.5866


 15%|█▌        | 30/196 [00:52<03:40,  1.33s/it]

Epoch [30/40], Step [30/196], Loss: 0.5672


 20%|██        | 40/196 [01:05<03:19,  1.28s/it]

Epoch [30/40], Step [40/196], Loss: 0.5656


 26%|██▌       | 50/196 [01:19<03:10,  1.30s/it]

Epoch [30/40], Step [50/196], Loss: 0.5613


 31%|███       | 60/196 [01:33<02:54,  1.28s/it]

Epoch [30/40], Step [60/196], Loss: 0.5731


 36%|███▌      | 70/196 [01:47<02:50,  1.35s/it]

Epoch [30/40], Step [70/196], Loss: 0.5592


 41%|████      | 80/196 [02:00<02:32,  1.32s/it]

Epoch [30/40], Step [80/196], Loss: 0.5620


 46%|████▌     | 90/196 [02:14<02:21,  1.33s/it]

Epoch [30/40], Step [90/196], Loss: 0.5698


 51%|█████     | 100/196 [02:28<01:58,  1.24s/it]

Epoch [30/40], Step [100/196], Loss: 0.5673


 56%|█████▌    | 110/196 [02:42<01:54,  1.33s/it]

Epoch [30/40], Step [110/196], Loss: 0.5676


 61%|██████    | 120/196 [02:55<01:36,  1.27s/it]

Epoch [30/40], Step [120/196], Loss: 0.5745


 66%|██████▋   | 130/196 [03:09<01:27,  1.33s/it]

Epoch [30/40], Step [130/196], Loss: 0.5650


 71%|███████▏  | 140/196 [03:22<01:09,  1.24s/it]

Epoch [30/40], Step [140/196], Loss: 0.5692


 77%|███████▋  | 150/196 [03:37<01:01,  1.34s/it]

Epoch [30/40], Step [150/196], Loss: 0.5531


 82%|████████▏ | 160/196 [03:50<00:44,  1.24s/it]

Epoch [30/40], Step [160/196], Loss: 0.5688


 87%|████████▋ | 170/196 [04:04<00:33,  1.28s/it]

Epoch [30/40], Step [170/196], Loss: 0.5734


 92%|█████████▏| 180/196 [04:17<00:18,  1.18s/it]

Epoch [30/40], Step [180/196], Loss: 0.5718


 97%|█████████▋| 190/196 [04:31<00:08,  1.34s/it]

Epoch [30/40], Step [190/196], Loss: 0.5892


100%|██████████| 196/196 [04:37<00:00,  1.42s/it]


Test Accuracy of the student model on the test images: 64.14 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:21<05:02,  1.63s/it]

Epoch [31/40], Step [10/196], Loss: 0.5496


 10%|█         | 20/196 [00:34<03:42,  1.27s/it]

Epoch [31/40], Step [20/196], Loss: 0.5292


 15%|█▌        | 30/196 [00:47<03:50,  1.39s/it]

Epoch [31/40], Step [30/196], Loss: 0.5117


 20%|██        | 40/196 [01:00<03:14,  1.25s/it]

Epoch [31/40], Step [40/196], Loss: 0.5206


 26%|██▌       | 50/196 [01:14<03:37,  1.49s/it]

Epoch [31/40], Step [50/196], Loss: 0.5312


 31%|███       | 60/196 [01:26<02:39,  1.17s/it]

Epoch [31/40], Step [60/196], Loss: 0.5152


 36%|███▌      | 70/196 [01:42<03:26,  1.64s/it]

Epoch [31/40], Step [70/196], Loss: 0.5139


 41%|████      | 80/196 [01:54<02:10,  1.12s/it]

Epoch [31/40], Step [80/196], Loss: 0.5219


 46%|████▌     | 90/196 [02:09<02:46,  1.57s/it]

Epoch [31/40], Step [90/196], Loss: 0.5272


 51%|█████     | 100/196 [02:21<01:58,  1.23s/it]

Epoch [31/40], Step [100/196], Loss: 0.5114


 56%|█████▌    | 110/196 [02:36<02:09,  1.51s/it]

Epoch [31/40], Step [110/196], Loss: 0.5035


 61%|██████    | 120/196 [02:49<01:32,  1.22s/it]

Epoch [31/40], Step [120/196], Loss: 0.5177


 66%|██████▋   | 130/196 [03:04<01:37,  1.48s/it]

Epoch [31/40], Step [130/196], Loss: 0.5158


 71%|███████▏  | 140/196 [03:17<01:22,  1.47s/it]

Epoch [31/40], Step [140/196], Loss: 0.5326


 77%|███████▋  | 150/196 [03:32<01:13,  1.59s/it]

Epoch [31/40], Step [150/196], Loss: 0.5181


 82%|████████▏ | 160/196 [03:45<00:48,  1.36s/it]

Epoch [31/40], Step [160/196], Loss: 0.5275


 87%|████████▋ | 170/196 [04:00<00:42,  1.64s/it]

Epoch [31/40], Step [170/196], Loss: 0.5199


 92%|█████████▏| 180/196 [04:13<00:21,  1.37s/it]

Epoch [31/40], Step [180/196], Loss: 0.5267


 97%|█████████▋| 190/196 [04:27<00:09,  1.62s/it]

Epoch [31/40], Step [190/196], Loss: 0.5229


100%|██████████| 196/196 [04:33<00:00,  1.40s/it]


Test Accuracy of the student model on the test images: 65.75 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:17<04:02,  1.31s/it]

Epoch [32/40], Step [10/196], Loss: 0.4914


 10%|█         | 20/196 [00:30<03:32,  1.20s/it]

Epoch [32/40], Step [20/196], Loss: 0.4738


 15%|█▌        | 30/196 [00:44<03:37,  1.31s/it]

Epoch [32/40], Step [30/196], Loss: 0.4757


 20%|██        | 40/196 [00:58<03:08,  1.21s/it]

Epoch [32/40], Step [40/196], Loss: 0.4744


 26%|██▌       | 50/196 [01:12<03:13,  1.33s/it]

Epoch [32/40], Step [50/196], Loss: 0.4779


 31%|███       | 60/196 [01:25<02:38,  1.17s/it]

Epoch [32/40], Step [60/196], Loss: 0.4788


 36%|███▌      | 70/196 [01:39<02:50,  1.35s/it]

Epoch [32/40], Step [70/196], Loss: 0.4687


 41%|████      | 80/196 [01:52<02:16,  1.18s/it]

Epoch [32/40], Step [80/196], Loss: 0.4665


 46%|████▌     | 90/196 [02:06<02:16,  1.29s/it]

Epoch [32/40], Step [90/196], Loss: 0.4775


 51%|█████     | 100/196 [02:19<01:53,  1.18s/it]

Epoch [32/40], Step [100/196], Loss: 0.4857


 56%|█████▌    | 110/196 [02:33<01:50,  1.29s/it]

Epoch [32/40], Step [110/196], Loss: 0.4804


 61%|██████    | 120/196 [02:46<01:30,  1.19s/it]

Epoch [32/40], Step [120/196], Loss: 0.4725


 66%|██████▋   | 130/196 [02:59<01:21,  1.24s/it]

Epoch [32/40], Step [130/196], Loss: 0.4854


 71%|███████▏  | 140/196 [03:13<01:06,  1.19s/it]

Epoch [32/40], Step [140/196], Loss: 0.4724


 77%|███████▋  | 150/196 [03:26<01:00,  1.32s/it]

Epoch [32/40], Step [150/196], Loss: 0.4796


 82%|████████▏ | 160/196 [03:40<00:42,  1.19s/it]

Epoch [32/40], Step [160/196], Loss: 0.4764


 87%|████████▋ | 170/196 [03:53<00:33,  1.28s/it]

Epoch [32/40], Step [170/196], Loss: 0.4792


 92%|█████████▏| 180/196 [04:06<00:18,  1.17s/it]

Epoch [32/40], Step [180/196], Loss: 0.4822


 97%|█████████▋| 190/196 [04:20<00:07,  1.29s/it]

Epoch [32/40], Step [190/196], Loss: 0.4919


100%|██████████| 196/196 [04:26<00:00,  1.36s/it]


Test Accuracy of the student model on the test images: 65.66 %


  5%|▌         | 10/196 [00:20<05:12,  1.68s/it]

Epoch [33/40], Step [10/196], Loss: 0.4582


 10%|█         | 20/196 [00:33<03:50,  1.31s/it]

Epoch [33/40], Step [20/196], Loss: 0.4450


 15%|█▌        | 30/196 [00:47<04:14,  1.53s/it]

Epoch [33/40], Step [30/196], Loss: 0.4493


 20%|██        | 40/196 [00:59<02:55,  1.13s/it]

Epoch [33/40], Step [40/196], Loss: 0.4380


 26%|██▌       | 50/196 [01:14<03:48,  1.57s/it]

Epoch [33/40], Step [50/196], Loss: 0.4413


 31%|███       | 60/196 [01:28<02:56,  1.30s/it]

Epoch [33/40], Step [60/196], Loss: 0.4416


 36%|███▌      | 70/196 [01:43<03:14,  1.55s/it]

Epoch [33/40], Step [70/196], Loss: 0.4333


 41%|████      | 80/196 [01:56<02:31,  1.31s/it]

Epoch [33/40], Step [80/196], Loss: 0.4384


 46%|████▌     | 90/196 [02:11<02:45,  1.56s/it]

Epoch [33/40], Step [90/196], Loss: 0.4374


 51%|█████     | 100/196 [02:24<02:06,  1.32s/it]

Epoch [33/40], Step [100/196], Loss: 0.4320


 56%|█████▌    | 110/196 [02:38<02:03,  1.43s/it]

Epoch [33/40], Step [110/196], Loss: 0.4409


 61%|██████    | 120/196 [02:52<01:39,  1.31s/it]

Epoch [33/40], Step [120/196], Loss: 0.4369


 66%|██████▋   | 130/196 [03:06<01:39,  1.51s/it]

Epoch [33/40], Step [130/196], Loss: 0.4508


 71%|███████▏  | 140/196 [03:20<01:13,  1.31s/it]

Epoch [33/40], Step [140/196], Loss: 0.4399


 77%|███████▋  | 150/196 [03:34<01:04,  1.41s/it]

Epoch [33/40], Step [150/196], Loss: 0.4456


 82%|████████▏ | 160/196 [03:48<00:46,  1.30s/it]

Epoch [33/40], Step [160/196], Loss: 0.4444


 87%|████████▋ | 170/196 [04:02<00:35,  1.36s/it]

Epoch [33/40], Step [170/196], Loss: 0.4320


 92%|█████████▏| 180/196 [04:16<00:20,  1.29s/it]

Epoch [33/40], Step [180/196], Loss: 0.4532


 97%|█████████▋| 190/196 [04:30<00:08,  1.34s/it]

Epoch [33/40], Step [190/196], Loss: 0.4475


100%|██████████| 196/196 [04:36<00:00,  1.41s/it]


Test Accuracy of the student model on the test images: 66.40 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:19<05:32,  1.79s/it]

Epoch [34/40], Step [10/196], Loss: 0.4175


 10%|█         | 20/196 [00:31<03:57,  1.35s/it]

Epoch [34/40], Step [20/196], Loss: 0.4082


 15%|█▌        | 30/196 [00:46<04:13,  1.53s/it]

Epoch [34/40], Step [30/196], Loss: 0.4103


 20%|██        | 40/196 [01:00<03:19,  1.28s/it]

Epoch [34/40], Step [40/196], Loss: 0.3998


 26%|██▌       | 50/196 [01:14<03:56,  1.62s/it]

Epoch [34/40], Step [50/196], Loss: 0.4041


 31%|███       | 60/196 [01:27<02:50,  1.25s/it]

Epoch [34/40], Step [60/196], Loss: 0.4151


 36%|███▌      | 70/196 [01:41<02:56,  1.40s/it]

Epoch [34/40], Step [70/196], Loss: 0.4057


 41%|████      | 80/196 [01:54<02:22,  1.23s/it]

Epoch [34/40], Step [80/196], Loss: 0.4138


 46%|████▌     | 90/196 [02:09<02:55,  1.66s/it]

Epoch [34/40], Step [90/196], Loss: 0.4160


 51%|█████     | 100/196 [02:23<02:06,  1.31s/it]

Epoch [34/40], Step [100/196], Loss: 0.4090


 56%|█████▌    | 110/196 [02:36<01:59,  1.39s/it]

Epoch [34/40], Step [110/196], Loss: 0.4105


 61%|██████    | 120/196 [02:49<01:40,  1.32s/it]

Epoch [34/40], Step [120/196], Loss: 0.4078


 66%|██████▋   | 130/196 [03:04<01:41,  1.54s/it]

Epoch [34/40], Step [130/196], Loss: 0.4187


 71%|███████▏  | 140/196 [03:18<01:13,  1.31s/it]

Epoch [34/40], Step [140/196], Loss: 0.4102


 77%|███████▋  | 150/196 [03:32<01:08,  1.48s/it]

Epoch [34/40], Step [150/196], Loss: 0.4174


 82%|████████▏ | 160/196 [03:45<00:43,  1.22s/it]

Epoch [34/40], Step [160/196], Loss: 0.4152


 87%|████████▋ | 170/196 [03:59<00:38,  1.46s/it]

Epoch [34/40], Step [170/196], Loss: 0.4150


 92%|█████████▏| 180/196 [04:12<00:19,  1.23s/it]

Epoch [34/40], Step [180/196], Loss: 0.4088


 97%|█████████▋| 190/196 [04:27<00:09,  1.54s/it]

Epoch [34/40], Step [190/196], Loss: 0.4167


100%|██████████| 196/196 [04:33<00:00,  1.39s/it]


Test Accuracy of the student model on the test images: 67.24 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:19<04:49,  1.56s/it]

Epoch [35/40], Step [10/196], Loss: 0.3983


 10%|█         | 20/196 [00:31<03:28,  1.18s/it]

Epoch [35/40], Step [20/196], Loss: 0.3915


 15%|█▌        | 30/196 [00:46<04:04,  1.48s/it]

Epoch [35/40], Step [30/196], Loss: 0.3884


 20%|██        | 40/196 [00:58<03:00,  1.16s/it]

Epoch [35/40], Step [40/196], Loss: 0.3849


 26%|██▌       | 50/196 [01:13<03:38,  1.50s/it]

Epoch [35/40], Step [50/196], Loss: 0.3840


 31%|███       | 60/196 [01:25<02:33,  1.13s/it]

Epoch [35/40], Step [60/196], Loss: 0.3957


 36%|███▌      | 70/196 [01:40<03:11,  1.52s/it]

Epoch [35/40], Step [70/196], Loss: 0.3924


 41%|████      | 80/196 [01:52<02:16,  1.18s/it]

Epoch [35/40], Step [80/196], Loss: 0.3805


 46%|████▌     | 90/196 [02:05<02:02,  1.16s/it]

Epoch [35/40], Step [90/196], Loss: 0.3825


 51%|█████     | 100/196 [02:18<01:52,  1.17s/it]

Epoch [35/40], Step [100/196], Loss: 0.3836


 56%|█████▌    | 110/196 [02:32<02:04,  1.44s/it]

Epoch [35/40], Step [110/196], Loss: 0.3835


 61%|██████    | 120/196 [02:45<01:29,  1.18s/it]

Epoch [35/40], Step [120/196], Loss: 0.3885


 66%|██████▋   | 130/196 [02:59<01:26,  1.31s/it]

Epoch [35/40], Step [130/196], Loss: 0.3885


 71%|███████▏  | 140/196 [03:12<01:07,  1.21s/it]

Epoch [35/40], Step [140/196], Loss: 0.3815


 77%|███████▋  | 150/196 [03:26<00:59,  1.29s/it]

Epoch [35/40], Step [150/196], Loss: 0.3813


 82%|████████▏ | 160/196 [03:39<00:42,  1.17s/it]

Epoch [35/40], Step [160/196], Loss: 0.3917


 87%|████████▋ | 170/196 [03:53<00:35,  1.38s/it]

Epoch [35/40], Step [170/196], Loss: 0.3883


 92%|█████████▏| 180/196 [04:06<00:19,  1.19s/it]

Epoch [35/40], Step [180/196], Loss: 0.3946


 97%|█████████▋| 190/196 [04:21<00:08,  1.34s/it]

Epoch [35/40], Step [190/196], Loss: 0.3910


100%|██████████| 196/196 [04:27<00:00,  1.37s/it]


Test Accuracy of the student model on the test images: 66.92 %


  5%|▌         | 10/196 [00:22<06:19,  2.04s/it]

Epoch [36/40], Step [10/196], Loss: 0.3768


 10%|█         | 20/196 [00:33<03:21,  1.15s/it]

Epoch [36/40], Step [20/196], Loss: 0.3635


 15%|█▌        | 30/196 [00:47<04:14,  1.53s/it]

Epoch [36/40], Step [30/196], Loss: 0.3666


 20%|██        | 40/196 [01:00<03:08,  1.21s/it]

Epoch [36/40], Step [40/196], Loss: 0.3724


 26%|██▌       | 50/196 [01:13<02:45,  1.13s/it]

Epoch [36/40], Step [50/196], Loss: 0.3672


 31%|███       | 60/196 [01:26<02:52,  1.27s/it]

Epoch [36/40], Step [60/196], Loss: 0.3649


 36%|███▌      | 70/196 [01:40<03:04,  1.47s/it]

Epoch [36/40], Step [70/196], Loss: 0.3638


 41%|████      | 80/196 [01:54<02:30,  1.30s/it]

Epoch [36/40], Step [80/196], Loss: 0.3678


 46%|████▌     | 90/196 [02:08<02:18,  1.31s/it]

Epoch [36/40], Step [90/196], Loss: 0.3682


 51%|█████     | 100/196 [02:21<02:01,  1.26s/it]

Epoch [36/40], Step [100/196], Loss: 0.3626


 56%|█████▌    | 110/196 [02:36<02:12,  1.54s/it]

Epoch [36/40], Step [110/196], Loss: 0.3588


 61%|██████    | 120/196 [02:48<01:37,  1.28s/it]

Epoch [36/40], Step [120/196], Loss: 0.3629


 66%|██████▋   | 130/196 [03:02<01:36,  1.47s/it]

Epoch [36/40], Step [130/196], Loss: 0.3673


 71%|███████▏  | 140/196 [03:16<01:10,  1.26s/it]

Epoch [36/40], Step [140/196], Loss: 0.3691


 77%|███████▋  | 150/196 [03:30<01:07,  1.46s/it]

Epoch [36/40], Step [150/196], Loss: 0.3698


 82%|████████▏ | 160/196 [03:43<00:46,  1.29s/it]

Epoch [36/40], Step [160/196], Loss: 0.3666


 87%|████████▋ | 170/196 [03:56<00:35,  1.35s/it]

Epoch [36/40], Step [170/196], Loss: 0.3741


 92%|█████████▏| 180/196 [04:10<00:20,  1.28s/it]

Epoch [36/40], Step [180/196], Loss: 0.3652


 97%|█████████▋| 190/196 [04:23<00:07,  1.29s/it]

Epoch [36/40], Step [190/196], Loss: 0.3720


100%|██████████| 196/196 [04:28<00:00,  1.37s/it]


Test Accuracy of the student model on the test images: 67.75 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:23<07:26,  2.40s/it]

Epoch [37/40], Step [10/196], Loss: 0.3592


 10%|█         | 20/196 [00:38<04:18,  1.47s/it]

Epoch [37/40], Step [20/196], Loss: 0.3542


 15%|█▌        | 30/196 [00:56<04:17,  1.55s/it]

Epoch [37/40], Step [30/196], Loss: 0.3540


 20%|██        | 40/196 [01:11<03:08,  1.21s/it]

Epoch [37/40], Step [40/196], Loss: 0.3537


 26%|██▌       | 50/196 [01:33<05:53,  2.42s/it]

Epoch [37/40], Step [50/196], Loss: 0.3489


 31%|███       | 60/196 [01:48<03:17,  1.45s/it]

Epoch [37/40], Step [60/196], Loss: 0.3494


 36%|███▌      | 70/196 [02:03<03:19,  1.59s/it]

Epoch [37/40], Step [70/196], Loss: 0.3492


 41%|████      | 80/196 [02:16<02:30,  1.29s/it]

Epoch [37/40], Step [80/196], Loss: 0.3516


 46%|████▌     | 90/196 [02:33<03:16,  1.86s/it]

Epoch [37/40], Step [90/196], Loss: 0.3419


 51%|█████     | 100/196 [02:47<02:13,  1.39s/it]

Epoch [37/40], Step [100/196], Loss: 0.3502


 56%|█████▌    | 110/196 [03:01<02:06,  1.47s/it]

Epoch [37/40], Step [110/196], Loss: 0.3594


 61%|██████    | 120/196 [03:14<01:40,  1.32s/it]

Epoch [37/40], Step [120/196], Loss: 0.3507


 66%|██████▋   | 130/196 [03:33<01:36,  1.46s/it]

Epoch [37/40], Step [130/196], Loss: 0.3588


 71%|███████▏  | 140/196 [03:47<01:08,  1.23s/it]

Epoch [37/40], Step [140/196], Loss: 0.3527


 77%|███████▋  | 150/196 [04:02<01:04,  1.40s/it]

Epoch [37/40], Step [150/196], Loss: 0.3469


 82%|████████▏ | 160/196 [04:16<00:46,  1.29s/it]

Epoch [37/40], Step [160/196], Loss: 0.3568


 87%|████████▋ | 170/196 [04:31<00:38,  1.46s/it]

Epoch [37/40], Step [170/196], Loss: 0.3512


 92%|█████████▏| 180/196 [04:44<00:19,  1.21s/it]

Epoch [37/40], Step [180/196], Loss: 0.3584


 97%|█████████▋| 190/196 [04:58<00:08,  1.35s/it]

Epoch [37/40], Step [190/196], Loss: 0.3554


100%|██████████| 196/196 [05:05<00:00,  1.56s/it]


Test Accuracy of the student model on the test images: 67.92 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:17<03:53,  1.25s/it]

Epoch [38/40], Step [10/196], Loss: 0.3401


 10%|█         | 20/196 [00:32<03:43,  1.27s/it]

Epoch [38/40], Step [20/196], Loss: 0.3418


 15%|█▌        | 30/196 [00:45<03:58,  1.43s/it]

Epoch [38/40], Step [30/196], Loss: 0.3419


 20%|██        | 40/196 [00:59<03:10,  1.22s/it]

Epoch [38/40], Step [40/196], Loss: 0.3358


 26%|██▌       | 50/196 [01:13<03:16,  1.35s/it]

Epoch [38/40], Step [50/196], Loss: 0.3315


 31%|███       | 60/196 [01:26<02:43,  1.20s/it]

Epoch [38/40], Step [60/196], Loss: 0.3362


 36%|███▌      | 70/196 [01:41<03:04,  1.47s/it]

Epoch [38/40], Step [70/196], Loss: 0.3410


 41%|████      | 80/196 [01:54<02:22,  1.23s/it]

Epoch [38/40], Step [80/196], Loss: 0.3399


 46%|████▌     | 90/196 [02:09<02:27,  1.39s/it]

Epoch [38/40], Step [90/196], Loss: 0.3367


 51%|█████     | 100/196 [02:22<01:56,  1.21s/it]

Epoch [38/40], Step [100/196], Loss: 0.3448


 56%|█████▌    | 110/196 [02:37<02:01,  1.42s/it]

Epoch [38/40], Step [110/196], Loss: 0.3406


 61%|██████    | 120/196 [02:50<01:31,  1.20s/it]

Epoch [38/40], Step [120/196], Loss: 0.3425


 66%|██████▋   | 130/196 [03:06<01:52,  1.71s/it]

Epoch [38/40], Step [130/196], Loss: 0.3349


 71%|███████▏  | 140/196 [03:18<01:09,  1.24s/it]

Epoch [38/40], Step [140/196], Loss: 0.3440


 77%|███████▋  | 150/196 [03:34<01:12,  1.58s/it]

Epoch [38/40], Step [150/196], Loss: 0.3460


 82%|████████▏ | 160/196 [03:48<00:48,  1.36s/it]

Epoch [38/40], Step [160/196], Loss: 0.3480


 87%|████████▋ | 170/196 [04:02<00:39,  1.52s/it]

Epoch [38/40], Step [170/196], Loss: 0.3402


 92%|█████████▏| 180/196 [04:15<00:20,  1.26s/it]

Epoch [38/40], Step [180/196], Loss: 0.3384


 97%|█████████▋| 190/196 [04:29<00:09,  1.52s/it]

Epoch [38/40], Step [190/196], Loss: 0.3463


100%|██████████| 196/196 [04:35<00:00,  1.40s/it]


Test Accuracy of the student model on the test images: 68.15 %
Saved best model to student_model_alg3.pth


  5%|▌         | 10/196 [00:21<04:23,  1.42s/it]

Epoch [39/40], Step [10/196], Loss: 0.3314


 10%|█         | 20/196 [00:35<03:55,  1.34s/it]

Epoch [39/40], Step [20/196], Loss: 0.3326


 15%|█▌        | 30/196 [00:48<03:50,  1.39s/it]

Epoch [39/40], Step [30/196], Loss: 0.3309


 20%|██        | 40/196 [00:59<03:00,  1.16s/it]

Epoch [39/40], Step [40/196], Loss: 0.3369


 26%|██▌       | 50/196 [01:13<03:28,  1.43s/it]

Epoch [39/40], Step [50/196], Loss: 0.3363


 31%|███       | 60/196 [01:24<02:29,  1.10s/it]

Epoch [39/40], Step [60/196], Loss: 0.3314


 36%|███▌      | 70/196 [01:37<02:38,  1.26s/it]

Epoch [39/40], Step [70/196], Loss: 0.3328


 41%|████      | 80/196 [01:49<02:11,  1.13s/it]

Epoch [39/40], Step [80/196], Loss: 0.3310


 46%|████▌     | 90/196 [02:02<02:00,  1.13s/it]

Epoch [39/40], Step [90/196], Loss: 0.3426


 51%|█████     | 100/196 [02:15<02:02,  1.28s/it]

Epoch [39/40], Step [100/196], Loss: 0.3392


 56%|█████▌    | 110/196 [02:27<01:39,  1.15s/it]

Epoch [39/40], Step [110/196], Loss: 0.3396


 61%|██████    | 120/196 [02:42<01:40,  1.32s/it]

Epoch [39/40], Step [120/196], Loss: 0.3408


 66%|██████▋   | 130/196 [02:56<01:20,  1.22s/it]

Epoch [39/40], Step [130/196], Loss: 0.3352


 71%|███████▏  | 140/196 [03:09<01:09,  1.24s/it]

Epoch [39/40], Step [140/196], Loss: 0.3334


 77%|███████▋  | 150/196 [03:22<00:54,  1.18s/it]

Epoch [39/40], Step [150/196], Loss: 0.3303


 82%|████████▏ | 160/196 [03:36<00:47,  1.32s/it]

Epoch [39/40], Step [160/196], Loss: 0.3413


 87%|████████▋ | 170/196 [03:50<00:31,  1.20s/it]

Epoch [39/40], Step [170/196], Loss: 0.3373


 92%|█████████▏| 180/196 [04:04<00:19,  1.23s/it]

Epoch [39/40], Step [180/196], Loss: 0.3392


 97%|█████████▋| 190/196 [04:18<00:07,  1.19s/it]

Epoch [39/40], Step [190/196], Loss: 0.3297


100%|██████████| 196/196 [04:23<00:00,  1.35s/it]


Test Accuracy of the student model on the test images: 67.95 %


  5%|▌         | 10/196 [00:20<05:15,  1.70s/it]

Epoch [40/40], Step [10/196], Loss: 0.3286


 10%|█         | 20/196 [00:32<03:52,  1.32s/it]

Epoch [40/40], Step [20/196], Loss: 0.3281


 15%|█▌        | 30/196 [00:47<04:26,  1.61s/it]

Epoch [40/40], Step [30/196], Loss: 0.3329


 20%|██        | 40/196 [01:00<03:32,  1.36s/it]

Epoch [40/40], Step [40/196], Loss: 0.3292


 26%|██▌       | 50/196 [01:15<04:02,  1.66s/it]

Epoch [40/40], Step [50/196], Loss: 0.3310


 31%|███       | 60/196 [01:28<03:07,  1.38s/it]

Epoch [40/40], Step [60/196], Loss: 0.3314


 36%|███▌      | 70/196 [01:42<03:13,  1.53s/it]

Epoch [40/40], Step [70/196], Loss: 0.3302


 41%|████      | 80/196 [01:56<02:42,  1.40s/it]

Epoch [40/40], Step [80/196], Loss: 0.3255


 46%|████▌     | 90/196 [02:10<02:42,  1.53s/it]

Epoch [40/40], Step [90/196], Loss: 0.3281


 51%|█████     | 100/196 [02:23<02:08,  1.34s/it]

Epoch [40/40], Step [100/196], Loss: 0.3304


 56%|█████▌    | 110/196 [02:38<02:18,  1.61s/it]

Epoch [40/40], Step [110/196], Loss: 0.3244


 61%|██████    | 120/196 [02:50<01:41,  1.34s/it]

Epoch [40/40], Step [120/196], Loss: 0.3256


 66%|██████▋   | 130/196 [03:04<01:42,  1.56s/it]

Epoch [40/40], Step [130/196], Loss: 0.3311


 71%|███████▏  | 140/196 [03:18<01:15,  1.35s/it]

Epoch [40/40], Step [140/196], Loss: 0.3325


 77%|███████▋  | 150/196 [03:32<01:09,  1.52s/it]

Epoch [40/40], Step [150/196], Loss: 0.3321


 82%|████████▏ | 160/196 [03:45<00:48,  1.34s/it]

Epoch [40/40], Step [160/196], Loss: 0.3284


 87%|████████▋ | 170/196 [03:59<00:35,  1.37s/it]

Epoch [40/40], Step [170/196], Loss: 0.3264


 92%|█████████▏| 180/196 [04:13<00:22,  1.38s/it]

Epoch [40/40], Step [180/196], Loss: 0.3240


 97%|█████████▋| 190/196 [04:27<00:08,  1.42s/it]

Epoch [40/40], Step [190/196], Loss: 0.3279


100%|██████████| 196/196 [04:33<00:00,  1.39s/it]


Test Accuracy of the student model on the test images: 67.90 %
Best Accuracy: 68.15 %
Student model (Algorithm 3) saved to student_model_alg3.pth with best accuracy: 68.15%
Training Student Model (ResNet-18) with both Teacher and TA Models (Simple Average, Algorithm 2)
Adjusted learning rate: 0.2


  scaler = GradScaler()
  with autocast():
  5%|▌         | 10/196 [00:28<07:04,  2.28s/it]

Epoch [1/40], Step [10/196], Loss: 8.4691


 10%|█         | 20/196 [00:50<06:17,  2.15s/it]

Epoch [1/40], Step [20/196], Loss: 7.8405


 15%|█▌        | 30/196 [01:10<05:45,  2.08s/it]

Epoch [1/40], Step [30/196], Loss: 7.5303


 20%|██        | 40/196 [01:31<05:24,  2.08s/it]

Epoch [1/40], Step [40/196], Loss: 7.4671


 26%|██▌       | 50/196 [01:52<05:08,  2.11s/it]

Epoch [1/40], Step [50/196], Loss: 7.1708


 31%|███       | 60/196 [02:14<04:50,  2.14s/it]

Epoch [1/40], Step [60/196], Loss: 7.1808


 36%|███▌      | 70/196 [02:35<04:26,  2.12s/it]

Epoch [1/40], Step [70/196], Loss: 7.0180


 41%|████      | 80/196 [02:56<04:03,  2.10s/it]

Epoch [1/40], Step [80/196], Loss: 6.9987


 46%|████▌     | 90/196 [03:17<03:42,  2.10s/it]

Epoch [1/40], Step [90/196], Loss: 6.8556


 51%|█████     | 100/196 [03:38<03:23,  2.12s/it]

Epoch [1/40], Step [100/196], Loss: 6.7181


 56%|█████▌    | 110/196 [03:59<03:02,  2.12s/it]

Epoch [1/40], Step [110/196], Loss: 6.5203


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [1/40], Step [120/196], Loss: 6.5303


 66%|██████▋   | 130/196 [04:41<02:19,  2.11s/it]

Epoch [1/40], Step [130/196], Loss: 6.2798


 71%|███████▏  | 140/196 [05:02<01:58,  2.11s/it]

Epoch [1/40], Step [140/196], Loss: 6.4525


 77%|███████▋  | 150/196 [05:24<01:37,  2.11s/it]

Epoch [1/40], Step [150/196], Loss: 6.1826


 82%|████████▏ | 160/196 [05:45<01:16,  2.11s/it]

Epoch [1/40], Step [160/196], Loss: 6.1218


 87%|████████▋ | 170/196 [06:06<00:54,  2.11s/it]

Epoch [1/40], Step [170/196], Loss: 6.0343


 92%|█████████▏| 180/196 [06:27<00:33,  2.11s/it]

Epoch [1/40], Step [180/196], Loss: 5.9597


 97%|█████████▋| 190/196 [06:48<00:12,  2.11s/it]

Epoch [1/40], Step [190/196], Loss: 5.8745


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]
  with autocast():


Test Accuracy of the student model on the test images: 10.40 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:27<07:01,  2.26s/it]

Epoch [2/40], Step [10/196], Loss: 5.7477


 10%|█         | 20/196 [00:49<06:20,  2.16s/it]

Epoch [2/40], Step [20/196], Loss: 5.5831


 15%|█▌        | 30/196 [01:10<05:47,  2.09s/it]

Epoch [2/40], Step [30/196], Loss: 5.6207


 20%|██        | 40/196 [01:31<05:24,  2.08s/it]

Epoch [2/40], Step [40/196], Loss: 5.4663


 26%|██▌       | 50/196 [01:52<05:08,  2.11s/it]

Epoch [2/40], Step [50/196], Loss: 5.3270


 31%|███       | 60/196 [02:13<04:50,  2.14s/it]

Epoch [2/40], Step [60/196], Loss: 5.3363


 36%|███▌      | 70/196 [02:34<04:28,  2.13s/it]

Epoch [2/40], Step [70/196], Loss: 5.2017


 41%|████      | 80/196 [02:55<04:03,  2.10s/it]

Epoch [2/40], Step [80/196], Loss: 5.1331


 46%|████▌     | 90/196 [03:16<03:41,  2.09s/it]

Epoch [2/40], Step [90/196], Loss: 5.2123


 51%|█████     | 100/196 [03:37<03:24,  2.13s/it]

Epoch [2/40], Step [100/196], Loss: 5.0938


 56%|█████▌    | 110/196 [03:59<03:02,  2.12s/it]

Epoch [2/40], Step [110/196], Loss: 5.0843


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [2/40], Step [120/196], Loss: 5.0940


 66%|██████▋   | 130/196 [04:41<02:19,  2.11s/it]

Epoch [2/40], Step [130/196], Loss: 5.0008


 71%|███████▏  | 140/196 [05:02<01:58,  2.11s/it]

Epoch [2/40], Step [140/196], Loss: 4.9558


 77%|███████▋  | 150/196 [05:23<01:37,  2.11s/it]

Epoch [2/40], Step [150/196], Loss: 4.8409


 82%|████████▏ | 160/196 [05:44<01:16,  2.12s/it]

Epoch [2/40], Step [160/196], Loss: 4.7290


 87%|████████▋ | 170/196 [06:05<00:55,  2.12s/it]

Epoch [2/40], Step [170/196], Loss: 4.8173


 92%|█████████▏| 180/196 [06:26<00:33,  2.11s/it]

Epoch [2/40], Step [180/196], Loss: 4.7116


 97%|█████████▋| 190/196 [06:48<00:12,  2.11s/it]

Epoch [2/40], Step [190/196], Loss: 4.7361


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 17.46 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:28<07:01,  2.27s/it]

Epoch [3/40], Step [10/196], Loss: 4.6463


 10%|█         | 20/196 [00:50<06:22,  2.17s/it]

Epoch [3/40], Step [20/196], Loss: 4.5331


 15%|█▌        | 30/196 [01:11<05:45,  2.08s/it]

Epoch [3/40], Step [30/196], Loss: 4.4053


 20%|██        | 40/196 [01:31<05:23,  2.08s/it]

Epoch [3/40], Step [40/196], Loss: 4.3725


 26%|██▌       | 50/196 [01:52<05:07,  2.11s/it]

Epoch [3/40], Step [50/196], Loss: 4.3796


 31%|███       | 60/196 [02:14<04:51,  2.14s/it]

Epoch [3/40], Step [60/196], Loss: 4.2525


 36%|███▌      | 70/196 [02:35<04:27,  2.12s/it]

Epoch [3/40], Step [70/196], Loss: 4.3146


 41%|████      | 80/196 [02:56<04:02,  2.09s/it]

Epoch [3/40], Step [80/196], Loss: 4.1939


 46%|████▌     | 90/196 [03:17<03:42,  2.10s/it]

Epoch [3/40], Step [90/196], Loss: 4.1554


 51%|█████     | 100/196 [03:38<03:23,  2.12s/it]

Epoch [3/40], Step [100/196], Loss: 4.1051


 56%|█████▌    | 110/196 [03:59<03:02,  2.12s/it]

Epoch [3/40], Step [110/196], Loss: 4.0596


 61%|██████    | 120/196 [04:21<02:40,  2.11s/it]

Epoch [3/40], Step [120/196], Loss: 4.0776


 66%|██████▋   | 130/196 [04:42<02:18,  2.10s/it]

Epoch [3/40], Step [130/196], Loss: 4.0479


 71%|███████▏  | 140/196 [05:03<01:57,  2.10s/it]

Epoch [3/40], Step [140/196], Loss: 4.1099


 77%|███████▋  | 150/196 [05:24<01:36,  2.11s/it]

Epoch [3/40], Step [150/196], Loss: 4.0014


 82%|████████▏ | 160/196 [05:45<01:15,  2.11s/it]

Epoch [3/40], Step [160/196], Loss: 3.9653


 87%|████████▋ | 170/196 [06:06<00:55,  2.12s/it]

Epoch [3/40], Step [170/196], Loss: 3.9408


 92%|█████████▏| 180/196 [06:27<00:33,  2.12s/it]

Epoch [3/40], Step [180/196], Loss: 3.8934


 97%|█████████▋| 190/196 [06:48<00:12,  2.11s/it]

Epoch [3/40], Step [190/196], Loss: 3.8159


100%|██████████| 196/196 [07:00<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 21.59 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:27<07:04,  2.28s/it]

Epoch [4/40], Step [10/196], Loss: 3.8327


 10%|█         | 20/196 [00:49<06:20,  2.16s/it]

Epoch [4/40], Step [20/196], Loss: 3.6403


 15%|█▌        | 30/196 [01:10<05:45,  2.08s/it]

Epoch [4/40], Step [30/196], Loss: 3.5587


 20%|██        | 40/196 [01:30<05:23,  2.08s/it]

Epoch [4/40], Step [40/196], Loss: 3.5999


 26%|██▌       | 50/196 [01:51<05:09,  2.12s/it]

Epoch [4/40], Step [50/196], Loss: 3.5578


 31%|███       | 60/196 [02:13<04:49,  2.13s/it]

Epoch [4/40], Step [60/196], Loss: 3.5101


 36%|███▌      | 70/196 [02:34<04:27,  2.12s/it]

Epoch [4/40], Step [70/196], Loss: 3.4663


 41%|████      | 80/196 [02:55<04:03,  2.10s/it]

Epoch [4/40], Step [80/196], Loss: 3.4768


 46%|████▌     | 90/196 [03:16<03:41,  2.09s/it]

Epoch [4/40], Step [90/196], Loss: 3.4842


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [4/40], Step [100/196], Loss: 3.4529


 56%|█████▌    | 110/196 [03:58<03:02,  2.12s/it]

Epoch [4/40], Step [110/196], Loss: 3.4870


 61%|██████    | 120/196 [04:19<02:40,  2.11s/it]

Epoch [4/40], Step [120/196], Loss: 3.4071


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [4/40], Step [130/196], Loss: 3.3525


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [4/40], Step [140/196], Loss: 3.4381


 77%|███████▋  | 150/196 [05:22<01:36,  2.11s/it]

Epoch [4/40], Step [150/196], Loss: 3.3195


 82%|████████▏ | 160/196 [05:43<01:16,  2.12s/it]

Epoch [4/40], Step [160/196], Loss: 3.2550


 87%|████████▋ | 170/196 [06:05<00:54,  2.11s/it]

Epoch [4/40], Step [170/196], Loss: 3.3597


 92%|█████████▏| 180/196 [06:26<00:33,  2.12s/it]

Epoch [4/40], Step [180/196], Loss: 3.2463


 97%|█████████▋| 190/196 [06:47<00:12,  2.11s/it]

Epoch [4/40], Step [190/196], Loss: 3.2041


100%|██████████| 196/196 [06:58<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 30.24 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:26<06:56,  2.24s/it]

Epoch [5/40], Step [10/196], Loss: 3.1777


 10%|█         | 20/196 [00:48<06:21,  2.17s/it]

Epoch [5/40], Step [20/196], Loss: 3.0436


 15%|█▌        | 30/196 [01:09<05:46,  2.09s/it]

Epoch [5/40], Step [30/196], Loss: 2.9807


 20%|██        | 40/196 [01:30<05:22,  2.07s/it]

Epoch [5/40], Step [40/196], Loss: 3.0037


 26%|██▌       | 50/196 [01:51<05:07,  2.11s/it]

Epoch [5/40], Step [50/196], Loss: 2.9870


 31%|███       | 60/196 [02:12<04:50,  2.13s/it]

Epoch [5/40], Step [60/196], Loss: 3.0420


 36%|███▌      | 70/196 [02:33<04:27,  2.12s/it]

Epoch [5/40], Step [70/196], Loss: 2.9625


 41%|████      | 80/196 [02:54<04:03,  2.10s/it]

Epoch [5/40], Step [80/196], Loss: 2.9575


 46%|████▌     | 90/196 [03:15<03:41,  2.09s/it]

Epoch [5/40], Step [90/196], Loss: 2.9686


 51%|█████     | 100/196 [03:36<03:22,  2.11s/it]

Epoch [5/40], Step [100/196], Loss: 2.9912


 56%|█████▌    | 110/196 [03:57<03:01,  2.11s/it]

Epoch [5/40], Step [110/196], Loss: 2.9111


 61%|██████    | 120/196 [04:19<02:39,  2.10s/it]

Epoch [5/40], Step [120/196], Loss: 2.9399


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [5/40], Step [130/196], Loss: 2.8718


 71%|███████▏  | 140/196 [05:01<01:58,  2.11s/it]

Epoch [5/40], Step [140/196], Loss: 2.9102


 77%|███████▋  | 150/196 [05:22<01:37,  2.11s/it]

Epoch [5/40], Step [150/196], Loss: 2.8694


 82%|████████▏ | 160/196 [05:43<01:16,  2.11s/it]

Epoch [5/40], Step [160/196], Loss: 2.8545


 87%|████████▋ | 170/196 [06:04<00:54,  2.10s/it]

Epoch [5/40], Step [170/196], Loss: 2.8180


 92%|█████████▏| 180/196 [06:25<00:33,  2.10s/it]

Epoch [5/40], Step [180/196], Loss: 2.8244


 97%|█████████▋| 190/196 [06:46<00:12,  2.10s/it]

Epoch [5/40], Step [190/196], Loss: 2.7899


100%|██████████| 196/196 [06:57<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 29.53 %


  5%|▌         | 10/196 [00:29<07:07,  2.30s/it]

Epoch [6/40], Step [10/196], Loss: 2.7664


 10%|█         | 20/196 [00:51<06:22,  2.17s/it]

Epoch [6/40], Step [20/196], Loss: 2.6390


 15%|█▌        | 30/196 [01:12<05:48,  2.10s/it]

Epoch [6/40], Step [30/196], Loss: 2.6010


 20%|██        | 40/196 [01:33<05:24,  2.08s/it]

Epoch [6/40], Step [40/196], Loss: 2.6031


 26%|██▌       | 50/196 [01:54<05:10,  2.12s/it]

Epoch [6/40], Step [50/196], Loss: 2.6468


 31%|███       | 60/196 [02:16<04:49,  2.13s/it]

Epoch [6/40], Step [60/196], Loss: 2.6225


 36%|███▌      | 70/196 [02:37<04:27,  2.13s/it]

Epoch [6/40], Step [70/196], Loss: 2.6292


 41%|████      | 80/196 [02:58<04:05,  2.11s/it]

Epoch [6/40], Step [80/196], Loss: 2.6328


 46%|████▌     | 90/196 [03:19<03:43,  2.11s/it]

Epoch [6/40], Step [90/196], Loss: 2.6376


 51%|█████     | 100/196 [03:40<03:24,  2.13s/it]

Epoch [6/40], Step [100/196], Loss: 2.6180


 56%|█████▌    | 110/196 [04:02<03:03,  2.13s/it]

Epoch [6/40], Step [110/196], Loss: 2.6085


 61%|██████    | 120/196 [04:23<02:41,  2.12s/it]

Epoch [6/40], Step [120/196], Loss: 2.5809


 66%|██████▋   | 130/196 [04:44<02:19,  2.11s/it]

Epoch [6/40], Step [130/196], Loss: 2.6425


 71%|███████▏  | 140/196 [05:05<01:58,  2.11s/it]

Epoch [6/40], Step [140/196], Loss: 2.5590


 77%|███████▋  | 150/196 [05:26<01:37,  2.12s/it]

Epoch [6/40], Step [150/196], Loss: 2.5473


 82%|████████▏ | 160/196 [05:47<01:16,  2.12s/it]

Epoch [6/40], Step [160/196], Loss: 2.5100


 87%|████████▋ | 170/196 [06:09<00:55,  2.12s/it]

Epoch [6/40], Step [170/196], Loss: 2.5032


 92%|█████████▏| 180/196 [06:30<00:34,  2.13s/it]

Epoch [6/40], Step [180/196], Loss: 2.4302


 97%|█████████▋| 190/196 [06:51<00:12,  2.13s/it]

Epoch [6/40], Step [190/196], Loss: 2.4528


100%|██████████| 196/196 [07:03<00:00,  2.16s/it]


Test Accuracy of the student model on the test images: 36.97 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:27<06:56,  2.24s/it]

Epoch [7/40], Step [10/196], Loss: 2.3771


 10%|█         | 20/196 [00:49<06:21,  2.17s/it]

Epoch [7/40], Step [20/196], Loss: 2.3471


 15%|█▌        | 30/196 [01:10<05:46,  2.09s/it]

Epoch [7/40], Step [30/196], Loss: 2.3456


 20%|██        | 40/196 [01:31<05:22,  2.07s/it]

Epoch [7/40], Step [40/196], Loss: 2.3443


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [7/40], Step [50/196], Loss: 2.3425


 31%|███       | 60/196 [02:13<04:49,  2.13s/it]

Epoch [7/40], Step [60/196], Loss: 2.3643


 36%|███▌      | 70/196 [02:34<04:27,  2.12s/it]

Epoch [7/40], Step [70/196], Loss: 2.3500


 41%|████      | 80/196 [02:55<04:03,  2.10s/it]

Epoch [7/40], Step [80/196], Loss: 2.3440


 46%|████▌     | 90/196 [03:16<03:42,  2.10s/it]

Epoch [7/40], Step [90/196], Loss: 2.3423


 51%|█████     | 100/196 [03:37<03:20,  2.09s/it]

Epoch [7/40], Step [100/196], Loss: 2.3039


 56%|█████▌    | 110/196 [03:58<03:01,  2.11s/it]

Epoch [7/40], Step [110/196], Loss: 2.2888


 61%|██████    | 120/196 [04:19<02:40,  2.11s/it]

Epoch [7/40], Step [120/196], Loss: 2.2676


 66%|██████▋   | 130/196 [04:40<02:19,  2.11s/it]

Epoch [7/40], Step [130/196], Loss: 2.2531


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [7/40], Step [140/196], Loss: 2.2717


 77%|███████▋  | 150/196 [05:22<01:36,  2.09s/it]

Epoch [7/40], Step [150/196], Loss: 2.2960


 82%|████████▏ | 160/196 [05:43<01:15,  2.10s/it]

Epoch [7/40], Step [160/196], Loss: 2.2715


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [7/40], Step [170/196], Loss: 2.3414


 92%|█████████▏| 180/196 [06:25<00:33,  2.12s/it]

Epoch [7/40], Step [180/196], Loss: 2.3646


 97%|█████████▋| 190/196 [06:46<00:12,  2.10s/it]

Epoch [7/40], Step [190/196], Loss: 2.2479


100%|██████████| 196/196 [06:58<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 39.47 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:26<06:59,  2.25s/it]

Epoch [8/40], Step [10/196], Loss: 2.1520


 10%|█         | 20/196 [00:48<06:21,  2.17s/it]

Epoch [8/40], Step [20/196], Loss: 2.1500


 15%|█▌        | 30/196 [01:09<05:45,  2.08s/it]

Epoch [8/40], Step [30/196], Loss: 2.1507


 20%|██        | 40/196 [01:30<05:22,  2.07s/it]

Epoch [8/40], Step [40/196], Loss: 2.1477


 26%|██▌       | 50/196 [01:51<05:06,  2.10s/it]

Epoch [8/40], Step [50/196], Loss: 2.0852


 31%|███       | 60/196 [02:12<04:48,  2.12s/it]

Epoch [8/40], Step [60/196], Loss: 2.0265


 36%|███▌      | 70/196 [02:33<04:26,  2.11s/it]

Epoch [8/40], Step [70/196], Loss: 2.0683


 41%|████      | 80/196 [02:54<04:03,  2.10s/it]

Epoch [8/40], Step [80/196], Loss: 2.0923


 46%|████▌     | 90/196 [03:15<03:41,  2.09s/it]

Epoch [8/40], Step [90/196], Loss: 2.0589


 51%|█████     | 100/196 [03:36<03:22,  2.11s/it]

Epoch [8/40], Step [100/196], Loss: 2.1188


 56%|█████▌    | 110/196 [03:57<03:01,  2.11s/it]

Epoch [8/40], Step [110/196], Loss: 2.1615


 61%|██████    | 120/196 [04:19<02:40,  2.11s/it]

Epoch [8/40], Step [120/196], Loss: 2.1006


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [8/40], Step [130/196], Loss: 2.0801


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [8/40], Step [140/196], Loss: 2.1136


 77%|███████▋  | 150/196 [05:22<01:36,  2.10s/it]

Epoch [8/40], Step [150/196], Loss: 2.1622


 82%|████████▏ | 160/196 [05:43<01:15,  2.10s/it]

Epoch [8/40], Step [160/196], Loss: 2.0853


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [8/40], Step [170/196], Loss: 2.1074


 92%|█████████▏| 180/196 [06:25<00:33,  2.11s/it]

Epoch [8/40], Step [180/196], Loss: 2.1210


 97%|█████████▋| 190/196 [06:46<00:12,  2.12s/it]

Epoch [8/40], Step [190/196], Loss: 2.1154


100%|██████████| 196/196 [06:57<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 30.75 %


  5%|▌         | 10/196 [00:27<07:00,  2.26s/it]

Epoch [9/40], Step [10/196], Loss: 1.9885


 10%|█         | 20/196 [00:49<06:20,  2.16s/it]

Epoch [9/40], Step [20/196], Loss: 1.9211


 15%|█▌        | 30/196 [01:10<05:44,  2.08s/it]

Epoch [9/40], Step [30/196], Loss: 1.9274


 20%|██        | 40/196 [01:30<05:23,  2.08s/it]

Epoch [9/40], Step [40/196], Loss: 1.9756


 26%|██▌       | 50/196 [01:51<05:05,  2.09s/it]

Epoch [9/40], Step [50/196], Loss: 2.0046


 31%|███       | 60/196 [02:13<04:49,  2.13s/it]

Epoch [9/40], Step [60/196], Loss: 1.9697


 36%|███▌      | 70/196 [02:34<04:26,  2.12s/it]

Epoch [9/40], Step [70/196], Loss: 1.9109


 41%|████      | 80/196 [02:55<04:02,  2.09s/it]

Epoch [9/40], Step [80/196], Loss: 1.9459


 46%|████▌     | 90/196 [03:16<03:41,  2.09s/it]

Epoch [9/40], Step [90/196], Loss: 1.8809


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [9/40], Step [100/196], Loss: 1.9376


 56%|█████▌    | 110/196 [03:58<03:02,  2.12s/it]

Epoch [9/40], Step [110/196], Loss: 1.9302


 61%|██████    | 120/196 [04:19<02:40,  2.11s/it]

Epoch [9/40], Step [120/196], Loss: 1.9139


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [9/40], Step [130/196], Loss: 1.9026


 71%|███████▏  | 140/196 [05:01<01:57,  2.09s/it]

Epoch [9/40], Step [140/196], Loss: 1.9889


 77%|███████▋  | 150/196 [05:22<01:36,  2.11s/it]

Epoch [9/40], Step [150/196], Loss: 1.9149


 82%|████████▏ | 160/196 [05:43<01:15,  2.11s/it]

Epoch [9/40], Step [160/196], Loss: 1.9559


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [9/40], Step [170/196], Loss: 1.9224


 92%|█████████▏| 180/196 [06:25<00:33,  2.11s/it]

Epoch [9/40], Step [180/196], Loss: 1.9700


 97%|█████████▋| 190/196 [06:46<00:12,  2.10s/it]

Epoch [9/40], Step [190/196], Loss: 2.0214


100%|██████████| 196/196 [06:58<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 38.86 %


  5%|▌         | 10/196 [00:30<07:05,  2.29s/it]

Epoch [10/40], Step [10/196], Loss: 1.8339


 10%|█         | 20/196 [00:52<06:20,  2.16s/it]

Epoch [10/40], Step [20/196], Loss: 1.8212


 15%|█▌        | 30/196 [01:13<05:47,  2.09s/it]

Epoch [10/40], Step [30/196], Loss: 1.8230


 20%|██        | 40/196 [01:33<05:22,  2.07s/it]

Epoch [10/40], Step [40/196], Loss: 1.8604


 26%|██▌       | 50/196 [01:54<05:06,  2.10s/it]

Epoch [10/40], Step [50/196], Loss: 1.8048


 31%|███       | 60/196 [02:15<04:48,  2.12s/it]

Epoch [10/40], Step [60/196], Loss: 1.7651


 36%|███▌      | 70/196 [02:37<04:25,  2.11s/it]

Epoch [10/40], Step [70/196], Loss: 1.7836


 41%|████      | 80/196 [02:58<04:03,  2.10s/it]

Epoch [10/40], Step [80/196], Loss: 1.7924


 46%|████▌     | 90/196 [03:19<03:42,  2.10s/it]

Epoch [10/40], Step [90/196], Loss: 1.7991


 51%|█████     | 100/196 [03:40<03:22,  2.11s/it]

Epoch [10/40], Step [100/196], Loss: 1.8175


 56%|█████▌    | 110/196 [04:01<03:01,  2.11s/it]

Epoch [10/40], Step [110/196], Loss: 1.8155


 61%|██████    | 120/196 [04:22<02:39,  2.10s/it]

Epoch [10/40], Step [120/196], Loss: 1.8180


 66%|██████▋   | 130/196 [04:43<02:18,  2.10s/it]

Epoch [10/40], Step [130/196], Loss: 1.7981


 71%|███████▏  | 140/196 [05:04<01:57,  2.10s/it]

Epoch [10/40], Step [140/196], Loss: 1.8117


 77%|███████▋  | 150/196 [05:25<01:36,  2.10s/it]

Epoch [10/40], Step [150/196], Loss: 1.8378


 82%|████████▏ | 160/196 [05:46<01:15,  2.09s/it]

Epoch [10/40], Step [160/196], Loss: 1.8145


 87%|████████▋ | 170/196 [06:07<00:54,  2.10s/it]

Epoch [10/40], Step [170/196], Loss: 1.8348


 92%|█████████▏| 180/196 [06:28<00:33,  2.10s/it]

Epoch [10/40], Step [180/196], Loss: 1.8445


 97%|█████████▋| 190/196 [06:49<00:12,  2.10s/it]

Epoch [10/40], Step [190/196], Loss: 1.8270


100%|██████████| 196/196 [07:00<00:00,  2.15s/it]


Test Accuracy of the student model on the test images: 41.03 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:27<06:58,  2.25s/it]

Epoch [11/40], Step [10/196], Loss: 1.7768


 10%|█         | 20/196 [00:49<06:20,  2.16s/it]

Epoch [11/40], Step [20/196], Loss: 1.7274


 15%|█▌        | 30/196 [01:10<05:45,  2.08s/it]

Epoch [11/40], Step [30/196], Loss: 1.6776


 20%|██        | 40/196 [01:30<05:24,  2.08s/it]

Epoch [11/40], Step [40/196], Loss: 1.6581


 26%|██▌       | 50/196 [01:51<05:07,  2.11s/it]

Epoch [11/40], Step [50/196], Loss: 1.6917


 31%|███       | 60/196 [02:13<04:49,  2.13s/it]

Epoch [11/40], Step [60/196], Loss: 1.6808


 36%|███▌      | 70/196 [02:34<04:28,  2.13s/it]

Epoch [11/40], Step [70/196], Loss: 1.6903


 41%|████      | 80/196 [02:55<04:04,  2.11s/it]

Epoch [11/40], Step [80/196], Loss: 1.7171


 46%|████▌     | 90/196 [03:16<03:42,  2.10s/it]

Epoch [11/40], Step [90/196], Loss: 1.7046


 51%|█████     | 100/196 [03:37<03:23,  2.12s/it]

Epoch [11/40], Step [100/196], Loss: 1.7224


 56%|█████▌    | 110/196 [03:58<03:01,  2.11s/it]

Epoch [11/40], Step [110/196], Loss: 1.7459


 61%|██████    | 120/196 [04:19<02:39,  2.10s/it]

Epoch [11/40], Step [120/196], Loss: 1.7484


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [11/40], Step [130/196], Loss: 1.7675


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [11/40], Step [140/196], Loss: 1.7095


 77%|███████▋  | 150/196 [05:22<01:36,  2.10s/it]

Epoch [11/40], Step [150/196], Loss: 1.7336


 82%|████████▏ | 160/196 [05:44<01:16,  2.12s/it]

Epoch [11/40], Step [160/196], Loss: 1.7691


 87%|████████▋ | 170/196 [06:05<00:54,  2.11s/it]

Epoch [11/40], Step [170/196], Loss: 1.7653


 92%|█████████▏| 180/196 [06:26<00:33,  2.11s/it]

Epoch [11/40], Step [180/196], Loss: 1.7560


 97%|█████████▋| 190/196 [06:47<00:12,  2.11s/it]

Epoch [11/40], Step [190/196], Loss: 1.7295


100%|██████████| 196/196 [06:58<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 42.67 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:28<06:59,  2.26s/it]

Epoch [12/40], Step [10/196], Loss: 1.6939


 10%|█         | 20/196 [00:49<06:20,  2.16s/it]

Epoch [12/40], Step [20/196], Loss: 1.6187


 15%|█▌        | 30/196 [01:10<05:44,  2.08s/it]

Epoch [12/40], Step [30/196], Loss: 1.5996


 20%|██        | 40/196 [01:31<05:21,  2.06s/it]

Epoch [12/40], Step [40/196], Loss: 1.6354


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [12/40], Step [50/196], Loss: 1.6383


 31%|███       | 60/196 [02:13<04:49,  2.13s/it]

Epoch [12/40], Step [60/196], Loss: 1.6262


 36%|███▌      | 70/196 [02:34<04:26,  2.11s/it]

Epoch [12/40], Step [70/196], Loss: 1.6283


 41%|████      | 80/196 [02:55<04:03,  2.10s/it]

Epoch [12/40], Step [80/196], Loss: 1.6060


 46%|████▌     | 90/196 [03:16<03:42,  2.10s/it]

Epoch [12/40], Step [90/196], Loss: 1.6136


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [12/40], Step [100/196], Loss: 1.6182


 56%|█████▌    | 110/196 [03:58<03:01,  2.11s/it]

Epoch [12/40], Step [110/196], Loss: 1.6240


 61%|██████    | 120/196 [04:19<02:39,  2.10s/it]

Epoch [12/40], Step [120/196], Loss: 1.6137


 66%|██████▋   | 130/196 [04:40<02:18,  2.09s/it]

Epoch [12/40], Step [130/196], Loss: 1.6258


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [12/40], Step [140/196], Loss: 1.6402


 77%|███████▋  | 150/196 [05:22<01:36,  2.10s/it]

Epoch [12/40], Step [150/196], Loss: 1.6436


 82%|████████▏ | 160/196 [05:43<01:16,  2.11s/it]

Epoch [12/40], Step [160/196], Loss: 1.6402


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [12/40], Step [170/196], Loss: 1.6615


 92%|█████████▏| 180/196 [06:26<00:33,  2.11s/it]

Epoch [12/40], Step [180/196], Loss: 1.6236


 97%|█████████▋| 190/196 [06:47<00:12,  2.11s/it]

Epoch [12/40], Step [190/196], Loss: 1.6315


100%|██████████| 196/196 [06:58<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 44.88 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:27<07:00,  2.26s/it]

Epoch [13/40], Step [10/196], Loss: 1.5271


 10%|█         | 20/196 [00:49<06:20,  2.16s/it]

Epoch [13/40], Step [20/196], Loss: 1.5132


 15%|█▌        | 30/196 [01:10<05:44,  2.08s/it]

Epoch [13/40], Step [30/196], Loss: 1.5155


 20%|██        | 40/196 [01:30<05:22,  2.07s/it]

Epoch [13/40], Step [40/196], Loss: 1.5091


 26%|██▌       | 50/196 [01:51<05:05,  2.10s/it]

Epoch [13/40], Step [50/196], Loss: 1.4891


 31%|███       | 60/196 [02:12<04:48,  2.12s/it]

Epoch [13/40], Step [60/196], Loss: 1.4957


 36%|███▌      | 70/196 [02:34<04:28,  2.13s/it]

Epoch [13/40], Step [70/196], Loss: 1.5204


 41%|████      | 80/196 [02:55<04:03,  2.10s/it]

Epoch [13/40], Step [80/196], Loss: 1.5403


 46%|████▌     | 90/196 [03:16<03:41,  2.09s/it]

Epoch [13/40], Step [90/196], Loss: 1.5171


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [13/40], Step [100/196], Loss: 1.5420


 56%|█████▌    | 110/196 [03:58<03:01,  2.11s/it]

Epoch [13/40], Step [110/196], Loss: 1.5315


 61%|██████    | 120/196 [04:19<02:39,  2.11s/it]

Epoch [13/40], Step [120/196], Loss: 1.5450


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [13/40], Step [130/196], Loss: 1.5742


 71%|███████▏  | 140/196 [05:01<01:58,  2.11s/it]

Epoch [13/40], Step [140/196], Loss: 1.5672


 77%|███████▋  | 150/196 [05:22<01:37,  2.11s/it]

Epoch [13/40], Step [150/196], Loss: 1.5544


 82%|████████▏ | 160/196 [05:43<01:16,  2.11s/it]

Epoch [13/40], Step [160/196], Loss: 1.6232


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [13/40], Step [170/196], Loss: 1.5409


 92%|█████████▏| 180/196 [06:25<00:33,  2.11s/it]

Epoch [13/40], Step [180/196], Loss: 1.5595


 97%|█████████▋| 190/196 [06:46<00:12,  2.11s/it]

Epoch [13/40], Step [190/196], Loss: 1.5887


100%|██████████| 196/196 [06:58<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 48.35 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:28<07:05,  2.29s/it]

Epoch [14/40], Step [10/196], Loss: 1.4437


 10%|█         | 20/196 [00:50<06:21,  2.16s/it]

Epoch [14/40], Step [20/196], Loss: 1.4359


 15%|█▌        | 30/196 [01:11<05:45,  2.08s/it]

Epoch [14/40], Step [30/196], Loss: 1.4249


 20%|██        | 40/196 [01:31<05:21,  2.06s/it]

Epoch [14/40], Step [40/196], Loss: 1.4164


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [14/40], Step [50/196], Loss: 1.4683


 31%|███       | 60/196 [02:13<04:49,  2.13s/it]

Epoch [14/40], Step [60/196], Loss: 1.4498


 36%|███▌      | 70/196 [02:35<04:27,  2.13s/it]

Epoch [14/40], Step [70/196], Loss: 1.4213


 41%|████      | 80/196 [02:56<04:03,  2.10s/it]

Epoch [14/40], Step [80/196], Loss: 1.4475


 46%|████▌     | 90/196 [03:17<03:41,  2.09s/it]

Epoch [14/40], Step [90/196], Loss: 1.4246


 51%|█████     | 100/196 [03:38<03:22,  2.11s/it]

Epoch [14/40], Step [100/196], Loss: 1.4004


 56%|█████▌    | 110/196 [03:59<03:01,  2.12s/it]

Epoch [14/40], Step [110/196], Loss: 1.4465


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [14/40], Step [120/196], Loss: 1.4508


 66%|██████▋   | 130/196 [04:41<02:18,  2.10s/it]

Epoch [14/40], Step [130/196], Loss: 1.4758


 71%|███████▏  | 140/196 [05:02<01:58,  2.11s/it]

Epoch [14/40], Step [140/196], Loss: 1.4889


 77%|███████▋  | 150/196 [05:23<01:36,  2.11s/it]

Epoch [14/40], Step [150/196], Loss: 1.4668


 82%|████████▏ | 160/196 [05:44<01:15,  2.11s/it]

Epoch [14/40], Step [160/196], Loss: 1.4628


 87%|████████▋ | 170/196 [06:05<00:54,  2.11s/it]

Epoch [14/40], Step [170/196], Loss: 1.4860


 92%|█████████▏| 180/196 [06:26<00:33,  2.11s/it]

Epoch [14/40], Step [180/196], Loss: 1.4573


 97%|█████████▋| 190/196 [06:47<00:12,  2.10s/it]

Epoch [14/40], Step [190/196], Loss: 1.4765


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 50.50 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:25<06:55,  2.24s/it]

Epoch [15/40], Step [10/196], Loss: 1.3758


 10%|█         | 20/196 [00:47<06:18,  2.15s/it]

Epoch [15/40], Step [20/196], Loss: 1.3936


 15%|█▌        | 30/196 [01:08<05:46,  2.09s/it]

Epoch [15/40], Step [30/196], Loss: 1.3281


 20%|██        | 40/196 [01:29<05:22,  2.07s/it]

Epoch [15/40], Step [40/196], Loss: 1.3206


 26%|██▌       | 50/196 [01:50<05:06,  2.10s/it]

Epoch [15/40], Step [50/196], Loss: 1.3403


 31%|███       | 60/196 [02:11<04:49,  2.13s/it]

Epoch [15/40], Step [60/196], Loss: 1.3370


 36%|███▌      | 70/196 [02:32<04:27,  2.12s/it]

Epoch [15/40], Step [70/196], Loss: 1.3498


 41%|████      | 80/196 [02:53<04:04,  2.10s/it]

Epoch [15/40], Step [80/196], Loss: 1.3219


 46%|████▌     | 90/196 [03:14<03:41,  2.09s/it]

Epoch [15/40], Step [90/196], Loss: 1.3536


 51%|█████     | 100/196 [03:35<03:22,  2.11s/it]

Epoch [15/40], Step [100/196], Loss: 1.3886


 56%|█████▌    | 110/196 [03:56<03:01,  2.11s/it]

Epoch [15/40], Step [110/196], Loss: 1.3527


 61%|██████    | 120/196 [04:18<02:41,  2.12s/it]

Epoch [15/40], Step [120/196], Loss: 1.3504


 66%|██████▋   | 130/196 [04:39<02:19,  2.11s/it]

Epoch [15/40], Step [130/196], Loss: 1.3748


 71%|███████▏  | 140/196 [05:00<01:58,  2.11s/it]

Epoch [15/40], Step [140/196], Loss: 1.3975


 77%|███████▋  | 150/196 [05:21<01:36,  2.11s/it]

Epoch [15/40], Step [150/196], Loss: 1.4069


 82%|████████▏ | 160/196 [05:42<01:15,  2.10s/it]

Epoch [15/40], Step [160/196], Loss: 1.3884


 87%|████████▋ | 170/196 [06:03<00:54,  2.10s/it]

Epoch [15/40], Step [170/196], Loss: 1.4082


 92%|█████████▏| 180/196 [06:24<00:33,  2.10s/it]

Epoch [15/40], Step [180/196], Loss: 1.3930


 97%|█████████▋| 190/196 [06:45<00:12,  2.10s/it]

Epoch [15/40], Step [190/196], Loss: 1.4157


100%|██████████| 196/196 [06:56<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 47.86 %


  5%|▌         | 10/196 [00:26<06:57,  2.24s/it]

Epoch [16/40], Step [10/196], Loss: 1.3472


 10%|█         | 20/196 [00:48<06:20,  2.16s/it]

Epoch [16/40], Step [20/196], Loss: 1.2734


 15%|█▌        | 30/196 [01:09<05:44,  2.08s/it]

Epoch [16/40], Step [30/196], Loss: 1.2662


 20%|██        | 40/196 [01:30<05:21,  2.06s/it]

Epoch [16/40], Step [40/196], Loss: 1.2721


 26%|██▌       | 50/196 [01:51<05:07,  2.10s/it]

Epoch [16/40], Step [50/196], Loss: 1.2556


 31%|███       | 60/196 [02:12<04:50,  2.13s/it]

Epoch [16/40], Step [60/196], Loss: 1.2506


 36%|███▌      | 70/196 [02:33<04:26,  2.11s/it]

Epoch [16/40], Step [70/196], Loss: 1.2671


 41%|████      | 80/196 [02:54<04:01,  2.08s/it]

Epoch [16/40], Step [80/196], Loss: 1.2790


 46%|████▌     | 90/196 [03:15<03:43,  2.10s/it]

Epoch [16/40], Step [90/196], Loss: 1.3082


 51%|█████     | 100/196 [03:36<03:22,  2.11s/it]

Epoch [16/40], Step [100/196], Loss: 1.3246


 56%|█████▌    | 110/196 [03:57<03:01,  2.11s/it]

Epoch [16/40], Step [110/196], Loss: 1.3152


 61%|██████    | 120/196 [04:18<02:40,  2.11s/it]

Epoch [16/40], Step [120/196], Loss: 1.3020


 66%|██████▋   | 130/196 [04:39<02:18,  2.10s/it]

Epoch [16/40], Step [130/196], Loss: 1.2975


 71%|███████▏  | 140/196 [05:00<01:57,  2.09s/it]

Epoch [16/40], Step [140/196], Loss: 1.3531


 77%|███████▋  | 150/196 [05:21<01:37,  2.12s/it]

Epoch [16/40], Step [150/196], Loss: 1.3354


 82%|████████▏ | 160/196 [05:42<01:16,  2.11s/it]

Epoch [16/40], Step [160/196], Loss: 1.3308


 87%|████████▋ | 170/196 [06:03<00:54,  2.10s/it]

Epoch [16/40], Step [170/196], Loss: 1.3727


 92%|█████████▏| 180/196 [06:24<00:33,  2.10s/it]

Epoch [16/40], Step [180/196], Loss: 1.3720


 97%|█████████▋| 190/196 [06:45<00:12,  2.10s/it]

Epoch [16/40], Step [190/196], Loss: 1.3264


100%|██████████| 196/196 [06:56<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 46.56 %


  5%|▌         | 10/196 [00:28<07:21,  2.37s/it]

Epoch [17/40], Step [10/196], Loss: 1.2657


 10%|█         | 20/196 [00:50<06:21,  2.17s/it]

Epoch [17/40], Step [20/196], Loss: 1.2179


 15%|█▌        | 30/196 [01:11<05:46,  2.09s/it]

Epoch [17/40], Step [30/196], Loss: 1.2007


 20%|██        | 40/196 [01:32<05:24,  2.08s/it]

Epoch [17/40], Step [40/196], Loss: 1.1748


 26%|██▌       | 50/196 [01:53<05:09,  2.12s/it]

Epoch [17/40], Step [50/196], Loss: 1.1716


 31%|███       | 60/196 [02:14<04:52,  2.15s/it]

Epoch [17/40], Step [60/196], Loss: 1.2092


 36%|███▌      | 70/196 [02:36<04:28,  2.13s/it]

Epoch [17/40], Step [70/196], Loss: 1.2042


 41%|████      | 80/196 [02:57<04:05,  2.12s/it]

Epoch [17/40], Step [80/196], Loss: 1.1824


 46%|████▌     | 90/196 [03:18<03:42,  2.10s/it]

Epoch [17/40], Step [90/196], Loss: 1.1973


 51%|█████     | 100/196 [03:39<03:24,  2.13s/it]

Epoch [17/40], Step [100/196], Loss: 1.2236


 56%|█████▌    | 110/196 [04:00<03:02,  2.12s/it]

Epoch [17/40], Step [110/196], Loss: 1.2166


 61%|██████    | 120/196 [04:21<02:41,  2.12s/it]

Epoch [17/40], Step [120/196], Loss: 1.2131


 66%|██████▋   | 130/196 [04:43<02:19,  2.11s/it]

Epoch [17/40], Step [130/196], Loss: 1.2323


 71%|███████▏  | 140/196 [05:04<01:58,  2.12s/it]

Epoch [17/40], Step [140/196], Loss: 1.2582


 77%|███████▋  | 150/196 [05:25<01:37,  2.12s/it]

Epoch [17/40], Step [150/196], Loss: 1.2781


 82%|████████▏ | 160/196 [05:46<01:16,  2.13s/it]

Epoch [17/40], Step [160/196], Loss: 1.2870


 87%|████████▋ | 170/196 [06:08<00:55,  2.12s/it]

Epoch [17/40], Step [170/196], Loss: 1.2797


 92%|█████████▏| 180/196 [06:29<00:33,  2.12s/it]

Epoch [17/40], Step [180/196], Loss: 1.2870


 97%|█████████▋| 190/196 [06:50<00:12,  2.12s/it]

Epoch [17/40], Step [190/196], Loss: 1.2943


100%|██████████| 196/196 [07:01<00:00,  2.15s/it]


Test Accuracy of the student model on the test images: 49.67 %


  5%|▌         | 10/196 [00:26<06:56,  2.24s/it]

Epoch [18/40], Step [10/196], Loss: 1.2049


 10%|█         | 20/196 [00:48<06:23,  2.18s/it]

Epoch [18/40], Step [20/196], Loss: 1.2087


 15%|█▌        | 30/196 [01:09<05:49,  2.10s/it]

Epoch [18/40], Step [30/196], Loss: 1.1606


 20%|██        | 40/196 [01:30<05:24,  2.08s/it]

Epoch [18/40], Step [40/196], Loss: 1.1354


 26%|██▌       | 50/196 [01:51<05:08,  2.12s/it]

Epoch [18/40], Step [50/196], Loss: 1.1656


 31%|███       | 60/196 [02:12<04:52,  2.15s/it]

Epoch [18/40], Step [60/196], Loss: 1.1680


 36%|███▌      | 70/196 [02:34<04:28,  2.13s/it]

Epoch [18/40], Step [70/196], Loss: 1.1504


 41%|████      | 80/196 [02:55<04:05,  2.12s/it]

Epoch [18/40], Step [80/196], Loss: 1.1581


 46%|████▌     | 90/196 [03:16<03:42,  2.10s/it]

Epoch [18/40], Step [90/196], Loss: 1.1567


 51%|█████     | 100/196 [03:37<03:24,  2.13s/it]

Epoch [18/40], Step [100/196], Loss: 1.2040


 56%|█████▌    | 110/196 [03:58<03:02,  2.13s/it]

Epoch [18/40], Step [110/196], Loss: 1.1782


 61%|██████    | 120/196 [04:20<02:41,  2.12s/it]

Epoch [18/40], Step [120/196], Loss: 1.1676


 66%|██████▋   | 130/196 [04:41<02:19,  2.11s/it]

Epoch [18/40], Step [130/196], Loss: 1.1877


 71%|███████▏  | 140/196 [05:02<01:58,  2.12s/it]

Epoch [18/40], Step [140/196], Loss: 1.1929


 77%|███████▋  | 150/196 [05:23<01:37,  2.12s/it]

Epoch [18/40], Step [150/196], Loss: 1.2032


 82%|████████▏ | 160/196 [05:44<01:15,  2.11s/it]

Epoch [18/40], Step [160/196], Loss: 1.2067


 87%|████████▋ | 170/196 [06:05<00:55,  2.12s/it]

Epoch [18/40], Step [170/196], Loss: 1.2382


 92%|█████████▏| 180/196 [06:27<00:33,  2.12s/it]

Epoch [18/40], Step [180/196], Loss: 1.2241


 97%|█████████▋| 190/196 [06:48<00:12,  2.12s/it]

Epoch [18/40], Step [190/196], Loss: 1.2142


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 53.72 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:30<07:06,  2.29s/it]

Epoch [19/40], Step [10/196], Loss: 1.1381


 10%|█         | 20/196 [00:52<06:20,  2.16s/it]

Epoch [19/40], Step [20/196], Loss: 1.1243


 15%|█▌        | 30/196 [01:13<05:45,  2.08s/it]

Epoch [19/40], Step [30/196], Loss: 1.0886


 20%|██        | 40/196 [01:33<05:22,  2.07s/it]

Epoch [19/40], Step [40/196], Loss: 1.0510


 26%|██▌       | 50/196 [01:54<05:07,  2.10s/it]

Epoch [19/40], Step [50/196], Loss: 1.0689


 31%|███       | 60/196 [02:16<04:49,  2.13s/it]

Epoch [19/40], Step [60/196], Loss: 1.0670


 36%|███▌      | 70/196 [02:37<04:26,  2.12s/it]

Epoch [19/40], Step [70/196], Loss: 1.0872


 41%|████      | 80/196 [02:58<04:03,  2.10s/it]

Epoch [19/40], Step [80/196], Loss: 1.0750


 46%|████▌     | 90/196 [03:19<03:42,  2.10s/it]

Epoch [19/40], Step [90/196], Loss: 1.0503


 51%|█████     | 100/196 [03:40<03:22,  2.11s/it]

Epoch [19/40], Step [100/196], Loss: 1.0928


 56%|█████▌    | 110/196 [04:01<03:01,  2.11s/it]

Epoch [19/40], Step [110/196], Loss: 1.1063


 61%|██████    | 120/196 [04:22<02:40,  2.11s/it]

Epoch [19/40], Step [120/196], Loss: 1.0794


 66%|██████▋   | 130/196 [04:43<02:18,  2.10s/it]

Epoch [19/40], Step [130/196], Loss: 1.1360


 71%|███████▏  | 140/196 [05:04<01:57,  2.10s/it]

Epoch [19/40], Step [140/196], Loss: 1.1355


 77%|███████▋  | 150/196 [05:25<01:37,  2.11s/it]

Epoch [19/40], Step [150/196], Loss: 1.1174


 82%|████████▏ | 160/196 [05:46<01:16,  2.12s/it]

Epoch [19/40], Step [160/196], Loss: 1.1335


 87%|████████▋ | 170/196 [06:07<00:54,  2.10s/it]

Epoch [19/40], Step [170/196], Loss: 1.1376


 92%|█████████▏| 180/196 [06:28<00:33,  2.10s/it]

Epoch [19/40], Step [180/196], Loss: 1.1059


 97%|█████████▋| 190/196 [06:49<00:12,  2.09s/it]

Epoch [19/40], Step [190/196], Loss: 1.1455


100%|██████████| 196/196 [07:01<00:00,  2.15s/it]


Test Accuracy of the student model on the test images: 51.87 %


  5%|▌         | 10/196 [00:25<06:56,  2.24s/it]

Epoch [20/40], Step [10/196], Loss: 1.0695


 10%|█         | 20/196 [00:47<06:18,  2.15s/it]

Epoch [20/40], Step [20/196], Loss: 1.0622


 15%|█▌        | 30/196 [01:08<05:45,  2.08s/it]

Epoch [20/40], Step [30/196], Loss: 1.0369


 20%|██        | 40/196 [01:29<05:22,  2.07s/it]

Epoch [20/40], Step [40/196], Loss: 1.0268


 26%|██▌       | 50/196 [01:50<05:06,  2.10s/it]

Epoch [20/40], Step [50/196], Loss: 1.0315


 31%|███       | 60/196 [02:11<04:49,  2.13s/it]

Epoch [20/40], Step [60/196], Loss: 1.0132


 36%|███▌      | 70/196 [02:32<04:27,  2.12s/it]

Epoch [20/40], Step [70/196], Loss: 1.0139


 41%|████      | 80/196 [02:53<04:03,  2.10s/it]

Epoch [20/40], Step [80/196], Loss: 1.0380


 46%|████▌     | 90/196 [03:14<03:42,  2.10s/it]

Epoch [20/40], Step [90/196], Loss: 1.0234


 51%|█████     | 100/196 [03:35<03:22,  2.11s/it]

Epoch [20/40], Step [100/196], Loss: 1.0339


 56%|█████▌    | 110/196 [03:56<03:01,  2.11s/it]

Epoch [20/40], Step [110/196], Loss: 1.0305


 61%|██████    | 120/196 [04:17<02:40,  2.11s/it]

Epoch [20/40], Step [120/196], Loss: 1.0249


 66%|██████▋   | 130/196 [04:38<02:18,  2.11s/it]

Epoch [20/40], Step [130/196], Loss: 1.0599


 71%|███████▏  | 140/196 [04:59<01:57,  2.11s/it]

Epoch [20/40], Step [140/196], Loss: 1.0299


 77%|███████▋  | 150/196 [05:21<01:36,  2.11s/it]

Epoch [20/40], Step [150/196], Loss: 1.0541


 82%|████████▏ | 160/196 [05:42<01:16,  2.11s/it]

Epoch [20/40], Step [160/196], Loss: 1.0857


 87%|████████▋ | 170/196 [06:03<00:54,  2.11s/it]

Epoch [20/40], Step [170/196], Loss: 1.0696


 92%|█████████▏| 180/196 [06:24<00:33,  2.11s/it]

Epoch [20/40], Step [180/196], Loss: 1.0869


 97%|█████████▋| 190/196 [06:45<00:12,  2.11s/it]

Epoch [20/40], Step [190/196], Loss: 1.1007


100%|██████████| 196/196 [06:56<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 54.46 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:28<07:01,  2.27s/it]

Epoch [21/40], Step [10/196], Loss: 1.0380


 10%|█         | 20/196 [00:49<06:19,  2.16s/it]

Epoch [21/40], Step [20/196], Loss: 1.0277


 15%|█▌        | 30/196 [01:10<05:45,  2.08s/it]

Epoch [21/40], Step [30/196], Loss: 0.9861


 20%|██        | 40/196 [01:31<05:22,  2.07s/it]

Epoch [21/40], Step [40/196], Loss: 1.0008


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [21/40], Step [50/196], Loss: 0.9708


 31%|███       | 60/196 [02:13<04:50,  2.14s/it]

Epoch [21/40], Step [60/196], Loss: 0.9804


 36%|███▌      | 70/196 [02:34<04:26,  2.11s/it]

Epoch [21/40], Step [70/196], Loss: 0.9696


 41%|████      | 80/196 [02:55<04:03,  2.10s/it]

Epoch [21/40], Step [80/196], Loss: 0.9787


 46%|████▌     | 90/196 [03:16<03:42,  2.10s/it]

Epoch [21/40], Step [90/196], Loss: 0.9675


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [21/40], Step [100/196], Loss: 0.9774


 56%|█████▌    | 110/196 [03:59<03:01,  2.11s/it]

Epoch [21/40], Step [110/196], Loss: 0.9777


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [21/40], Step [120/196], Loss: 0.9898


 66%|██████▋   | 130/196 [04:41<02:18,  2.10s/it]

Epoch [21/40], Step [130/196], Loss: 1.0063


 71%|███████▏  | 140/196 [05:02<01:57,  2.10s/it]

Epoch [21/40], Step [140/196], Loss: 0.9999


 77%|███████▋  | 150/196 [05:23<01:36,  2.11s/it]

Epoch [21/40], Step [150/196], Loss: 1.0134


 82%|████████▏ | 160/196 [05:44<01:15,  2.11s/it]

Epoch [21/40], Step [160/196], Loss: 1.0077


 87%|████████▋ | 170/196 [06:05<00:54,  2.11s/it]

Epoch [21/40], Step [170/196], Loss: 1.0344


 92%|█████████▏| 180/196 [06:26<00:33,  2.11s/it]

Epoch [21/40], Step [180/196], Loss: 1.0290


 97%|█████████▋| 190/196 [06:47<00:12,  2.11s/it]

Epoch [21/40], Step [190/196], Loss: 1.0347


100%|██████████| 196/196 [06:58<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 55.49 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:27<06:58,  2.25s/it]

Epoch [22/40], Step [10/196], Loss: 0.9395


 10%|█         | 20/196 [00:49<06:20,  2.16s/it]

Epoch [22/40], Step [20/196], Loss: 0.9399


 15%|█▌        | 30/196 [01:10<05:45,  2.08s/it]

Epoch [22/40], Step [30/196], Loss: 0.9140


 20%|██        | 40/196 [01:30<05:22,  2.07s/it]

Epoch [22/40], Step [40/196], Loss: 0.9143


 26%|██▌       | 50/196 [01:51<05:06,  2.10s/it]

Epoch [22/40], Step [50/196], Loss: 0.8888


 31%|███       | 60/196 [02:12<04:50,  2.13s/it]

Epoch [22/40], Step [60/196], Loss: 0.9153


 36%|███▌      | 70/196 [02:34<04:27,  2.12s/it]

Epoch [22/40], Step [70/196], Loss: 0.8926


 41%|████      | 80/196 [02:55<04:02,  2.09s/it]

Epoch [22/40], Step [80/196], Loss: 0.9037


 46%|████▌     | 90/196 [03:15<03:40,  2.08s/it]

Epoch [22/40], Step [90/196], Loss: 0.9092


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [22/40], Step [100/196], Loss: 0.9307


 56%|█████▌    | 110/196 [03:58<03:01,  2.11s/it]

Epoch [22/40], Step [110/196], Loss: 0.9208


 61%|██████    | 120/196 [04:19<02:40,  2.11s/it]

Epoch [22/40], Step [120/196], Loss: 0.9432


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [22/40], Step [130/196], Loss: 0.9456


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [22/40], Step [140/196], Loss: 0.9121


 77%|███████▋  | 150/196 [05:22<01:36,  2.10s/it]

Epoch [22/40], Step [150/196], Loss: 0.9328


 82%|████████▏ | 160/196 [05:43<01:15,  2.11s/it]

Epoch [22/40], Step [160/196], Loss: 0.9672


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [22/40], Step [170/196], Loss: 0.9665


 92%|█████████▏| 180/196 [06:25<00:33,  2.11s/it]

Epoch [22/40], Step [180/196], Loss: 0.9774


 97%|█████████▋| 190/196 [06:46<00:12,  2.11s/it]

Epoch [22/40], Step [190/196], Loss: 0.9875


100%|██████████| 196/196 [06:57<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 55.86 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:27<06:59,  2.26s/it]

Epoch [23/40], Step [10/196], Loss: 0.9200


 10%|█         | 20/196 [00:49<06:20,  2.16s/it]

Epoch [23/40], Step [20/196], Loss: 0.8912


 15%|█▌        | 30/196 [01:10<05:44,  2.07s/it]

Epoch [23/40], Step [30/196], Loss: 0.8476


 20%|██        | 40/196 [01:31<05:23,  2.07s/it]

Epoch [23/40], Step [40/196], Loss: 0.8410


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [23/40], Step [50/196], Loss: 0.8625


 31%|███       | 60/196 [02:13<04:51,  2.14s/it]

Epoch [23/40], Step [60/196], Loss: 0.8495


 36%|███▌      | 70/196 [02:34<04:26,  2.12s/it]

Epoch [23/40], Step [70/196], Loss: 0.8603


 41%|████      | 80/196 [02:55<04:03,  2.10s/it]

Epoch [23/40], Step [80/196], Loss: 0.8619


 46%|████▌     | 90/196 [03:16<03:41,  2.09s/it]

Epoch [23/40], Step [90/196], Loss: 0.8518


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [23/40], Step [100/196], Loss: 0.8608


 56%|█████▌    | 110/196 [03:58<03:01,  2.11s/it]

Epoch [23/40], Step [110/196], Loss: 0.8798


 61%|██████    | 120/196 [04:19<02:39,  2.10s/it]

Epoch [23/40], Step [120/196], Loss: 0.8576


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [23/40], Step [130/196], Loss: 0.8802


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [23/40], Step [140/196], Loss: 0.8819


 77%|███████▋  | 150/196 [05:22<01:36,  2.11s/it]

Epoch [23/40], Step [150/196], Loss: 0.8869


 82%|████████▏ | 160/196 [05:43<01:15,  2.11s/it]

Epoch [23/40], Step [160/196], Loss: 0.9167


 87%|████████▋ | 170/196 [06:04<00:54,  2.10s/it]

Epoch [23/40], Step [170/196], Loss: 0.8941


 92%|█████████▏| 180/196 [06:25<00:33,  2.10s/it]

Epoch [23/40], Step [180/196], Loss: 0.9077


 97%|█████████▋| 190/196 [06:46<00:12,  2.10s/it]

Epoch [23/40], Step [190/196], Loss: 0.8931


100%|██████████| 196/196 [06:58<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 59.53 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:26<06:58,  2.25s/it]

Epoch [24/40], Step [10/196], Loss: 0.8569


 10%|█         | 20/196 [00:47<06:19,  2.16s/it]

Epoch [24/40], Step [20/196], Loss: 0.8605


 15%|█▌        | 30/196 [01:08<05:46,  2.09s/it]

Epoch [24/40], Step [30/196], Loss: 0.8383


 20%|██        | 40/196 [01:29<05:22,  2.07s/it]

Epoch [24/40], Step [40/196], Loss: 0.8034


 26%|██▌       | 50/196 [01:50<05:07,  2.11s/it]

Epoch [24/40], Step [50/196], Loss: 0.8052


 31%|███       | 60/196 [02:11<04:48,  2.12s/it]

Epoch [24/40], Step [60/196], Loss: 0.8131


 36%|███▌      | 70/196 [02:32<04:25,  2.11s/it]

Epoch [24/40], Step [70/196], Loss: 0.8304


 41%|████      | 80/196 [02:53<04:02,  2.09s/it]

Epoch [24/40], Step [80/196], Loss: 0.8127


 46%|████▌     | 90/196 [03:14<03:41,  2.09s/it]

Epoch [24/40], Step [90/196], Loss: 0.8317


 51%|█████     | 100/196 [03:35<03:22,  2.11s/it]

Epoch [24/40], Step [100/196], Loss: 0.8081


 56%|█████▌    | 110/196 [03:57<03:01,  2.11s/it]

Epoch [24/40], Step [110/196], Loss: 0.8048


 61%|██████    | 120/196 [04:18<02:39,  2.10s/it]

Epoch [24/40], Step [120/196], Loss: 0.8112


 66%|██████▋   | 130/196 [04:39<02:18,  2.10s/it]

Epoch [24/40], Step [130/196], Loss: 0.8499


 71%|███████▏  | 140/196 [05:00<01:57,  2.10s/it]

Epoch [24/40], Step [140/196], Loss: 0.8435


 77%|███████▋  | 150/196 [05:21<01:36,  2.11s/it]

Epoch [24/40], Step [150/196], Loss: 0.8342


 82%|████████▏ | 160/196 [05:42<01:15,  2.11s/it]

Epoch [24/40], Step [160/196], Loss: 0.8336


 87%|████████▋ | 170/196 [06:03<00:54,  2.11s/it]

Epoch [24/40], Step [170/196], Loss: 0.8363


 92%|█████████▏| 180/196 [06:24<00:33,  2.11s/it]

Epoch [24/40], Step [180/196], Loss: 0.8297


 97%|█████████▋| 190/196 [06:45<00:12,  2.11s/it]

Epoch [24/40], Step [190/196], Loss: 0.8582


100%|██████████| 196/196 [06:56<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 59.13 %


  5%|▌         | 10/196 [00:27<06:58,  2.25s/it]

Epoch [25/40], Step [10/196], Loss: 0.7905


 10%|█         | 20/196 [00:48<06:21,  2.17s/it]

Epoch [25/40], Step [20/196], Loss: 0.7745


 15%|█▌        | 30/196 [01:09<05:45,  2.08s/it]

Epoch [25/40], Step [30/196], Loss: 0.7549


 20%|██        | 40/196 [01:30<05:22,  2.07s/it]

Epoch [25/40], Step [40/196], Loss: 0.7467


 26%|██▌       | 50/196 [01:51<05:06,  2.10s/it]

Epoch [25/40], Step [50/196], Loss: 0.7488


 31%|███       | 60/196 [02:12<04:49,  2.13s/it]

Epoch [25/40], Step [60/196], Loss: 0.7378


 36%|███▌      | 70/196 [02:34<04:27,  2.13s/it]

Epoch [25/40], Step [70/196], Loss: 0.7341


 41%|████      | 80/196 [02:55<04:03,  2.10s/it]

Epoch [25/40], Step [80/196], Loss: 0.7494


 46%|████▌     | 90/196 [03:16<03:42,  2.10s/it]

Epoch [25/40], Step [90/196], Loss: 0.7530


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [25/40], Step [100/196], Loss: 0.7553


 56%|█████▌    | 110/196 [03:58<03:02,  2.12s/it]

Epoch [25/40], Step [110/196], Loss: 0.7474


 61%|██████    | 120/196 [04:19<02:40,  2.11s/it]

Epoch [25/40], Step [120/196], Loss: 0.7668


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [25/40], Step [130/196], Loss: 0.7543


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [25/40], Step [140/196], Loss: 0.7477


 77%|███████▋  | 150/196 [05:22<01:37,  2.12s/it]

Epoch [25/40], Step [150/196], Loss: 0.7751


 82%|████████▏ | 160/196 [05:43<01:15,  2.11s/it]

Epoch [25/40], Step [160/196], Loss: 0.7852


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [25/40], Step [170/196], Loss: 0.7725


 92%|█████████▏| 180/196 [06:25<00:33,  2.10s/it]

Epoch [25/40], Step [180/196], Loss: 0.7853


 97%|█████████▋| 190/196 [06:46<00:12,  2.11s/it]

Epoch [25/40], Step [190/196], Loss: 0.7930


100%|██████████| 196/196 [06:58<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 58.32 %


  5%|▌         | 10/196 [00:28<07:01,  2.27s/it]

Epoch [26/40], Step [10/196], Loss: 0.7487


 10%|█         | 20/196 [00:50<06:19,  2.16s/it]

Epoch [26/40], Step [20/196], Loss: 0.7060


 15%|█▌        | 30/196 [01:11<05:45,  2.08s/it]

Epoch [26/40], Step [30/196], Loss: 0.6928


 20%|██        | 40/196 [01:31<05:23,  2.07s/it]

Epoch [26/40], Step [40/196], Loss: 0.6987


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [26/40], Step [50/196], Loss: 0.6848


 31%|███       | 60/196 [02:13<04:49,  2.13s/it]

Epoch [26/40], Step [60/196], Loss: 0.6859


 36%|███▌      | 70/196 [02:35<04:28,  2.13s/it]

Epoch [26/40], Step [70/196], Loss: 0.6841


 41%|████      | 80/196 [02:56<04:02,  2.09s/it]

Epoch [26/40], Step [80/196], Loss: 0.6795


 46%|████▌     | 90/196 [03:16<03:41,  2.09s/it]

Epoch [26/40], Step [90/196], Loss: 0.7047


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [26/40], Step [100/196], Loss: 0.7037


 56%|█████▌    | 110/196 [03:59<03:02,  2.12s/it]

Epoch [26/40], Step [110/196], Loss: 0.7078


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [26/40], Step [120/196], Loss: 0.7084


 66%|██████▋   | 130/196 [04:41<02:18,  2.10s/it]

Epoch [26/40], Step [130/196], Loss: 0.7114


 71%|███████▏  | 140/196 [05:02<01:57,  2.10s/it]

Epoch [26/40], Step [140/196], Loss: 0.7013


 77%|███████▋  | 150/196 [05:23<01:36,  2.10s/it]

Epoch [26/40], Step [150/196], Loss: 0.7246


 82%|████████▏ | 160/196 [05:44<01:15,  2.10s/it]

Epoch [26/40], Step [160/196], Loss: 0.7069


 87%|████████▋ | 170/196 [06:05<00:54,  2.11s/it]

Epoch [26/40], Step [170/196], Loss: 0.6941


 92%|█████████▏| 180/196 [06:26<00:33,  2.11s/it]

Epoch [26/40], Step [180/196], Loss: 0.7140


 97%|█████████▋| 190/196 [06:47<00:12,  2.11s/it]

Epoch [26/40], Step [190/196], Loss: 0.7318


100%|██████████| 196/196 [06:58<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 61.45 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:26<06:56,  2.24s/it]

Epoch [27/40], Step [10/196], Loss: 0.6790


 10%|█         | 20/196 [00:48<06:22,  2.17s/it]

Epoch [27/40], Step [20/196], Loss: 0.6709


 15%|█▌        | 30/196 [01:09<05:46,  2.09s/it]

Epoch [27/40], Step [30/196], Loss: 0.6479


 20%|██        | 40/196 [01:30<05:23,  2.07s/it]

Epoch [27/40], Step [40/196], Loss: 0.6381


 26%|██▌       | 50/196 [01:51<05:06,  2.10s/it]

Epoch [27/40], Step [50/196], Loss: 0.6421


 31%|███       | 60/196 [02:12<04:50,  2.13s/it]

Epoch [27/40], Step [60/196], Loss: 0.6329


 36%|███▌      | 70/196 [02:33<04:26,  2.11s/it]

Epoch [27/40], Step [70/196], Loss: 0.6401


 41%|████      | 80/196 [02:54<04:03,  2.10s/it]

Epoch [27/40], Step [80/196], Loss: 0.6179


 46%|████▌     | 90/196 [03:15<03:42,  2.10s/it]

Epoch [27/40], Step [90/196], Loss: 0.6244


 51%|█████     | 100/196 [03:36<03:22,  2.11s/it]

Epoch [27/40], Step [100/196], Loss: 0.6496


 56%|█████▌    | 110/196 [03:57<03:02,  2.12s/it]

Epoch [27/40], Step [110/196], Loss: 0.6544


 61%|██████    | 120/196 [04:18<02:39,  2.10s/it]

Epoch [27/40], Step [120/196], Loss: 0.6521


 66%|██████▋   | 130/196 [04:39<02:18,  2.10s/it]

Epoch [27/40], Step [130/196], Loss: 0.6636


 71%|███████▏  | 140/196 [05:00<01:57,  2.10s/it]

Epoch [27/40], Step [140/196], Loss: 0.6647


 77%|███████▋  | 150/196 [05:21<01:36,  2.10s/it]

Epoch [27/40], Step [150/196], Loss: 0.6498


 82%|████████▏ | 160/196 [05:42<01:15,  2.10s/it]

Epoch [27/40], Step [160/196], Loss: 0.6422


 87%|████████▋ | 170/196 [06:03<00:54,  2.10s/it]

Epoch [27/40], Step [170/196], Loss: 0.6453


 92%|█████████▏| 180/196 [06:24<00:33,  2.11s/it]

Epoch [27/40], Step [180/196], Loss: 0.6545


 97%|█████████▋| 190/196 [06:45<00:12,  2.11s/it]

Epoch [27/40], Step [190/196], Loss: 0.6638


100%|██████████| 196/196 [06:57<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 61.79 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:26<06:57,  2.25s/it]

Epoch [28/40], Step [10/196], Loss: 0.6105


 10%|█         | 20/196 [00:48<06:18,  2.15s/it]

Epoch [28/40], Step [20/196], Loss: 0.5984


 15%|█▌        | 30/196 [01:09<05:45,  2.08s/it]

Epoch [28/40], Step [30/196], Loss: 0.5900


 20%|██        | 40/196 [01:30<05:21,  2.06s/it]

Epoch [28/40], Step [40/196], Loss: 0.5694


 26%|██▌       | 50/196 [01:51<05:07,  2.11s/it]

Epoch [28/40], Step [50/196], Loss: 0.5901


 31%|███       | 60/196 [02:12<04:49,  2.13s/it]

Epoch [28/40], Step [60/196], Loss: 0.5882


 36%|███▌      | 70/196 [02:33<04:26,  2.12s/it]

Epoch [28/40], Step [70/196], Loss: 0.6015


 41%|████      | 80/196 [02:54<04:02,  2.09s/it]

Epoch [28/40], Step [80/196], Loss: 0.5966


 46%|████▌     | 90/196 [03:15<03:42,  2.09s/it]

Epoch [28/40], Step [90/196], Loss: 0.5898


 51%|█████     | 100/196 [03:36<03:21,  2.10s/it]

Epoch [28/40], Step [100/196], Loss: 0.5875


 56%|█████▌    | 110/196 [03:57<03:00,  2.10s/it]

Epoch [28/40], Step [110/196], Loss: 0.6054


 61%|██████    | 120/196 [04:18<02:39,  2.10s/it]

Epoch [28/40], Step [120/196], Loss: 0.5958


 66%|██████▋   | 130/196 [04:39<02:18,  2.11s/it]

Epoch [28/40], Step [130/196], Loss: 0.5849


 71%|███████▏  | 140/196 [05:00<01:58,  2.12s/it]

Epoch [28/40], Step [140/196], Loss: 0.6060


 77%|███████▋  | 150/196 [05:21<01:37,  2.11s/it]

Epoch [28/40], Step [150/196], Loss: 0.6076


 82%|████████▏ | 160/196 [05:42<01:16,  2.11s/it]

Epoch [28/40], Step [160/196], Loss: 0.5989


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [28/40], Step [170/196], Loss: 0.6034


 92%|█████████▏| 180/196 [06:25<00:33,  2.11s/it]

Epoch [28/40], Step [180/196], Loss: 0.6150


 97%|█████████▋| 190/196 [06:46<00:12,  2.11s/it]

Epoch [28/40], Step [190/196], Loss: 0.6102


100%|██████████| 196/196 [06:57<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 62.31 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:30<07:05,  2.29s/it]

Epoch [29/40], Step [10/196], Loss: 0.5755


 10%|█         | 20/196 [00:52<06:20,  2.16s/it]

Epoch [29/40], Step [20/196], Loss: 0.5555


 15%|█▌        | 30/196 [01:13<05:46,  2.09s/it]

Epoch [29/40], Step [30/196], Loss: 0.5438


 20%|██        | 40/196 [01:34<05:23,  2.07s/it]

Epoch [29/40], Step [40/196], Loss: 0.5401


 26%|██▌       | 50/196 [01:55<05:06,  2.10s/it]

Epoch [29/40], Step [50/196], Loss: 0.5493


 31%|███       | 60/196 [02:16<04:49,  2.13s/it]

Epoch [29/40], Step [60/196], Loss: 0.5469


 36%|███▌      | 70/196 [02:37<04:26,  2.11s/it]

Epoch [29/40], Step [70/196], Loss: 0.5353


 41%|████      | 80/196 [02:58<04:02,  2.09s/it]

Epoch [29/40], Step [80/196], Loss: 0.5396


 46%|████▌     | 90/196 [03:19<03:41,  2.09s/it]

Epoch [29/40], Step [90/196], Loss: 0.5474


 51%|█████     | 100/196 [03:40<03:21,  2.10s/it]

Epoch [29/40], Step [100/196], Loss: 0.5525


 56%|█████▌    | 110/196 [04:01<03:01,  2.11s/it]

Epoch [29/40], Step [110/196], Loss: 0.5587


 61%|██████    | 120/196 [04:22<02:40,  2.11s/it]

Epoch [29/40], Step [120/196], Loss: 0.5455


 66%|██████▋   | 130/196 [04:43<02:19,  2.11s/it]

Epoch [29/40], Step [130/196], Loss: 0.5415


 71%|███████▏  | 140/196 [05:04<01:57,  2.10s/it]

Epoch [29/40], Step [140/196], Loss: 0.5534


 77%|███████▋  | 150/196 [05:25<01:36,  2.11s/it]

Epoch [29/40], Step [150/196], Loss: 0.5515


 82%|████████▏ | 160/196 [05:47<01:15,  2.11s/it]

Epoch [29/40], Step [160/196], Loss: 0.5489


 87%|████████▋ | 170/196 [06:08<00:54,  2.11s/it]

Epoch [29/40], Step [170/196], Loss: 0.5484


 92%|█████████▏| 180/196 [06:29<00:33,  2.11s/it]

Epoch [29/40], Step [180/196], Loss: 0.5505


 97%|█████████▋| 190/196 [06:50<00:12,  2.10s/it]

Epoch [29/40], Step [190/196], Loss: 0.5449


100%|██████████| 196/196 [07:01<00:00,  2.15s/it]


Test Accuracy of the student model on the test images: 64.04 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:26<06:55,  2.23s/it]

Epoch [30/40], Step [10/196], Loss: 0.5257


 10%|█         | 20/196 [00:48<06:19,  2.16s/it]

Epoch [30/40], Step [20/196], Loss: 0.5224


 15%|█▌        | 30/196 [01:09<05:44,  2.08s/it]

Epoch [30/40], Step [30/196], Loss: 0.5082


 20%|██        | 40/196 [01:29<05:22,  2.07s/it]

Epoch [30/40], Step [40/196], Loss: 0.5111


 26%|██▌       | 50/196 [01:50<05:07,  2.10s/it]

Epoch [30/40], Step [50/196], Loss: 0.5047


 31%|███       | 60/196 [02:11<04:49,  2.13s/it]

Epoch [30/40], Step [60/196], Loss: 0.4981


 36%|███▌      | 70/196 [02:33<04:26,  2.11s/it]

Epoch [30/40], Step [70/196], Loss: 0.5079


 41%|████      | 80/196 [02:53<04:02,  2.09s/it]

Epoch [30/40], Step [80/196], Loss: 0.5082


 46%|████▌     | 90/196 [03:14<03:42,  2.10s/it]

Epoch [30/40], Step [90/196], Loss: 0.4967


 51%|█████     | 100/196 [03:35<03:21,  2.10s/it]

Epoch [30/40], Step [100/196], Loss: 0.5062


 56%|█████▌    | 110/196 [03:57<03:01,  2.11s/it]

Epoch [30/40], Step [110/196], Loss: 0.5019


 61%|██████    | 120/196 [04:18<02:40,  2.11s/it]

Epoch [30/40], Step [120/196], Loss: 0.4996


 66%|██████▋   | 130/196 [04:39<02:18,  2.10s/it]

Epoch [30/40], Step [130/196], Loss: 0.5132


 71%|███████▏  | 140/196 [05:00<01:57,  2.10s/it]

Epoch [30/40], Step [140/196], Loss: 0.5118


 77%|███████▋  | 150/196 [05:21<01:36,  2.10s/it]

Epoch [30/40], Step [150/196], Loss: 0.5160


 82%|████████▏ | 160/196 [05:42<01:15,  2.10s/it]

Epoch [30/40], Step [160/196], Loss: 0.5038


 87%|████████▋ | 170/196 [06:03<00:54,  2.11s/it]

Epoch [30/40], Step [170/196], Loss: 0.5071


 92%|█████████▏| 180/196 [06:24<00:33,  2.11s/it]

Epoch [30/40], Step [180/196], Loss: 0.5080


 97%|█████████▋| 190/196 [06:45<00:12,  2.11s/it]

Epoch [30/40], Step [190/196], Loss: 0.5285


100%|██████████| 196/196 [06:56<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 65.21 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:29<07:05,  2.29s/it]

Epoch [31/40], Step [10/196], Loss: 0.4822


 10%|█         | 20/196 [00:51<06:19,  2.16s/it]

Epoch [31/40], Step [20/196], Loss: 0.4745


 15%|█▌        | 30/196 [01:12<05:44,  2.07s/it]

Epoch [31/40], Step [30/196], Loss: 0.4611


 20%|██        | 40/196 [01:32<05:22,  2.07s/it]

Epoch [31/40], Step [40/196], Loss: 0.4651


 26%|██▌       | 50/196 [01:53<05:06,  2.10s/it]

Epoch [31/40], Step [50/196], Loss: 0.4618


 31%|███       | 60/196 [02:14<04:48,  2.12s/it]

Epoch [31/40], Step [60/196], Loss: 0.4634


 36%|███▌      | 70/196 [02:36<04:26,  2.11s/it]

Epoch [31/40], Step [70/196], Loss: 0.4585


 41%|████      | 80/196 [02:57<04:03,  2.10s/it]

Epoch [31/40], Step [80/196], Loss: 0.4572


 46%|████▌     | 90/196 [03:17<03:42,  2.10s/it]

Epoch [31/40], Step [90/196], Loss: 0.4617


 51%|█████     | 100/196 [03:39<03:22,  2.11s/it]

Epoch [31/40], Step [100/196], Loss: 0.4751


 56%|█████▌    | 110/196 [04:00<03:01,  2.11s/it]

Epoch [31/40], Step [110/196], Loss: 0.4646


 61%|██████    | 120/196 [04:21<02:39,  2.10s/it]

Epoch [31/40], Step [120/196], Loss: 0.4689


 66%|██████▋   | 130/196 [04:42<02:19,  2.11s/it]

Epoch [31/40], Step [130/196], Loss: 0.4530


 71%|███████▏  | 140/196 [05:03<01:57,  2.10s/it]

Epoch [31/40], Step [140/196], Loss: 0.4670


 77%|███████▋  | 150/196 [05:24<01:36,  2.10s/it]

Epoch [31/40], Step [150/196], Loss: 0.4538


 82%|████████▏ | 160/196 [05:45<01:15,  2.10s/it]

Epoch [31/40], Step [160/196], Loss: 0.4624


 87%|████████▋ | 170/196 [06:06<00:54,  2.10s/it]

Epoch [31/40], Step [170/196], Loss: 0.4567


 92%|█████████▏| 180/196 [06:27<00:33,  2.10s/it]

Epoch [31/40], Step [180/196], Loss: 0.4717


 97%|█████████▋| 190/196 [06:48<00:12,  2.10s/it]

Epoch [31/40], Step [190/196], Loss: 0.4599


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 66.55 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:29<08:27,  2.73s/it]

Epoch [32/40], Step [10/196], Loss: 0.4386


 10%|█         | 20/196 [00:51<06:21,  2.17s/it]

Epoch [32/40], Step [20/196], Loss: 0.4448


 15%|█▌        | 30/196 [01:12<05:46,  2.09s/it]

Epoch [32/40], Step [30/196], Loss: 0.4359


 20%|██        | 40/196 [01:33<05:22,  2.07s/it]

Epoch [32/40], Step [40/196], Loss: 0.4288


 26%|██▌       | 50/196 [01:54<05:07,  2.10s/it]

Epoch [32/40], Step [50/196], Loss: 0.4226


 31%|███       | 60/196 [02:15<04:49,  2.13s/it]

Epoch [32/40], Step [60/196], Loss: 0.4286


 36%|███▌      | 70/196 [02:36<04:26,  2.12s/it]

Epoch [32/40], Step [70/196], Loss: 0.4232


 41%|████      | 80/196 [02:57<04:02,  2.09s/it]

Epoch [32/40], Step [80/196], Loss: 0.4255


 46%|████▌     | 90/196 [03:18<03:42,  2.09s/it]

Epoch [32/40], Step [90/196], Loss: 0.4174


 51%|█████     | 100/196 [03:39<03:21,  2.10s/it]

Epoch [32/40], Step [100/196], Loss: 0.4179


 56%|█████▌    | 110/196 [04:00<03:02,  2.12s/it]

Epoch [32/40], Step [110/196], Loss: 0.4244


 61%|██████    | 120/196 [04:22<02:40,  2.11s/it]

Epoch [32/40], Step [120/196], Loss: 0.4143


 66%|██████▋   | 130/196 [04:43<02:18,  2.11s/it]

Epoch [32/40], Step [130/196], Loss: 0.4296


 71%|███████▏  | 140/196 [05:04<01:57,  2.10s/it]

Epoch [32/40], Step [140/196], Loss: 0.4250


 77%|███████▋  | 150/196 [05:25<01:36,  2.11s/it]

Epoch [32/40], Step [150/196], Loss: 0.4170


 82%|████████▏ | 160/196 [05:46<01:15,  2.11s/it]

Epoch [32/40], Step [160/196], Loss: 0.4256


 87%|████████▋ | 170/196 [06:07<00:54,  2.11s/it]

Epoch [32/40], Step [170/196], Loss: 0.4332


 92%|█████████▏| 180/196 [06:28<00:33,  2.11s/it]

Epoch [32/40], Step [180/196], Loss: 0.4249


 97%|█████████▋| 190/196 [06:49<00:12,  2.10s/it]

Epoch [32/40], Step [190/196], Loss: 0.4289


100%|██████████| 196/196 [07:00<00:00,  2.15s/it]


Test Accuracy of the student model on the test images: 66.35 %


  5%|▌         | 10/196 [00:28<07:00,  2.26s/it]

Epoch [33/40], Step [10/196], Loss: 0.4123


 10%|█         | 20/196 [00:50<06:19,  2.15s/it]

Epoch [33/40], Step [20/196], Loss: 0.4043


 15%|█▌        | 30/196 [01:11<05:46,  2.08s/it]

Epoch [33/40], Step [30/196], Loss: 0.4055


 20%|██        | 40/196 [01:31<05:23,  2.08s/it]

Epoch [33/40], Step [40/196], Loss: 0.3873


 26%|██▌       | 50/196 [01:52<05:07,  2.11s/it]

Epoch [33/40], Step [50/196], Loss: 0.3950


 31%|███       | 60/196 [02:14<04:49,  2.13s/it]

Epoch [33/40], Step [60/196], Loss: 0.3922


 36%|███▌      | 70/196 [02:35<04:26,  2.12s/it]

Epoch [33/40], Step [70/196], Loss: 0.3916


 41%|████      | 80/196 [02:56<04:02,  2.09s/it]

Epoch [33/40], Step [80/196], Loss: 0.3860


 46%|████▌     | 90/196 [03:17<03:41,  2.09s/it]

Epoch [33/40], Step [90/196], Loss: 0.3935


 51%|█████     | 100/196 [03:38<03:21,  2.10s/it]

Epoch [33/40], Step [100/196], Loss: 0.3989


 56%|█████▌    | 110/196 [03:59<03:01,  2.11s/it]

Epoch [33/40], Step [110/196], Loss: 0.3825


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [33/40], Step [120/196], Loss: 0.3917


 66%|██████▋   | 130/196 [04:41<02:18,  2.10s/it]

Epoch [33/40], Step [130/196], Loss: 0.3861


 71%|███████▏  | 140/196 [05:02<01:58,  2.11s/it]

Epoch [33/40], Step [140/196], Loss: 0.3857


 77%|███████▋  | 150/196 [05:23<01:36,  2.10s/it]

Epoch [33/40], Step [150/196], Loss: 0.3908


 82%|████████▏ | 160/196 [05:44<01:15,  2.10s/it]

Epoch [33/40], Step [160/196], Loss: 0.3922


 87%|████████▋ | 170/196 [06:05<00:55,  2.12s/it]

Epoch [33/40], Step [170/196], Loss: 0.4027


 92%|█████████▏| 180/196 [06:27<00:33,  2.12s/it]

Epoch [33/40], Step [180/196], Loss: 0.3991


 97%|█████████▋| 190/196 [06:48<00:12,  2.11s/it]

Epoch [33/40], Step [190/196], Loss: 0.3947


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 67.21 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:26<06:57,  2.25s/it]

Epoch [34/40], Step [10/196], Loss: 0.3757


 10%|█         | 20/196 [00:48<06:19,  2.16s/it]

Epoch [34/40], Step [20/196], Loss: 0.3667


 15%|█▌        | 30/196 [01:09<05:45,  2.08s/it]

Epoch [34/40], Step [30/196], Loss: 0.3671


 20%|██        | 40/196 [01:30<05:22,  2.07s/it]

Epoch [34/40], Step [40/196], Loss: 0.3607


 26%|██▌       | 50/196 [01:51<05:07,  2.11s/it]

Epoch [34/40], Step [50/196], Loss: 0.3603


 31%|███       | 60/196 [02:12<04:49,  2.13s/it]

Epoch [34/40], Step [60/196], Loss: 0.3628


 36%|███▌      | 70/196 [02:33<04:27,  2.12s/it]

Epoch [34/40], Step [70/196], Loss: 0.3684


 41%|████      | 80/196 [02:54<04:02,  2.09s/it]

Epoch [34/40], Step [80/196], Loss: 0.3629


 46%|████▌     | 90/196 [03:15<03:43,  2.10s/it]

Epoch [34/40], Step [90/196], Loss: 0.3681


 51%|█████     | 100/196 [03:36<03:23,  2.12s/it]

Epoch [34/40], Step [100/196], Loss: 0.3654


 56%|█████▌    | 110/196 [03:57<03:02,  2.12s/it]

Epoch [34/40], Step [110/196], Loss: 0.3744


 61%|██████    | 120/196 [04:19<02:40,  2.11s/it]

Epoch [34/40], Step [120/196], Loss: 0.3676


 66%|██████▋   | 130/196 [04:40<02:18,  2.09s/it]

Epoch [34/40], Step [130/196], Loss: 0.3637


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [34/40], Step [140/196], Loss: 0.3662


 77%|███████▋  | 150/196 [05:22<01:36,  2.11s/it]

Epoch [34/40], Step [150/196], Loss: 0.3684


 82%|████████▏ | 160/196 [05:43<01:15,  2.11s/it]

Epoch [34/40], Step [160/196], Loss: 0.3719


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [34/40], Step [170/196], Loss: 0.3661


 92%|█████████▏| 180/196 [06:25<00:33,  2.10s/it]

Epoch [34/40], Step [180/196], Loss: 0.3661


 97%|█████████▋| 190/196 [06:46<00:12,  2.10s/it]

Epoch [34/40], Step [190/196], Loss: 0.3755


100%|██████████| 196/196 [06:57<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 67.86 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:27<06:59,  2.26s/it]

Epoch [35/40], Step [10/196], Loss: 0.3524


 10%|█         | 20/196 [00:49<06:19,  2.16s/it]

Epoch [35/40], Step [20/196], Loss: 0.3504


 15%|█▌        | 30/196 [01:10<05:46,  2.09s/it]

Epoch [35/40], Step [30/196], Loss: 0.3418


 20%|██        | 40/196 [01:31<05:23,  2.07s/it]

Epoch [35/40], Step [40/196], Loss: 0.3426


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [35/40], Step [50/196], Loss: 0.3455


 31%|███       | 60/196 [02:13<04:48,  2.12s/it]

Epoch [35/40], Step [60/196], Loss: 0.3468


 36%|███▌      | 70/196 [02:34<04:26,  2.12s/it]

Epoch [35/40], Step [70/196], Loss: 0.3374


 41%|████      | 80/196 [02:55<04:03,  2.10s/it]

Epoch [35/40], Step [80/196], Loss: 0.3519


 46%|████▌     | 90/196 [03:16<03:41,  2.09s/it]

Epoch [35/40], Step [90/196], Loss: 0.3492


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [35/40], Step [100/196], Loss: 0.3474


 56%|█████▌    | 110/196 [03:58<03:01,  2.12s/it]

Epoch [35/40], Step [110/196], Loss: 0.3433


 61%|██████    | 120/196 [04:19<02:39,  2.10s/it]

Epoch [35/40], Step [120/196], Loss: 0.3466


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [35/40], Step [130/196], Loss: 0.3419


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [35/40], Step [140/196], Loss: 0.3457


 77%|███████▋  | 150/196 [05:22<01:37,  2.11s/it]

Epoch [35/40], Step [150/196], Loss: 0.3479


 82%|████████▏ | 160/196 [05:43<01:15,  2.11s/it]

Epoch [35/40], Step [160/196], Loss: 0.3431


 87%|████████▋ | 170/196 [06:05<00:54,  2.10s/it]

Epoch [35/40], Step [170/196], Loss: 0.3452


 92%|█████████▏| 180/196 [06:26<00:33,  2.10s/it]

Epoch [35/40], Step [180/196], Loss: 0.3475


 97%|█████████▋| 190/196 [06:47<00:12,  2.10s/it]

Epoch [35/40], Step [190/196], Loss: 0.3472


100%|██████████| 196/196 [06:58<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 68.00 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:26<06:55,  2.23s/it]

Epoch [36/40], Step [10/196], Loss: 0.3379


 10%|█         | 20/196 [00:48<06:20,  2.16s/it]

Epoch [36/40], Step [20/196], Loss: 0.3226


 15%|█▌        | 30/196 [01:09<05:46,  2.08s/it]

Epoch [36/40], Step [30/196], Loss: 0.3306


 20%|██        | 40/196 [01:30<05:21,  2.06s/it]

Epoch [36/40], Step [40/196], Loss: 0.3280


 26%|██▌       | 50/196 [01:51<05:06,  2.10s/it]

Epoch [36/40], Step [50/196], Loss: 0.3277


 31%|███       | 60/196 [02:12<04:48,  2.12s/it]

Epoch [36/40], Step [60/196], Loss: 0.3216


 36%|███▌      | 70/196 [02:33<04:26,  2.12s/it]

Epoch [36/40], Step [70/196], Loss: 0.3312


 41%|████      | 80/196 [02:54<04:02,  2.09s/it]

Epoch [36/40], Step [80/196], Loss: 0.3292


 46%|████▌     | 90/196 [03:15<03:41,  2.09s/it]

Epoch [36/40], Step [90/196], Loss: 0.3272


 51%|█████     | 100/196 [03:36<03:21,  2.10s/it]

Epoch [36/40], Step [100/196], Loss: 0.3260


 56%|█████▌    | 110/196 [03:57<03:02,  2.12s/it]

Epoch [36/40], Step [110/196], Loss: 0.3232


 61%|██████    | 120/196 [04:18<02:40,  2.11s/it]

Epoch [36/40], Step [120/196], Loss: 0.3259


 66%|██████▋   | 130/196 [04:39<02:18,  2.10s/it]

Epoch [36/40], Step [130/196], Loss: 0.3197


 71%|███████▏  | 140/196 [05:00<01:57,  2.11s/it]

Epoch [36/40], Step [140/196], Loss: 0.3317


 77%|███████▋  | 150/196 [05:21<01:37,  2.11s/it]

Epoch [36/40], Step [150/196], Loss: 0.3278


 82%|████████▏ | 160/196 [05:42<01:15,  2.10s/it]

Epoch [36/40], Step [160/196], Loss: 0.3351


 87%|████████▋ | 170/196 [06:03<00:54,  2.10s/it]

Epoch [36/40], Step [170/196], Loss: 0.3262


 92%|█████████▏| 180/196 [06:24<00:33,  2.10s/it]

Epoch [36/40], Step [180/196], Loss: 0.3251


 97%|█████████▋| 190/196 [06:45<00:12,  2.11s/it]

Epoch [36/40], Step [190/196], Loss: 0.3244


100%|██████████| 196/196 [06:56<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 68.59 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:30<07:14,  2.34s/it]

Epoch [37/40], Step [10/196], Loss: 0.3174


 10%|█         | 20/196 [00:52<06:20,  2.16s/it]

Epoch [37/40], Step [20/196], Loss: 0.3179


 15%|█▌        | 30/196 [01:13<05:46,  2.09s/it]

Epoch [37/40], Step [30/196], Loss: 0.3135


 20%|██        | 40/196 [01:34<05:22,  2.07s/it]

Epoch [37/40], Step [40/196], Loss: 0.3137


 26%|██▌       | 50/196 [01:55<05:06,  2.10s/it]

Epoch [37/40], Step [50/196], Loss: 0.3145


 31%|███       | 60/196 [02:16<04:50,  2.13s/it]

Epoch [37/40], Step [60/196], Loss: 0.3110


 36%|███▌      | 70/196 [02:37<04:27,  2.12s/it]

Epoch [37/40], Step [70/196], Loss: 0.3144


 41%|████      | 80/196 [02:58<04:02,  2.09s/it]

Epoch [37/40], Step [80/196], Loss: 0.3170


 46%|████▌     | 90/196 [03:19<03:41,  2.09s/it]

Epoch [37/40], Step [90/196], Loss: 0.3120


 51%|█████     | 100/196 [03:40<03:22,  2.11s/it]

Epoch [37/40], Step [100/196], Loss: 0.3119


 56%|█████▌    | 110/196 [04:01<03:01,  2.11s/it]

Epoch [37/40], Step [110/196], Loss: 0.3140


 61%|██████    | 120/196 [04:22<02:40,  2.11s/it]

Epoch [37/40], Step [120/196], Loss: 0.3120


 66%|██████▋   | 130/196 [04:43<02:18,  2.10s/it]

Epoch [37/40], Step [130/196], Loss: 0.3110


 71%|███████▏  | 140/196 [05:04<01:57,  2.09s/it]

Epoch [37/40], Step [140/196], Loss: 0.3161


 77%|███████▋  | 150/196 [05:25<01:36,  2.10s/it]

Epoch [37/40], Step [150/196], Loss: 0.3172


 82%|████████▏ | 160/196 [05:46<01:15,  2.11s/it]

Epoch [37/40], Step [160/196], Loss: 0.3171


 87%|████████▋ | 170/196 [06:07<00:54,  2.11s/it]

Epoch [37/40], Step [170/196], Loss: 0.3190


 92%|█████████▏| 180/196 [06:28<00:33,  2.11s/it]

Epoch [37/40], Step [180/196], Loss: 0.3169


 97%|█████████▋| 190/196 [06:49<00:12,  2.10s/it]

Epoch [37/40], Step [190/196], Loss: 0.3144


100%|██████████| 196/196 [07:01<00:00,  2.15s/it]


Test Accuracy of the student model on the test images: 68.58 %


  5%|▌         | 10/196 [00:27<06:58,  2.25s/it]

Epoch [38/40], Step [10/196], Loss: 0.3060


 10%|█         | 20/196 [00:49<06:21,  2.17s/it]

Epoch [38/40], Step [20/196], Loss: 0.3100


 15%|█▌        | 30/196 [01:10<05:44,  2.08s/it]

Epoch [38/40], Step [30/196], Loss: 0.3061


 20%|██        | 40/196 [01:30<05:22,  2.07s/it]

Epoch [38/40], Step [40/196], Loss: 0.3046


 26%|██▌       | 50/196 [01:51<05:06,  2.10s/it]

Epoch [38/40], Step [50/196], Loss: 0.3052


 31%|███       | 60/196 [02:12<04:49,  2.13s/it]

Epoch [38/40], Step [60/196], Loss: 0.3120


 36%|███▌      | 70/196 [02:34<04:26,  2.11s/it]

Epoch [38/40], Step [70/196], Loss: 0.3063


 41%|████      | 80/196 [02:55<04:01,  2.09s/it]

Epoch [38/40], Step [80/196], Loss: 0.3103


 46%|████▌     | 90/196 [03:15<03:42,  2.10s/it]

Epoch [38/40], Step [90/196], Loss: 0.3089


 51%|█████     | 100/196 [03:37<03:23,  2.12s/it]

Epoch [38/40], Step [100/196], Loss: 0.3012


 56%|█████▌    | 110/196 [03:58<03:01,  2.11s/it]

Epoch [38/40], Step [110/196], Loss: 0.3054


 61%|██████    | 120/196 [04:19<02:39,  2.10s/it]

Epoch [38/40], Step [120/196], Loss: 0.3044


 66%|██████▋   | 130/196 [04:40<02:18,  2.09s/it]

Epoch [38/40], Step [130/196], Loss: 0.3044


 71%|███████▏  | 140/196 [05:01<01:57,  2.09s/it]

Epoch [38/40], Step [140/196], Loss: 0.3084


 77%|███████▋  | 150/196 [05:22<01:36,  2.11s/it]

Epoch [38/40], Step [150/196], Loss: 0.3085


 82%|████████▏ | 160/196 [05:43<01:15,  2.10s/it]

Epoch [38/40], Step [160/196], Loss: 0.3062


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [38/40], Step [170/196], Loss: 0.3109


 92%|█████████▏| 180/196 [06:25<00:33,  2.11s/it]

Epoch [38/40], Step [180/196], Loss: 0.3050


 97%|█████████▋| 190/196 [06:46<00:12,  2.11s/it]

Epoch [38/40], Step [190/196], Loss: 0.3089


100%|██████████| 196/196 [06:57<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 68.87 %
Saved best model to student_model_alg2.pth


  5%|▌         | 10/196 [00:26<06:55,  2.23s/it]

Epoch [39/40], Step [10/196], Loss: 0.3050


 10%|█         | 20/196 [00:48<06:20,  2.16s/it]

Epoch [39/40], Step [20/196], Loss: 0.3022


 15%|█▌        | 30/196 [01:09<05:45,  2.08s/it]

Epoch [39/40], Step [30/196], Loss: 0.3025


 20%|██        | 40/196 [01:29<05:22,  2.07s/it]

Epoch [39/40], Step [40/196], Loss: 0.3048


 26%|██▌       | 50/196 [01:50<05:06,  2.10s/it]

Epoch [39/40], Step [50/196], Loss: 0.2954


 31%|███       | 60/196 [02:11<04:49,  2.13s/it]

Epoch [39/40], Step [60/196], Loss: 0.3125


 36%|███▌      | 70/196 [02:33<04:28,  2.13s/it]

Epoch [39/40], Step [70/196], Loss: 0.2990


 41%|████      | 80/196 [02:54<04:02,  2.09s/it]

Epoch [39/40], Step [80/196], Loss: 0.3061


 46%|████▌     | 90/196 [03:15<03:42,  2.10s/it]

Epoch [39/40], Step [90/196], Loss: 0.3034


 51%|█████     | 100/196 [03:36<03:22,  2.11s/it]

Epoch [39/40], Step [100/196], Loss: 0.2963


 56%|█████▌    | 110/196 [03:57<03:01,  2.12s/it]

Epoch [39/40], Step [110/196], Loss: 0.3003


 61%|██████    | 120/196 [04:18<02:39,  2.10s/it]

Epoch [39/40], Step [120/196], Loss: 0.3014


 66%|██████▋   | 130/196 [04:39<02:18,  2.09s/it]

Epoch [39/40], Step [130/196], Loss: 0.2985


 71%|███████▏  | 140/196 [05:00<01:57,  2.10s/it]

Epoch [39/40], Step [140/196], Loss: 0.3006


 77%|███████▋  | 150/196 [05:21<01:37,  2.11s/it]

Epoch [39/40], Step [150/196], Loss: 0.3014


 82%|████████▏ | 160/196 [05:42<01:16,  2.11s/it]

Epoch [39/40], Step [160/196], Loss: 0.2959


 87%|████████▋ | 170/196 [06:03<00:54,  2.10s/it]

Epoch [39/40], Step [170/196], Loss: 0.3041


 92%|█████████▏| 180/196 [06:24<00:33,  2.10s/it]

Epoch [39/40], Step [180/196], Loss: 0.3020


 97%|█████████▋| 190/196 [06:45<00:12,  2.10s/it]

Epoch [39/40], Step [190/196], Loss: 0.3058


100%|██████████| 196/196 [06:56<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 68.72 %


  5%|▌         | 10/196 [00:28<07:01,  2.27s/it]

Epoch [40/40], Step [10/196], Loss: 0.2948


 10%|█         | 20/196 [00:49<06:17,  2.14s/it]

Epoch [40/40], Step [20/196], Loss: 0.2937


 15%|█▌        | 30/196 [01:10<05:45,  2.08s/it]

Epoch [40/40], Step [30/196], Loss: 0.2943


 20%|██        | 40/196 [01:31<05:22,  2.07s/it]

Epoch [40/40], Step [40/196], Loss: 0.3023


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [40/40], Step [50/196], Loss: 0.3028


 31%|███       | 60/196 [02:13<04:50,  2.13s/it]

Epoch [40/40], Step [60/196], Loss: 0.2979


 36%|███▌      | 70/196 [02:34<04:26,  2.12s/it]

Epoch [40/40], Step [70/196], Loss: 0.3043


 41%|████      | 80/196 [02:55<04:02,  2.09s/it]

Epoch [40/40], Step [80/196], Loss: 0.2976


 46%|████▌     | 90/196 [03:16<03:41,  2.09s/it]

Epoch [40/40], Step [90/196], Loss: 0.2996


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [40/40], Step [100/196], Loss: 0.2953


 56%|█████▌    | 110/196 [03:58<03:01,  2.11s/it]

Epoch [40/40], Step [110/196], Loss: 0.3006


 61%|██████    | 120/196 [04:19<02:39,  2.10s/it]

Epoch [40/40], Step [120/196], Loss: 0.2971


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [40/40], Step [130/196], Loss: 0.3027


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [40/40], Step [140/196], Loss: 0.2957


 77%|███████▋  | 150/196 [05:22<01:36,  2.10s/it]

Epoch [40/40], Step [150/196], Loss: 0.2937


 82%|████████▏ | 160/196 [05:43<01:15,  2.10s/it]

Epoch [40/40], Step [160/196], Loss: 0.2983


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [40/40], Step [170/196], Loss: 0.2914


 92%|█████████▏| 180/196 [06:25<00:33,  2.10s/it]

Epoch [40/40], Step [180/196], Loss: 0.2991


 97%|█████████▋| 190/196 [06:47<00:12,  2.10s/it]

Epoch [40/40], Step [190/196], Loss: 0.2968


100%|██████████| 196/196 [06:58<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 68.82 %
Best Accuracy: 68.87 %
Student model (Algorithm 2) saved to student_model_alg2.pth with best accuracy: 68.87%
Training Student Model (ResNet-18) with both Teacher and TA Models (New Distillation Algorithm, Algorithm 1)


  scaler = GradScaler()


Adjusted learning rate: 0.2


  with autocast():
  5%|▌         | 10/196 [00:25<06:53,  2.22s/it]

Epoch [1/40], Step [10/196], Loss: 6.9025


 10%|█         | 20/196 [00:47<06:20,  2.16s/it]

Epoch [1/40], Step [20/196], Loss: 6.4406


 15%|█▌        | 30/196 [01:08<05:46,  2.09s/it]

Epoch [1/40], Step [30/196], Loss: 6.1122


 20%|██        | 40/196 [01:28<05:22,  2.07s/it]

Epoch [1/40], Step [40/196], Loss: 6.0186


 26%|██▌       | 50/196 [01:49<05:05,  2.09s/it]

Epoch [1/40], Step [50/196], Loss: 5.9413


 31%|███       | 60/196 [02:10<04:49,  2.13s/it]

Epoch [1/40], Step [60/196], Loss: 5.7782


 36%|███▌      | 70/196 [02:32<04:27,  2.12s/it]

Epoch [1/40], Step [70/196], Loss: 5.6953


 41%|████      | 80/196 [02:53<04:04,  2.11s/it]

Epoch [1/40], Step [80/196], Loss: 5.5517


 46%|████▌     | 90/196 [03:14<03:42,  2.10s/it]

Epoch [1/40], Step [90/196], Loss: 5.5222


 51%|█████     | 100/196 [03:35<03:22,  2.11s/it]

Epoch [1/40], Step [100/196], Loss: 5.3430


 56%|█████▌    | 110/196 [03:56<03:01,  2.11s/it]

Epoch [1/40], Step [110/196], Loss: 5.2226


 61%|██████    | 120/196 [04:17<02:40,  2.11s/it]

Epoch [1/40], Step [120/196], Loss: 5.1776


 66%|██████▋   | 130/196 [04:38<02:19,  2.11s/it]

Epoch [1/40], Step [130/196], Loss: 5.0270


 71%|███████▏  | 140/196 [04:59<01:57,  2.11s/it]

Epoch [1/40], Step [140/196], Loss: 4.9433


 77%|███████▋  | 150/196 [05:20<01:36,  2.11s/it]

Epoch [1/40], Step [150/196], Loss: 4.8001


 82%|████████▏ | 160/196 [05:41<01:15,  2.10s/it]

Epoch [1/40], Step [160/196], Loss: 4.8135


 87%|████████▋ | 170/196 [06:02<00:54,  2.11s/it]

Epoch [1/40], Step [170/196], Loss: 4.7685


 92%|█████████▏| 180/196 [06:23<00:33,  2.11s/it]

Epoch [1/40], Step [180/196], Loss: 4.7270


 97%|█████████▋| 190/196 [06:45<00:12,  2.11s/it]

Epoch [1/40], Step [190/196], Loss: 4.5659


100%|██████████| 196/196 [06:56<00:00,  2.12s/it]
  with autocast():


Test Accuracy of the student model on the test images: 7.72 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:29<07:04,  2.28s/it]

Epoch [2/40], Step [10/196], Loss: 4.5443


 10%|█         | 20/196 [00:51<06:20,  2.16s/it]

Epoch [2/40], Step [20/196], Loss: 4.3959


 15%|█▌        | 30/196 [01:11<05:45,  2.08s/it]

Epoch [2/40], Step [30/196], Loss: 4.3781


 20%|██        | 40/196 [01:32<05:22,  2.07s/it]

Epoch [2/40], Step [40/196], Loss: 4.4034


 26%|██▌       | 50/196 [01:53<05:06,  2.10s/it]

Epoch [2/40], Step [50/196], Loss: 4.3056


 31%|███       | 60/196 [02:14<04:49,  2.13s/it]

Epoch [2/40], Step [60/196], Loss: 4.2618


 36%|███▌      | 70/196 [02:36<04:27,  2.12s/it]

Epoch [2/40], Step [70/196], Loss: 4.2377


 41%|████      | 80/196 [02:57<04:03,  2.10s/it]

Epoch [2/40], Step [80/196], Loss: 4.2223


 46%|████▌     | 90/196 [03:18<03:42,  2.10s/it]

Epoch [2/40], Step [90/196], Loss: 4.0773


 51%|█████     | 100/196 [03:39<03:22,  2.11s/it]

Epoch [2/40], Step [100/196], Loss: 4.0744


 56%|█████▌    | 110/196 [04:00<03:01,  2.11s/it]

Epoch [2/40], Step [110/196], Loss: 3.9610


 61%|██████    | 120/196 [04:21<02:40,  2.12s/it]

Epoch [2/40], Step [120/196], Loss: 3.9762


 66%|██████▋   | 130/196 [04:42<02:19,  2.11s/it]

Epoch [2/40], Step [130/196], Loss: 3.9099


 71%|███████▏  | 140/196 [05:03<01:57,  2.10s/it]

Epoch [2/40], Step [140/196], Loss: 3.8840


 77%|███████▋  | 150/196 [05:24<01:36,  2.10s/it]

Epoch [2/40], Step [150/196], Loss: 3.8348


 82%|████████▏ | 160/196 [05:45<01:15,  2.11s/it]

Epoch [2/40], Step [160/196], Loss: 3.8182


 87%|████████▋ | 170/196 [06:06<00:55,  2.12s/it]

Epoch [2/40], Step [170/196], Loss: 3.8322


 92%|█████████▏| 180/196 [06:27<00:33,  2.12s/it]

Epoch [2/40], Step [180/196], Loss: 3.7311


 97%|█████████▋| 190/196 [06:49<00:12,  2.11s/it]

Epoch [2/40], Step [190/196], Loss: 3.7450


100%|██████████| 196/196 [07:00<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 18.72 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:30<07:08,  2.30s/it]

Epoch [3/40], Step [10/196], Loss: 3.7362


 10%|█         | 20/196 [00:52<06:20,  2.16s/it]

Epoch [3/40], Step [20/196], Loss: 3.6394


 15%|█▌        | 30/196 [01:13<05:45,  2.08s/it]

Epoch [3/40], Step [30/196], Loss: 3.6068


 20%|██        | 40/196 [01:34<05:23,  2.07s/it]

Epoch [3/40], Step [40/196], Loss: 3.5700


 26%|██▌       | 50/196 [01:55<05:06,  2.10s/it]

Epoch [3/40], Step [50/196], Loss: 3.5267


 31%|███       | 60/196 [02:16<04:50,  2.14s/it]

Epoch [3/40], Step [60/196], Loss: 3.5057


 36%|███▌      | 70/196 [02:37<04:27,  2.13s/it]

Epoch [3/40], Step [70/196], Loss: 3.4606


 41%|████      | 80/196 [02:58<04:03,  2.10s/it]

Epoch [3/40], Step [80/196], Loss: 3.3516


 46%|████▌     | 90/196 [03:19<03:43,  2.10s/it]

Epoch [3/40], Step [90/196], Loss: 3.3623


 51%|█████     | 100/196 [03:40<03:22,  2.11s/it]

Epoch [3/40], Step [100/196], Loss: 3.3665


 56%|█████▌    | 110/196 [04:02<03:02,  2.12s/it]

Epoch [3/40], Step [110/196], Loss: 3.3474


 61%|██████    | 120/196 [04:23<02:40,  2.11s/it]

Epoch [3/40], Step [120/196], Loss: 3.3461


 66%|██████▋   | 130/196 [04:44<02:19,  2.11s/it]

Epoch [3/40], Step [130/196], Loss: 3.2242


 71%|███████▏  | 140/196 [05:05<01:57,  2.11s/it]

Epoch [3/40], Step [140/196], Loss: 3.2902


 77%|███████▋  | 150/196 [05:26<01:36,  2.11s/it]

Epoch [3/40], Step [150/196], Loss: 3.2022


 82%|████████▏ | 160/196 [05:47<01:16,  2.12s/it]

Epoch [3/40], Step [160/196], Loss: 3.1872


 87%|████████▋ | 170/196 [06:08<00:54,  2.12s/it]

Epoch [3/40], Step [170/196], Loss: 3.0951


 92%|█████████▏| 180/196 [06:29<00:33,  2.11s/it]

Epoch [3/40], Step [180/196], Loss: 3.0616


 97%|█████████▋| 190/196 [06:50<00:12,  2.11s/it]

Epoch [3/40], Step [190/196], Loss: 3.1193


100%|██████████| 196/196 [07:02<00:00,  2.15s/it]


Test Accuracy of the student model on the test images: 25.22 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:27<06:59,  2.25s/it]

Epoch [4/40], Step [10/196], Loss: 3.0781


 10%|█         | 20/196 [00:49<06:21,  2.17s/it]

Epoch [4/40], Step [20/196], Loss: 2.9961


 15%|█▌        | 30/196 [01:10<05:47,  2.09s/it]

Epoch [4/40], Step [30/196], Loss: 2.9759


 20%|██        | 40/196 [01:31<05:22,  2.07s/it]

Epoch [4/40], Step [40/196], Loss: 2.9376


 26%|██▌       | 50/196 [01:52<05:08,  2.11s/it]

Epoch [4/40], Step [50/196], Loss: 2.9351


 31%|███       | 60/196 [02:13<04:50,  2.14s/it]

Epoch [4/40], Step [60/196], Loss: 2.8707


 36%|███▌      | 70/196 [02:34<04:27,  2.12s/it]

Epoch [4/40], Step [70/196], Loss: 2.8492


 41%|████      | 80/196 [02:55<04:02,  2.09s/it]

Epoch [4/40], Step [80/196], Loss: 2.7851


 46%|████▌     | 90/196 [03:16<03:43,  2.10s/it]

Epoch [4/40], Step [90/196], Loss: 2.7923


 51%|█████     | 100/196 [03:37<03:23,  2.12s/it]

Epoch [4/40], Step [100/196], Loss: 2.7979


 56%|█████▌    | 110/196 [03:58<03:02,  2.12s/it]

Epoch [4/40], Step [110/196], Loss: 2.8200


 61%|██████    | 120/196 [04:19<02:40,  2.11s/it]

Epoch [4/40], Step [120/196], Loss: 2.7453


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [4/40], Step [130/196], Loss: 2.7184


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [4/40], Step [140/196], Loss: 2.7839


 77%|███████▋  | 150/196 [05:23<01:37,  2.11s/it]

Epoch [4/40], Step [150/196], Loss: 2.6995


 82%|████████▏ | 160/196 [05:44<01:16,  2.11s/it]

Epoch [4/40], Step [160/196], Loss: 2.7033


 87%|████████▋ | 170/196 [06:05<00:54,  2.11s/it]

Epoch [4/40], Step [170/196], Loss: 2.6650


 92%|█████████▏| 180/196 [06:26<00:33,  2.11s/it]

Epoch [4/40], Step [180/196], Loss: 2.7073


 97%|█████████▋| 190/196 [06:47<00:12,  2.11s/it]

Epoch [4/40], Step [190/196], Loss: 2.6190


100%|██████████| 196/196 [06:58<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 26.08 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:30<07:04,  2.28s/it]

Epoch [5/40], Step [10/196], Loss: 2.6100


 10%|█         | 20/196 [00:52<06:20,  2.16s/it]

Epoch [5/40], Step [20/196], Loss: 2.5660


 15%|█▌        | 30/196 [01:13<05:46,  2.09s/it]

Epoch [5/40], Step [30/196], Loss: 2.5559


 20%|██        | 40/196 [01:34<05:23,  2.07s/it]

Epoch [5/40], Step [40/196], Loss: 2.4455


 26%|██▌       | 50/196 [01:54<05:07,  2.10s/it]

Epoch [5/40], Step [50/196], Loss: 2.4901


 31%|███       | 60/196 [02:16<04:49,  2.13s/it]

Epoch [5/40], Step [60/196], Loss: 2.4508


 36%|███▌      | 70/196 [02:37<04:27,  2.12s/it]

Epoch [5/40], Step [70/196], Loss: 2.5145


 41%|████      | 80/196 [02:58<04:03,  2.10s/it]

Epoch [5/40], Step [80/196], Loss: 2.4829


 46%|████▌     | 90/196 [03:19<03:42,  2.10s/it]

Epoch [5/40], Step [90/196], Loss: 2.4027


 51%|█████     | 100/196 [03:40<03:23,  2.12s/it]

Epoch [5/40], Step [100/196], Loss: 2.4338


 56%|█████▌    | 110/196 [04:01<03:02,  2.12s/it]

Epoch [5/40], Step [110/196], Loss: 2.3865


 61%|██████    | 120/196 [04:22<02:39,  2.10s/it]

Epoch [5/40], Step [120/196], Loss: 2.3531


 66%|██████▋   | 130/196 [04:43<02:18,  2.10s/it]

Epoch [5/40], Step [130/196], Loss: 2.3644


 71%|███████▏  | 140/196 [05:04<01:57,  2.10s/it]

Epoch [5/40], Step [140/196], Loss: 2.3724


 77%|███████▋  | 150/196 [05:25<01:36,  2.10s/it]

Epoch [5/40], Step [150/196], Loss: 2.4114


 82%|████████▏ | 160/196 [05:46<01:15,  2.11s/it]

Epoch [5/40], Step [160/196], Loss: 2.3660


 87%|████████▋ | 170/196 [06:07<00:54,  2.11s/it]

Epoch [5/40], Step [170/196], Loss: 2.2947


 92%|█████████▏| 180/196 [06:29<00:33,  2.11s/it]

Epoch [5/40], Step [180/196], Loss: 2.2725


 97%|█████████▋| 190/196 [06:50<00:12,  2.11s/it]

Epoch [5/40], Step [190/196], Loss: 2.3128


100%|██████████| 196/196 [07:01<00:00,  2.15s/it]


Test Accuracy of the student model on the test images: 30.42 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:28<07:01,  2.27s/it]

Epoch [6/40], Step [10/196], Loss: 2.2186


 10%|█         | 20/196 [00:50<06:20,  2.16s/it]

Epoch [6/40], Step [20/196], Loss: 2.1977


 15%|█▌        | 30/196 [01:11<05:45,  2.08s/it]

Epoch [6/40], Step [30/196], Loss: 2.2221


 20%|██        | 40/196 [01:32<05:24,  2.08s/it]

Epoch [6/40], Step [40/196], Loss: 2.1916


 26%|██▌       | 50/196 [01:53<05:07,  2.11s/it]

Epoch [6/40], Step [50/196], Loss: 2.1800


 31%|███       | 60/196 [02:14<04:50,  2.14s/it]

Epoch [6/40], Step [60/196], Loss: 2.1986


 36%|███▌      | 70/196 [02:35<04:27,  2.12s/it]

Epoch [6/40], Step [70/196], Loss: 2.1954


 41%|████      | 80/196 [02:56<04:04,  2.11s/it]

Epoch [6/40], Step [80/196], Loss: 2.1762


 46%|████▌     | 90/196 [03:17<03:42,  2.10s/it]

Epoch [6/40], Step [90/196], Loss: 2.1251


 51%|█████     | 100/196 [03:38<03:21,  2.10s/it]

Epoch [6/40], Step [100/196], Loss: 2.1075


 56%|█████▌    | 110/196 [03:59<03:02,  2.12s/it]

Epoch [6/40], Step [110/196], Loss: 2.1554


 61%|██████    | 120/196 [04:21<02:40,  2.11s/it]

Epoch [6/40], Step [120/196], Loss: 2.1154


 66%|██████▋   | 130/196 [04:42<02:19,  2.11s/it]

Epoch [6/40], Step [130/196], Loss: 2.1442


 71%|███████▏  | 140/196 [05:03<01:57,  2.10s/it]

Epoch [6/40], Step [140/196], Loss: 2.1286


 77%|███████▋  | 150/196 [05:24<01:36,  2.10s/it]

Epoch [6/40], Step [150/196], Loss: 2.1304


 82%|████████▏ | 160/196 [05:45<01:15,  2.10s/it]

Epoch [6/40], Step [160/196], Loss: 2.0989


 87%|████████▋ | 170/196 [06:06<00:54,  2.10s/it]

Epoch [6/40], Step [170/196], Loss: 2.0796


 92%|█████████▏| 180/196 [06:27<00:33,  2.10s/it]

Epoch [6/40], Step [180/196], Loss: 2.1013


 97%|█████████▋| 190/196 [06:48<00:12,  2.11s/it]

Epoch [6/40], Step [190/196], Loss: 2.1100


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 41.51 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:27<06:59,  2.25s/it]

Epoch [7/40], Step [10/196], Loss: 2.0157


 10%|█         | 20/196 [00:49<06:21,  2.17s/it]

Epoch [7/40], Step [20/196], Loss: 1.9938


 15%|█▌        | 30/196 [01:10<05:46,  2.08s/it]

Epoch [7/40], Step [30/196], Loss: 2.0286


 20%|██        | 40/196 [01:30<05:23,  2.07s/it]

Epoch [7/40], Step [40/196], Loss: 1.9381


 26%|██▌       | 50/196 [01:51<05:05,  2.10s/it]

Epoch [7/40], Step [50/196], Loss: 1.9942


 31%|███       | 60/196 [02:12<04:50,  2.13s/it]

Epoch [7/40], Step [60/196], Loss: 1.9639


 36%|███▌      | 70/196 [02:34<04:27,  2.12s/it]

Epoch [7/40], Step [70/196], Loss: 1.9692


 41%|████      | 80/196 [02:55<04:03,  2.10s/it]

Epoch [7/40], Step [80/196], Loss: 1.9381


 46%|████▌     | 90/196 [03:15<03:41,  2.09s/it]

Epoch [7/40], Step [90/196], Loss: 1.9442


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [7/40], Step [100/196], Loss: 1.9187


 56%|█████▌    | 110/196 [03:58<03:01,  2.11s/it]

Epoch [7/40], Step [110/196], Loss: 1.9519


 61%|██████    | 120/196 [04:19<02:40,  2.11s/it]

Epoch [7/40], Step [120/196], Loss: 1.9389


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [7/40], Step [130/196], Loss: 1.9521


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [7/40], Step [140/196], Loss: 1.9590


 77%|███████▋  | 150/196 [05:22<01:36,  2.11s/it]

Epoch [7/40], Step [150/196], Loss: 1.9037


 82%|████████▏ | 160/196 [05:43<01:15,  2.10s/it]

Epoch [7/40], Step [160/196], Loss: 1.9609


 87%|████████▋ | 170/196 [06:04<00:54,  2.10s/it]

Epoch [7/40], Step [170/196], Loss: 1.9682


 92%|█████████▏| 180/196 [06:25<00:33,  2.10s/it]

Epoch [7/40], Step [180/196], Loss: 1.8943


 97%|█████████▋| 190/196 [06:46<00:12,  2.10s/it]

Epoch [7/40], Step [190/196], Loss: 1.9033


100%|██████████| 196/196 [06:57<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 33.99 %


  5%|▌         | 10/196 [00:29<07:08,  2.30s/it]

Epoch [8/40], Step [10/196], Loss: 1.9147


 10%|█         | 20/196 [00:51<06:19,  2.16s/it]

Epoch [8/40], Step [20/196], Loss: 1.8132


 15%|█▌        | 30/196 [01:12<05:45,  2.08s/it]

Epoch [8/40], Step [30/196], Loss: 1.8411


 20%|██        | 40/196 [01:32<05:23,  2.07s/it]

Epoch [8/40], Step [40/196], Loss: 1.7964


 26%|██▌       | 50/196 [01:53<05:07,  2.11s/it]

Epoch [8/40], Step [50/196], Loss: 1.8107


 31%|███       | 60/196 [02:15<04:49,  2.13s/it]

Epoch [8/40], Step [60/196], Loss: 1.8023


 36%|███▌      | 70/196 [02:36<04:26,  2.12s/it]

Epoch [8/40], Step [70/196], Loss: 1.7465


 41%|████      | 80/196 [02:57<04:03,  2.10s/it]

Epoch [8/40], Step [80/196], Loss: 1.7758


 46%|████▌     | 90/196 [03:18<03:42,  2.10s/it]

Epoch [8/40], Step [90/196], Loss: 1.8274


 51%|█████     | 100/196 [03:39<03:23,  2.12s/it]

Epoch [8/40], Step [100/196], Loss: 1.7933


 56%|█████▌    | 110/196 [04:00<03:02,  2.12s/it]

Epoch [8/40], Step [110/196], Loss: 1.8135


 61%|██████    | 120/196 [04:21<02:39,  2.11s/it]

Epoch [8/40], Step [120/196], Loss: 1.8188


 66%|██████▋   | 130/196 [04:42<02:18,  2.10s/it]

Epoch [8/40], Step [130/196], Loss: 1.7826


 71%|███████▏  | 140/196 [05:03<01:57,  2.10s/it]

Epoch [8/40], Step [140/196], Loss: 1.8102


 77%|███████▋  | 150/196 [05:24<01:36,  2.10s/it]

Epoch [8/40], Step [150/196], Loss: 1.8024


 82%|████████▏ | 160/196 [05:45<01:15,  2.10s/it]

Epoch [8/40], Step [160/196], Loss: 1.8337


 87%|████████▋ | 170/196 [06:06<00:54,  2.10s/it]

Epoch [8/40], Step [170/196], Loss: 1.8231


 92%|█████████▏| 180/196 [06:27<00:33,  2.11s/it]

Epoch [8/40], Step [180/196], Loss: 1.8222


 97%|█████████▋| 190/196 [06:48<00:12,  2.11s/it]

Epoch [8/40], Step [190/196], Loss: 1.8133


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 38.87 %


  5%|▌         | 10/196 [00:27<07:00,  2.26s/it]

Epoch [9/40], Step [10/196], Loss: 1.7014


 10%|█         | 20/196 [00:49<06:19,  2.16s/it]

Epoch [9/40], Step [20/196], Loss: 1.6736


 15%|█▌        | 30/196 [01:10<05:44,  2.08s/it]

Epoch [9/40], Step [30/196], Loss: 1.6498


 20%|██        | 40/196 [01:31<05:23,  2.08s/it]

Epoch [9/40], Step [40/196], Loss: 1.6805


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [9/40], Step [50/196], Loss: 1.6595


 31%|███       | 60/196 [02:13<04:49,  2.13s/it]

Epoch [9/40], Step [60/196], Loss: 1.6332


 36%|███▌      | 70/196 [02:34<04:25,  2.11s/it]

Epoch [9/40], Step [70/196], Loss: 1.6467


 41%|████      | 80/196 [02:55<04:02,  2.09s/it]

Epoch [9/40], Step [80/196], Loss: 1.6445


 46%|████▌     | 90/196 [03:16<03:41,  2.09s/it]

Epoch [9/40], Step [90/196], Loss: 1.6966


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [9/40], Step [100/196], Loss: 1.6342


 56%|█████▌    | 110/196 [03:58<03:02,  2.12s/it]

Epoch [9/40], Step [110/196], Loss: 1.6870


 61%|██████    | 120/196 [04:19<02:39,  2.11s/it]

Epoch [9/40], Step [120/196], Loss: 1.6905


 66%|██████▋   | 130/196 [04:40<02:18,  2.09s/it]

Epoch [9/40], Step [130/196], Loss: 1.7116


 71%|███████▏  | 140/196 [05:01<01:57,  2.09s/it]

Epoch [9/40], Step [140/196], Loss: 1.6782


 77%|███████▋  | 150/196 [05:22<01:36,  2.09s/it]

Epoch [9/40], Step [150/196], Loss: 1.7070


 82%|████████▏ | 160/196 [05:43<01:16,  2.11s/it]

Epoch [9/40], Step [160/196], Loss: 1.6462


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [9/40], Step [170/196], Loss: 1.6801


 92%|█████████▏| 180/196 [06:25<00:33,  2.11s/it]

Epoch [9/40], Step [180/196], Loss: 1.6944


 97%|█████████▋| 190/196 [06:46<00:12,  2.10s/it]

Epoch [9/40], Step [190/196], Loss: 1.6764


100%|██████████| 196/196 [06:57<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 42.34 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:28<07:05,  2.29s/it]

Epoch [10/40], Step [10/196], Loss: 1.6563


 10%|█         | 20/196 [00:50<06:20,  2.16s/it]

Epoch [10/40], Step [20/196], Loss: 1.6060


 15%|█▌        | 30/196 [01:11<05:45,  2.08s/it]

Epoch [10/40], Step [30/196], Loss: 1.5793


 20%|██        | 40/196 [01:32<05:22,  2.07s/it]

Epoch [10/40], Step [40/196], Loss: 1.5607


 26%|██▌       | 50/196 [01:53<05:07,  2.10s/it]

Epoch [10/40], Step [50/196], Loss: 1.5841


 31%|███       | 60/196 [02:14<04:49,  2.13s/it]

Epoch [10/40], Step [60/196], Loss: 1.5470


 36%|███▌      | 70/196 [02:35<04:27,  2.12s/it]

Epoch [10/40], Step [70/196], Loss: 1.5801


 41%|████      | 80/196 [02:56<04:03,  2.10s/it]

Epoch [10/40], Step [80/196], Loss: 1.5714


 46%|████▌     | 90/196 [03:17<03:41,  2.09s/it]

Epoch [10/40], Step [90/196], Loss: 1.5511


 51%|█████     | 100/196 [03:38<03:21,  2.10s/it]

Epoch [10/40], Step [100/196], Loss: 1.5495


 56%|█████▌    | 110/196 [03:59<03:02,  2.12s/it]

Epoch [10/40], Step [110/196], Loss: 1.5713


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [10/40], Step [120/196], Loss: 1.5563


 66%|██████▋   | 130/196 [04:41<02:18,  2.11s/it]

Epoch [10/40], Step [130/196], Loss: 1.5706


 71%|███████▏  | 140/196 [05:02<01:57,  2.10s/it]

Epoch [10/40], Step [140/196], Loss: 1.5807


 77%|███████▋  | 150/196 [05:23<01:36,  2.10s/it]

Epoch [10/40], Step [150/196], Loss: 1.6170


 82%|████████▏ | 160/196 [05:44<01:15,  2.10s/it]

Epoch [10/40], Step [160/196], Loss: 1.5645


 87%|████████▋ | 170/196 [06:05<00:54,  2.10s/it]

Epoch [10/40], Step [170/196], Loss: 1.5826


 92%|█████████▏| 180/196 [06:26<00:33,  2.11s/it]

Epoch [10/40], Step [180/196], Loss: 1.6072


 97%|█████████▋| 190/196 [06:48<00:12,  2.10s/it]

Epoch [10/40], Step [190/196], Loss: 1.5857


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 42.41 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:29<07:03,  2.28s/it]

Epoch [11/40], Step [10/196], Loss: 1.5426


 10%|█         | 20/196 [00:51<06:19,  2.16s/it]

Epoch [11/40], Step [20/196], Loss: 1.4900


 15%|█▌        | 30/196 [01:12<05:45,  2.08s/it]

Epoch [11/40], Step [30/196], Loss: 1.4823


 20%|██        | 40/196 [01:33<05:22,  2.07s/it]

Epoch [11/40], Step [40/196], Loss: 1.4386


 26%|██▌       | 50/196 [01:54<05:06,  2.10s/it]

Epoch [11/40], Step [50/196], Loss: 1.4538


 31%|███       | 60/196 [02:15<04:49,  2.13s/it]

Epoch [11/40], Step [60/196], Loss: 1.5034


 36%|███▌      | 70/196 [02:36<04:26,  2.12s/it]

Epoch [11/40], Step [70/196], Loss: 1.4534


 41%|████      | 80/196 [02:57<04:01,  2.08s/it]

Epoch [11/40], Step [80/196], Loss: 1.4480


 46%|████▌     | 90/196 [03:18<03:42,  2.10s/it]

Epoch [11/40], Step [90/196], Loss: 1.4707


 51%|█████     | 100/196 [03:39<03:22,  2.11s/it]

Epoch [11/40], Step [100/196], Loss: 1.4818


 56%|█████▌    | 110/196 [04:00<03:01,  2.11s/it]

Epoch [11/40], Step [110/196], Loss: 1.4717


 61%|██████    | 120/196 [04:21<02:40,  2.11s/it]

Epoch [11/40], Step [120/196], Loss: 1.4783


 66%|██████▋   | 130/196 [04:42<02:18,  2.10s/it]

Epoch [11/40], Step [130/196], Loss: 1.4974


 71%|███████▏  | 140/196 [05:03<01:57,  2.10s/it]

Epoch [11/40], Step [140/196], Loss: 1.4878


 77%|███████▋  | 150/196 [05:24<01:36,  2.10s/it]

Epoch [11/40], Step [150/196], Loss: 1.5106


 82%|████████▏ | 160/196 [05:45<01:15,  2.10s/it]

Epoch [11/40], Step [160/196], Loss: 1.5325


 87%|████████▋ | 170/196 [06:06<00:54,  2.10s/it]

Epoch [11/40], Step [170/196], Loss: 1.4927


 92%|█████████▏| 180/196 [06:28<00:33,  2.11s/it]

Epoch [11/40], Step [180/196], Loss: 1.4972


 97%|█████████▋| 190/196 [06:49<00:12,  2.11s/it]

Epoch [11/40], Step [190/196], Loss: 1.5014


100%|██████████| 196/196 [07:00<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 48.39 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:27<06:58,  2.25s/it]

Epoch [12/40], Step [10/196], Loss: 1.4505


 10%|█         | 20/196 [00:49<06:20,  2.16s/it]

Epoch [12/40], Step [20/196], Loss: 1.4101


 15%|█▌        | 30/196 [01:10<05:46,  2.09s/it]

Epoch [12/40], Step [30/196], Loss: 1.3765


 20%|██        | 40/196 [01:31<05:22,  2.07s/it]

Epoch [12/40], Step [40/196], Loss: 1.3790


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [12/40], Step [50/196], Loss: 1.3933


 31%|███       | 60/196 [02:13<04:49,  2.13s/it]

Epoch [12/40], Step [60/196], Loss: 1.3877


 36%|███▌      | 70/196 [02:34<04:27,  2.12s/it]

Epoch [12/40], Step [70/196], Loss: 1.4144


 41%|████      | 80/196 [02:55<04:02,  2.09s/it]

Epoch [12/40], Step [80/196], Loss: 1.4232


 46%|████▌     | 90/196 [03:16<03:42,  2.10s/it]

Epoch [12/40], Step [90/196], Loss: 1.4142


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [12/40], Step [100/196], Loss: 1.4162


 56%|█████▌    | 110/196 [03:58<03:01,  2.11s/it]

Epoch [12/40], Step [110/196], Loss: 1.4184


 61%|██████    | 120/196 [04:19<02:39,  2.10s/it]

Epoch [12/40], Step [120/196], Loss: 1.4197


 66%|██████▋   | 130/196 [04:40<02:17,  2.09s/it]

Epoch [12/40], Step [130/196], Loss: 1.4050


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [12/40], Step [140/196], Loss: 1.4205


 77%|███████▋  | 150/196 [05:22<01:37,  2.11s/it]

Epoch [12/40], Step [150/196], Loss: 1.4068


 82%|████████▏ | 160/196 [05:44<01:16,  2.11s/it]

Epoch [12/40], Step [160/196], Loss: 1.3930


 87%|████████▋ | 170/196 [06:05<00:54,  2.11s/it]

Epoch [12/40], Step [170/196], Loss: 1.4181


 92%|█████████▏| 180/196 [06:26<00:33,  2.11s/it]

Epoch [12/40], Step [180/196], Loss: 1.4514


 97%|█████████▋| 190/196 [06:47<00:12,  2.11s/it]

Epoch [12/40], Step [190/196], Loss: 1.4657


100%|██████████| 196/196 [06:58<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 42.42 %


  5%|▌         | 10/196 [00:26<06:57,  2.25s/it]

Epoch [13/40], Step [10/196], Loss: 1.3555


 10%|█         | 20/196 [00:48<06:21,  2.16s/it]

Epoch [13/40], Step [20/196], Loss: 1.3583


 15%|█▌        | 30/196 [01:09<05:46,  2.09s/it]

Epoch [13/40], Step [30/196], Loss: 1.3039


 20%|██        | 40/196 [01:30<05:23,  2.07s/it]

Epoch [13/40], Step [40/196], Loss: 1.2892


 26%|██▌       | 50/196 [01:51<05:06,  2.10s/it]

Epoch [13/40], Step [50/196], Loss: 1.2816


 31%|███       | 60/196 [02:12<04:49,  2.13s/it]

Epoch [13/40], Step [60/196], Loss: 1.3251


 36%|███▌      | 70/196 [02:33<04:28,  2.13s/it]

Epoch [13/40], Step [70/196], Loss: 1.3306


 41%|████      | 80/196 [02:54<04:02,  2.09s/it]

Epoch [13/40], Step [80/196], Loss: 1.3061


 46%|████▌     | 90/196 [03:15<03:40,  2.08s/it]

Epoch [13/40], Step [90/196], Loss: 1.3503


 51%|█████     | 100/196 [03:36<03:21,  2.10s/it]

Epoch [13/40], Step [100/196], Loss: 1.3448


 56%|█████▌    | 110/196 [03:57<03:02,  2.12s/it]

Epoch [13/40], Step [110/196], Loss: 1.3471


 61%|██████    | 120/196 [04:18<02:40,  2.11s/it]

Epoch [13/40], Step [120/196], Loss: 1.3417


 66%|██████▋   | 130/196 [04:39<02:18,  2.10s/it]

Epoch [13/40], Step [130/196], Loss: 1.3434


 71%|███████▏  | 140/196 [05:00<01:57,  2.09s/it]

Epoch [13/40], Step [140/196], Loss: 1.3339


 77%|███████▋  | 150/196 [05:21<01:37,  2.11s/it]

Epoch [13/40], Step [150/196], Loss: 1.3469


 82%|████████▏ | 160/196 [05:42<01:15,  2.11s/it]

Epoch [13/40], Step [160/196], Loss: 1.3676


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [13/40], Step [170/196], Loss: 1.3926


 92%|█████████▏| 180/196 [06:25<00:33,  2.11s/it]

Epoch [13/40], Step [180/196], Loss: 1.3869


 97%|█████████▋| 190/196 [06:46<00:12,  2.10s/it]

Epoch [13/40], Step [190/196], Loss: 1.3502


100%|██████████| 196/196 [06:57<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 46.31 %


  5%|▌         | 10/196 [00:30<07:06,  2.29s/it]

Epoch [14/40], Step [10/196], Loss: 1.3175


 10%|█         | 20/196 [00:52<06:20,  2.16s/it]

Epoch [14/40], Step [20/196], Loss: 1.3003


 15%|█▌        | 30/196 [01:13<05:47,  2.10s/it]

Epoch [14/40], Step [30/196], Loss: 1.2877


 20%|██        | 40/196 [01:33<05:23,  2.07s/it]

Epoch [14/40], Step [40/196], Loss: 1.2457


 26%|██▌       | 50/196 [01:54<05:06,  2.10s/it]

Epoch [14/40], Step [50/196], Loss: 1.2496


 31%|███       | 60/196 [02:16<04:50,  2.14s/it]

Epoch [14/40], Step [60/196], Loss: 1.2371


 36%|███▌      | 70/196 [02:37<04:27,  2.13s/it]

Epoch [14/40], Step [70/196], Loss: 1.2483


 41%|████      | 80/196 [02:58<04:03,  2.10s/it]

Epoch [14/40], Step [80/196], Loss: 1.2700


 46%|████▌     | 90/196 [03:19<03:41,  2.09s/it]

Epoch [14/40], Step [90/196], Loss: 1.2717


 51%|█████     | 100/196 [03:40<03:22,  2.11s/it]

Epoch [14/40], Step [100/196], Loss: 1.3060


 56%|█████▌    | 110/196 [04:01<03:01,  2.11s/it]

Epoch [14/40], Step [110/196], Loss: 1.2932


 61%|██████    | 120/196 [04:22<02:40,  2.11s/it]

Epoch [14/40], Step [120/196], Loss: 1.2874


 66%|██████▋   | 130/196 [04:43<02:18,  2.11s/it]

Epoch [14/40], Step [130/196], Loss: 1.2861


 71%|███████▏  | 140/196 [05:04<01:57,  2.10s/it]

Epoch [14/40], Step [140/196], Loss: 1.3029


 77%|███████▋  | 150/196 [05:25<01:36,  2.10s/it]

Epoch [14/40], Step [150/196], Loss: 1.2871


 82%|████████▏ | 160/196 [05:46<01:15,  2.10s/it]

Epoch [14/40], Step [160/196], Loss: 1.2912


 87%|████████▋ | 170/196 [06:07<00:54,  2.10s/it]

Epoch [14/40], Step [170/196], Loss: 1.3136


 92%|█████████▏| 180/196 [06:28<00:33,  2.11s/it]

Epoch [14/40], Step [180/196], Loss: 1.3131


 97%|█████████▋| 190/196 [06:49<00:12,  2.11s/it]

Epoch [14/40], Step [190/196], Loss: 1.3181


100%|██████████| 196/196 [07:01<00:00,  2.15s/it]


Test Accuracy of the student model on the test images: 47.98 %


  5%|▌         | 10/196 [00:28<06:58,  2.25s/it]

Epoch [15/40], Step [10/196], Loss: 1.2422


 10%|█         | 20/196 [00:50<06:20,  2.16s/it]

Epoch [15/40], Step [20/196], Loss: 1.2160


 15%|█▌        | 30/196 [01:10<05:45,  2.08s/it]

Epoch [15/40], Step [30/196], Loss: 1.2028


 20%|██        | 40/196 [01:31<05:24,  2.08s/it]

Epoch [15/40], Step [40/196], Loss: 1.2130


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [15/40], Step [50/196], Loss: 1.1948


 31%|███       | 60/196 [02:13<04:50,  2.13s/it]

Epoch [15/40], Step [60/196], Loss: 1.2093


 36%|███▌      | 70/196 [02:35<04:27,  2.12s/it]

Epoch [15/40], Step [70/196], Loss: 1.2033


 41%|████      | 80/196 [02:56<04:03,  2.10s/it]

Epoch [15/40], Step [80/196], Loss: 1.2378


 46%|████▌     | 90/196 [03:17<03:42,  2.10s/it]

Epoch [15/40], Step [90/196], Loss: 1.2409


 51%|█████     | 100/196 [03:38<03:22,  2.11s/it]

Epoch [15/40], Step [100/196], Loss: 1.2194


 56%|█████▌    | 110/196 [03:59<03:01,  2.11s/it]

Epoch [15/40], Step [110/196], Loss: 1.2237


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [15/40], Step [120/196], Loss: 1.2217


 66%|██████▋   | 130/196 [04:41<02:19,  2.11s/it]

Epoch [15/40], Step [130/196], Loss: 1.2088


 71%|███████▏  | 140/196 [05:02<01:58,  2.11s/it]

Epoch [15/40], Step [140/196], Loss: 1.2049


 77%|███████▋  | 150/196 [05:23<01:37,  2.12s/it]

Epoch [15/40], Step [150/196], Loss: 1.2215


 82%|████████▏ | 160/196 [05:44<01:15,  2.11s/it]

Epoch [15/40], Step [160/196], Loss: 1.2274


 87%|████████▋ | 170/196 [06:05<00:54,  2.10s/it]

Epoch [15/40], Step [170/196], Loss: 1.2132


 92%|█████████▏| 180/196 [06:26<00:33,  2.10s/it]

Epoch [15/40], Step [180/196], Loss: 1.2413


 97%|█████████▋| 190/196 [06:47<00:12,  2.11s/it]

Epoch [15/40], Step [190/196], Loss: 1.2472


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 50.22 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:28<07:03,  2.27s/it]

Epoch [16/40], Step [10/196], Loss: 1.2193


 10%|█         | 20/196 [00:50<06:19,  2.16s/it]

Epoch [16/40], Step [20/196], Loss: 1.1688


 15%|█▌        | 30/196 [01:11<05:45,  2.08s/it]

Epoch [16/40], Step [30/196], Loss: 1.1470


 20%|██        | 40/196 [01:32<05:22,  2.07s/it]

Epoch [16/40], Step [40/196], Loss: 1.1724


 26%|██▌       | 50/196 [01:53<05:06,  2.10s/it]

Epoch [16/40], Step [50/196], Loss: 1.1749


 31%|███       | 60/196 [02:14<04:49,  2.13s/it]

Epoch [16/40], Step [60/196], Loss: 1.1492


 36%|███▌      | 70/196 [02:35<04:27,  2.12s/it]

Epoch [16/40], Step [70/196], Loss: 1.1345


 41%|████      | 80/196 [02:56<04:02,  2.09s/it]

Epoch [16/40], Step [80/196], Loss: 1.1293


 46%|████▌     | 90/196 [03:17<03:41,  2.09s/it]

Epoch [16/40], Step [90/196], Loss: 1.1552


 51%|█████     | 100/196 [03:38<03:22,  2.11s/it]

Epoch [16/40], Step [100/196], Loss: 1.1758


 56%|█████▌    | 110/196 [03:59<03:01,  2.11s/it]

Epoch [16/40], Step [110/196], Loss: 1.1831


 61%|██████    | 120/196 [04:20<02:39,  2.11s/it]

Epoch [16/40], Step [120/196], Loss: 1.1941


 66%|██████▋   | 130/196 [04:41<02:18,  2.10s/it]

Epoch [16/40], Step [130/196], Loss: 1.1637


 71%|███████▏  | 140/196 [05:02<01:58,  2.11s/it]

Epoch [16/40], Step [140/196], Loss: 1.1745


 77%|███████▋  | 150/196 [05:24<01:37,  2.12s/it]

Epoch [16/40], Step [150/196], Loss: 1.2055


 82%|████████▏ | 160/196 [05:45<01:16,  2.12s/it]

Epoch [16/40], Step [160/196], Loss: 1.2005


 87%|████████▋ | 170/196 [06:06<00:54,  2.11s/it]

Epoch [16/40], Step [170/196], Loss: 1.1669


 92%|█████████▏| 180/196 [06:27<00:33,  2.11s/it]

Epoch [16/40], Step [180/196], Loss: 1.2103


 97%|█████████▋| 190/196 [06:48<00:12,  2.10s/it]

Epoch [16/40], Step [190/196], Loss: 1.2150


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 50.13 %


  5%|▌         | 10/196 [00:27<06:59,  2.26s/it]

Epoch [17/40], Step [10/196], Loss: 1.1614


 10%|█         | 20/196 [00:49<06:20,  2.16s/it]

Epoch [17/40], Step [20/196], Loss: 1.1176


 15%|█▌        | 30/196 [01:10<05:46,  2.09s/it]

Epoch [17/40], Step [30/196], Loss: 1.0989


 20%|██        | 40/196 [01:31<05:23,  2.07s/it]

Epoch [17/40], Step [40/196], Loss: 1.0793


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [17/40], Step [50/196], Loss: 1.0940


 31%|███       | 60/196 [02:13<04:49,  2.13s/it]

Epoch [17/40], Step [60/196], Loss: 1.0969


 36%|███▌      | 70/196 [02:34<04:27,  2.12s/it]

Epoch [17/40], Step [70/196], Loss: 1.0833


 41%|████      | 80/196 [02:55<04:02,  2.09s/it]

Epoch [17/40], Step [80/196], Loss: 1.1030


 46%|████▌     | 90/196 [03:16<03:42,  2.10s/it]

Epoch [17/40], Step [90/196], Loss: 1.1039


 51%|█████     | 100/196 [03:37<03:23,  2.12s/it]

Epoch [17/40], Step [100/196], Loss: 1.0921


 56%|█████▌    | 110/196 [03:58<03:02,  2.13s/it]

Epoch [17/40], Step [110/196], Loss: 1.1128


 61%|██████    | 120/196 [04:19<02:39,  2.10s/it]

Epoch [17/40], Step [120/196], Loss: 1.1003


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [17/40], Step [130/196], Loss: 1.1350


 71%|███████▏  | 140/196 [05:01<01:57,  2.09s/it]

Epoch [17/40], Step [140/196], Loss: 1.1031


 77%|███████▋  | 150/196 [05:22<01:36,  2.11s/it]

Epoch [17/40], Step [150/196], Loss: 1.1134


 82%|████████▏ | 160/196 [05:43<01:15,  2.11s/it]

Epoch [17/40], Step [160/196], Loss: 1.1462


 87%|████████▋ | 170/196 [06:05<00:54,  2.11s/it]

Epoch [17/40], Step [170/196], Loss: 1.1777


 92%|█████████▏| 180/196 [06:26<00:33,  2.10s/it]

Epoch [17/40], Step [180/196], Loss: 1.1738


 97%|█████████▋| 190/196 [06:47<00:12,  2.10s/it]

Epoch [17/40], Step [190/196], Loss: 1.1498


100%|██████████| 196/196 [06:58<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 52.14 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:28<07:01,  2.27s/it]

Epoch [18/40], Step [10/196], Loss: 1.1078


 10%|█         | 20/196 [00:50<06:19,  2.16s/it]

Epoch [18/40], Step [20/196], Loss: 1.0962


 15%|█▌        | 30/196 [01:11<05:44,  2.08s/it]

Epoch [18/40], Step [30/196], Loss: 1.0686


 20%|██        | 40/196 [01:31<05:22,  2.07s/it]

Epoch [18/40], Step [40/196], Loss: 1.0840


 26%|██▌       | 50/196 [01:52<05:08,  2.11s/it]

Epoch [18/40], Step [50/196], Loss: 1.0611


 31%|███       | 60/196 [02:14<04:49,  2.13s/it]

Epoch [18/40], Step [60/196], Loss: 1.0468


 36%|███▌      | 70/196 [02:35<04:26,  2.12s/it]

Epoch [18/40], Step [70/196], Loss: 1.0644


 41%|████      | 80/196 [02:56<04:03,  2.10s/it]

Epoch [18/40], Step [80/196], Loss: 1.0489


 46%|████▌     | 90/196 [03:17<03:42,  2.10s/it]

Epoch [18/40], Step [90/196], Loss: 1.0465


 51%|█████     | 100/196 [03:38<03:22,  2.11s/it]

Epoch [18/40], Step [100/196], Loss: 1.0514


 56%|█████▌    | 110/196 [03:59<03:01,  2.11s/it]

Epoch [18/40], Step [110/196], Loss: 1.0348


 61%|██████    | 120/196 [04:20<02:39,  2.10s/it]

Epoch [18/40], Step [120/196], Loss: 1.0522


 66%|██████▋   | 130/196 [04:41<02:18,  2.10s/it]

Epoch [18/40], Step [130/196], Loss: 1.0639


 71%|███████▏  | 140/196 [05:02<01:57,  2.10s/it]

Epoch [18/40], Step [140/196], Loss: 1.0658


 77%|███████▋  | 150/196 [05:23<01:36,  2.10s/it]

Epoch [18/40], Step [150/196], Loss: 1.0732


 82%|████████▏ | 160/196 [05:44<01:15,  2.10s/it]

Epoch [18/40], Step [160/196], Loss: 1.0819


 87%|████████▋ | 170/196 [06:05<00:54,  2.11s/it]

Epoch [18/40], Step [170/196], Loss: 1.0858


 92%|█████████▏| 180/196 [06:26<00:33,  2.11s/it]

Epoch [18/40], Step [180/196], Loss: 1.0958


 97%|█████████▋| 190/196 [06:47<00:12,  2.11s/it]

Epoch [18/40], Step [190/196], Loss: 1.1120


100%|██████████| 196/196 [06:58<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 52.77 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:28<07:00,  2.26s/it]

Epoch [19/40], Step [10/196], Loss: 1.0611


 10%|█         | 20/196 [00:50<06:21,  2.17s/it]

Epoch [19/40], Step [20/196], Loss: 1.0072


 15%|█▌        | 30/196 [01:10<05:46,  2.09s/it]

Epoch [19/40], Step [30/196], Loss: 1.0014


 20%|██        | 40/196 [01:31<05:23,  2.07s/it]

Epoch [19/40], Step [40/196], Loss: 1.0052


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [19/40], Step [50/196], Loss: 0.9879


 31%|███       | 60/196 [02:13<04:49,  2.13s/it]

Epoch [19/40], Step [60/196], Loss: 0.9810


 36%|███▌      | 70/196 [02:35<04:27,  2.12s/it]

Epoch [19/40], Step [70/196], Loss: 0.9770


 41%|████      | 80/196 [02:56<04:02,  2.09s/it]

Epoch [19/40], Step [80/196], Loss: 0.9854


 46%|████▌     | 90/196 [03:17<03:42,  2.09s/it]

Epoch [19/40], Step [90/196], Loss: 1.0055


 51%|█████     | 100/196 [03:38<03:22,  2.11s/it]

Epoch [19/40], Step [100/196], Loss: 0.9994


 56%|█████▌    | 110/196 [03:59<03:02,  2.12s/it]

Epoch [19/40], Step [110/196], Loss: 1.0076


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [19/40], Step [120/196], Loss: 1.0326


 66%|██████▋   | 130/196 [04:41<02:18,  2.11s/it]

Epoch [19/40], Step [130/196], Loss: 1.0375


 71%|███████▏  | 140/196 [05:02<01:57,  2.10s/it]

Epoch [19/40], Step [140/196], Loss: 1.0515


 77%|███████▋  | 150/196 [05:23<01:36,  2.10s/it]

Epoch [19/40], Step [150/196], Loss: 1.0261


 82%|████████▏ | 160/196 [05:44<01:15,  2.10s/it]

Epoch [19/40], Step [160/196], Loss: 1.0306


 87%|████████▋ | 170/196 [06:05<00:54,  2.10s/it]

Epoch [19/40], Step [170/196], Loss: 1.0452


 92%|█████████▏| 180/196 [06:26<00:33,  2.10s/it]

Epoch [19/40], Step [180/196], Loss: 1.0573


 97%|█████████▋| 190/196 [06:47<00:12,  2.10s/it]

Epoch [19/40], Step [190/196], Loss: 1.0667


100%|██████████| 196/196 [06:58<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 55.33 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:26<06:55,  2.23s/it]

Epoch [20/40], Step [10/196], Loss: 1.0316


 10%|█         | 20/196 [00:48<06:19,  2.16s/it]

Epoch [20/40], Step [20/196], Loss: 0.9976


 15%|█▌        | 30/196 [01:09<05:45,  2.08s/it]

Epoch [20/40], Step [30/196], Loss: 0.9833


 20%|██        | 40/196 [01:30<05:22,  2.07s/it]

Epoch [20/40], Step [40/196], Loss: 0.9418


 26%|██▌       | 50/196 [01:51<05:06,  2.10s/it]

Epoch [20/40], Step [50/196], Loss: 0.9533


 31%|███       | 60/196 [02:12<04:49,  2.13s/it]

Epoch [20/40], Step [60/196], Loss: 0.9492


 36%|███▌      | 70/196 [02:33<04:26,  2.12s/it]

Epoch [20/40], Step [70/196], Loss: 0.9473


 41%|████      | 80/196 [02:54<04:01,  2.09s/it]

Epoch [20/40], Step [80/196], Loss: 0.9483


 46%|████▌     | 90/196 [03:15<03:42,  2.10s/it]

Epoch [20/40], Step [90/196], Loss: 0.9779


 51%|█████     | 100/196 [03:36<03:23,  2.12s/it]

Epoch [20/40], Step [100/196], Loss: 0.9828


 56%|█████▌    | 110/196 [03:57<03:01,  2.11s/it]

Epoch [20/40], Step [110/196], Loss: 0.9736


 61%|██████    | 120/196 [04:18<02:40,  2.11s/it]

Epoch [20/40], Step [120/196], Loss: 0.9417


 66%|██████▋   | 130/196 [04:39<02:18,  2.10s/it]

Epoch [20/40], Step [130/196], Loss: 0.9677


 71%|███████▏  | 140/196 [05:00<01:57,  2.10s/it]

Epoch [20/40], Step [140/196], Loss: 0.9628


 77%|███████▋  | 150/196 [05:21<01:36,  2.09s/it]

Epoch [20/40], Step [150/196], Loss: 0.9771


 82%|████████▏ | 160/196 [05:42<01:15,  2.10s/it]

Epoch [20/40], Step [160/196], Loss: 0.9810


 87%|████████▋ | 170/196 [06:03<00:54,  2.10s/it]

Epoch [20/40], Step [170/196], Loss: 0.9953


 92%|█████████▏| 180/196 [06:24<00:33,  2.10s/it]

Epoch [20/40], Step [180/196], Loss: 0.9686


 97%|█████████▋| 190/196 [06:45<00:12,  2.10s/it]

Epoch [20/40], Step [190/196], Loss: 0.9951


100%|██████████| 196/196 [06:56<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 57.14 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:28<08:36,  2.78s/it]

Epoch [21/40], Step [10/196], Loss: 0.9533


 10%|█         | 20/196 [00:50<06:23,  2.18s/it]

Epoch [21/40], Step [20/196], Loss: 0.9233


 15%|█▌        | 30/196 [01:11<05:46,  2.09s/it]

Epoch [21/40], Step [30/196], Loss: 0.9182


 20%|██        | 40/196 [01:32<05:23,  2.08s/it]

Epoch [21/40], Step [40/196], Loss: 0.9152


 26%|██▌       | 50/196 [01:53<05:06,  2.10s/it]

Epoch [21/40], Step [50/196], Loss: 0.9027


 31%|███       | 60/196 [02:14<04:49,  2.13s/it]

Epoch [21/40], Step [60/196], Loss: 0.8770


 36%|███▌      | 70/196 [02:35<04:26,  2.12s/it]

Epoch [21/40], Step [70/196], Loss: 0.9046


 41%|████      | 80/196 [02:56<04:03,  2.10s/it]

Epoch [21/40], Step [80/196], Loss: 0.8955


 46%|████▌     | 90/196 [03:17<03:41,  2.09s/it]

Epoch [21/40], Step [90/196], Loss: 0.9140


 51%|█████     | 100/196 [03:38<03:22,  2.10s/it]

Epoch [21/40], Step [100/196], Loss: 0.9336


 56%|█████▌    | 110/196 [03:59<03:01,  2.11s/it]

Epoch [21/40], Step [110/196], Loss: 0.9224


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [21/40], Step [120/196], Loss: 0.9046


 66%|██████▋   | 130/196 [04:42<02:18,  2.10s/it]

Epoch [21/40], Step [130/196], Loss: 0.9098


 71%|███████▏  | 140/196 [05:03<01:57,  2.11s/it]

Epoch [21/40], Step [140/196], Loss: 0.9399


 77%|███████▋  | 150/196 [05:24<01:36,  2.10s/it]

Epoch [21/40], Step [150/196], Loss: 0.9418


 82%|████████▏ | 160/196 [05:45<01:15,  2.10s/it]

Epoch [21/40], Step [160/196], Loss: 0.9521


 87%|████████▋ | 170/196 [06:06<00:54,  2.11s/it]

Epoch [21/40], Step [170/196], Loss: 0.9492


 92%|█████████▏| 180/196 [06:27<00:33,  2.12s/it]

Epoch [21/40], Step [180/196], Loss: 0.9577


 97%|█████████▋| 190/196 [06:48<00:12,  2.10s/it]

Epoch [21/40], Step [190/196], Loss: 0.9762


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 55.61 %


  5%|▌         | 10/196 [00:28<07:02,  2.27s/it]

Epoch [22/40], Step [10/196], Loss: 0.9166


 10%|█         | 20/196 [00:50<06:22,  2.17s/it]

Epoch [22/40], Step [20/196], Loss: 0.8988


 15%|█▌        | 30/196 [01:11<05:49,  2.11s/it]

Epoch [22/40], Step [30/196], Loss: 0.8586


 20%|██        | 40/196 [01:31<05:23,  2.07s/it]

Epoch [22/40], Step [40/196], Loss: 0.8713


 26%|██▌       | 50/196 [01:53<05:09,  2.12s/it]

Epoch [22/40], Step [50/196], Loss: 0.8780


 31%|███       | 60/196 [02:14<04:51,  2.14s/it]

Epoch [22/40], Step [60/196], Loss: 0.8615


 36%|███▌      | 70/196 [02:35<04:28,  2.13s/it]

Epoch [22/40], Step [70/196], Loss: 0.8496


 41%|████      | 80/196 [02:56<04:04,  2.10s/it]

Epoch [22/40], Step [80/196], Loss: 0.8729


 46%|████▌     | 90/196 [03:18<03:43,  2.11s/it]

Epoch [22/40], Step [90/196], Loss: 0.8636


 51%|█████     | 100/196 [03:39<03:23,  2.12s/it]

Epoch [22/40], Step [100/196], Loss: 0.8779


 56%|█████▌    | 110/196 [04:00<03:02,  2.13s/it]

Epoch [22/40], Step [110/196], Loss: 0.8964


 61%|██████    | 120/196 [04:21<02:41,  2.12s/it]

Epoch [22/40], Step [120/196], Loss: 0.8856


 66%|██████▋   | 130/196 [04:42<02:20,  2.13s/it]

Epoch [22/40], Step [130/196], Loss: 0.8795


 71%|███████▏  | 140/196 [05:04<01:58,  2.11s/it]

Epoch [22/40], Step [140/196], Loss: 0.8978


 77%|███████▋  | 150/196 [05:25<01:37,  2.12s/it]

Epoch [22/40], Step [150/196], Loss: 0.8893


 82%|████████▏ | 160/196 [05:46<01:16,  2.12s/it]

Epoch [22/40], Step [160/196], Loss: 0.9031


 87%|████████▋ | 170/196 [06:07<00:55,  2.12s/it]

Epoch [22/40], Step [170/196], Loss: 0.9160


 92%|█████████▏| 180/196 [06:28<00:33,  2.11s/it]

Epoch [22/40], Step [180/196], Loss: 0.9100


 97%|█████████▋| 190/196 [06:50<00:12,  2.12s/it]

Epoch [22/40], Step [190/196], Loss: 0.9125


100%|██████████| 196/196 [07:01<00:00,  2.15s/it]


Test Accuracy of the student model on the test images: 59.41 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:28<07:02,  2.27s/it]

Epoch [23/40], Step [10/196], Loss: 0.8741


 10%|█         | 20/196 [00:50<06:22,  2.17s/it]

Epoch [23/40], Step [20/196], Loss: 0.8452


 15%|█▌        | 30/196 [01:11<05:45,  2.08s/it]

Epoch [23/40], Step [30/196], Loss: 0.8408


 20%|██        | 40/196 [01:32<05:22,  2.07s/it]

Epoch [23/40], Step [40/196], Loss: 0.8206


 26%|██▌       | 50/196 [01:53<05:06,  2.10s/it]

Epoch [23/40], Step [50/196], Loss: 0.8270


 31%|███       | 60/196 [02:14<04:49,  2.13s/it]

Epoch [23/40], Step [60/196], Loss: 0.8177


 36%|███▌      | 70/196 [02:35<04:27,  2.12s/it]

Epoch [23/40], Step [70/196], Loss: 0.8181


 41%|████      | 80/196 [02:56<04:02,  2.09s/it]

Epoch [23/40], Step [80/196], Loss: 0.8088


 46%|████▌     | 90/196 [03:17<03:41,  2.09s/it]

Epoch [23/40], Step [90/196], Loss: 0.8347


 51%|█████     | 100/196 [03:38<03:22,  2.11s/it]

Epoch [23/40], Step [100/196], Loss: 0.8233


 56%|█████▌    | 110/196 [03:59<03:02,  2.12s/it]

Epoch [23/40], Step [110/196], Loss: 0.8325


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [23/40], Step [120/196], Loss: 0.8400


 66%|██████▋   | 130/196 [04:41<02:18,  2.10s/it]

Epoch [23/40], Step [130/196], Loss: 0.8559


 71%|███████▏  | 140/196 [05:02<01:57,  2.10s/it]

Epoch [23/40], Step [140/196], Loss: 0.8518


 77%|███████▋  | 150/196 [05:24<01:36,  2.11s/it]

Epoch [23/40], Step [150/196], Loss: 0.8454


 82%|████████▏ | 160/196 [05:45<01:16,  2.11s/it]

Epoch [23/40], Step [160/196], Loss: 0.8582


 87%|████████▋ | 170/196 [06:06<00:54,  2.11s/it]

Epoch [23/40], Step [170/196], Loss: 0.8587


 92%|█████████▏| 180/196 [06:27<00:33,  2.10s/it]

Epoch [23/40], Step [180/196], Loss: 0.8365


 97%|█████████▋| 190/196 [06:48<00:12,  2.09s/it]

Epoch [23/40], Step [190/196], Loss: 0.8505


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 56.87 %


  5%|▌         | 10/196 [00:26<07:06,  2.29s/it]

Epoch [24/40], Step [10/196], Loss: 0.8161


 10%|█         | 20/196 [00:48<06:17,  2.14s/it]

Epoch [24/40], Step [20/196], Loss: 0.8054


 15%|█▌        | 30/196 [01:09<05:44,  2.08s/it]

Epoch [24/40], Step [30/196], Loss: 0.7846


 20%|██        | 40/196 [01:29<05:23,  2.08s/it]

Epoch [24/40], Step [40/196], Loss: 0.7748


 26%|██▌       | 50/196 [01:50<05:07,  2.10s/it]

Epoch [24/40], Step [50/196], Loss: 0.7573


 31%|███       | 60/196 [02:12<04:49,  2.13s/it]

Epoch [24/40], Step [60/196], Loss: 0.7603


 36%|███▌      | 70/196 [02:33<04:26,  2.11s/it]

Epoch [24/40], Step [70/196], Loss: 0.7681


 41%|████      | 80/196 [02:54<04:02,  2.09s/it]

Epoch [24/40], Step [80/196], Loss: 0.7640


 46%|████▌     | 90/196 [03:15<03:42,  2.10s/it]

Epoch [24/40], Step [90/196], Loss: 0.7803


 51%|█████     | 100/196 [03:36<03:21,  2.10s/it]

Epoch [24/40], Step [100/196], Loss: 0.7666


 56%|█████▌    | 110/196 [03:57<03:01,  2.11s/it]

Epoch [24/40], Step [110/196], Loss: 0.8007


 61%|██████    | 120/196 [04:18<02:41,  2.12s/it]

Epoch [24/40], Step [120/196], Loss: 0.7903


 66%|██████▋   | 130/196 [04:39<02:18,  2.11s/it]

Epoch [24/40], Step [130/196], Loss: 0.7883


 71%|███████▏  | 140/196 [05:00<01:57,  2.10s/it]

Epoch [24/40], Step [140/196], Loss: 0.7903


 77%|███████▋  | 150/196 [05:21<01:36,  2.10s/it]

Epoch [24/40], Step [150/196], Loss: 0.7921


 82%|████████▏ | 160/196 [05:42<01:15,  2.10s/it]

Epoch [24/40], Step [160/196], Loss: 0.7946


 87%|████████▋ | 170/196 [06:03<00:54,  2.11s/it]

Epoch [24/40], Step [170/196], Loss: 0.8134


 92%|█████████▏| 180/196 [06:24<00:33,  2.11s/it]

Epoch [24/40], Step [180/196], Loss: 0.8131


 97%|█████████▋| 190/196 [06:45<00:12,  2.11s/it]

Epoch [24/40], Step [190/196], Loss: 0.8091


100%|██████████| 196/196 [06:57<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 60.92 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:27<06:59,  2.26s/it]

Epoch [25/40], Step [10/196], Loss: 0.7617


 10%|█         | 20/196 [00:48<06:20,  2.16s/it]

Epoch [25/40], Step [20/196], Loss: 0.7610


 15%|█▌        | 30/196 [01:09<05:46,  2.09s/it]

Epoch [25/40], Step [30/196], Loss: 0.7622


 20%|██        | 40/196 [01:30<05:24,  2.08s/it]

Epoch [25/40], Step [40/196], Loss: 0.7489


 26%|██▌       | 50/196 [01:51<05:06,  2.10s/it]

Epoch [25/40], Step [50/196], Loss: 0.7321


 31%|███       | 60/196 [02:12<04:49,  2.13s/it]

Epoch [25/40], Step [60/196], Loss: 0.7476


 36%|███▌      | 70/196 [02:33<04:27,  2.12s/it]

Epoch [25/40], Step [70/196], Loss: 0.7294


 41%|████      | 80/196 [02:55<04:04,  2.11s/it]

Epoch [25/40], Step [80/196], Loss: 0.7309


 46%|████▌     | 90/196 [03:15<03:42,  2.10s/it]

Epoch [25/40], Step [90/196], Loss: 0.7352


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [25/40], Step [100/196], Loss: 0.7489


 56%|█████▌    | 110/196 [03:58<03:01,  2.12s/it]

Epoch [25/40], Step [110/196], Loss: 0.7310


 61%|██████    | 120/196 [04:19<02:40,  2.11s/it]

Epoch [25/40], Step [120/196], Loss: 0.7466


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [25/40], Step [130/196], Loss: 0.7518


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [25/40], Step [140/196], Loss: 0.7546


 77%|███████▋  | 150/196 [05:22<01:36,  2.10s/it]

Epoch [25/40], Step [150/196], Loss: 0.7458


 82%|████████▏ | 160/196 [05:43<01:16,  2.11s/it]

Epoch [25/40], Step [160/196], Loss: 0.7477


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [25/40], Step [170/196], Loss: 0.7528


 92%|█████████▏| 180/196 [06:25<00:33,  2.09s/it]

Epoch [25/40], Step [180/196], Loss: 0.7707


 97%|█████████▋| 190/196 [06:46<00:12,  2.10s/it]

Epoch [25/40], Step [190/196], Loss: 0.7676


100%|██████████| 196/196 [06:57<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 61.55 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:28<07:00,  2.26s/it]

Epoch [26/40], Step [10/196], Loss: 0.7292


 10%|█         | 20/196 [00:50<06:20,  2.16s/it]

Epoch [26/40], Step [20/196], Loss: 0.7063


 15%|█▌        | 30/196 [01:11<05:46,  2.09s/it]

Epoch [26/40], Step [30/196], Loss: 0.7083


 20%|██        | 40/196 [01:31<05:22,  2.07s/it]

Epoch [26/40], Step [40/196], Loss: 0.6930


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [26/40], Step [50/196], Loss: 0.7031


 31%|███       | 60/196 [02:13<04:48,  2.12s/it]

Epoch [26/40], Step [60/196], Loss: 0.7045


 36%|███▌      | 70/196 [02:35<04:26,  2.12s/it]

Epoch [26/40], Step [70/196], Loss: 0.6987


 41%|████      | 80/196 [02:56<04:02,  2.09s/it]

Epoch [26/40], Step [80/196], Loss: 0.6931


 46%|████▌     | 90/196 [03:17<03:42,  2.10s/it]

Epoch [26/40], Step [90/196], Loss: 0.6794


 51%|█████     | 100/196 [03:38<03:22,  2.11s/it]

Epoch [26/40], Step [100/196], Loss: 0.6988


 56%|█████▌    | 110/196 [03:59<03:01,  2.11s/it]

Epoch [26/40], Step [110/196], Loss: 0.6936


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [26/40], Step [120/196], Loss: 0.7070


 66%|██████▋   | 130/196 [04:41<02:18,  2.10s/it]

Epoch [26/40], Step [130/196], Loss: 0.7021


 71%|███████▏  | 140/196 [05:02<01:57,  2.10s/it]

Epoch [26/40], Step [140/196], Loss: 0.7138


 77%|███████▋  | 150/196 [05:23<01:36,  2.11s/it]

Epoch [26/40], Step [150/196], Loss: 0.6977


 82%|████████▏ | 160/196 [05:44<01:15,  2.11s/it]

Epoch [26/40], Step [160/196], Loss: 0.7163


 87%|████████▋ | 170/196 [06:05<00:54,  2.11s/it]

Epoch [26/40], Step [170/196], Loss: 0.7207


 92%|█████████▏| 180/196 [06:26<00:33,  2.11s/it]

Epoch [26/40], Step [180/196], Loss: 0.7139


 97%|█████████▋| 190/196 [06:47<00:12,  2.11s/it]

Epoch [26/40], Step [190/196], Loss: 0.7200


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 58.73 %


  5%|▌         | 10/196 [00:29<07:06,  2.29s/it]

Epoch [27/40], Step [10/196], Loss: 0.6881


 10%|█         | 20/196 [00:51<06:20,  2.16s/it]

Epoch [27/40], Step [20/196], Loss: 0.6774


 15%|█▌        | 30/196 [01:12<05:45,  2.08s/it]

Epoch [27/40], Step [30/196], Loss: 0.6702


 20%|██        | 40/196 [01:33<05:23,  2.08s/it]

Epoch [27/40], Step [40/196], Loss: 0.6648


 26%|██▌       | 50/196 [01:54<05:08,  2.11s/it]

Epoch [27/40], Step [50/196], Loss: 0.6548


 31%|███       | 60/196 [02:15<04:49,  2.13s/it]

Epoch [27/40], Step [60/196], Loss: 0.6477


 36%|███▌      | 70/196 [02:36<04:26,  2.12s/it]

Epoch [27/40], Step [70/196], Loss: 0.6610


 41%|████      | 80/196 [02:57<04:02,  2.09s/it]

Epoch [27/40], Step [80/196], Loss: 0.6590


 46%|████▌     | 90/196 [03:18<03:41,  2.09s/it]

Epoch [27/40], Step [90/196], Loss: 0.6559


 51%|█████     | 100/196 [03:39<03:23,  2.12s/it]

Epoch [27/40], Step [100/196], Loss: 0.6553


 56%|█████▌    | 110/196 [04:00<03:02,  2.12s/it]

Epoch [27/40], Step [110/196], Loss: 0.6483


 61%|██████    | 120/196 [04:21<02:40,  2.11s/it]

Epoch [27/40], Step [120/196], Loss: 0.6536


 66%|██████▋   | 130/196 [04:42<02:18,  2.10s/it]

Epoch [27/40], Step [130/196], Loss: 0.6555


 71%|███████▏  | 140/196 [05:03<01:57,  2.10s/it]

Epoch [27/40], Step [140/196], Loss: 0.6690


 77%|███████▋  | 150/196 [05:24<01:36,  2.10s/it]

Epoch [27/40], Step [150/196], Loss: 0.6688


 82%|████████▏ | 160/196 [05:45<01:15,  2.11s/it]

Epoch [27/40], Step [160/196], Loss: 0.6660


 87%|████████▋ | 170/196 [06:07<00:54,  2.11s/it]

Epoch [27/40], Step [170/196], Loss: 0.6683


 92%|█████████▏| 180/196 [06:28<00:33,  2.10s/it]

Epoch [27/40], Step [180/196], Loss: 0.6694


 97%|█████████▋| 190/196 [06:49<00:12,  2.10s/it]

Epoch [27/40], Step [190/196], Loss: 0.6811


100%|██████████| 196/196 [07:00<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 62.11 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:26<06:57,  2.24s/it]

Epoch [28/40], Step [10/196], Loss: 0.6502


 10%|█         | 20/196 [00:48<06:20,  2.16s/it]

Epoch [28/40], Step [20/196], Loss: 0.6399


 15%|█▌        | 30/196 [01:09<05:45,  2.08s/it]

Epoch [28/40], Step [30/196], Loss: 0.6241


 20%|██        | 40/196 [01:29<05:22,  2.07s/it]

Epoch [28/40], Step [40/196], Loss: 0.6131


 26%|██▌       | 50/196 [01:50<05:08,  2.11s/it]

Epoch [28/40], Step [50/196], Loss: 0.5995


 31%|███       | 60/196 [02:12<04:50,  2.13s/it]

Epoch [28/40], Step [60/196], Loss: 0.6181


 36%|███▌      | 70/196 [02:33<04:26,  2.11s/it]

Epoch [28/40], Step [70/196], Loss: 0.6059


 41%|████      | 80/196 [02:54<04:02,  2.09s/it]

Epoch [28/40], Step [80/196], Loss: 0.6128


 46%|████▌     | 90/196 [03:15<03:42,  2.10s/it]

Epoch [28/40], Step [90/196], Loss: 0.6158


 51%|█████     | 100/196 [03:36<03:22,  2.11s/it]

Epoch [28/40], Step [100/196], Loss: 0.6147


 56%|█████▌    | 110/196 [03:57<03:01,  2.11s/it]

Epoch [28/40], Step [110/196], Loss: 0.6165


 61%|██████    | 120/196 [04:18<02:40,  2.12s/it]

Epoch [28/40], Step [120/196], Loss: 0.6274


 66%|██████▋   | 130/196 [04:39<02:19,  2.11s/it]

Epoch [28/40], Step [130/196], Loss: 0.6289


 71%|███████▏  | 140/196 [05:00<01:57,  2.11s/it]

Epoch [28/40], Step [140/196], Loss: 0.6308


 77%|███████▋  | 150/196 [05:21<01:36,  2.10s/it]

Epoch [28/40], Step [150/196], Loss: 0.6324


 82%|████████▏ | 160/196 [05:42<01:15,  2.10s/it]

Epoch [28/40], Step [160/196], Loss: 0.6304


 87%|████████▋ | 170/196 [06:03<00:54,  2.11s/it]

Epoch [28/40], Step [170/196], Loss: 0.6332


 92%|█████████▏| 180/196 [06:24<00:33,  2.10s/it]

Epoch [28/40], Step [180/196], Loss: 0.6247


 97%|█████████▋| 190/196 [06:45<00:12,  2.10s/it]

Epoch [28/40], Step [190/196], Loss: 0.6274


100%|██████████| 196/196 [06:57<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 64.73 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:28<07:02,  2.27s/it]

Epoch [29/40], Step [10/196], Loss: 0.5934


 10%|█         | 20/196 [00:50<06:19,  2.16s/it]

Epoch [29/40], Step [20/196], Loss: 0.5891


 15%|█▌        | 30/196 [01:11<05:45,  2.08s/it]

Epoch [29/40], Step [30/196], Loss: 0.5939


 20%|██        | 40/196 [01:31<05:22,  2.07s/it]

Epoch [29/40], Step [40/196], Loss: 0.5836


 26%|██▌       | 50/196 [01:52<05:07,  2.11s/it]

Epoch [29/40], Step [50/196], Loss: 0.5794


 31%|███       | 60/196 [02:14<04:49,  2.13s/it]

Epoch [29/40], Step [60/196], Loss: 0.5803


 36%|███▌      | 70/196 [02:35<04:25,  2.11s/it]

Epoch [29/40], Step [70/196], Loss: 0.5814


 41%|████      | 80/196 [02:56<04:01,  2.08s/it]

Epoch [29/40], Step [80/196], Loss: 0.5693


 46%|████▌     | 90/196 [03:17<03:43,  2.10s/it]

Epoch [29/40], Step [90/196], Loss: 0.5752


 51%|█████     | 100/196 [03:38<03:22,  2.11s/it]

Epoch [29/40], Step [100/196], Loss: 0.5807


 56%|█████▌    | 110/196 [03:59<03:02,  2.12s/it]

Epoch [29/40], Step [110/196], Loss: 0.5739


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [29/40], Step [120/196], Loss: 0.5761


 66%|██████▋   | 130/196 [04:41<02:18,  2.09s/it]

Epoch [29/40], Step [130/196], Loss: 0.5816


 71%|███████▏  | 140/196 [05:02<01:57,  2.10s/it]

Epoch [29/40], Step [140/196], Loss: 0.5770


 77%|███████▋  | 150/196 [05:23<01:37,  2.12s/it]

Epoch [29/40], Step [150/196], Loss: 0.5813


 82%|████████▏ | 160/196 [05:44<01:16,  2.12s/it]

Epoch [29/40], Step [160/196], Loss: 0.5825


 87%|████████▋ | 170/196 [06:05<00:54,  2.10s/it]

Epoch [29/40], Step [170/196], Loss: 0.5968


 92%|█████████▏| 180/196 [06:26<00:33,  2.10s/it]

Epoch [29/40], Step [180/196], Loss: 0.5905


 97%|█████████▋| 190/196 [06:47<00:12,  2.10s/it]

Epoch [29/40], Step [190/196], Loss: 0.5847


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 64.84 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:27<06:58,  2.25s/it]

Epoch [30/40], Step [10/196], Loss: 0.5570


 10%|█         | 20/196 [00:49<06:21,  2.17s/it]

Epoch [30/40], Step [20/196], Loss: 0.5529


 15%|█▌        | 30/196 [01:10<05:45,  2.08s/it]

Epoch [30/40], Step [30/196], Loss: 0.5566


 20%|██        | 40/196 [01:31<05:24,  2.08s/it]

Epoch [30/40], Step [40/196], Loss: 0.5397


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [30/40], Step [50/196], Loss: 0.5386


 31%|███       | 60/196 [02:13<04:49,  2.13s/it]

Epoch [30/40], Step [60/196], Loss: 0.5410


 36%|███▌      | 70/196 [02:34<04:27,  2.12s/it]

Epoch [30/40], Step [70/196], Loss: 0.5454


 41%|████      | 80/196 [02:55<04:03,  2.10s/it]

Epoch [30/40], Step [80/196], Loss: 0.5345


 46%|████▌     | 90/196 [03:16<03:42,  2.10s/it]

Epoch [30/40], Step [90/196], Loss: 0.5446


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [30/40], Step [100/196], Loss: 0.5439


 56%|█████▌    | 110/196 [03:58<03:02,  2.12s/it]

Epoch [30/40], Step [110/196], Loss: 0.5462


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [30/40], Step [120/196], Loss: 0.5489


 66%|██████▋   | 130/196 [04:41<02:18,  2.10s/it]

Epoch [30/40], Step [130/196], Loss: 0.5504


 71%|███████▏  | 140/196 [05:02<01:57,  2.10s/it]

Epoch [30/40], Step [140/196], Loss: 0.5492


 77%|███████▋  | 150/196 [05:23<01:36,  2.11s/it]

Epoch [30/40], Step [150/196], Loss: 0.5522


 82%|████████▏ | 160/196 [05:44<01:15,  2.10s/it]

Epoch [30/40], Step [160/196], Loss: 0.5501


 87%|████████▋ | 170/196 [06:05<00:54,  2.10s/it]

Epoch [30/40], Step [170/196], Loss: 0.5503


 92%|█████████▏| 180/196 [06:26<00:33,  2.11s/it]

Epoch [30/40], Step [180/196], Loss: 0.5450


 97%|█████████▋| 190/196 [06:47<00:12,  2.11s/it]

Epoch [30/40], Step [190/196], Loss: 0.5512


100%|██████████| 196/196 [06:58<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 64.87 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:28<07:01,  2.27s/it]

Epoch [31/40], Step [10/196], Loss: 0.5300


 10%|█         | 20/196 [00:50<06:19,  2.16s/it]

Epoch [31/40], Step [20/196], Loss: 0.5199


 15%|█▌        | 30/196 [01:11<05:44,  2.08s/it]

Epoch [31/40], Step [30/196], Loss: 0.5101


 20%|██        | 40/196 [01:31<05:23,  2.07s/it]

Epoch [31/40], Step [40/196], Loss: 0.5101


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [31/40], Step [50/196], Loss: 0.5023


 31%|███       | 60/196 [02:14<04:49,  2.13s/it]

Epoch [31/40], Step [60/196], Loss: 0.5061


 36%|███▌      | 70/196 [02:35<04:27,  2.12s/it]

Epoch [31/40], Step [70/196], Loss: 0.5090


 41%|████      | 80/196 [02:56<04:03,  2.10s/it]

Epoch [31/40], Step [80/196], Loss: 0.5150


 46%|████▌     | 90/196 [03:17<03:42,  2.10s/it]

Epoch [31/40], Step [90/196], Loss: 0.5088


 51%|█████     | 100/196 [03:38<03:23,  2.12s/it]

Epoch [31/40], Step [100/196], Loss: 0.5077


 56%|█████▌    | 110/196 [03:59<03:01,  2.11s/it]

Epoch [31/40], Step [110/196], Loss: 0.5185


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [31/40], Step [120/196], Loss: 0.5060


 66%|██████▋   | 130/196 [04:41<02:19,  2.11s/it]

Epoch [31/40], Step [130/196], Loss: 0.5141


 71%|███████▏  | 140/196 [05:02<01:57,  2.10s/it]

Epoch [31/40], Step [140/196], Loss: 0.5072


 77%|███████▋  | 150/196 [05:23<01:36,  2.10s/it]

Epoch [31/40], Step [150/196], Loss: 0.5175


 82%|████████▏ | 160/196 [05:44<01:16,  2.11s/it]

Epoch [31/40], Step [160/196], Loss: 0.5120


 87%|████████▋ | 170/196 [06:05<00:54,  2.10s/it]

Epoch [31/40], Step [170/196], Loss: 0.5156


 92%|█████████▏| 180/196 [06:26<00:33,  2.11s/it]

Epoch [31/40], Step [180/196], Loss: 0.5143


 97%|█████████▋| 190/196 [06:47<00:12,  2.11s/it]

Epoch [31/40], Step [190/196], Loss: 0.5234


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 66.28 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:28<07:01,  2.27s/it]

Epoch [32/40], Step [10/196], Loss: 0.4988


 10%|█         | 20/196 [00:50<06:19,  2.16s/it]

Epoch [32/40], Step [20/196], Loss: 0.4855


 15%|█▌        | 30/196 [01:11<05:44,  2.07s/it]

Epoch [32/40], Step [30/196], Loss: 0.4863


 20%|██        | 40/196 [01:31<05:22,  2.07s/it]

Epoch [32/40], Step [40/196], Loss: 0.4858


 26%|██▌       | 50/196 [01:52<05:06,  2.10s/it]

Epoch [32/40], Step [50/196], Loss: 0.4798


 31%|███       | 60/196 [02:13<04:49,  2.13s/it]

Epoch [32/40], Step [60/196], Loss: 0.4819


 36%|███▌      | 70/196 [02:35<04:28,  2.13s/it]

Epoch [32/40], Step [70/196], Loss: 0.4834


 41%|████      | 80/196 [02:56<04:03,  2.10s/it]

Epoch [32/40], Step [80/196], Loss: 0.4806


 46%|████▌     | 90/196 [03:17<03:42,  2.10s/it]

Epoch [32/40], Step [90/196], Loss: 0.4829


 51%|█████     | 100/196 [03:38<03:22,  2.11s/it]

Epoch [32/40], Step [100/196], Loss: 0.4763


 56%|█████▌    | 110/196 [03:59<03:01,  2.11s/it]

Epoch [32/40], Step [110/196], Loss: 0.4826


 61%|██████    | 120/196 [04:20<02:40,  2.11s/it]

Epoch [32/40], Step [120/196], Loss: 0.4747


 66%|██████▋   | 130/196 [04:41<02:18,  2.10s/it]

Epoch [32/40], Step [130/196], Loss: 0.4860


 71%|███████▏  | 140/196 [05:02<01:57,  2.10s/it]

Epoch [32/40], Step [140/196], Loss: 0.4827


 77%|███████▋  | 150/196 [05:23<01:37,  2.11s/it]

Epoch [32/40], Step [150/196], Loss: 0.4892


 82%|████████▏ | 160/196 [05:44<01:16,  2.12s/it]

Epoch [32/40], Step [160/196], Loss: 0.4902


 87%|████████▋ | 170/196 [06:05<00:54,  2.10s/it]

Epoch [32/40], Step [170/196], Loss: 0.4842


 92%|█████████▏| 180/196 [06:26<00:33,  2.11s/it]

Epoch [32/40], Step [180/196], Loss: 0.4905


 97%|█████████▋| 190/196 [06:47<00:12,  2.10s/it]

Epoch [32/40], Step [190/196], Loss: 0.4892


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 67.09 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:27<06:59,  2.25s/it]

Epoch [33/40], Step [10/196], Loss: 0.4752


 10%|█         | 20/196 [00:49<06:19,  2.16s/it]

Epoch [33/40], Step [20/196], Loss: 0.4626


 15%|█▌        | 30/196 [01:10<05:46,  2.09s/it]

Epoch [33/40], Step [30/196], Loss: 0.4589


 20%|██        | 40/196 [01:30<05:23,  2.07s/it]

Epoch [33/40], Step [40/196], Loss: 0.4655


 26%|██▌       | 50/196 [01:51<05:07,  2.11s/it]

Epoch [33/40], Step [50/196], Loss: 0.4631


 31%|███       | 60/196 [02:13<04:49,  2.13s/it]

Epoch [33/40], Step [60/196], Loss: 0.4614


 36%|███▌      | 70/196 [02:34<04:26,  2.12s/it]

Epoch [33/40], Step [70/196], Loss: 0.4647


 41%|████      | 80/196 [02:55<04:03,  2.10s/it]

Epoch [33/40], Step [80/196], Loss: 0.4623


 46%|████▌     | 90/196 [03:16<03:42,  2.10s/it]

Epoch [33/40], Step [90/196], Loss: 0.4563


 51%|█████     | 100/196 [03:37<03:21,  2.10s/it]

Epoch [33/40], Step [100/196], Loss: 0.4616


 56%|█████▌    | 110/196 [03:58<03:01,  2.11s/it]

Epoch [33/40], Step [110/196], Loss: 0.4591


 61%|██████    | 120/196 [04:19<02:41,  2.12s/it]

Epoch [33/40], Step [120/196], Loss: 0.4620


 66%|██████▋   | 130/196 [04:40<02:19,  2.12s/it]

Epoch [33/40], Step [130/196], Loss: 0.4561


 71%|███████▏  | 140/196 [05:02<01:58,  2.11s/it]

Epoch [33/40], Step [140/196], Loss: 0.4554


 77%|███████▋  | 150/196 [05:23<01:36,  2.10s/it]

Epoch [33/40], Step [150/196], Loss: 0.4597


 82%|████████▏ | 160/196 [05:44<01:15,  2.10s/it]

Epoch [33/40], Step [160/196], Loss: 0.4582


 87%|████████▋ | 170/196 [06:05<00:55,  2.12s/it]

Epoch [33/40], Step [170/196], Loss: 0.4622


 92%|█████████▏| 180/196 [06:26<00:33,  2.12s/it]

Epoch [33/40], Step [180/196], Loss: 0.4687


 97%|█████████▋| 190/196 [06:47<00:12,  2.11s/it]

Epoch [33/40], Step [190/196], Loss: 0.4636


100%|██████████| 196/196 [06:58<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 68.21 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:27<06:56,  2.24s/it]

Epoch [34/40], Step [10/196], Loss: 0.4392


 10%|█         | 20/196 [00:49<06:21,  2.17s/it]

Epoch [34/40], Step [20/196], Loss: 0.4346


 15%|█▌        | 30/196 [01:10<05:47,  2.09s/it]

Epoch [34/40], Step [30/196], Loss: 0.4472


 20%|██        | 40/196 [01:30<05:24,  2.08s/it]

Epoch [34/40], Step [40/196], Loss: 0.4379


 26%|██▌       | 50/196 [01:51<05:06,  2.10s/it]

Epoch [34/40], Step [50/196], Loss: 0.4369


 31%|███       | 60/196 [02:13<04:49,  2.13s/it]

Epoch [34/40], Step [60/196], Loss: 0.4436


 36%|███▌      | 70/196 [02:34<04:26,  2.12s/it]

Epoch [34/40], Step [70/196], Loss: 0.4437


 41%|████      | 80/196 [02:55<04:02,  2.09s/it]

Epoch [34/40], Step [80/196], Loss: 0.4398


 46%|████▌     | 90/196 [03:16<03:41,  2.09s/it]

Epoch [34/40], Step [90/196], Loss: 0.4444


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [34/40], Step [100/196], Loss: 0.4337


 56%|█████▌    | 110/196 [03:58<03:02,  2.12s/it]

Epoch [34/40], Step [110/196], Loss: 0.4471


 61%|██████    | 120/196 [04:19<02:40,  2.11s/it]

Epoch [34/40], Step [120/196], Loss: 0.4341


 66%|██████▋   | 130/196 [04:40<02:18,  2.11s/it]

Epoch [34/40], Step [130/196], Loss: 0.4369


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [34/40], Step [140/196], Loss: 0.4389


 77%|███████▋  | 150/196 [05:22<01:36,  2.11s/it]

Epoch [34/40], Step [150/196], Loss: 0.4371


 82%|████████▏ | 160/196 [05:43<01:16,  2.12s/it]

Epoch [34/40], Step [160/196], Loss: 0.4389


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [34/40], Step [170/196], Loss: 0.4513


 92%|█████████▏| 180/196 [06:26<00:33,  2.12s/it]

Epoch [34/40], Step [180/196], Loss: 0.4372


 97%|█████████▋| 190/196 [06:47<00:12,  2.11s/it]

Epoch [34/40], Step [190/196], Loss: 0.4447


100%|██████████| 196/196 [06:58<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 67.91 %


  5%|▌         | 10/196 [00:28<07:01,  2.27s/it]

Epoch [35/40], Step [10/196], Loss: 0.4299


 10%|█         | 20/196 [00:50<06:21,  2.17s/it]

Epoch [35/40], Step [20/196], Loss: 0.4201


 15%|█▌        | 30/196 [01:11<05:46,  2.08s/it]

Epoch [35/40], Step [30/196], Loss: 0.4216


 20%|██        | 40/196 [01:32<05:23,  2.07s/it]

Epoch [35/40], Step [40/196], Loss: 0.4201


 26%|██▌       | 50/196 [01:53<05:07,  2.10s/it]

Epoch [35/40], Step [50/196], Loss: 0.4212


 31%|███       | 60/196 [02:14<04:49,  2.13s/it]

Epoch [35/40], Step [60/196], Loss: 0.4274


 36%|███▌      | 70/196 [02:35<04:27,  2.13s/it]

Epoch [35/40], Step [70/196], Loss: 0.4158


 41%|████      | 80/196 [02:56<04:03,  2.10s/it]

Epoch [35/40], Step [80/196], Loss: 0.4176


 46%|████▌     | 90/196 [03:17<03:41,  2.09s/it]

Epoch [35/40], Step [90/196], Loss: 0.4227


 51%|█████     | 100/196 [03:38<03:23,  2.12s/it]

Epoch [35/40], Step [100/196], Loss: 0.4145


 56%|█████▌    | 110/196 [04:00<03:02,  2.12s/it]

Epoch [35/40], Step [110/196], Loss: 0.4256


 61%|██████    | 120/196 [04:21<02:40,  2.11s/it]

Epoch [35/40], Step [120/196], Loss: 0.4233


 66%|██████▋   | 130/196 [04:42<02:19,  2.11s/it]

Epoch [35/40], Step [130/196], Loss: 0.4236


 71%|███████▏  | 140/196 [05:03<01:57,  2.11s/it]

Epoch [35/40], Step [140/196], Loss: 0.4311


 77%|███████▋  | 150/196 [05:24<01:36,  2.10s/it]

Epoch [35/40], Step [150/196], Loss: 0.4226


 82%|████████▏ | 160/196 [05:45<01:15,  2.10s/it]

Epoch [35/40], Step [160/196], Loss: 0.4274


 87%|████████▋ | 170/196 [06:06<00:54,  2.11s/it]

Epoch [35/40], Step [170/196], Loss: 0.4247


 92%|█████████▏| 180/196 [06:27<00:33,  2.11s/it]

Epoch [35/40], Step [180/196], Loss: 0.4190


 97%|█████████▋| 190/196 [06:48<00:12,  2.11s/it]

Epoch [35/40], Step [190/196], Loss: 0.4243


100%|██████████| 196/196 [06:59<00:00,  2.14s/it]


Test Accuracy of the student model on the test images: 68.75 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:25<06:54,  2.23s/it]

Epoch [36/40], Step [10/196], Loss: 0.4114


 10%|█         | 20/196 [00:47<06:20,  2.16s/it]

Epoch [36/40], Step [20/196], Loss: 0.4106


 15%|█▌        | 30/196 [01:08<05:46,  2.08s/it]

Epoch [36/40], Step [30/196], Loss: 0.4083


 20%|██        | 40/196 [01:28<05:23,  2.08s/it]

Epoch [36/40], Step [40/196], Loss: 0.4103


 26%|██▌       | 50/196 [01:49<05:08,  2.12s/it]

Epoch [36/40], Step [50/196], Loss: 0.4059


 31%|███       | 60/196 [02:11<04:50,  2.14s/it]

Epoch [36/40], Step [60/196], Loss: 0.4057


 36%|███▌      | 70/196 [02:32<04:26,  2.12s/it]

Epoch [36/40], Step [70/196], Loss: 0.4073


 41%|████      | 80/196 [02:53<04:03,  2.10s/it]

Epoch [36/40], Step [80/196], Loss: 0.4086


 46%|████▌     | 90/196 [03:14<03:42,  2.10s/it]

Epoch [36/40], Step [90/196], Loss: 0.4124


 51%|█████     | 100/196 [03:35<03:23,  2.12s/it]

Epoch [36/40], Step [100/196], Loss: 0.4111


 56%|█████▌    | 110/196 [03:56<03:01,  2.12s/it]

Epoch [36/40], Step [110/196], Loss: 0.4051


 61%|██████    | 120/196 [04:17<02:40,  2.11s/it]

Epoch [36/40], Step [120/196], Loss: 0.4044


 66%|██████▋   | 130/196 [04:38<02:18,  2.10s/it]

Epoch [36/40], Step [130/196], Loss: 0.4166


 71%|███████▏  | 140/196 [04:59<01:57,  2.10s/it]

Epoch [36/40], Step [140/196], Loss: 0.4118


 77%|███████▋  | 150/196 [05:20<01:37,  2.11s/it]

Epoch [36/40], Step [150/196], Loss: 0.4164


 82%|████████▏ | 160/196 [05:42<01:16,  2.12s/it]

Epoch [36/40], Step [160/196], Loss: 0.4037


 87%|████████▋ | 170/196 [06:03<00:54,  2.11s/it]

Epoch [36/40], Step [170/196], Loss: 0.4105


 92%|█████████▏| 180/196 [06:24<00:33,  2.11s/it]

Epoch [36/40], Step [180/196], Loss: 0.4153


 97%|█████████▋| 190/196 [06:45<00:12,  2.11s/it]

Epoch [36/40], Step [190/196], Loss: 0.4087


100%|██████████| 196/196 [06:56<00:00,  2.12s/it]


Test Accuracy of the student model on the test images: 69.10 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:25<06:53,  2.22s/it]

Epoch [37/40], Step [10/196], Loss: 0.4055


 10%|█         | 20/196 [00:47<06:21,  2.17s/it]

Epoch [37/40], Step [20/196], Loss: 0.3950


 15%|█▌        | 30/196 [01:08<05:45,  2.08s/it]

Epoch [37/40], Step [30/196], Loss: 0.3980


 20%|██        | 40/196 [01:28<05:23,  2.07s/it]

Epoch [37/40], Step [40/196], Loss: 0.4005


 26%|██▌       | 50/196 [01:49<05:06,  2.10s/it]

Epoch [37/40], Step [50/196], Loss: 0.4041


 31%|███       | 60/196 [02:11<04:50,  2.13s/it]

Epoch [37/40], Step [60/196], Loss: 0.3932


 36%|███▌      | 70/196 [02:32<04:28,  2.13s/it]

Epoch [37/40], Step [70/196], Loss: 0.3964


 41%|████      | 80/196 [02:53<04:02,  2.09s/it]

Epoch [37/40], Step [80/196], Loss: 0.3928


 46%|████▌     | 90/196 [03:14<03:42,  2.10s/it]

Epoch [37/40], Step [90/196], Loss: 0.3961


 51%|█████     | 100/196 [03:35<03:22,  2.11s/it]

Epoch [37/40], Step [100/196], Loss: 0.4005


 56%|█████▌    | 110/196 [03:56<03:02,  2.12s/it]

Epoch [37/40], Step [110/196], Loss: 0.4051


 61%|██████    | 120/196 [04:17<02:40,  2.11s/it]

Epoch [37/40], Step [120/196], Loss: 0.3996


 66%|██████▋   | 130/196 [04:38<02:19,  2.11s/it]

Epoch [37/40], Step [130/196], Loss: 0.3975


 71%|███████▏  | 140/196 [04:59<01:57,  2.10s/it]

Epoch [37/40], Step [140/196], Loss: 0.4041


 77%|███████▋  | 150/196 [05:20<01:37,  2.11s/it]

Epoch [37/40], Step [150/196], Loss: 0.3915


 82%|████████▏ | 160/196 [05:41<01:15,  2.10s/it]

Epoch [37/40], Step [160/196], Loss: 0.3946


 87%|████████▋ | 170/196 [06:02<00:54,  2.10s/it]

Epoch [37/40], Step [170/196], Loss: 0.3965


 92%|█████████▏| 180/196 [06:24<00:33,  2.11s/it]

Epoch [37/40], Step [180/196], Loss: 0.3923


 97%|█████████▋| 190/196 [06:45<00:12,  2.11s/it]

Epoch [37/40], Step [190/196], Loss: 0.4017


100%|██████████| 196/196 [06:56<00:00,  2.12s/it]


Test Accuracy of the student model on the test images: 69.07 %


  5%|▌         | 10/196 [00:26<06:56,  2.24s/it]

Epoch [38/40], Step [10/196], Loss: 0.3921


 10%|█         | 20/196 [00:47<06:18,  2.15s/it]

Epoch [38/40], Step [20/196], Loss: 0.3906


 15%|█▌        | 30/196 [01:08<05:44,  2.08s/it]

Epoch [38/40], Step [30/196], Loss: 0.3956


 20%|██        | 40/196 [01:29<05:22,  2.07s/it]

Epoch [38/40], Step [40/196], Loss: 0.3884


 26%|██▌       | 50/196 [01:50<05:07,  2.10s/it]

Epoch [38/40], Step [50/196], Loss: 0.3879


 31%|███       | 60/196 [02:11<04:50,  2.13s/it]

Epoch [38/40], Step [60/196], Loss: 0.3906


 36%|███▌      | 70/196 [02:33<04:27,  2.12s/it]

Epoch [38/40], Step [70/196], Loss: 0.3905


 41%|████      | 80/196 [02:54<04:02,  2.09s/it]

Epoch [38/40], Step [80/196], Loss: 0.3960


 46%|████▌     | 90/196 [03:14<03:41,  2.09s/it]

Epoch [38/40], Step [90/196], Loss: 0.3908


 51%|█████     | 100/196 [03:35<03:22,  2.11s/it]

Epoch [38/40], Step [100/196], Loss: 0.3923


 56%|█████▌    | 110/196 [03:57<03:02,  2.12s/it]

Epoch [38/40], Step [110/196], Loss: 0.3930


 61%|██████    | 120/196 [04:18<02:39,  2.10s/it]

Epoch [38/40], Step [120/196], Loss: 0.3929


 66%|██████▋   | 130/196 [04:39<02:18,  2.09s/it]

Epoch [38/40], Step [130/196], Loss: 0.3922


 71%|███████▏  | 140/196 [05:00<01:57,  2.10s/it]

Epoch [38/40], Step [140/196], Loss: 0.3851


 77%|███████▋  | 150/196 [05:21<01:37,  2.11s/it]

Epoch [38/40], Step [150/196], Loss: 0.3874


 82%|████████▏ | 160/196 [05:42<01:16,  2.12s/it]

Epoch [38/40], Step [160/196], Loss: 0.3908


 87%|████████▋ | 170/196 [06:03<00:54,  2.11s/it]

Epoch [38/40], Step [170/196], Loss: 0.3919


 92%|█████████▏| 180/196 [06:24<00:33,  2.11s/it]

Epoch [38/40], Step [180/196], Loss: 0.3952


 97%|█████████▋| 190/196 [06:45<00:12,  2.10s/it]

Epoch [38/40], Step [190/196], Loss: 0.3887


100%|██████████| 196/196 [06:56<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 69.18 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:27<06:57,  2.24s/it]

Epoch [39/40], Step [10/196], Loss: 0.3881


 10%|█         | 20/196 [00:49<06:19,  2.16s/it]

Epoch [39/40], Step [20/196], Loss: 0.3899


 15%|█▌        | 30/196 [01:09<05:43,  2.07s/it]

Epoch [39/40], Step [30/196], Loss: 0.3939


 20%|██        | 40/196 [01:30<05:21,  2.06s/it]

Epoch [39/40], Step [40/196], Loss: 0.3839


 26%|██▌       | 50/196 [01:51<05:05,  2.10s/it]

Epoch [39/40], Step [50/196], Loss: 0.3914


 31%|███       | 60/196 [02:12<04:50,  2.13s/it]

Epoch [39/40], Step [60/196], Loss: 0.3836


 36%|███▌      | 70/196 [02:33<04:26,  2.11s/it]

Epoch [39/40], Step [70/196], Loss: 0.3901


 41%|████      | 80/196 [02:54<04:03,  2.10s/it]

Epoch [39/40], Step [80/196], Loss: 0.3798


 46%|████▌     | 90/196 [03:15<03:42,  2.10s/it]

Epoch [39/40], Step [90/196], Loss: 0.3901


 51%|█████     | 100/196 [03:36<03:22,  2.11s/it]

Epoch [39/40], Step [100/196], Loss: 0.3891


 56%|█████▌    | 110/196 [03:58<03:02,  2.12s/it]

Epoch [39/40], Step [110/196], Loss: 0.3862


 61%|██████    | 120/196 [04:19<02:39,  2.10s/it]

Epoch [39/40], Step [120/196], Loss: 0.3852


 66%|██████▋   | 130/196 [04:39<02:18,  2.10s/it]

Epoch [39/40], Step [130/196], Loss: 0.3875


 71%|███████▏  | 140/196 [05:00<01:57,  2.10s/it]

Epoch [39/40], Step [140/196], Loss: 0.3910


 77%|███████▋  | 150/196 [05:22<01:37,  2.11s/it]

Epoch [39/40], Step [150/196], Loss: 0.3928


 82%|████████▏ | 160/196 [05:43<01:15,  2.11s/it]

Epoch [39/40], Step [160/196], Loss: 0.3837


 87%|████████▋ | 170/196 [06:04<00:54,  2.10s/it]

Epoch [39/40], Step [170/196], Loss: 0.3857


 92%|█████████▏| 180/196 [06:25<00:33,  2.10s/it]

Epoch [39/40], Step [180/196], Loss: 0.3833


 97%|█████████▋| 190/196 [06:46<00:12,  2.10s/it]

Epoch [39/40], Step [190/196], Loss: 0.3789


100%|██████████| 196/196 [06:57<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 69.21 %
Saved best model to student_model_alg1.pth


  5%|▌         | 10/196 [00:27<06:56,  2.24s/it]

Epoch [40/40], Step [10/196], Loss: 0.3811


 10%|█         | 20/196 [00:49<06:21,  2.17s/it]

Epoch [40/40], Step [20/196], Loss: 0.3860


 15%|█▌        | 30/196 [01:10<05:46,  2.09s/it]

Epoch [40/40], Step [30/196], Loss: 0.3833


 20%|██        | 40/196 [01:30<05:23,  2.07s/it]

Epoch [40/40], Step [40/196], Loss: 0.3820


 26%|██▌       | 50/196 [01:51<05:06,  2.10s/it]

Epoch [40/40], Step [50/196], Loss: 0.3886


 31%|███       | 60/196 [02:12<04:50,  2.14s/it]

Epoch [40/40], Step [60/196], Loss: 0.3854


 36%|███▌      | 70/196 [02:34<04:27,  2.13s/it]

Epoch [40/40], Step [70/196], Loss: 0.3873


 41%|████      | 80/196 [02:55<04:03,  2.10s/it]

Epoch [40/40], Step [80/196], Loss: 0.3880


 46%|████▌     | 90/196 [03:16<03:41,  2.09s/it]

Epoch [40/40], Step [90/196], Loss: 0.3906


 51%|█████     | 100/196 [03:37<03:22,  2.11s/it]

Epoch [40/40], Step [100/196], Loss: 0.3889


 56%|█████▌    | 110/196 [03:58<03:01,  2.11s/it]

Epoch [40/40], Step [110/196], Loss: 0.3887


 61%|██████    | 120/196 [04:19<02:40,  2.11s/it]

Epoch [40/40], Step [120/196], Loss: 0.3762


 66%|██████▋   | 130/196 [04:40<02:18,  2.10s/it]

Epoch [40/40], Step [130/196], Loss: 0.3869


 71%|███████▏  | 140/196 [05:01<01:57,  2.10s/it]

Epoch [40/40], Step [140/196], Loss: 0.3822


 77%|███████▋  | 150/196 [05:22<01:36,  2.10s/it]

Epoch [40/40], Step [150/196], Loss: 0.3841


 82%|████████▏ | 160/196 [05:43<01:15,  2.11s/it]

Epoch [40/40], Step [160/196], Loss: 0.3890


 87%|████████▋ | 170/196 [06:04<00:54,  2.11s/it]

Epoch [40/40], Step [170/196], Loss: 0.3889


 92%|█████████▏| 180/196 [06:25<00:33,  2.10s/it]

Epoch [40/40], Step [180/196], Loss: 0.3841


 97%|█████████▋| 190/196 [06:46<00:12,  2.10s/it]

Epoch [40/40], Step [190/196], Loss: 0.3851


100%|██████████| 196/196 [06:58<00:00,  2.13s/it]


Test Accuracy of the student model on the test images: 69.29 %
Saved best model to student_model_alg1.pth
Best Accuracy: 69.29 %
Student model (Algorithm 1) saved to student_model_alg1.pth with best accuracy: 69.29%


In [None]:
!ls -lh

total 54M
-rw-r--r-- 1 root root  27M Dec  9 03:25 best_model.pth
-rw-r--r-- 1 root root  26M Dec  9 03:25 resnet_34_tf.pth
drwxr-xr-x 1 root root 4.0K Dec  5 14:24 sample_data
drwxr-xr-x 5 root root 4.0K Dec  9 03:27 tiny-imagenet-200
