# STL-10: ResNet-50 finetuning using pre-trained parameters from Image rotations

The network is fine-tuned starting from parameters pre-trained with the rotation-prediction task described in the paper _Unsupervised Representation Learning by Predicting Image Rotations_ by Spyros Gidaris et al.

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

from tqdm import tqdm
from tqdm import trange
import numpy as np
import matplotlib.pyplot as plt
import imageio
from collections import OrderedDict
import cv2, pickle, os
# import albumentations as A
# from albumentations.pytorch import ToTensorV2

  from .autonotebook import tqdm as notebook_tqdm
  warn(f"Failed to load image Python extension: {e}")


In [2]:
print(f"torch version: {torch.__version__}")

# Report how many CUDA devices PyTorch can see-
print(f"Number of GPU(s) available = {torch.cuda.device_count()}")

# Describe the active GPU (if any) and pick the compute device-
if torch.cuda.is_available():
    gpu_index = torch.cuda.current_device()
    print(f"Current GPU: {gpu_index}")
    print(f"Current GPU name: {torch.cuda.get_device_name(gpu_index)}")
    device = torch.device('cuda')
else:
    print("PyTorch does not have access to GPU")
    device = torch.device('cpu')

print(f'Available device is {device}')

torch version: 1.13.0+cu117
Number of GPU(s) available = 4
Current GPU: 0
Current GPU name: NVIDIA GeForce GTX TITAN X
Available device is cuda


In [3]:
# Define batch-size for training-
# The same value is passed to both the training and the test DataLoader below.
batch_size = 64

In [4]:
# Define torchvision transformations for training and test sets-

# Per-channel normalization shared by both pipelines-
# NOTE(review): these constants look like the commonly quoted CIFAR-10 statistics
# rather than STL-10 ones - confirm they match what the pre-trained weights expect.
normalize = transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))

# Augmentations applied to training images only-
# NOTE(review): ColorJitter is configured with every range set to 0, so it
# presumably leaves images unchanged - confirm whether that is intended.
train_augmentations = [
    transforms.RandomHorizontalFlip(p = 0.4),
    transforms.RandomRotation(degrees = 40),
    transforms.RandomVerticalFlip(p = 0.1),
    transforms.ColorJitter(brightness = 0, contrast = 0, saturation = 0, hue = 0),
]

# Training: augment, convert to tensor, normalize-
transform_train = transforms.Compose(
    train_augmentations + [transforms.ToTensor(), normalize]
)

# Test: convert to tensor and normalize only-
transform_test = transforms.Compose([transforms.ToTensor(), normalize])

In [5]:
# Directory that holds (or will receive) the STL-10 files-
# Set the STL10_ROOT environment variable to override the machine-specific
# default without editing the notebook.
data_root = os.environ.get('STL10_ROOT', '/home/majumdar/Downloads/.data/')

# Define training dataset-
train_dataset = torchvision.datasets.STL10(
    root = data_root, split = 'train',
    folds = None, transform = transform_train,
    target_transform = None, download = True
)

# Define testing dataset-
test_dataset = torchvision.datasets.STL10(
    root = data_root, split = 'test',
    folds = None, transform = transform_test,
    target_transform = None, download = True
)

Files already downloaded and verified
Files already downloaded and verified


In [6]:
# Define dataloaders-

# Training batches are reshuffled every epoch-
train_loader = torch.utils.data.DataLoader(
    dataset = train_dataset, batch_size = batch_size,
    shuffle = True
)

# Evaluation does not depend on sample order, so the test split is read in a
# fixed order; this keeps the per-batch progress output reproducible-
test_loader = torch.utils.data.DataLoader(
    dataset = test_dataset, batch_size = batch_size,
    shuffle = False
)

In [7]:
# Report the number of samples (datasets) and the number of batches (loaders)-
for kind, train_obj, test_obj in (
    ("dataset", train_dataset, test_dataset),
    ("loader", train_loader, test_loader),
):
    print(f"len(train_{kind}) = {len(train_obj)} & len(test_{kind}) = {len(test_obj)}")

len(train_dataset) = 5000 & len(test_dataset) = 8000
len(train_loader) = 79 & len(test_loader) = 125


In [8]:
# Sanity check-
# Samples per split divided by the batch size; a fractional value means the
# last batch of that split is only partially filled.
tuple(len(split) / batch_size for split in (train_dataset, test_dataset))

(78.125, 125.0)

In [9]:
class ResNet_Block(nn.Module):
    '''
    Bottleneck residual block: 1x1 conv -> 3x3 conv -> 1x1 conv, each followed
    by batch-norm and leaky-ReLU, with a shortcut added before the final
    activation.

    Parameters
    ----------
    num_inp_channels : int
        Number of channels of the input feature map.
    num_channels : int
        Number of channels produced by the block (third conv and projection
        shortcut).
    num_bottleneck_channels : int
        Number of channels used by the first and second convolutions.
    stride : int
        Stride of the first 1x1 conv and of the projection shortcut.
    dropout : float
        Drop probability of the dropout layer applied after the second and
        third conv stages. Kept as 'self.dropout_rate'; 'self.dropout' is the
        nn.Dropout module itself.
    use_1x1_conv : bool
        If True the shortcut is a 1x1 conv + batch-norm projection of the
        input; otherwise the input is added unchanged, which requires the
        input and output shapes to match.
    first_resblock : bool
        Stored on the instance only; it does not change how the block is built.
    '''

    def __init__(
        self, num_inp_channels,
        num_channels, num_bottleneck_channels,
        stride = 1, dropout = 0.2,
        use_1x1_conv = False, first_resblock = False
    ):
        super(ResNet_Block, self).__init__()

        self.num_inp_channels = num_inp_channels
        self.num_channels = num_channels
        self.num_bottleneck_channels = num_bottleneck_channels
        self.stride = stride
        # Keep the numeric rate under its own name; 'self.dropout' is the layer-
        self.dropout_rate = dropout
        self.use_1x1_conv = use_1x1_conv
        self.first_resblock = first_resblock

        # NOTE: submodule names and their creation order are kept unchanged so
        # that existing checkpoints and the printed module layout stay the same.

        # 1x1 conv reducing to the bottleneck width (carries the block's stride)-
        self.conv1 = nn.Conv2d(
            in_channels = self.num_inp_channels, out_channels = self.num_bottleneck_channels,
            kernel_size = 1, padding = 0,
            stride = self.stride, bias = False
        )
        self.bn1 = nn.BatchNorm2d(num_features = self.num_bottleneck_channels)

        # 3x3 conv at the bottleneck width-
        self.conv2 = nn.Conv2d(
            in_channels = self.num_bottleneck_channels, out_channels = self.num_bottleneck_channels,
            kernel_size = 3, padding = 1,
            stride = 1, bias = False
        )
        self.bn2 = nn.BatchNorm2d(num_features = self.num_bottleneck_channels)
        self.dropout = nn.Dropout(p = self.dropout_rate)

        # 1x1 conv expanding to the output width-
        self.conv3 = nn.Conv2d(
            in_channels = self.num_bottleneck_channels, out_channels = self.num_channels,
            kernel_size = 1, padding = 0,
            stride = 1, bias = False
        )
        self.bn3 = nn.BatchNorm2d(num_features = self.num_channels)

        # Optional projection shortcut matching output channels and stride-
        if self.use_1x1_conv:
            self.conv4 = nn.Conv2d(
                in_channels = self.num_inp_channels, out_channels = self.num_channels,
                kernel_size = 1, padding = 0,
                stride = self.stride, bias = False
            )
            self.bn4 = nn.BatchNorm2d(num_features = self.num_channels)

        self.initialize_weights()


    def initialize_weights(self):
        '''
        Re-initialize every conv / linear weight with Kaiming-normal values and
        reset batch-norm layers to weight = 1, bias = 0.
        '''
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Conv layers are created with bias = False (batch-norm follows),
                # so only the weight is initialized-
                nn.init.kaiming_normal_(m.weight)

            elif isinstance(m, nn.BatchNorm2d):
                # Standard initialization for batch normalization-
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

            elif isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight)
                nn.init.constant_(m.bias, 0)


    def forward(self, x):
        '''
        Apply the three conv stages, add the (optionally projected) shortcut
        and return the leaky-ReLU of the sum.
        '''
        y = F.leaky_relu(self.bn1(self.conv1(x)))
        y = self.dropout(F.leaky_relu(self.bn2(self.conv2(y))))
        y = self.dropout(F.leaky_relu(self.bn3(self.conv3(y))))

        if self.use_1x1_conv:
            x = self.bn4(self.conv4(x))

        y += x
        return F.leaky_relu(y)


    def shape_computation(self, x):
        '''
        Print the tensor shape after each conv + batch-norm stage for input 'x'.

        Activations and dropout are skipped here. The pass runs without
        gradient tracking since it is purely diagnostic. Always returns None.
        '''
        with torch.no_grad():
            print(f"Input shape: {x.shape}")
            y = self.bn1(self.conv1(x))
            print(f"First conv layer output shape: {y.shape}")
            y = self.bn2(self.conv2(y))
            print(f"Second conv layer output shape: {y.shape}")
            y = self.bn3(self.conv3(y))
            print(f"Third conv layer output shape: {y.shape}")

            if self.use_1x1_conv:
                x = self.bn4(self.conv4(x))
                print(f"Downsample with S = 2; identity connection output shape: {x.shape}")

            y += x
            print(f"Residual block output shape: {y.shape}")
        return None
    

In [10]:
class ResNet50(nn.Module):
    '''
    ResNet-50-style classifier built from 16 bottleneck 'ResNet_Block's
    (3 + 4 + 6 + 3 blocks at output widths 256 / 512 / 1024 / 2048).

    The stem is a single 3x3, stride-1 convolution with batch-norm. Spatial
    resolution is halved by the first block of the 512, 1024 and 2048 stages.
    Every block uses a 1x1 projection shortcut and dropout 0.2.

    Parameters
    ----------
    num_classes : int
        Number of output units of the final linear layer. The default of 4
        keeps the head size this class originally hard-coded.
    '''

    # (num_inp_channels, num_channels, num_bottleneck_channels, stride) for
    # resblock1 ... resblock16, in order-
    _BLOCK_SPECS = (
        (64, 256, 64, 1),
        (256, 256, 64, 1),
        (256, 256, 64, 1),
        (256, 512, 128, 2),      # Downsample
        (512, 512, 128, 1),
        (512, 512, 128, 1),
        (512, 512, 128, 1),
        (512, 1024, 256, 2),     # Downsample
        (1024, 1024, 256, 1),
        (1024, 1024, 256, 1),
        (1024, 1024, 256, 1),
        (1024, 1024, 256, 1),
        (1024, 1024, 256, 1),
        (1024, 2048, 512, 2),    # Downsample
        (2048, 2048, 512, 1),
        (2048, 2048, 512, 1),
    )

    def __init__(self, num_classes = 4):
        super(ResNet50, self).__init__()

        self.conv1 = nn.Conv2d(
            in_channels = 3, out_channels = 64,
            kernel_size = 3, padding = 1,
            stride = 1, bias = False
        )
        self.bn1 = nn.BatchNorm2d(num_features = 64)

        # Register the blocks as resblock1 ... resblock16 so that parameter
        # names (and therefore checkpoints) are unchanged-
        for idx, (c_in, c_out, c_mid, stride) in enumerate(self._BLOCK_SPECS, start = 1):
            setattr(
                self, f"resblock{idx}",
                ResNet_Block(
                    num_inp_channels = c_in, num_channels = c_out,
                    num_bottleneck_channels = c_mid, stride = stride,
                    dropout = 0.2, use_1x1_conv = True,
                    first_resblock = (idx == 1)
                )
            )

        self.avg_pool = nn.AdaptiveAvgPool2d(output_size = (1, 1))
        self.output_layer = nn.Linear(in_features = 2048, out_features = num_classes)


    def _residual_blocks(self):
        '''Yield (index, block) for resblock1 ... resblock16 in forward order.'''
        for idx in range(1, len(self._BLOCK_SPECS) + 1):
            yield idx, getattr(self, f"resblock{idx}")


    def forward(self, x):
        '''Return the class logits, one row per input sample.'''
        x = F.relu(self.bn1(self.conv1(x)))
        for _, block in self._residual_blocks():
            x = block(x)

        # Flatten from dim 1 rather than squeeze(): squeeze() would also drop
        # the batch dimension when the batch holds a single sample-
        x = torch.flatten(self.avg_pool(x), start_dim = 1)
        x = self.output_layer(x)
        return x


    def shape_computation(self, x):
        '''
        Run 'x' through the network and print the tensor shape after every
        stage. Always returns None.
        '''
        print(f"Input shape: {x.shape}")
        x = F.relu(self.bn1(self.conv1(x)))
        print(f"First conv layer output shape: {x.shape}")

        for idx, block in self._residual_blocks():
            x = block(x)
            print(f"ResBlock {idx} output shape: {x.shape}")

        x = torch.flatten(self.avg_pool(x), start_dim = 1)
        print(f"Average pool output shape: {x.shape}")
        x = self.output_layer(x)
        print(f"Output layer shape: {x.shape}")
        return None
    

In [11]:
# Initialize ResNet-50 architecture-
# The network is built with its default 4-unit output layer.
model = ResNet50()

In [12]:
# Use data parallelism over every visible GPU (instead of assuming exactly four)-
model = torch.nn.DataParallel(
    model, device_ids = list(range(torch.cuda.device_count()))
)

# Move to the selected device-
model.to(device)

DataParallel(
  (module): ResNet50(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (resblock1): ResNet_Block(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (dropout): Dropout(p=0.2, inplace=False)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv4): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )

In [13]:
# Total number of parameters, summed over every tensor in model.parameters()-
tot_params = sum(param.nelement() for param in model.parameters())

print(f"Total number of trainable parameters in ResNet-50 CNN = {tot_params}")

Total number of trainable parameters in ResNet-50 CNN = 38080068


In [14]:
# Load pre-trained weights using Image rotation-
# map_location = 'cpu' deserializes the checkpoint tensors on the CPU, so
# loading does not depend on the GPU ids in use when the file was saved;
# load_state_dict then copies the values into the model's own parameters.
model.load_state_dict(
    torch.load('ResNet50_STL10_RotNet_best_acc.pth', map_location = 'cpu')
)

<All keys matched successfully>

#### Modifying Neural Network when using ```nn.DataParallel```

For background, see this PyTorch forum thread: [Changing model structure in nn.DataParallel](https://discuss.pytorch.org/t/changing-model-structure-in-nn-dataparallel/91261/5).

In [15]:
# Unwrap from nn.DataParallel (if wrapped) so the head is replaced on the
# underlying network-
if isinstance(model, torch.nn.DataParallel):
    model = model.module

# Modify output layer to have 10 neurons required for classification-
# This is done unconditionally: it must not depend on how many GPUs are
# visible, otherwise the 4-unit head would be kept on a machine with at most
# one GPU.
model.output_layer = nn.Linear(in_features = 2048, out_features = 10)

In [16]:
if torch.cuda.device_count() > 1:
    print(f"Wrap model again with nn.DataParallel using"
          f" {torch.cuda.device_count()} GPUs"
         )
    # Use data parallelism over every visible GPU (instead of assuming exactly four)-
    model = torch.nn.DataParallel(
        model, device_ids = list(range(torch.cuda.device_count()))
    )

    # Move to CUDA GPU-
    model.to(0)

Wrap model again with nn.DataParallel using 4 GPUs


In [17]:
# Move the model (including the freshly created output layer) to the device
# selected at the top of the notebook, rather than a hard-coded GPU index-
model.to(device)

DataParallel(
  (module): ResNet50(
    (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (resblock1): ResNet_Block(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (dropout): Dropout(p=0.2, inplace=False)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv4): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )

In [18]:
# Define number of fine-tuning epochs-
# Used as the upper bound of the training loop.
num_epochs = 50

In [19]:
# Cross-entropy loss on the class logits-
cost_fn = nn.CrossEntropyLoss()

# SGD with momentum over all model parameters-
optimizer = torch.optim.SGD(model.parameters(), lr = 1e-3, momentum = 0.9)

In [20]:
# Decay lr by a factor of 10 at each milestone: epochs 18, 30 and 40-
scheduler = torch.optim.lr_scheduler.MultiStepLR(
    optimizer = optimizer, milestones = [18, 30, 40],
    gamma = 0.1
)

In [21]:
def train_model_progress(model, train_loader, train_dataset):
    '''
    Function to perform one epoch of training by using 'train_loader'.

    Uses the module-level 'cost_fn' (loss) and 'optimizer' objects, and puts
    'model' into training mode.

    Parameters
    ----------
    model : network to train; batches are moved to the device of its first
        parameter.
    train_loader : iterable yielding (images, labels) batches.
    train_dataset : sized dataset; its length normalizes the reported metrics.

    Returns
    -------
    (train_loss, train_acc) : mean loss per sample (float) and accuracy in
        percent (0-d float64 numpy array) for this epoch.
    '''
    running_loss = 0.0
    running_corrects = 0
    num_samples = len(train_dataset)

    # Send batches to wherever the model lives instead of a hard-coded GPU index-
    target_device = next(model.parameters()).device

    model.train()

    with tqdm(train_loader, unit = 'batch') as tepoch:
        # The description never changes, so set it once-
        tepoch.set_description(f"Training: ")

        for images, labels in tepoch:
            images = images.to(target_device)
            labels = labels.to(target_device)

            # Get model predictions-
            outputs = model(images)

            # Compute loss-
            J = cost_fn(outputs, labels)

            # Empty accumulated gradients-
            optimizer.zero_grad()

            # Perform backprop-
            J.backward()

            # Update parameters-
            optimizer.step()

            # Compute model's performance statistics-
            # J is the batch mean, so weight it by the batch size.
            running_loss += J.item() * images.size(0)
            _, predicted = torch.max(outputs, 1)
            running_corrects += (predicted == labels).sum().item()

            tepoch.set_postfix(
                loss = running_loss / num_samples,
                accuracy = (running_corrects / num_samples) * 100
            )

    train_loss = running_loss / num_samples
    train_acc = np.asarray((running_corrects / num_samples) * 100, dtype = np.float64)

    return train_loss, train_acc


In [22]:
def test_model_progress(model, test_loader, test_dataset):
    '''
    Function to evaluate 'model' on 'test_loader' without gradient tracking.

    Uses the module-level 'cost_fn' (loss) and leaves 'model' in evaluation
    mode.

    Parameters
    ----------
    model : network to evaluate; batches are moved to the device of its first
        parameter.
    test_loader : iterable yielding (images, labels) batches.
    test_dataset : sized dataset; its length normalizes the reported loss.

    Returns
    -------
    (val_loss, val_acc) : mean loss per sample (float) and accuracy in percent
        over the samples seen (0-d float32 numpy array).
    '''
    total = 0
    correct = 0
    running_loss_val = 0.0
    num_samples = len(test_dataset)

    # Send batches to wherever the model lives instead of a hard-coded GPU index-
    target_device = next(model.parameters()).device

    # Set model to evaluation mode (once, not on every batch)-
    model.eval()

    with torch.no_grad():
        with tqdm(test_loader, unit = 'batch') as tepoch:
            # The description never changes, so set it once-
            tepoch.set_description(f"Validation: ")

            for images, labels in tepoch:
                images = images.to(target_device)
                labels = labels.to(target_device)

                # Predict using trained model-
                outputs = model(images)
                _, y_pred = torch.max(outputs, 1)

                # Compute validation loss-
                J_val = cost_fn(outputs, labels)

                # J_val is the batch mean, so weight it by the batch size.
                running_loss_val += J_val.item() * labels.size(0)

                # Total number of labels-
                total += labels.size(0)

                # Total number of correct predictions-
                correct += (y_pred == labels).sum().item()

                tepoch.set_postfix(
                    val_loss = running_loss_val / num_samples,
                    val_acc = 100 * (correct / total)
                )

    val_loss = running_loss_val / num_samples
    val_acc = np.asarray((correct / total) * 100, dtype = np.float32)

    return val_loss, val_acc


In [23]:
# Python3 dict to contain model training metrics-
# Keyed by epoch number; each entry is filled in by the training loop.
train_history = {}

# To save 'best' parameters-
# A checkpoint is written only when validation accuracy (in percent) exceeds
# this value, so with a start of 70 nothing is saved until val acc goes above 70%.
best_val_acc = 70

In [24]:
for epoch in range(1, num_epochs + 1):

    # Train for one pass over the training set-
    train_loss, train_acc = train_model_progress(model, train_loader, train_dataset)

    # Measure loss / accuracy on the test split after this epoch-
    val_loss, val_acc = test_model_progress(model, test_loader, test_dataset)

    # Advance the LR schedule, then read back the learning rate now in effect-
    scheduler.step()
    current_lr = optimizer.param_groups[0]["lr"]

    print(
        f"Epoch: {epoch}; loss = {train_loss:.4f}, acc = {train_acc:.2f}%",
        f" val loss = {val_loss:.4f}, val acc = {val_acc:.2f}% & LR = {current_lr:.8f}"
    )

    # Record this epoch's metrics-
    train_history[epoch] = dict(
        train_loss = train_loss, val_loss = val_loss,
        train_acc = train_acc, val_acc = val_acc,
        lr = current_lr
    )

    # Nothing to save unless validation accuracy beat the best so far-
    if not (val_acc > best_val_acc):
        continue

    best_val_acc = val_acc
    print(f"Saving model with highest val_acc = {val_acc:.2f}%\n")
    torch.save(model.state_dict(), "ResNet50_STL10_RotNet_finetuned_best_val_acc.pth")
        

Training: : 100%|█| 79/79 [01:01<00:00,  1.28batch/s, accuracy=14.3, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.01batch/s, val_acc=25.9, va


Epoch: 1; loss = 2.2989, acc = 14.30%  val loss = 2.2807, val acc = 25.85% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=22.9, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.00batch/s, val_acc=25.3, va


Epoch: 2; loss = 2.2804, acc = 22.90%  val loss = 2.2363, val acc = 25.30% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=26.8, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.00batch/s, val_acc=29.4, va


Epoch: 3; loss = 2.2402, acc = 26.76%  val loss = 2.1362, val acc = 29.45% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=28.6, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.02batch/s, val_acc=30.8, va


Epoch: 4; loss = 2.1497, acc = 28.56%  val loss = 1.9509, val acc = 30.83% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=29.4, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.02batch/s, val_acc=30.8, va


Epoch: 5; loss = 2.0207, acc = 29.42%  val loss = 1.8212, val acc = 30.76% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=31.2, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.03batch/s, val_acc=33.4, va


Epoch: 6; loss = 1.9096, acc = 31.22%  val loss = 1.7224, val acc = 33.40% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=31.9, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.01batch/s, val_acc=34.5, va


Epoch: 7; loss = 1.8230, acc = 31.90%  val loss = 1.6724, val acc = 34.49% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=34, loss=1.
Validation: : 100%|█| 125/125 [00:31<00:00,  4.02batch/s, val_acc=36.8, va


Epoch: 8; loss = 1.7623, acc = 34.04%  val loss = 1.6035, val acc = 36.79% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=35.8, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.02batch/s, val_acc=37.4, va


Epoch: 9; loss = 1.7099, acc = 35.84%  val loss = 1.5706, val acc = 37.36% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=37.6, loss=
Validation: : 100%|█| 125/125 [00:30<00:00,  4.03batch/s, val_acc=39.9, va


Epoch: 10; loss = 1.6629, acc = 37.64%  val loss = 1.5061, val acc = 39.91% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=38.8, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.03batch/s, val_acc=41.3, va


Epoch: 11; loss = 1.6264, acc = 38.80%  val loss = 1.4530, val acc = 41.33% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=39.8, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.03batch/s, val_acc=41.8, va


Epoch: 12; loss = 1.5822, acc = 39.82%  val loss = 1.4569, val acc = 41.75% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=41.8, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.03batch/s, val_acc=45, val_


Epoch: 13; loss = 1.5508, acc = 41.82%  val loss = 1.4060, val acc = 44.98% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=43.8, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.01batch/s, val_acc=46.2, va


Epoch: 14; loss = 1.5247, acc = 43.76%  val loss = 1.3819, val acc = 46.16% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=44.6, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.03batch/s, val_acc=48.5, va


Epoch: 15; loss = 1.4910, acc = 44.62%  val loss = 1.3435, val acc = 48.55% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=46.7, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.03batch/s, val_acc=50.3, va


Epoch: 16; loss = 1.4560, acc = 46.68%  val loss = 1.2917, val acc = 50.28% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=47.1, loss=
Validation: : 100%|█| 125/125 [00:30<00:00,  4.03batch/s, val_acc=51.7, va


Epoch: 17; loss = 1.4307, acc = 47.14%  val loss = 1.2745, val acc = 51.72% & LR = 0.00100000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=48.6, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.03batch/s, val_acc=53.1, va


Epoch: 18; loss = 1.4125, acc = 48.56%  val loss = 1.2317, val acc = 53.14% & LR = 0.00010000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=50.3, loss=
Validation: : 100%|█| 125/125 [00:30<00:00,  4.04batch/s, val_acc=53.1, va


Epoch: 19; loss = 1.3739, acc = 50.28%  val loss = 1.2394, val acc = 53.15% & LR = 0.00010000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=50.1, loss=
Validation: : 100%|█| 125/125 [00:30<00:00,  4.04batch/s, val_acc=52.2, va


Epoch: 20; loss = 1.3755, acc = 50.10%  val loss = 1.2569, val acc = 52.18% & LR = 0.00010000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=50.5, loss=
Validation: : 100%|█| 125/125 [00:30<00:00,  4.04batch/s, val_acc=54.1, va


Epoch: 21; loss = 1.3709, acc = 50.50%  val loss = 1.2132, val acc = 54.15% & LR = 0.00010000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=50.6, loss=
Validation: : 100%|█| 125/125 [00:30<00:00,  4.04batch/s, val_acc=53.4, va


Epoch: 22; loss = 1.3664, acc = 50.56%  val loss = 1.2346, val acc = 53.38% & LR = 0.00010000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=50.4, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.03batch/s, val_acc=54.5, va


Epoch: 23; loss = 1.3642, acc = 50.42%  val loss = 1.2104, val acc = 54.46% & LR = 0.00010000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=51.7, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.03batch/s, val_acc=55.3, va


Epoch: 24; loss = 1.3626, acc = 51.68%  val loss = 1.1979, val acc = 55.26% & LR = 0.00010000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=51.2, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.02batch/s, val_acc=54.6, va


Epoch: 25; loss = 1.3475, acc = 51.16%  val loss = 1.2127, val acc = 54.59% & LR = 0.00010000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=51.7, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.02batch/s, val_acc=53.4, va


Epoch: 26; loss = 1.3568, acc = 51.70%  val loss = 1.2367, val acc = 53.39% & LR = 0.00010000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52.6, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.02batch/s, val_acc=57.4, va


Epoch: 27; loss = 1.3421, acc = 52.58%  val loss = 1.1664, val acc = 57.39% & LR = 0.00010000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52.3, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.00batch/s, val_acc=53.6, va


Epoch: 28; loss = 1.3462, acc = 52.32%  val loss = 1.2208, val acc = 53.63% & LR = 0.00010000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52.1, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.01batch/s, val_acc=57.1, va


Epoch: 29; loss = 1.3501, acc = 52.10%  val loss = 1.1810, val acc = 57.11% & LR = 0.00010000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=51.7, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.03batch/s, val_acc=55, val_


Epoch: 30; loss = 1.3426, acc = 51.72%  val loss = 1.1986, val acc = 55.00% & LR = 0.00001000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=51, loss=1.
Validation: : 100%|█| 125/125 [00:31<00:00,  4.03batch/s, val_acc=55.1, va


Epoch: 31; loss = 1.3393, acc = 50.96%  val loss = 1.1927, val acc = 55.11% & LR = 0.00001000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=51.5, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.02batch/s, val_acc=54.4, va


Epoch: 32; loss = 1.3386, acc = 51.50%  val loss = 1.2096, val acc = 54.41% & LR = 0.00001000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52.1, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.03batch/s, val_acc=54.8, va


Epoch: 33; loss = 1.3349, acc = 52.10%  val loss = 1.2151, val acc = 54.83% & LR = 0.00001000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52, loss=1.
Validation: : 100%|█| 125/125 [00:31<00:00,  4.03batch/s, val_acc=55, val_


Epoch: 34; loss = 1.3483, acc = 52.04%  val loss = 1.2026, val acc = 55.04% & LR = 0.00001000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=51.9, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.01batch/s, val_acc=54, val_


Epoch: 35; loss = 1.3328, acc = 51.86%  val loss = 1.2172, val acc = 54.00% & LR = 0.00001000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=51.2, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.03batch/s, val_acc=54.6, va


Epoch: 36; loss = 1.3388, acc = 51.20%  val loss = 1.1983, val acc = 54.60% & LR = 0.00001000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=51.6, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.01batch/s, val_acc=55.8, va


Epoch: 37; loss = 1.3424, acc = 51.62%  val loss = 1.1906, val acc = 55.83% & LR = 0.00001000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52, loss=1.
Validation: : 100%|█| 125/125 [00:31<00:00,  4.00batch/s, val_acc=55.8, va


Epoch: 38; loss = 1.3387, acc = 52.00%  val loss = 1.1830, val acc = 55.84% & LR = 0.00001000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52.2, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  3.99batch/s, val_acc=56.5, va


Epoch: 39; loss = 1.3307, acc = 52.20%  val loss = 1.1817, val acc = 56.45% & LR = 0.00001000


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52.1, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.02batch/s, val_acc=54.8, va


Epoch: 40; loss = 1.3361, acc = 52.08%  val loss = 1.1947, val acc = 54.83% & LR = 0.00000100


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52.5, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  3.99batch/s, val_acc=56.4, va


Epoch: 41; loss = 1.3340, acc = 52.50%  val loss = 1.1730, val acc = 56.39% & LR = 0.00000100


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52.4, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.00batch/s, val_acc=56, val_


Epoch: 42; loss = 1.3268, acc = 52.40%  val loss = 1.1866, val acc = 55.96% & LR = 0.00000100


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=51.5, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.01batch/s, val_acc=54.2, va


Epoch: 43; loss = 1.3374, acc = 51.54%  val loss = 1.2036, val acc = 54.20% & LR = 0.00000100


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52.4, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.02batch/s, val_acc=56.7, va


Epoch: 44; loss = 1.3407, acc = 52.36%  val loss = 1.1762, val acc = 56.74% & LR = 0.00000100


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52, loss=1.
Validation: : 100%|█| 125/125 [00:31<00:00,  4.01batch/s, val_acc=56.8, va


Epoch: 45; loss = 1.3324, acc = 51.96%  val loss = 1.1678, val acc = 56.81% & LR = 0.00000100


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52.1, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.02batch/s, val_acc=56.1, va


Epoch: 46; loss = 1.3334, acc = 52.08%  val loss = 1.1750, val acc = 56.10% & LR = 0.00000100


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52.1, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.02batch/s, val_acc=57.3, va


Epoch: 47; loss = 1.3397, acc = 52.08%  val loss = 1.1608, val acc = 57.30% & LR = 0.00000100


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52.5, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.02batch/s, val_acc=55.1, va


Epoch: 48; loss = 1.3301, acc = 52.52%  val loss = 1.1991, val acc = 55.10% & LR = 0.00000100


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52.2, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.02batch/s, val_acc=55.5, va


Epoch: 49; loss = 1.3325, acc = 52.20%  val loss = 1.1945, val acc = 55.53% & LR = 0.00000100


Training: : 100%|█| 79/79 [00:55<00:00,  1.43batch/s, accuracy=52.5, loss=
Validation: : 100%|█| 125/125 [00:31<00:00,  4.01batch/s, val_acc=53.3, va

Epoch: 50; loss = 1.3487, acc = 52.46%  val loss = 1.2296, val acc = 53.28% & LR = 0.00000100





In [25]:
# Persist the collected training metrics to disk in pickle format-
history_path = "ResNet50_STL10_RotNet_finetuned_training_history.pkl"
with open(history_path, "wb") as history_file:
    # Binary write mode is required since pickle emits a byte stream-
    pickle.dump(train_history, history_file)