In [0]:
from google.colab import drive
drive.mount('/content/gdrive/')

%cd 'gdrive/My Drive/licenta/car_model_classification'

Drive already mounted at /content/gdrive/; to attempt to forcibly remount, call drive.mount("/content/gdrive/", force_remount=True).
[Errno 2] No such file or directory: 'gdrive/My Drive/licenta/car_model_classification'
/content/gdrive/My Drive/licenta/car_model_classification


In [0]:
import argparse
import json
import os
import time

import pandas as pd
import torch
import torch.optim as optim
import torch.nn as nn
from torch.optim import lr_scheduler
from torch.utils.tensorboard import SummaryWriter
import torchvision

from model.se_resnet import se_resnet50, SELayer
from model.se_linear_resnet import se_linear_resnet50, se_linear_resnet34, SELayer
from model.cbam_resnet import cbam_resnet50
from model.cbam_linear_resnet import cbam_linear_resnet50, cbam_linear_resnet34, CBAMBlock
from model.cbam_resnet_official import cbam_official_resnet50
# from model.se_vgg import se_vgg19_bn
from datasets import get_train_valid_loader

print("PyTorch Version: ",torch.__version__)
print("Torchvision Version: ",torchvision.__version__)

PyTorch Version:  1.5.0+cu101
Torchvision Version:  0.6.0+cu101


In [0]:
%pip install torchsummary



In [0]:
!nvidia-smi

Tue Jun  9 12:08:00 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.82       Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla K80           Off  | 00000000:00:04.0 Off |                    0 |
| N/A   54C    P0    56W / 149W |   4017MiB / 11441MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
+-------

In [0]:
def train_model(model, dataloaders, criterion, optimizer, lr_scheduler, num_epochs, exp_dir, device, writer):
    since = time.time()

    best_acc = 0.0

    res = []

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch + 1, num_epochs))
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()   # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0
            runcount = 0
            i = 0

            # Iterate over data.
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward
                # track history if only in train
                with torch.set_grad_enabled(phase == 'train'):
                    # Get model outputs and calculate loss
                    
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                batch_size = inputs.size(0)
                running_loss += loss.item() * batch_size
                running_corrects += torch.sum(preds == labels.data)

                lr = get_lr(optimizer)
                runcount += batch_size
                i += 1

                print(f'{phase} [{i}/{len(dataloaders[phase])}]: '
                    f'Loss: {running_loss / runcount:.4f} '
                    f'Acc: {(running_corrects.double() / runcount * 100):.2f}% '
                    f'Learning Rate: {lr}')

            epoch_loss = running_loss / runcount
            epoch_acc = running_corrects.double() / runcount * 100

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                
                torch.save({
                    'model': model.state_dict(),
                    'optimizer': optimizer.state_dict(),
                    'lr_scheduler': lr_scheduler.state_dict(),
                    'epoch': epoch},
                    os.path.join(exp_dir, 'best.pth'))
                
            if phase == 'train':
                trainres = {
                    'train_loss': epoch_loss,
                    'train_acc': epoch_acc.item(),
                }

                # Record loss / acc into the writer
                writer.add_scalar('Train/Loss', epoch_loss, epoch)
                writer.add_scalar('Train/Accuracy', epoch_acc, epoch)
                writer.flush()

            if phase == 'val':
                valres = {
                    'val_loss': epoch_loss,
                    'val_acc': epoch_acc.item(),
                }

                trainres.update(valres)
                res.append(trainres)

                # Record loss / acc into the writer
                writer.add_scalar('Val/Loss', epoch_loss, epoch)
                writer.add_scalar('Val/Accuracy', epoch_acc, epoch)
                writer.flush()

                try:
                    lr_scheduler.step()
                except:
                    lr_scheduler.step(epoch_loss)

        print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    res = pd.DataFrame(res)
    res.to_csv(exp_dir + '/history.csv')

    return model

In [0]:
import math
import matplotlib.pyplot as plt

def find_lr(model, loss_fn, optimizer, train_loader, device, init_value=1e-8, final_value=10.0):
    number_in_epoch = len(train_loader) - 1
    update_step = (final_value / init_value) ** (1 / number_in_epoch)
    lr = init_value
    optimizer.param_groups[0]["lr"] = lr
    best_loss = 0.0
    batch_num = 0
    losses = []
    log_lrs = []
    
    for data in train_loader:
        batch_num += 1
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)

        # Crash out if loss explodes

        if batch_num > 1 and loss > 4 * best_loss:
            return log_lrs[10:-5], losses[10:-5]

        # Record the best loss

        if loss < best_loss or batch_num == 1:
            best_loss = loss

        # Store the values

        losses.append(loss)
        log_lrs.append(math.log10(lr))

        # Do the backward pass and optimize

        loss.backward()
        optimizer.step()

        # Update the lr for the next step and store

        lr *= update_step
        optimizer.param_groups[0]["lr"] = lr
        
    return log_lrs[10:-5], losses[10:-5]

In [0]:
def set_parameter_requires_grad(model, feature_extracting):
    if feature_extracting:
        for param in model.parameters():
            param.requires_grad = False

In [0]:
class NetworkV1(nn.Module):
    def __init__(self, feature_extractor, num_classes, feature_extract):
        super().__init__()

        if hasattr(feature_extractor, 'fc'):
            self.feature_extractor = feature_extractor
            in_features = feature_extractor.fc.in_features
            # self.feature_extractor.fc = nn.Sequential(
            #     nn.Dropout(),
            #     nn.Linear(in_features=in_features, out_features=512),
            #     nn.ReLU(),
            #     nn.Dropout(),
            #     nn.Linear(in_features=512, out_features=256),
            #     nn.ReLU(),
            #     nn.Dropout(),
            #     nn.Linear(in_features=256, out_features=num_classes)
            # )
            self.feature_extractor.fc = nn.Linear(in_features=in_features, out_features=num_classes)
        elif isinstance(feature_extractor.classifier, nn.Linear): # densenet121
            in_features = self.feature_extractor.classifier.in_features
            self.feature_extractor.classifier = nn.Sequential(
                                        nn.Linear(in_features, num_classes),
                                        nn.Sigmoid())
        else: # mobilenetv2 / vgg19
            self.feature_extractor = feature_extractor.features
            self.avgpool = feature_extractor.avgpool
            self.classifier = feature_extractor.classifier
            if feature_extract:
                self.feature_extractor.eval()
            in_features = self.classifier[-1].in_features
            self.classifier[-1] = nn.Linear(in_features=in_features, out_features=num_classes, bias=True)

    def forward(self, x):
        # x = self.feature_extractor(x)
        # x = self.avgpool(x)
        # x = torch.flatten(x, 1)
        # x = self.classifier(x)
        
        # return x

        fc = self.feature_extractor(x)

        return fc

In [0]:
class NetworkGradCam(nn.Module):
    def __init__(self, feature_extractor, num_classes, feature_extract):
        super().__init__()

        # self.feature_extractor = feature_extractor

        # gradient placeholder
        self.gradient = None

        if hasattr(feature_extractor, 'fc'): # resnets
            # isolate the feature blocks
            self.features = nn.Sequential(feature_extractor.conv1,
                                          feature_extractor.bn1,
                                          nn.ReLU(),
                                          nn.MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False),
                                          feature_extractor.layer1, 
                                          feature_extractor.layer2, 
                                          feature_extractor.layer3, 
                                          feature_extractor.layer4)
            # average pooling layer
            self.avgpool = feature_extractor.avgpool

            in_features = feature_extractor.fc.in_features
            # if feature_extract:
            #     self.classifier = nn.Sequential(
            #         nn.Linear(in_features=in_features, out_features=512),
            #         nn.BatchNorm1d(num_features=512),
            #         nn.ReLU(),
            #         # nn.Dropout(),
            #         nn.Linear(in_features=512, out_features=256),
            #         nn.BatchNorm1d(num_features=256),
            #         nn.ReLU(),
            #         # nn.Dropout(),
            #         nn.Linear(in_features=256, out_features=num_classes)
            #     )
            # else:
            #     # classifier
            #     self.classifier = nn.Linear(in_features=in_features, out_features=num_classes, bias=True)
            
            # classifier
            self.classifier = nn.Linear(in_features=in_features, out_features=num_classes, bias=True)

        # elif hasattr(feature_extractor, 'last_linear'): # se_resnets
        #     # isolate the feature blocks
        #     self.features = nn.Sequential(feature_extractor.layer0,
        #                                   feature_extractor.layer1, 
        #                                   feature_extractor.layer2, 
        #                                   feature_extractor.layer3, 
        #                                   feature_extractor.layer4)
            
        #     self.avgpool = feature_extractor.avg_pool

        #     in_features = feature_extractor.last_linear.in_features
        #     if feature_extract:
        #         self.classifier = nn.Sequential(
        #             nn.Linear(in_features=in_features, out_features=512),
        #             nn.ReLU(),
        #             nn.Dropout(),
        #             nn.Linear(in_features=512, out_features=256),
        #             nn.ReLU(),
        #             nn.Dropout(),
        #             nn.Linear(in_features=256, out_features=num_classes)
        #         )
        #     else:
        #         # classifier
        #         self.classifier = nn.Linear(in_features=in_features, out_features=num_classes, bias=True)

        else: # vgg
            self.features = feature_extractor.features
            if feature_extract:
                self.features.eval()

            # average pooling layer
            self.avgpool = feature_extractor.avgpool

            in_features_first = feature_extractor.classifier[0].in_features
            in_features_last = feature_extractor.classifier[-1].in_features
            self.classifier = nn.Sequential(
                    nn.Linear(in_features=in_features_first, out_features=in_features_last, bias=True),
                    nn.ReLU(),
                    nn.Dropout(),
                    nn.Linear(in_features=in_features_last, out_features=in_features_last, bias=True),
                    nn.ReLU(),
                    nn.Dropout(),
                    nn.Linear(in_features=in_features_last, out_features=num_classes, bias=True)
                )
            
    # hook for the gradients
    def activations_hook(self, grad):
        self.gradient = grad
    
    def get_gradient(self):
        return self.gradient
    
    def get_activations(self, x):
        return self.features(x)

    def forward(self, x):
        # extract the features
        x = self.features(x)
        
        # # register the hook
        # h = x.register_hook(self.activations_hook)
        
        # complete the forward pass
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        
        return x

In [0]:
# from torchsummary import summary
# device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# net = NetworkGradCam(cbam_official_resnet50(pretrained=False), 157, False).to(device)

# summary(net, (3, 224, 224))

# # x = torch.randn(size=(1, 3, 448, 448)).to(device)

# # pred = net(x)

# print(net)

In [0]:
def construct_model(config, num_classes):
    if config['arch'] == 'resnext50':
        feature_extractor = torchvision.models.resnext50_32x4d(pretrained=config['pretrained'])
    elif config['arch'] == 'resnet18':
        feature_extractor = torchvision.models.resnet18(pretrained=config['pretrained'])
    elif config['arch'] == 'resnet34':
        feature_extractor = torchvision.models.resnet34(pretrained=config['pretrained'])
    elif config['arch'] == 'resnet50':
        feature_extractor = torchvision.models.resnet50(pretrained=config['pretrained'])
    elif config['arch'] == 'mobilenetv2':
        feature_extractor = torchvision.models.mobilenet_v2(pretrained=config['pretrained'])
    elif config['arch'] == 'vgg19':
        feature_extractor = torchvision.models.vgg19(pretrained=config['pretrained'])
    elif config['arch'] == 'vgg19_bn':
        feature_extractor = torchvision.models.vgg19_bn(pretrained=config['pretrained'])
    elif config['arch'] == 'densenet121':
        feature_extractor = torchvision.models.densenet121(pretrained=config['pretrained'])
    elif config['arch'] == 'se_resnet50':
        feature_extractor = se_resnet50(pretrained=config['pretrained'])
    elif config['arch'] == 'se_linear_resnet34':
        feature_extractor = se_linear_resnet34(pretrained=config['pretrained'])
    elif config['arch'] == 'se_linear_resnet50':
        feature_extractor = se_linear_resnet50(pretrained=config['pretrained'])
    elif config['arch'] == 'cbam_resnet50':
        feature_extractor = cbam_resnet50(pretrained=config['pretrained'])
    elif config['arch'] == 'cbam_linear_resnet34':
        feature_extractor = cbam_linear_resnet34(pretrained=config['pretrained'])
    elif config['arch'] == 'cbam_linear_resnet50':
        feature_extractor = cbam_linear_resnet50(pretrained=config['pretrained'])
    elif config['arch'] == 'cbam_official_resnet50':
        feature_extractor = cbam_official_resnet50(pretrained=config['pretrained'])
    elif config['arch'] == 'se_vgg19_bn':
        feature_extractor = se_vgg19_bn(pretrained=config['pretrained'])
    else:
        print("Invalid model name, exiting...")
        exit()

    
    if config['feature_extract']:
        if config['arch'] == 'resnet50':
            child_counter = 0
            for child in feature_extractor.children():
                if child_counter < 6 and not isinstance(child, nn.BatchNorm2d):
                  print("child was frozen".format(child_counter))
                  for name, param in child.named_parameters():
                      print(name)
                      if("bn" not in name):
                          param.requires_grad = False
                else:
                    print("child was not frozen".format(child_counter))
                child_counter += 1
        else:
          child_counter = 0
          for child in feature_extractor.children():
              if child_counter < 4 and not isinstance(child, nn.BatchNorm2d):
                  print("child {} was frozen".format(child_counter))
                  print(child)
                  for name, param in child.named_parameters():
                      param.requires_grad = False
              else:
                for children_of_child in child.children():
                    children_of_child_counter = 0
                    for child_child in children_of_child.children():
                        child_child_counter = 0
                        if isinstance(child_child, SELayer) or isinstance(child_child, CBAMBlock):
                            print("child {} of child {} of child {} was not frozen".format(child_child_counter, children_of_child_counter, child_counter))
                            print(child_child)
                        else:
                            if child_counter < 6 and not isinstance(child_child, nn.BatchNorm2d):
                                print("child {} of child {} of child {} was frozen".format(child_child_counter, children_of_child_counter, child_counter))
                                print(child_child)
                                for name, param in child_child.named_parameters():
                                    print(name)
                                    if("bn" not in name):
                                        param.requires_grad = False
                            else:
                                print("child {} of child {} of child {} was not frozen".format(child_child_counter, children_of_child_counter, child_counter))
                                print(child_child)

                        child_child_counter += 1
                        children_of_child_counter += 1
              child_counter += 1
            
    # if config['version'] == '1':
    #     model = NetworkV1(feature_extractor, num_classes, config['feature_extract'])
    if config['grad_cam']:
        model = NetworkGradCam(feature_extractor, num_classes, config['feature_extract'])
    
    # model = feature_extractor

    return model

In [0]:
def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']

In [0]:
def get_exp_dir(config):

    if config['cifar10']:
        os.makedirs('logs/cifar10', exist_ok=True)
        exp_dir = f'logs/cifar10/{config["arch"]}_{config["imgsize"][0]}_{config["epochs"]}'
    else:
        exp_dir = f'logs/{config["arch"]}_{config["imgsize"][0]}_{config["epochs"]}'

    if config['finetune']:
        exp_dir += '_finetune'

    if config['feature_extract']:
        exp_dir += '_feature_extract'
      
    if config['grad_cam']:
        exp_dir += '_grad_cam'
      
    if config['pretrained'] is False:
        exp_dir += '_no_pretrained'

    os.makedirs(exp_dir, exist_ok=True)

    exps = [d for d in os.listdir(exp_dir) if os.path.isdir(os.path.join(exp_dir, d))]
    files = set(map(int, exps))
    if len(files):
        exp_id = min(set(range(1, max(files) + 2)) - files)
    else:
        exp_id = 1

    exp_dir = os.path.join(exp_dir, str(exp_id))
    os.makedirs(exp_dir, exist_ok=True)

    json.dump(config, open(exp_dir + '/config.json', 'w'))

    return exp_dir

def load_weight(model, optimizer, lr_scheduler, path):
    sd = torch.load(path)
    model.load_state_dict(sd['model'])
    optimizer.load_state_dict(sd['optimizer'])
    lr_scheduler.load_state_dict(sd['lr_scheduler'])
    epoch = sd['epoch']

    print('Loaded model from epoch %d\n' % (epoch))

In [0]:
# %pip install torch-lr-finder
# %pip install torch-lr-finder -v --global-option="amp"

In [0]:
from torchsummary import summary

def main(args):
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

    config = {
        'batch_size': args.batch_size,
        'optimizer': args.optim,
        'lr': args.lr,
        'weight_decay': args.weight_decay,
        'momentum': args.momentum,
        'epochs': args.epochs,
        'imgsize': (args.imgsize, args.imgsize),
        'arch': args.arch,
        'finetune': args.finetune,
        'path': args.path,
        'feature_extract': args.feature_extract,
        'version': '1',
        'grad_cam': args.grad_cam,
        'pretrained': args.pretrained,
        'cifar10': args.cifar10
    }

    dataloaders_dict = get_train_valid_loader(config)

    class_names = dataloaders_dict['train'].dataset.classes
    num_classes = len(class_names)

    model = construct_model(config, num_classes=num_classes)
    model = model.to(device)
    summary(model, (3, config['imgsize'][0], config['imgsize'][1]))

    # print(model)

    # Gather the parameters to be optimized/updated in this run. If we are
    #  finetuning we will be updating all parameters. However, if we are
    #  doing feature extract method, we will only update the parameters
    #  that we have just initialized, i.e. the parameters with requires_grad
    #  is True.
    params_to_update = model.parameters()
    print("Params to learn:")
    if config['feature_extract']:
        params_to_update = []
        for name,param in model.named_parameters():
            if param.requires_grad == True:
                params_to_update.append(param)
                print("\t",name)

    optimizer_ft = optim.SGD(params_to_update,
                            lr=config['lr'],
                            momentum=config['momentum'],
                            weight_decay=config['weight_decay'])
    if config['optimizer'].lower == 'adam':
        optimizer_ft = optim.Adam(params_to_update,
                                  lr=config['lr'],
                                  weight_decay=config['weight_decay'])

    lr_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer_ft,
                                                        factor = 0.1,
                                                        patience = 5,
                                                        mode = 'min')

    if config['finetune']:
        load_weight(model, optimizer_ft, lr_scheduler, config['path'])

    # Setup the loss fxn
    criterion = nn.CrossEntropyLoss()

    # # %pip install torch-lr-finder -v --global-option="amp"

    # # from torch_lr_finder import LRFinder
    
    # # lr_finder = LRFinder(model, optimizer_ft, criterion, device='cuda')
    # # lr_finder.range_test(dataloaders_dict['train'], val_loader=dataloaders_dict['val'], end_lr=10, num_iter=100, step_mode='exp')
    # # lr_finder.plot()

    # # logs, losses = find_lr(model, criterion, optimizer_ft, dataloaders_dict['train'], device)
    # # plt.plot(logs, losses)

    # exp_dir = get_exp_dir(config)

    # PATH_to_log_dir = exp_dir + '/runs/'
    # # Declare Tensorboard writer
    # writer = SummaryWriter(PATH_to_log_dir)
    # print('Tensorboard is recording into folder: ' + PATH_to_log_dir + '\n')

    # # Train and evaluate
    # model = train_model(model, dataloaders_dict, criterion, optimizer_ft, lr_scheduler, config['epochs'], exp_dir, device, writer)

In [0]:
parser = argparse.ArgumentParser(description='Training and finetuning script for Car Model classification')

# training arg
parser.add_argument('--batch-size', default=32, type=int,
                    help='training batch size (default: 32)')
parser.add_argument('--epochs', default=40, type=int,
                    help='training epochs (default: 40)')
parser.add_argument('--arch', default='resnet34', choices=['resnext50',
                                                            'resnet18',
                                                            'resnet34',
                                                            'resnet50',
                                                            'vgg19',
                                                            'vgg19_bn',
                                                            'se_resnet50',
                                                            'se_linear_resnet34',
                                                            'se_linear_resnet50',
                                                            'cbam_resnet50',
                                                            'cbam_linear_resnet34',
                                                            'cbam_linear_resnet50',
                                                            'cbam_official_resnet50',
                                                            'se_vgg19_bn'],
                    help='Architecture (default: resnet34)')
parser.add_argument('--imgsize', default=224, type=int,
                    help='Input image size')
parser.add_argument('--finetune', default=False, action='store_true',
                    help='whether to finetune from 400x400 to 224x224 (default: False) or to resume training')
parser.add_argument('--path', default=None,
                    help='required if it is a finetune task (default: None)')
parser.add_argument('--feature-extract', default=False, action='store_true',
                    help='whether to feature extract (default: False)')
parser.add_argument('--grad-cam', default=False, action='store_true',
                    help='grad cam network or not')
parser.add_argument('--pretrained', default=False, action='store_true',
                    help='Imagenet pretrained weights')

# optimizer arg
parser.add_argument('--optim', default='SGD', type=str,
                    help='Optimizer (default: SGD)')
parser.add_argument('--lr', default=0.01, type=float,
                    help='Optimizer learning rate (default: 0.01)')
parser.add_argument('--weight-decay', default=1e-4, type=float,
                    help='Optimizer weight decay (default: 0.0001)')
parser.add_argument('--momentum', default=0.9, type=float,
                    help='SGD momentum (default: 0.9)')

parser.add_argument('--cifar10', default=False, action='store_true')

# args = parser.parse_args()

args = parser.parse_args(args=['--arch', 'resnet34', '--batch-size', '64', '--lr', '0.01', '--epochs', '50', '--imgsize', '224', '--grad-cam', '--pretrained',
                               '--feature-extract'])
                              #  '--cifar10'])
                              #  '--finetune', '--path', 'logs/cbam_linear_resnet34_224_25_grad_cam/1/best.pth'])

if args.finetune and args.path is None:
    parser.error('--finetune requires --path')

main(args)

Initializing datasets and dataloaders for train and validation...
child 0 was frozen
Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
child 2 was frozen
ReLU(inplace=True)
child 3 was frozen
MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
child 0 of child 0 of child 4 was frozen
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
weight
child 0 of child 1 of child 4 was not frozen
BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
child 0 of child 2 of child 4 was frozen
ReLU(inplace=True)
child 0 of child 3 of child 4 was frozen
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
weight
child 0 of child 4 of child 4 was not frozen
BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
child 0 of child 0 of child 4 was frozen
Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
weight
child 0 of child 1