In [1]:
%matplotlib inline

import torch
import matplotlib.pyplot as plt
import torchvision
from torchvision import transforms
import torch.nn.functional as F
import torch.optim as optim
from torchsummary import summary
import time
from torchvision.models import resnet50, ResNet50_Weights, ResNet152_Weights, EfficientNet_B7_Weights

  warn(f"Failed to load image Python extension: {e}")


In [2]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [3]:
device

device(type='cuda')

In [4]:
#Dataset location
TRAIN_DATA_PATH = "../Data/CATS_DOGS/train"
TEST_DATA_PATH = "../Data/CATS_DOGS/test"

#Input pipeline settings, named once so the train and validation branches cannot drift apart
IMAGE_SIZE = (224, 224)  # every image is resized to this resolution
BATCH_SIZE = 32
NUM_WORKERS = 16  # for Kaggle

#Per-channel mean/std normalization applied after ToTensor
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

#Random augmentation is applied to the training split only; validation images are just resized
data_transforms = {
    'train':
    transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.RandomAffine(0, shear=10, scale=(0.8,1.2)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ]),
    'validation':
    transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        normalize
    ]),
}

image_datasets = {
    'train':
    torchvision.datasets.ImageFolder(TRAIN_DATA_PATH, data_transforms['train']),
    'validation':
    torchvision.datasets.ImageFolder(TEST_DATA_PATH, data_transforms['validation'])
}

#One loader per split; only the training order is shuffled
dataloaders = {
    split: torch.utils.data.DataLoader(dataset,
                                       batch_size=BATCH_SIZE,
                                       shuffle=(split == 'train'),
                                       num_workers=NUM_WORKERS)
    for split, dataset in image_datasets.items()
}

In [5]:
def test(model, data_loader):
    #Sets the module in evaluation mode
    model.eval()
    test_loss = 0
    correct = 0
    #correct.to(device)
    
    #dont update dynamic computation graph
    with torch.no_grad():
        #for every example in test
        for data, target in data_loader:
            
            target = target.view(-1, 1).float()
            target.to(device)
            
            #evaluate the model
            output = model(data.to(device))
            
            #acumulate the loss
            test_loss += F.binary_cross_entropy(output, target.to(device)).item()
            
            pred_cls = output.round()
            correct += pred_cls.eq(target.view(-1, 1).to(device)).sum() 
        
    test_loss /= len(data_loader.dataset)
    test_losses.append(test_loss)
    print('\nTest set: Avg. loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
    test_loss, correct, len(data_loader.dataset), 100. * correct / len(data_loader.dataset)))

In [6]:
def train(model, data_loader, test_on_every_epoch = False, test_loader = None):
    """Train ``model`` on ``data_loader`` for ``num_epochs`` epochs.

    Uses the notebook globals ``num_epochs``, ``log_interval``, ``criterion``,
    ``optimizer`` and ``device``. Prints the loss every ``log_interval``
    batches and the wall-clock duration of every epoch.

    Args:
        model: network to optimize; switched to training mode each epoch.
        data_loader: iterable yielding ``(data, target)`` training batches.
        test_on_every_epoch: when true, ``test`` is run before each epoch's
            updates.
        test_loader: loader handed to ``test``; when omitted, the notebook's
            ``dataloaders['validation']`` is used.
    """
    for epoch in range(1, num_epochs+1):

        if test_on_every_epoch:
            #test() needs the model and a loader; evaluate on the supplied loader
            #or fall back to the notebook's validation split
            eval_loader = test_loader if test_loader is not None else dataloaders['validation']
            test(model, eval_loader)

        #test() leaves the model in evaluation mode, so training mode is restored every epoch
        model.train()

        start = time.time()

        for batch_idx, (data, target) in enumerate(data_loader):

            #Clear gradients left over from any earlier backward pass
            optimizer.zero_grad()

            #forward pass
            out = model(data.to(device))

            #Loss between the output and the targets reshaped to a float column (see `criterion`)
            loss = criterion(out, target.view(-1, 1).float().to(device))

            #with this gradients are calculated
            loss.backward()

            #update the parameters
            optimizer.step()

            #Display iteration statistics
            if batch_idx % log_interval == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batch_idx * len(data), len(data_loader.dataset),100. * batch_idx / len(data_loader), loss.item()))

        end = time.time()
        print('Time: {} '.format(end - start))
    

### Test ResNet50

In [28]:
# Hyper parameters (the batch size is configured on the dataloaders)
num_epochs = 4
learning_rate = 0.01
momentum = 0.5
log_interval = 60

#Network model: ResNet50 with the IMAGENET1K_V1 pretrained weights
model = resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
#Disable training for all parameters in pretrained model
for param in model.parameters():
    param.requires_grad = False

#Replace the last fully connected layer with a small head that has 1 output. Only this head is trainable.
model.fc = torch.nn.Sequential(
               torch.nn.Linear(2048, 128),
               torch.nn.ReLU(inplace=True),
               torch.nn.Linear(128, 1),
               torch.nn.Sigmoid())
model = model.to(device)


#Stochastic gradient descent over the new head only: the frozen backbone parameters
#never receive gradients, so there is no reason to hand them to the optimizer
optimizer = optim.SGD(model.fc.parameters(), lr=learning_rate, momentum=momentum)
#Binary cross entropy loss
criterion = torch.nn.BCELoss()

train_losses = []
train_counter = []
test_losses = []

In [17]:
#just for testing: push one training batch through the network to check the output shape
it = iter(dataloaders['train'])
data, target = next(it)
data.shape

#the model lives on `device`, so the batch has to be moved there before the forward pass
out = model(data.to(device))

In [18]:
out.shape

torch.Size([32, 1])

In [34]:
train(model, dataloaders['train'])





Time: 27.14565110206604 




Time: 27.12320303916931 




Time: 27.060869216918945 




Time: 26.845443964004517 


In [35]:
test(model, dataloaders['validation'])


Test set: Avg. loss: 0.0018, Accuracy: 6124/6251 (98%)



### So?

In notebook 16, I used a simpler model with the same training and validation data, the same hyperparameters, optimizer and loss function; after 4 epochs it reached 76% accuracy.
Using the pretrained ResNet50 model and replacing only the final layers (which are the only trainable ones, by the way), I was able to reach 98% accuracy in just 4 epochs.

### Test Resnet152

In [13]:
# Hyper parameters (the batch size is configured on the dataloaders)
num_epochs = 4
learning_rate = 0.01
momentum = 0.5
log_interval = 60

#Network model: ResNet152 with the IMAGENET1K_V2 pretrained weights
model = torchvision.models.resnet152(weights=ResNet152_Weights.IMAGENET1K_V2)
#Disable training for all parameters in pretrained model
for param in model.parameters():
    param.requires_grad = False

#Replace the last fully connected layer with a small head that has 1 output. Only this head is trainable.
model.fc = torch.nn.Sequential(
               torch.nn.Linear(2048, 128),
               torch.nn.ReLU(inplace=True),
               torch.nn.Linear(128, 1),
               torch.nn.Sigmoid())
model = model.to(device)


#Stochastic gradient descent over the new head only: the frozen backbone parameters
#never receive gradients, so there is no reason to hand them to the optimizer
optimizer = optim.SGD(model.fc.parameters(), lr=learning_rate, momentum=momentum)
#Binary cross entropy loss
criterion = torch.nn.BCELoss()

train_losses = []
train_counter = []
test_losses = []

In [14]:
train(model, dataloaders['train'])





Time: 58.462249517440796 




Time: 58.34886598587036 




Time: 58.48272967338562 




Time: 58.76094079017639 


In [15]:
test(model, dataloaders['validation'])


Test set: Avg. loss: 0.0011, Accuracy: 6191/6251 (99%)



### So?

For ResNet152, using the same approach as in the previous example, we reached 99% accuracy.

### Test EfficientNet b7

In [7]:
EfficientNet_B7_Weights.IMAGENET1K_V1.transforms

functools.partial(<class 'torchvision.transforms._presets.ImageClassification'>, crop_size=600, resize_size=600, interpolation=<InterpolationMode.BICUBIC: 'bicubic'>)

In [7]:
#Input pipeline settings for this section, named once so both splits stay in sync
IMAGE_SIZE = (600, 600)  # every image is resized to this resolution
BATCH_SIZE = 16
NUM_WORKERS = 16  # for Kaggle

#Per-channel mean/std normalization applied after ToTensor
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])

#Random augmentation is applied to the training split only; validation images are just resized
data_transforms = {
    'train':
    transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.RandomAffine(0, shear=10, scale=(0.8,1.2)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize
    ]),
    'validation':
    transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        normalize
    ]),
}

image_datasets = {
    'train':
    torchvision.datasets.ImageFolder(TRAIN_DATA_PATH, data_transforms['train']),
    'validation':
    torchvision.datasets.ImageFolder(TEST_DATA_PATH, data_transforms['validation'])
}

#One loader per split; only the training order is shuffled
dataloaders = {
    split: torch.utils.data.DataLoader(dataset,
                                       batch_size=BATCH_SIZE,
                                       shuffle=(split == 'train'),
                                       num_workers=NUM_WORKERS)
    for split, dataset in image_datasets.items()
}

In [10]:
# Smoke test: pull a single batch from the training loader and run one forward pass.
it = iter(dataloaders['train'])
data, target = next(it)
data.shape

# NOTE(review): `model` is whatever the kernel currently holds; in file order the
# EfficientNet model is only created in a later cell -- confirm which network this
# forward pass is meant to exercise.
out = model(data.to(device))

In [13]:
#Inspect the stock EfficientNet-B7 head. It is built here without weights (only the layer
#layout matters), so the cell does not depend on which `model` the kernel currently holds
torchvision.models.efficientnet_b7(weights=None).classifier

Sequential(
  (0): Dropout(p=0.5, inplace=True)
  (1): Linear(in_features=2560, out_features=1000, bias=True)
)

In [14]:
# Hyper parameters (the batch size is configured on the dataloaders)
num_epochs = 4
learning_rate = 0.01
momentum = 0.5
log_interval = 60

#Network model: EfficientNet-B7 with the IMAGENET1K_V1 pretrained weights
model = torchvision.models.efficientnet_b7(weights=EfficientNet_B7_Weights.IMAGENET1K_V1)
#Disable training for all parameters in pretrained model
for param in model.parameters():
    param.requires_grad = False

#Replace the whole classifier with a small head that has 1 output. Only this head is trainable.
model.classifier = torch.nn.Sequential(
               torch.nn.Linear(2560, 128),
               torch.nn.ReLU(inplace=True),
               torch.nn.Linear(128, 1),
               torch.nn.Sigmoid())
model = model.to(device)


#Stochastic gradient descent over the new head only: the frozen backbone parameters
#never receive gradients, so there is no reason to hand them to the optimizer
optimizer = optim.SGD(model.classifier.parameters(), lr=learning_rate, momentum=momentum)
#Binary cross entropy loss
criterion = torch.nn.BCELoss()

train_losses = []
train_counter = []
test_losses = []

In [15]:
train(model, dataloaders['train'])





Time: 541.9774088859558 




Time: 543.5254535675049 




Time: 570.105890750885 




Time: 550.1512305736542 


In [16]:
test(model, dataloaders['validation'])


Test set: Avg. loss: 0.0018, Accuracy: 6208/6251 (99%)



### ToDo

- NA

### Resources

- https://www.kaggle.com/code/pmigdal/transfer-learning-with-resnet-50-in-pytorch
- https://paperswithcode.com/sota/image-classification-on-imagenet
- https://pytorch.org/vision/stable/models.html