#### Simple CNN for image classification

In [1]:
import torch
from torch import nn

In [4]:
from transformers import ViTFeatureExtractor, ViTModel
model = ViTModel.from_pretrained("google/vit-base-patch16-224-in21k")


transformers.models.vit.modeling_vit.ViTModel

In [7]:
# verify model is instance of nn.Module
isinstance(model, nn.Module)
vit_model = model

In [22]:
def create_model(img_channels = 3):
    model = nn.Sequential()
    # model.add_module('conv0', nn.Conv2d(in_channels=img_channels, out_channels=16, kernel_size=5, padding='same'))
    # model.add_module('relu0', nn.ReLU())
    # model.add_module('pool0', nn.MaxPool2d(kernel_size=2))

    # model.add_module('conv1', nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, padding='same'))
    # model.add_module('relu1', nn.ReLU())
    # model.add_module('pool1', nn.MaxPool2d(kernel_size=2))

    # model.add_module('conv2', nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding='same'))
    # model.add_module('relu2', nn.ReLU())
    # model.add_module('pool2', nn.MaxPool2d(kernel_size=2))

    # model.add_module('conv3', nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, padding='same'))
    # model.add_module('relu3', nn.ReLU())
    # model.add_module('pool3', nn.MaxPool2d(kernel_size=2))

    # model.add_module('conv4', nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5, padding='same'))
    # model.add_module('relu4', nn.ReLU())
    # model.add_module('pool4', nn.MaxPool2d(kernel_size=2))
    model.add_module('vit', vit_model)
    # model.add_module('flatten', nn.Flatten())
    # model.add_module('dropout0', nn.Dropout(p=0.5))
    # model.add_module('ln0', nn.Linear(1024, out_features=512))
    # model.add_module('relu4', nn.ReLU())
    # model.add_module('dropout1', nn.Dropout(p=0.5))
    # model.add_module('ln1', nn.Linear(512, out_features=10))
    return model


In [23]:
in_tensor = torch.randn(1, 3, 224, 224)
create_model()(in_tensor)

BaseModelOutputWithPooling(last_hidden_state=tensor([[[ 0.2690,  0.1126,  0.0388,  ..., -0.0805, -0.0961, -0.0967],
         [ 0.0750, -0.0898,  0.0955,  ...,  0.1092, -0.0312, -0.1363],
         [ 0.2899, -0.0878, -0.0245,  ...,  0.0303,  0.0224, -0.0342],
         ...,
         [ 0.4162,  0.0329, -0.1779,  ...,  0.1251,  0.0237, -0.2066],
         [ 0.2597,  0.0466, -0.1116,  ...,  0.0249, -0.0543, -0.1122],
         [ 0.1294,  0.0503,  0.0673,  ...,  0.0747, -0.0468, -0.0782]]],
       grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[ 0.2760,  0.0227, -0.5505,  0.1537, -0.0372,  0.6907,  0.1118,  0.2593,
         -0.4370,  0.1347, -0.2891,  0.6998, -0.4056,  0.3344, -0.3234,  0.6861,
         -0.7918,  0.0525, -0.1030,  0.0709,  0.5463, -0.7283, -0.0366, -0.5570,
          0.8518,  0.2218, -0.5922, -0.4778,  0.2425,  0.1889,  0.1958,  0.6503,
         -0.1981,  0.3196,  0.5965,  0.1285, -0.0282, -0.0061,  0.4933, -0.3511,
         -0.4261, -0.3544,  0.3496, -0.0520, -0.18

### Load and prepare an image dataset

In [14]:
from torchvision import datasets, transforms
#Import image augmentation clases from torchvision
from torchvision.transforms import ToTensor, Compose, Normalize, RandomHorizontalFlip, RandomRotation, Resize
from torch.utils.data import DataLoader
image_path = './data/eurosat'
transform = Compose([
    #Resize to 224
    Resize(224),
    RandomHorizontalFlip(),
    RandomRotation(degrees=10),
    ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
eurosat_dataset = datasets.EuroSAT(root=image_path, transform=transform, download=True)

#Randomize the dataset
torch.manual_seed(1)
train_len = int(0.85 * len(eurosat_dataset)) - int(0.85 * len(eurosat_dataset) * 0.2)
valid_len = int(0.85 * len(eurosat_dataset) * 0.2)
test_len = len(eurosat_dataset) - train_len - valid_len
train_dataset, test_dataset, valid_dataset = torch.utils.data.random_split(eurosat_dataset, [train_len, valid_len, test_len])

#Create DataLoaders
batch_size = 128
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=20)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=20)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True, num_workers=20)


#### Create the training loop function

In [4]:
def train(model, epochs, train_dl, valid_dl, gpu=False):
    if gpu:
        model.cuda()
    else:
        model.cpu()

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    loss_hist_train = [0] * epochs
    accuracy_hist_train = [0] * epochs

    loss_hist_valid = [0] * epochs
    accuracy_hist_valid = [0] * epochs

    for epoch in range(epochs):
        model.train()
        for x_batch, y_batch in train_dl:
            if gpu:
                x_batch = x_batch.cuda()
                y_batch = y_batch.cuda()
            pred = model(x_batch)
            loss = loss_fn(pred, y_batch)
            
            loss.backward() #Calculate tensor gradients with backpropagation
            optimizer.step() #Apply updates to weights using the optimizer gradient descent specif impl algorithm
            optimizer.zero_grad() #Reset the gradients for the next iteration
            
            #Calculate and save matrics
            loss_hist_train[epoch] += loss.item() * y_batch.size(0) #Add the loss to the loss history
            is_correct = (torch.argmax(pred, dim=1) == y_batch).cpu().float()
            accuracy_hist_train[epoch] += is_correct.sum()

        loss_hist_train[epoch] /= train_len
        accuracy_hist_train[epoch] /= train_len

        model.eval() #Set the model to evaluation mode
        with torch.no_grad(): #Turn off gradients
            for x_batch, y_batch in valid_dl:
                if gpu:
                    x_batch = x_batch.cuda()
                    y_batch = y_batch.cuda()
                pred = model(x_batch)
                loss = loss_fn(pred, y_batch)
                loss_hist_valid[epoch] += (loss.item() * y_batch.size(0))
                is_correct = (torch.argmax(pred, dim=1) == y_batch).cpu().float()
                accuracy_hist_valid[epoch] += is_correct.sum()
            loss_hist_valid[epoch] /= valid_len
            accuracy_hist_valid[epoch] /= valid_len
        print(f'Epoch {epoch+1}/{epochs} accuracy: {accuracy_hist_train[epoch]:4f} valid_accuracy: {accuracy_hist_valid[epoch]:4f}')
    return loss_hist_train, accuracy_hist_train, loss_hist_valid, accuracy_hist_valid

In [5]:
def evaluate_model(model, data_loader, gpu=False):
    if gpu:
        model.cuda()
    else:
        model.cpu()    
    model.eval()
    y_list = []
    pred_list = []
    for x_batch, y_batch in data_loader:
        if gpu:
            x_batch = x_batch.cuda()
        pred = model(x_batch)
        pred_list.append(torch.argmax(pred, dim=1).cpu())
        y_list.append(y_batch)
    preds = torch.cat(pred_list, dim=0)
    y_true = torch.cat(y_list, dim=0)    
    #Calculate the accuracy
    is_correct = (preds == y_true).float()
    accuracy = is_correct.sum() / is_correct.numel()
    print(f'Accuracy: {accuracy:4f}')
    return accuracy

In [6]:
torch.backends.cuda.matmul.allow_tf32 = True

In [91]:
gpu_model = create_model()

#Measure the time it takes to train the model
import time
start = time.time()
loss_hist_train, accuracy_hist_train, loss_hist_valid, accuracy_hist_valid = train(gpu_model, epochs=40, train_dl=train_loader, valid_dl=valid_loader, gpu=True)
end = time.time()
print(f'Time to train: {end-start}')

Epoch 1/40 accuracy: 0.341340 valid_accuracy: 0.435076
Epoch 2/40 accuracy: 0.596786 valid_accuracy: 0.590850
Epoch 3/40 accuracy: 0.679684 valid_accuracy: 0.578867
Epoch 4/40 accuracy: 0.716558 valid_accuracy: 0.663834
Epoch 5/40 accuracy: 0.734586 valid_accuracy: 0.643137
Epoch 6/40 accuracy: 0.759749 valid_accuracy: 0.638344
Epoch 7/40 accuracy: 0.775327 valid_accuracy: 0.671242
Epoch 8/40 accuracy: 0.788344 valid_accuracy: 0.705229
Epoch 9/40 accuracy: 0.783660 valid_accuracy: 0.692810
Epoch 10/40 accuracy: 0.809205 valid_accuracy: 0.699129
Epoch 11/40 accuracy: 0.825654 valid_accuracy: 0.751634
Epoch 12/40 accuracy: 0.841285 valid_accuracy: 0.716993
Epoch 13/40 accuracy: 0.841122 valid_accuracy: 0.734858
Epoch 14/40 accuracy: 0.844009 valid_accuracy: 0.758388
Epoch 15/40 accuracy: 0.860621 valid_accuracy: 0.757734
Epoch 16/40 accuracy: 0.858987 valid_accuracy: 0.763399
Epoch 17/40 accuracy: 0.863344 valid_accuracy: 0.749455
Epoch 18/40 accuracy: 0.869771 valid_accuracy: 0.762527
E

In [59]:
cpu_model = create_model()

#Measure the time it takes to train the model
import time
start = time.time()
loss_hist_train, accuracy_hist_train, loss_hist_valid, accuracy_hist_valid = train(cpu_model, epochs=15, train_dl=train_loader, valid_dl=valid_loader, gpu=False)
end = time.time()
print(f'Time to train: {end-start}')

Epoch 1/15 accuracy: 0.351144 valid_accuracy: 0.488671


KeyboardInterrupt: 

In [None]:
import matplotlib.pyplot as plt
#Plot the loss
plt.plot(loss_hist_train, label='Training Loss')
plt.plot(loss_hist_valid, label='Validation Loss')
plt.legend()
plt.show()


In [92]:
evaluate_model(gpu_model, test_loader, gpu=True)

Accuracy: 0.896732


tensor(0.8967)

In [None]:
evaluate_model(cpu_model, test_loader, gpu=False)

In [9]:
import pytorch_lightning as pl
loss_fn = nn.CrossEntropyLoss()
class ClsModel(pl.LightningModule):
    def __init__(self, img_channels=3) -> None:
        super().__init__()
        model = nn.Sequential()
        model.add_module('conv0', nn.Conv2d(in_channels=img_channels, out_channels=16, kernel_size=5, padding='same'))
        model.add_module('relu0', nn.ReLU())
        model.add_module('pool0', nn.MaxPool2d(kernel_size=2))

        model.add_module('conv1', nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, padding='same'))
        model.add_module('relu1', nn.ReLU())
        model.add_module('pool1', nn.MaxPool2d(kernel_size=2))

        model.add_module('conv2', nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, padding='same'))
        model.add_module('relu2', nn.ReLU())
        model.add_module('pool2', nn.MaxPool2d(kernel_size=2))
        

        model.add_module('flatten', nn.Flatten())
        model.add_module('ln0', nn.Linear(4096, out_features=1024))
        model.add_module('relu4', nn.ReLU())
        model.add_module('ln1', nn.Linear(1024, out_features=10))
        self.model = model
       
    def forward(self, x):
        return self.model(x)

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001)
        return optimizer

    def step(self, batch):
        x, y = batch
        y_pred = self.forward(x)
        return loss_fn(y_pred, y)

    def training_step(self, train_batch, batch_idx):
        loss = self.step(train_batch)
        self.log('train_loss', loss)
        return loss  

    def validation_step(self, valid_batch, batch_idx):
        loss = self.step(valid_batch)
        self.log('valid_loss', loss)
        return loss       


In [10]:
lmodel = ClsModel()
trainer = pl.Trainer(max_epochs=20, accelerator='gpu', devices=1)

start = time.time()
trainer.fit(lmodel, train_loader, valid_loader)
end = time.time()
print(f'Time to train: {end-start}')



GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params
-------------------------------------
0 | model | Sequential | 4.3 M 
-------------------------------------
4.3 M     Trainable params
0         Non-trainable params
4.3 M     Total params
17.084    Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=20` reached.


Time to train: 58.0283567905426


In [17]:
evaluate_model(lmodel, test_loader, gpu=True)

Accuracy: 0.819608


tensor(0.8196)

In [3]:
torch.version.cuda

'11.6'