In [138]:
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
from torchvision.models import resnet50, ResNet50_Weights
import torch.nn as nn
import torch
import torch.optim as optim
from datetime import datetime
from torch import nn, Tensor
from torch.nn import init
from torch.nn.modules.utils import _pair
from torch.nn.parameter import Parameter


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [139]:
### Define data transformations

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

batch_size = 512

### CIFAR-10 train set
train_set = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

### Data loader for the train set
train_loader = torch.utils.data.DataLoader(train_set, batch_size = batch_size, shuffle=True)

### CIFAR-10 test set
test_set = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)

### Data loader for the test set
test_loader = torch.utils.data.DataLoader(test_set, batch_size = batch_size, shuffle=False)


Files already downloaded and verified
Files already downloaded and verified


In [140]:
### Conventional CNN model
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride = 1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(32, 64, kernel_size=3, stride = 1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 8 * 8, 64),
            nn.ReLU(),
            nn.Linear(64, 10) ,
            nn.Softmax()
            )
        
    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [141]:
def initialize_weights(model):
    for module in model.modules():
        if isinstance(module, nn.Conv2d) or isinstance(module, nn.Linear):
            nn.init.xavier_uniform_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)

In [142]:
### Initialize the model and parameters( weights and biases) 
cnn = CNN().to(device)
initialize_weights(cnn)

### learning rate and number of epochs
learning_rate = 0.001
num_epochs = 40

### Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn.parameters(), lr=learning_rate)

# Training cnn 
total_step = len(train_loader)
start_time = datetime.now()
for epoch in range(num_epochs):
    cnn.train()
    running_loss = 0 
    train_correct = 0
    train_total = 0
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)
        # Forward pass
        outputs = cnn(images)
        loss = criterion(outputs, labels)
        running_loss += loss.item()
        predicted  = torch.argmax(outputs, dim =1)            
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 50 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_step}], Average Loss: {running_loss / i:.4f} , Accuracy train : {(train_correct/train_total) * 100 : .2f}')

    # Testing the model
    cnn.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs =  cnn(images)
            predicted  = torch.argmax(outputs, dim =1)            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f' Accuracy test : {100 * correct / total}%')
end_time = datetime.now()

print(f"The whole processing took {end_time - start_time} for a conventional convulutional neural network")


Epoch [1/40], Step [50/98], Average Loss: 2.2071 , Accuracy train :  28.89
Average Loss : Accuracy test : 47.65%
Epoch [2/40], Step [50/98], Average Loss: 1.9834 , Accuracy train :  51.56
Average Loss : Accuracy test : 55.2%
Epoch [3/40], Step [50/98], Average Loss: 1.9124 , Accuracy train :  58.73
Average Loss : Accuracy test : 59.33%
Epoch [4/40], Step [50/98], Average Loss: 1.8782 , Accuracy train :  62.30
Average Loss : Accuracy test : 60.26%
Epoch [5/40], Step [50/98], Average Loss: 1.8506 , Accuracy train :  65.04
Average Loss : Accuracy test : 60.46%
Epoch [6/40], Step [50/98], Average Loss: 1.8383 , Accuracy train :  66.29
Average Loss : Accuracy test : 64.14%
Epoch [7/40], Step [50/98], Average Loss: 1.8241 , Accuracy train :  67.53
Average Loss : Accuracy test : 64.27%
Epoch [8/40], Step [50/98], Average Loss: 1.8103 , Accuracy train :  68.94
Average Loss : Accuracy test : 64.43%
Epoch [9/40], Step [50/98], Average Loss: 1.8037 , Accuracy train :  69.70
Average Loss : Accurac

In [143]:
def deform_conv2d(
    input,
    offset,
    weight,
    bias,
    stride = (1, 1),
    padding = (0, 0),
    dilation = (1, 1),
    mask = None,
):
    out_channels = weight.shape[0]

    use_mask = mask is not None

    if mask is None:
        mask = torch.zeros((input.shape[0], 0), device=input.device, dtype=input.dtype)

    if bias is None:
        bias = torch.zeros(out_channels, device=input.device, dtype=input.dtype)

    stride_h, stride_w = _pair(stride)
    pad_h, pad_w = _pair(padding)
    dil_h, dil_w = _pair(dilation)
    weights_h, weights_w = weight.shape[-2:]
    _, n_in_channels, _, _ = input.shape

    n_offset_grps = offset.shape[1] // (2 * weights_h * weights_w)
    n_weight_grps = n_in_channels // weight.shape[1]
    
    return torch.ops.torchvision.deform_conv2d(
        input,
        weight,
        offset,
        mask,
        bias,
        stride_h,
        stride_w,
        pad_h,
        pad_w,
        dil_h,
        dil_w,
        n_weight_grps,
        n_offset_grps,
        use_mask,
    )


class DeformConv2d(nn.Module):
    def __init__(
        self,
        in_channels: int,
        out_channels: int,
        kernel_size: int,
        stride: int = 1,
        padding: int = 0,
        dilation: int = 1,
        groups: int = 1,
        bias: bool = True,
    ):
        super().__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)
        self.groups = groups

        self.weight = Parameter(
            torch.empty(out_channels, in_channels // groups, self.kernel_size[0], self.kernel_size[1])
        )

        self.bias = Parameter(torch.empty(out_channels))
        self.reset_parameters()
    def reset_parameters(self) -> None:
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))

        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)

    def forward(self, input, offset, mask = None) :
        return deform_conv2d(
            input,
            offset,
            self.weight,
            self.bias,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            mask=mask,
        )

In [150]:
class DeformableConv2d(nn.Module):
    def __init__(self,in_channels,out_channels,kernel_size = 3,stride=1,padding=1,dilation=1,bias=False):
        super(DeformableConv2d, self).__init__()

        kernel_size = (kernel_size, kernel_size)
        
        self.stride = (stride, stride)
        self.padding = padding
        self.dilation = dilation
        self.offset_conv = nn.Conv2d(in_channels,
                                     2 * kernel_size[0] * kernel_size[1],
                                     kernel_size=kernel_size,
                                     stride=stride,
                                     padding=self.padding,
                                     dilation=self.dilation,
                                     bias=True)

        nn.init.constant_(self.offset_conv.weight, 0.)
        nn.init.constant_(self.offset_conv.bias, 0.)

        self.modulator_conv = nn.Conv2d(in_channels,
                                        1 * kernel_size[0] * kernel_size[1],
                                        kernel_size=kernel_size,
                                        stride=stride,
                                        padding=self.padding,
                                        dilation=self.dilation,
                                        bias=True)

        nn.init.constant_(self.modulator_conv.weight, 0.)
        nn.init.constant_(self.modulator_conv.bias, 0.)

        self.regular_conv = nn.Conv2d(in_channels=in_channels,
                                      out_channels=out_channels,
                                      kernel_size=kernel_size,
                                      stride=stride,
                                      padding=self.padding,
                                      dilation=self.dilation,
                                      bias=bias)

    def forward(self, x):
        offset = self.offset_conv(x)
        modulator = 2. * torch.sigmoid(self.modulator_conv(x))
        x = deform_conv2d(input=x,
                                          offset=offset,
                                          weight=self.regular_conv.weight,
                                          bias=self.regular_conv.bias,
                                          padding=self.padding,
                                          mask=modulator,
                                          stride=self.stride,
                                          dilation=self.dilation)
        return x

In [151]:
class DCNN(nn.Module):
    def __init__(self):
        super(DCNN, self).__init__()
        self.conv_layers = nn.Sequential(
            DeformableConv2d(3, 32, kernel_size=3, stride = 1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            DeformableConv2d(32, 64, kernel_size=3, stride = 1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 8 * 8, 64),
            nn.ReLU(),
            nn.Linear(64, 10) ,
            nn.Softmax()
            )
    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [156]:
### Initialize the model and parameters( weights and biases) 
dcnn = DCNN().to(device)
initialize_weights(dcnn)

### learning rate and number of epochs
learning_rate = 0.001
num_epochs = 40

### Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(dcnn.parameters(), lr=learning_rate)

# Training cnn 
total_step = len(train_loader)
start_time = datetime.now()
for epoch in range(num_epochs):
    dcnn.train()
    running_loss = 0 
    train_correct = 0
    train_total = 0
    for i, (images, labels) in enumerate(train_loader):
        images, labels = images.to(device), labels.to(device)
        # Forward pass
        outputs = dcnn(images)
        loss = criterion(outputs, labels)
        running_loss += loss.item()
        predicted  = torch.argmax(outputs, dim =1)            
        train_total += labels.size(0)
        train_correct += (predicted == labels).sum().item()
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 50 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{total_step}], Average Loss: {running_loss / i:.4f} , Accuracy train : {(train_correct/train_total) * 100 : .2f}')

    # Testing the model
    dcnn.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs =  dcnn(images)
            predicted  = torch.argmax(outputs, dim =1)            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f'Average Loss : Accuracy test : {100 * correct / total}%')
end_time = datetime.now()

print(f"The whole processing took {end_time - start_time} for a Deformable convulutional neural network")


Epoch [1/40], Step [50/98], Average Loss: 1.8089 , Accuracy train :  69.11
Average Loss : Accuracy test : 64.26%
Epoch [2/40], Step [50/98], Average Loss: 1.7950 , Accuracy train :  70.54
Average Loss : Accuracy test : 66.36%
Epoch [3/40], Step [50/98], Average Loss: 1.7820 , Accuracy train :  71.81
Average Loss : Accuracy test : 65.78%
Epoch [4/40], Step [50/98], Average Loss: 1.7729 , Accuracy train :  72.73
Average Loss : Accuracy test : 66.4%
Epoch [5/40], Step [50/98], Average Loss: 1.7645 , Accuracy train :  73.68
Average Loss : Accuracy test : 65.96%
Epoch [6/40], Step [50/98], Average Loss: 1.7647 , Accuracy train :  73.59
Average Loss : Accuracy test : 66.33%
Epoch [7/40], Step [50/98], Average Loss: 1.7535 , Accuracy train :  74.63
Average Loss : Accuracy test : 67.13%
Epoch [8/40], Step [50/98], Average Loss: 1.7485 , Accuracy train :  75.18
Average Loss : Accuracy test : 67.22%
Epoch [9/40], Step [50/98], Average Loss: 1.7368 , Accuracy train :  76.23
Average Loss : Accurac