In this assignment, I will verify the results presented in the paper "Do We Really Need Multiplications in Deep Learning?" by replicating experiments on the CIFAR-10 dataset using VGG, ResNet-20, and ResNet-32 architectures. The focus will be on comparing three approaches: standard Convolutional Neural Networks (CNNs), Binary Neural Networks (BNNs), and Adder Networks (AdderNets).

AdderNets, as proposed in the paper, replace the computationally expensive multiplications in CNNs with additions by using the ℓ1-norm distance to compute similarity between input features and filters, significantly reducing the computational complexity. I will evaluate the performance of these models, with a particular focus on verifying accuracy and computational cost reductions claimed by AdderNets.

The goal is to replicate and confirm the findings from the original research, which demonstrated that AdderNets could achieve competitive accuracy without the need for multiplication-heavy operations in deep learning.

# Assignment A8: CIFAR-10 Dataset Overview

In this assignment, I will be using the CIFAR-10 dataset, which consists of 60,000 color images of size 32x32, divided into 10 classes. Each class contains 6,000 images, making the dataset a diverse collection for image classification tasks.

The dataset is split into two primary sections:
50,000 training images
10,000 test images

The training set is further divided into five batches, each containing 10,000 images. The images within these batches are in random order, so some classes may appear more frequently in one batch than in another. However, across all five training batches the classes are evenly represented, with exactly 5,000 images per class.

The test set consists of one batch of 10,000 images, with exactly 1,000 images randomly selected from each class.

The 10 classes in the dataset are as follows:
1. Airplane
2. Automobile
3. Bird
4. Cat
5. Deer
6. Dog
7. Frog
8. Horse
9. Ship
10. Truck

# Import Pytorch Libraries

In [None]:
pip install torch torchvision




In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms


In [None]:
# Set device (GPU if available, otherwise CPU).
# Falls back to the CPU when torch.cuda.is_available() reports no usable GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# VGG-Small Architecture

In [None]:
class VGGScratch(nn.Module):
    """VGG-style classifier with standard convolutions for 3-channel 32x32 inputs.

    The feature extractor has three stages (64, 128 and 256 channels). Each
    stage is two 3x3 Conv -> BatchNorm -> ReLU units followed by a 2x2 max
    pool. The classifier head is three fully connected layers with dropout
    and produces 10 logits (one per CIFAR-10 class).
    """

    def __init__(self):
        super().__init__()

        # Convolutional layers: built stage by stage, in the same order as a
        # hand-written nn.Sequential, so layer indices stay the same.
        feature_stack = []
        channels_in = 3
        for channels_out in (64, 128, 256):
            for _ in range(2):
                feature_stack.append(nn.Conv2d(channels_in, channels_out, kernel_size=3, padding=1))
                feature_stack.append(nn.BatchNorm2d(channels_out))
                feature_stack.append(nn.ReLU(inplace=True))
                channels_in = channels_out
            feature_stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
        self.conv_layers = nn.Sequential(*feature_stack)

        # Fully connected layers: three 2x2 pools shrink 32x32 to 4x4, so the
        # flattened feature vector has 256 * 4 * 4 entries.
        self.fc_layers = nn.Sequential(
            nn.Linear(256 * 4 * 4, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 512),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 10),  # CIFAR-10 has 10 classes
        )

    def forward(self, x):
        """Return class logits of shape (batch, 10) for an image batch ``x``."""
        feature_maps = self.conv_layers(x)
        flat = feature_maps.view(feature_maps.size(0), -1)  # one row per sample
        return self.fc_layers(flat)


In [None]:
# Create model: instantiate VGGScratch and move its parameters to `device`.
model = VGGScratch().to(device)


In [None]:
# Data transformations
# Random augmentation (flip + padded crop), then tensor conversion and
# per-channel normalization.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # random left-right mirror
    transforms.RandomCrop(32, padding=4),  # pad by 4 pixels, then take a random 32x32 crop
    transforms.ToTensor(),
    # (mean, std) per RGB channel -- presumably CIFAR-10 training-set statistics; TODO confirm
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])


In [None]:
# Load CIFAR-10 dataset
# Training images use the augmenting `transform` defined above.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)

# Evaluation must be deterministic: the random flip/crop used for training
# would perturb the test images and make the reported accuracy noisy, so the
# test set only gets tensor conversion and the same normalization.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)


Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Define loss function and optimizer
# Cross-entropy over the class logits; SGD with momentum 0.9 and weight decay 5e-4.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)


In [None]:
# Number of epochs (full passes over the training loader)
num_epochs = 10


In [None]:
# Training function
def train_model():
    """Train the global ``model`` for ``num_epochs`` epochs.

    Uses the module-level ``trainloader``, ``criterion``, ``optimizer`` and
    ``device``. After every epoch the mean per-batch training loss is printed.
    """
    model.train()
    for epoch in range(num_epochs):
        epoch_loss = 0.0
        for inputs, labels in trainloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()  # drop gradients left over from the previous step
            loss = criterion(model(inputs), labels)  # forward pass + loss
            loss.backward()  # back-propagate
            optimizer.step()  # apply the parameter update

            epoch_loss += loss.item()

        mean_loss = epoch_loss / len(trainloader)
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {mean_loss:.4f}")


In [None]:
# Evaluation function
def test_model():
    """Print the top-1 accuracy of the global ``model`` on ``testloader``.

    The model is switched to evaluation mode and gradients are disabled for
    the whole pass.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for images, targets in testloader:
            images = images.to(device)
            targets = targets.to(device)
            # Index of the largest logit in each row is the predicted class.
            predictions = model(images).max(dim=1).indices
            n_seen += targets.size(0)
            n_correct += predictions.eq(targets).sum().item()

    print(f"Accuracy: {100 * n_correct / n_seen:.2f}%")


In [None]:
# Train the model (prints the mean training loss after every epoch)
train_model()


Epoch [1/10], Loss: 1.2601
Epoch [2/10], Loss: 1.0624
Epoch [3/10], Loss: 0.9208
Epoch [4/10], Loss: 0.8009
Epoch [5/10], Loss: 0.7105
Epoch [6/10], Loss: 0.6459
Epoch [7/10], Loss: 0.5977
Epoch [8/10], Loss: 0.5547
Epoch [9/10], Loss: 0.5190
Epoch [10/10], Loss: 0.4919


In [None]:
# Test the model (prints the accuracy over the test loader)
test_model()


Accuracy: 82.78%


# Here, the VGG-Small architecture was trained with standard CNN layers for 10 epochs and achieved 82.78% accuracy

# ResNet-20 with CNN layers

In [None]:
class BasicBlock(nn.Module):
    """Residual block: two 3x3 Conv -> BatchNorm units plus a skip connection.

    Args:
        in_channels: Number of channels of the block input.
        out_channels: Number of channels produced by both convolutions.
        stride: Stride of the first convolution (and of the projection
            shortcut, when one is needed).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # The skip path is the identity unless the main path changes the
        # tensor shape; then a strided 1x1 convolution matches it.
        shape_changes = stride != 1 or in_channels != out_channels
        if shape_changes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        residual = self.bn1(self.conv1(x))
        residual = self.relu(residual)
        residual = self.bn2(self.conv2(residual))
        residual += self.shortcut(x)
        return self.relu(residual)

class ResNet20(nn.Module):
    """ResNet-20 with standard convolutions for 32x32 inputs and 10 classes.

    A 3x3 stem convolution is followed by three stages of three residual
    blocks each (16, 32 and 64 channels; the last two stages downsample by 2),
    then 8x8 average pooling and a linear classifier.
    """

    def __init__(self):
        super().__init__()
        self.in_channels = 16  # running channel count consumed by _make_layer
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(16, 3, stride=1)
        self.layer2 = self._make_layer(32, 3, stride=2)
        self.layer3 = self._make_layer(64, 3, stride=2)
        self.fc = nn.Linear(64, 10)  # CIFAR-10 has 10 classes

    def _make_layer(self, out_channels, blocks, stride):
        """Build one stage: a (possibly strided) first block plus ``blocks - 1`` plain ones."""
        first = BasicBlock(self.in_channels, out_channels, stride)
        self.in_channels = out_channels
        remaining = [BasicBlock(out_channels, out_channels) for _ in range(blocks - 1)]
        return nn.Sequential(first, *remaining)

    def forward(self, x):
        """Return class logits of shape (batch, 10)."""
        stem = self.relu(self.bn1(self.conv1(x)))
        features = self.layer3(self.layer2(self.layer1(stem)))
        pooled = nn.AvgPool2d(8)(features)
        return self.fc(pooled.view(pooled.size(0), -1))


In [None]:
# Create model: instantiate ResNet20 and move its parameters to `device`.
model = ResNet20().to(device)


In [None]:
# Data transformations
# Random augmentation (flip + padded crop), then tensor conversion and
# per-channel normalization.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # random left-right mirror
    transforms.RandomCrop(32, padding=4),  # pad by 4 pixels, then take a random 32x32 crop
    transforms.ToTensor(),
    # (mean, std) per RGB channel -- presumably CIFAR-10 training-set statistics; TODO confirm
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])


In [None]:
# Load CIFAR-10 dataset
# Training images use the augmenting `transform` defined above.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)

# Evaluation must be deterministic: the random flip/crop used for training
# would perturb the test images and make the reported accuracy noisy, so the
# test set only gets tensor conversion and the same normalization.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)


Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Loss function and optimizer
# Cross-entropy over the class logits; SGD with momentum 0.9 and weight decay 5e-4.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)


In [None]:
# Number of epochs (full passes over the training loader)
num_epochs = 10


In [None]:
# Training function
def train_model():
    """Run ``num_epochs`` epochs of training on the global ``model``.

    Reads the module-level ``trainloader``, ``criterion``, ``optimizer`` and
    ``device``; prints the average batch loss once per epoch.
    """
    model.train()
    for epoch in range(num_epochs):
        loss_sum = 0.0
        for batch_inputs, batch_labels in trainloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)

            optimizer.zero_grad()  # reset accumulated gradients
            batch_loss = criterion(model(batch_inputs), batch_labels)
            batch_loss.backward()  # compute gradients
            optimizer.step()  # update the weights

            loss_sum += batch_loss.item()

        average = loss_sum / len(trainloader)
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {average:.4f}")


In [None]:
# Evaluation function
def test_model():
    """Evaluate the global ``model`` on ``testloader`` and print its accuracy in percent."""
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for batch_inputs, batch_labels in testloader:
            batch_inputs = batch_inputs.to(device)
            batch_labels = batch_labels.to(device)
            # The arg-max over the class dimension is the predicted label.
            predicted = model(batch_inputs).max(dim=1).indices
            seen += batch_labels.size(0)
            hits += predicted.eq(batch_labels).sum().item()

    print(f"Accuracy: {100 * hits / seen:.2f}%")


In [None]:
# Train the model (prints the mean training loss after every epoch)
train_model()

# Test the model (prints the accuracy over the test loader)
test_model()


Epoch [1/10], Loss: 1.4981
Epoch [2/10], Loss: 1.0445
Epoch [3/10], Loss: 0.8584
Epoch [4/10], Loss: 0.7513
Epoch [5/10], Loss: 0.6757
Epoch [6/10], Loss: 0.6340
Epoch [7/10], Loss: 0.5901
Epoch [8/10], Loss: 0.5706
Epoch [9/10], Loss: 0.5434
Epoch [10/10], Loss: 0.5264
Accuracy: 79.52%


Here, ResNet-20 was trained with standard CNN layers and achieved 79.52% accuracy, which is lower than the 82.78% obtained with VGG-Small.

# ResNet-32 with CNN layers

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms

class BasicBlock(nn.Module):
    """Residual block: two 3x3 Conv -> BatchNorm units plus a skip connection.

    Args:
        in_channels: Number of channels of the block input.
        out_channels: Number of channels produced by both convolutions.
        stride: Stride of the first convolution (and of the projection
            shortcut, when one is needed).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(
            in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Identity skip path unless the main path changes resolution or
        # channel count; then a strided 1x1 convolution matches the shapes.
        shape_changes = stride != 1 or in_channels != out_channels
        if shape_changes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        residual = self.bn1(self.conv1(x))
        residual = self.relu(residual)
        residual = self.bn2(self.conv2(residual))
        residual += self.shortcut(x)
        return self.relu(residual)

class ResNet32(nn.Module):
    """ResNet-32 with standard convolutions for 32x32 inputs and 10 classes.

    A 3x3 stem convolution is followed by three stages of five residual
    blocks each (16, 32 and 64 channels; the last two stages downsample by 2),
    then 8x8 average pooling and a linear classifier.
    """

    def __init__(self):
        super().__init__()
        self.in_channels = 16  # running channel count consumed by _make_layer
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(16, 5, stride=1)
        self.layer2 = self._make_layer(32, 5, stride=2)
        self.layer3 = self._make_layer(64, 5, stride=2)
        self.fc = nn.Linear(64, 10)  # CIFAR-10 has 10 classes

    def _make_layer(self, out_channels, blocks, stride):
        """Build one stage: a (possibly strided) first block plus ``blocks - 1`` plain ones."""
        first = BasicBlock(self.in_channels, out_channels, stride)
        self.in_channels = out_channels
        remaining = [BasicBlock(out_channels, out_channels) for _ in range(blocks - 1)]
        return nn.Sequential(first, *remaining)

    def forward(self, x):
        """Return class logits of shape (batch, 10)."""
        stem = self.relu(self.bn1(self.conv1(x)))
        features = self.layer3(self.layer2(self.layer1(stem)))
        pooled = nn.AvgPool2d(8)(features)
        return self.fc(pooled.view(pooled.size(0), -1))


In [None]:
# Create model: pick the GPU when one is available, then instantiate ResNet32
# and move its parameters to that device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNet32().to(device)


In [None]:
# Data transformations
# Random augmentation (flip + padded crop), then tensor conversion and
# per-channel normalization.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # random left-right mirror
    transforms.RandomCrop(32, padding=4),  # pad by 4 pixels, then take a random 32x32 crop
    transforms.ToTensor(),
    # (mean, std) per RGB channel -- presumably CIFAR-10 training-set statistics; TODO confirm
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])


In [None]:
# Load CIFAR-10 dataset
# Training images use the augmenting `transform` defined above.
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)

# Evaluation must be deterministic: the random flip/crop used for training
# would perturb the test images and make the reported accuracy noisy, so the
# test set only gets tensor conversion and the same normalization.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
])
testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)


Files already downloaded and verified
Files already downloaded and verified


In [None]:
# Loss function and optimizer
# Cross-entropy over the class logits; SGD with momentum 0.9 and weight decay 5e-4.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)


In [None]:
# Number of epochs (full passes over the training loader)
num_epochs = 10


In [None]:
# Training function
def train_model():
    """Optimize the global ``model`` for ``num_epochs`` passes over ``trainloader``.

    Depends on the module-level ``criterion``, ``optimizer`` and ``device``.
    The mean loss over all batches of an epoch is printed when it finishes.
    """
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0.0
        for x_batch, y_batch in trainloader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)

            optimizer.zero_grad()  # start the step from zeroed gradients
            step_loss = criterion(model(x_batch), y_batch)
            step_loss.backward()  # gradients of the loss w.r.t. the weights
            optimizer.step()  # gradient update

            total_loss += step_loss.item()

        per_batch = total_loss / len(trainloader)
        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {per_batch:.4f}")


In [None]:
# Evaluation function
def test_model():
    """Print the percentage of ``testloader`` samples the global ``model`` classifies correctly."""
    model.eval()
    right = 0
    count = 0
    with torch.no_grad():
        for x_batch, y_batch in testloader:
            x_batch = x_batch.to(device)
            y_batch = y_batch.to(device)
            # Highest-scoring class per sample.
            guess = model(x_batch).max(dim=1).indices
            count += y_batch.size(0)
            right += guess.eq(y_batch).sum().item()

    print(f"Accuracy: {100 * right / count:.2f}%")


In [None]:
# Train the model (prints the mean training loss after every epoch)
train_model()

# Test the model (prints the accuracy over the test loader)
test_model()


Epoch [1/10], Loss: 1.6043
Epoch [2/10], Loss: 1.0623
Epoch [3/10], Loss: 0.8432
Epoch [4/10], Loss: 0.7356
Epoch [5/10], Loss: 0.6627
Epoch [6/10], Loss: 0.6122
Epoch [7/10], Loss: 0.5775
Epoch [8/10], Loss: 0.5466
Epoch [9/10], Loss: 0.5242
Epoch [10/10], Loss: 0.4985
Accuracy: 81.70%


# With ResNet-32 and standard CNN layers, I achieved 81.70% accuracy after 10 epochs

# The standard convolutional (CNN) layers in VGG-Small are replaced by adder layers to reduce energy consumption during training.

# CNN (Convolutional Neural Network) layers perform feature extraction by applying a convolution operation,
# which involves element-wise multiplication between the input feature map and the filter (kernel), followed by summation.
# This multiplication-heavy operation is energy-expensive.
#
# AdderNet replaces the convolution operation by using addition-based operations.
# Instead of performing element-wise multiplication, AdderNet computes the absolute difference between the input and filter weights:
#
# AdderNet Operation: |Input - Filter|
#
# The result is then summed to produce the output feature map. This approach reduces energy consumption by avoiding
# costly multiplication operations, using addition instead, which is more efficient.
#
# How it compares to CNN:
# In CNN: Output = summation (Input * Filter) + Bias
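# In AdderNet: Output = -summation |Input - Filter| + Bias
# (the L1 distance is negated so that, as in a CNN, a larger output means the input patch is more similar to the filter)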
#
# By replacing the CNN layers with AdderNet, the energy efficiency of the network increases, while still preserving the
# hierarchical structure and overall architecture of VGGsmall.
#
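# AdderNet also supports backpropagation. The exact derivative of the L1 distance |Input - Filter| with respect to
# the filter is sign(Input - Filter), but a signed gradient only takes the values -1, 0 and +1. The implementation
# below therefore uses the full-precision difference for the filter update and a difference clipped to [-1, 1]
# (HardTanh) for the gradient passed back to the input:
#
# Gradient (w.r.t. weights) = Input - Filter
# Gradient (w.r.t. input)   = HardTanh(Filter - Input)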
#
# This allows AdderNet to work seamlessly with gradient-based optimization methods (e.g., SGD, Adam),
# ensuring proper weight updates and network training.





# ResNet-20 with AdderNet layers

In [None]:
from torch.autograd import Function
import math

In [None]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# Define AdderNet Convolution Layer
def adder2d_function(X, W, stride=1, padding=0):
    """Apply the adder (negative L1 distance) filters ``W`` to the batch ``X``.

    Args:
        X: Input of shape (batch, channels, height, width).
        W: Filters of shape (n_filters, channels, kernel_h, kernel_w). Only
            ``kernel_h`` is passed to ``unfold`` as the kernel size.
        stride: Stride of the sliding window.
        padding: Zero padding added on every side before unfolding.

    Returns:
        Tensor of shape (batch, n_filters, h_out, w_out).
    """
    n_filters, _, h_filter, w_filter = W.size()
    n_x, _, h_x, w_x = X.size()

    h_out = int((h_x - h_filter + 2 * padding) / stride + 1)
    w_out = int((w_x - w_filter + 2 * padding) / stride + 1)

    # Fold the batch into the channel axis so a single unfold call extracts
    # the patches of every sample, then split the batch back out.
    unfolded = torch.nn.functional.unfold(
        X.view(1, -1, h_x, w_x), h_filter, dilation=1, padding=padding, stride=stride
    )
    per_sample = unfolded.view(n_x, -1, h_out * w_out)
    patch_len = per_sample.size(1)

    # Columns are ordered (location, sample); rows hold the patch values.
    X_col = per_sample.permute(1, 2, 0).contiguous().view(patch_len, -1)
    W_col = W.view(n_filters, -1)

    distances = adder.apply(W_col, X_col)

    # (n_filters, h_out, w_out, batch) -> (batch, n_filters, h_out, w_out)
    distances = distances.view(n_filters, h_out, w_out, n_x)
    return distances.permute(3, 0, 1, 2).contiguous()

class adder(Function):
    """Autograd function computing negative L1 distances between filters and patches.

    ``W_col`` has one filter per row and ``X_col`` one patch per column; the
    output entry (f, n) is ``-sum_k |W_col[f, k] - X_col[k, n]|``.
    """

    @staticmethod
    def forward(ctx, W_col, X_col):
        ctx.save_for_backward(W_col, X_col)
        gap = W_col.unsqueeze(2) - X_col.unsqueeze(0)  # (filters, patch_len, patches)
        return gap.abs().sum(1).neg()

    @staticmethod
    def backward(ctx, grad_output):
        W_col, X_col = ctx.saved_tensors
        upstream = grad_output.unsqueeze(1)
        x_minus_w = X_col.unsqueeze(0) - W_col.unsqueeze(2)

        # Filter gradient: full-precision difference instead of its sign,
        # summed over all patches.
        grad_W_col = (x_minus_w * upstream).sum(2)
        # Rescale to a fixed L2 norm of sqrt(number of filter weights) / 5;
        # the clamp guards against division by zero.
        grad_norm = grad_W_col.norm(p=2).clamp(min=1e-12)
        grad_W_col = grad_W_col / grad_norm * math.sqrt(W_col.size(1) * W_col.size(0)) / 5

        # Input gradient: the difference is clipped to [-1, 1] before it is
        # weighted by the upstream gradient and summed over the filters.
        grad_X_col = (x_minus_w.clamp(-1, 1).neg() * upstream).sum(0)

        return grad_W_col, grad_X_col

class adder2d(nn.Module):
    """Drop-in 2-D layer that uses adder (L1-distance) filters instead of convolution.

    Args:
        input_channel: Number of input channels.
        output_channel: Number of filters / output channels.
        kernel_size: Side length of the square filters.
        stride: Stride of the sliding window.
        padding: Zero padding on every side.
        bias: When true, a learnable per-channel offset ``b`` is added.
    """

    def __init__(self, input_channel, output_channel, kernel_size, stride=1, padding=0, bias=False):
        super().__init__()
        self.stride = stride
        self.padding = padding
        self.input_channel = input_channel
        self.output_channel = output_channel
        self.kernel_size = kernel_size

        # Filters are drawn from a standard normal distribution.
        filter_shape = (output_channel, input_channel, kernel_size, kernel_size)
        self.adder = torch.nn.Parameter(nn.init.normal_(torch.randn(*filter_shape)))

        self.bias = bias  # flag only; the offset itself lives in ``self.b``
        if bias:
            self.b = torch.nn.Parameter(nn.init.uniform_(torch.zeros(output_channel)))

    def forward(self, x):
        """Return the adder response of ``x``, plus the offset when enabled."""
        output = adder2d_function(x, self.adder, self.stride, self.padding)
        if not self.bias:
            return output
        # Broadcast the per-channel offset over batch and spatial axes.
        output += self.b[None, :, None, None]
        return output

#Basic Block for ResNet-20
class BasicBlock(nn.Module):
    """Residual block whose convolutions are replaced by ``adder2d`` layers.

    Args:
        in_channels: Number of channels of the block input.
        out_channels: Number of channels produced by both adder layers.
        stride: Stride of the first adder layer (and of the projection
            shortcut, when one is needed).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = adder2d(
            in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False
        )
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = adder2d(
            out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False
        )
        self.bn2 = nn.BatchNorm2d(out_channels)

        # Identity skip path unless the main path changes resolution or
        # channel count; then a strided 1x1 adder layer matches the shapes.
        shape_changes = stride != 1 or in_channels != out_channels
        if shape_changes:
            self.shortcut = nn.Sequential(
                adder2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        residual = self.bn1(self.conv1(x))
        residual = self.relu(residual)
        residual = self.bn2(self.conv2(residual))
        residual += self.shortcut(x)
        return self.relu(residual)

# ResNet-20 with AdderNet Layers
class ResNet20(nn.Module):
    """ResNet for 32x32 inputs built from ``adder2d`` layers.

    Args:
        block: Residual block class, called as ``block(in_channels,
            out_channels, stride)``.
        num_blocks: Three integers giving the number of blocks in the 16-, 32-
            and 64-channel stages (``[3, 3, 3]`` yields ResNet-20).
        num_classes: Size of the final linear classifier.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(ResNet20, self).__init__()
        # Running channel count consumed and updated by _make_layer.
        # (This line was previously indented by 7 spaces, which is an
        # IndentationError and prevented the class from being defined.)
        self.in_channels = 16
        self.conv1 = adder2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride):
        """Build one stage: a (possibly strided) first block plus ``blocks - 1`` stride-1 blocks."""
        layers = []
        layers.append(block(self.in_channels, out_channels, stride))
        self.in_channels = out_channels
        for _ in range(1, blocks):
            layers.append(block(self.in_channels, out_channels))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        x = self.relu(self.bn1(self.conv1(x)))
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.avgpool(x)  # global average pool to 1x1
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

# Data Preparation
# Tensor conversion and per-channel normalization only; the same
# deterministic transform is used for the training and the test split.
transform = transforms.Compose([
    transforms.ToTensor(),
    # (mean, std) per RGB channel -- presumably CIFAR-10 training-set statistics; TODO confirm
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])

trainset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)

testset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)

# Initialize Model, Loss Function, Optimizer
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# [3, 3, 3] = three residual blocks in each of the three stages.
model = ResNet20(BasicBlock, [3, 3, 3]).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


# Gradient scaler for mixed-precision training.
# NOTE(review): the recorded cell output shows a warning pointing at this
# line -- presumably the deprecation of torch.cuda.amp.GradScaler; confirm
# against the installed PyTorch version before changing it.
scaler = torch.cuda.amp.GradScaler()

# Training Function
def train_model(num_epochs=4):
    """Train the global ``model`` with mixed precision and evaluate after every epoch.

    Uses the module-level ``trainloader``, ``criterion``, ``optimizer``,
    ``scaler`` and ``device``. The loss of every 100th mini-batch and the mean
    loss of each epoch are printed, and ``test_model()`` is called at the end
    of every epoch.

    Args:
        num_epochs: Number of passes over the training set. The default is
            only 4 because of the long processing time and limited access to
            a GPU.
    """
    # torch.cuda.amp.autocast() is deprecated; torch.autocast is its
    # replacement. Autocasting is only enabled when CUDA is present, which
    # also avoids the "CUDA is not available" warning on CPU-only machines.
    use_amp = torch.cuda.is_available()

    for epoch in range(num_epochs):
        # test_model() leaves the model in eval mode, so switch back each epoch.
        model.train()
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(trainloader):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            # Mixed precision forward pass
            with torch.autocast(device_type="cuda", enabled=use_amp):
                outputs = model(inputs)  # Forward pass
                loss = criterion(outputs, labels)  # Compute loss

            scaler.scale(loss).backward()  # Backward pass on the scaled loss
            scaler.step(optimizer)  # Unscale gradients and update weights
            scaler.update()  # Adjust the scale factor for the next step

            running_loss += loss.item()

            if i % 100 == 99:  # Print every 100 mini-batches
                print(f"Batch [{i + 1}], Loss: {loss.item():.4f}")

        print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {running_loss / len(trainloader):.4f}")
        test_model()

# Testing Function
def test_model():
    """Print the top-1 accuracy (in percent) of the global ``model`` on ``testloader``."""
    model.eval()
    n_correct = 0
    n_seen = 0
    with torch.no_grad():  # evaluation needs no gradients
        for images, targets in testloader:
            images = images.to(device)
            targets = targets.to(device)
            # Highest-scoring class per sample.
            predictions = model(images).max(dim=1).indices
            n_seen += targets.size(0)
            n_correct += predictions.eq(targets).sum().item()

    accuracy = 100 * n_correct / n_seen
    print(f'Accuracy: {accuracy:.2f}%')

# Train and Evaluate (train_model calls test_model() after every epoch)
train_model(num_epochs=4)


Files already downloaded and verified
Files already downloaded and verified


  scaler = torch.cuda.amp.GradScaler()
  with torch.cuda.amp.autocast():


Batch [100], Loss: 2.3178
Batch [200], Loss: 2.2841
Batch [300], Loss: 2.2990
Batch [400], Loss: 2.1874
Batch [500], Loss: 2.2778
Batch [600], Loss: 2.2192
Batch [700], Loss: 2.2866
Batch [800], Loss: 2.1962
Batch [900], Loss: 2.1501
Batch [1000], Loss: 2.1495
Batch [1100], Loss: 2.1741
Batch [1200], Loss: 2.1797
Batch [1300], Loss: 2.2517
Batch [1400], Loss: 2.1338
Batch [1500], Loss: 2.1837
Epoch [1/4], Loss: 2.2448
Accuracy: 22.87%
Batch [100], Loss: 1.8841
Batch [200], Loss: 2.1563
Batch [300], Loss: 2.0353
Batch [400], Loss: 2.1056
Batch [500], Loss: 1.9715
Batch [600], Loss: 1.8636
Batch [700], Loss: 1.8515
Batch [800], Loss: 1.8632
Batch [900], Loss: 1.9308
Batch [1000], Loss: 1.7613
Batch [1100], Loss: 1.7418
Batch [1200], Loss: 1.8799
Batch [1300], Loss: 2.0743
Batch [1400], Loss: 1.6668
Batch [1500], Loss: 1.6798
Epoch [2/4], Loss: 1.9410
Accuracy: 31.38%
Batch [100], Loss: 1.8557
Batch [200], Loss: 1.8150
Batch [300], Loss: 1.6511
Batch [400], Loss: 2.0561
Batch [500], Loss:

# VGG-Small architecture with CNN layers replaced by BNN layers

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

# Binarization function with straight-through estimator (STE)
class BinaryActivation(torch.autograd.Function):
    """Sign binarization with a clipped straight-through gradient estimator.

    Forward maps every element to its sign (-1 or +1; an exact zero stays 0).
    Backward passes the upstream gradient through unchanged, except where the
    saved input has magnitude greater than 1, where it is set to zero.
    """

    @staticmethod
    def forward(ctx, input):
        ctx.save_for_backward(input)
        return torch.sign(input)

    @staticmethod
    def backward(ctx, grad_output):
        (saved_input,) = ctx.saved_tensors
        # Block the gradient outside the [-1, 1] range.
        outside_unit_range = saved_input.abs() > 1
        return grad_output.masked_fill(outside_unit_range, 0)

# Binary convolutional layer
class BinaryConv2d(nn.Conv2d):
    """``nn.Conv2d`` that convolves with the sign of its weights.

    The weights are binarized with ``BinaryActivation`` on every forward
    pass; the input and the bias are used unchanged.
    """

    def forward(self, input):
        signed_kernel = BinaryActivation.apply(self.weight)  # binarized weights
        return F.conv2d(
            input,
            signed_kernel,
            bias=self.bias,
            stride=self.stride,
            padding=self.padding,
            dilation=self.dilation,
            groups=self.groups,
        )

#  VGG-Small architecture using binary convolutions
class BinaryVGGSmall(nn.Module):
    """VGG-Small variant whose convolutions are ``BinaryConv2d`` layers.

    Three stages (64, 128 and 256 channels) of two BinaryConv -> BatchNorm ->
    ReLU units each, every stage ending in a 2x2 max pool
    (32x32 -> 16x16 -> 8x8 -> 4x4). The head is a full-precision two-layer
    classifier with batch normalization.

    Args:
        num_classes: Number of output logits.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Built stage by stage, in the same order as a hand-written
        # nn.Sequential, so layer indices stay the same.
        feature_stack = []
        channels_in = 3
        for channels_out in (64, 128, 256):
            for _ in range(2):
                feature_stack.append(BinaryConv2d(channels_in, channels_out, kernel_size=3, padding=1))
                feature_stack.append(nn.BatchNorm2d(channels_out))
                feature_stack.append(nn.ReLU())
                channels_in = channels_out
            feature_stack.append(nn.MaxPool2d(2, 2))  # halves the spatial size
        self.conv_layers = nn.Sequential(*feature_stack)

        self.fc_layers = nn.Sequential(
            nn.Linear(256 * 4 * 4, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, num_classes),
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        feature_maps = self.conv_layers(x)
        flat = feature_maps.view(feature_maps.size(0), -1)  # one row per sample
        return self.fc_layers(flat)

# Data loading and preprocessing
# Training images: random flip and padded crop, then scaling to [-1, 1]
# (mean 0.5 and std 0.5 per channel).
transform = transforms.Compose(
    [transforms.RandomHorizontalFlip(),
     transforms.RandomCrop(32, padding=4),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Test images: same scaling but no random augmentation, so that evaluation is
# deterministic and the reported accuracy is not perturbed by flips/crops.
test_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)

# Training the model
def train_model(model, trainloader, criterion, optimizer, device, epochs=20):
    """Train ``model`` in place and log the running loss.

    Args:
        model: Network to optimize; put into training mode once at the start.
        trainloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss called as ``criterion(outputs, labels)``.
        optimizer: Optimizer bound to the model's parameters.
        device: Device every batch is moved to.
        epochs: Number of passes over ``trainloader``.

    After every 100th batch of an epoch the mean loss of the last 100 batches
    is printed; ``Finished Training`` is printed at the end.
    """
    report_every = 100
    model.train()
    for epoch in range(epochs):
        window_loss = 0.0
        for batch_no, (inputs, labels) in enumerate(trainloader, start=1):
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            window_loss += loss.item()
            if batch_no % report_every:
                continue
            print(f'Epoch {epoch+1}, Batch {batch_no}, Loss: {window_loss/report_every:.4f}')
            window_loss = 0.0
    print('Finished Training')

# Test the model
def test_model(model, testloader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy: {100 * correct / total:.2f}%')

# Setting up the device, model, loss function, and optimizer
# GPU when available, otherwise CPU; the model is moved there before the
# optimizer is created over its parameters.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BinaryVGGSmall().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training and testing the model
# 20 epochs of training, then one accuracy report over the test loader.
train_model(model, trainloader, criterion, optimizer, device, epochs=20)
test_model(model, testloader, device)


Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Batch 100, Loss: 1.6066
Epoch 1, Batch 200, Loss: 1.3004
Epoch 1, Batch 300, Loss: 1.1341
Epoch 2, Batch 100, Loss: 0.9064
Epoch 2, Batch 200, Loss: 0.8814
Epoch 2, Batch 300, Loss: 0.8344
Epoch 3, Batch 100, Loss: 0.7314
Epoch 3, Batch 200, Loss: 0.7037
Epoch 3, Batch 300, Loss: 0.7092
Epoch 4, Batch 100, Loss: 0.6321
Epoch 4, Batch 200, Loss: 0.6277
Epoch 4, Batch 300, Loss: 0.6284
Epoch 5, Batch 100, Loss: 0.5742
Epoch 5, Batch 200, Loss: 0.5827
Epoch 5, Batch 300, Loss: 0.5497
Epoch 6, Batch 100, Loss: 0.5174
Epoch 6, Batch 200, Loss: 0.5120
Epoch 6, Batch 300, Loss: 0.4956
Epoch 7, Batch 100, Loss: 0.4752
Epoch 7, Batch 200, Loss: 0.4585
Epoch 7, Batch 300, Loss: 0.4809
Epoch 8, Batch 100, Loss: 0.4373
Epoch 8, Batch 200, Loss: 0.4219
Epoch 8, Batch 300, Loss: 0.4345
Epoch 9, Batch 100, Loss: 0.3969
Epoch 9, Batch 200, Loss: 0.4082
Epoch 9, Batch 300, Loss: 0.4208
Epoch 10, Batch 100, Loss: 0.3789

# ResNet-20 architecture using binary neural network (BNN) layers

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

# Binarization function with straight-through estimator (STE)
class BinaryActivation(torch.autograd.Function):
    """Sign binarization with a straight-through estimator (STE).

    Forward maps every element to -1 or +1. Backward passes the incoming
    gradient through unchanged where ``|input| <= 1`` and zeroes it elsewhere.
    """

    @staticmethod
    def forward(ctx, input):
        """Return a tensor of the same shape as ``input`` holding -1 or +1."""
        ctx.save_for_backward(input)
        signs = input.sign()
        # torch.sign maps 0 to 0, which would leave a third, non-binary value.
        # Send exact zeros to +1 so the output is strictly {-1, +1}.
        return torch.where(signs == 0, torch.ones_like(signs), signs)

    @staticmethod
    def backward(ctx, grad_output):
        """Return ``grad_output`` with entries zeroed where ``|input| > 1``."""
        input, = ctx.saved_tensors
        # masked_fill returns a new tensor, so grad_output itself is not modified.
        return grad_output.masked_fill(input.abs() > 1, 0)

# Binary convolutional layer
class BinaryConv2d(nn.Conv2d):
    """``nn.Conv2d`` whose weights pass through ``BinaryActivation`` on every forward.

    The stored ``self.weight`` is left untouched; only the tensor handed to
    the convolution is binarized. The input activations are not binarized here.
    """

    def forward(self, input):
        """Convolve ``input`` with the binarized copy of the layer weights."""
        weight_b = BinaryActivation.apply(self.weight)
        return F.conv2d(
            input,
            weight_b,
            self.bias,
            self.stride,
            self.padding,
            self.dilation,
            self.groups,
        )

# Define the basic block used in ResNet
class BinaryBasicBlock(nn.Module):
    """Residual block made of two 3x3 ``BinaryConv2d`` layers, each followed by batch norm.

    When the stride is not 1 or the channel count changes, the skip path is a
    1x1 ``BinaryConv2d`` plus batch norm; otherwise it is an empty
    ``nn.Sequential`` (identity).
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = BinaryConv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = BinaryConv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride != 1 or in_planes != out_planes:
            # Shapes differ between input and output: project the skip path.
            self.shortcut = nn.Sequential(
                BinaryConv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        main = F.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))
        skip = self.shortcut(x)
        return F.relu(main + skip)

# Define the ResNet-20 architecture
class BinaryResNet(nn.Module):
    """Three-stage ResNet (16/32/64 channels) with a ``BinaryConv2d`` stem.

    Args:
        block: residual block class, called as ``block(in_planes, planes, stride)``;
            it must expose an integer ``expansion`` class attribute.
        num_blocks: number of residual blocks in each of the three stages.
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(BinaryResNet, self).__init__()
        self.in_planes = 16

        self.conv1 = BinaryConv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        # The last stage emits 64 * expansion channels; size the classifier to
        # match so blocks with expansion != 1 also work (64 when expansion is 1).
        self.linear = nn.Linear(64 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest use stride 1."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        # A separate loop name avoids shadowing the `stride` parameter.
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return per-class scores for the image batch ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # Global average pooling. For 32x32 inputs the feature map here is 8x8,
        # so this equals the previous fixed avg_pool2d(out, 8); other input
        # sizes no longer break the linear layer.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

# ResNet-20 configuration
def BinaryResNet20():
    """Build a ``BinaryResNet`` with three ``BinaryBasicBlock``s in each of its three stages."""
    blocks_per_stage = [3, 3, 3]
    return BinaryResNet(BinaryBasicBlock, blocks_per_stage)

# Data loading and preprocessing
# Training pipeline: random flip/crop augmentation, then scale each channel to [-1, 1].
transform = transforms.Compose(
    [transforms.RandomHorizontalFlip(),
     transforms.RandomCrop(32, padding=4),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Evaluation pipeline: same normalisation but no random augmentation, so the
# reported test accuracy is deterministic and measured on unmodified images.
test_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)

# Training the model
def train_model(model, trainloader, criterion, optimizer, device, epochs=20):
    """Train ``model`` for ``epochs`` passes over ``trainloader``.

    After every 100th batch of an epoch the mean loss of those 100 batches is
    printed. Batches left over after the last full group of 100 are trained on
    but never reported.

    Args:
        model: network to optimise; it is put into training mode.
        trainloader: iterable yielding ``(inputs, labels)`` pairs.
        criterion: callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: optimizer that updates the model parameters.
        device: device the batches are moved to before the forward pass.
        epochs: number of passes over ``trainloader``.
    """
    model.train()
    for epoch_idx in range(epochs):
        window_loss = 0.0
        for step, (inputs, labels) in enumerate(trainloader, start=1):
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            window_loss += loss.item()
            if step % 100 != 0:
                continue
            # A full group of 100 batches is complete: report and start over.
            print(f'Epoch {epoch_idx+1}, Batch {step}, Loss: {window_loss/100:.4f}')
            window_loss = 0.0
    print('Finished Training')

# Test the model
def test_model(model, testloader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy: {100 * correct / total:.2f}%')

# Setting up the device, model, loss function, and optimizer
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BinaryResNet20().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training and testing the model
# Train for 20 epochs, then print the accuracy over `testloader`.
train_model(model, trainloader, criterion, optimizer, device, epochs=20)
test_model(model, testloader, device)


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:03<00:00, 46962939.51it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
Epoch 1, Batch 100, Loss: 1.8599
Epoch 1, Batch 200, Loss: 1.6155
Epoch 1, Batch 300, Loss: 1.4806
Epoch 2, Batch 100, Loss: 1.3016
Epoch 2, Batch 200, Loss: 1.2432
Epoch 2, Batch 300, Loss: 1.1835
Epoch 3, Batch 100, Loss: 1.0854
Epoch 3, Batch 200, Loss: 1.0463
Epoch 3, Batch 300, Loss: 1.0211
Epoch 4, Batch 100, Loss: 0.9542
Epoch 4, Batch 200, Loss: 0.9517
Epoch 4, Batch 300, Loss: 0.9197
Epoch 5, Batch 100, Loss: 0.8778
Epoch 5, Batch 200, Loss: 0.8462
Epoch 5, Batch 300, Loss: 0.8111
Epoch 6, Batch 100, Loss: 0.7845
Epoch 6, Batch 200, Loss: 0.7687
Epoch 6, Batch 300, Loss: 0.7933
Epoch 7, Batch 100, Loss: 0.7534
Epoch 7, Batch 200, Loss: 0.7334
Epoch 7, Batch 300, Loss: 0.7334
Epoch 8, Batch 100, Loss: 0.7106
Epoch 8, Batch 200, Loss: 0.7101
Epoch 8, Batch 300, Loss: 0.6864
Epoch 9, Batch 100, Loss: 0.6857
Epoch 9, Batch 200, Loss: 0.6651
Epoch 9, Batch 300, Loss: 0.6540
Epoch 10, Batch 100,

ResNet-20 achieved 78.49% accuracy with the binary neural network.

#ResNet-32 architecture with BNN layers


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

# Binarization function with straight-through estimator (STE)
class BinaryActivation(torch.autograd.Function):
    """Sign binarization with a straight-through estimator (STE).

    Forward maps every element to -1 or +1. Backward passes the incoming
    gradient through unchanged where ``|input| <= 1`` and zeroes it elsewhere.
    """

    @staticmethod
    def forward(ctx, input):
        """Return a tensor of the same shape as ``input`` holding -1 or +1."""
        ctx.save_for_backward(input)
        signs = input.sign()
        # torch.sign maps 0 to 0, which would leave a third, non-binary value.
        # Send exact zeros to +1 so the output is strictly {-1, +1}.
        return torch.where(signs == 0, torch.ones_like(signs), signs)

    @staticmethod
    def backward(ctx, grad_output):
        """Return ``grad_output`` with entries zeroed where ``|input| > 1``."""
        input, = ctx.saved_tensors
        # masked_fill returns a new tensor, so grad_output itself is not modified.
        return grad_output.masked_fill(input.abs() > 1, 0)

# Binary convolutional layer
class BinaryConv2d(nn.Conv2d):
    """``nn.Conv2d`` whose weights pass through ``BinaryActivation`` on every forward.

    The stored ``self.weight`` is left untouched; only the tensor handed to
    the convolution is binarized. The input activations are not binarized here.
    """

    def forward(self, input):
        """Convolve ``input`` with the binarized copy of the layer weights."""
        weight_b = BinaryActivation.apply(self.weight)
        return F.conv2d(
            input,
            weight_b,
            self.bias,
            self.stride,
            self.padding,
            self.dilation,
            self.groups,
        )

#Basic block used in ResNet
class BinaryBasicBlock(nn.Module):
    """Residual block made of two 3x3 ``BinaryConv2d`` layers, each followed by batch norm.

    When the stride is not 1 or the channel count changes, the skip path is a
    1x1 ``BinaryConv2d`` plus batch norm; otherwise it is an empty
    ``nn.Sequential`` (identity).
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = BinaryConv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = BinaryConv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        if stride != 1 or in_planes != out_planes:
            # Shapes differ between input and output: project the skip path.
            self.shortcut = nn.Sequential(
                BinaryConv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        main = F.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))
        skip = self.shortcut(x)
        return F.relu(main + skip)

#ResNet-32 architecture
class BinaryResNet(nn.Module):
    """Three-stage ResNet (16/32/64 channels) with a ``BinaryConv2d`` stem.

    Args:
        block: residual block class, called as ``block(in_planes, planes, stride)``;
            it must expose an integer ``expansion`` class attribute.
        num_blocks: number of residual blocks in each of the three stages.
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super(BinaryResNet, self).__init__()
        self.in_planes = 16

        self.conv1 = BinaryConv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        # The last stage emits 64 * expansion channels; size the classifier to
        # match so blocks with expansion != 1 also work (64 when expansion is 1).
        self.linear = nn.Linear(64 * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build one stage: the first block uses ``stride``, the rest use stride 1."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        # A separate loop name avoids shadowing the `stride` parameter.
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return per-class scores for the image batch ``x``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # Global average pooling. For 32x32 inputs the feature map here is 8x8,
        # so this equals the previous fixed avg_pool2d(out, 8); other input
        # sizes no longer break the linear layer.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

# ResNet-32 configuration
def BinaryResNet32():
    """Build a ``BinaryResNet`` with five ``BinaryBasicBlock``s in each of its three stages."""
    blocks_per_stage = [5, 5, 5]
    return BinaryResNet(BinaryBasicBlock, blocks_per_stage)

# Data loading and preprocessing
# Training pipeline: random flip/crop augmentation, then scale each channel to [-1, 1].
transform = transforms.Compose(
    [transforms.RandomHorizontalFlip(),
     transforms.RandomCrop(32, padding=4),
     transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Evaluation pipeline: same normalisation but no random augmentation, so the
# reported test accuracy is deterministic and measured on unmodified images.
test_transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False, num_workers=2)

# Training the model
def train_model(model, trainloader, criterion, optimizer, device, epochs=20):
    """Train ``model`` for ``epochs`` passes over ``trainloader``.

    After every 100th batch of an epoch the mean loss of those 100 batches is
    printed. Batches left over after the last full group of 100 are trained on
    but never reported.

    Args:
        model: network to optimise; it is put into training mode.
        trainloader: iterable yielding ``(inputs, labels)`` pairs.
        criterion: callable ``criterion(outputs, labels)`` returning the loss.
        optimizer: optimizer that updates the model parameters.
        device: device the batches are moved to before the forward pass.
        epochs: number of passes over ``trainloader``.
    """
    model.train()
    for epoch_idx in range(epochs):
        window_loss = 0.0
        for step, (inputs, labels) in enumerate(trainloader, start=1):
            inputs = inputs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            window_loss += loss.item()
            if step % 100 != 0:
                continue
            # A full group of 100 batches is complete: report and start over.
            print(f'Epoch {epoch_idx+1}, Batch {step}, Loss: {window_loss/100:.4f}')
            window_loss = 0.0
    print('Finished Training')

# Test the model
def test_model(model, testloader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    print(f'Accuracy: {100 * correct / total:.2f}%')

# Setting up the device, model, loss function, and optimizer
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = BinaryResNet32().to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training and testing the model
# Train for 20 epochs, then print the accuracy over `testloader`.
train_model(model, trainloader, criterion, optimizer, device, epochs=20)
test_model(model, testloader, device)


Files already downloaded and verified
Files already downloaded and verified
Epoch 1, Batch 100, Loss: 1.8724
Epoch 1, Batch 200, Loss: 1.6050
Epoch 1, Batch 300, Loss: 1.4680
Epoch 2, Batch 100, Loss: 1.3020
Epoch 2, Batch 200, Loss: 1.2225
Epoch 2, Batch 300, Loss: 1.1503
Epoch 3, Batch 100, Loss: 1.0799
Epoch 3, Batch 200, Loss: 1.0290
Epoch 3, Batch 300, Loss: 1.0049
Epoch 4, Batch 100, Loss: 0.9182
Epoch 4, Batch 200, Loss: 0.9000
Epoch 4, Batch 300, Loss: 0.8815
Epoch 5, Batch 100, Loss: 0.8286
Epoch 5, Batch 200, Loss: 0.8235
Epoch 5, Batch 300, Loss: 0.8298
Epoch 6, Batch 100, Loss: 0.7723
Epoch 6, Batch 200, Loss: 0.7665
Epoch 6, Batch 300, Loss: 0.7593
Epoch 7, Batch 100, Loss: 0.7254
Epoch 7, Batch 200, Loss: 0.7166
Epoch 7, Batch 300, Loss: 0.7102
Epoch 8, Batch 100, Loss: 0.6939
Epoch 8, Batch 200, Loss: 0.6772
Epoch 8, Batch 300, Loss: 0.6808
Epoch 9, Batch 100, Loss: 0.6566
Epoch 9, Batch 200, Loss: 0.6396
Epoch 9, Batch 300, Loss: 0.6595
Epoch 10, Batch 100, Loss: 0.6255

#  Sorry for not following order!

# ResNet-32 built with AdderNet layers

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Function
import torchvision
import torchvision.transforms as transforms
import math

# Define adder2d function and class
def adder2d_function(X, W, stride=1, padding=0):
    """Apply adder filters ``W`` to the batch ``X`` via an unfold-based layout.

    Every sliding window of ``X`` is flattened into a column, every filter is
    flattened into a row, and ``adder.apply`` scores each (filter, window)
    pair. The scores are then reshaped back into a feature map.

    Args:
        X: 4-D input of size ``(n_x, d_x, h_x, w_x)``.
        W: 4-D filters of size ``(n_filters, d_filter, h_filter, w_filter)``.
        stride: integer window step, used for both spatial dimensions.
        padding: integer zero padding, used for both spatial dimensions.

    Returns:
        Tensor of size ``(n_x, n_filters, h_out, w_out)``.
    """
    n_filters, d_filter, h_filter, w_filter = W.size()
    n_x, d_x, h_x, w_x = X.size()

    # Integer arithmetic gives the output size directly, without a float round trip.
    h_out = (h_x - h_filter + 2 * padding) // stride + 1
    w_out = (w_x - w_filter + 2 * padding) // stride + 1

    # Fold the batch into the channel axis so a single unfold call extracts the
    # windows of every sample. reshape (rather than view) also accepts
    # non-contiguous inputs, and the explicit (h, w) kernel size keeps
    # non-square filters consistent with the flattened W below.
    X_col = torch.nn.functional.unfold(
        X.reshape(1, -1, h_x, w_x),
        (h_filter, w_filter),
        dilation=1,
        padding=padding,
        stride=stride,
    ).view(n_x, -1, h_out * w_out)
    # Rearrange to (window_elements, positions * batch).
    X_col = X_col.permute(1, 2, 0).contiguous().view(X_col.size(1), -1)
    W_col = W.view(n_filters, -1)

    out = adder.apply(W_col, X_col)

    # Back to (batch, filters, h_out, w_out).
    out = out.view(n_filters, h_out, w_out, n_x)
    out = out.permute(3, 0, 1, 2).contiguous()

    return out

class adder(Function):
    """Negative L1 distance between every filter row and every input column.

    ``forward`` takes ``W_col`` of size ``(filters, k)`` and ``X_col`` of size
    ``(k, positions)`` and returns a ``(filters, positions)`` tensor.
    ``backward`` uses hand-written gradients rather than the true derivative
    of the absolute value.
    """

    @staticmethod
    def forward(ctx, W_col, X_col):
        ctx.save_for_backward(W_col, X_col)
        pairwise = W_col.unsqueeze(2) - X_col.unsqueeze(0)
        return -pairwise.abs().sum(1)

    @staticmethod
    def backward(ctx, grad_output):
        W_col, X_col = ctx.saved_tensors
        # (filters, k, positions) differences, shared by both gradients.
        delta = X_col.unsqueeze(0) - W_col.unsqueeze(2)
        upstream = grad_output.unsqueeze(1)

        # Filter gradient: the raw difference, then rescaled by its own L2
        # norm and by sqrt(number of filter weights) / 5.
        grad_W_col = (delta * upstream).sum(2)
        grad_norm = grad_W_col.norm(p=2).clamp(min=1e-12)
        n_weights = W_col.size(1) * W_col.size(0)
        grad_W_col = grad_W_col / grad_norm * math.sqrt(n_weights) / 5

        # Input gradient: the difference clipped to [-1, 1], negated.
        grad_X_col = (-delta.clamp(-1, 1) * upstream).sum(0)

        return grad_W_col, grad_X_col

class adder2d(nn.Module):
    """Layer that applies ``adder2d_function`` with learnable filters.

    Args:
        input_channel: number of input channels.
        output_channel: number of filters.
        kernel_size: side length of the (square) filters.
        stride: forwarded to ``adder2d_function``.
        padding: forwarded to ``adder2d_function``.
        bias: when true, a learnable per-filter offset ``b`` is added to the output.
    """

    def __init__(self, input_channel, output_channel, kernel_size, stride=1, padding=0, bias=False):
        super().__init__()
        self.input_channel = input_channel
        self.output_channel = output_channel
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding

        # Filters are drawn from a standard normal distribution.
        filters = torch.randn(output_channel, input_channel, kernel_size, kernel_size)
        self.adder = nn.Parameter(nn.init.normal_(filters))

        self.bias = bias
        if bias:
            offsets = torch.zeros(output_channel)
            self.b = nn.Parameter(nn.init.uniform_(offsets))

    def forward(self, x):
        """Return the adder feature map of ``x``, plus ``b`` when bias is enabled."""
        output = adder2d_function(x, self.adder, self.stride, self.padding)
        if not self.bias:
            return output
        # Broadcast the per-filter offset over batch and spatial dimensions.
        output += self.b.view(1, -1, 1, 1)
        return output

# Basic Block for ResNet-32
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 ``adder2d`` layers, each followed by batch norm.

    When the stride is not 1 or the channel count changes, the skip path is a
    1x1 ``adder2d`` plus batch norm; otherwise it is an empty
    ``nn.Sequential`` (identity).
    """

    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = adder2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = adder2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)

        shape_changes = stride != 1 or in_channels != out_channels
        if shape_changes:
            # Project the skip path so it matches the main path's shape.
            self.shortcut = nn.Sequential(
                adder2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, bias=False),
                nn.BatchNorm2d(out_channels),
            )
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut(x))``."""
        main = self.conv1(x)
        main = self.relu(self.bn1(main))
        main = self.bn2(self.conv2(main))
        main += self.shortcut(x)
        return self.relu(main)

# Define ResNet-32 Model
class ResNet32(nn.Module):
    """Three-stage ResNet (16/32/64 channels) with an ``adder2d`` stem.

    Args:
        block: residual block class, called as ``block(in_channels, out_channels, stride)``
            for the first block of a stage and ``block(in_channels, out_channels)`` for the rest.
        num_blocks: number of residual blocks in each of the three stages.
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, block, num_blocks, num_classes=10):
        super().__init__()
        self.in_channels = 16
        self.conv1 = adder2d(3, 16, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(16)
        self.relu = nn.ReLU(inplace=True)
        self.layer1 = self._make_layer(block, 16, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 32, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 64, num_blocks[2], stride=2)
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

    def _make_layer(self, block, out_channels, blocks, stride):
        """Build one stage: only the first block receives ``stride``."""
        stage = [block(self.in_channels, out_channels, stride)]
        self.in_channels = out_channels
        stage.extend(block(self.in_channels, out_channels) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return per-class scores for the image batch ``x``."""
        features = self.relu(self.bn1(self.conv1(x)))
        for stage in (self.layer1, self.layer2, self.layer3):
            features = stage(features)
        pooled = torch.flatten(self.avgpool(features), 1)
        return self.fc(pooled)

# Instantiate the model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = ResNet32(BasicBlock, [5, 5, 5]).to(device)

# Data transformations
# Training pipeline: random flip/crop augmentation, then per-channel normalisation.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])

# Evaluation pipeline: same normalisation but no random augmentation, so the
# reported test accuracy is deterministic and measured on unmodified images.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261))
])

# Load CIFAR-10 dataset
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=32, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=32, shuffle=False, num_workers=2)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=5e-4)

# Number of epochs
num_epochs = 4

# Training function
def train_model():
    """Train the module-level ``model`` for ``num_epochs`` passes over ``trainloader``.

    Uses the module-level ``criterion``, ``optimizer`` and ``device``. After
    each epoch the loss averaged over all of that epoch's batches is printed.
    """
    model.train()
    for epoch in range(1, num_epochs + 1):
        epoch_loss = 0.0
        for inputs, labels in trainloader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Reset gradients, run forward/backward, then update the weights.
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()

        print(f"Epoch [{epoch}/{num_epochs}], Loss: {epoch_loss / len(trainloader):.4f}")

# Testing function
def test_model():
    """Print the top-1 accuracy of the module-level ``model`` over ``testloader``.

    The model is switched to evaluation mode and is left in that mode.
    """
    model.eval()
    hits = 0
    seen = 0
    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for inputs, labels in testloader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            # Index of the highest score along the class dimension.
            predicted = torch.max(model(inputs), 1).indices
            seen += labels.size(0)
            hits += (predicted == labels).sum().item()

    print(f'Accuracy: {100 * hits / seen:.2f}%')

# Train the model
# Runs `num_epochs` epochs using the module-level model, loaders, loss and optimizer.
train_model()

# Test the model
# Prints the accuracy of the trained model over `testloader`.
test_model()


Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:05<00:00, 29878493.66it/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified
