In [73]:
import torch
import torch.nn.functional as F
from torch import nn
class Conv2D(nn.Module):
    def __init__(self,in_channels, out_channels, kernel_size=(3,3), stride=1,padding=1):
        super(Conv2D, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = kernel_size
        self.stride = stride
        self.padding = padding
        self.weight = nn.Parameter(torch.randn(out_channels, in_channels, *kernel_size))
        self.bias = nn.Parameter(torch.randn(out_channels))
    def forward(self, input_batch):
        b, c, h, w = input_batch.size()
        print(f'input size is: {input_batch.size()}')
        unfolded = F.unfold(input_batch, kernel_size=self.kernel_size, stride=self.stride, padding=self.padding)
        # flatten the patches (with all channels) into vectors, arranged as the rows of a matrix X
        print(f'unfolded size is: {unfolded.size()}')
        unfolded = unfolded.transpose(1, 2).contiguous().view(b, -1, c * self.kernel_size[0] * self.kernel_size[1])
        print(f'unfolded size is: {unfolded.size()}')
        # flatten the weight tensor into a matrix W
        print(f'weight size is: {self.weight.size()}')
        weight = self.weight.view(self.out_channels, -1)
        print(f'weight size is: {weight.size()}')
        # compute the matrix multiplication XW^T
        output = torch.bmm(unfolded, weight.t().unsqueeze(0).expand(b, -1, -1))
        print(f'output size is: {output.size()}')
        # reshape the output to its final shape
        output = output.view(b, self.out_channels, h, w)
        # add the bias
        output += self.bias.view(1, -1, 1, 1).expand_as(output)
        print(f'output size is: {output.size()}')
        return output
# We use the Conv2D module by instantiating it, and applying it to an input.
conv = Conv2D(3, 1)
input_batch = torch.randn(16, 3, 32, 32)
output_batch = conv(input_batch)

# Built-in Conv2D implementation
conv_builtin = nn.Conv2d(3, 1, kernel_size=3, stride=1, padding=1)
input_batch = torch.randn(16, 3, 32, 32)
output_builtin = conv_builtin(input_batch)

# Custom Conv2D usage
conv_custom = Conv2D(3, 1)
output_custom = conv_custom(input_batch)

print(f'Output (built-in) size: {output_builtin.size()}')
print(f'Output (custom) size: {output_custom.size()}')
# check if the output of the custom implementation matches the output of the built-in implementation
print(torch.allclose(output_builtin, output_custom))
# if false, print the maximum absolute difference between the two outputs
print(torch.max(torch.abs(output_builtin - output_custom)))

input size is: torch.Size([16, 3, 32, 32])
unfolded size is: torch.Size([16, 27, 1024])
unfolded size is: torch.Size([16, 1024, 27])
weight size is: torch.Size([1, 3, 3, 3])
weight size is: torch.Size([1, 27])
output size is: torch.Size([16, 1024, 1])
output size is: torch.Size([16, 1, 32, 32])
input size is: torch.Size([16, 3, 32, 32])
unfolded size is: torch.Size([16, 27, 1024])
unfolded size is: torch.Size([16, 1024, 27])
weight size is: torch.Size([1, 3, 3, 3])
weight size is: torch.Size([1, 27])
output size is: torch.Size([16, 1024, 1])
output size is: torch.Size([16, 1, 32, 32])
Output (built-in) size: torch.Size([16, 1, 32, 32])
Output (custom) size: torch.Size([16, 1, 32, 32])
False
tensor(20.5723, grad_fn=<MaxBackward1>)


In [181]:
import torch
import torch.nn.functional as F
from torch.autograd import Function

class Conv2DFunc(Function):
    @staticmethod
    def forward(ctx, input_batch, kernel, bias=None, stride=1, padding=1):
        """
        Forward pass for 2D convolution using unfold.
        """
        # Save inputs for backward
        ctx.save_for_backward(input_batch, kernel, bias)
        ctx.stride = stride
        ctx.padding = padding
        
        # Pad the input
        b, c, h, w = input_batch.size()
        input_padded = F.pad(input_batch, (padding, padding, padding, padding))
        
        # Extract patches from the input tensor using unfold
        unfolded_input = F.unfold(input_padded, kernel_size=kernel.size(2), stride=stride)
        
        # Save unfolded input for backward pass
        ctx.unfolded_input = unfolded_input
        
        # Reshape unfolded input: (b, c * kh * kw, h_out * w_out)
        unfolded_input = unfolded_input.view(b, c * kernel.size(2) * kernel.size(3), -1)
        
        # Reshape kernel to (out_channels, in_channels * kh * kw)
        unfolded_kernel = kernel.view(kernel.size(0), -1)
        
        # Perform the matrix multiplication: (b, out_channels, h_out * w_out)
        output = unfolded_kernel.matmul(unfolded_input)
        
        # Reshape output to (b, out_channels, h_out, w_out)
        h_out = (h + 2 * padding - kernel.size(2)) // stride + 1
        w_out = (w + 2 * padding - kernel.size(3)) // stride + 1
        output = output.view(b, kernel.size(0), h_out, w_out)
        print(f'output size is: {output.size()}')
        
        # Add bias if applicable
        if bias is not None:
            output += bias.view(1, -1, 1, 1)
        
        return output
    
    @staticmethod
    def backward(ctx, grad_output):
        input_batch, kernel, bias = ctx.saved_tensors
        stride, padding = ctx.stride, ctx.padding
        unfolded_input = ctx.unfolded_input
        
        b, c, h, w = input_batch.size()
        out_channels, in_channels, kh, kw = kernel.size()
        
        # Compute gradients for input and kernel
        grad_input = None
        grad_kernel = None
        grad_bias = None
        
        # 1. Gradient with respect to input
        if ctx.needs_input_grad[0]:
            grad_output_unfolded = grad_output.view(b, out_channels, -1)
            print(f'grad_output_unfolded size is: {grad_output_unfolded.size()}')
            grad_output_unfolded = grad_output_unfolded.permute(0, 2, 1).contiguous()
            print(f'grad_output_unfolded size is: {grad_output_unfolded.size()}')
            grad_output_unfolded = grad_output_unfolded.view(-1, out_channels)
            print(f'grad_output_unfolded size is: {grad_output_unfolded.size()}\n')
            
            kernel_reshaped = kernel.view(out_channels, -1)
            grad_input_unfolded = grad_output_unfolded.matmul(kernel_reshaped)
            print(f'grad_input_unfolded size is: {grad_input_unfolded.size()}')
            grad_input_unfolded = grad_input_unfolded.view(b, -1, grad_output_unfolded.size(0) // b)
            print(f'grad_input_unfolded size is: {grad_input_unfolded.size()}')
            
            grad_input = F.fold(grad_input_unfolded, (h + 2 * padding - kh + 1, w + 2 * padding - kw + 1), (kh, kw), stride=stride, padding=padding)
            print(f'grad_input size is: {grad_input.size()}')
        
        # 2. Gradient with respect to kernel
        if ctx.needs_input_grad[1]:
            grad_output_unfolded = grad_output.view(b, out_channels, -1)
            grad_output_unfolded = grad_output_unfolded.permute(0, 2, 1).contiguous()
            grad_output_unfolded = grad_output_unfolded.view(-1, out_channels)
            
            unfolded_input = unfolded_input.permute(0, 2, 1).contiguous()
            unfolded_input = unfolded_input.view(-1, unfolded_input.size(2))
            
            grad_kernel = grad_output_unfolded.t().matmul(unfolded_input)
            grad_kernel = grad_kernel.view(out_channels, in_channels, kh, kw)
        
        # 3. Gradient with respect to bias
        if bias is not None and ctx.needs_input_grad[2]:
            grad_bias = grad_output.sum(dim=(0, 2, 3))
        
        return grad_input, grad_kernel, grad_bias, None, None

input_batch = torch.randn(16, 3, 32, 32, requires_grad=True)
kernel = torch.randn(1, 3, 3, 3, requires_grad=True)
bias = torch.randn(1, requires_grad=True)

# Apply the custom convolution function
output = Conv2DFunc.apply(input_batch, kernel, bias, 1, 1)
output.backward(torch.ones_like(output))

# Compare with built-in convolution
conv_builtin = nn.Conv2d(3, 1, kernel_size=3, stride=1, padding=1)
conv_builtin.weight.data = kernel
conv_builtin.bias.data = bias
output_builtin = conv_builtin(input_batch)
output_builtin.backward(torch.ones_like(output_builtin))

# Check if the output of the custom implementation matches the output of the built-in implementation
print(torch.allclose(output, output_builtin))
# if false, print the maximum absolute difference between the two outputs
print(torch.max(torch.abs(output - output_builtin)))

# Check if the gradients of the custom implementation match the gradients of the built-in implementation
print(torch.allclose(input_batch.grad, input_batch.grad))
print(torch.allclose(kernel.grad, conv_builtin.weight.grad))
print(torch.allclose(bias.grad, conv_builtin.bias.grad))
# if false, print the maximum absolute difference between the gradients
print(torch.max(torch.abs(input_batch.grad - input_batch.grad)))
print(torch.max(torch.abs(kernel.grad - conv_builtin.weight.grad)))
print(torch.max(torch.abs(bias.grad - conv_builtin.bias.grad)))

output size is: torch.Size([16, 1, 32, 32])
grad_output_unfolded size is: torch.Size([16, 1, 1024])
grad_output_unfolded size is: torch.Size([16, 1024, 1])
grad_output_unfolded size is: torch.Size([16384, 1])

grad_input_unfolded size is: torch.Size([16384, 27])
grad_input_unfolded size is: torch.Size([16, 27, 1024])
grad_input size is: torch.Size([16, 3, 32, 32])
False
tensor(4.7684e-06, grad_fn=<MaxBackward1>)
True
True
True
tensor(0.)
tensor(0.0004)
tensor(0.)


In [196]:
for i, (inputs, targets) in enumerate(train_loader):
    inputs, targets = inputs.to(device), targets.to(device)
    optimizer.zero_grad()
    outputs = model(inputs)
    loss = criterion(outputs, targets)
    loss.backward()
    optimizer.step()
    if i % 100 == 0:
        print(f'Iteration {i}, Loss: {loss.item()}')

NameError: name 'device' is not defined

In [1]:
import torch

# Check if CUDA is available
print(torch.cuda.is_available())  # Should print True if CUDA is available

# Check which GPU PyTorch is using
print(torch.cuda.current_device())  # Should print the device index
print(torch.cuda.get_device_name(0))  # Should print the name of the GPU, e.g., 'GeForce GTX 1060'


True
0
NVIDIA GeForce GTX 1060 3GB


In [3]:
import torchvision
from torchvision import datasets, transforms
from    torch.utils.data import DataLoader, random_split
mnist_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transforms.ToTensor())
train_size = 50000
val_size = 10000

train_dataset, val_dataset = random_split(mnist_dataset, [train_size, val_size])

test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transforms.ToTensor())

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)
test_dataset = DataLoader(test_dataset, batch_size=16, shuffle=False)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

def train(model, train_loader, val_loader, criterion, optimizer, epochs = 5):
    for epoch in range(epochs):
        model.train()
        runnin_loss = 0.0

        for i, (inputs, targets) in enumerate(train_loader):
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            runnin_loss += loss.item()
            if i % 100 == 99:
                print(f'[{epoch + 1}, {i + 1}] loss: {runnin_loss / 100}')
                runnin_loss = 0.0
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, targets in val_loader:
                outputs = model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()
                _, predicted = torch.max(outputs, 1)
                total += targets.size(0)
                correct += (predicted == targets).sum().item()
        print(f'Epoch {epoch + 1}')
        print(f'Validation loss: {val_loss / len(val_loader)}')
        print(f'Validation accuracy: {100 * correct / total:.2f}%')