In [33]:
import torch
import torchvision

import torch.nn as nn

import torchvision.transforms as transforms

from torch.optim.lr_scheduler import ReduceLROnPlateau

In [34]:
if torch.backends.mps.is_available():
    device = torch.device("mps")
    x = torch.ones(1, device=device)
    print (x)
    
elif torch.backends.cuda.is_built():
    device = torch.device("cuda")
    x = torch.ones(1, device=device)
    print (x)
    
else:
    print ("MPS device not found.")

tensor([1.], device='cuda:0')


In [3]:
# I create a residual block which will be reused by the model class
# The residual block may change size depending on the size of the network, I will begin with the ResNET34 network and then move to the ResNET50

class ResidualBlock(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.conv1 = nn.Conv2d(
            
        )
        
        

In [40]:
dummy1 = torch.randn(1, 3, 224, 224)  # For example, a single 224x224 RGB image
dummy1= dummy1.to(device)

conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=(7,7), stride=2, padding=3, device=device)
dummy1 = conv1(dummy1)
print(dummy1.shape)

mp1 = nn.MaxPool2d(kernel_size=(3,3), stride=2, padding=1)
dummy1 = mp1(dummy1)
print(dummy1.shape)

conv2 = nn.Conv2d(in_channels=64, out_channels=64, kernel_size=(3,3), stride=1, padding=1, device=device)
dummy1 = conv2(dummy1)
print(dummy1.shape)

conv3 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(3,3), stride=2, padding=1, device=device)
dummy1_change_dim = conv3(dummy1)
print(dummy1_change_dim.shape)

torch.Size([1, 64, 112, 112])
torch.Size([1, 64, 56, 56])
torch.Size([1, 64, 56, 56])
torch.Size([1, 128, 28, 28])


In [43]:
dummy1.shape

torch.Size([1, 64, 56, 56])

In [46]:
conv_change_dim = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(1,1), stride=2, device=device)
dummy1_change = conv_change_dim(dummy1)
print(dummy1_change.shape)

torch.Size([1, 128, 28, 28])


In [48]:
dummy1_change + dummy1_change_dim

tensor([[[[ 0.1504,  0.3778,  0.1248,  ...,  0.4215,  0.3730,  0.2714],
          [ 0.2804,  0.2868,  0.9122,  ...,  0.5940,  0.4803,  0.7434],
          [ 0.4763,  0.7639,  0.6916,  ...,  0.5905,  0.5021,  0.5044],
          ...,
          [ 0.2601,  0.6739,  0.8382,  ...,  0.6568,  0.6527,  0.6523],
          [ 0.2464,  0.5472,  0.8069,  ...,  0.5932,  0.7632,  0.6078],
          [ 0.2615,  0.3440,  0.8023,  ...,  0.4130,  0.5459,  0.7129]],

         [[-0.1580, -0.1041, -0.1342,  ..., -0.0392, -0.2626,  0.0531],
          [-0.4320, -0.4274, -0.1070,  ..., -0.4674, -0.1672, -0.3652],
          [-0.3088, -0.1924, -0.2040,  ...,  0.1007, -0.0948, -0.1875],
          ...,
          [-0.5458, -0.3418, -0.3320,  ..., -0.6077,  0.0449, -0.0755],
          [-0.2638, -0.2043, -0.0681,  ..., -0.0342, -0.2294,  0.0404],
          [-0.4018, -0.4768, -0.0253,  ..., -0.3825, -0.4072, -0.4644]],

         [[ 0.0916,  0.0493, -0.1437,  ..., -0.1328, -0.0155,  0.0150],
          [-0.1602, -0.4761, -

In [47]:
class ResNet18(nn.Module):
    def __init__(self):
        super().__init__()
        
        # The first 7x7 conv layer
        self.conv0 = nn.Conv2d(
            in_channels=3, out_channels=64, 
            kernel_size=(7, 7),
            stride=2, padding=3, bias=False,
        )
        self.bn0 = nn.BatchNorm2d(num_features=64)
        self.relu0 = nn.ReLU()
        self.maxpool0 = nn.MaxPool2d(
            kernel_size=(3, 3), stride=2, padding=1,
        )
        
        # First part of first conv block
        self.conv1_1 = nn.Conv2d(
            in_channels=64, out_channels=64,
            kernel_size=(3,3),
            stride=1, padding=1, bias=False,
        )
        self.bn1_1 = nn.BatchNorm2d(num_features=64)
        self.relu1_1 = nn.ReLU()
        
        self.conv1_2 = nn.Conv2d(
            in_channels=64, out_channels=64,
            kernel_size=(3,3),
            stride=1, padding=1, bias=False,
        )
        self.bn1_2 = nn.BatchNorm2d(num_features=64)
        self.relu1_2 = nn.ReLU()
        # Second part of first conv block
        self.conv1_3 = nn.Conv2d(
            in_channels=64, out_channels=64,
            kernel_size=(3,3),
            stride=1, padding=1, bias=False,
        )
        self.bn1_3 = nn.BatchNorm2d(num_features=64)
        self.relu1_3 = nn.ReLU()
        
        self.conv1_4 = nn.Conv2d(
            in_channels=64, out_channels=64,
            kernel_size=(3,3),
            stride=1, padding=1, bias=False,
        )
        self.bn1_4 = nn.BatchNorm2d(num_features=64)
        self.relu1_4 = nn.ReLU()
        
        # First part of second conv block, this layer changes features to 128
        self.conv2_1 = nn.Conv2d(
            in_channels=64, out_channels=128,
            kernel_size=(3,3),
            stride=2, padding=1, bias=False,
        )
        self.bn2_1 = nn.BatchNorm2d(num_features=128)
        self.relu2_1 = nn.ReLU()
        
        self.conv2_2 = nn.Conv2d(
            in_channels=128, out_channels=128,
            kernel_size=(3,3),
            stride=1, padding=1, bias=False,
        )
        self.bn2_2 = nn.BatchNorm2d(num_features=128)
        self.relu2_2 = nn.ReLU()
        
        self.dim_match_conv1 = nn.Conv2d(
            in_channels=64, out_channels=128, 
            kernel_size=(1,1),
            stride=2, 
        )
        # Second part of second conv block
        self.conv2_3 = nn.Conv2d(
            in_channels=128, out_channels=128,
            kernel_size=(3,3),
            stride=1, padding=1, bias=False,
        )
        self.bn2_3 = nn.BatchNorm2d(num_features=128)
        self.relu2_3 = nn.ReLU()
        
        self.conv2_4 = nn.Conv2d(
            in_channels=128, out_channels=128,
            kernel_size=(3,3),
            stride=1, padding=1, bias=False,
        )
        self.bn2_4 = nn.BatchNorm2d(num_features=128)
        self.relu2_4 = nn.ReLU()
        
        # First part of third conv block
        self.conv3_1 = nn.Conv2d(
            in_channels=128, out_channels=256,
            kernel_size=(3,3),
            stride=2, padding=1, bias=False,
        )
        self.bn3_1 = nn.BatchNorm2d(num_features=256)
        self.relu3_1 = nn.ReLU()
        
        self.conv3_2 = nn.Conv2d(
            in_channels=256, out_channels=256,
            kernel_size=(3,3),
            stride=1, padding=1, bias=False,
        )
        self.bn3_2 = nn.BatchNorm2d(num_features=256)
        self.relu3_2 = nn.ReLU()
        
        self.dim_match_conv2 = nn.Conv2d(
            in_channels=128, out_channels=256, 
            kernel_size=(1,1),
            stride=2, 
        )
        # Second part of third conv block
        self.conv3_3 = nn.Conv2d(
            in_channels=256, out_channels=256,
            kernel_size=(3,3),
            stride=1, padding=1, bias=False,
        )
        self.bn3_3 = nn.BatchNorm2d(num_features=256)
        self.relu3_3 = nn.ReLU()
        
        self.conv3_4 = nn.Conv2d(
            in_channels=256, out_channels=256,
            kernel_size=(3,3),
            stride=1, padding=1, bias=False,
        )
        self.bn3_4 = nn.BatchNorm2d(num_features=256)
        self.relu3_4 = nn.ReLU()
        
        # First part of fourth conv block
        self.conv4_1 = nn.Conv2d(
            in_channels=256, out_channels=512,
            kernel_size=(3,3),
            stride=2, padding=1, bias=False,
        )
        self.bn4_1 = nn.BatchNorm2d(num_features=512)
        self.relu4_1 = nn.ReLU()
        
        self.conv4_2 = nn.Conv2d(
            in_channels=512, out_channels=512,
            kernel_size=(3,3),
            stride=1, padding=1, bias=False,
        )
        self.bn4_2 = nn.BatchNorm2d(num_features=512)
        self.relu4_2 = nn.ReLU()
        
        self.dim_match_conv3 = nn.Conv2d(
            in_channels=256, out_channels=512, 
            kernel_size=(1,1),
            stride=2, 
        )
        # Second part of fourth conv block
        self.conv4_3 = nn.Conv2d(
            in_channels=512, out_channels=512,
            kernel_size=(3,3),
            stride=1, padding=1, bias=False,
        )
        self.bn4_3 = nn.BatchNorm2d(num_features=512)
        self.relu4_3 = nn.ReLU()
        
        self.conv4_4 = nn.Conv2d(
            in_channels=512, out_channels=512,
            kernel_size=(3,3),
            stride=1, padding=1, bias=False,
        )
        self.bn4_4 = nn.BatchNorm2d(num_features=512)
        self.relu4_4 = nn.ReLU()
        
        # Avg pool out put and pass through a FC connected layer which has 10 outputs for CIFAR10 
        # (Change to 1000 for ImageNet)
        self.global_avg_pool = nn.AdaptiveAvgPool2d(
            output_size=(1,1)
        )
        
        # Final FC layer
        self.fc1 = nn.Linear(
            in_features=512, out_features=10
        )
        
    def forward(self, x):
           
        x = self.conv0(x)
        x = self.bn0(x)
        x = self.relu0(x)
        x = self.maxpool0(x)  
        res_x = x.detach().clone()  # The x to be passed forward to the +2 layer
        #print(f'dim after init conv: {x.shape}')
        
        # First residual block of first conv block
        x = self.conv1_1(x)
        x = self.bn1_1(x)
        x = self.relu1_1(x)
        x = self.conv1_2(x)
        x = self.bn1_2(x)
        x = self.relu1_2(x)
        x = x + res_x
        # res_x = x.detach().clone()  This might be the wrong approach for making res_x
        res_x = x
        #print(f'dim after first res block in first conv block {x.shape}')
        
        # Second residual block of first conv block
        x = self.conv1_3(x)
        x = self.bn1_3(x)
        x = self.relu1_3(x)
        x = self.conv1_3(x)
        x = self.bn1_3(x)
        x = self.relu1_3(x)
        x = x + res_x
        #res_x = x.detach().clone()
        res_x = x
        #print(f'dim after second res block in first conv block {x.shape}')
        
        # First residual block of second conv block
        x = self.conv2_1(x)
        x = self.bn2_1(x)
        x = self.relu2_1(x)
        x = self.conv2_2(x)
        x = self.bn2_2(x)
        x = self.relu2_2(x)
        
        # Must perform a (1x1) conv on res_x to make the dimensions match
        res_x = self.dim_match_conv1(res_x)
        #print(f'dim res_x after 1x1 conv: {res_x.shape}')
        x = x + res_x
        #res_x = x.detach().clone()
        res_x = x
        #print(f'dim after first res block in second conv block {x.shape}')
        
        # Second residual block of second conv block
        x = self.conv2_3(x)
        x = self.bn2_3(x)
        x = self.relu2_3(x)
        x = self.conv2_4(x)
        x = self.bn2_4(x)
        x = self.relu2_4(x)
        x = x + res_x
        #res_x = x.detach().clone()
        res_x = x
        #print(f'dim after second res block in second conv block {x.shape}')
        
        # First residual block of third conv block
        x = self.conv3_1(x)
        x = self.bn3_1(x)
        x = self.relu3_1(x)
        x = self.conv3_2(x)
        x = self.bn3_2(x)
        x = self.relu3_2(x)
        
        # Must perform a (1x1) conv on res_x to make the dimensions match
        res_x = self.dim_match_conv2(res_x)
        #print(f'dim res_x after 1x1 conv: {res_x.shape}')
        x = x + res_x
        #res_x = x.detach().clone()
        res_x = x
        #print(f'dim after first res block in third conv block {x.shape}')
        
        # Second residual block of third conv block
        x = self.conv3_3(x)
        x = self.bn3_3(x)
        x = self.relu3_3(x)
        x = self.conv3_4(x)
        x = self.bn3_4(x)
        x = self.relu3_4(x)
        x = x + res_x
        #res_x = x.detach().clone()
        res_x = x
        #print(f'dim after second res block in third conv block {x.shape}')
        
        # First residual block of fourth conv block
        x = self.conv4_1(x)
        x = self.bn4_1(x)
        x = self.relu4_1(x)
        x = self.conv4_2(x)
        x = self.bn4_2(x)
        x = self.relu4_2(x)
        
        # Must perform a (1x1) conv on res_x to make the dimensions match
        res_x = self.dim_match_conv3(res_x)
        #print(f'dim res_x after 1x1 conv: {res_x.shape}')
        x = x + res_x
        #res_x = x.detach().clone()
        res_x = x
        #print(f'dim after first res block in fourth conv block {x.shape}')
        
        # Second residual block of third conv block
        x = self.conv4_3(x)
        x = self.bn4_3(x)
        x = self.relu4_3(x)
        x = self.conv4_4(x)
        x = self.bn4_4(x)
        x = self.relu4_4(x)
        x = x + res_x
        # res_x = x.detach().clone()
        #print(f'dim after second res block in fourth conv block {x.shape}')
        
        x = self.global_avg_pool(x)
        #print(f'dim after global avg pool: {x.shape}')
        # Flatten after pooling to make output into a vector ready for FC layer
        x = torch.flatten(x, start_dim=1)
        #print(f'dim after flatten: {x.shape}')
        
        # Final fc layer
        x = self.fc1(x)
        #print(f'Final output dim: {x.shape}')
        
        return x

def kaiming_uniform_init(net):
    if isinstance(net, nn.Linear) or isinstance(net, nn.Conv2d):
        nn.init.kaiming_uniform_(net.weight)
        if net.bias is not None:
            nn.init.constant_(net.bias, 0)  

model = ResNet18()
model.apply(kaiming_uniform_init)
model.to(device)

ResNet18(
  (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu0): ReLU()
  (maxpool0): MaxPool2d(kernel_size=(3, 3), stride=2, padding=1, dilation=1, ceil_mode=False)
  (conv1_1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1_1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1_1): ReLU()
  (conv1_2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1_2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1_2): ReLU()
  (conv1_3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1_3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1_3): ReLU()
  (conv1_4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (b

In [48]:
dummy = torch.randn(1, 1, 32, 32)  # For example, a single 224x224 RGB image
dummy = dummy.to(device)

out = model(dummy)

RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[1, 1, 32, 32] to have 3 channels, but got 1 channels instead

In [41]:
from torchvision.datasets import CIFAR10
from torch.utils.data import DataLoader

transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.4914, 0.48227 ,0.4465], std=[0.2470, 0.2435, 0.2616]), #Normalize using mean and std dev of cifar10
])

# Load datasets
train_dataset = CIFAR10(root='./CIFAR', train=True, download=True, transform=transform)
test_dataset = CIFAR10(root='./CIFAR', train=False, download=True, transform=transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True) # 256 batch size for imagenet
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [45]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.0001, weight_decay=0.0001, momentum=0.9)

# Define learning rate scheduler
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=5, verbose=True)

In [46]:
for epoch in range(10000):
    model.train()  # Set model to training mode
    running_loss = 0.0
    
    for i, data in enumerate(train_loader, 0):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    train_loss = running_loss / len(train_loader)
    print(f'Epoch [{epoch + 1}] training loss: {train_loss:.3f}')
    
    # Validation phase
    model.eval()  # Set model to evaluation mode
    val_running_loss = 0.0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for data in test_loader:  # Assuming test_loader is used as a validation loader
            inputs, labels = data
            inputs, labels = inputs.to(device), labels.to(device)
            
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            
            val_running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    
    val_loss = val_running_loss / len(test_loader)
    val_accuracy = 100 * val_correct / val_total
    print(f'Epoch [{epoch + 1}] validation loss: {val_loss:.3f}, accuracy: {val_accuracy:.2f}%')
    
    # Update the LR scheduler with validation loss
    scheduler.step(val_loss)


Epoch [1] training loss: 1.964
Epoch [1] validation loss: 1.989, accuracy: 30.27%
Epoch [2] training loss: 1.609
Epoch [2] validation loss: 1.932, accuracy: 33.76%
Epoch [3] training loss: 1.495
Epoch [3] validation loss: 1.686, accuracy: 39.81%
Epoch [4] training loss: 1.419
Epoch [4] validation loss: 1.633, accuracy: 43.59%
Epoch [5] training loss: 1.363
Epoch [5] validation loss: 1.585, accuracy: 43.74%
Epoch [6] training loss: 1.320
Epoch [6] validation loss: 1.641, accuracy: 41.97%
Epoch [7] training loss: 1.282
Epoch [7] validation loss: 1.621, accuracy: 42.98%
Epoch [8] training loss: 1.241
Epoch [8] validation loss: 1.636, accuracy: 43.36%
Epoch [9] training loss: 1.215
Epoch [9] validation loss: 1.758, accuracy: 39.69%
Epoch [10] training loss: 1.188
Epoch [10] validation loss: 1.754, accuracy: 40.06%
Epoch [11] training loss: 1.165
Epoch [11] validation loss: 1.629, accuracy: 43.34%
Epoch [12] training loss: 1.106
Epoch [12] validation loss: 1.643, accuracy: 42.88%
Epoch [13]

KeyboardInterrupt: 

In [None]:
# Make batch size 128, didnt seem to help