In [1]:
import os
from pathlib import Path

try:
    import google.colab
    IN_COLAB = True
except:
    IN_COLAB = False

if IN_COLAB:
    from google.colab import userdata
    repo_name = 'dgcnz/dl2'
    url = f"https://{userdata.get('gh_pat')}@github.com/{repo_name}.git"
    !git clone {url}
    print("\nCurrent Directory:")
    %cd dl2
    #!pip install torch torchvision numpy matplotlib git+https://github.com/AMLab-Amsterdam/lie_learn escnn scipy
    !pip install torchvision git+https://github.com/AMLab-Amsterdam/lie_learn escnn lightning
    #!pip install -r requirements.txt

else: # automatically checks if the current directory is 'repo name'
    curdir = Path.cwd()
    print("Current Directory", curdir)
    assert curdir.name == "dl2" or curdir.parent.name == "dl2", "Notebook cwd has to be on the project root"
    if curdir.name == "notebooks":
        %cd ..
        print("New Current Directory:", curdir)

Cloning into 'dl2'...
remote: Enumerating objects: 180, done.[K
remote: Counting objects: 100% (180/180), done.[K
remote: Compressing objects: 100% (139/139), done.[K
remote: Total 180 (delta 58), reused 124 (delta 30), pack-reused 0[K
Receiving objects: 100% (180/180), 72.00 KiB | 12.00 MiB/s, done.
Resolving deltas: 100% (58/58), done.

Current Directory:
/content/dl2
Collecting git+https://github.com/AMLab-Amsterdam/lie_learn
  Cloning https://github.com/AMLab-Amsterdam/lie_learn to /tmp/pip-req-build-4rabijrt
  Running command git clone --filter=blob:none --quiet https://github.com/AMLab-Amsterdam/lie_learn /tmp/pip-req-build-4rabijrt
  Resolved https://github.com/AMLab-Amsterdam/lie_learn to commit 1ccc2106e402d517a29de5438c9367c959e67338
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting escnn

In [27]:
import sys
sys.path.append('../')

import torch

from escnn import gspaces
from escnn import nn
import os
from torch import optim, utils, Tensor
#from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor
import lightning as L
from src.models.image_module import ImageLightningModule
from src.data.rotated_mnist_datamodule import MnistRotDataset

from torch.utils.data import Dataset
from torchvision.transforms import RandomRotation
from torchvision.transforms import Pad
from torchvision.transforms import Resize
from torchvision.transforms import ToTensor
from torchvision.transforms import Compose
from torchvision.transforms import InterpolationMode

Using regular fields, 10 classes and 8 rotations

In [28]:
class C8SteerableCNN(torch.nn.Module):

    def __init__(self, n_classes=10):

        super(C8SteerableCNN, self).__init__()

        # the model is equivariant under rotations by 45 degrees, modelled by C8
        self.r2_act = gspaces.rot2dOnR2(N=8)

        # the input image is a scalar field, corresponding to the trivial representation
        in_type = nn.FieldType(self.r2_act, [self.r2_act.trivial_repr])

        # we store the input type for wrapping the images into a geometric tensor during the forward pass
        self.input_type = in_type

        # convolution 1
        # first specify the output type of the convolutional layer
        # we choose 24 feature fields, each transforming under the regular representation of C8
        out_type = nn.FieldType(self.r2_act, 24*[self.r2_act.regular_repr])
        self.block1 = nn.SequentialModule(
            nn.MaskModule(in_type, 29, margin=1),
            nn.R2Conv(in_type, out_type, kernel_size=7, padding=1, bias=False),
            nn.InnerBatchNorm(out_type),
            nn.ReLU(out_type, inplace=True)
        )

        # convolution 2
        # the old output type is the input type to the next layer
        in_type = self.block1.out_type
        # the output type of the second convolution layer are 48 regular feature fields of C8
        out_type = nn.FieldType(self.r2_act, 48*[self.r2_act.regular_repr])
        self.block2 = nn.SequentialModule(
            nn.R2Conv(in_type, out_type, kernel_size=5, padding=2, bias=False),
            nn.InnerBatchNorm(out_type),
            nn.ReLU(out_type, inplace=True)
        )
        self.pool1 = nn.SequentialModule(
            nn.PointwiseAvgPoolAntialiased(out_type, sigma=0.66, stride=2)
        )

        # convolution 3
        # the old output type is the input type to the next layer
        in_type = self.block2.out_type
        # the output type of the third convolution layer are 48 regular feature fields of C8
        out_type = nn.FieldType(self.r2_act, 48*[self.r2_act.regular_repr])
        self.block3 = nn.SequentialModule(
            nn.R2Conv(in_type, out_type, kernel_size=5, padding=2, bias=False),
            nn.InnerBatchNorm(out_type),
            nn.ReLU(out_type, inplace=True)
        )

        # convolution 4
        # the old output type is the input type to the next layer
        in_type = self.block3.out_type
        # the output type of the fourth convolution layer are 96 regular feature fields of C8
        out_type = nn.FieldType(self.r2_act, 96*[self.r2_act.regular_repr])
        self.block4 = nn.SequentialModule(
            nn.R2Conv(in_type, out_type, kernel_size=5, padding=2, bias=False),
            nn.InnerBatchNorm(out_type),
            nn.ReLU(out_type, inplace=True)
        )
        self.pool2 = nn.SequentialModule(
            nn.PointwiseAvgPoolAntialiased(out_type, sigma=0.66, stride=2)
        )

        # convolution 5
        # the old output type is the input type to the next layer
        in_type = self.block4.out_type
        # the output type of the fifth convolution layer are 96 regular feature fields of C8
        out_type = nn.FieldType(self.r2_act, 96*[self.r2_act.regular_repr])
        self.block5 = nn.SequentialModule(
            nn.R2Conv(in_type, out_type, kernel_size=5, padding=2, bias=False),
            nn.InnerBatchNorm(out_type),
            nn.ReLU(out_type, inplace=True)
        )

        # convolution 6
        # the old output type is the input type to the next layer
        in_type = self.block5.out_type
        # the output type of the sixth convolution layer are 64 regular feature fields of C8
        out_type = nn.FieldType(self.r2_act, 64*[self.r2_act.regular_repr])
        self.block6 = nn.SequentialModule(
            nn.R2Conv(in_type, out_type, kernel_size=5, padding=1, bias=False),
            nn.InnerBatchNorm(out_type),
            nn.ReLU(out_type, inplace=True)
        )
        self.pool3 = nn.PointwiseAvgPoolAntialiased(out_type, sigma=0.66, stride=1, padding=0)

        self.gpool = nn.GroupPooling(out_type)

        # number of output channels
        c = self.gpool.out_type.size

        # Fully Connected
        self.fully_net = torch.nn.Sequential(
            torch.nn.Linear(c, 64),
            torch.nn.BatchNorm1d(64),
            torch.nn.ELU(inplace=True),
            torch.nn.Linear(64, n_classes),
        )

    def forward(self, input: torch.Tensor):
        # wrap the input tensor in a GeometricTensor
        # (associate it with the input type)
        x = nn.GeometricTensor(input, self.input_type)

        # apply each equivariant block

        # Each layer has an input and an output type
        # A layer takes a GeometricTensor in input.
        # This tensor needs to be associated with the same representation of the layer's input type
        #
        # The Layer outputs a new GeometricTensor, associated with the layer's output type.
        # As a result, consecutive layers need to have matching input/output types
        x = self.block1(x)
        x = self.block2(x)
        x = self.pool1(x)

        x = self.block3(x)
        x = self.block4(x)
        x = self.pool2(x)

        x = self.block5(x)
        x = self.block6(x)

        # pool over the spatial dimensions
        x = self.pool3(x)

        # pool over the group
        x = self.gpool(x)

        # unwrap the output GeometricTensor
        # (take the Pytorch tensor and discard the associated representation)
        x = x.tensor

        # classify with the final fully connected layers)
        x = self.fully_net(x.reshape(x.shape[0], -1))

        return x

In [29]:


device = 'cuda' if torch.cuda.is_available() else 'cpu'


# define the LightningModule
class LitModEquivariant(L.LightningModule):
    def __init__(self, net):
        super().__init__()
        self.net = net

    def training_step(self, batch, batch_idx):

        x, y = batch
        #x = x.view(x.size(0), -1) idk waarom dit in die tutorial staat maar dit is een flatten opration vgm
        print(x.shape)
        #print(x.shape)
        z = self.net(x)
        loss = torch.nn.functional.cross_entropy(z, y)
        # Logging to TensorBoard (if installed) by default
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        optimizer = optim.Adam(self.parameters(), lr=1e-3)
        return optimizer


# init model
net = C8SteerableCNN().to(device)
equivariantmodel = LitModEquivariant(net)

#define transforms
pad = Pad((0, 0, 1, 1), fill=0)

resize1 = Resize(87)
resize2 = Resize(29)

totensor = ToTensor()

train_transform = Compose([
    pad,
    resize1,
    RandomRotation(180., interpolation=InterpolationMode.BILINEAR, expand=False),
    resize2,
    totensor,
])

dataset = MnistRotDataset("data/mnist/",download=True, transform=train_transform)
train_loader = utils.data.DataLoader(dataset, batch_size = 64)

# train the model (hint: here are some helpful Trainer arguments for rapid idea iteration)
trainer = L.Trainer(limit_train_batches=1, max_epochs=2) #for some reason error if you put max_epoch to 1 idk man
trainer.fit(model=equivariantmodel, train_dataloaders=train_loader)


INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: `Trainer(limit_train_batches=1)` was configured so 1 batch per epoch will be used.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer(limit_train_batches=1)` was configured so 1 batch per epoch will be used.
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name | Type           | Params
----------------------------------------
0 | net  | C8SteerableCNN | 2.1 M 
------------------------------------

Training: |          | 0/? [00:00<?, ?it/s]

torch.Size([64, 1, 29, 29])
torch.Size([64, 1, 29, 29])


INFO: `Trainer.fit` stopped: `max_epochs=2` reached.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer.fit` stopped: `max_epochs=2` reached.


###Now create a similar sized non-equivariant CNN


In [47]:
import torch
import torch.nn.functional as F

class BasicInvertedBottleneckBlock(torch.nn.Module):
    def __init__(self, Cin, N, Cout, downsample = False, first_block = False):
        super(BasicInvertedBottleneckBlock, self).__init__()

        kernel_size = 3
        padding = 1

        if first_block:
          kernel_size = 7
          padding = 3

        self.block = torch.nn.Sequential(
          torch.nn.Conv2d(Cin, N, kernel_size=kernel_size, stride=1, padding=padding),
          torch.nn.BatchNorm2d(N), #idk which BN to use
          torch.nn.ELU(inplace=True)
        )

        self.one_by_one: bool = (Cin != Cout)

        if self.one_by_one:
            self.conv1x1 = torch.nn.Conv2d(Cin, Cout, kernel_size=1, stride=1, padding=0)

        self.downsample = downsample
        if self.downsample:
          #for this one I'm guessing
          self.downsample_conv = torch.nn.Conv2d(N, Cout, kernel_size=3, stride=2, padding=padding)
          self.avg_pool = torch.nn.AvgPool2d(kernel_size=3, stride=2, padding=padding)

    def forward(self, x):
        out = self.block(x)
        print('before downsample')
        print(out.shape)
        print(x.shape)

        if self.downsample:
          out = self.downsample_conv(out)
          x = self.avg_pool(x)


        print('afterdownsample')
        print(out.shape)
        print(x.shape)

        if self.one_by_one:
            x = self.conv1x1(x)

        print('afteronebyone')
        print(out.shape)
        print(x.shape)
        skip_connection = out + x
        return skip_connection

class CNN(torch.nn.Module):
    def __init__(self, backbone_channels, residual_channels):
        super(CNN, self).__init__()
        self.backbone_channels = backbone_channels
        self.residual_channels = residual_channels
        self.blocks = self._make_blocks()

    def _make_blocks(self):
        blocks = []
        for i in range(len(self.backbone_channels)):
            Cin = self.backbone_channels[i]
            N = self.residual_channels[i]
            # this next part is because their explanation doesnt seem to include the channel output size of the last block. So we set it to be the same as the input
            if i < len(self.backbone_channels) - 1:
                Cout = self.backbone_channels[i + 1]
            else:
                Cout = 128 #

            if i == 0:
              blocks.append(BasicInvertedBottleneckBlock(Cin, N, Cout, first_block=True))

            elif i+1 % 2 == 0: #every two layers this is true
              blocks.append(BasicInvertedBottleneckBlock(Cin, N, Cout, downsample=True))

            else:
              blocks.append(BasicInvertedBottleneckBlock(Cin, N, Cout))
        return torch.nn.Sequential(*blocks)

    def forward(self, x):
        return self.blocks(x)

# Example usage:
backbone_channels = [1, 21, 54, 72, 108, 168]  # These are the C_in's
residual_channels = [96, 192, 288, 288, 576, 576]  # These are the upsampled N's
model = CNN(backbone_channels, residual_channels)



In [48]:
normal_cnn = LitModEquivariant(model)
cnn_trainer = L.Trainer(limit_train_batches=1, max_epochs=2) #for some reason error if you put max_epoch to 1 idk man
cnn_trainer.fit(model=normal_cnn, train_dataloaders=train_loader)

INFO: GPU available: True (cuda), used: True
INFO:lightning.pytorch.utilities.rank_zero:GPU available: True (cuda), used: True
INFO: TPU available: False, using: 0 TPU cores
INFO:lightning.pytorch.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO: IPU available: False, using: 0 IPUs
INFO:lightning.pytorch.utilities.rank_zero:IPU available: False, using: 0 IPUs
INFO: HPU available: False, using: 0 HPUs
INFO:lightning.pytorch.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO: `Trainer(limit_train_batches=1)` was configured so 1 batch per epoch will be used.
INFO:lightning.pytorch.utilities.rank_zero:`Trainer(limit_train_batches=1)` was configured so 1 batch per epoch will be used.
INFO: LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO:lightning.pytorch.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
INFO: 
  | Name | Type | Params
------------------------------
0 | net  | CNN  | 1.9 M 
------------------------------
1.9 M     Trainable params
0       

Training: |          | 0/? [00:00<?, ?it/s]

torch.Size([64, 1, 29, 29])
before downsample
torch.Size([64, 96, 29, 29])
torch.Size([64, 1, 29, 29])
afterdownsample
torch.Size([64, 96, 29, 29])
torch.Size([64, 1, 29, 29])
afteronebyone
torch.Size([64, 96, 29, 29])
torch.Size([64, 21, 29, 29])


RuntimeError: The size of tensor a (96) must match the size of tensor b (21) at non-singleton dimension 1