In [13]:
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import pytorch_lightning as pl
import os


In [113]:
def block(in_filters, out_filters, activation=None, *args, **kwargs):
    """Build one convolutional stage: Conv2d -> activation -> 2x2 max-pooling.

    Args:
        in_filters: Input channel count handed to ``nn.Conv2d``.
        out_filters: Output channel count handed to ``nn.Conv2d``.
        activation: Zero-argument callable that returns the activation module
            (e.g. ``nn.ReLU``). ``nn.ReLU`` is used when this is ``None``.
        *args: Extra positional arguments forwarded to ``nn.Conv2d``.
        **kwargs: Extra keyword arguments forwarded to ``nn.Conv2d``
            (e.g. ``kernel_size``, ``stride``, ``padding``).

    Returns:
        An ``nn.Sequential`` holding the three layers in the order above.
    """
    make_activation = nn.ReLU if activation is None else activation
    # Build the layers in conv -> activation -> pool order.
    conv = nn.Conv2d(in_filters, out_filters, *args, **kwargs)
    nonlinearity = make_activation()
    pool = nn.MaxPool2d(kernel_size=2)
    return nn.Sequential(conv, nonlinearity, pool)

class MyModel(pl.LightningModule):
    """CNN classifier: a stack of conv blocks followed by a single linear layer.

    ``forward`` returns class probabilities (softmax over dim 1). The loss is
    computed from the pre-softmax logits instead, because ``F.cross_entropy``
    expects unnormalised logits; feeding it probabilities would apply softmax
    twice.
    """

    def __init__(self, layer_widths: list, *args, input_size=None, **kwargs):
        """
        Build the network.

        Args:
            layer_widths: ``[input_channels, *hidden_channels, n_classes]``.
                Consecutive pairs of all entries except the last become conv
                blocks; the last entry is the output width of the linear layer.
            *args: Forwarded unchanged to ``block`` for every conv block.
            input_size: Optional ``(height, width)`` of the input images. When
                given, the input width of the linear layer is inferred by
                passing one zero image through the conv blocks. When ``None``
                the fixed width ``32 * 7 * 7`` is used.
            **kwargs: Forwarded unchanged to ``block`` for every conv block.
        """
        super().__init__()
        # Pair each width with its successor, stopping before the last entry,
        # which is reserved for the linear layer in self.tail.
        self.blocks = nn.Sequential(*[
            block(in_dim, out_dim, *args, **kwargs)
            for in_dim, out_dim in zip(layer_widths[:-2], layer_widths[1:-1])
        ])

        if input_size is None:
            # Legacy default, kept so existing constructor calls are unaffected.
            flat_features = 32 * 7 * 7
        else:
            # Probe the conv stack once to learn the flattened feature width.
            with torch.no_grad():
                probe = torch.zeros(1, layer_widths[0], *input_size)
                flat_features = self.blocks(probe).flatten(start_dim=1).shape[1]

        self.tail = nn.Sequential(nn.Linear(flat_features, layer_widths[-1]))
        self.model = nn.Sequential(
            self.blocks,
            nn.Flatten(),
            self.tail,
            # The linear output is (batch, n_classes): normalise over classes.
            nn.Softmax(dim=1),
        )

    def _logits(self, x):
        """Return the pre-softmax class scores (``self.model`` minus its Softmax)."""
        features = self.blocks(x)
        return self.tail(torch.flatten(features, start_dim=1))

    def forward(self, x):
        """Return class probabilities for the batch ``x``."""
        return self.model(x)

    def _calculate_loss(self, x, y):
        """Cross-entropy between the logits for ``x`` and the targets ``y``."""
        # F.cross_entropy applies log_softmax internally, so it must be given
        # logits, not the probabilities returned by forward().
        return F.cross_entropy(self._logits(x), y)

    def training_step(self, batch, batch_idx):
        """
        Compute the training loss for one ``(x, y)`` batch.
        """
        x, y = batch
        return self._calculate_loss(x, y)

    def validation_step(self, batch, batch_idx):
        """Compute and log ``val_loss`` for one ``(x, y)`` batch."""
        x, y = batch
        metrics = {'val_loss': self._calculate_loss(x, y)}
        self.log_dict(metrics)
        return metrics

    def test_step(self, batch, batch_idx):
        """Compute and log ``test_loss`` for one ``(x, y)`` batch."""
        x, y = batch
        metrics = {'test_loss': self._calculate_loss(x, y)}
        self.log_dict(metrics)
        return metrics

    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        """Return class probabilities for the inputs of one ``(x, y)`` batch."""
        x, _ = batch
        return self(x)

    def configure_optimizers(self):
        """
        Use Adam with its default settings over all model parameters.
        """
        return torch.optim.Adam(self.parameters())


In [114]:
# Seed every RNG first so weight initialisation and batch order are reproducible.
SEED = 42
BATCH_SIZE = 42
pl.seed_everything(SEED)

# MNIST training split, downloaded into the current working directory.
train_ds = datasets.MNIST(
    os.getcwd(),
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)
# Shuffle so batches are not drawn in the fixed on-disk order.
# Each sample has shape [1, 28, 28].
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
trainer = pl.Trainer(max_epochs=1)

# layer_widths = [input channels, *hidden channels, number of classes]
#   - input channels: 1
#   - hidden channels: 16 and 32, i.e. two conv blocks (1 -> 16, 16 -> 32)
#   - number of classes: 10 (output width of the final Linear layer)
# Any further arguments are forwarded to every conv block (nn.Conv2d).
#
# Notes for a first markdown write-up:
#   - What are the input and output dimensions?
#   - What kinds of blocks are needed?
#   - Overall architecture of the model.
#   - Optimizer: which settings does the optimizer expose?
#   - Loss function: which loss functions make sense?
#   - TODO: pull these parameters out of the model (approach still unclear).
model = MyModel(layer_widths=[1, 16, 32, 10], kernel_size=5, stride=1, padding=2)
trainer.fit(model, train_loader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name   | Type       | Params
--------------------------------------
0 | blocks | Sequential | 13.2 K
1 | tail   | Sequential | 15.7 K
2 | model  | Sequential | 28.9 K
--------------------------------------
28.9 K    Trainable params
0         Non-trainable params
28.9 K    Total params
0.116     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]