# 2022/01/14 Memo & Tests

#### Aim
visualize model info

#### Get & Add the path

In [1]:
import pathlib
import sys

from pathlib import Path


current_dir: Path = pathlib.Path().cwd().resolve()
project_root: Path = current_dir.parent
data_dir: Path = project_root / "data"

sys.path.append(str(project_root))


### Visualize `TaskChallenger`

In [2]:
import torchinfo
import torch
from torch import nn
from torch.nn import functional as F

from corsmal_challenge.models.audio import LogMelEncoder  # noqa (E402)
from corsmal_challenge.models.task1_2 import T1Head, T2Head  # noqa (E402)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

batches = 1
seq_len = 750
in_channels = 8
embed_dim = 128
num_heads = 4
num_layers = 4

class TaskChallenger(nn.Module):
    def __init__(self, task_id: int = 1):
        super(TaskChallenger, self).__init__()
        self.task_id = task_id
        self.encoder = LogMelEncoder(num_encoder_blocks=4, num_heads=4)
        self.classify_head1 = T1Head()
        self.classify_head2 = T2Head()

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        x: torch.Tensor = self.encoder(inputs)
        if self.task_id == 1:
            x = self.classify_head1(x[:, 0, :])  # extract embedding of class token
        elif self.task_id == 2:
            x = self.classify_head2(x[:, 0, :])  # extract embedding of class token
        x = x.squeeze(1)
        return x


a = torch.randn(batches, in_channels, seq_len, embed_dim)
model = TaskChallenger(task_id=2)
a = a.to(device)
model = model.to(device)


b: torch.Tensor = model(a)
print(f"a.size(): {a.size()}")
print(f"b.size(): {b.size()}")

torchinfo.summary(model, inputs_size=a.shape)

a.size(): torch.Size([1, 8, 750, 128])
b.size(): torch.Size([1, 4])


Layer (type:depth-idx)                                  Param #
TaskChallenger                                          --
├─LogMelEncoder: 1-1                                    --
│    └─BatchNorm2d: 2-1                                 16
│    └─DepthWiseConv2d: 2-2                             416
│    └─PointWiseConv2d: 2-3                             17
│    └─TransformerEncoder: 2-4                          --
│    │    └─CLSTokenAdder: 3-1                          129
│    │    └─PositionalEncoding: 3-2                     --
│    │    └─MultiheadedSelfAttention: 3-3               66,048
│    │    └─Sequential: 3-4                             396,672
├─T1Head: 1-2                                           --
│    └─Linear: 2-5                                      4,128
│    └─Linear: 2-6                                      99
├─T2Head: 1-3                                           --
│    └─Linear: 2-7                                      4,128
│    └─Linear: 2-8                

In [3]:
%%timeit
model(a)

3.12 ms ± 413 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
