In [2]:
# One-time environment setup, kept commented out so a normal run does not repeat it.
# Uncomment to install gdown, download the shared Google Drive folder, and install
# the packages listed in requirements.txt.
# !pip install gdown
# !gdown --folder https://drive.google.com/drive/folders/18fbeQOzN4BMn09LPnFgWflhTP-r9JJrc?usp=sharing
# !pip install -r requirements.txt

Collecting gdown
  Downloading gdown-5.1.0-py3-none-any.whl.metadata (5.7 kB)
Collecting tqdm (from gdown)
  Downloading tqdm-4.66.2-py3-none-any.whl.metadata (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.6/57.6 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Collecting PySocks!=1.5.7,>=1.5.6 (from requests[socks]->gdown)
  Downloading PySocks-1.7.1-py3-none-any.whl.metadata (13 kB)
Downloading gdown-5.1.0-py3-none-any.whl (17 kB)
Downloading tqdm-4.66.2-py3-none-any.whl (78 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.3/78.3 kB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading PySocks-1.7.1-py3-none-any.whl (16 kB)
Installing collected packages: tqdm, PySocks, gdown
Successfully installed PySocks-1.7.1 gdown-5.1.0 tqdm-4.66.2
[0mRetrieving folder contents
Retrieving folder 12JC9TX26Z7p1q209HIZ_aR8Rxgpmdj0R converted
Processing file 1xOwTLik_HkK8mjkYueYm3vYsYij1ezUy noise_data.pt
Processing file 1odkWAtGNQ6jsuWirHcHMl9ouj2

In [7]:
import pytorch_lightning as pl
import torch

from torch import Tensor, nn
from torch.optim.lr_scheduler import ReduceLROnPlateau
from pytorch_lightning.callbacks import EarlyStopping

from engine.data import get_data_loader
from engine.metrics import accuracy
from engine.model_base import LightningBaseModule

# Global PyTorch setting: trade some float32 matmul precision for speed
# (the "medium" level lets matmuls use faster, lower-precision kernels where available).
torch.set_float32_matmul_precision("medium")

In [5]:
# Build one loader per dataset split, in train -> val -> test order.
# NOTE(review): the Lightning warning in the training output reports that the
# validation loader shuffles; that is decided inside get_data_loader, not here.
train_loader, val_loader, test_loader = [
    get_data_loader(split_name) for split_name in ("train", "val", "test")
]

In [9]:
# Size of the feature (last) dimension used by the attention layers and LayerNorm below.
# It is split across 10 attention heads, so it must stay divisible by 10.
EMBEDDING_SIZE = 80


class Attention(nn.Module):
    """Multi-head self-attention followed by a ReLU.

    Inputs and outputs are batch-first: ``(batch, sequence, embed_dim)``.

    Args:
        embed_dim: Size of the last (feature) dimension of the input. When
            omitted, the module-level ``EMBEDDING_SIZE`` is used.
        num_heads: Number of attention heads; must divide ``embed_dim``.
    """

    def __init__(self, embed_dim=None, num_heads=10):
        super().__init__()
        if embed_dim is None:
            # Resolved at construction time so the default follows the notebook constant.
            embed_dim = EMBEDDING_SIZE
        # batch_first=True is required here: the model later applies nn.Flatten(),
        # which keeps dim 0 as the batch axis. With the PyTorch default
        # (batch_first=False) dim 0 would be read as the sequence axis and the
        # layer would attend across the samples of a batch instead of across
        # the positions of each sample.
        self.attention = nn.MultiheadAttention(embed_dim, num_heads, batch_first=True)
        self.activation = nn.ReLU()

    def forward(self, X):
        """Apply self-attention (query = key = value = ``X``) and a ReLU.

        Args:
            X: Tensor of shape ``(batch, sequence, embed_dim)``.

        Returns:
            Tensor with the same shape as ``X``.
        """
        # The second return value (attention weights) is not needed.
        output, _ = self.attention(X, X, X)
        return self.activation(output)


class AttentionBlock(nn.Module):
    """Residual block: three chained ``Attention`` layers, LayerNorm, then a skip connection."""

    def __init__(self):
        super().__init__()
        attention_layers = [Attention() for _ in range(3)]
        self.attentions = nn.Sequential(*attention_layers)
        self.layer_norm = nn.LayerNorm(EMBEDDING_SIZE)

    def forward(self, X):
        """Return ``LayerNorm(attentions(X)) + X``; the output has the same shape as ``X``."""
        normalised = self.layer_norm(self.attentions(X))
        # Skip connection around the whole attention stack.
        return normalised + X


class AttentionModel(LightningBaseModule):
    """Classifier built from two stacks of attention blocks and a dense head.

    ``forward`` returns raw, unnormalised class scores (logits) for 22 classes.
    ``nn.CrossEntropyLoss`` applies log-softmax itself, so the head must not end
    in a Softmax layer; call ``torch.softmax(logits, dim=1)`` where probabilities
    are needed. The argmax (predicted class) is the same either way.
    """

    def __init__(self):
        super().__init__()
        self.attention_1 = nn.Sequential(*[AttentionBlock() for _ in range(5)])
        self.attention_2 = nn.Sequential(*[AttentionBlock() for _ in range(5)])
        self.flatten = nn.Flatten()
        # The head expects 8000 flattened features per sample.
        # NOTE(review): presumably 100 positions x 80 features, i.e. inputs of
        # shape (batch, 100, 80) - confirm against get_data_loader.
        self.dense = nn.Sequential(
            nn.Linear(8000, 1000),
            nn.ReLU(),
            # No Softmax here: the loss below expects logits. Dropping the
            # parameter-free Softmax leaves the state_dict keys unchanged.
            nn.Linear(1000, 22),
        )
        self.loss = nn.CrossEntropyLoss()

    def forward(self, X):
        """Compute class logits for a batch.

        Args:
            X: Input batch; after the attention stacks it is flattened from
                dim 1 onwards and must yield 8000 features per sample.

        Returns:
            Tensor of shape ``(batch, 22)`` holding unnormalised class scores.
        """
        output = self.attention_1(X)
        # Second stack sees the first stack's output plus the original input.
        output = self.attention_2(output + X)
        output = self.flatten(output)
        output = self.dense(output)
        return output

    def configure_optimizers(self):
        """Return Adam (lr=1e-3) with a plateau scheduler driven by ``val_accuracy``.

        The scheduler uses ``mode="max"`` because the monitored quantity is an
        accuracy: the learning rate is reduced only after it has stopped
        increasing for more than ``patience`` epochs. With the default
        ``mode="min"`` every improvement in accuracy would count as a bad epoch.
        """
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)
        scheduler = ReduceLROnPlateau(optimizer, mode="max", patience=5)
        return [optimizer], [
            {
                "scheduler": scheduler,
                "interval": "epoch",
                # NOTE(review): "val_accuracy" is presumably logged by
                # LightningBaseModule - confirm the metric name there.
                "monitor": "val_accuracy",
                "frequency": 1,
            }
        ]

In [None]:
# Stop training once validation accuracy has failed to improve by at least
# 1e-4 for 10 consecutive validation checks.
callbacks = [
    EarlyStopping(
        monitor="val_accuracy",
        mode="max",
        patience=10,
        min_delta=1e-4,
    )
]

# No explicit .cuda(): the Trainer moves the model to the accelerator it
# selects, so this cell also runs on machines without a GPU.
model = AttentionModel()
trainer = pl.Trainer(max_epochs=100, callbacks=callbacks)
trainer.fit(model, train_loader, val_loader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: /workspace/lightning_logs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name             | Type             | Params
------------------------------------------------------
0 | attention        | Sequential       | 98.8 K
1 | flatten          | Flatten          | 0     
2 | linear           | Linear           | 120 K 
3 | final_activation | Softmax          | 0     
4 | loss             | CrossEntropyLoss | 0     
------------------------------------------------------
218 K     Trainable params
0         Non-trainable params
218 K     Total params
0.875     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:492: Your `val_dataloader`'s sampler has shuffling enabled, it is strongly recommended that you turn shuffling off for val/test dataloaders.
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.
/usr/local/lib/python3.10/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=95` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]