In [1]:
import numpy as np
import torch

In [3]:
import sys
import os

# Include parent dir
SCRIPT_DIR = os.path.dirname(os.path.abspath('play.ipynb'))
sys.path.append(os.path.dirname(SCRIPT_DIR))

In [3]:
array = [55, 60, 50, 45]
np.mean(array)

52.5

### GRADIENT CONFORMER COMPUTATION

In [52]:
from models.eeg_conformer import Conformer, ConformerConfig

In [53]:
mdl = Conformer(ConformerConfig)

Number of parameters: 0.41M


In [54]:
mdl.apply(mdl.init_weights)

  nn.init.kaiming_normal(module.weight, mode= 'fan_out', nonlinearity='relu')


Conformer(
  (embed): PatchEmbedding(
    (shallownet): ModuleList(
      (0): Conv2d(1, 40, kernel_size=(1, 8), stride=(1, 1))
      (1): Conv2d(40, 40, kernel_size=(64, 1), stride=(1, 1))
      (2): BatchNorm2d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (3): ELU(alpha=1.0)
      (4): AvgPool2d(kernel_size=(1, 20), stride=(1, 4), padding=0)
      (5): Dropout(p=0.4, inplace=False)
    )
    (projection): Conv2d(40, 40, kernel_size=(1, 1), stride=(1, 1))
  )
  (encoder): TransformerEncoder(
    (0): EncoderBlock(
      (ln_1): LayerNorm()
      (attn): TemporalSelfAttention(
        (w_q): Linear(in_features=40, out_features=40, bias=True)
        (w_k): Linear(in_features=40, out_features=40, bias=True)
        (w_v): Linear(in_features=40, out_features=40, bias=True)
        (c_proj): Linear(in_features=40, out_features=40, bias=True)
        (attn_dropout): Dropout(p=0.4, inplace=False)
        (resid_dropout): Dropout(p=0.4, inplace=False)
      )
   

In [55]:
x = torch.randn((64, 64, 128))
target = torch.randn(64)
logits, loss = mdl(x, target)
logits.shape, loss

(torch.Size([64]), tensor(0.0422, grad_fn=<NegBackward0>))

In [56]:
loss.backward()

In [57]:
for name, param in mdl.named_parameters():
    if param is not None:
        print(f'Layer {name}: {param.grad}')

Layer embed.shallownet.0.weight: tensor([[[[-1.1589e-01, -1.5722e-01, -1.4105e-01, -1.7125e-01, -1.8921e-01,
           -1.4967e-01, -2.0296e-01, -2.3040e-01]]],


        [[[-8.7813e-02, -7.5573e-02, -8.4988e-02, -4.3029e-02, -2.5184e-02,
           -1.1964e-02, -5.2214e-02, -4.5621e-02]]],


        [[[ 1.7741e-02,  1.9373e-02,  4.6946e-02,  5.7790e-02,  7.1441e-02,
            1.4077e-01,  9.1989e-02,  1.0884e-01]]],


        [[[ 9.3034e-02,  7.0245e-02, -3.6819e-05,  4.3412e-03,  9.4979e-02,
            1.1864e-01,  6.5652e-02,  1.2992e-01]]],


        [[[-1.6778e-01, -1.4607e-01, -1.6653e-01, -3.4871e-02,  3.6625e-02,
            7.3078e-02,  7.1132e-02,  4.2567e-02]]],


        [[[-4.3703e-02, -1.1809e-01, -3.7278e-02, -1.0814e-01, -3.8427e-02,
           -7.2157e-02, -4.8850e-02, -8.6604e-02]]],


        [[[ 1.5178e-02,  3.9372e-02, -3.1354e-02, -1.9728e-02,  9.2279e-02,
           -3.2841e-03,  3.6951e-02, -5.6273e-02]]],


        [[[-3.9252e-02, -4.3034e-02, -2.0035e-02, 

### TENSORBOARD SAVING HISTOGRAM GRAD

In [58]:
from torch.utils.tensorboard import SummaryWriter

In [59]:
writer = SummaryWriter()

In [60]:
for name, param in mdl.named_parameters():
    if param is not None:
        # SAVING NAME, GRAD, EPOCH
        writer.add_histogram(f'{name}.grad', param.grad, 1)

### WANDB GRADIENT SAVING

In [61]:
import wandb

In [62]:
wandb.init(project='gradient_tracking')

VBox(children=(Label(value='0.074 MB of 0.074 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
epoch,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
epoch,0


In [46]:
for name, param in mdl.named_parameters():
    if param.grad is not None and 'bias' not in name:
        # detach that eliminates property of the tensor of requiring grad
        wandb.log({f"{name}_grad": wandb.Histogram(param.cpu().detach().numpy()), "epoch": 0}) 

In [47]:
for name, param in mdl.named_parameters():
    if param.grad is not None and 'bias' not in name:
        print(name)

embed.shallownet.0.weight
embed.shallownet.1.weight
embed.shallownet.2.weight
embed.projection.weight
encoder.0.ln_1.weight
encoder.0.attn.w_q.weight
encoder.0.attn.w_k.weight
encoder.0.attn.w_v.weight
encoder.0.attn.c_proj.weight
encoder.0.ln_2.weight
encoder.0.mlp.c_fc.weight
encoder.0.mlp.c_proj.weight
encoder.1.ln_1.weight
encoder.1.attn.w_q.weight
encoder.1.attn.w_k.weight
encoder.1.attn.w_v.weight
encoder.1.attn.c_proj.weight
encoder.1.ln_2.weight
encoder.1.mlp.c_fc.weight
encoder.1.mlp.c_proj.weight
classif.fc.0.weight
classif.fc.3.weight
classif.fc.6.weight


In [49]:
desired_layers = ['classif.fc.0.weight', 
                  'classif.fc.6.weight', 
                  'embed.shallownet.0.weight',
                  'embed.shallownet.2.weight',
                  'encoder.0.attn.w_q.weight',
                  'encoder.3.attn.w_q.weight',
                  'encoder.0.mlp.c_fc.weight',
                  'encoder.3.mlp.c_fc.weight',
                ]

In [50]:
for name, param in mdl.named_parameters():
    if param.grad is not None and name in desired_layers:
        # detach that eliminates property of the tensor of requiring grad
        wandb.log({f"{name}_grad": wandb.Histogram(param.cpu().detach().numpy()), "epoch": 0}) 

### WANDB WATCH

In [4]:
from utils.datasets import CustomDataset
from utils.functional import get_data_path
from torch.utils.data import DataLoader

In [5]:
data_path = get_data_path('C:/Users/jaulab/Desktop/AAD/Data', 'fulsang', filt=False)
dataset = CustomDataset('fulsang', data_path, 'train', 'S1', 128, 1)

In [6]:
data_loader = DataLoader(dataset, batch_size=128, shuffle=True)

In [7]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [8]:
mdl.to(device)
optimizer = torch.optim.Adam(mdl.parameters(), lr=1e-06, weight_decay = 1e-04)

NameError: name 'mdl' is not defined

In [88]:
import tqdm

In [91]:
wandb.init(project='gradient_tracking')
wandb.watch(models = mdl, log='all', log_freq=len(data_loader))

max_epoch = 5

for epoch in range(max_epoch):

    tqdm_loader = tqdm.tqdm(data_loader, desc=f'Epoch: {epoch}', leave = False, mininterval=0.5)
    
    epoch_losses = []

    for batch_idx, data in enumerate(tqdm_loader):

        eeg = data['eeg'].to(device, dtype=torch.float)
        stima = data['stima'].to(device, dtype=torch.float)

        preds, loss = mdl(eeg, targets = stima)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_losses.append(loss)

    print(f'loss: {torch.mean(loss).item()}')
    wandb.log({'train_loss': -torch.mean(loss).item()})

VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

                                                           

loss: -0.10746944695711136


                                                           

loss: -0.03581579402089119


                                                           

loss: 0.10303233563899994


                                                           

loss: -0.17529068887233734


                                                           

loss: 0.03303400054574013


                                                           

loss: -0.12475737184286118


                                                           

loss: -0.01291625201702118


                                                           

KeyboardInterrupt: 

In [4]:
from models.dnn import FCNN, CNN

In [5]:
mdl = FCNN(n_hidden=3)

In [6]:
mdl = CNN()

In [7]:
for layer, param in mdl.named_parameters():
    print(layer)

temporal.0.weight
temporal.0.bias
temporal.1.weight
temporal.1.bias
spatial.0.weight
spatial.0.bias
spatial.1.weight
spatial.1.bias
depthwise.0.weight
depthwise.0.bias
depthwise.1.weight
depthwise.1.bias
depthwise.2.weight
depthwise.2.bias
classifier.1.weight
classifier.1.bias


In [9]:
x = torch.randn((64,64,50))
preds, loss = mdl(x)
preds.shape, loss

(torch.Size([64]), None)