# Environment setup

In [1]:
import detectron2.utils.comm as comm
from detectron2.config import get_cfg
from detectron2.projects.deeplab import add_deeplab_config
from detectron2.utils.logger import setup_logger
from MODULES.MaskFormer.config import add_mask_former_config

In [2]:
# Build the frozen detectron2 config shared by the cells below.

CONFIG_FILE = "MODULES/MaskFormer/configs/custom/MaskAVSL_swin_base.yaml"

cfg = get_cfg()
# Register the DeepLab and MaskFormer option groups before the YAML is merged.
for register_options in (add_deeplab_config, add_mask_former_config):
    register_options(cfg)
# Allow keys beyond the registered defaults (presumably needed for `eval_only`
# and any custom entries in the YAML -- TODO confirm).
cfg.set_new_allowed(True)
cfg.merge_from_file(CONFIG_FILE)

# Notebook-level overrides applied on top of the values read from the YAML.
cfg.MODEL.DEVICE = "cuda:2"
cfg.SOLVER.IMS_PER_BATCH = 4
cfg.eval_only = False

# Freeze so that later cells cannot modify the config by accident.
cfg.freeze()

# Create the "MaskAVSL" logger; being the last expression of the cell, the
# returned logger object is echoed as the cell output.
setup_logger(
    output=cfg.OUTPUT_DIR,
    distributed_rank=comm.get_rank(),
    name="MaskAVSL",
)

Loading config MODULES/MaskFormer/configs/custom/MaskAVSL_swin_base.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.


<Logger MaskAVSL (DEBUG)>

# Trainer

In [3]:
# Project-local training wrapper (defined in trainer.py, not shown in this notebook).
from trainer import Trainer

# Build the trainer from the frozen config. Judging by the recorded output,
# construction also loads the train and evaluation video lists.
trainer = Trainer(cfg)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


232045 of train videos have loaded
43 of evaluation videos have loaded


In [10]:
# Move the trainer's model to the CPU. The call is the last expression of the
# cell, so the returned module is echoed and its whole architecture is printed.
trainer.model.to('cpu')

AVSLModel(
  (backbone): D2SwinTransformer(
    (patch_embed): PatchEmbed(
      (proj): Conv2d(3, 128, kernel_size=(4, 4), stride=(4, 4))
      (norm): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
    )
    (pos_drop): Dropout(p=0.0, inplace=False)
    (layers): ModuleList(
      (0): BasicLayer(
        (blocks): ModuleList(
          (0): SwinTransformerBlock(
            (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
            (attn): WindowAttention(
              (qkv): Linear(in_features=128, out_features=384, bias=True)
              (attn_drop): Dropout(p=0.0, inplace=False)
              (proj): Linear(in_features=128, out_features=128, bias=True)
              (proj_drop): Dropout(p=0.0, inplace=False)
              (softmax): Softmax(dim=-1)
            )
            (drop_path): Identity()
            (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
            (mlp): Mlp(
              (fc1): Linear(in_features=128, out_features=51

In [7]:
# Total number of scalar parameters held by the model.
param_counts = [p.numel() for p in trainer.model.parameters()]
tp = sum(param_counts)

# Rough model size in MiB, assuming every parameter takes 4 bytes.
tsmb = (tp * 4) / 2**20
tsmb

1258.6978569030762

In [8]:
def _total_bytes(tensors):
    """Return the sum of ``nelement() * element_size()`` over ``tensors``."""
    return sum(t.nelement() * t.element_size() for t in tensors)


# Byte counts of the model's parameters and of its registered buffers.
mem_params = _total_bytes(trainer.model.parameters())
mem_bufs = _total_bytes(trainer.model.buffers())
mem = mem_params + mem_bufs  # in bytes

In [11]:
import torch

# torch.cuda.memory_allocated() reports the CUDA caching allocator's usage for
# a CUDA device; 'cpu' is not a valid argument for it, so the value obtained
# with 'cpu' says nothing about GPU memory. Query the GPU named in the config
# instead, and fall back to 0.0 on machines without CUDA.
if torch.cuda.is_available():
    allocated_mib = (
        torch.cuda.memory_allocated(torch.device(cfg.MODEL.DEVICE)) / 1024 / 1024
    )
else:
    allocated_mib = 0.0

# Memory currently occupied by tensors on the configured GPU, in MiB.
allocated_mib

0.0

In [None]:
# Start training through the project's Trainer, passing epochs=10
# (presumably the number of passes over the training set -- TODO confirm
# against trainer.py).
trainer.train(epochs = 10)

### Trainer code

In [None]:
import os
from tqdm import tqdm, trange
import torch
from torch import nn
from torch.optim import Adam, lr_scheduler

from MODULES.AVSL_model import AVSLModel

from DATALOADER import VideoDataLoader

In [None]:
# Device named in the config. It is only resolved here; this cell does not
# move the model onto it.
device = torch.device(cfg.MODEL.DEVICE)

model = AVSLModel(cfg, training=True)
# NOTE(review): an explicit `model.to(device)` was left disabled by the author;
# presumably AVSLModel handles device placement itself -- confirm.
model.zero_grad()
model.train()

# Adam over every model parameter (back-propagation target); the learning
# rate is an arbitrary starting value.
optimizer = Adam(model.parameters(), lr=0.003)

# StepLR multiplies the learning rate by `gamma` once every `step_size`
# calls to scheduler.step().
scheduler = lr_scheduler.StepLR(optimizer, step_size=100, gamma=0.9)

# Training loader built from the raw video directory.
folder_path = 'DATA/videos'
train_dataloader = VideoDataLoader(cfg, folder_path)

# Length of the loader, printed as a quick sanity check.
train_steps = len(train_dataloader)
print(train_steps)

In [None]:
# Manual training loop over `train_dataloader` using the model, optimizer and
# scheduler built in the previous cell.
epochs = 10
total_step = len(train_dataloader)
model.train()

# Epoch loop
for epoch in range(epochs):

    # Step loop
    for i, batch in enumerate(train_dataloader):

        # Forward pass: the model returns a dict of named loss terms.
        loss = model(batch)
        # Add all terms into the single scalar that is back-propagated.
        # Summing the values directly avoids seeding the total with an
        # integer CPU tensor.
        losses = sum(loss.values())

        # Backward and optimize
        optimizer.zero_grad()
        losses.backward()
        optimizer.step()
        # Advance the StepLR schedule once per optimisation step; without this
        # call the learning rate stays at its initial value for the whole run.
        scheduler.step()

        print('Epoch [{}/{}], Step [{}/{}], Loss: {}'
              .format(epoch + 1, epochs, i + 1, total_step, loss))

# Dataset

In [None]:
from DATALOADER import VideoDataLoader

# Instantiate the project's video loader over the raw video directory and
# print its length as a sanity check.
folder_path = 'DATA/videos'
train_dataloader = VideoDataLoader(cfg, folder_path)
print(len(train_dataloader))

# Pull the first item from the loader; later cells feed it to the model.
batch_iterator = iter(train_dataloader)
data = next(batch_iterator)

# AVSLModel

In [None]:
# Project-local model class (defined in MODULES/AVSL_model, not shown here).
from MODULES.AVSL_model import AVSLModel

# Stand-alone model instance for inspection, built from the frozen config
# with training=True.
avsl_model = AVSLModel(cfg, training=True)

In [None]:
# Run one forward pass on the sample taken from the loader. The cells below
# treat the result as a mapping (they call .keys() and index by key).
outputs = avsl_model(data)

In [None]:
# Display the criterion's weight_dict (presumably the per-loss weighting
# factors -- TODO confirm against the criterion implementation).
avsl_model.criterion.weight_dict

In [None]:
# List the names of the entries returned by the forward pass.
outputs.keys()

In [None]:
# Dump every entry of the forward-pass result: its name first, then its value.
# Assumes `outputs` is a dict-like mapping that provides .items().
for key, value in outputs.items():
    print(key)
    print(value)

In [None]:
# Dump every entry of the forward-pass result: its name first, then its value.
# Assumes `outputs` is a dict-like mapping that provides .items().
for key, value in outputs.items():
    print(key)
    print(value)

In [None]:
# Dump every entry of the forward-pass result: its name first, then its value.
# Assumes `outputs` is a dict-like mapping that provides .items().
for key, value in outputs.items():
    print(key)
    print(value)

In [None]:
# Dump every entry of the forward-pass result: its name first, then its value.
# Assumes `outputs` is a dict-like mapping that provides .items().
for key, value in outputs.items():
    print(key)
    print(value)

# LightTrainer

In [2]:
# Build the frozen detectron2 config used by the LightTrainer cells below.

CONFIG_FILE = "MODULES/MaskFormer/configs/custom/MaskAVSL_swin_base.yaml"

cfg = get_cfg()
# Register the DeepLab and MaskFormer option groups before the YAML is merged.
for register_options in (add_deeplab_config, add_mask_former_config):
    register_options(cfg)
# Allow keys beyond the registered defaults (presumably needed for custom
# entries in the YAML -- TODO confirm).
cfg.set_new_allowed(True)
cfg.merge_from_file(CONFIG_FILE)

# Notebook-level overrides applied on top of the values read from the YAML.
cfg.MODEL.DEVICE = "cuda:1"
cfg.SOLVER.IMS_PER_BATCH = 4
# NOTE(review): the recorded training run with this config stops with a
# RuntimeError (a 1x1 conv expecting 256 input channels receives 1024).
# This override is a likely suspect -- confirm against the model code.
cfg.MODEL.MASK_FORMER.TRANSFORMER_IN_FEATURE = "res5"

# Freeze so that later cells cannot modify the config by accident.
cfg.freeze()

# Create the "MaskAVSL" logger; being the last expression of the cell, the
# returned logger object is echoed as the cell output.
setup_logger(
    output=cfg.OUTPUT_DIR,
    distributed_rank=comm.get_rank(),
    name="MaskAVSL",
)

Loading config MODULES/MaskFormer/configs/custom/MaskAVSL_swin_base.yaml with yaml.unsafe_load. Your machine may be at risk if the file contains malicious content.


<Logger MaskAVSL (DEBUG)>

In [3]:
# Project-local lightweight trainer (defined in trainer.py, not shown here).
from trainer import LightTrainer

# Build the trainer from the frozen config. Judging by the recorded output,
# construction also loads the list of training videos.
trainer = LightTrainer(cfg)

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


221831 of train videos have loaded


In [8]:
# Start training, passing 10 positionally (presumably the epoch count --
# TODO confirm against trainer.py).
# NOTE(review): the recorded run of this cell ends in a RuntimeError about a
# channel mismatch (256 expected, 1024 received).
trainer.train(10)

  max_size = (max_size + (stride - 1)) // stride * stride
  dim_t = self.temperature ** (2 * (dim_t // 2) / self.num_pos_feats)


RuntimeError: Given groups=1, weight of size [128, 256, 1, 1], expected input[4, 1024, 20, 20] to have 256 channels, but got 1024 channels instead