<a href="https://colab.research.google.com/github/datvodinh10/project-DD/blob/master/main.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import

In [None]:
!pip install gdown
!gdown 1dQ7dqVnBfp4STYMVsnkiLfiAIdFwrSkd # Training data
!gdown 1YedVnk4uKFBPInsa6Mzik0bmZK2Vuf4e # Target label data
#!gdown model_weight gdrive id 
!git clone https://github.com/datvodinh10/project-DD.git
%cd project-DD
from src.model.trainer import Trainer
%cd ..

In [None]:
import torch

## Train

In [None]:
# Use the first CUDA device when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

In [None]:
# Single configuration dictionary handed to the Trainer.
config = {
    # --- sequence transformer ---
    'transformer': {
        'embed_size': 384,            # hidden size of the model
        'num_heads': 8,               # heads in MSA
        'num_layers': 4,              # encoder/decoder layer count
        'max_len': 320,               # maximum sequence length
        'dropout': 0.1,               # dropout rate
        'bias': False,                # attention bias
        'embed_type': 'position',     # one of {'position', 'learned'}
    },
    # --- image encoder ---
    'encoder': {
        # one of {'swin_transformer', 'swin_transformer_v2', 'resnet18', 'resnet50', 'vgg'}
        'type': 'swin_transformer_v2',
        'swin': {
            'patch_size': [4, 4],           # patch size the swin model divides the image into
            'embed_dim': 48,                # hidden size of the model
            'window_size': [2, 6],          # window shifted size
            'dropout': 0.1,                 # dropout rate
            'depths': [2, 2, 6, 2],         # depth of the swin model
            'num_heads': [3, 6, 12, 24],    # heads in W-MSA
        },
    },
    # --- decoder ---
    'decoder': {
        'type': 'transformer',        # one of {transformer, ctc}
    },
    # --- learning-rate scheduler ---
    'scheduler': {
        'active': True,               # whether the scheduler is active
        'first_cycle_steps': 400,     # steps in each cycle
        'cycle_mult': 1,              # cycle multiplier
        'max_lr': 5e-4,               # maximum lr of each cycle
        'min_lr': 3e-5,               # minimum lr of each cycle
        'warmup_steps': 40,           # steps to warm up to max lr (total/10)
        'gamma': 0.98,                # decay of max lr per cycle
    },
    # --- data loading ---
    'dataloader': {
        'num_workers': 0,
        # cluster by target length or image width:
        # one of {'cluster_target', 'cluster_image', 'normal'}
        'type': 'normal',
    },
    # --- general training options ---
    'img_size': (64, 192),            # image size
    'device': device,                 # gpu or cpu
    'lr': 1e-4,                       # learning rate
    'padding': True,                  # padding for MSA and loss
    'enhancing': True,                # auto contrast and sharpen
    'label_smoothing': 0.1,           # label smoothing for CrossEntropyLoss
    'max_grad_norm': 0.5,             # max gradient on backward
    'batch_size': 256,                # batch size
    'num_epochs': 200,                # total number of training epochs
    'save_per_epochs': 1,             # save every this many epochs
    'print_type': 'per_epoch',        # one of {'per_epoch', 'per_batch'}
}

In [None]:
# !unzip -q /content/training_data.zip
# SRC_PATH = "/content/new_train"
# TARGET_PATH = "/content/train_gt.txt"
# MODEL_PATH = "/content"

!unzip -q /kaggle/working/training_data.zip
SRC_PATH = "/kaggle/working/new_train"
TARGET_PATH = "/kaggle/working/train_gt.txt"
MODEL_PATH = f"/kaggle/working/model_{config['encoder']['type']}_{config['num_epochs']}.pt"

In [None]:
# Build the trainer from the configuration, the image folder, the target-label
# file and the model path defined in the earlier cells.
trainer = Trainer(
    config=config,
    IMAGE_PATH=SRC_PATH,
    TARGET_PATH=TARGET_PATH,
    MODEL_PATH=MODEL_PATH,
)

In [None]:
def count_parameters(model):
    """Count the elements of every trainable parameter of ``model``.

    Args:
        model: object exposing ``parameters()``, whose items provide
            ``requires_grad`` and ``numel()``.

    Returns:
        Sum of ``numel()`` over the parameters whose ``requires_grad`` is
        truthy; parameters with ``requires_grad`` false are not counted.
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

# count_parameters only sums parameters with requires_grad set, so any frozen
# weights are not included in the number printed here.
num_trainable = count_parameters(trainer.model)
print(f'Total parameters: {num_trainable}')

In [None]:
# Start the training run; the loop itself is implemented by Trainer.train in the
# project-DD repository and is driven by the config passed at construction.
trainer.train()