In [1]:
import sys
sys.path.append('../../')

from dl.data.txtdetn import datasets, utils, target_transforms, augmentations
from dl.data import transforms
from dl.models.fots import FOTSRes50
from dl.loss.fots import FOTSLoss

from dl.optim.scheduler import IterMultiStepLR
from dl.log import *

# NOTE: do NOT import torchvision.transforms here -- it would shadow dl.data.transforms imported above.
from torch.utils.data import DataLoader
from torch.optim.adam import Adam

import numpy as np
%matplotlib notebook

In [2]:
# Build the training data pipeline (augmentation -> transforms -> dataset ->
# loader) and the FOTS model.

# Character set used both to encode the text targets and to size the model's
# recognition output; bound once so the two can never drift apart.
char_labels = datasets.SynthText_char_labels_without_upper_blank

# Augmentations handed to the dataset, composed in the order listed.
augmentation = augmentations.Compose([
    augmentations.RandomLongerResize(smin=640, smax=2560),
    # fill_rgb is a 3-channel value. The previous literal `116,779` used a comma
    # instead of a decimal point, which silently produced the 4-tuple
    # (103.939, 116, 779, 123.68) with an out-of-range channel value.
    # NOTE(review): 103.939 / 116.779 / 123.68 are the well-known Caffe ImageNet
    # means, conventionally quoted in B, G, R order -- confirm the channel order
    # RandomRotate expects for `fill_rgb`.
    augmentations.RandomRotate(fill_rgb=(103.939, 116.779, 123.68), amin=-10, amax=10, fit=True),
    augmentations.RandomScaleV(smin=0.8, smax=1.2, keep_aspect=True),
    augmentations.RandomSimpleCrop(),
])

ignore = target_transforms.Ignore(strange=True)

# Image-side transforms: fixed 640x640 input, tensor conversion, then
# per-channel normalisation (the values are the commonly used ImageNet
# mean / std for inputs scaled to [0, 1]).
transform = transforms.Compose([
    transforms.Resize((640, 640)),
    transforms.ToTensor(),
    transforms.Normalize(rgb_means=(0.485, 0.456, 0.406), rgb_stds=(0.229, 0.224, 0.225)),
])

# Target-side transforms: encode text with `char_labels`, then convert to tensors.
target_transform = target_transforms.Compose([
    target_transforms.Text2Number(class_labels=char_labels, ignore_nolabel=False),
    target_transforms.ToTensor(textTensor=True),
])

train_dataset = datasets.SynthTextDetectionDataset(
    ignore=ignore,
    transform=transform,
    target_transform=target_transform,
    augmentation=augmentation,
    onlyAlphaNumeric=False,
)

# The project-specific collate function builds the batches
# (presumably because the number of text boxes varies per image -- TODO confirm).
train_loader = DataLoader(
    train_dataset,
    batch_size=8,
    shuffle=True,
    collate_fn=utils.batch_ind_fn,
    num_workers=4,
    pin_memory=True,
)

# Requires a CUDA device: the model is moved to the GPU immediately.
model = FOTSRes50(chars=char_labels, input_shape=(None, None, 3)).cuda()
print(model)


FOTSRes50(
  (feature_extractor): SharedConvRes50(
    (conv1): Sequential(
      (0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): ReLU(inplace=True)
    )
    (pool1): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (res2): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)

In [3]:
# Configure optimisation and launch iteration-based training.

# Iterations per epoch; matches the "[9999/107344]" progress counter printed
# by the training log for this dataset / batch size.
ITERS_PER_EPOCH = 107344
NUM_EPOCHS = 10
MAX_ITERS = ITERS_PER_EPOCH * NUM_EPOCHS  # 1,073,440 iterations in total

# The learning rate is multiplied by LR_GAMMA every LR_STEP iterations.
LR_STEP = 10000
LR_GAMMA = 0.94

optimizer = Adam(model.parameters(), lr=1e-3, weight_decay=5e-4)

# The (misspelled) name `iter_sheduler` is kept on purpose so that any other
# cell referring to it keeps working; the redundant chained self-assignment
# of the original has been removed.
iter_sheduler = IterMultiStepLR(
    optimizer,
    milestones=np.arange(LR_STEP, MAX_ITERS, LR_STEP),
    gamma=LR_GAMMA,
    verbose=True,
)

save_manager = SaveManager(modelname='fots', interval=ITERS_PER_EPOCH, max_checkpoints=10, plot_interval=1000)

trainer = TrainTextSpottingJupyterLogger(
    LiveGraph(yrange=(0, 4)),
    FOTSLoss(),
    model,
    optimizer,
    scheduler=iter_sheduler,
)

# Alternative epoch-based call, kept for reference:
#   trainer.train_epoch(save_manager, NUM_EPOCHS, train_loader)
trainer.train_iter(save_manager, MAX_ITERS, train_loader)

Input any key. [n]/y


y




<IPython.core.display.Javascript object>

Training... Epoch: 0, Iter: 9999,	 9%[9999/107344]	total: 3.384332 detection: 1.352844 recognition: 2.031487 	Iter time: 0.4839
Iteration reached milestone: 10000. Change lr=[0.001] to [0.00094]

Training... Epoch: 0, Iter: 19999,	 19%[19999/107344]	total: 2.689136 detection: 1.106788 recognition: 1.582348 	Iter time: 0.4859
Iteration reached milestone: 20000. Change lr=[0.00094] to [0.0008835999999999999]

Training... Epoch: 0, Iter: 29999,	 28%[29999/107344]	total: 2.701028 detection: 1.154019 recognition: 1.547009 	Iter time: 0.5725
Iteration reached milestone: 30000. Change lr=[0.0008835999999999999] to [0.0008305839999999999]

Training... Epoch: 0, Iter: 39999,	 37%[39999/107344]	total: 1.558081 detection: 0.775543 recognition: 0.782538 	Iter time: 0.5262
Iteration reached milestone: 40000. Change lr=[0.0008305839999999999] to [0.0007807489599999998]

Training... Epoch: 0, Iter: 49999,	 47%[49999/107344]	total: 3.117398 detection: 1.758112 recognition: 1.359286 	Iter time: 0.4937


KeyboardInterrupt: 