In [1]:
from tool.config import Cfg
import torchvision
import os
import lmdb
from tqdm import tqdm
import numpy as np
import cv2

In [2]:
def writeCache(env, cache):
    with env.begin(write=True) as txn:
        for k, v in cache.items():
            txn.put(k.encode(), v)
            
def checkImageIsValid(imageBin):
    isvalid = True
    imgH = None
    imgW = None

    imageBuf = np.fromstring(imageBin, dtype=np.uint8)
    try:
        img = cv2.imdecode(imageBuf, cv2.IMREAD_GRAYSCALE)

        imgH, imgW = img.shape[0], img.shape[1]
        if imgH * imgW == 0:
            isvalid = False
    except Exception as e:
        isvalid = False

    return isvalid, imgH, imgW

In [3]:
config = Cfg.load_config_from_name('vgg_transformer')

In [4]:
dataset_params = {
    'name':'my_data',
    'data_root':'datasets',
    'train_annotation':'train.txt',
    'valid_annotation':'val.txt'
}

params = {
         'print_every':200,
         'valid_every':15*200,
          'iters':20000,
          'checkpoint':'./checkpoint/my_transformer.pth',    
          'export':'./weights/my_transformer.pth',
          'metrics': 10000
         }

config['trainer'].update(params)
config['dataset'].update(dataset_params)
config['device'] = 'cuda:0'

In [5]:
config

{'pretrain': {'id_or_url': '13327Y1tz1ohsm5YZMyXVMPIOjoOA0OaA',
  'md5': 'fbefa85079ad9001a71eb1bf47a93785',
  'cached': '/tmp/tranformerorc.pth'},
 'weights': 'https://drive.google.com/uc?id=13327Y1tz1ohsm5YZMyXVMPIOjoOA0OaA',
 'backbone': 'vgg19_bn',
 'cnn': {'ss': [[2, 2], [2, 2], [2, 1], [2, 1], [1, 1]],
  'ks': [[2, 2], [2, 2], [2, 1], [2, 1], [1, 1]],
  'hidden': 256},
 'vocab': 'aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ ',
 'device': 'cuda:0',
 'seq_modeling': 'transformer',
 'transformer': {'d_model': 256,
  'nhead': 8,
  'num_encoder_layers': 6,
  'num_decoder_layers': 6,
  'dim_feedforward': 2048,
  'max_seq_length': 1024,
  'pos_dropout': 0.1,
  'trans_dropout': 0.1},
 'optimizer': {'init_lr': 0.1, 'n_warmup_steps': 4000},
 'trainer': {'batch_size': 32,
  'print_every': 200,
  'valid_every

In [6]:
data_root = config['dataset']['data_root']
train_annotation = config['dataset']['train_annotation']
valid_annotation = config['dataset']['valid_annotation']
vocab = config['vocab']
dataset_name = config['dataset']['name']

In [7]:
transforms = torchvision.transforms.Compose([
    torchvision.transforms.ColorJitter(brightness=.1, contrast=.1, hue=.1, saturation=.1),
    torchvision.transforms.RandomAffine(degrees=0, scale=(3/4, 4/3))
])

In [8]:
lmdb_path='train_{}'.format(dataset_name)
root_dir=data_root
annotation_path = os.path.join(root_dir, train_annotation)
image_height=config['dataset']['image_height']
image_min_width=config['dataset']['image_min_width']
image_max_width=config['dataset']['image_max_width']

In [9]:
with open(annotation_path, 'r') as ann_file:
    lines = ann_file.readlines()
    annotations = [l.strip().split('\t') for l in lines]

In [10]:
annotations

[['images/1602473935.6425607.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.6505153.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.6596978.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.6647706.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.671753.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.6909418.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.7086434.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.713762.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.7318375.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.7396367.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.7458189.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.7673876.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.7770083.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.7841415.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.8060265.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.8121328.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.9753742.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.9825265.jpg', 'Hoàng đẹp trai'],
 ['images/1602473935.9967763.j

In [11]:
lmdb_path

'train_my_data'

In [17]:
nSamples = len(annotations)
env = lmdb.open(lmdb_path, map_size=1099511627776)
cache = {}
cnt = 0
error = 0

pbar = tqdm(range(nSamples), ncols = 100, desc='Create {}'.format(lmdb_path)) 
for i in pbar:
    imageFile, label = annotations[i]
    imagePath = os.path.join(root_dir, imageFile)

    if not os.path.exists(imagePath):
        print("Not exists path ", imagePath)
        error += 1
        continue

    with open(imagePath, 'rb') as f:
        imageBin = f.read()
    isvalid, imgH, imgW = checkImageIsValid(imageBin)
    
    if not isvalid:
        print("Image is not valid ", imagePath)
        error += 1
        continue

    imageKey = 'image-%09d' % cnt
    labelKey = 'label-%09d' % cnt
    pathKey = 'path-%09d' % cnt
    dimKey = 'dim-%09d' % cnt

    cache[imageKey] = imageBin
    cache[labelKey] = label.encode()
    cache[pathKey] = imageFile.encode()
    cache[dimKey] = np.array([imgH, imgW], dtype=np.int32).tobytes()

    cnt += 1

    if cnt % 1000 == 0:
        writeCache(env, cache)
        cache = {}
nSamples = cnt-1
cache['num-samples'] = str(nSamples).encode()
writeCache(env, cache)

  # This is added back by InteractiveShellApp.init_path()
Create train_my_data: 100%|█████████████████████████████████████| 251/251 [00:00<00:00, 4742.40it/s]


In [18]:
env = lmdb.open(
    lmdb_path,
    max_readers=8,
    readonly=True,
    lock=False,
    readahead=False,
    meminit=False)

In [19]:
txn = env.begin(write=False)

In [20]:
nSamples = int(txn.get('num-samples'.encode()))

In [21]:
nSamples

250