## Dataset preparation

In [1]:
#pip install -e .
#pip install  abbyy_course_cvdl_t2

from pathlib import Path
from course_ocr_t1.data import MidvPackage
from tqdm import tqdm
from matplotlib import pyplot as plt
import numpy as np

import torch

from task1pack.utils.data import HeatmapDataset

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location of the compressed MIDV-500 dataset, relative to the notebook's working directory.
# Point this at another directory (for example a copy under a Downloads folder) if needed.
DATASET_PATH = Path('..', '..', 'data', 'midv500_compressed')
# Fail early, reporting the resolved absolute path, when the dataset directory is missing.
assert DATASET_PATH.exists(), DATASET_PATH.absolute()

In [3]:
# Collect the list of packages (MidvPackage) found under the dataset directory.
data_packs = MidvPackage.read_midv500_dataset(DATASET_PATH)
# Display how many packages were read and the type of the first one.
(len(data_packs), type(data_packs[0]))

(50, course_ocr_t1.data.MidvPackage)

In [4]:
from torchvision.transforms import Resize, Compose, ToTensor

# Size that every input image is resized to by the preprocessing pipeline below.
IMAGE_SIZE = [512, 512]
# Size handed to the datasets as ``output_size`` and to the model configuration.
HEATMAP_SIZE = [128, 128]

# Preprocessing pipeline: convert the image to a tensor first, then resize it to IMAGE_SIZE.
preprocessing_steps = [ToTensor(), Resize(IMAGE_SIZE)]
transforms = Compose(preprocessing_steps)

In [5]:
# Arguments shared by both splits; only ``split`` differs between the two datasets.
dataset_kwargs = dict(
    data_packs=data_packs,
    transforms=transforms,
    output_size=HEATMAP_SIZE,
)
train_dataset = HeatmapDataset(split='train', **dataset_kwargs)
test_dataset = HeatmapDataset(split='test', **dataset_kwargs)

# Display the number of samples in each split.
(len(train_dataset), len(test_dataset))

(10750, 4250)

In [6]:
# Sanity-check one training sample: shapes of the image tensor and of its target.
# The sample is fetched once so the dataset does not load and transform it twice.
sample_image, sample_target = train_dataset[0]
sample_image.shape, sample_target.shape

(torch.Size([3, 512, 512]), torch.Size([4, 2]))

In [7]:
import random

# Seed every RNG the pipeline may draw from (Python, NumPy, torch), not only torch's,
# so that a Restart & Run All reproduces the same run.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
# Kept as the last expression so the cell still displays the torch Generator.
torch.manual_seed(SEED)

<torch._C.Generator at 0x7f2d1c8fb530>

In [8]:
# Prefer the GPU with index 2 (the one originally used for this run), but fall back
# to the first GPU, or to the CPU, so the notebook still runs on machines with fewer devices.
if torch.cuda.device_count() > 2:
    device = 'cuda:2'
elif torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

## Model

In [9]:
from task1pack.models.hrnet import get_pose_net
from task1pack.models.hrnet_config import cfg

In [10]:
# Make the model configuration agree with the image / heatmap sizes used by the datasets.
model_cfg = cfg['MODEL']
model_cfg['IMAGE_SIZE'] = IMAGE_SIZE
model_cfg['HEATMAP_SIZE'] = HEATMAP_SIZE

In [11]:
# Build the HRNet model from the configuration adjusted above, in training mode.
hrnet = get_pose_net(cfg=cfg, is_train=True)

### Train

In [12]:
from task1pack.utils.train import train_model, show_train_plots, train_old
from torch.nn import MSELoss
import wandb

In [14]:
# Name used to label the experiment.
model_name = 'HRNet'

# Keyword arguments forwarded to the training data loader.
train_dataloader_config = {
    'batch_size': 24,
    'shuffle': True,
}

# Keyword arguments forwarded to the test data loader (no shuffling for evaluation).
test_dataloader_config = {
    'batch_size': 24,
    'shuffle': False,
}

# Optimisation hyperparameters passed to train_model as ``training_kwargs``.
training_config = {
    'lr': 1e-3,
    'epochs': 150,
    'step_size': 5,
    'gamma': 0.8,
}

# Prefer the GPU with index 2 (the one originally used for this run), but fall back
# to the first GPU, or to the CPU, so the notebook still runs on machines with fewer devices.
if torch.cuda.device_count() > 2:
    device = 'cuda:2'
elif torch.cuda.is_available():
    device = 'cuda:0'
else:
    device = 'cpu'

# Loss minimised during training.
criterion = MSELoss()

# Human-readable run name that encodes the model and the main hyperparameters.
run_name = '{} {} epochs with lr={}, step={}, gamma={}'.format(
    model_name,
    *(training_config[key] for key in ('epochs', 'lr', 'step_size', 'gamma')),
)

# Settings recorded alongside the run in Weights & Biases.
run_config = {
    'train_dataloader_config': train_dataloader_config,
    'test_dataloader_config': test_dataloader_config,
    'training_config': training_config,
    'architecture': model_name,
    'dataset': 'MIDV-500',
    'criterion': 'MSELoss',
    'optimizer': 'Adam',
    'image_size': IMAGE_SIZE,
    'heatmap_size': HEATMAP_SIZE,
}

# Start the tracked run.
wandb.init(project='ocr task 1', name=run_name, config=run_config)

In [None]:
# Train the model; the call returns the train losses, the test losses and the trained model.
training_outputs = train_model(
    model=hrnet,
    criterion=criterion,
    device=device,
    train_dataset=train_dataset,
    test_dataset=test_dataset,
    train_dataloader_kwargs=train_dataloader_config,
    test_dataloader_kwargs=test_dataloader_config,
    training_kwargs=training_config,
    wandb_instance=wandb,
)
train_losses, test_losses, trained_model = training_outputs

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


initial loss 0.11798501759767532
epoch 0 started
train loss: 0.09071443165885285, test_loss: 0.03943267837166786
epoch 1 started
train loss: 0.04286523360808912, test_loss: 0.03320856764912605
epoch 2 started
train loss: 0.034160545933575905, test_loss: 0.020600154995918274
epoch 3 started
train loss: 0.03310414993445322, test_loss: 0.02037038467824459
epoch 4 started
train loss: 0.0305620129926995, test_loss: 0.018872443586587906
epoch 5 started
train loss: 0.029854524065740407, test_loss: 0.02701779268682003
epoch 6 started
train loss: 0.029651124821352175, test_loss: 0.016731442883610725
epoch 7 started
train loss: 0.028705439138450726, test_loss: 0.0161683838814497
epoch 8 started
train loss: 0.02777562563070595, test_loss: 0.015711860731244087
epoch 9 started
train loss: 0.02767496287976558, test_loss: 0.015464113093912601
epoch 10 started
train loss: 0.027442775009798685, test_loss: 0.015191293321549892
epoch 11 started
train loss: 0.027644856061669998, test_loss: 0.0182103477418

train loss: 0.02334445499582216, test_loss: 0.013192914426326752
epoch 101 started
train loss: 0.023329666327689274, test_loss: 0.013147114776074886
epoch 102 started
train loss: 0.023321263215621002, test_loss: 0.013166457414627075
epoch 103 started
train loss: 0.0233368974212291, test_loss: 0.013154039159417152
epoch 104 started
train loss: 0.023311204430293792, test_loss: 0.013146907091140747
epoch 105 started
train loss: 0.023304629970620487, test_loss: 0.013104657642543316
epoch 106 started
train loss: 0.02329524804528254, test_loss: 0.013165564276278019
epoch 107 started
train loss: 0.023301054065086646, test_loss: 0.013151533901691437
epoch 108 started
train loss: 0.02328776390485083, test_loss: 0.013109005056321621
epoch 109 started
train loss: 0.023287986124030016, test_loss: 0.013178983703255653
epoch 110 started
train loss: 0.023282732241308883, test_loss: 0.013176748529076576
epoch 111 started
train loss: 0.023274950843707693, test_loss: 0.013122167438268661
epoch 112 start

In [None]:
# Plot the recorded train / test losses; the label comes from model_name so it always
# matches the name used for the experiment instead of a separately hard-coded string.
show_train_plots(train_losses, test_losses, model_name)

### Сохраним результаты и измерим точность
Результаты — словарь с ключом DataItem.unique_key() и значением — предсказанным quadrangle в относительных единицах.

In [None]:
from course_ocr_t1.metrics import dump_results_dict, measure_crop_accuracy

# Files produced / consumed by this cell, all relative to the working directory.
pred_path = Path() / 'pred1.json'
gt_path = Path() / 'gt.json'  # NOTE(review): not created in the visible cells - confirm it exists
model_path = Path() / 'model1.pth'

# Persist the trained weights first, so that a failure during inference or metric
# computation cannot lose the result of the long training run.
torch.save(trained_model.state_dict(), model_path)

# Maps the key of every test item to the model prediction for it (as nested lists).
results_dict = {}

trained_model.eval()

with torch.no_grad():
    # Index explicitly over range(len(...)): the progress bar then knows the total, and the
    # loop does not depend on the dataset raising IndexError to stop implicit iteration.
    for i in tqdm(range(len(test_dataset))):
        x, _ = test_dataset[i]
        # Add a batch dimension for the forward pass and strip it from the output.
        result = trained_model(x[None, ...].to(device))[0]
        key = test_dataset.get_key(i)
        results_dict[key] = result.cpu().tolist()

dump_results_dict(results_dict, pred_path)

# Crop accuracy with the metric's default threshold (logged as acc@95 below)
# and with an explicit IoU threshold of 0.8.
acc_95 = measure_crop_accuracy(pred_path, gt_path)
acc_80 = measure_crop_accuracy(pred_path, gt_path, iou_thr=0.8)

wandb.run.summary['acc@95'] = acc_95
wandb.run.summary['acc@80'] = acc_80

# Upload the weights and the predictions to the tracked run, then close it.
wandb.save(str(model_path))
wandb.save(str(pred_path))

wandb.finish()

print(acc_95)