# speed_up_loader에 대한 설명

- Dataloader를 바꿈으로써 학습속도를 높이고자 작성하게 되었습니다.
- `do_training`함수를 돌리면서 시간을 측정했습니다.
- Dataset은 기존에 주어진 100장의 training image만을 사용하여 시간을 측정했습니다.

# 결과 미리보기

- Original Version
    - 현재 사용하고 있는 dataloader의 시간을 측정했습니다.
    - 학습 전까지 준비하는 시간 >> **0:00:03.951898**
    - 1 epoch 학습하는데 걸리는 시간 >> **0:03:13.311961**
- New Version-1
    - 이미지를 불러오는 부분 전까지만 미리 진행했을 때 시간을 측정했습니다.
    - 학습 전까지 준비하는 시간 >> **0:00:03.740865**
    - 1 epoch 학습하는데 걸리는 시간 >> **0:00:32.693058**
- New Version-2
    - 이미지를 미리 저장해두고 dataloader에서는 transform만 수행되게 됩니다.
    - 학습 전까지 준비하는 시간 >> **0:00:03.659511**
    - 1 epoch 학습하는데 걸리는 시간 >> **0:00:31.879351**

# Import

In [1]:
import os
os.chdir('/opt/ml/input/local')

In [8]:
import os.path as osp
import time
import math
import json
from datetime import timedelta

import torch
from torch import cuda
from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
from tqdm import tqdm

from east_dataset import EASTDataset
from model import EAST
from dataset import (filter_vertices, cal_distance, move_points, shrink_poly, get_rotate_mat,
                     rotate_vertices, get_boundary, cal_error, find_min_rect_angle,
                     is_cross_text, crop_img, rotate_all_pixels, resize_img, adjust_height, rotate_img,
                     generate_roi_mask, filter_vertices)

import numpy as np
import random
from PIL import Image
import albumentations as A

# 함수 정의

In [9]:
def set_seed(random_seed):
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)  # if use multi-GPU
    # CUDA randomness
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    np.random.seed(random_seed)
    random.seed(random_seed)
    os.environ["PYTHONHASHSEED"] = str(random_seed)

# 변수 설정

In [10]:
data_dir = "../data/medical"
device = "cuda" if cuda.is_available() else "cpu"
num_worekrs = 8
image_size = 2048
input_size = 1024
batch_size = 8
learning_rate = 1e-3
max_epoch = 1
ignore_tags = ["masked", "excluded-region", "maintable", "stamp"]
seed = 1333

# Original Version

- 현재 사용하고 있는 dataloader의 시간을 측정했습니다.
- 학습 전까지 준비하는 시간 >> 0:00:03.951898
- 1 epoch 학습하는데 걸리는 시간 >> 0:03:13.311961

In [15]:
class SceneTextDataset(Dataset):
    def __init__(
        self,
        root_dir,
        split="train",
        image_size=2048,
        crop_size=1024,
        ignore_tags=[],
        ignore_under_threshold=10,
        drop_under_threshold=1,
        color_jitter=True,
        normalize=True,
    ):
        with open(osp.join(root_dir, "ufo/{}.json".format(split)), "r") as f:
            anno = json.load(f)

        self.anno = anno
        self.image_fnames = sorted(anno["images"].keys())
        self.image_dir = osp.join(root_dir, "img", split)

        self.image_size, self.crop_size = image_size, crop_size
        self.color_jitter, self.normalize = color_jitter, normalize

        self.ignore_tags = ignore_tags

        self.drop_under_threshold = drop_under_threshold
        self.ignore_under_threshold = ignore_under_threshold

    def __len__(self):
        return len(self.image_fnames)

    def __getitem__(self, idx):
        image_fname = self.image_fnames[idx]
        image_fpath = osp.join(self.image_dir, image_fname)

        vertices, labels = [], []
        for word_info in self.anno["images"][image_fname]["words"].values():
            word_tags = word_info["tags"]

            ignore_sample = any(elem for elem in word_tags if elem in self.ignore_tags)
            num_pts = np.array(word_info["points"]).shape[0]

            # skip samples with ignore tag and
            # samples with number of points greater than 4
            if ignore_sample or num_pts > 4:
                continue

            vertices.append(np.array(word_info["points"]).flatten())
            labels.append(int(not word_info["illegibility"]))
        vertices, labels = np.array(vertices, dtype=np.float32), np.array(
            labels, dtype=np.int64
        )

        vertices, labels = filter_vertices(
            vertices,
            labels,
            ignore_under=self.ignore_under_threshold,
            drop_under=self.drop_under_threshold,
        )

        image = Image.open(image_fpath)
        image, vertices = resize_img(image, vertices, self.image_size)
        image, vertices = adjust_height(image, vertices)
        image, vertices = rotate_img(image, vertices)
        image, vertices = crop_img(image, vertices, labels, self.crop_size)

        if image.mode != "RGB":
            image = image.convert("RGB")
        image = np.array(image)

        funcs = []
        if self.color_jitter:
            funcs.append(A.ColorJitter(0.5, 0.5, 0.5, 0.25))
        if self.normalize:
            funcs.append(A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)))
        transform = A.Compose(funcs)

        image = transform(image=image)["image"]
        word_bboxes = np.reshape(vertices, (-1, 4, 2))
        roi_mask = generate_roi_mask(image, vertices, labels)

        return image, word_bboxes, roi_mask


def do_training(
    data_dir,
    device,
    num_workers,
    image_size,
    input_size,
    batch_size,
    learning_rate,
    max_epoch,
    ignore_tags,
    seed,
):
    start_time = time.time()
    set_seed(seed)

    dataset = SceneTextDataset(
        data_dir,
        split="train_default",
        image_size=image_size,
        crop_size=input_size,
        ignore_tags=ignore_tags,
    )
    dataset = EASTDataset(dataset)
    num_batches = math.ceil(len(dataset) / batch_size)
    train_loader = DataLoader(
        dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers
    )

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = EAST()
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[max_epoch // 2], gamma=0.1
    )
    print(f"학습 전까지 준비하는 시간 >> {timedelta(seconds=time.time() - start_time)}")
    start_time = time.time()
    model.train()
    for epoch in range(max_epoch):
        epoch_start = time.time()
        with tqdm(total=num_batches) as pbar:
            for img, gt_score_map, gt_geo_map, roi_mask in train_loader:
                continue
    print(f"1 epoch 학습하는데 걸리는 시간 >> {timedelta(seconds=time.time() - start_time)}")

In [16]:
do_training(data_dir, 
            device, 
            num_worekrs, 
            image_size, 
            input_size, 
            batch_size, 
            learning_rate, 
            max_epoch, 
            ignore_tags, 
            seed)

학습 전까지 준비하는 시간 >> 0:00:03.951898


  0%|          | 0/13 [03:13<?, ?it/s]

1 epoch 학습하는데 걸리는 시간 >> 0:03:13.311961





# New Version-1

- 이미지를 불러오는 부분 전까지만 미리 진행했을 때 시간을 측정했습니다.
- 학습 전까지 준비하는 시간 >> 0:00:03.740865
- 1 epoch 학습하는데 걸리는 시간 >> 0:00:32.693058

In [17]:
class SceneTextDataset(Dataset):
    def __init__(
        self,
        root_dir,
        split="train",
        image_size=2048,
        crop_size=1024,
        ignore_tags=[],
        ignore_under_threshold=10,
        drop_under_threshold=1,
        color_jitter=True,
        normalize=True,
    ):
        with open(osp.join(root_dir, "ufo/{}.json".format(split)), "r") as f:
            anno = json.load(f)

        self.anno = anno
        self.image_fnames = sorted(anno["images"].keys())[:3]
        self.image_dir = osp.join(root_dir, "img", split)

        self.image_size, self.crop_size = image_size, crop_size
        self.color_jitter, self.normalize = color_jitter, normalize

        self.ignore_tags = ignore_tags

        self.drop_under_threshold = drop_under_threshold
        self.ignore_under_threshold = ignore_under_threshold

        self.images = []
        self.vertices = []
        self.labels = []
        for idx in range(len(self.image_fnames)):
            image_fname = self.image_fnames[idx]
            image_fpath = osp.join(self.image_dir, image_fname)
            vertices, labels = [], []
            for word_info in self.anno["images"][image_fname]["words"].values():
                word_tags = word_info["tags"]
                ignore_sample = any(elem for elem in word_tags if elem in self.ignore_tags)
                num_pts = np.array(word_info["points"]).shape[0]

                if ignore_sample or num_pts > 4:
                    continue
                vertices.append(np.array(word_info["points"]).flatten())
                labels.append(int(not word_info["illegibility"]))
            vertices, labels = np.array(vertices, dtype=np.float32), np.array(labels, dtype=np.int64)
            vertices, labels = filter_vertices(vertices, labels, 
                                               ignore_under=self.ignore_under_threshold, 
                                               drop_under=self.drop_under_threshold)
            self.images.append(image_fpath)
            self.vertices.append(vertices)
            self.labels.append(labels)


    def __len__(self):
        return len(self.image_fnames)

    def __getitem__(self, idx):
        image_fpath = self.images[idx]
        vertices = self.vertices[idx]
        labels = self.labels[idx]

        image = Image.open(image_fpath)
        image, vertices = resize_img(image, vertices, self.image_size)
        image, vertices = adjust_height(image, vertices)
        image, vertices = rotate_img(image, vertices)
        image, vertices = crop_img(image, vertices, labels, self.crop_size)

        if image.mode != "RGB":
            image = image.convert("RGB")
        image = np.array(image)

        funcs = []
        if self.color_jitter:
            funcs.append(A.ColorJitter(0.5, 0.5, 0.5, 0.25))
        if self.normalize:
            funcs.append(A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)))
        transform = A.Compose(funcs)

        image = transform(image=image)["image"]
        word_bboxes = np.reshape(vertices, (-1, 4, 2))
        roi_mask = generate_roi_mask(image, vertices, labels)

        return image, word_bboxes, roi_mask


def do_training(
    data_dir,
    device,
    num_workers,
    image_size,
    input_size,
    batch_size,
    learning_rate,
    max_epoch,
    ignore_tags,
    seed,
):
    start_time = time.time()
    set_seed(seed)

    dataset = SceneTextDataset(
        data_dir,
        split="train_default",
        image_size=image_size,
        crop_size=input_size,
        ignore_tags=ignore_tags,
    )
    dataset = EASTDataset(dataset)
    num_batches = math.ceil(len(dataset) / batch_size)
    train_loader = DataLoader(
        dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers
    )

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = EAST()
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[max_epoch // 2], gamma=0.1
    )
    print(f"학습 전까지 준비하는 시간 >> {timedelta(seconds=time.time() - start_time)}")
    start_time = time.time()
    model.train()
    for epoch in range(max_epoch):
        epoch_start = time.time()
        with tqdm(total=num_batches) as pbar:
            for img, gt_score_map, gt_geo_map, roi_mask in train_loader:
                continue
    print(f"1 epoch 학습하는데 걸리는 시간 >> {timedelta(seconds=time.time() - start_time)}")

In [18]:
do_training(data_dir, 
            device, 
            num_worekrs, 
            image_size, 
            input_size, 
            batch_size, 
            learning_rate, 
            max_epoch, 
            ignore_tags, 
            seed)

학습 전까지 준비하는 시간 >> 0:00:03.740865


  0%|          | 0/1 [00:32<?, ?it/s]

1 epoch 학습하는데 걸리는 시간 >> 0:00:32.693058





# New Version-2

- 이미지를 미리 저장해두고 dataloader에서는 transform만 수행되게 됩니다.
- 학습 전까지 준비하는 시간 >> 0:00:03.659511
- 1 epoch 학습하는데 걸리는 시간 >> 0:00:31.879351

In [19]:
class SceneTextDataset(Dataset):
    def __init__(
        self,
        root_dir,
        split="train",
        image_size=2048,
        crop_size=1024,
        ignore_tags=[],
        ignore_under_threshold=10,
        drop_under_threshold=1,
        color_jitter=True,
        normalize=True,
    ):
        with open(osp.join(root_dir, "ufo/{}.json".format(split)), "r") as f:
            anno = json.load(f)

        self.anno = anno
        self.image_fnames = sorted(anno["images"].keys())[:3]
        self.image_dir = osp.join(root_dir, "img", split)

        self.image_size, self.crop_size = image_size, crop_size
        self.color_jitter, self.normalize = color_jitter, normalize

        self.ignore_tags = ignore_tags

        self.drop_under_threshold = drop_under_threshold
        self.ignore_under_threshold = ignore_under_threshold

        self.images = []
        self.vertices = []
        self.labels = []
        for idx in range(len(self.image_fnames)):
            image_fname = self.image_fnames[idx]
            image_fpath = osp.join(self.image_dir, image_fname)
            vertices, labels = [], []
            for word_info in self.anno["images"][image_fname]["words"].values():
                word_tags = word_info["tags"]
                ignore_sample = any(elem for elem in word_tags if elem in self.ignore_tags)
                num_pts = np.array(word_info["points"]).shape[0]

                if ignore_sample or num_pts > 4:
                    continue
                vertices.append(np.array(word_info["points"]).flatten())
                labels.append(int(not word_info["illegibility"]))
            vertices, labels = np.array(vertices, dtype=np.float32), np.array(labels, dtype=np.int64)
            vertices, labels = filter_vertices(vertices, labels, ignore_under=self.ignore_under_threshold, drop_under=self.drop_under_threshold)
            self.images.append(Image.open(image_fpath))
            self.vertices.append(vertices)
            self.labels.append(labels)


    def __len__(self):
        return len(self.image_fnames)

    def __getitem__(self, idx):
        image = self.images[idx]
        vertices = self.vertices[idx]
        labels = self.labels[idx]

        image, vertices = resize_img(image, vertices, self.image_size)
        image, vertices = adjust_height(image, vertices)
        image, vertices = rotate_img(image, vertices)
        image, vertices = crop_img(image, vertices, labels, self.crop_size)

        if image.mode != "RGB":
            image = image.convert("RGB")
        image = np.array(image)

        funcs = []
        if self.color_jitter:
            funcs.append(A.ColorJitter(0.5, 0.5, 0.5, 0.25))
        if self.normalize:
            funcs.append(A.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)))
        transform = A.Compose(funcs)

        image = transform(image=image)["image"]
        word_bboxes = np.reshape(vertices, (-1, 4, 2))
        roi_mask = generate_roi_mask(image, vertices, labels)

        return image, word_bboxes, roi_mask


def do_training(
    data_dir,
    device,
    num_workers,
    image_size,
    input_size,
    batch_size,
    learning_rate,
    max_epoch,
    ignore_tags,
    seed,
):
    start_time = time.time()
    set_seed(seed)

    dataset = SceneTextDataset(
        data_dir,
        split="train_default",
        image_size=image_size,
        crop_size=input_size,
        ignore_tags=ignore_tags,
    )
    dataset = EASTDataset(dataset)
    num_batches = math.ceil(len(dataset) / batch_size)
    train_loader = DataLoader(
        dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers
    )

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model = EAST()
    model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = lr_scheduler.MultiStepLR(
        optimizer, milestones=[max_epoch // 2], gamma=0.1
    )
    print(f"학습 전까지 준비하는 시간 >> {timedelta(seconds=time.time() - start_time)}")
    start_time = time.time()
    model.train()
    for epoch in range(max_epoch):
        epoch_start = time.time()
        with tqdm(total=num_batches) as pbar:
            for img, gt_score_map, gt_geo_map, roi_mask in train_loader:
                continue
    print(f"1 epoch 학습하는데 걸리는 시간 >> {timedelta(seconds=time.time() - start_time)}")

In [20]:
do_training(data_dir, 
            device, 
            num_worekrs, 
            image_size, 
            input_size, 
            batch_size, 
            learning_rate, 
            max_epoch, 
            ignore_tags, 
            seed)

학습 전까지 준비하는 시간 >> 0:00:03.659511


  0%|          | 0/1 [00:31<?, ?it/s]

1 epoch 학습하는데 걸리는 시간 >> 0:00:31.879351



