In [1]:
import collections
import numpy as np
from pathlib import Path
import os
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split
import torch
import torch.optim as optim
from torchvision import transforms

# Fail fast unless the installed PyTorch major version is 1.
assert torch.__version__.split('.')[0] == '1'

In [2]:
# Mount Google Drive inside the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%cd "drive/My Drive/Colab Notebooks/RetinaNet"

/content/drive/My Drive/Colab Notebooks/RetinaNet


In [4]:
!pip install import-ipynb
import import_ipynb

Collecting import-ipynb
  Downloading https://files.pythonhosted.org/packages/63/35/495e0021bfdcc924c7cdec4e9fbb87c88dd03b9b9b22419444dc370c8a45/import-ipynb-0.1.3.tar.gz
Building wheels for collected packages: import-ipynb
  Building wheel for import-ipynb (setup.py) ... [?25l[?25hdone
  Created wheel for import-ipynb: filename=import_ipynb-0.1.3-cp36-none-any.whl size=2976 sha256=e33d191c72e247d177437c187142965915395227b20ae6354174af3c5471ee41
  Stored in directory: /root/.cache/pip/wheels/b4/7b/e9/a3a6e496115dffdb4e3085d0ae39ffe8a814eacc44bbf494b5
Successfully built import-ipynb
Installing collected packages: import-ipynb
Successfully installed import-ipynb-0.1.3


In [5]:
import model
from dataloader import CSVDataset, collater, Resizer, AspectRatioBasedSampler, Augmenter, Normalizer, UnNormalizer, Resizer2

importing Jupyter notebook from model.ipynb
importing Jupyter notebook from losses.ipynb
importing Jupyter notebook from anchors.ipynb
importing Jupyter notebook from utils.ipynb
importing Jupyter notebook from dataloader.ipynb


In [6]:
# Report whether PyTorch can see a CUDA device in this runtime.
print(f'CUDA available: {torch.cuda.is_available()}')

CUDA available: True


In [7]:
# Input locations, all relative to the current working directory.
class_path = Path("data3_label")      # directory whose entries are listed to build the label-file list
img_path = Path("img/")               # handed to CSVDataset as img_path
class_list = Path("data_class.txt")   # handed to CSVDataset as class_list

In [8]:
# Build the list of label files and split it into train / validation / test.
# Path.iterdir() yields entries in arbitrary, filesystem-dependent order, so the list is
# sorted first; otherwise the fixed random_state below would not reproduce the same split.
classes = sorted(os.path.join(class_path, x.stem + ".txt") for x in class_path.iterdir())
# 20% of all files are held out for testing; 25% of the remainder (20% of the total)
# becomes the validation set, leaving 60% for training.
non_test_file, test_file = train_test_split(classes, test_size=0.2, shuffle=True, random_state = 66)
train_file, val_file = train_test_split(non_test_file, test_size=0.25, shuffle=True, random_state = 66)

In [9]:
# Training dataset: every sample goes through Normalizer -> Augmenter -> Resizer
# (all three are imported from dataloader.ipynb).
dataset_train = CSVDataset(img_path=img_path, train_file=train_file, class_list=class_list, transform = transforms.Compose([Normalizer(), Augmenter(), Resizer()]))
# Batch sampler producing batches of 2; drop_last=False is passed, so a final short batch is presumably kept.
# NOTE(review): the name suggests images are grouped by aspect ratio — confirm in dataloader.ipynb.
sampler = AspectRatioBasedSampler(dataset_train, batch_size=2, drop_last=False)
# The sampler decides batch composition; collater merges the samples of one batch.
dataloader_train = DataLoader(dataset_train, num_workers=3, collate_fn=collater, batch_sampler=sampler)

In [10]:
# Build the detector via model.resnet18 with one output class per dataset class.
# Other constructor depths noted by the author: 18, 34, 50, 101, 152.
retinanet = model.resnet18(num_classes=dataset_train.num_classes(), pretrained=True) # 18,34,50,101,152
retinanet = retinanet.cuda()
# Wrap for multi-GPU execution; the underlying network stays reachable as retinanet.module.
retinanet = torch.nn.DataParallel(retinanet).cuda() # for more than 1 GPU
# Sets the flag on the wrapper only; the training cell calls retinanet.train() as well.
retinanet.training = True

In [11]:
#hyper_parameter
epochs = 10  # number of full passes over dataloader_train
optimizer = optim.Adam(retinanet.parameters(), lr=1e-5)
# Plateau scheduler with patience=3; it is stepped once per epoch with the mean training loss.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3, verbose=True)
#loss_hist = collections.deque(maxlen=500)

In [13]:
#training
# Put the wrapped model in training mode, then call freeze_bn() on the underlying module.
# NOTE(review): freeze_bn presumably keeps BatchNorm layers fixed — confirm in model.ipynb.
retinanet.train()
retinanet.module.freeze_bn()

print('Num training images: {}'.format(len(dataset_train)))

# Per-iteration loss history across all epochs (recorded even for batches that are skipped below).
total_closs = []
total_rloss = []

for epoch_num in range(epochs):
    # Total loss of every batch that actually produced an optimizer step in this epoch.
    epoch_loss = []

    for iter_num, data in enumerate(dataloader_train):

        optimizer.zero_grad()

        # In this call form the model returns the two loss terms directly.
        classification_loss, regression_loss = retinanet([data['img'].cuda().float(), data['annot']])
        # Reduce each term to a scalar (presumably one value per DataParallel replica — TODO confirm).
        classification_loss = classification_loss.mean()
        regression_loss = regression_loss.mean()
        loss = classification_loss + regression_loss
        total_closs.append(classification_loss.item())
        total_rloss.append(regression_loss.item())

        # Nothing to back-propagate for an exactly-zero loss; skip the update for this batch.
        if bool(loss == 0):
            continue

        loss.backward()

        # Clip the global gradient norm to 0.1 before the optimizer step.
        torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)

        optimizer.step()
        epoch_loss.append(float(loss))

        # Log once per epoch, on the last batch.
        # NOTE(review): if the last batch hits the zero-loss `continue` above, nothing is printed for the epoch.
        if iter_num == len(dataloader_train)-1:
          print('Epoch: {} | Iteration: {} | Classification loss: {:1.5f} | Regression loss: {:1.5f}'.format(epoch_num, iter_num, float(classification_loss), float(regression_loss)))

        del classification_loss
        del regression_loss

    # Feed the mean training loss of the epoch to the plateau scheduler.
    # NOTE(review): if every batch was skipped, epoch_loss is empty and np.mean returns NaN.
    scheduler.step(np.mean(epoch_loss))

Num training images: 599
Epoch: 0 | Iteration: 299 | Classification loss: 0.11604 | Regression loss: 0.40230
Epoch: 1 | Iteration: 299 | Classification loss: 0.09753 | Regression loss: 0.37452
Epoch: 2 | Iteration: 299 | Classification loss: 0.05061 | Regression loss: 0.24184
Epoch: 3 | Iteration: 299 | Classification loss: 0.04490 | Regression loss: 0.23419
Epoch: 4 | Iteration: 299 | Classification loss: 0.03923 | Regression loss: 0.23139
Epoch: 5 | Iteration: 299 | Classification loss: 0.05146 | Regression loss: 0.24318
Epoch: 6 | Iteration: 299 | Classification loss: 0.03398 | Regression loss: 0.20201
Epoch: 7 | Iteration: 299 | Classification loss: 0.04430 | Regression loss: 0.25751
Epoch: 8 | Iteration: 299 | Classification loss: 0.03415 | Regression loss: 0.18662
Epoch: 9 | Iteration: 299 | Classification loss: 0.04662 | Regression loss: 0.22474


In [14]:
# Save the underlying network (without the DataParallel wrapper) as a whole pickled object.
torch.save(retinanet.module, 'RetinaNet.pt')

In [12]:
# Reload the network written by torch.save above.
# NOTE(review): this rebinds `model`, which until now referred to the imported `model` module;
# after this cell runs, `model.resnet18(...)` in the model-construction cell no longer works.
# A distinct name would avoid this, but the evaluation call below must be updated with it.
model = torch.load('RetinaNet.pt')

Evaluate

In [13]:
def compute_overlap(a, b):
    """Pairwise intersection-over-union between two sets of boxes.

    The first four columns of every row are read as (x1, y1, x2, y2);
    any further columns are ignored.

    # Arguments
        a : 2-D array with N rows.
        b : 2-D array with K rows.
    # Returns
        Array of shape (N, K) whose entry [i, j] is the IoU of a[i] and b[j].
    """
    # Columns of `a` as (N, 1) vectors so they broadcast against the (K,) columns of `b`.
    a_x1 = a[:, 0][:, np.newaxis]
    a_y1 = a[:, 1][:, np.newaxis]
    a_x2 = a[:, 2][:, np.newaxis]
    a_y2 = a[:, 3][:, np.newaxis]

    area_b = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])
    area_a = (a_x2 - a_x1) * (a_y2 - a_y1)

    # Width and height of the overlap rectangle, clamped at zero for disjoint boxes.
    overlap_w = np.maximum(np.minimum(a_x2, b[:, 2]) - np.maximum(a_x1, b[:, 0]), 0)
    overlap_h = np.maximum(np.minimum(a_y2, b[:, 3]) - np.maximum(a_y1, b[:, 1]), 0)
    intersection = overlap_w * overlap_h

    # Union area, floored at machine epsilon so degenerate boxes never divide by zero.
    union = np.maximum(area_a + area_b - intersection, np.finfo(float).eps)

    return intersection / union

In [14]:
def _compute_ap(recall, precision):

    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    i = np.where(mrec[1:] != mrec[:-1])[0]

    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    
    return ap

In [15]:
def _get_detections(dataset, retinanet, all_annotations, score_threshold=0.5, save_path=None):
    """ Get the detections from the retinanet using the generator.
    The result is a list of lists such that the size is:
        all_detections[num_images][num_classes] = detections[num_detections, 4 + num_classes]
    # Arguments
        dataset         : The generator used to run images through the retinanet.
        retinanet           : The retinanet to run on the images.
        score_threshold : The score confidence threshold to use.
        max_detections  : The maximum number of detections to use per image.
        save_path       : The path to save the images with visualized detections to.
    # Returns
        A list of lists containing the detections for each image in the generator.
    """
    all_detections = [[None for i in range(dataset.num_classes())] for j in range(len(dataset))]
    scores_list = []
    scores_list2 = []

    retinanet.eval()
    
    with torch.no_grad():

        for index in range(len(dataset)):
            data = dataset[index]
            scale = data['scale']

            # run network
            if torch.cuda.is_available():
                scores, labels, boxes = retinanet(data['img'].permute(2, 0, 1).cuda().float().unsqueeze(dim=0))
            else:
                scores, labels, boxes = retinanet(data['img'].permute(2, 0, 1).float().unsqueeze(dim=0))
                
            scores = scores.cpu().numpy()
            labels = labels.cpu().numpy()
            boxes  = boxes.cpu().numpy()

            scores_list.append(scores)

            tmp = []
            for score in scores:
              if score > score_threshold:
                tmp.append(score)
                
            scores_list2.append(tmp)

            # correct boxes for image scale
            boxes /= scale

            # select indices which have a score above the threshold
            indices = np.where(scores > score_threshold)[0]

            if indices.shape[0] > 0:
                # select those scores
                scores = scores[indices]

                # find the order with which to sort the scores
                max_detections = len(all_annotations[index][0])
                scores_sort = np.argsort(-scores)[:max_detections]

                # select detections
                image_boxes      = boxes[indices[scores_sort], :]
                image_scores     = scores[scores_sort]
                image_labels     = labels[indices[scores_sort]]
                image_detections = np.concatenate([image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1)

                for label in range(dataset.num_classes()):
                    all_detections[index][label] = image_detections[image_detections[:, -1] == label, :-1]
            else:
                for label in range(dataset.num_classes()):
                    all_detections[index][label] = np.zeros((0, 5))

    return all_detections, scores_list, scores_list2

In [16]:
def _get_annotations(generator):

    all_annotations = [[None for i in range(generator.num_classes())] for j in range(len(generator))]

    for i in range(len(generator)):

        annotations = generator.load_annotations(i)

        for label in range(generator.num_classes()):
            all_annotations[i][label] = annotations[annotations[:, 4] == label, :4].copy()

    return all_annotations

In [21]:
import cv2 as cv
def save_evaluate_img(idx, detect, scale):
    """Draw detection boxes on a test image and save the result as a PNG.

    The image name is derived from the module-level `test_file` list: entry `idx` is a
    label-file path whose stem is shared with the image `img/<stem>.jpg`. The annotated
    image is written to `data3_result2/<stem>.png`.

    # Arguments
        idx    : Index into `test_file` of the image to draw on.
        detect : Iterable of rows whose first four values are (x1, y1, x2, y2).
        scale  : Unused; the boxes are drawn exactly as given.
    """
    txt = Path(test_file[idx]).stem
    img_file = Path('img') / (txt + '.jpg')
    img = cv.imread(str(img_file))

    # cv.imread signals an unreadable or missing file by returning None instead of raising.
    if img is None:
        print(f'{txt}: could not read {img_file}, nothing saved')
        return

    for d in detect:
        x1, y1, x2, y2 = (int(v) for v in d[:4])
        cv.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)

    # cv.imwrite does not create missing directories, so make sure the target exists.
    save_path = Path('data3_result2')
    save_path.mkdir(parents=True, exist_ok=True)
    print(txt)
    cv.imwrite(str(save_path / (txt + '.png')), img)

In [22]:
def evaluate(
    generator,
    retinanet,
    iou_threshold=0.5,
    score_threshold=0.05,
    save_path=None
):
    """ Evaluate a given dataset using a given retinanet.
    Prints the average precision of every class and saves, for each image with at least
    one detection, a visualisation through save_evaluate_img.
    # Arguments
        generator       : The generator that represents the dataset to evaluate.
        retinanet       : The retinanet to evaluate.
        iou_threshold   : Minimum IoU with an unmatched annotation for a detection to count as a true positive.
        score_threshold : The score confidence threshold to use for detections.
        save_path       : Forwarded to _get_detections.
    # Returns
        A dict mapping each class label index to a tuple
        (average precision, number of annotations); (0, 0) for a class without annotations.
    """

    # gather all detections and annotations
    all_annotations = _get_annotations(generator)
    all_detections, _, _ = _get_detections(generator, retinanet, all_annotations, score_threshold=score_threshold, save_path=save_path)

    num_images  = len(generator)
    num_classes = generator.num_classes()

    # Save one visualisation per image containing the detections of ALL classes. Saving per
    # class would overwrite the file, leaving only the boxes of the last class. The sample is
    # only loaded (for its scale) when there is something to draw.
    for i in range(num_images):
        non_empty = [d for d in all_detections[i] if len(d) > 0]
        if non_empty:
            save_evaluate_img(i, np.concatenate(non_empty, axis=0), generator[i]['scale'])

    average_precisions = {}

    for label in range(num_classes):

        # Collected in plain lists; converted to arrays once after the loops.
        false_positives = []
        true_positives  = []
        scores          = []
        num_annotations = 0.0

        for i in range(num_images):

            detections           = all_detections[i][label]
            annotations          = all_annotations[i][label]
            num_annotations     += annotations.shape[0]
            detected_annotations = set()

            for d in detections:
                scores.append(d[4])

                # any detection in an image without annotations of this class is a false positive
                if annotations.shape[0] == 0:
                    false_positives.append(1)
                    true_positives.append(0)
                    continue

                overlaps            = compute_overlap(np.expand_dims(d, axis=0), annotations)
                assigned_annotation = int(np.argmax(overlaps, axis=1)[0])
                max_overlap         = overlaps[0, assigned_annotation]

                # each annotation can be matched by at most one detection
                if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                    false_positives.append(0)
                    true_positives.append(1)
                    detected_annotations.add(assigned_annotation)
                else:
                    false_positives.append(1)
                    true_positives.append(0)

        # no annotations -> AP for this class is 0 (is this correct?)
        if num_annotations == 0:
            average_precisions[label] = 0, 0
            continue

        # sort by score
        indices         = np.argsort(-np.asarray(scores, dtype=np.float64))
        false_positives = np.asarray(false_positives, dtype=np.float64)[indices]
        true_positives  = np.asarray(true_positives, dtype=np.float64)[indices]

        # compute false positives and true positives
        false_positives = np.cumsum(false_positives)
        true_positives  = np.cumsum(true_positives)

        # compute recall and precision
        recall    = true_positives / num_annotations
        precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)

        # compute average precision
        average_precision  = _compute_ap(recall, precision)
        average_precisions[label] = average_precision, num_annotations

    print('mAP:')
    for label in range(num_classes):
        label_name = generator.label_to_name(label)
        print('{}: {}'.format(label_name, average_precisions[label][0]))

    return average_precisions

In [23]:
# Dataset over the test split: same Normalizer/Resizer transforms as training, but without Augmenter.
dataset_test = CSVDataset(img_path=img_path, train_file=test_file, class_list=class_list, transform=transforms.Compose([Normalizer(), Resizer()]))

In [24]:
# Evaluate the reloaded network (`model`, from torch.load) on the test split with the default thresholds.
if __name__ == '__main__':
    evaluate(dataset_test, model)

0001_027_25_11
0001_033_26_08
0001_034_26_17
0001_038_29_25
0001_021_28_23
0001_010_25_21
0001_023_24_09
0001_002_28_22
0001_034_26_21
0001_035_25_01
0001_009_26_02
0001_038_29_01
0001_017_25_22
0001_030_25_20
0001_002_28_23
0001_029_27_21
0001_005_26_03
0001_009_26_16
0001_020_29_16
0001_037_29_03
0001_036_27_07
0001_019_26_21
0001_013_26_16
0001_021_28_03
0001_006_27_01
0001_002_28_15
0001_008_26_03
0001_006_27_25
0001_024_24_17
0001_015_27_15
0001_005_26_21
0001_023_24_14
0001_004_26_06
0001_006_27_05
0001_032_27_13
0001_028_26_25
0001_034_26_03
0001_035_25_19
0001_001_26_04
0001_006_27_26
0001_016_26_24
0001_030_25_12
0001_027_25_02
0001_018_29_13
0001_028_26_10
0001_033_26_19
0001_038_29_22
0001_005_26_25
0001_008_26_16
0001_006_27_12
0001_036_27_23
0001_036_27_27
0001_029_27_20
0001_013_26_17
0001_036_27_09
0001_008_26_04
0001_033_26_10
0001_002_28_03
0001_029_27_08
0001_016_26_13
0001_005_26_07
0001_035_25_22
0001_017_25_03
0001_011_27_02
0001_014_26_01
0001_005_26_08
0001_012_2

Recognition_preprocessing

In [None]:
import cv2 as cv

In [None]:
from pathlib import Path

In [None]:
cd ../../TKHMTH/TKH/label_char/

/content/drive/My Drive/TKHMTH/TKH/label_char


In [None]:
def save_img(txt, total):
    """Crop every labelled box of one page and save each crop under its label's folder.

    The page image is read in grayscale from `../img/<stem>.jpg`, where <stem> is `txt` up to
    its first dot. Each crop is written to
    `../../../Colab Notebooks/Recognition/<name>/<stem>_<name>.png`.

    NOTE(review): the file name does not include the box coordinates, so two boxes with the
    same label on one page write to the same file and the later one wins.

    # Arguments
        txt   : File name of the label file; only used to derive <stem>.
        total : Iterable of label lines of the form "<name> <x1> <y1> <x2> <y2>".
    # Raises
        ValueError        : A non-blank line has fewer than five fields or a non-integer coordinate.
        FileNotFoundError : The page image cannot be read.
    """
    stem = txt.split(".")[0]
    im = "../img/" + stem + ".jpg"
    img = None  # read lazily, once per page instead of once per label line

    for line_no, data in enumerate(total, start=1):
        fields = data.split()
        if not fields:
            # blank line (e.g. a trailing newline at the end of the label file)
            continue
        if len(fields) < 5:
            raise ValueError(f"{txt}, line {line_no}: expected '<name> <x1> <y1> <x2> <y2>', got {data!r}")

        name = fields[0]
        x1, y1, x2, y2 = (int(value) for value in fields[1:5])

        dir_path = Path("../../../Colab Notebooks/Recognition/" + name)
        dir_path.mkdir(parents=True, exist_ok=True)

        if img is None:
            img = cv.imread(im, 0)
            # cv.imread returns None rather than raising when the file cannot be read.
            if img is None:
                raise FileNotFoundError(f"could not read image {im}")

        image = img[y1:y2, x1:x2]
        # Zero-width/height (or inverted) boxes give an empty crop, which cv.imwrite rejects.
        if image.size == 0:
            continue

        save_path = str(dir_path) + "/" + stem + "_" + name + ".png" # + "-" + str(x1) + "_" + str(x2) + "_" + str(y1) + "_" + str(y2) + ".png"
        cv.imwrite(save_path, image)

In [None]:
# Run save_img over every label file of the non-test split (train + validation).
# Each label file is opened by its bare file name, so this relies on the current working
# directory being the folder that holds the label files (set by the cd cell above).
for files in non_test_file:
    # keep only the file name, dropping the directory part of the split entry
    txt = files.split("/")[-1]
    print(txt)
    with(open(txt, "r", encoding="utf-8")) as f:
        total = f.readlines()
    save_img(txt, total)

0001_006_27_02.txt
0001_007_26_06.txt
0001_030_25_19.txt
0001_001_26_15.txt
0001_004_26_13.txt
0001_004_26_18.txt
0001_032_27_27.txt
0001_032_27_07.txt
0001_034_26_25.txt
0001_030_25_02.txt
0001_027_25_20.txt
0001_012_27_19.txt
0001_006_27_07.txt
0001_013_26_20.txt
0001_021_28_06.txt
0001_015_27_17.txt
0001_020_29_17.txt
0001_008_26_13.txt
0001_021_28_14.txt
0001_011_27_24.txt
0001_008_26_15.txt
0001_015_27_14.txt
0001_004_26_12.txt
0001_005_26_05.txt
0001_020_29_07.txt
0001_030_25_09.txt
0001_001_26_26.txt
0001_023_24_02.txt
0001_025_26_10.txt
0001_032_27_14.txt
0001_021_28_17.txt
0001_033_26_26.txt
0001_027_25_14.txt
0001_022_29_29.txt
0001_002_28_18.txt
0001_023_24_15.txt
0001_013_26_14.txt
0001_012_27_03.txt
0001_016_26_02.txt
0001_022_29_06.txt
0001_009_26_19.txt
0001_012_27_14.txt
0001_034_26_14.txt
0001_012_27_11.txt
0001_013_26_21.txt
0001_027_25_12.txt
0001_029_27_25.txt
0001_024_24_18.txt
0001_020_29_02.txt
0001_007_26_20.txt
0001_025_26_02.txt
0001_005_26_26.txt
0001_011_27_