In [1]:
import torch
import numpy as np
import os
import csv
import cv2

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%cd "drive/My Drive/Colab Notebooks/RetinaNet"

/content/drive/My Drive/Colab Notebooks/RetinaNet


In [4]:
!pip install import-ipynb
import import_ipynb

Collecting import-ipynb
  Downloading https://files.pythonhosted.org/packages/63/35/495e0021bfdcc924c7cdec4e9fbb87c88dd03b9b9b22419444dc370c8a45/import-ipynb-0.1.3.tar.gz
Building wheels for collected packages: import-ipynb
  Building wheel for import-ipynb (setup.py) ... [?25l[?25hdone
  Created wheel for import-ipynb: filename=import_ipynb-0.1.3-cp36-none-any.whl size=2976 sha256=5ebe58f77c09b90b771875007d487edc3de4b15dead4e5b9d8423a46f4e9e530
  Stored in directory: /root/.cache/pip/wheels/b4/7b/e9/a3a6e496115dffdb4e3085d0ae39ffe8a814eacc44bbf494b5
Successfully built import-ipynb
Installing collected packages: import-ipynb
Successfully installed import-ipynb-0.1.3


In [5]:
def load_classes(csv_reader):
    result = {}

    for line, row in enumerate(csv_reader):
        line += 1

        try:
            class_name, class_id = row
        except ValueError:
            raise(ValueError('line {}: format should be \'class_name,class_id\''.format(line)))
        class_id = int(class_id)

        if class_name in result:
            raise ValueError('line {}: duplicate class name: \'{}\''.format(line, class_name))
        result[class_name] = class_id
    return result


In [19]:
def detect_image(image_path, save_path, class_list):

    with open(class_list, 'r') as f:
        classes = load_classes(csv.reader(f, delimiter=','))

    labels = {}
    for key, value in classes.items():
        labels[value] = key

    model = torch.load('RetinaNet.pt')

    if torch.cuda.is_available():
        model = model.cuda()

    model.training = False
    model.eval()

    for idx, img_name in enumerate(os.listdir(image_path)):
        print(img_name)
        image = cv2.imread(os.path.join(image_path, img_name))

        if image is None:
            continue
        image_orig = image.copy()

        rows, cols, cns = image.shape

        smallest_side = min(rows, cols)

        # rescale the image so the smallest side is min_side
        min_side = 608
        max_side = 1024
        scale = min_side / smallest_side

        # check if the largest side is now greater than max_side, which can happen
        # when images have a large aspect ratio
        largest_side = max(rows, cols)

        if largest_side * scale > max_side:
            scale = max_side / largest_side

        # resize the image with the computed scale
        image = cv2.resize(image, (int(round(cols * scale)), int(round((rows * scale)))))
        rows, cols, cns = image.shape

        pad_w = 32 - rows % 32
        pad_h = 32 - cols % 32

        new_image = np.zeros((rows + pad_w, cols + pad_h, cns)).astype(np.float32)
        new_image[:rows, :cols, :] = image.astype(np.float32)
        image = new_image.astype(np.float32)
        image /= 255
        image -= [0.485, 0.456, 0.406]
        image /= [0.229, 0.224, 0.225]
        image = np.expand_dims(image, 0)
        image = np.transpose(image, (0, 3, 1, 2))

        boxes = []
        with torch.no_grad():

            image = torch.from_numpy(image)
            if torch.cuda.is_available():
                image = image.cuda()

            scores, classification, transformed_anchors = model(image.cuda().float()) # the threshold in model is important
            idxs = np.where(scores.cpu() > 0.2)

            for j in range(idxs[0].shape[0]):
                bbox = transformed_anchors[idxs[0][j], :]

                x1 = int(bbox[0] / scale)
                y1 = int(bbox[1] / scale)
                x2 = int(bbox[2] / scale)
                y2 = int(bbox[3] / scale)
                tmp = [x1,y1,x2,y2]
                boxes.append(tmp)
                cv2.rectangle(image_orig, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)

            print(f'{save_path}/{img_name}')
            cv2.imwrite(f'{save_path}/{img_name}', image_orig)
        
        #print("------Recognition------")
        #pred = recognition(image_orig, boxes)
        #print(pred)
        break
    return image_orig, boxes

In [20]:
#if __name__ == '__main__':
img, boxes = detect_image(image_path="img/", save_path='../Data_Result', class_list="data_class.txt")

0001_034_26_04.jpg
../Data_Result/0001_034_26_04.jpg


In [8]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [28]:
from torchvision import transforms
from PIL import Image
import numpy as np
import torch

def recognition(image_orig, boxes):

    predictions_list = []
    model = torch.load('../Recognition.pth')

    model = model.to(device)
    model.eval()

    with torch.no_grad():
        for box in boxes:
            x1 = box[0]
            y1 = box[1]
            x2 = box[2]
            y2 = box[3]
            image = image_orig[y1:y2,x1:x2]
            #image = np.expand_dims(image, axis=0)
            data = Image.fromarray(image)
            data = data.convert("RGB")

            transform = transforms.Compose([
                          transforms.Resize((224,224)),
                          transforms.ToTensor(),
                          transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
                      ])
            
            data = transform(data).to(device)
            data = torch.unsqueeze(data, 0)
            #print(data.shape)
            #break
            output = model(data)
            predictions = torch.max(output, 1)[1]
            predictions_list.append(predictions)

    return predictions_list

In [29]:
pred = recognition(img, boxes)

In [30]:
print(pred)

[tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([167], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([167], device='cuda:0'), tensor([236], device='cuda:0'), tensor([167], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([167], device='cuda:0'), tensor([167], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor([236], device='cuda:0'), tensor(

In [11]:
with(open('../var.txt', 'r')) as f:      
    lines = f.readlines()

In [26]:
idx2word = []
for word in lines[0]:
    idx2word.append(word)

In [27]:
for idx in pred:
    print(idx2word[idx], end='')

TypeError: ignored