## Demo

In [1]:
import colorsys
import os
from timeit import default_timer as timer
import numpy as np
import cv2
from keras import backend as K
from keras.models import load_model
from keras.layers import Input
from PIL import Image, ImageFont, ImageDraw
from keras.utils import multi_gpu_model

from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body
from yolo3.utils import letterbox_image

import os

Using TensorFlow backend.


In [2]:
from imutils.video import WebcamVideoStream

In [3]:
import pretrainedmodels
import torch
import pretrainedmodels.utils as utils

In [4]:
model_name = 'resnet101'
model_pytorch = pretrainedmodels.__dict__[model_name](num_classes=1000, pretrained='imagenet')
model_pytorch.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=F

In [5]:
load_image = utils.LoadImage()
transform_image = utils.TransformImage(model_pytorch)

In [6]:
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
gpu_num = 1

In [7]:
model_path = 'model_data/ep042-loss24.239-val_loss23.735.h5'
anchors_path = 'model_data/yolo_anchors.txt'
classes_path = 'model_data/hand.txt'
score = 0.2
iou = 0.3
model_image_size = (416, 416)
sess = K.get_session()

In [8]:
# Get class
classes_path = os.path.expanduser(classes_path)
with open(classes_path) as f:
    class_names = f.readlines()

class_names = [c.strip() for c in class_names]

In [9]:
anchors_path = os.path.expanduser(anchors_path)
with open(anchors_path) as f:
    anchors = f.readline()
anchors = [float(x) for x in anchors.split(',')]


In [10]:
anchors = np.array(anchors).reshape(-1, 2)

In [11]:
# Load model
model_path = os.path.expanduser(model_path)
assert model_path.endswith('.h5'), 'Keras model end with file .h5'

num_anchors = len(anchors)
num_classes = len(class_names)

is_tiny_version = num_anchors==6
try:
    yolo_model = load_model(model_path, compile=False)
except:
    if is_tiny_version:
        yolo_model = tiny_yolo_body(Input(shape=(None, None, 3)), num_anchors//2, num_classes)
    else:
        yolo_model = yolo_body(Input(shape=(None, None, 3)), num_anchors//3, num_classes)
    
    yolo_model.load_weights(model_path)
else:
    yolo_model.layers[-1].output_shape[-1] == num_anchors/len(yolo_model.output) * (num_classes + 5), 'Mismatch between model and given anchor and class sizes'
    
print("{} model, anchors, and classes loaded.".format(model_path))

model_data/ep042-loss24.239-val_loss23.735.h5 model, anchors, and classes loaded.


In [12]:
# Create list color for multi object another
hsv_tuples = [(x / len(class_names), 1., 1.)
             for x in range(len(class_names))]
colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
colors = list(map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), colors))
np.random.seed(10101)
np.random.shuffle(colors)
np.random.seed(None)

In [13]:
input_image_shape = K.placeholder(shape=(2, ))
boxes, scores, classes = yolo_eval(yolo_model.output, anchors, len(class_names), input_image_shape, score_threshold=score, iou_threshold=iou)


In [14]:
with open('data/imagenet_synsets_phone.txt', 'r') as f:
    synsets = f.readlines()

In [15]:
synsets = [x.strip() for x in synsets]
splits = [line.split(' ') for line in synsets]
key_to_classname = {spl[0]:' '.join(spl[1:]) for spl in splits}

In [16]:
with open('data/imagenet_classes_phone.txt', 'r') as f:
    class_id_to_key = f.readlines()
class_id_to_key = [x.strip() for x in class_id_to_key]

In [17]:
# class VideoStream:
#     def __init__(self, src=0, usePiCamera=False, resolution=(640, 480),
#         framerate=16):
#         # check to see if the picamera module should be used
#         if usePiCamera:
#             # only import the picamera packages unless we are
#             # explicity told to do so -- this helps remove the
#             # requirement of `picamera[array]` from desktops or
#             # laptops that still want to use the `imutils` package
#             from .pivideostream import PiVideoStream

#             # initialize the picamera stream and allow the camera
#             # sensor to warmup
#             self.stream = PiVideoStream(resolution=resolution,
#                 framerate=framerate)

#         # otherwise, we are using OpenCV so initialize the webcam
#         # stream
#         else:
#             self.stream = WebcamVideoStream(src=src)

#     def start(self):
#         # start the threaded video stream
#         return self.stream.start()

#     def update(self):
#         # grab the next frame from the stream
#         self.stream.update()

#     def read(self):
#         # return the current frame
#         return self.stream.read()

#     def stop(self):
#         # stop the thread and release any resources
#         self.stream.stop()

In [18]:
video_capture = WebcamVideoStream(src=0).start()
cpt = 0
height = 480
width = 640
while True:
    # Read frame
    cpt += 1
    frame = video_capture.read()
    frame = cv2.flip(frame, 1)

    image = Image.fromarray(frame)
#     image = yolo.detect_image(image)

    # Process detect hand and recognition furniture
    boxed_image = letterbox_image(image, tuple(reversed(model_image_size)))
    image_data = np.array(boxed_image, dtype='float32')
    
    image_data /= 255.
    image_data = np.expand_dims(image_data, 0)
    
    out_boxes, out_scores, out_classes = sess.run([boxes, scores, classes],
                                                 feed_dict={
                                                     yolo_model.input: image_data,
                                                     input_image_shape: [image.size[1], image.size[0]],
                                                     K.learning_phase(): 0
                                                 })
    
    print("Found {} boxes for {}".format(len(out_boxes), 'img'))
    
    font = ImageFont.truetype(font='font/FiraMono-Medium.otf',
                    size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
    thickness = (image.size[0] + image.size[1]) // 300
    
    for i, c in reversed(list(enumerate(out_classes))):
        predicted_class = class_names[c]
        box = out_boxes[i]
        score = out_scores[i]
        
        draw = ImageDraw.Draw(image)
        
        
        label = '{} {:.2f}'.format(predicted_class, score)
#         draw = ImageDraw.Draw(image)
#         label_size = draw.textsize(label, font)
        
        top, left, bottom, right = box
        
        top = max(0, np.floor(top + 0.5).astype('int32'))
        left = max(0, np.floor(left + 0.5).astype('int32'))
        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
        print(label, (left, top), (right, bottom))
        for i in range(thickness):
            draw.rectangle(
                [left + i, top + i, right - i, bottom - i],
                outline=colors[c])
            
        if (top - (bottom - top) / 2.) > 0 and (bottom + (bottom - left) / 2.) < 480:
            top = top - (bottom - top) / 2.
            bottom = bottom + (bottom - top) / 2.
            top = int(top)
            bottom = int(bottom)
        if (left - (right - left) / 2.) > 0 and (right + (right - left) / 2.) < 640:
            left = left - (right - left) / 2.
            right = right + (right - left) / 2.
            left = int(left)
            right = int(right)
        
        img_crop_hand = frame[top:bottom, left:right]
        print(img_crop_hand.shape)
        # Convert numpy -> PIL.image
        img_crop_hand_pil = Image.fromarray(img_crop_hand)
        
        input_tensor = transform_image(img_crop_hand_pil)
        input_tensor = input_tensor.unsqueeze(0)
        input1 = torch.autograd.Variable(input_tensor, requires_grad=False)
        
        output = model_pytorch(input1)
#         output_features = model.features(input) # 1x14x14x2048 size may differ
#         output_logits = model.logits(output_features) # 1x1000
        _, argmax = output.data.squeeze().max(0)
        class_id = argmax[0]
        class_key = class_id_to_key[class_id]
        classname = key_to_classname[class_key]
        classname = classname.replace("hand-held ", "")
        
        path = "/home/neosai/Documents/github/keras-yolo3/data/hand"
        

#         cv2.imwrite("%s%04i.jpg" %classname, cpt, img_crop_hand)
        
        if classname != "":
            label = 'hand handle {}'.format(classname)
            print(label)
            name = classname + str(cpt) + ".jpg"
            cv2.imwrite(os.path.join(path, name), img_crop_hand)
        
        label_size = draw.textsize(label, font)
        
        if top - label_size[1] >= 0:
            text_origin = np.array([left, top - label_size[1]])
        else:
            text_origin = np.array([left, top + 1])
        
        
        draw.rectangle(
            [tuple(text_origin), tuple(text_origin + label_size)],
            fill=colors[c])
        draw.text(text_origin, label, fill=(0, 0, 0), font=font)
        
        
        del draw
        
    result = np.asarray(image)
    
    # Show image result
    cv2.imshow("Image", result)
    
    #
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 0 boxes for img
Found 1 boxes for img
hand 0.22 (332, 277) (433, 374)
(218, 227, 3)




IndexError: list index out of range

## Demo detect phone