In [1]:
import tensorflow as tf
import cv2
import numpy as np
import time

# Reference: https://github.com/udacity/CarND-Object-Detection-Lab
class TLClassifier(object):
    """Locate a traffic light with a frozen TensorFlow detection graph and
    classify its state from the brightness of the three lamp regions.

    Typical use (as in this notebook): ``locateTL`` on an RGB frame to get a
    pixel bounding box, crop the frame with it, then ``classifyTL`` on a
    single-channel brightness image of the crop.
    """

    def __init__(self, graph_filename='../site_graph.pb'):
        """Load the frozen graph, open a session and run one warm-up inference.

        Args:
            graph_filename: Path to the frozen detection graph (``.pb``).
                Defaults to the path this notebook originally hard-coded.
        """
        start = time.time()
        print("Initializing TensorFlow...")
        self.detection_graph = tf.Graph()
        # configure for a GPU: let TensorFlow grow its GPU memory allocation
        # on demand instead of claiming it up front
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        # load trained tensorflow graph
        with self.detection_graph.as_default():
            graph_def = tf.GraphDef()
            with tf.gfile.GFile(graph_filename, 'rb') as f:
                graph_def.ParseFromString(f.read())
            # The file is fully read at this point, so the import does not
            # need to keep it open.
            tf.import_graph_def(graph_def, name='')

            self.sess = tf.Session(graph=self.detection_graph, config=config)
            # configure input and output
            self.image_tensor   = self.detection_graph.get_tensor_by_name('image_tensor:0')
            self.num_detections = self.detection_graph.get_tensor_by_name('num_detections:0')
            self.dboxes         = self.detection_graph.get_tensor_by_name('detection_boxes:0')
            self.dscores        = self.detection_graph.get_tensor_by_name('detection_scores:0')
            self.dclasses       = self.detection_graph.get_tensor_by_name('detection_classes:0')

            startA = time.time()
            # Initialize the network by running a randomized image. The
            # values must be scaled to [0, 255) BEFORE the uint8 cast:
            # casting rand()'s [0, 1) output directly yields an all-zero image.
            image = np.asarray(np.random.rand(300, 300, 3) * 255, dtype="uint8")
            image_expanded = np.expand_dims(image, axis=0)
            _ = self.sess.run([self.dboxes, self.dscores, self.dclasses, self.num_detections],
              feed_dict={self.image_tensor: image_expanded})
        endA = time.time()
        print('First time run duration: ', endA-startA)
        end = time.time()
        print('Total initialization time: ', end-start)

    # Convert normalized box coordinates to pixels
    def to_image_coords(self, box, dim):
        """Scale a normalized box to integer pixel coordinates.

        Args:
            box: Four values in [0, 1]; entries 0 and 2 are scaled by the
                image height, entries 1 and 3 by the image width.
            dim: ``(height, width)`` of the image, in pixels.

        Returns:
            ``np.ndarray`` of four ints (values are truncated, not rounded).
        """
        height, width = dim[0], dim[1]
        box_pixel = [int(box[0] * height), int(box[1] * width), int(box[2] * height), int(box[3] * width)]
        return np.array(box_pixel)

    def locateTL(self, image, min_score=0.4, min_width=20, min_ratio=2.0):
        """Run the detector on one image and return a traffic-light box.

        The first detection whose class id is below 4 is used (ids 1-3 are
        Green/Red/Yellow and 4 is 'off' in the label map noted further down
        in this notebook). It is accepted only if its score exceeds
        ``min_score``, it is at least ``min_width`` pixels wide and its
        height/width ratio exceeds ``min_ratio``.

        Args:
            image: Image array of shape (height, width, channels); a batch
                axis is added before it is fed to ``image_tensor``.
            min_score: Scores less than or equal to this are rejected.
            min_width: Minimum accepted box width in pixels.
            min_ratio: Height/width ratio the box must exceed.

        Returns:
            Pixel box ``[row0, col0, row1, col1]`` as ``np.ndarray`` when a
            detection is accepted, otherwise the list ``[0, 0, 0, 0]``.
        """
        box = [0, 0, 0, 0]
        with self.detection_graph.as_default():
            image_expanded = np.expand_dims(image, axis=0)
            (boxes, scores, classes, _) = self.sess.run(
              [self.dboxes, self.dscores, self.dclasses, self.num_detections],
              feed_dict={self.image_tensor: image_expanded})

            # Remove unnecessary dimensions
            boxes   = np.squeeze(boxes)
            class_  = np.squeeze(classes).astype(np.int32)
            scores  = np.squeeze(scores)
            # NOTE(review): taking the FIRST matching entry assumes the graph
            # returns detections ordered by descending score - confirm.
            index = next((i for i, clsid in enumerate(class_) if clsid < 4), None)
            if index is None:
                print('No traffic light detected')
            elif scores[index] <= min_score:
                print('Confidence: ', scores[index])
            else:
                b = self.to_image_coords(boxes[index], image.shape[0:2])
                b_h = b[2]-b[0]
                b_w = b[3]-b[1]
                # the small epsilon avoids a division by zero for empty boxes
                ratio = b_h / (b_w + 0.00001)
                if (b_w >= min_width) and (ratio > min_ratio):
                    print('Confidence: ', scores[index])
                    box = b
                else:
                    # b_h was previously undefined here, so this branch raised
                    # NameError instead of reporting the rejected box.
                    print(b_h, b_w, ratio)
                    print('Found, but bad ratio or too narrow')
        return box

    def _patch_mean(self, image_data, center, half):
        """Return the truncated mean of the square patch around ``center``."""
        row, col = center
        # Clamp the lower bounds: a negative start index would wrap around to
        # the far edge of the array and sample the wrong pixels (or none).
        patch = image_data[max(0, row - half):row + half,
                           max(0, col - half):col + half]
        return int(np.mean(patch))

    # Classify a traffic light based on simple geometric properties
    # Expects a gray-scale image
    def classifyTL(self, image_data):
        """Decide GO/STOP by comparing the brightness of three lamp regions.

        The crop is treated as a vertical light: the red region is sampled
        around the middle of the top third, yellow around the image centre
        and green one third of the height below the centre.

        Args:
            image_data: 2-D brightness array of the cropped light (the
                notebook passes the V channel of the HSV crop).

        Returns:
            1 (GO) when the green region is strictly the brightest,
            otherwise 0 (STOP). An empty crop is reported as STOP.
        """
        print('____________________________________________________________________')
        if image_data.size == 0:
            # Nothing to measure; fail safe rather than crash on a NaN mean.
            print('Empty traffic light image')
            return 0 # STOP
        # get the image center geometry
        midX = int(image_data.shape[1]/2)
        midY = int(image_data.shape[0]/2)
        thirdY = int(image_data.shape[0]/3)
        # patch half-size; at least 1 so very small crops still yield pixels
        p = max(1, int(thirdY/4))
        # find the average from the patch centred on each lamp region
        rROI = self._patch_mean(image_data, (int(thirdY/2), midX), p)
        yROI = self._patch_mean(image_data, (midY, midX), p)
        gROI = self._patch_mean(image_data, (midY+thirdY, midX), p)
        # perform simple brightness comparisons and print for humans
        if (gROI > yROI) and (gROI > rROI):
            print(">>> GREEN <<<")
            return 1 # GO
        if (yROI > gROI) and (yROI > rROI):
            print(">>> YELLOW <<<")
        elif (rROI > yROI) and (rROI > gROI):
            print(">>> RED <<<")
        # ties print no colour and are treated as STOP
        return 0 # STOP

In [2]:
# Build the classifier once: the constructor loads the frozen graph, opens a
# TensorFlow session and runs a warm-up inference (it prints both timings),
# so the cells below reuse `tc` for every image.
tc = TLClassifier()

Initializing TensorFlow...
First time run duration:  4.571080207824707
Total initialization time:  7.856497764587402


In [3]:
def adjust_gamma(image, gamma=1.0):
    """Gamma-correct an image through a 256-entry lookup table.

    Every possible pixel value v in [0, 255] is mapped to
    ((v / 255) ** (1 / gamma)) * 255, truncated to uint8, and the resulting
    table is applied to ``image`` with ``cv2.LUT``.
    """
    exponent = 1.0 / gamma

    def remap(level):
        # normalise to [0, 1], apply the power curve, scale back to [0, 255]
        return ((level / 255.0) ** exponent) * 255

    lut = np.array(list(map(remap, np.arange(0, 256)))).astype("uint8")

    # look every pixel up in the table
    return cv2.LUT(image, lut)


In [4]:
image_path = "../image/l5e-300y.png"
img = cv2.imread(image_path)
# cv2.imread does not raise on a missing or unreadable file - it returns
# None, which would otherwise only surface as an opaque cvtColor error.
if img is None:
    raise FileNotFoundError("Could not read image: " + image_path)
# OpenCV loads BGR; the detector is fed RGB here
image_data = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Disabled preprocessing experiments, kept for reference:
#image_data = (image_data - 128.)/128.
#image_data = adjust_gamma(image_data, 0.64)
#image_data = np.dstack((image_data,image_data,image_data))
# time only the detector call
start = time.time()
b = tc.locateTL(image_data)
end = time.time()
print(b)
print('Detection time: ', end-start)

# Label map of the detection graph (locateTL accepts class ids below 4):
#{1: {'id': 1, 'name': 'Green'}, 
# 2: {'id': 2, 'name': 'Red'}, 
# 3: {'id': 3, 'name': 'Yellow'}, 
# 4: {'id': 4, 'name': 'off'}}


Confidence:  0.995318
[ 35  88 122 116]
Detection time:  0.024503231048583984


In [5]:
start = time.time()
# locateTL hands back an all-zero box when nothing usable was detected
# (no detection, low confidence, or a rejected box shape).
if np.any(b):
    # crop the original BGR frame to the box, then keep only the HSV
    # value (brightness) channel for the lamp comparison
    img_tl = img[b[0]:b[2], b[1]:b[3]]
    img_tl = cv2.cvtColor(img_tl, cv2.COLOR_BGR2HSV)[:,:,2]
    signal_status = tc.classifyTL(img_tl)
    print(("STOP", "GO")[bool(signal_status)])
else:
    print ('unknown')
end = time.time()
print('Classification time: ', end-start)

____________________________________________________________________
>>> YELLOW <<<
STOP
Classification time:  0.0010001659393310547


In [6]:
image_path = "../image/vred-300.png"
img = cv2.imread(image_path)
# cv2.imread returns None instead of raising when the file cannot be read
if img is None:
    raise FileNotFoundError("Could not read image: " + image_path)
image_data = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# Time only the detector call, as in the earlier detection cell; loading and
# colour conversion are kept outside the timed span so the numbers compare.
start = time.time()
b = tc.locateTL(image_data)
end = time.time()
print(b)
print('Detection time: ', end-start)
start = time.time()
# If there is no detection or low-confidence detection
if np.array_equal(b, np.zeros(4)):
    print ('unknown')
else:
    # crop the BGR frame to the box and keep the HSV value (brightness) channel
    img_tl = img[b[0]:b[2], b[1]:b[3]]
    img_tl = cv2.cvtColor(img_tl, cv2.COLOR_BGR2HSV)[:,:,2]
    signal_status = tc.classifyTL(img_tl)
    print("GO" if signal_status else "STOP")
end = time.time()
print('Classification time: ', end-start)

Confidence:  0.728387
[ 59 221 127 243]
Detection time:  0.03550457954406738
____________________________________________________________________
>>> RED <<<
STOP
Classification time:  0.0004999637603759766


In [7]:
# Random 300x300 3-channel test frame: scale rand()'s [0, 1) floats to
# [0, 255) first, then truncate to uint8.
# Alternative kept for reference (floats in [-1, 1)):
#image = np.asarray(np.random.rand(300,300,3)*2.-1.)
random_floats = np.random.rand(300, 300, 3) * 255
image = random_floats.astype("uint8")
print(image)

[[[162  27  76]
  [  7 210 202]
  [182 240 254]
  ..., 
  [ 20 212 196]
  [118  36 176]
  [167  21 205]]

 [[ 72 137  97]
  [142  64 144]
  [142 145 210]
  ..., 
  [218  12 233]
  [237  28   7]
  [  7 205  73]]

 [[191 203  97]
  [224  52 192]
  [  8 193  98]
  ..., 
  [ 74 112  91]
  [ 45 118  10]
  [130  42 191]]

 ..., 
 [[ 17  96 210]
  [200  27   1]
  [  9 239 128]
  ..., 
  [239   9 241]
  [230 245 129]
  [ 73 137 198]]

 [[164 251  83]
  [ 86 212 207]
  [156 145 221]
  ..., 
  [252 220 207]
  [136 232  50]
  [239  75 243]]

 [[253 196 154]
  [ 39 253 248]
  [102 125  93]
  ..., 
  [ 96 104  70]
  [  1 113 133]
  [ 37  68  50]]]
