In [None]:
import cv2
import os
import random
import time
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
from scipy.misc import imsave
import tensorflow as tf
from utils import *

# Allow image embedding in notebook
%matplotlib inline

In [None]:
def build_data(LISA_dir):
    """Read the LISA annotation table and encode the sign names as integers.

    The file ``LISA_dir + 'allAnnotations.csv'`` is parsed as a
    semicolon-separated table whose first row is a header. Only the first six
    columns are used: file name, sign name, upper-left x, upper-left y,
    lower-right x, lower-right y.

    Parameters
    ----------
    LISA_dir : str
        Dataset directory prefix. It is concatenated directly with the file
        name, so it must end with a path separator.

    Returns
    -------
    filenames : ndarray of str, shape (n,)
        Image path of every annotation, as written in the CSV.
    labels : ndarray of int, shape (n,)
        Integer class index of every annotation.
    upperLeft : ndarray of int, shape (n, 2)
        x, y of the upper-left corner of every sign.
    lowerRight : ndarray of int, shape (n, 2)
        x, y of the lower-right corner of every sign.
    mapping : dict
        Two-way lookup: sign name -> class index and class index -> sign name.
    """
    # reading annotation file; atleast_2d keeps a header-only file sliceable
    annotations = np.genfromtxt(LISA_dir + 'allAnnotations.csv', delimiter=";", dtype=str)
    annotations = np.atleast_2d(annotations)[1:]  # trimming headings

    # building arrays of filenames and of sign names
    filenames = annotations[:, 0]
    label_names = annotations[:, 1]

    # building dictionary to convert sign labels from strings to integer labels and back.
    # The names are sorted so that every run assigns the same index to the same sign
    # (plain set iteration order changes between interpreter sessions).
    names = sorted({str(name) for name in label_names})
    mapping = {}
    for value, name in enumerate(names):
        mapping[name] = value
        mapping[value] = name

    # converting names to labels (a fresh int array; the string table is left untouched)
    labels = np.array([mapping[str(name)] for name in label_names], dtype=int)

    # building arrays of sign locations (builtin int replaces the removed np.int alias)
    upperLeft = annotations[:, 2:4].astype(int)   # x, y values of upper left corner
    lowerRight = annotations[:, 4:6].astype(int)  # x, y values of lower right corner

    return filenames, labels, upperLeft, lowerRight, mapping

In [None]:
def build_signs(LISA_dir, filenames, lowerRight, upperLeft):
    """Return contrast-equalised 32x32 BGR crops of every annotated sign.

    If ``images32.npy`` exists in the working directory it is loaded and
    returned as-is; the arguments are not consulted in that case. Otherwise
    every image is read, cropped to its annotated box, histogram-equalised on
    the Y channel, resized to 32x32, and the stacked result is saved to
    ``images32.npy``.

    Parameters
    ----------
    LISA_dir : str
        Dataset directory prefix, concatenated directly with each file name.
    filenames : sequence of str
        Image path per annotation, relative to ``LISA_dir``.
    lowerRight, upperLeft : integer arrays of shape (n, 2)
        x, y corner coordinates of each sign (note the argument order).

    Returns
    -------
    ndarray
        Array of 32x32 three-channel crops, one per annotation.

    Raises
    ------
    IOError
        If an image file cannot be read.
    """
    cache_path = 'images32.npy'
    if os.path.exists(cache_path):
        return np.load(cache_path)

    images32 = []
    for i in range(len(filenames)):
        # cropping signs from source images
        path = LISA_dir + filenames[i]
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising
            raise IOError('Could not read image: ' + path)
        sign = img[upperLeft[i, 1]:lowerRight[i, 1], upperLeft[i, 0]:lowerRight[i, 0]]

        # increasing contrast
        sign_yuv = cv2.cvtColor(sign, cv2.COLOR_BGR2YUV)           # converting to YUV
        sign_yuv[:, :, 0] = cv2.equalizeHist(sign_yuv[:, :, 0])    # equalize the histogram of the Y channel
        sign = cv2.cvtColor(sign_yuv, cv2.COLOR_YUV2BGR)           # converting back to BGR

        # resizing straight away: the raw crops all have different shapes, and
        # stacking them into one array first fails on current NumPy
        images32.append(cv2.resize(sign, (32, 32)))

    images32 = np.array(images32)
    np.save(cache_path, images32)
    return images32

In [None]:
def build_negatives(LISA_dir, num_samples):
    """Return contrast-equalised random 32x32 BGR crops of the negative images.

    If ``negatives.npy`` exists in the working directory it is loaded and
    returned as-is; the arguments are not consulted in that case. Otherwise
    ``num_samples`` random windows are cut from every ``.png`` file in
    ``LISA_dir + 'negatives/negativePics/'``, histogram-equalised on the Y
    channel, and the stacked result is saved to ``negatives.npy``.

    Parameters
    ----------
    LISA_dir : str
        Dataset directory prefix, concatenated directly with the sub-path.
    num_samples : int
        Number of random windows taken from each source image.

    Returns
    -------
    ndarray
        Array of 32x32 three-channel crops.

    Raises
    ------
    IOError
        If a negative image cannot be read.
    ValueError
        If a negative image is smaller than 32x32.
    """
    cache_path = 'negatives.npy'
    if os.path.exists(cache_path):
        return np.load(cache_path)

    # sorted: os.listdir returns entries in arbitrary order
    neg_dir = LISA_dir + 'negatives/negativePics/'
    neg_paths = sorted(os.path.join(neg_dir, f) for f in os.listdir(neg_dir) if f.endswith('.png'))

    # building an array of negatives
    negatives = []
    for path in neg_paths:
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising
            raise IOError('Could not read image: ' + path)
        height, width = img.shape[:2]
        if height < 32 or width < 32:
            raise ValueError('Image is smaller than 32x32: ' + path)

        for _ in range(num_samples):
            # cropping random 32x32 windows; the upper bound of randint is
            # exclusive, so +1 keeps the last valid offset reachable
            randx = np.random.randint(0, width - 32 + 1)
            randy = np.random.randint(0, height - 32 + 1)
            neg = img[randy:randy + 32, randx:randx + 32]

            # increasing contrast
            neg_yuv = cv2.cvtColor(neg, cv2.COLOR_BGR2YUV)          # converting to YUV
            neg_yuv[:, :, 0] = cv2.equalizeHist(neg_yuv[:, :, 0])   # equalize the histogram of the Y channel
            neg = cv2.cvtColor(neg_yuv, cv2.COLOR_YUV2BGR)          # converting back to BGR

            # adding to array
            negatives.append(neg)

    negatives = np.array(negatives)
    np.save(cache_path, negatives)
    return negatives

In [None]:
# Root of the LISA dataset. Set the LISA_DIR environment variable to use a copy
# stored elsewhere; the original author's location remains the default.
LISA_dir = os.environ.get(
    'LISA_DIR',
    'C:/Users/Daniel/Documents/Stanford/2016-2017 - Q3 - Spring/Computer Vision/Python/Project/LISA/')
# The loaders below build paths by plain string concatenation, so the prefix
# has to end with a separator.
if not LISA_dir.endswith(('/', '\\')):
    LISA_dir += '/'

# Annotation table, then the cropped sign images, then random background crops
# (8 = requested crops per negative image).
filenames, labels, upperLeft, lowerRight, mapping = build_data(LISA_dir)
images32 = build_signs(LISA_dir, filenames, lowerRight, upperLeft)
negatives = build_negatives(LISA_dir, 8)

In [None]:
#creating a tensorflow graph to store the model
graph = tf.Graph()

#initializing model in the graph
with graph.as_default():
    # defining placeholders for inputs (batches of 32x32 3-channel images) and labels
    images_ph = tf.placeholder(tf.float32, [None, 32, 32, 3])
    labels_ph = tf.placeholder(tf.int32, [None])

    #flattening input
    images_flat = tf.contrib.layers.flatten(images_ph)

    #defining fully connected layer that produces the logits.
    # The layer is linear (activation_fn=None): the softmax cross-entropy below
    # expects unscaled logits, and the previous tf.nn.crelu activation both
    # clamped them to be non-negative and doubled the output to 124 units.
    # NOTE(review): 62 output units are kept from the original; confirm this is
    # intended versus the number of distinct classes in `labels`.
    logits = tf.contrib.layers.fully_connected(images_flat, 62, activation_fn=None)

    #converting logits to label indices (int)
    predicted_labels = tf.argmax(logits, 1)

    #defining loss function: mean cross-entropy over the batch
    loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels_ph, logits=logits))

    #creating training optimizer
    train = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)

    #op that initializes all variables; must be run before training
    init = tf.global_variables_initializer()

In [None]:
#BEGIN RECOGNITION SECTION

In [None]:
# Open a session on the model graph and run the variable initializer in it.
session = tf.Session(graph=graph)
session.run(init)

# Training data. Both names currently alias the complete arrays; slice them
# identically here to train on only part of the data.
images_a = images32
labels_a = labels

# Histories filled in by the training loop: one loss value and one sampled
# accuracy per iteration.
lossv = np.empty(0)
accuracyv = np.empty(0)

In [None]:
# Train for 51 full-batch steps, logging the loss and a quick accuracy estimate.
for i in range(51):
    # One optimizer step on the whole training set; keep the loss it reports.
    _, loss_value = session.run(
        [train, loss],
        feed_dict={images_ph: images_a, labels_ph: labels_a},
    )
    lossv = np.append(lossv, loss_value)

    # Draw ten random examples to estimate the current accuracy.
    sample_indexes = random.sample(range(len(images32)), 10)
    sample_images = [images32[k] for k in sample_indexes]
    sample_labels = np.array([labels[k] for k in sample_indexes])

    # Forward pass only: predicted class index for each sampled image.
    predicted = session.run(predicted_labels, feed_dict={images_ph: sample_images})

    hits = np.sum(sample_labels == predicted)
    accuracyv = np.append(accuracyv, hits / 10)

    # Report the loss on every tenth iteration.
    if not i % 10:
        print("Loss: ", loss_value)

In [None]:
# Training curves: the loss is divided by its maximum so that it shares the
# 0-1 axis with the sampled accuracy.
x = np.arange(lossv.size)
plt.plot(x, lossv / lossv.max(), label='Loss')
plt.plot(x, accuracyv, label='Accuracy')
plt.legend(loc='lower right')
plt.xlabel('Training sessions')
plt.show()

In [None]:
#picking up to 500 random images (index images32 from i1 to i2 to use unseen data)
# NOTE: as written the sample is drawn from the same arrays used for training.
n_eval = min(500, len(images32))  # random.sample raises if asked for more items than exist
sample_indexes = random.sample(range(len(images32)), n_eval)
sample_images = [images32[k] for k in sample_indexes]
sample_labels = np.array([labels[k] for k in sample_indexes])

#running the classifier (forward pass only) and timing it
t0 = time.time()
predicted = session.run([predicted_labels], feed_dict={images_ph: sample_images})[0]
t1 = time.time()

# percentage of correct predictions, derived from the actual sample size
accuracy_pct = 100.0 * np.sum(sample_labels == predicted) / n_eval
print('Accuracy: ' + str(accuracy_pct) + "%")
print('Speed: ' + str(t1-t0))

#displaying predictions and true sign labels for the first ten samples
fig = plt.figure(figsize=(10, 10))
for i in range(min(10, n_eval)):
    truth = sample_labels[i]
    prediction = predicted[i]
    plt.subplot(5, 2, 1+i)
    plt.axis('off')
    color = 'green' if truth == prediction else 'red'
    # The network can emit an index that has no class name in `mapping`;
    # fall back to showing the raw index instead of raising KeyError.
    guess = mapping.get(int(prediction), prediction)
    plt.text(40, 10, "Truth: {0}\nGuess: {1}".format(mapping[int(truth)], guess),
             fontsize=12, color=color)
    # images come from OpenCV in BGR order; reverse the channels for matplotlib (RGB)
    plt.imshow(sample_images[i][:, :, ::-1])

In [None]:
#NEURAL DETECTOR BELOW

In [None]:
#creating session to run the graph
# A session from the recognition section may still be open; close it first so
# its resources are released instead of leaking when the name is rebound.
if 'session' in globals():
    session.close()
session = tf.Session(graph=graph)

#initializing variables (this restarts training from freshly initialized weights)
session.run([init]);

#declaring variables (slice both arrays identically to only use some of the data for training)
labels_a = labels
images_a = images32

# per-iteration histories filled in by the training loop
lossv = np.array([])
accuracyv = np.array([])

In [None]:
# Train the detector network for 11 full-batch steps.
n_probe = 10  # examples drawn per step for the accuracy estimate
for i in range(11):
    # Optimizer step over the entire training set; record the reported loss.
    feed_all = {images_ph: images_a, labels_ph: labels_a}
    _, loss_value = session.run([train, loss], feed_dict=feed_all)
    lossv = np.append(lossv, loss_value)

    # Random probe of the training data.
    sample_indexes = random.sample(range(len(images_a)), n_probe)
    sample_images = [images_a[k] for k in sample_indexes]
    sample_labels = np.array([labels_a[k] for k in sample_indexes])

    # Predicted class index for each probe image (no weight update here).
    predicted = session.run(predicted_labels, feed_dict={images_ph: sample_images})

    # Fraction of the probe classified correctly.
    n_correct = np.sum(sample_labels == predicted)
    accuracyv = np.append(accuracyv, n_correct / 10)

    # Print the loss on every fifth iteration.
    if i % 5 == 0:
        print("Loss: ", loss_value)

In [None]:
# Detector training curves: loss scaled by its maximum, next to the sampled accuracy.
x = np.arange(lossv.size)
for series, series_name in ((lossv / lossv.max(), 'Loss'), (accuracyv, 'Accuracy')):
    plt.plot(x, series, label=series_name)
plt.legend(loc='lower right')
plt.xlabel('Training sessions')
plt.show()

In [None]:
#run sliding window
image_path = r'LISA\vid1\frameAnnotations-vid_cmp1.avi_annotations\stop_1323812801.avi_image6.png'
im = cv2.imread(image_path)
if im is None:
    # cv2.imread signals failure by returning None instead of raising
    raise IOError('Could not read image: ' + image_path)

# same contrast equalisation that was applied to the training crops
im_yuv = cv2.cvtColor(im, cv2.COLOR_BGR2YUV) #converting to YUV
im_yuv[:,:,0] = cv2.equalizeHist(im_yuv[:,:,0]) #equalize the histogram of the Y channel
im = cv2.cvtColor(im_yuv, cv2.COLOR_YUV2BGR) #converting back to BGR

stride = 8
window_size = 32
windows = []
bboxes = []

# Rows are bounded by the image height (shape[0]) and columns by its width
# (shape[1]); +1 keeps the last window that still fits inside the image.
for i in range(0, im.shape[0] - window_size + 1, stride):
    for j in range(0, im.shape[1] - window_size + 1, stride):
        windows.append(im[i:i + window_size, j:j + window_size])
        bboxes.append([i, j])  # top-left corner stored as [row, column]

windows = np.array(windows)
bboxes = np.array(bboxes)

In [None]:
# Classify every window and keep the boxes whose predicted class index is 1.
# NOTE: `bboxes` is overwritten with the filtered subset, so running this cell
# twice without re-running the sliding-window cell filters it a second time.
t0 = time.time()
predicted = session.run(predicted_labels, feed_dict={images_ph: windows})
detections = np.flatnonzero(predicted == 1)
bboxes = bboxes[detections]
t1 = time.time()
print('Speed: ' + str(t1 - t0))

In [None]:
# Draw the detected 32x32 windows on top of the scanned image.
fig = plt.figure()
ax = fig.add_subplot(111)
for row, col in bboxes:
    # bboxes holds [row, column]; Rectangle expects its anchor as (x, y),
    # i.e. (column, row). The class is reached through the already-imported
    # matplotlib package because no bare `patches` name is imported above.
    ax.add_patch(
        matplotlib.patches.Rectangle(
            (col, row),
            32,
            32,
            fill=False,
            edgecolor='yellow'
        )
    )
# OpenCV stores the image as BGR; convert so matplotlib shows true colours
# (a colormap argument has no effect on a 3-channel image).
plt.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))

In [None]:
#HAAR DETECTION BELOW

In [None]:
# Load the Haar cascade. A missing or invalid file yields an empty classifier
# rather than an exception, so check for that explicitly.
cascade_path = 'sign_classifier.xml'
sign_cascade = cv2.CascadeClassifier(cascade_path)
if sign_cascade.empty():
    raise IOError('Could not load cascade classifier: ' + cascade_path)

t0 = time.time()
image_path = r'LISA\vid1\frameAnnotations-vid_cmp1.avi_annotations\stop_1323812801.avi_image6.png'
im = cv2.imread(image_path)
if im is None:
    # cv2.imread signals failure by returning None instead of raising
    raise IOError('Could not read image: ' + image_path)
gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

# scaleFactor=1.3, minNeighbors=5; each detection is (x, y, width, height)
signs = sign_cascade.detectMultiScale(gray, 1.3, 5)
for (x,y,w,h) in signs:
    # outline the detection on the colour image (BGR (0,255,255) = yellow, 2 px)
    cv2.rectangle(im,(x,y),(x+w,y+h),(0,255,255),2)

t1 = time.time()
print('Speed: ' + str(t1-t0))

# Shows the result in a native OpenCV window and blocks until a key is pressed.
cv2.imshow('Image', im)
cv2.waitKey(0)
cv2.destroyAllWindows()

