In [187]:
#!pip install -r requirements.txt

import av
import cv2
import numpy as np
import skimage
from skimage.color import rgb2gray
from skimage.exposure import rescale_intensity
from PIL import Image, ImageDraw, ImageFont
from matplotlib import pyplot as plt

In [2]:
def load_frames(video_path):
    """Decode every frame of a video file into a list of image arrays.

    Parameters
    ----------
    video_path : str
        Path of the video file handed to ``av.open``.

    Returns
    -------
    list of numpy.ndarray
        One array per decoded frame, in decoding order. Each array is
        ``np.asarray`` of the frame's PIL image.
    """
    frames = []
    container = av.open(video_path)
    try:
        for packet in container.demux():
            for frame in packet.decode():
                frames.append(np.asarray(frame.to_image()))
    finally:
        # Release the underlying file even when decoding fails part-way.
        container.close()
    return frames

In [3]:
def equalize_intensity(img, percentiles=(0, 18)):
    """Stretch the intensity of ``img`` between two of its percentiles.

    Parameters
    ----------
    img : numpy.ndarray
        Image whose intensities are rescaled.
    percentiles : tuple of two numbers, optional
        Lower and upper percentile of ``img`` used as the input range.
        Defaults to ``(0, 18)``, the pair that was previously hard-coded.

    Returns
    -------
    numpy.ndarray
        Result of ``rescale_intensity`` with ``in_range`` set to the two
        percentile values.
    """
    low, high = np.percentile(img, percentiles)
    return rescale_intensity(img, in_range=(low, high))

In [4]:
def rgb_threshold(im, thresholds):
    """Black out every pixel that falls outside the given per-channel bounds.

    ``thresholds`` is a sequence of ``(lower, upper)`` pairs, one per
    channel index. A pixel is kept only if each listed channel is strictly
    greater than its lower bound and strictly smaller than its upper
    bound; all other pixels are set to ``(0, 0, 0)``. The input image is
    left untouched and a modified copy is returned.
    """
    out = im.copy()

    # Seed the mask with channel 0's lower bound; the loop re-applies it.
    keep = out[..., 0] > thresholds[0][0]
    for channel, bounds in enumerate(thresholds):
        lower, upper = bounds
        plane = out[..., channel]
        keep = keep & (plane > lower) & (plane < upper)

    out[np.logical_not(keep)] = (0, 0, 0)
    return out

In [5]:
def create_index_grid(image_shape, distance):
    """Create an evenly spaced grid of ``(x, y)`` coordinates across an image.

    Parameters
    ----------
    image_shape : sequence of int
        Image shape; only the first two entries are used.
    distance : int
        Spacing between neighbouring grid points along both axes. Its
        sign is ignored.

    Returns
    -------
    list of tuple
        Every ``(x, y)`` with both coordinates a multiple of ``distance``,
        ordered by ``x`` first and ``y`` second.

    Raises
    ------
    ValueError
        If ``distance`` is zero.
    """
    step = abs(distance)
    if step == 0:
        raise ValueError('distance must be non-zero')

    # Step through the multiples directly rather than testing every pixel.
    return [(xi, yi)
            for xi in range(0, image_shape[0], step)
            for yi in range(0, image_shape[1], step)]

In [6]:
def neighbour_in_range(lower, upper, neighbour, _):
    """Region-growing criterion: is ``neighbour`` strictly between the bounds?

    The fourth argument is accepted so that all criterion functions share
    one signature; it is not used here.
    """
    above_lower = lower < neighbour
    return above_lower and neighbour < upper

In [7]:
def collect_region(seed, visited, im, lower_threshold, upper_threshold, fn=neighbour_in_range, HIGHEST_PIX_VALUE=255):
    """Grow a 4-connected region of pixel coordinates starting at ``seed``.

    Parameters
    ----------
    seed : tuple of int
        ``(x, y)`` coordinate where growing starts. The seed is added to
        the region without being tested by ``fn``.
    visited : set
        Coordinates already assigned to a region. Updated in place with
        every pixel collected here.
    im : numpy.ndarray
        Two-dimensional image that is indexed as ``im[x, y]``.
    lower_threshold, upper_threshold
        Bounds forwarded to ``fn``.
    fn : callable, optional
        Criterion called as ``fn(lower, upper, neighbour_value, pixel_value)``;
        a truthy result queues the neighbour for collection.
    HIGHEST_PIX_VALUE : int, optional
        Unused; kept so existing callers keep working.

    Returns
    -------
    list of tuple
        The coordinates of the collected region, in arbitrary order.
    """
    pending = {seed}
    region = set()
    x_max, y_max = im.shape

    while pending:
        pix = pending.pop()
        if pix in visited:
            continue

        pix_val = im[pix]
        x, y = pix

        # Enumerate the four neighbours explicitly. The previous stepped
        # range collapsed to the pixel itself at x == 0 or y == 0, so the
        # x+1 / y+1 neighbour of a border pixel was never examined.
        for nx, ny in ((x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1)):
            if not (0 <= nx < x_max and 0 <= ny < y_max):
                continue
            if (nx, ny) in visited:
                continue
            if fn(lower_threshold, upper_threshold, im[nx, ny], pix_val):
                pending.add((nx, ny))

        region.add(pix)
        visited.add(pix)

    return list(region)

In [8]:
def collect_all_regions(seeds, im, min_region_size, max_region_size, l_thr, u_thr, fn=neighbour_in_range):
    """Run ``collect_region`` for every seed and gather the accepted regions.

    Parameters
    ----------
    seeds : iterable of tuple
        Candidate start coordinates; seeds already visited are skipped.
    im : numpy.ndarray
        Two-dimensional image handed to ``collect_region``.
    min_region_size, max_region_size : int
        Inclusive bounds on the number of pixels of an accepted region.
    l_thr, u_thr
        Thresholds forwarded to ``collect_region``.
    fn : callable, optional
        Region criterion forwarded to ``collect_region``.

    Returns
    -------
    regions : numpy.ndarray
        With regions of equal size (including the single-region case) an
        integer array of shape ``(n_regions, n_pixels, 2)``. With regions
        of differing sizes, a one-dimensional object array holding one
        ``(n_pixels, 2)`` array per region. ``regions[k]`` can therefore
        always be sliced as ``regions[k][:, 0]``.
    visited : set
        Every coordinate collected while growing regions.
    """
    regions = []
    visited = set()

    for seed in seeds:
        if seed in visited:
            continue

        region = collect_region(seed, visited, im, l_thr, u_thr, fn=fn)
        if min_region_size <= len(region) <= max_region_size:
            regions.append(region)

    if len({len(region) for region in regions}) > 1:
        # Regions of different sizes do not form a rectangular array;
        # recent NumPy rejects such ragged input, so pack them one by one.
        packed = np.empty(len(regions), dtype=object)
        for idx, region in enumerate(regions):
            packed[idx] = np.array(region)
        return packed, visited

    return np.array(regions), visited

In [9]:
def mark_indices(im, indices, exaggerate=False):
    """Return a copy of ``im`` in which the given coordinates are set to 255.

    Parameters
    ----------
    im : numpy.ndarray
        Image to copy; it is not modified.
    indices : sequence of (x, y)
        Integer coordinates to mark. An empty sequence yields a plain copy.
    exaggerate : bool, optional
        Also mark the 3x3 neighbourhood of every coordinate to ease
        visualization. Neighbours outside the image are ignored.

    Returns
    -------
    numpy.ndarray
        The marked copy.
    """
    tp = im.copy()
    ind = np.array(indices)
    if ind.size == 0:
        return tp

    x, y = ind[:, 0], ind[:, 1]
    tp[x, y] = 255

    if exaggerate:
        last_x, last_y = tp.shape[0] - 1, tp.shape[1] - 1
        for i in range(-1, 2):
            for j in range(-1, 2):
                # Clipping maps an out-of-image neighbour onto an in-image
                # pixel of the same 3x3 block, so nothing wraps around to
                # the opposite border and nothing indexes past the end.
                tp[np.clip(x - i, 0, last_x), np.clip(y - j, 0, last_y)] = 255
    return tp

In [10]:
def rgb_to_binary(im):
    """Reduce an image to a uint8 mask holding only the values 0 and 1.

    The image is copied, converted with ``rgb2gray``, multiplied by 256
    and cast to ``uint8``; every non-zero entry of that result becomes 1.
    The input is not modified.
    """
    c = im.copy()
    # NOTE(review): presumably rgb2gray returns floats in [0, 1]; a value of
    # exactly 1.0 would then become 256, which does not fit in uint8. The
    # outcome of that cast decides whether pure white ends up as 0 or 1 -
    # confirm which one callers expect before changing the factor.
    grayscale = (rgb2gray(c)*256).astype('uint8')
    # Collapse every remaining non-zero gray level to 1.
    grayscale[grayscale != 0] = 1
    return grayscale

In [11]:
def locate_frame(region):
    """Return the bounding box of a region of coordinates.

    ``region`` is an array whose first column holds x and whose second
    column holds y. The result is ordered ``(max_x, max_y, min_x, min_y)``.
    """
    xs = region[:, 0]
    ys = region[:, 1]
    upper = (max(xs), max(ys))
    lower = (min(xs), min(ys))
    return upper + lower

In [12]:
def draw_bw_frame(im_shape, max_x, max_y, min_x, min_y, thickness=3):
    """Draw the outline of a bounding box on a black canvas.

    Parameters
    ----------
    im_shape : tuple of int
        Shape of the canvas to create.
    max_x, max_y, min_x, min_y : int
        Bounding box in the order returned by ``locate_frame``.
    thickness : int, optional
        Half-width of each edge: an edge covers ``thickness`` pixels on
        either side of the box boundary. Defaults to 3.

    Returns
    -------
    numpy.ndarray
        Array of zeros of shape ``im_shape`` with the outline set to 255.
    """
    g = np.zeros(im_shape)

    def band(center):
        # Clamp the start at 0: a negative start would be read as an index
        # from the end of the axis and the edge would silently vanish.
        return slice(max(center - thickness, 0), center + thickness)

    g[band(max_x), min_y:max_y] = 255
    g[band(min_x), min_y:max_y] = 255

    g[min_x:max_x, band(min_y)] = 255
    g[min_x:max_x, band(max_y)] = 255

    return g

In [13]:
def gray_to_color(im, color):
    """Turn a single-channel image into an RGB image drawn in one pure color.

    Parameters
    ----------
    im : numpy.ndarray
        Two-dimensional image; every entry greater than 0 is colored.
    color : str
        One of ``'red'``, ``'green'`` or ``'blue'``.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(rows, cols, 3)`` that is 255 in the
        selected channel wherever ``im > 0`` and 0 everywhere else.

    Raises
    ------
    AssertionError
        If ``color`` is not one of the supported names.
    """
    colors = {'red': 0, 'green': 1, 'blue': 2}
    assert (color in colors)

    # Allocate the black canvas in one call instead of nested Python lists.
    frame = np.zeros((im.shape[0], im.shape[1], 3), dtype=int)
    frame[im > 0, colors[color]] = 255

    return frame

In [14]:
def locate_rgb_regions(rgb_im, seed, min_size, max_size, l_thr, u_thr):
    """Find connected regions of set pixels in a thresholded RGB image.

    The image is binarised with ``rgb_to_binary`` and scaled by 255, seeds
    are placed on a grid with spacing ``seed``, and regions are grown from
    those seeds by ``collect_all_regions``. Only the regions are returned;
    the set of visited pixels is dropped.
    """
    binary_255 = rgb_to_binary(rgb_im.copy()) * 255
    grid = create_index_grid(binary_255.shape, seed)
    found = collect_all_regions(grid, binary_255, min_size, max_size, l_thr, u_thr)
    return found[0]

In [15]:
def overlap_frames(underlying, overlying):
    """Paste the non-zero entries of ``overlying`` onto a copy of ``underlying``.

    Entries of ``overlying`` that are zero leave the underlying value
    visible. Neither input is modified.
    """
    merged = underlying.copy()
    set_entries = np.nonzero(overlying)
    merged[set_entries] = overlying[set_entries]
    return merged

In [16]:
def extract_candidate_frame(im, max_x, max_y, min_x, min_y):
    """Cut a roughly square window around a bounding box out of ``im``.

    The shorter side of the box is widened symmetrically until it matches
    the longer side (up to integer halving). A window that would stick
    out past the image is shifted back inside along that axis. The slice
    ``im[min_x:max_x, min_y:max_y]`` of the adjusted box is returned as a
    new array; ``im`` must have a trailing channel axis.
    """
    x_limit, y_limit = im.shape[:-1]
    side = max(max_x - min_x, max_y - min_y)

    def fit_axis(low, high, limit):
        # Pad both ends by half of what is missing to reach `side`.
        pad = (side - (high - low)) // 2
        low -= pad
        high += pad
        if low < 0:
            # Sticking out at the start: shift the window forward.
            high += -low
            low = 0
        else:
            overshoot = high - (limit - 1)
            if overshoot > 0:
                # Sticking out at the end: shift the window backward.
                low -= overshoot
                high = limit - 1
        return low, high

    x_lo, x_hi = fit_axis(min_x, max_x, x_limit)
    y_lo, y_hi = fit_axis(min_y, max_y, y_limit)

    return np.array(im[x_lo:x_hi, y_lo:y_hi])

In [17]:
def add_colored_thumbnail(original_frame, candidate, candidate_validity, size=128):
    """Paste a tinted thumbnail of ``candidate`` into a frame's bottom-right corner.

    Parameters
    ----------
    original_frame : numpy.ndarray
        Frame to decorate; it is copied, not modified.
    candidate : numpy.ndarray
        Image array accepted by ``Image.fromarray``; it is stretched to
        ``size`` x ``size`` pixels.
    candidate_validity : bool
        Truthy blends the thumbnail with white, falsy blends it with red.
    size : int, optional
        Edge length of the thumbnail in pixels, a positive integer.
        Defaults to 128.

    Returns
    -------
    numpy.ndarray
        Copy of ``original_frame`` carrying the blended thumbnail.
    """
    # Stretch the candidate
    thumbnail = Image.fromarray(candidate).resize((size, size))

    # White background for a valid candidate, red for an invalid one
    tint = (255, 255, 255) if candidate_validity else (255, 0, 0)
    background = np.empty((size, size, 3), dtype='uint8')
    background[:] = tint

    # Blend the candidate image and the colored background in equal parts
    blended = Image.blend(Image.fromarray(background), thumbnail, alpha=0.5)

    # And apply it to the bottom right corner of the original frame
    frame = original_frame.copy()
    frame[-size:, -size:] = np.asarray(blended)

    return frame

In [18]:
def crop_content(frame):
    """Crop an image tightly around its non-white content.

    Parameters
    ----------
    frame : numpy.ndarray
        Image with a trailing channel axis. A pixel counts as content
        when any of its channels differs from 255.

    Returns
    -------
    numpy.ndarray
        The part of a copy of ``frame`` spanning the first to the last
        content row and column, both inclusive. When the frame holds no
        content at all, the whole copy is returned.
    """
    f = frame.copy()

    content = (f != 255).any(axis=-1)
    rows = np.flatnonzero(content.any(axis=1))
    cols = np.flatnonzero(content.any(axis=0))

    if rows.size == 0:
        # Nothing to crop around; hand back the whole frame rather than
        # an empty array.
        return f

    # Slice ends are exclusive, so add 1 to keep the last content row/column.
    return f[rows[0]:rows[-1] + 1, cols[0]:cols[-1] + 1]

In [19]:
def periferal_pixels(im, width, threshold=0):
    """Tell whether the border of an image contains more set pixels than allowed.

    Parameters
    ----------
    im : numpy.ndarray
        Image that is binarised with ``rgb_to_binary``.
    width : int
        Thickness, in pixels, of the border strips inspected along each
        of the four sides.
    threshold : int, optional
        Largest border total that is still tolerated. Defaults to 0.

    Returns
    -------
    bool
        ``True`` when the summed values of the four border strips exceed
        ``threshold``. The corner areas belong to two strips each and
        therefore count twice.
    """
    gray = rgb_to_binary(im)
    max_x, max_y = gray.shape

    strips = (
        gray[0:width, 0:max_y],
        gray[max_x-width:max_x, 0:max_y],
        gray[0:max_x, 0:width],
        gray[0:max_x, max_y-width:max_y],
    )

    # Accumulate in a wide integer type: summing the uint8 elements with the
    # builtin sum() can stay in uint8 and wrap around on current NumPy.
    perifery_pixels = sum(int(strip.sum(dtype=np.int64)) for strip in strips)

    return perifery_pixels > threshold

In [None]:
def calculator():
    """Generator that collects an equation symbol by symbol and evaluates it.

    Prime it with ``next()``, which yields ``' '``. Each ``send(symbol)``
    appends the symbol to the equation and yields the symbol back, until
    ``'='`` is sent: that call yields the value of the collected equation
    (the trailing ``'='`` is stripped before evaluation).
    """
    expression = ' '
    token = ' '
    while True:
        token = yield token
        expression = expression + token
        if token == '=':
            break
    # NOTE(review): eval() executes arbitrary Python. Acceptable only while
    # the symbols come from a trusted source; restrict to an arithmetic
    # parser if the input can ever be influenced externally.
    yield eval(expression[:-1])

In [424]:
def visualize_equation(f):
    """Generator that overlays the growing equation text on video frames.

    ``f`` is only used as a template: a copy of it is blanked and turned
    into the overlay image on which the text is drawn.

    Protocol per output frame, as driven by the caller:

    1. ``next(gen)`` - advances to the start of a round (the value is unused).
    2. ``gen.send(frame)`` - supplies the frame to decorate.
    3. ``gen.send(validity)`` - supplies whether a new symbol was accepted.
    4. ``gen.send(symbol)`` - supplies the symbol; the value returned by
       this call is the frame with the overlay applied.

    When ``validity`` is truthy the symbol is drawn to the right of the
    previously drawn text before the overlay is applied.
    """
    mask = f.copy()
    
    # max_x is unpacked but not used below.
    max_y, max_x = mask.shape[:-1]
    
    # Blank the overlay, then set the bottom 64 rows (leaving out the last
    # 128 columns) to the non-zero value 1. overlap_frames copies non-zero
    # overlay entries only, so this strip replaces the frame content there.
    mask[:,:,:] = [0,0,0]
    mask[-64:, :-128, :] = [1, 1, 1] 
    
    mask = Image.fromarray(mask)
    draw = ImageDraw.Draw(mask)
    
    offset = 5
    font_size = 30
    # NOTE(review): 'arial' has to be resolvable by Pillow on the host - confirm.
    font = ImageFont.truetype('arial', font_size)
    draw.text((offset, max_y-50),"Equation: ",(255,255,255), font=font)
    
    # Move the horizontal offset past the label just drawn.
    offset += 0.6*font_size*8
    
    # Placeholder values handed out by the first three yields of each round.
    frame, validity, symbol = (mask.copy(), False, '')
    
    while True:
        # Three consecutive yields receive frame, validity and symbol via send().
        f = (yield frame)
        v = (yield validity)
        s = (yield symbol)        
        if v:
            # Draw the accepted symbol and advance the horizontal offset.
            draw.text((offset, max_y-50), s ,(255,255,255), font=font)
            offset += 0.6*font_size*1.5
        # The decorated frame becomes the return value of send(symbol).
        yield overlap_frames(f, np.array(mask))

In [425]:
def output_frames(frames, path, rate=2):
    """Encode a sequence of RGB frames into an MPEG-4 video file.

    Parameters
    ----------
    frames : sequence of numpy.ndarray
        Frames of identical shape ``(height, width, channels)`` that are
        passed to ``av.VideoFrame.from_ndarray`` as ``'rgb24'``.
    path : str
        Destination file handed to ``av.open`` in write mode.
    rate : int, optional
        Frame rate of the video stream. Defaults to 2.

    Raises
    ------
    IndexError
        If ``frames`` is empty. This is raised before the output file is
        opened, so no file is created in that case.
    """
    # Read the geometry first so an empty input fails before any file I/O.
    (h, w) = frames[0].shape[:-1]

    container = av.open(path, mode='w')
    try:
        stream = container.add_stream('mpeg4', rate=rate)
        stream.width = w
        stream.height = h
        stream.pix_fmt = 'yuv420p'

        for f in frames:
            frame = av.VideoFrame.from_ndarray(f, format='rgb24')
            for packet in stream.encode(frame):
                container.mux(packet)

        # Flush the packets still buffered in the encoder.
        for packet in stream.encode():
            container.mux(packet)
    finally:
        # Close the container even when encoding fails part-way.
        container.close()

__Program flow__

 * Load .avi movie frames into `frames` array.
 * Equalize intensity of the `frames`, store in `eq_frames`.
 * Assume the vehicle does not cover any number or operator in the first frame. Use it as a reference.
 * Filter the equalized frames for __red__ objects. Find the single red object (the arrow) in every frame, using region growing.
 * Draw a rectangular frame around the arrow for visualization purposes.
 * For every subsequent frame in `eq_frames`:
    * Fetch the area beneath the vehicle from the reference frame.
    * Call this area a `candidate` - that is - a frame that might be sent to the Neural Net for classification if the following is __True__:
       * It is not mostly white, i.e. its average pixel value is below 254.
       * It does not contain part of the arrow.
       * It does not have any pixels set in its periphery (i.e. the entire region is encapsulated within the frame).
       * It contains at most three regions of __either__ black or blue color. Frames containing multiple colors are rejected. (This check is currently commented out in the main loop below.)
    * Candidate is cropped and resized to 32x32 pixels if valid.
  

In [426]:
def classifier():
    """Mock classifier that replays a fixed sequence of symbols.

    Each round takes two steps: ``next()`` yields the previously received
    image (an empty list at first), then ``send(image)`` stores the image
    and yields the next symbol of the canned sequence.
    """
    canned_symbols = ('3', '3', '/', '2', '2', '+', '+', '+', '7', '*', '2', '2', '=')
    received = []
    for label in canned_symbols:
        received = (yield received)
        yield label

In [427]:
# Main script: the statements below run at cell level (not wrapped in a main()).

# Mock classifier generator; see classifier() for its next()/send() protocol
clf = classifier()

# Equation accumulator. Feed it symbols with send(); it yields the result when fed '='
calc = calculator()
# Prime the generator so that it can accept send()
next(calc)

src_path = 'src/robot_parcours_1.avi'
frames = load_frames(src_path)

n = len(frames)
print('(main) Loaded {} frames'.format(n))

print('(main) Equalizing frame intensities')
eq_frames = [equalize_intensity(f) for f in frames]

# Use first frame as reference
reference_frame = eq_frames[0]
# Arrow pixels of the first frame; filled in during the first loop iteration
reference_region = set()

# Threshold frames to find the _red_ arrow
# (bounds per channel: red in (180, 256), green and blue in (-1, 190))
arrows = [rgb_threshold(f, ((180, 256), (-1,190), (-1,190))) for f in eq_frames]

# Black images containing a colored rectangle indicating the position of the arrow
arrow_rectangles = []

# Extract from the reference image the area beneath vehicle for every frame
candidates = []

# Boolean: False if candidate is not qualified for classification
candidate_validities = []

# Helper variables for equation integrity purposes
# `symbols` starts with a blank so that symbols[-1] is always defined
symbols = ' '
# Set to False once '=' has been accepted
active_equation = True


# Per-frame interpretation: locate the arrow, cut the matching area out of
# the reference frame and, if that candidate qualifies, classify it and feed
# the symbol to the calculator.
for i, a in enumerate(arrows):

    print('(main) Interpreting frame {}'.format(i+1), end='\r')

    # Do not consider anything for classification after a '=' has been registered
    valid = active_equation

    # Symbol subject to change if a valid classification is made
    symbol = 'N'

    # Locate the arrow in this frame
    arrow_regions = locate_rgb_regions(a, 10, 1000, 3000, 250, 256)
    assert (len(arrow_regions)==1), 'Expected exactly one arrow region in frame {}, found {}'.format(i, len(arrow_regions))

    # Draw surrounding rectangle
    max_x, max_y, min_x, min_y = locate_frame(arrow_regions[0])
    frame = draw_bw_frame(a.shape[:-1], max_x, max_y, min_x, min_y)
    arrow_rectangles.append(gray_to_color(frame, 'green'))

    # Discard candidate if the arrow still overlaps the pixels it occupied in
    # the first frame (the candidate is cut from that reference frame and
    # would contain part of the arrow)
    arrow_pixels = set(tuple(p) for p in arrow_regions[0])
    if i == 0: reference_region = arrow_pixels
    valid &= len(arrow_pixels & reference_region)==0

    # Extract candidate frame of area beneath vehicle
    candidate = extract_candidate_frame(reference_frame, max_x, max_y, min_x, min_y)

    # Discard candidate if mostly empty.
    # ndarray.mean() accumulates in floating point; summing the uint8
    # elements with the builtin sum() can wrap around on current NumPy.
    if valid:
        valid = bool(candidate.mean() < 254)

    # Discard candidate if there exist objects on the border
    if valid:
        valid = not periferal_pixels(candidate, 5)

    # Disabled check, kept for reference:
    # 'Candidate invalid if it contains more than three objects (division operator)'
    #if valid:
    #    black_content = rgb_threshold(candidate, ((-1, 256), (-1, 256), (-1, 120)))
    #    black_objects = len(locate_rgb_regions(black_content, 2, 10, 1000, 2, 256))
    #
    #    blue_content = rgb_threshold(candidate, ((-1, 120), (-1, 200), (120, 256)))
    #    blue_objects = len(locate_rgb_regions(blue_content, 2, 10, 1000, 2, 256))
    #
    #    valid = ( (blue_objects in (1,2,3) and black_objects == 0) or
    #              (blue_objects == 0 and black_objects in (1,2,3))
    #            )
    #    print('pic {} validity: {}. Found {} black objects and {} blue objects'.format(i, valid, black_objects, blue_objects))

    if valid:

        # Stays False unless the classified symbol is accepted below
        valid = False

        # Tight crop to ease classification
        candidate = crop_content(candidate)
        candidate = np.array(Image.fromarray(candidate).resize((32,32)))

        # Fetch symbol from classifier
        next(clf)
        symbol = clf.send(candidate)

        # Do not accept two identical symbols who are not separated by at least one invalid frame
        if not symbol==symbols[-1]:

            valid = True

            # Send validated symbol to calculator; for '=' the returned
            # value is the evaluated equation
            result = calc.send(symbol)

            if symbol == '=': active_equation = False

    # Candidate is not valid, and is resized - uncropped.
    if not valid: candidate = np.array(Image.fromarray(candidate).resize((32,32)))

    symbols += symbol

    # For visualization purposes
    candidates.append(candidate)
    candidate_validities.append(valid)
    
    
print('(main) Successfully evaluated all frames!   ')
print('(main) Visualizing arrow path')
# Add the surrounding rectangles to the original footage
original_and_arrow_trace = [overlap_frames(f, a) for f, a in zip(frames, arrow_rectangles)]

print('(main) Visualizing candidate frames')
# Add colored thumbnail of candidate
with_thumbnail = [add_colored_thumbnail(f, c, v) for f, c, v in zip(original_and_arrow_trace, candidates, candidate_validities)]

with_equation = []
viz = visualize_equation(reference_frame)

print('(main) Visualizing equation')
# symbols[1:] skips the leading blank that `symbols` was initialised with
for f, v, s in zip(with_thumbnail, candidate_validities, symbols[1:]):
    # One visualize_equation round: advance, then send frame, validity, symbol
    next(viz)
    viz.send(f), viz.send(v)
    frame = viz.send(s)
    if s == '=':
        # Run a second round on the same frame to draw the computed result
        next(viz)
        viz.send(frame), viz.send(True)
        frame = viz.send(str(result))
    with_equation.append(frame)

# Output
output_frames(with_equation, 'out/test_equation.avi')

(main) Loaded 42 frames
(main) Equalizing frame intensities
(main) Successfully evaluated all frames!   
(main) Visualizing arrow path
(main) Visualizing candidate frames
(main) Visualizing equation


bitrate tolerance 128000 too small for bitrate 1024000, overriding


In [25]:
# Write every valid candidate to disk as a PNG named after its frame index.
for i, (c, v) in enumerate(zip(candidates, candidate_validities)):
    if not v:
        continue
    c = Image.fromarray(c)
    c.save('out/candidates/im{}.png'.format(i))