In [3]:
#!pip install -r requirements.txt

import av
import cv2
import numpy as np
import skimage
from skimage.color import rgb2gray
from skimage.exposure import rescale_intensity
from PIL import Image, ImageDraw, ImageFont
from matplotlib import pyplot as plt
from time import time

In [2]:
def load_frames(video_path):
    """Decode every frame of a video file into a list of arrays.

    Parameters
    ----------
    video_path : str
        Path handed to ``av.open``.

    Returns
    -------
    list
        One ``np.asarray(frame.to_image())`` array per decoded frame,
        in decoding order.
    """
    frames = []
    container = av.open(video_path)
    try:
        for packet in container.demux():
            for frame in packet.decode():
                frames.append(np.asarray(frame.to_image()))
    finally:
        # Always release the underlying file, even when decoding fails midway.
        container.close()
    return frames

In [3]:
def equalize_intensity(img):
    """Rescale `img` with `rescale_intensity`, using its 0th and 18th percentiles as input range."""
    # NOTE(review): these bounds are the 0th and 18th percentiles (not the
    # 2nd/98th) -- presumably tuned for this footage; confirm before changing.
    lower_bound, upper_bound = np.percentile(img, (0, 18))
    return rescale_intensity(img, in_range=(lower_bound, upper_bound))

In [4]:
def rgb_threshold(im, thresholds):
    """Black out every pixel that is not strictly inside the given per-channel bounds.

    `thresholds` is a sequence of (lower, upper) pairs, one per channel index.
    A pixel is kept only if lower < value < upper holds for every listed
    channel. A modified copy is returned; `im` itself is left untouched.
    """
    result = im.copy()

    # Seed the mask with the first channel's lower bound, then narrow it down.
    keep = result[:, :, 0] > thresholds[0][0]
    for channel, bounds in enumerate(thresholds):
        lower, upper = bounds
        plane = result[:, :, channel]
        keep &= (plane > lower)
        keep &= (plane < upper)

    rejected = ~keep
    result[rejected] = (0, 0, 0)
    return result

In [5]:
def create_index_grid(image_shape, distance):
    """Return (x, y) tuples spaced `distance` apart over the first two axes of `image_shape`.

    Both axes start at 0; the list is ordered by x first, then by y.
    """
    grid = []
    for x in range(0, image_shape[0], distance):
        grid.extend((x, y) for y in range(0, image_shape[1], distance))
    return grid

In [6]:
def collect_region(seed, visited, im, lower_threshold, upper_threshold):
    """Grow a 4-connected region of in-threshold pixels starting from `seed`.

    Parameters
    ----------
    seed : tuple
        (x, y) start coordinate. It is added to the region without being
        threshold-checked itself, unless it is already in `visited`.
    visited : set
        Coordinates already claimed by earlier calls. Mutated in place: every
        pixel of the returned region is added to it.
    im : 2-D array
        Image indexed as im[x, y].
    lower_threshold, upper_threshold
        A neighbour joins the region when
        lower_threshold < im[x, y] < upper_threshold (both strict).

    Returns
    -------
    list
        The (x, y) tuples of the region, in no particular order.
    """
    detected = {seed}
    region = set()

    x_max, y_max = im.shape

    while detected:
        pix = detected.pop()
        if pix in visited:
            continue

        x, y = pix

        # Examine the four direct neighbours explicitly. The previous
        # range(max(x-1, 0), min(x+2, x_max), 2) form produced only `x` itself
        # when x == 0 (likewise for y), so pixels on the first row/column
        # never looked at their +1 neighbour.
        for nx, ny in ((x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1)):
            if not (0 <= nx < x_max and 0 <= ny < y_max):
                continue
            if (nx, ny) in visited:
                continue
            if lower_threshold < im[nx, ny] < upper_threshold:
                detected.add((nx, ny))

        region.add(pix)
        visited.add(pix)

    return list(region)

In [7]:
def collect_all_regions(seeds, im, min_region_size, max_region_size, l_thr, u_thr):
    """Run collect_region for every unvisited seed and keep the regions within the size limits.

    Parameters
    ----------
    seeds : iterable of (x, y) tuples
        Start coordinates, tried in order; seeds already absorbed by an
        earlier region are skipped.
    im : 2-D array
        Image handed to collect_region.
    min_region_size, max_region_size : int
        Inclusive bounds on the number of pixels of an accepted region.
    l_thr, u_thr
        Thresholds forwarded to collect_region.

    Returns
    -------
    numpy.ndarray
        When all accepted regions have the same size (including the zero- and
        one-region cases) this is ``np.array(regions)``, as before. When the
        sizes differ, a 1-D object array holding one coordinate array per
        region is returned.
    """
    regions = []
    visited = set()

    for seed in seeds:
        if seed in visited:
            continue

        region = collect_region(seed, visited, im, l_thr, u_thr)

        if min_region_size <= len(region) <= max_region_size:
            regions.append(region)

    if len({len(region) for region in regions}) <= 1:
        return np.array(regions)

    # Regions of different sizes cannot be stacked into one rectangular array;
    # np.array() on such ragged input is an error on current NumPy, so the
    # object array is built explicitly.
    ragged = np.empty(len(regions), dtype=object)
    for index, region in enumerate(regions):
        ragged[index] = np.array(region)
    return ragged

In [8]:
def rgb_to_binary(im):
    """Convert an RGB image to a uint8 mask of 0s and 1s.

    The grayscale value from `rgb2gray` is scaled by 256, truncated to an
    integer and reduced modulo 256; the mask is 1 wherever that level is
    non-zero. A scaled value of exactly 256 therefore maps to 0, like a scaled
    value below 1 does.

    NOTE(review): callers appear to rely on full-intensity pixels ending up as
    0 (background) -- confirm before changing the modulo.
    """
    # Make the wrap-around explicit: casting the out-of-range float 256.0
    # straight to uint8 is platform-dependent, while int64 + modulo is not.
    levels = (rgb2gray(im) * 256).astype('int64') % 256
    return (levels != 0).astype('uint8')

In [9]:
def locate_frame(region):
    """Return the bounding box of `region` as (max_x, max_y, min_x, min_y).

    `region` is indexed as region[:, 0] for x and region[:, 1] for y.
    """
    xs = region[:, 0]
    ys = region[:, 1]
    return xs.max(), ys.max(), xs.min(), ys.min()

In [10]:
def draw_bw_rectangle(im_shape, max_x, max_y, min_x, min_y, thickness=3):
    """Draw the outline of a bounding box into an otherwise zero-filled array.

    Parameters
    ----------
    im_shape : tuple
        Shape of the array to create.
    max_x, max_y, min_x, min_y : int
        Bounding box corners (first axis is x, second is y).
    thickness : int, optional
        Each edge is painted from `thickness` pixels before to `thickness`
        pixels after its coordinate. Defaults to 3.

    Returns
    -------
    numpy.ndarray
        Array of shape `im_shape` (default float dtype of np.zeros) with the
        outline set to 255.
    """
    g = np.zeros(im_shape)

    def start(coord):
        # Clamp at 0: a negative slice start would wrap around to the far side
        # of the array and yield an empty slice, silently dropping the edge.
        return max(coord - thickness, 0)

    g[start(max_x):max_x + thickness, min_y:max_y] = 255
    g[start(min_x):min_x + thickness, min_y:max_y] = 255

    g[min_x:max_x, start(min_y):min_y + thickness] = 255
    g[min_x:max_x, start(max_y):max_y + thickness] = 255

    return g

In [11]:
def gray_to_color(gray_frame, color):
    """Paint every non-zero pixel of `gray_frame` in a pure primary color.

    `color` must be 'red', 'green' or 'blue'. The result is a uint8 array with
    the first two dimensions of `gray_frame` plus a trailing axis of 3; pixels
    that are zero in `gray_frame` stay (0, 0, 0).
    """
    channel_index = {'red':0, 'green':1, 'blue':2}
    assert (color in channel_index)

    active = channel_index[color]
    pixel = [255 if k == active else 0 for k in range(3)]

    height, width = gray_frame.shape[0], gray_frame.shape[1]
    colored = np.zeros((height, width, 3)).astype('uint8')

    colored[gray_frame != 0] = pixel
    return colored

In [12]:
def locate_rgb_regions(rgb_im, seed, min_size, max_size, l_thr, u_thr):
    """Find the connected regions of an RGB image via its binary mask.

    The image is binarised with rgb_to_binary and scaled by 255, seeds are
    placed on a grid with spacing `seed`, and the remaining arguments are
    forwarded to collect_all_regions, whose result is returned.
    """
    binary_255 = rgb_to_binary(rgb_im.copy()) * 255
    grid = create_index_grid(binary_255.shape, seed)
    return collect_all_regions(grid, binary_255, min_size, max_size, l_thr, u_thr)

In [13]:
def overlap_frames(underlying, overlying):
    """Return a copy of `underlying` with every non-zero element of `overlying` pasted on top."""
    merged = underlying.copy()
    # Locate the non-zero overlay elements once and reuse the index tuple.
    hits = np.nonzero(overlying)
    merged[hits] = overlying[hits]
    return merged

In [14]:
def extract_candidate_frame(im, max_x, max_y, min_x, min_y):
    """Cut a roughly square window around a bounding box out of `im`.

    The shorter side of the box is widened (symmetrically, by integer halves)
    towards the length of the longer side. If the widened span sticks out past
    index 0 or past the last index of its axis, it is shifted back inside.
    A new array holding im[min_x:max_x, min_y:max_y] for the adjusted bounds
    is returned.
    """

    def fit_axis(low, high, side, limit):
        # Widen [low, high] towards `side`, then shift it back inside [0, limit-1].
        half_pad = (side - (high - low)) // 2
        low -= half_pad
        high += half_pad
        if low < 0:
            # Sticks out on the low side: push the span up by the overshoot.
            high += -low
            low = 0
        else:
            overshoot = high - (limit - 1)
            if overshoot > 0:
                # Sticks out on the high side: pull the span down instead.
                low -= overshoot
                high = limit - 1
        return low, high

    x_limit, y_limit = im.shape[:-1]
    side = max(max_x - min_x, max_y - min_y)

    min_x, max_x = fit_axis(min_x, max_x, side, x_limit)
    min_y, max_y = fit_axis(min_y, max_y, side, y_limit)

    return np.array(im[min_x:max_x, min_y:max_y])

In [15]:
def add_colored_thumbnail(original_frame, candidate, candidate_validity):
    """Paste a tinted 128x128 thumbnail of `candidate` into the bottom-right corner of a frame copy.

    The thumbnail is blended 50/50 with a white background when
    `candidate_validity` is truthy and with a red one otherwise.
    `original_frame` is not modified.
    """
    side = 128

    # Stretch the candidate to thumbnail size.
    thumbnail = Image.fromarray(candidate).resize((side, side))

    # White background for a valid candidate, red for an invalid one.
    tint = (255, 255, 255) if candidate_validity else (255, 0, 0)
    background = Image.fromarray(np.full((side, side, 3), tint).astype('uint8'))

    # Blend the candidate image with the colored background.
    blended = Image.blend(background, thumbnail, alpha=0.5)

    # Apply it to the bottom-right corner of a copy of the original frame.
    frame = original_frame.copy()
    frame[-side:, -side:] = blended

    return frame

In [16]:
def crop_content(frame):
    """Crop `frame` to the bounding box of its non-white pixels.

    A pixel counts as content when any of its channels differs from 255.

    Parameters
    ----------
    frame : numpy.ndarray
        Image whose last axis holds the channels.

    Returns
    -------
    numpy.ndarray
        A copy of the smallest window containing all content pixels, bounds
        inclusive. An empty (0 x 0) window is returned when the frame has no
        content at all.
    """
    content = (frame != 255).any(axis=-1)

    rows = np.flatnonzero(content.any(axis=1))
    cols = np.flatnonzero(content.any(axis=0))

    if rows.size == 0:
        # Nothing but white: keep returning an empty crop for this case.
        return frame[:0, :0].copy()

    # +1 because slice ends are exclusive: the last content row and column
    # belong to the crop (the previous per-pixel scan dropped them).
    return frame[rows[0]:rows[-1] + 1, cols[0]:cols[-1] + 1].copy()

In [17]:
def periferal_pixels(im, width, threshold=0):
    """Tell whether the border band of `im` contains more than `threshold` set pixels.

    Parameters
    ----------
    im : array
        Image handed to rgb_to_binary.
    width : int
        Thickness of the band inspected along each of the four sides.
    threshold : int, optional
        Number of set pixels that is still tolerated. Defaults to 0.

    Returns
    -------
    bool
        True when the summed mask values of the four border strips exceed
        `threshold`. The strips overlap in the corners, so corner pixels are
        counted twice.
    """
    gray = rgb_to_binary(im)
    max_x, max_y = gray.shape

    strips = (
        gray[0:width, 0:max_y],
        gray[max_x-width:max_x, 0:max_y],
        gray[0:max_x, 0:width],
        gray[0:max_x, max_y-width:max_y],
    )

    # Accumulate in a wide integer: summing uint8 elements with the builtin
    # sum() can wrap around at 256 and walks the array in pure Python.
    perifery_pixels = sum(int(strip.sum(dtype=np.int64)) for strip in strips)

    return perifery_pixels > threshold

In [18]:
def calculator():
    """Generator that collects equation symbols and evaluates them on '='.

    After priming with next(), every send(symbol) appends the symbol to the
    equation and yields that same symbol back, until '=' is sent: then the
    accumulated expression (without the '=') is evaluated and its value is
    yielded.
    """
    expression = ' '
    token = ' '
    while True:
        token = yield token
        expression += token
        if token == '=':
            break
    # NOTE(review): eval() executes whatever was sent in; only feed this
    # generator symbols from a trusted, restricted alphabet.
    yield eval(expression[:-1])

In [19]:
def visualize_equation(shape):
    """Generator that keeps an 'Equation: ...' banner and overlays it on frames.

    Protocol, repeated for every frame:
      1. next()        -> placeholder (ignored by the caller)
      2. send(frame)   -> placeholder
      3. send(valid)   -> placeholder
      4. send(symbol)  -> `frame` with the banner overlaid

    When `valid` is truthy, `symbol` is first appended to the banner text.

    Parameters
    ----------
    shape : tuple
        Shape of the frames, (height, width, channels); the banner mask is
        created with exactly this shape.
    """
    mask = np.zeros(shape).astype('uint8')

    max_y = mask.shape[0]

    # Non-zero (1, 1, 1) marks the banner strip so that overlap_frames, which
    # copies non-zero elements only, pastes it over the frame.
    mask[-64:, :-128, :] = [1, 1, 1]

    mask = Image.fromarray(mask)
    draw = ImageDraw.Draw(mask)

    offset = 5
    font_size = 30
    try:
        font = ImageFont.truetype('arial', font_size)
    except OSError:
        # 'arial' is not installed everywhere; fall back to Pillow's built-in
        # font rather than aborting the whole run.
        font = ImageFont.load_default()
    draw.text((offset, max_y-50),"Equation: ",(255,255,255), font=font)

    # Move the cursor past the label.
    offset += 0.6*font_size*8

    frame, validity, symbol = (mask.copy(), False, '')

    while True:
        f = (yield frame)
        v = (yield validity)
        s = (yield symbol)
        if v:
            draw.text((offset, max_y-50), s ,(255,255,255), font=font)
            offset += 0.6*font_size*1.5
        yield overlap_frames(f, np.array(mask))

In [20]:
def poke_visualizer(viz, im, symbol, valid, result):
    """Drive one round of the visualizer generator and return the rendered frame.

    A round advances `viz` once and then sends the image, a validity flag and
    a text, in that order. When `symbol` is '=', a second round is run with
    the validity forced to True and str(result) as the text, and its frame is
    returned instead.
    """

    def run_round(validity, text):
        next(viz)
        viz.send(im)
        viz.send(validity)
        return viz.send(text)

    frame = run_round(valid, symbol)
    if symbol == '=':
        frame = run_round(True, str(result))
    return frame

In [21]:
def make_video(frames, path, rate=2, codec='mpeg4'):
    """Encode a sequence of RGB frames into a video file.

    Parameters
    ----------
    frames : sequence of numpy.ndarray
        Frames passed to ``av.VideoFrame.from_ndarray(..., format='rgb24')``.
        The stream's height and width are taken from ``frames[0]``.
    path : str
        Output file, opened with ``av.open(path, mode='w')``.
    rate : int, optional
        Frame rate of the stream. Defaults to 2.
    codec : str, optional
        Codec name handed to ``add_stream``. Defaults to 'mpeg4'.
    """
    # Read the frame size first, so an empty `frames` fails before any output
    # file has been created.
    (h, w) = frames[0].shape[:-1]

    container = av.open(path, mode='w')
    try:
        stream = container.add_stream(codec, rate=rate)
        stream.width = w
        stream.height = h
        stream.pix_fmt = 'yuv420p'

        for f in frames:
            frame = av.VideoFrame.from_ndarray(f, format='rgb24')
            for packet in stream.encode(frame):
                container.mux(packet)

        # Calling encode() without a frame flushes what the encoder still holds.
        for packet in stream.encode():
            container.mux(packet)
    finally:
        # Close the container even if encoding raised.
        container.close()

__Program flow__

 * Load .avi movie frames into `frames` array.
 * Equalize intensity of the `frames`, store in `eq_frames`.
 * Assume the vehicle does not cover any number or operator in the first frame. Use it as a reference.
 * Filter the equalized frames for __red__ objects. Find the single red object (the arrow) in every frame, using region growing.
 * Draw a rectangular frame around the arrow for visualization purposes.
 * For every subsequent frame in `eq_frames`:
    * Fetch the area beneath the vehicle from the reference frame.
    * Call this area a `candidate` - that is - a frame that might be sent to the Neural Net for classification if the following is __True__:
       * It is not mostly white (i.e. its average pixel value is below 254).
       * It does not contain part of the arrow.
       * It does not have any pixels set in its periphery (i.e. the entire region is encapsulated within the frame).
       * It contains at most three regions of __either__ black or blue color. Frames containing multiple colors are rejected.
    * Candidate is cropped and resized to 32x32 pixels if valid.
  

In [22]:
def classifier():
    """Mock classifier: replays a fixed symbol sequence, one symbol per (next, send) pair.

    next() yields the previously sent image (an empty list before the first
    send); send(image) stores the image and yields the next canned symbol.
    The generator is exhausted once all symbols have been handed out.
    """
    last_image = []
    for canned_symbol in '33/22+++7*22=':
        last_image = (yield last_image)
        yield canned_symbol

In [23]:
def main():
    """Run the full pipeline on the source video and write the annotated result.

    For every frame the arrow is located, the matching area of the reference
    (first) frame is extracted as a candidate, the candidate is validated and
    - if valid - classified and fed to the calculator. Each frame is then
    annotated with the arrow rectangle, a candidate thumbnail and the equation
    banner, and all annotated frames are written to 'out/effective.avi'.
    """
    import os  # function-scope import: only used to create the output directory

    src_path = 'src/robot_parcours_1.avi'
    out_path = 'out/effective.avi'

    frames = load_frames(src_path)

    n = len(frames)
    print('(main) Loaded {} frames'.format(n))

    # Mock classifier
    clf = classifier()

    # Feed me. Yields the result when fed '='
    calc = calculator()
    next(calc)

    # Keeps track of the equation visualization state
    viz = visualize_equation(frames[0].shape)

    # Helper variables for equation integrity purposes
    result = 0
    symbols = ' '
    active_equation = True

    # For video output
    output_frames = []

    print('   Frame\t   Valid\t   Prepared\t   Time  \t ')
    print('+---------------------------------------------------------------+')

    for i, f in enumerate(frames):

        tic = time()
        print('   {}'.format(i+1), end='\t\t')

        # Symbol subject to change if a valid classification is made
        symbol = 'N'

        # Equalize intensity and filter the (red) arrow
        eqf = equalize_intensity(f)
        arrow = rgb_threshold(eqf, ((180, 256), (-1,190), (-1,190)))

        # Locate arrow indices using region growing
        arrow_regions = locate_rgb_regions(arrow, 15, 1000, 3000, 250, 256)
        assert (len(arrow_regions)==1), \
            'Expected exactly one arrow region in frame {}, found {}'.format(i+1, len(arrow_regions))

        # Draw surrounding rectangle
        max_x, max_y, min_x, min_y = locate_frame(arrow_regions[0])
        bw_rectangle = draw_bw_rectangle(arrow.shape[:-1], max_x, max_y, min_x, min_y)
        rgb_rectangle = gray_to_color(bw_rectangle, 'green')

        # Assume all symbols are visible in first frame
        # Use this as a reference
        if i == 0:
            reference_region = {tuple(a) for a in arrow_regions[0]}
            reference_frame = eqf

        # Extract candidate from reference frame,
        # corresponding to the area beneath vehicle in this frame
        candidate = extract_candidate_frame(reference_frame, max_x, max_y, min_x, min_y)

        # Do not consider anything for classification after a '=' has been
        # registered, and discard the candidate if objects touch its border.
        # (Previously the active_equation flag was overwritten by the border
        # check, so classification carried on after '='.)
        valid = active_equation and not periferal_pixels(candidate, 5)

        if valid:
            # Discard candidate if mostly empty. ndarray.mean() accumulates in
            # floating point; the builtin sum() over uint8 elements can wrap.
            valid = bool(candidate.mean() < 254)

        if valid:
            # A frame is not fit for classification if it contains part of the arrow
            arrow_pixels = {tuple(a) for a in arrow_regions[0]}
            valid = arrow_pixels.isdisjoint(reference_region)

        if valid:
            # Crop to ease classification, resize to 32x32
            candidate = crop_content(candidate)
            candidate = np.array(Image.fromarray(candidate).resize((32,32)))

            # MOCK CLASSIFIER #
            next(clf)
            symbol = clf.send(candidate)
            # MOCK CLASSIFIER #

            # Render candidate invalid if two frames _in a row_ yield the same symbol
            if symbol == symbols[-1]:
                valid = False
            else:
                # Send valid symbol to calculator
                result = calc.send(symbol)
                # = terminates the equation and implies no need for further classification
                active_equation = (symbol != '=')

        symbols += symbol
        print('   {}  '.format(valid), end='\t')

        ## PREPARE OUTPUT FRAME ##
        with_arrow_rect = overlap_frames(f, rgb_rectangle)
        print('   .', end=' ')
        with_thumbnail = add_colored_thumbnail(with_arrow_rect, candidate, valid)
        print('.', end=' ')
        with_equation = poke_visualizer(viz, with_thumbnail, symbol, valid, result)
        print('.', end=' ')
        output_frames.append(with_equation)
        print('.', end='\t')
        toc = time()
        print(f'    {toc-tic:.2f}')

    # Make sure the target directory exists before handing over to the encoder.
    os.makedirs(os.path.dirname(out_path), exist_ok=True)
    make_video(output_frames, out_path)

In [24]:
# Time one complete run of the pipeline; toc - tic is the elapsed time in seconds.
tic = time()
main()
toc = time()

(main) Loaded 42 frames
   Frame	   Valid	   Prepared	   Time  	 
+---------------------------------------------------------------+
   1		   False  	   . . . .	    0.21
   2		   False  	   . . . .	    0.20
   3		   False  	   . . . .	    0.20
   4		   False  	   . . . .	    0.21
   5		   False  	   . . . .	    0.20
   6		   False  	   . . . .	    0.20
   7		   False  	   . . . .	    0.22
   8		   False  	   . . . .	    0.24
   9		   True  	   . . . .	    0.23
   10		   False  	   . . . .	    0.25
   11		   False  	   . . . .	    0.19
   12		   False  	   . . . .	    0.19
   13		   True  	   . . . .	    0.25
   14		   False  	   . . . .	    0.19
   15		   False  	   . . . .	    0.22
   16		   False  	   . . . .	    0.19
   17		   False  	   . . . .	    0.19
   18		   True  	   . . . .	    0.24
   19		   False  	   . . . .	    0.24
   20		   False  	   . . . .	    0.21
   21		   True  	   . . . .	    0.32
   22		   False  	   . . . .	    0.26
   23		   False  	   . . . .	    0.24
   24		

bitrate tolerance 128000 too small for bitrate 1024000, overriding


. .	    0.21


In [25]:
# Elapsed time of the main() run, from the tic/toc timestamps taken around it.
toc-tic

9.866992950439453