In [None]:
# Run the OpenVINO model downloader on the colorization demo's models.lst
# (the --list option takes a file naming the models to fetch).
!python /opt/intel/openvino/deployment_tools/tools/model_downloader/downloader.py --list /opt/intel/openvino/inference_engine/demos/python_demos/colorization_demo/models.lst

In [None]:
# Run the OpenVINO model converter for colorization-v2. infer_video() below
# reads public/colorization-v2/FP32/colorization-v2.xml (and the matching .bin),
# which presumably is the output of this step -- TODO confirm.
!python /opt/intel/openvino/deployment_tools/tools/model_downloader/converter.py --name colorization-v2

In [None]:
from openvino.inference_engine import IECore
import cv2 as cv
import numpy as np
import os
from argparse import ArgumentParser, SUPPRESS
import logging as log
import sys
from PIL import Image
import PIL
import io
import IPython.display
from IPython.display import clear_output


def infer_video(video_path,
                model_path="public/colorization-v2/FP32/colorization-v2.xml",
                coeffs="public/colorization-v2/colorization-v2.npy",
                device="CPU",
                verbose=False):
    """Colorize a video frame by frame and show the result inline in the notebook.

    Each frame is converted to grayscale, its Lab lightness channel is fed to
    the colorization network, and the predicted a/b channels are recombined
    with the original lightness. The original frame and the colorized frame
    are rendered side by side as a JPEG that replaces the previous cell output.

    Args:
        video_path: Path to a video file. If the value can be converted with
            ``int()`` it is passed to ``cv.VideoCapture`` as an integer
            (OpenCV treats an integer source as a capture-device index).
        model_path: Path to the network ``.xml`` file; the weights are read
            from the file with the same stem and a ``.bin`` extension.
        coeffs: Path to the ``.npy`` file with the (313, 2) color coefficients.
        device: Inference Engine device name the network is loaded on.
        verbose: When true, log at DEBUG level instead of INFO.

    Returns:
        None. Output is produced via ``IPython.display``.

    Raises:
        OSError: If the video source cannot be opened.
        AssertionError: If the network inputs/outputs or the coefficient array
            do not have the expected shapes.
    """
    log.basicConfig(format="[ %(levelname)s ] %(message)s",
                    level=log.DEBUG if verbose else log.INFO, stream=sys.stdout)

    # mean is stored in the source caffe model and passed to IR
    log.debug("Load network")
    ie = IECore()
    load_net = ie.read_network(model_path, os.path.splitext(model_path)[0] + ".bin")
    load_net.batch_size = 1
    exec_net = ie.load_network(network=load_net, device_name=device)

    assert len(load_net.inputs) == 1, "Expected number of inputs is equal 1"
    input_blob = next(iter(load_net.inputs))
    input_shape = load_net.inputs[input_blob].shape
    assert input_shape[1] == 1, "Expected model input shape with 1 channel"

    assert len(load_net.outputs) == 1, "Expected number of outputs is equal 1"
    output_blob = next(iter(load_net.outputs))
    output_shape = load_net.outputs[output_blob].shape
    assert output_shape == [1, 313, 56, 56], "Shape of outputs does not match network shape outputs"

    _, _, h_in, w_in = input_shape

    # Load the coefficients before opening the capture so that a missing or
    # malformed file cannot leave an open capture handle behind.
    color_coeff = np.load(coeffs).astype(np.float32)
    assert color_coeff.shape == (313, 2), "Current shape of color coefficients does not match required shape"
    # Loop-invariant: (2, 313, 1, 1) weights that broadcast against the
    # (1, 313, 56, 56) network output.
    ab_weights = color_coeff.transpose()[:, :, np.newaxis, np.newaxis]

    try:
        input_source = int(video_path)
    except ValueError:
        input_source = video_path

    imshow_size = (320, 240)

    cap = cv.VideoCapture(input_source)
    try:
        # The previous `assert "<message>"` was always true (a non-empty string
        # is truthy), so an unopenable source was silently ignored.
        if not cap.isOpened():
            raise OSError("{} not exist".format(input_source))

        while True:
            log.debug("#############################")
            has_frame, original_frame = cap.read()
            if not has_frame:
                break
            h_orig, w_orig = original_frame.shape[:2]

            log.debug("Preprocessing frame")
            if original_frame.shape[2] > 1:
                frame = cv.cvtColor(cv.cvtColor(original_frame, cv.COLOR_BGR2GRAY), cv.COLOR_GRAY2RGB)
            else:
                frame = cv.cvtColor(original_frame, cv.COLOR_GRAY2RGB)

            img_rgb = frame.astype(np.float32) / 255
            img_lab = cv.cvtColor(img_rgb, cv.COLOR_RGB2Lab)
            # Only the lightness channel, resized to the network input size, is fed in.
            img_l_rs = cv.resize(img_lab.copy(), (w_in, h_in))[:, :, 0]

            log.debug("Network inference")
            res = exec_net.infer(inputs={input_blob: [img_l_rs]})

            # Weighted sum over the 313 bins -> (2, 56, 56) a/b channel maps.
            update_res = (res[output_blob] * ab_weights).sum(1)

            log.debug("Get results")
            out = update_res.transpose((1, 2, 0))
            out = cv.resize(out, (w_orig, h_orig))
            # Recombine the full-resolution lightness with the upscaled a/b prediction.
            img_lab_out = np.concatenate((img_lab[:, :, 0][:, :, np.newaxis], out), axis=2)
            img_bgr_out = np.clip(cv.cvtColor(img_lab_out, cv.COLOR_Lab2BGR), 0, 1)

            log.debug("Show results")
            original_image = cv.resize(original_frame, imshow_size)
            colorize_image = (cv.resize(img_bgr_out, imshow_size) * 255).astype(np.uint8)

            original_image = cv.putText(original_image, 'Original', (25, 50),
                                        cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv.LINE_AA)
            colorize_image = cv.putText(colorize_image, 'Colorize', (25, 50),
                                        cv.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), 2, cv.LINE_AA)

            final_image = cv.hconcat([original_image, colorize_image])
            # OpenCV images are BGR; PIL expects RGB.
            final_image = cv.cvtColor(final_image, cv.COLOR_BGR2RGB)

            # Replace the previous frame in the cell output instead of appending.
            clear_output(wait=True)

            f = io.BytesIO()
            PIL.Image.fromarray(final_image).save(f, 'jpeg')
            IPython.display.display(IPython.display.Image(data=f.getvalue()))

            # No cv.waitKey() here: no HighGUI window is ever created (frames
            # are shown through IPython.display), so it could not receive key
            # presses. Interrupt the kernel to stop early; the `finally` below
            # still releases the capture.
    finally:
        cap.release()


In [None]:
# Run the colorization demo on the video file "bw.mp4" (path relative to the
# notebook's working directory).
infer_video("bw.mp4")