## Detecting and tracking objects across frames (video => video)

In [2]:
%load_ext autoreload
%autoreload 2

from yolo3_torch.lib import *
# Directory that holds the network config, weights, class names and palette.
model_data = 'yolo3_torch/model_data'

# Settings shared by every detection / annotation call in this notebook.
model_params = dict(
    # Paths of the model artefacts.
    fileConfig=f'{model_data}/yolov3.cfg',
    fileYolo3Weights=f'{model_data}/yolov3.weights',
    fileClasses=f'{model_data}/object.classes',
    filePallete=f'{model_data}/pallete',
    # Size handed to image_to_model_input / upload_model.
    inpDim=416,
    # Thresholds forwarded to get_results (as `confidence` and `nmsConf`).
    confidence=0.5,
    nmsThresh=0.4,
    # Number of frames kept in the look-back cache before they are written out.
    cachSize=7,
    # Frame rate used for the output video and for second -> frame conversion.
    fps=30,
)

# The class list is read once; its length is stored alongside the other settings.
classes = load_classes(model_params['fileClasses'])
model_params.update(numClasses=len(classes))

def detect_objects_with_yolo3(frameId, frame, model, CUDA, model_params):
    """Run the model on a single frame and return its detections.

    Args:
        frameId: Index of the frame; added to column 0 of every detection row.
        frame: Image array; ``frame.shape[1]`` / ``frame.shape[0]`` are used as
            the original width / height when rescaling.
        model: Callable network, invoked as ``model(input, CUDA)``.
        CUDA: Flag forwarded unchanged to the model call.
        model_params: Dict providing ``'inpDim'`` and ``'numClasses'`` and,
            optionally, ``'confidence'`` (default 0.5) and ``'nmsThresh'``
            (default 0.4).

    Returns:
        The detections after rescaling, frame-id offsetting and
        ``add_column_for_object_id``; or ``None`` when ``get_results`` yields
        ``None``.
    """
    inputSize = model_params['inpDim']
    netInput = image_to_model_input(frame, inputSize)

    # Inference only: no gradients are needed.
    tic = time.time()
    with torch.no_grad():
        rawPrediction = model(Variable(netInput), CUDA)
    elapsed = time.time() - tic
    print("Frame: {0:6d} predicted in {1:6.3f} seconds".format(int(frameId), elapsed))

    detections = get_results(
        prediction=rawPrediction,
        confidence=model_params.get('confidence', 0.5),
        numClasses=model_params['numClasses'],
        nmsConf=model_params.get('nmsThresh', 0.4))

    # Nothing found on this frame.
    if detections is None:
        return None

    # Map boxes back to the original (width, height) of the frame.
    originalSize = (frame.shape[1], frame.shape[0])
    detections = rescaling_to_original(detections, originalSize, inputSize)
    # Column 0 is shifted by the frame index so rows from different frames
    # can be told apart once concatenated.
    detections[:, 0] += frameId
    return add_column_for_object_id(detections)

def _load_palette(filePallete):
    """Load the pickled colour palette and close the file afterwards.

    NOTE(review): unpickling can execute arbitrary code -- only point this at
    a palette file from a trusted source.
    """
    with open(filePallete, "rb") as paletteFile:
        return pkl.load(paletteFile)


def _annotate_image(fileInp, fileOut, frameId, model, CUDA, model_params, fps, colors, name_len):
    """Detect objects on one image file and optionally write the marked image."""
    frame = cv2.imread(fileInp)
    if frame is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise IOError("Cannot read image file: {}".format(fileInp))

    results = detect_objects_with_yolo3(frameId, frame, model, CUDA, model_params)
    if results is None:
        return None

    fw, fh = frame.shape[1], frame.shape[0]
    tracking_params = get_params(fps, fw, fh)
    self_match_frame(results, tracking_params)
    print(logging_classes(frameId, results, classes))

    if fileOut is not None:
        for obj in results:
            mark_one_object(frame, obj, colors, classes, name_len)
        cv2.imwrite(fileOut, frame)
    return results


def _annotate_video(fileInp, fileOut, frameId, model, CUDA, model_params, data_params, fps, colors, name_len):
    """Detect and track objects across the frames of one video file."""
    videoInp = cv2.VideoCapture(fileInp)
    videoOut = None
    results = None
    try:
        fw = int(videoInp.get(cv2.CAP_PROP_FRAME_WIDTH))
        fh = int(videoInp.get(cv2.CAP_PROP_FRAME_HEIGHT))

        tracking_params = get_params(fps, fw, fh)
        cachSize = model_params.get('cachSize', 7)

        if fileOut is not None:
            videoOut = cv2.VideoWriter(fileOut, cv2.VideoWriter_fourcc(*'H264'), fps, (fw, fh))

        dirDetected = data_params.get('dirDetected', None)
        clean_and_mk_dir(dirDetected)
        dirFrames = data_params.get('dirFrames', None)
        clean_and_mk_dir(dirFrames)

        # Frame window to process. With the default duration of 0,
        # endId < startId, which disables the upper limit below.
        startId = int(fps * float(data_params.get('startSecond', 0.)))
        endId = int(startId - 1 + fps * float(data_params.get('durationSeconds', 0.)))

        cachFrm = []
        framesProcessed = 0
        start0 = time.time()

        while True:
            ok, frame = videoInp.read()
            if not ok:
                break
            if frameId < startId:
                # Skip frames before the requested start.
                frameId += 1
                continue

            res = detect_objects_with_yolo3(frameId, frame, model, CUDA, model_params)
            framesProcessed += 1

            # NOTE(review): frames without detections are never cached, so
            # they are also absent from the output video -- confirm intended.
            if res is not None:
                print(logging_classes(frameId, res, classes))

                results = res if results is None else torch.cat((results, res))
                cachFrm.append((frameId, frame, res))

                if len(cachFrm) > cachSize:
                    frameId_, frame_, _ = cachFrm.pop(0)
                    # Mark objects and save to output (video/dir).
                    if videoOut is not None:
                        visualize_objects(frameId_, frame_, results, videoOut, colors, classes, dirFrames, dirDetected, name_len)

                # Tracking objects across frames.
                find_matches(cachFrm, tracking_params, results)
                results = restore_miss_detection(cachFrm, results)

            # The end-of-window check runs for every processed frame, including
            # those without detections, so the requested duration is honoured.
            frameId += 1
            if startId <= endId and frameId > endId:
                break

        # Flush the frames still waiting in the cache.
        while cachFrm:
            frameId_, frame_, _ = cachFrm.pop(0)
            if videoOut is not None:
                # Mark objects and save to output (video/dir).
                visualize_objects(frameId_, frame_, results, videoOut, colors, classes, dirFrames, dirDetected, name_len)

        # Average over the frames actually run through the detector; guarding
        # the zero case avoids a ZeroDivisionError on empty or too-short input.
        if framesProcessed > 0:
            print("Aver. calc.: {0} sec.".format((time.time() - start0) / framesProcessed))
        else:
            print("No frames were processed.")
    finally:
        # Release the capture/writer even when processing fails midway.
        videoInp.release()
        if videoOut is not None:
            videoOut.release()
    return results


def annotation(data_params, model_params):
    """Detect objects in an image (``.jpg``) or detect and track them in a video (``.mp4``).

    Args:
        data_params: Dict with the required key ``'fileDataInp'`` (input path)
            and the optional keys ``'fileDataOut'`` (path of the marked
            image/video; nothing is written when absent), ``'name_len'``,
            and, for video input, ``'dirFrames'``, ``'dirDetected'``,
            ``'startSecond'`` and ``'durationSeconds'`` (0 or absent means no
            upper limit).
        model_params: Dict with ``'fileConfig'``, ``'fileYolo3Weights'``,
            ``'inpDim'``, ``'filePallete'`` (read only when an output file is
            requested), optional ``'fps'`` (default 30) and ``'cachSize'``
            (default 7), plus the keys read by ``detect_objects_with_yolo3``.

    Returns:
        The accumulated detections, or ``None`` when nothing was detected or
        the input is neither ``.jpg`` nor ``.mp4``.

    Raises:
        IOError: If a ``.jpg`` input cannot be read.
    """
    CUDA = torch.cuda.is_available()
    print("CUDA is {}available.".format("" if CUDA else "NOT "))
    model = upload_model(model_params['fileConfig'], model_params['fileYolo3Weights'], model_params['inpDim'], CUDA)
    print("Model is loaded.")

    frameId = 0.
    setObjId(0.)

    fps = model_params.get('fps', 30)

    fileOut = data_params.get('fileDataOut', None)
    # The palette is only needed when marked output is written.
    colors = None if fileOut is None else _load_palette(model_params['filePallete'])
    name_len = data_params.get('name_len', None)

    fileInp = data_params['fileDataInp']

    results = None
    if fileInp.endswith('.jpg'):
        results = _annotate_image(fileInp, fileOut, frameId, model, CUDA, model_params, fps, colors, name_len)
    elif fileInp.endswith('.mp4'):
        results = _annotate_video(fileInp, fileOut, frameId, model, CUDA, model_params, data_params, fps, colors, name_len)

    print("Done.")
    # results: [frameId, xmin, ymin, xmax, ymax, conf., conf., objId, objClassId]
    return results

def convert_to_detected_objects(results):
    """Turn detection rows into a list of plain dictionaries.

    Each row is read by position: indices 0-4 become the integer fields
    ``frameId``, ``xmin``, ``ymin``, ``xmax``, ``ymax``; indices 5 and 6 become
    the floats ``conf1`` and ``conf2``; index 7 becomes the integer ``objId``;
    index 8 is used to look up the name in the module-level ``classes`` list.

    Returns ``None`` when ``results`` is ``None``.
    """
    if results is None:
        return None

    integerFields = ('frameId', 'xmin', 'ymin', 'xmax', 'ymax')
    detected = []
    for row in results:
        # Leading columns: frame index followed by the box corners.
        entry = {key: int(row[idx]) for idx, key in enumerate(integerFields)}
        entry['conf1'] = float(row[5])
        entry['conf2'] = float(row[6])
        entry['objId'] = int(row[7])
        entry['class'] = classes[int(row[8])]
        detected.append(entry)
    return detected

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Download from YouTube

In [4]:
# Fetch the sample video; download_from_youtube receives the URL and the 'tmp'
# directory and returns the local path that is printed below.
url = 'https://youtu.be/UM0hX7nomi8'
file = download_from_youtube(url, 'tmp')
print("file: {}".format(file))

[youtube] UM0hX7nomi8: Downloading webpage
[youtube] UM0hX7nomi8: Downloading video info webpage
[download] tmp/UM0hX7nomi8.mp4 has already been downloaded and merged
file: tmp/UM0hX7nomi8.mp4


In [5]:
# Input/output settings for annotating the downloaded video.
data_params = {
    # Video to read and the path of the marked video to write.
    'fileDataInp': file,
    'fileDataOut': add_suffix(file, '_detect'),
    # Directories handed to clean_and_mk_dir and visualize_objects inside annotation()
    # (presumably per-frame image dumps -- confirm against the library).
    'dirFrames': 'tmp/{}_frames'.format(get_name(file)),
    'dirDetected':  'tmp/{}_detect'.format(get_name(file)),
    'name_len': None, # length of the object class name shown on a marked frame
    # Processing window in seconds; annotation() converts it to frame ids using fps.
    'startSecond': 0,
    'durationSeconds': 1
}

# Returns the accumulated detections (or None if nothing was detected).
results = annotation(data_params, model_params)

CUDA is NOT available.
Model is loaded.
Frame:      0 predicted in  0.929 seconds
Frame:      0 Detected: car, car, car, car, car, car
Frame:      1 predicted in  0.916 seconds
Frame:      1 Detected: car, car, car, car, car, car
Frame:      2 predicted in  0.923 seconds
Frame:      2 Detected: car, car, car, car, car, car
Frame:      3 predicted in  0.880 seconds
Frame:      3 Detected: car, car, car, car, car, car, truck
Frame:      4 predicted in  0.845 seconds
Frame:      4 Detected: car, car, car, car, car, car
Frame:      5 predicted in  0.848 seconds
Frame:      5 Detected: car, car, car, car, car, car
Frame:      6 predicted in  0.846 seconds
Frame:      6 Detected: car, car, car, car, car, car
Frame:      7 predicted in  0.857 seconds
Frame:      7 Detected: car, car, car, car, car, car
Frame:      8 predicted in  0.876 seconds
Frame:      8 Detected: car, car, car, car, car, car, car
Frame:      9 predicted in  0.972 seconds
Frame:      9 Detected: car, car, car, car, car, ca

### Download image

In [None]:
# Download a sample image and store it locally so it can be passed to annotation() by path.
url = "https://nails.newsela.com/s3/newsela-media/article_media/2017/10/self-driving-cars-nyc-6abce23d.jpg?crop=0%2C128%2C1366%2C896&height=497&horizontal_focal_point=center&vertical_focal_point=center&width=885"
frame = download_image(url)
fileOut = 'tmp/test.jpg'
cv2.imwrite(fileOut, frame)

In [None]:
# Single-image run: the '.jpg' input takes the image branch of annotation(),
# which writes the marked image to 'fileDataOut'.
data_params = {
    'fileDataInp': 'tmp/test.jpg',
    'fileDataOut': 'tmp/test_detect.jpg'
}

results2 = annotation(data_params, model_params)