In [360]:
import cv2
import glob
import numpy as np 
import pandas as pd
from matplotlib import pyplot as plt
%matplotlib inline

import seaborn as sns

from sklearn.neural_network import MLPRegressor

from mystic.penalty import quadratic_inequality
from mystic.solvers import diffev2
from mystic.monitors import VerboseMonitor

from src.camera import Camera
from src.utils.cube import generate_cube, draw_cube
from src.utils.functions import window
from src.utils.reprojection import reprojection_distance


In [385]:
class Frame(object):
    """Image together with the pose, point and feature data attached to it.

    A new frame only holds a private copy of ``image``. ``R``, ``t``,
    ``points_3d``, ``points_2d``, ``kp`` and ``des`` start as empty arrays and
    are assigned afterwards by whoever builds the frame.
    """
    # Factor applied to the second-best match distance when filtering knn matches.
    MATCHER_THRESHOLD = 0.55
    # Original (misspelled) name, kept because existing code refers to it.
    MATCHER_THREHSOLD = MATCHER_THRESHOLD

    def __init__(self, image):
        """Store a copy of ``image`` and initialise every other field as empty."""
        self.image = np.copy(image)
        self.R = np.array([])
        self.t = np.array([])
        self.points_3d = np.array([])
        self.points_2d = np.array([])
        self.kp = np.array([])
        self.des = np.array([])

    @staticmethod
    def _count(items):
        """Length of ``items``, treating ``None`` as empty."""
        return 0 if items is None else len(items)

    def __str__(self):
        """Summarise the array shapes and the number of keypoints/descriptors."""
        return "Frame:\n>> R:{}\n>> t:{}\n>> 3D:{}\n>> 2D:{}\n>> kp:{}\n>> des:{}\n".format(
            self.R.shape,
            self.t.shape,
            self.points_3d.shape,
            self.points_2d.shape,
            # kp/des may have been assigned None (no features); report 0 instead of failing.
            self._count(self.kp),
            self._count(self.des)
        )

    def create_camera_and_project(self, obj):
        """Build a ``Camera`` from this frame's ``R``/``t`` and return ``camera.project(obj)``."""
        camera = Camera.create(self.R, self.t)
        return camera.project(obj)

    def bundle_adjustment(self, npop=10, gtol=200, maxiter=1000, monitor_interval=100):
        """Refine this frame's pose by minimising ``reprojection_distance``.

        The pose is packed into six numbers (three from ``cv2.Rodrigues(self.R)``
        followed by the three entries of ``self.t``) and optimised with
        ``mystic``'s ``diffev2``.

        Args:
            npop: ``npop`` argument forwarded to ``diffev2``.
            gtol: ``gtol`` argument forwarded to ``diffev2``.
            maxiter: ``maxiter`` argument forwarded to ``diffev2``.
            monitor_interval: argument of the ``VerboseMonitor`` used as ``itermon``.

        Returns:
            A new ``Frame`` with the optimised ``R`` (via ``cv2.Rodrigues``) and
            ``t`` (shape ``(3, 1)``); image, points, ``kp`` and ``des`` are taken
            from this frame.
        """
        rotation_vector = cv2.Rodrigues(self.R)[0]
        # Flatten both parts so the layout of R/t (column vs. flat) does not matter.
        initial = np.concatenate([np.ravel(rotation_vector), np.ravel(self.t)]).tolist()

        mon = VerboseMonitor(monitor_interval)
        result = diffev2(
            reprojection_distance,
            x0=initial,
            args=(self.points_3d, self.points_2d),
            npop=npop,
            gtol=gtol,
            disp=False,
            full_output=True,
            itermon=mon,
            maxiter=maxiter
        )

        # With full_output=True the first element is the optimised parameter vector.
        solution = np.asarray(result[0], dtype=float)

        optimized_frame = Frame(self.image)
        optimized_frame.R = cv2.Rodrigues(np.array(solution[:3]))[0]
        optimized_frame.t = solution[3:6].reshape(3, 1).copy()
        optimized_frame.points_3d = self.points_3d
        optimized_frame.points_2d = self.points_2d
        optimized_frame.kp = self.kp
        optimized_frame.des = self.des

        return optimized_frame

In [392]:
class Scene(object):
    """Stateless helpers that recover camera poses and 3D points from three views."""

    # Value passed as the FLANN ``algorithm`` index parameter.
    # NOTE(review): FLANN's enum uses 1 for the KD-tree index (0 is the linear
    # index); 0 is kept here so matching results do not change - confirm intent.
    FLANN_INDEX_KDTREE = 0
    FLANN_TREES = 5
    FLANN_CHECKS = 50

    # Intrinsics handed to findEssentialMat / recoverPose.
    # NOTE(review): focal=1.0 combined with a principal point that looks like
    # pixel coordinates seems inconsistent with Camera().K - verify.
    ESSENTIAL_FOCAL = 1.0
    ESSENTIAL_PP = (486.2, 265.59)

    @classmethod
    def get_rt_from_essential(cls, pts1, pts2):
        """Estimate the relative pose between two views from matched points.

        Runs ``cv2.findEssentialMat`` (RANSAC) and feeds the result to
        ``cv2.recoverPose``.

        Returns:
            ``(R, t)`` as produced by ``cv2.recoverPose``.
        """
        E, mask = cv2.findEssentialMat(
            pts1,
            pts2,
            focal=cls.ESSENTIAL_FOCAL,
            pp=cls.ESSENTIAL_PP,
            method=cv2.RANSAC,
            prob=0.999,
            threshold=1.0
        )

        points, R, t, mask = cv2.recoverPose(
            E,
            pts1,
            pts2,
            focal=cls.ESSENTIAL_FOCAL,
            pp=cls.ESSENTIAL_PP,
            mask=mask
        )

        return R, t

    @classmethod
    def recover_third_rt(cls, pts1, camera1, pts2, camera2, pts3):
        """Triangulate points from two cameras and solve the third camera's pose.

        Args:
            pts1, pts2: matched image points seen by ``camera1`` / ``camera2``.
            camera1, camera2: cameras exposing ``R`` and ``get_projection_matrix()``.
            pts3: image points of the same features in the third view.

        Returns:
            ``(R3, t3, points_3d)``: rotation from ``cv2.Rodrigues`` of the PnP
            rotation vector, the PnP translation, and the triangulated points
            as returned by ``cv2.convertPointsFromHomogeneous``.
        """
        # One Camera() is enough to read the shared intrinsics.
        intrinsics = Camera()
        K, dist = intrinsics.K, intrinsics.distortion
        projection1 = camera1.get_projection_matrix()
        projection2 = camera2.get_projection_matrix()

        undistorted_points_1 = cv2.undistortPoints(pts1, K, dist, R=camera1.R, P=projection1)
        undistorted_points_2 = cv2.undistortPoints(pts2, K, dist, R=camera2.R, P=projection2)

        points_3d_homog = cv2.triangulatePoints(
            projection1,
            projection2,
            undistorted_points_1,
            undistorted_points_2
        )
        points_3d = cv2.convertPointsFromHomogeneous(points_3d_homog.T)

        # The success flag and inlier list are not used by callers.
        _, rvecs, tvecs, inliers = cv2.solvePnPRansac(points_3d, pts3, K, dist)
        return cv2.Rodrigues(rvecs)[0], tvecs, points_3d

    @staticmethod
    def _create_sift():
        """Return a SIFT detector, preferring ``cv2.SIFT_create`` when available."""
        factory = getattr(cv2, "SIFT_create", None)
        if factory is None:
            # Older builds only ship SIFT in the contrib module.
            factory = cv2.xfeatures2d.SIFT_create
        return factory()

    @classmethod
    def _create_matcher(cls):
        """Return the FLANN matcher configured with the class-level parameters."""
        index_params = dict(algorithm=cls.FLANN_INDEX_KDTREE, trees=cls.FLANN_TREES)
        search_params = dict(checks=cls.FLANN_CHECKS)
        return cv2.FlannBasedMatcher(index_params, search_params)

    @staticmethod
    def _ratio_test_queries(knn_matches, ratio):
        """Query indexes whose best match beats ``ratio`` times the second best."""
        return {
            pair[0].queryIdx
            for pair in knn_matches
            # Entries with fewer than two neighbours cannot be ratio-tested.
            if len(pair) >= 2 and pair[0].distance < ratio * pair[1].distance
        }

    @classmethod
    def _select_common_matches(cls, matches12, matches13, ratio=None):
        """Keep best matches whose query keypoint passes the ratio test in both pairings.

        Returns two lists (view 1->2 and view 1->3), each in the order of the
        corresponding input list.
        """
        if ratio is None:
            ratio = Frame.MATCHER_THREHSOLD
        # Set membership keeps the filtering linear in the number of matches.
        common = cls._ratio_test_queries(matches12, ratio) & cls._ratio_test_queries(matches13, ratio)
        good_12 = [pair[0] for pair in matches12 if len(pair) >= 1 and pair[0].queryIdx in common]
        good_13 = [pair[0] for pair in matches13 if len(pair) >= 1 and pair[0].queryIdx in common]
        return good_12, good_13

    @classmethod
    def _match_three_views(cls, features1, features2, features3):
        """Match view 1 against views 2 and 3 and return the shared points.

        Each argument is a ``(keypoints, descriptors)`` pair. Returns
        ``(pts1, pts2, pts3)``, float64 arrays reshaped to ``(-1, 1, 2)``.
        """
        (kp1, des1), (kp2, des2), (kp3, des3) = features1, features2, features3
        flann = cls._create_matcher()
        matches12 = flann.knnMatch(des1, des2, k=2)
        matches13 = flann.knnMatch(des1, des3, k=2)
        good_12, good_13 = cls._select_common_matches(matches12, matches13)

        pts1 = np.float64([kp1[m.queryIdx].pt for m in good_12]).reshape(-1, 1, 2)
        pts2 = np.float64([kp2[m.trainIdx].pt for m in good_12]).reshape(-1, 1, 2)
        pts3 = np.float64([kp3[m.trainIdx].pt for m in good_13]).reshape(-1, 1, 2)
        return pts1, pts2, pts3

    @staticmethod
    def _frame_features(sift, frame):
        """Return ``(kp, des)`` for ``frame``, reusing stored features when present."""
        kp, des = frame.kp, frame.des
        if kp is not None and des is not None and len(kp) > 0 and len(kp) == len(des):
            # Avoids re-running SIFT on the same image for every call.
            return kp, des
        return sift.detectAndCompute(frame.image, None)

    @staticmethod
    def _build_frame(image, R, t, points_3d, points_2d, kp, des):
        """Create a ``Frame`` for ``image`` and fill in all of its fields."""
        frame = Frame(image)
        frame.R = R
        frame.t = t
        frame.points_3d = points_3d
        frame.points_2d = points_2d
        frame.kp = kp
        frame.des = des
        return frame

    @classmethod
    def initial_triangulation(cls, image1, image2, image3):
        """ 1. Initial stereo camera reconstruction
            2. Third camera triangulation

        The first camera is placed at identity rotation / zero translation, the
        second pose comes from ``get_rt_from_essential`` and the third from
        ``recover_third_rt``.

        Returns:
            ``(frame1, frame2, frame3)``, all sharing the same ``points_3d``.
        """
        sift = cls._create_sift()
        kp1, des1 = sift.detectAndCompute(image1, None)
        kp2, des2 = sift.detectAndCompute(image2, None)
        kp3, des3 = sift.detectAndCompute(image3, None)

        pts1, pts2, pts3 = cls._match_three_views((kp1, des1), (kp2, des2), (kp3, des3))

        R2, t2 = cls.get_rt_from_essential(pts1, pts2)

        camera1 = Camera.create(np.eye(3, 3), np.zeros((3, 1)))
        camera2 = Camera.create(R2, t2)

        R3, t3, points_3d = cls.recover_third_rt(
            camera1=camera1, camera2=camera2, pts1=pts1, pts2=pts2, pts3=pts3
        )

        frame1 = cls._build_frame(image1, camera1.R, camera1.t, points_3d, pts1, kp1, des1)
        frame2 = cls._build_frame(image2, camera2.R, camera2.t, points_3d, pts2, kp2, des2)
        frame3 = cls._build_frame(image3, R3, t3, points_3d, pts3, kp3, des3)

        return frame1, frame2, frame3

    @classmethod
    def triangulation(cls, frame1, frame2, image3):
        """ 1. Stereo camera reconstruction
            2. Third camera triangulation

        Uses the poses already stored on ``frame1`` and ``frame2`` to
        triangulate the features shared with ``image3`` and solve its pose.

        Returns:
            ``(frame1, frame2, frame3)``; the first two are the inputs,
            ``frame3`` is a new ``Frame`` for ``image3``.
        """
        sift = cls._create_sift()
        features1 = cls._frame_features(sift, frame1)
        features2 = cls._frame_features(sift, frame2)
        kp3, des3 = sift.detectAndCompute(image3, None)

        pts1, pts2, pts3 = cls._match_three_views(features1, features2, (kp3, des3))

        camera1 = Camera.create(frame1.R, frame1.t)
        camera2 = Camera.create(frame2.R, frame2.t)

        R3, t3, points_3d = cls.recover_third_rt(
            camera1=camera1, camera2=camera2, pts1=pts1, pts2=pts2, pts3=pts3
        )

        frame3 = cls._build_frame(image3, R3, t3, points_3d, pts3, kp3, des3)

        return frame1, frame2, frame3

In [393]:
class Timeline(object):
    """Buffers a frame sequence, recovers keyframe poses and renders a cube on the frames.

    Construction does all the heavy lifting: it reads the frames, picks every
    ``KEYFRAME_DISTANCE``-th one as a keyframe, recovers the keyframe poses via
    ``Scene`` and bundle-adjusts them.
    """
    RESIZE_SCALE = 2        # divisor applied to width/height of video frames
    KEYFRAME_DISTANCE = 50  # number of buffered frames between two keyframes

    def __init__(self, path="videos/video13.mp4"):
        """Load the frames and compute (adjusted) keyframes.

        Args:
            path: video file that is read when the buffer is filled in video mode.
        """
        self.path_video = path # path to video
        self.buffer = [] # buffered video a.k.a images
        intrinsics = Camera()
        self.K = intrinsics.K # calibration matrix
        self.distortion = intrinsics.distortion # distortion coefs
        self.common_points = [] # common points between keyframes

        self.__read_into_buffer()
        self.keyframes_indexes = list(range(0, len(self.buffer), self.KEYFRAME_DISTANCE))

        self.keyframes = []
        self.adjusted_keyframes = []
        self.__recover_keyframes()
        self.__adjust_keyframes()
        self.interpolated_frames = []

        print("Timeline initialized...")

    @staticmethod
    def _sorted_image_paths(pattern):
        """Return the paths matching ``pattern`` in sorted (lexicographic) order.

        ``glob`` gives no ordering guarantee, while the frame order matters
        here. Lexicographic order is chronological only for zero-padded names.
        """
        return sorted(glob.glob(pattern))

    @staticmethod
    def _make_cube():
        """Build the cube that gets projected into the frames."""
        return generate_cube(1.0, [0, 0, 10])

    @staticmethod
    def _save_image(index, image):
        """Write ``image`` to ``saved_images/<index as 3 digits>.png`` and report failures."""
        path = "saved_images/{}.png".format("%03d" % index)
        # imwrite signals failure through its return value instead of raising.
        if not cv2.imwrite(path, image):
            print("Could not write {}".format(path))

    def __resize(self, frame):
        """Shrink ``frame`` by ``RESIZE_SCALE`` in both dimensions."""
        height, width = frame.shape[:2]
        return cv2.resize(
            frame,
            (int(width / self.RESIZE_SCALE), int(height / self.RESIZE_SCALE))
        )

    def __read_into_buffer(self, mode="pictures"):
        """Reads video/images into object buffer.

        Args:
            mode: ``"pictures"`` reads ``frames/**.png`` in sorted order; any
                other value reads ``self.path_video`` and resizes every frame.
        """
        buffer = []

        print("Buffering: start...")
        if mode == "pictures":
            print("Reading from images sequence in frames folder...")
            for path in self._sorted_image_paths("frames/**.png"):
                img = cv2.imread(path)
                if img is None:
                    # imread returns None for unreadable files; keep them out of the buffer.
                    print("Skipping unreadable image {}".format(path))
                    continue
                buffer.append(img)
        else:
            print("Reading from video file {}...".format(self.path_video))
            cap = cv2.VideoCapture(self.path_video)
            try:
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    buffer.append(self.__resize(frame))
            finally:
                # Release the capture even if reading/resizing fails.
                cap.release()

        self.buffer = buffer
        print("Buffering: done")

    def __recover_keyframes(self):
        """Recover a posed ``Frame`` for every keyframe.

        The first keyframe triple is bootstrapped with
        ``Scene.initial_triangulation``. Each later triple adds one keyframe,
        triangulated from the first frame and the most recently recovered one.
        """
        print("Keyframes recovering: start...")
        keyframes = []
        frame1, frame2, frame3 = None, None, None

        for i, triple in enumerate(window(self.keyframes_indexes, 3)):
            if i == 0: # if initial
                image1, image2, image3 = (self.buffer[index] for index in triple)
                frame1, frame2, frame3 = Scene.initial_triangulation(image1, image2, image3)
                keyframes = [frame1, frame2, frame3]
            else:
                image3 = self.buffer[triple[2]]
                frame1, frame2, frame3 = Scene.triangulation(frame1, frame3, image3)
                keyframes.append(frame3)

        self.keyframes = keyframes
        print("Keyframes recovering: done")

    def __adjust_keyframes(self):
        """Bundle-adjust every recovered keyframe into ``self.adjusted_keyframes``."""
        print("Keyframe bundle adjustment: start...")
        self.adjusted_keyframes = [frame.bundle_adjustment() for frame in self.keyframes]
        print("Keyframe bundle adjustment: done")

    def __iter_intermediate_jobs(self):
        """Yield ``(kf1, kf3, image)`` for every buffered image between adjusted keyframes.

        The first keyframe triple covers both of its intervals; every later
        triple only contributes its newest interval.
        """
        step = self.KEYFRAME_DISTANCE
        for i, (kf1, kf2, kf3) in enumerate(window(self.adjusted_keyframes, 3)):
            print("Step", i)
            first = 0 if i == 0 else (i + 1) * step
            for image in self.buffer[first:(i + 2) * step]:
                yield kf1, kf3, image

    def interpolate_frames_and_save(self, max_frames=100):
        """Estimate, bundle-adjust and render the frames between keyframes.

        Each intermediate image is posed with ``Scene.triangulation`` against the
        first and last keyframe of its triple, then bundle-adjusted. The cube is
        drawn on it and the result is written to ``saved_images/``. The adjusted
        frames are stored in ``self.interpolated_frames``.

        Args:
            max_frames: number of intermediate frames to process (``None`` for all).
        """
        print("Intermediate frames interpolation: start...")
        print("WARNING: this operation take a lot of time... (over 20 minutes)")
        # Built here so the method does not depend on a notebook-level global.
        cube = self._make_cube()

        intermediate_frames = []
        for kf1, kf3, image in self.__iter_intermediate_jobs():
            # Frames beyond the limit would be discarded anyway, so stop early.
            if max_frames is not None and len(intermediate_frames) >= max_frames:
                break
            intermediate_frames.append(Scene.triangulation(kf1, kf3, image)[2])

        adjusted_intermediate_frames = [
            f.bundle_adjustment() for f in intermediate_frames[:max_frames]
        ]

        for i, aiframe in enumerate(adjusted_intermediate_frames):
            try:
                # Draw on a copy so the stored frame image stays untouched.
                canvas = np.copy(aiframe.image)
                image = draw_cube(canvas, aiframe.create_camera_and_project(cube), wide=2)
                self._save_image(i, image)
            except Exception as e:
                # Best effort: one frame failing to render should not abort the run.
                print(e)

        self.interpolated_frames = adjusted_intermediate_frames
        print("Intermediate frames interpolation: done")

    def interpolate_frames_by_network_and_save(self, max_frames=120):
        """Interpolate projected cube corners with an MLP and render them.

        The projected cube coordinates of every adjusted keyframe (flattened as
        ``x0, y0, x1, y1, ...``) are regressed on the keyframe's buffer index.
        The fitted network then predicts the coordinates for each of the first
        ``max_frames`` buffered images, which are drawn and saved.

        Args:
            max_frames: number of buffered images to render.
        """
        print("Intermediate frames interpolation by nnet: start...")
        cube = self._make_cube()
        train = []
        for kf in self.adjusted_keyframes:
            object_points = []
            for point in kf.create_camera_and_project(cube):
                x, y = point[0]
                object_points.extend((x, y))
            train.append(object_points)

        df = pd.DataFrame(train)

        # One sample per keyframe; the single feature is its index in the buffer.
        X = (np.arange(df.shape[0]) * self.KEYFRAME_DISTANCE).reshape(df.shape[0], 1)
        y = df

        print("Nnet training: start...")
        mlp = MLPRegressor(
            max_iter=2000,
            learning_rate_init=0.01,
            random_state=42
        )
        mlp.fit(X, y)
        print("Nnet:", mlp)
        print("Nnet training: done")

        print("Saving images: start...")
        for i, image in enumerate(self.buffer[:max_frames]):
            # predict expects a 2-D (n_samples, n_features) array, not a bare int.
            res = mlp.predict(np.array([[i]]))
            rendered = draw_cube(np.copy(image), res.reshape((-1, 1, 2)))
            self._save_image(i, rendered)

        print("Intermediate frames interpolation: done")
        print("Saving images: done")
        
            
        
        

In [388]:
# Build the timeline: buffers the frames, recovers the keyframe poses and
# bundle-adjusts them (all of this runs inside the constructor).
timeline = Timeline()

Buffering: start...
Reading from images sequence in frames folder...
Buffering: done
Keyframes recovering: start...
(55, 1, 3) (55, 1, 2)
(12, 1, 3) (12, 1, 2)
Keyframes recovering: done
Keyframe bundle adjustment: start...
Generation 0 has Chi-Squared: 3373.808870
Generation 100 has Chi-Squared: 403.835055
Generation 200 has Chi-Squared: 389.364335
Generation 300 has Chi-Squared: 387.701874
Generation 400 has Chi-Squared: 387.601055
Generation 500 has Chi-Squared: 387.593396
Generation 600 has Chi-Squared: 387.592184
STOP("ChangeOverGeneration with {'tolerance': 0.005, 'generations': 200}")
Generation 0 has Chi-Squared: 12060.855976
Generation 100 has Chi-Squared: 1325.277896
Generation 200 has Chi-Squared: 887.428897
Generation 300 has Chi-Squared: 474.878800
Generation 400 has Chi-Squared: 459.154460
Generation 500 has Chi-Squared: 457.134655
Generation 600 has Chi-Squared: 455.926738
Generation 700 has Chi-Squared: 453.490933
Generation 800 has Chi-Squared: 449.842492
Generation 90

In [389]:
# Cube that the drawing cells below project into the frames.
cube = generate_cube(1.0, [0,0,10])

In [395]:
# Estimate a pose for every buffered image lying between the adjusted keyframes.
intermediate_frames = []
for i, (kf1, kf2, kf3) in enumerate(window(timeline.adjusted_keyframes, 3)):
    print("Step", i)
    # The first triple covers both of its keyframe intervals; later triples
    # only contribute their newest interval.
    first = i * timeline.KEYFRAME_DISTANCE if i == 0 else (i + 1) * timeline.KEYFRAME_DISTANCE
    last = (i + 2) * timeline.KEYFRAME_DISTANCE
    intermediate_images = timeline.buffer[first:last]
    intermediate_frames.extend(
        # triangulation returns (frame1, frame2, frame3); only the new frame is kept.
        Scene.triangulation(kf1, kf3, intermediate_image)[2]
        for intermediate_image in intermediate_images
    )

Step 0
Step 1


In [396]:
# Render the cube on every intermediate frame (pose not yet bundle-adjusted)
# and write the result to intermediate/NNN.png.
for i, iframe in enumerate(intermediate_frames):
    # Frame() copies the image, so iframe.image itself is not passed to draw_cube.
    frame = Frame(iframe.image)
    try:
        canvas = frame.image
        projected_cube = iframe.create_camera_and_project(cube)
        image = draw_cube(canvas, projected_cube, wide=2)
        file_name = "%03d" % i
        cv2.imwrite("intermediate/{}.png".format(file_name), image)
    except Exception as e:
        # Best effort: report the failure and continue with the next frame.
        print(e)

In [397]:
# Slow cell: runs Frame.bundle_adjustment (a differential-evolution search with
# up to 1000 generations per frame) on the first 100 intermediate frames.
adjusted_intermediate_frames = [f.bundle_adjustment() for f in intermediate_frames[:100]]

Generation 0 has Chi-Squared: 54343.152999
Generation 100 has Chi-Squared: 5786.314797
Generation 200 has Chi-Squared: 5566.887863
Generation 300 has Chi-Squared: 5566.167581
Generation 400 has Chi-Squared: 5564.168781
Generation 500 has Chi-Squared: 5564.060291
Generation 600 has Chi-Squared: 5562.839904
Generation 700 has Chi-Squared: 5562.484621
Generation 800 has Chi-Squared: 5562.434227
Generation 900 has Chi-Squared: 5562.423741
Generation 1000 has Chi-Squared: 5562.423672
STOP("EvaluationLimits with {'evaluations': 60000, 'generations': 1000}")
Generation 0 has Chi-Squared: 46675.649554
Generation 100 has Chi-Squared: 5270.868163
Generation 200 has Chi-Squared: 4298.842155
Generation 300 has Chi-Squared: 4080.048965
Generation 400 has Chi-Squared: 3952.850708
Generation 500 has Chi-Squared: 3899.689976
Generation 600 has Chi-Squared: 3897.854865
Generation 700 has Chi-Squared: 3897.851459
STOP("ChangeOverGeneration with {'tolerance': 0.005, 'generations': 200}")
Generation 0 has

In [398]:
# Render the cube again, this time with the bundle-adjusted poses.
# Output goes to the same intermediate/NNN.png names as the un-adjusted render.
for i, aiframe in enumerate(adjusted_intermediate_frames):
    # Frame() copies the image, so aiframe.image itself is not passed to draw_cube.
    frame = Frame(aiframe.image)
    try:
        canvas = frame.image
        cube_in_image = aiframe.create_camera_and_project(cube)
        image = draw_cube(canvas, cube_in_image, wide=2)
        target = "intermediate/{}.png".format("%03d" % i)
        cv2.imwrite(target, image)
    except Exception as e:
        # Best effort: report the failure and continue with the next frame.
        print(e)