In [1]:
from cv2 import (
    VideoCapture,
    VideoWriter,
    VideoWriter_fourcc,
    CAP_PROP_FRAME_COUNT,
    destroyAllWindows,
    solvePnP,
    SOLVEPNP_IPPE,
    Rodrigues,
    undistort,
    projectPoints,
    drawContours
) 
from os.path import exists as fileExists
import marker_detector as mkdtct
import numpy as np


In [2]:
# Videos that can be processed, keyed by object name. The value is the
# numeric id that appears in the input/output file names.
loadableVideos = dict(Toucan=1, Dino=2, Cracker=3, Ganesh=4)

# Object selected for this run: pick any key of loadableVideos.
objectToTrack = loadableVideos["Ganesh"]

# True when the marker CSV of the selected object is already on disk,
# False otherwise.
csvExists = fileExists(f"./obj{objectToTrack}_marker.csv")


In [3]:
def detectMarkerAndTrack(objectToTrack: int) -> None:
    """Run the marker detector on every frame of the selected video.

    The video ``../data/obj0{objectToTrack}.mp4`` is read frame by frame and
    each frame is handed to ``mkdtct.detectAndLabelMarkers`` together with its
    zero-based frame index. Before the first frame is processed, the file
    ``obj{objectToTrack}_marker.csv`` is (re)created in the working directory
    with only the header row; the detector is expected to add the 2D-3D
    marker rows to it (see ``marker_detector`` -- not visible from here).

    Args:
        objectToTrack (int): numeric id of the video file to process.

    Raises:
        OSError: if the video cannot be opened or its first frame cannot be
            read. In that case no CSV file is created, so a later run is not
            fooled into believing that the detection was already done.
    """
    videoCapPath = f"../data/obj0{objectToTrack}.mp4"
    vidcap = VideoCapture(videoCapPath)
    try:
        # read() returns (False, None) when the capture could not be opened.
        success, frame = vidcap.read()
        if not success:
            raise OSError(f"Cannot read video file: {videoCapPath}")

        # Start from a fresh CSV that only contains the column names.
        with open(f"obj{objectToTrack}_marker.csv", "w") as csvFile:
            csvFile.write("FRAME, MARK_ID,   Px,   Py,    X,    Y, Z\n")

        # Read until the stream is exhausted instead of trusting the frame
        # count reported by the container, which is only an estimate for
        # some files and would silently truncate the processing.
        index = 0
        while success:
            mkdtct.detectAndLabelMarkers(
                image=frame, currentFrame=index, objectToTrack=objectToTrack
            )
            success, frame = vidcap.read()
            index += 1
    finally:
        # Release the capture even if the detector raises.
        vidcap.release()

# Create the marker CSV only when it is not on disk yet.
# The flag is refreshed here, at the point of use, instead of trusting the
# value computed when an earlier cell was executed: otherwise re-running this
# cell would repeat the (slow) detection even though the file now exists.
csvExists = fileExists(f"./obj{objectToTrack}_marker.csv")
if not csvExists:
    detectMarkerAndTrack(objectToTrack)


The following code uses the coordinates stored in the input CSV file to draw a 3D cube on top of each frame of the video. 
It reads the 2D and 3D coordinates of every marker in each frame from the CSV file and stores them in a list structured as follows: 
* Each element of the list is a tuple, and the element at index `i` corresponds to frame `i` of the video.
* Each tuple contains two lists:
    * The first contains the 2D coordinates of every marker detected in the frame (one `(Px, Py)` pair per marker).
    * The second contains the 3D coordinates of the same markers (one `(X, Y, Z)` triple per marker), matched to the 2D coordinates by position.

This structure is then traversed in a `for` loop: for each frame, the coordinates are used to estimate the camera pose,
which is in turn used to draw a cube over the object placed at the center of the table.

In [4]:

# Load the marker table written by the detection step, skipping the header
# row. Columns: FRAME, MARK_ID, Px, Py, X, Y, Z.
# The reshape keeps the array two-dimensional even when the file holds a
# single data row (or none at all).
rawData = np.genfromtxt(
    "./obj" + str(objectToTrack) + "_marker.csv", delimiter=",", skip_header=1
).reshape(-1, 7)

# Frame index of every CSV row.
frameIds = rawData[:, 0].astype(int)

# One ([2D points], [3D points]) tuple per frame, addressed by frame index.
# The list is sized from the highest frame index, not from the number of
# distinct indices: a frame in which no marker was detected has no rows in the
# CSV, and counting distinct values would make the list too short and raise an
# IndexError below. Such frames simply keep two empty lists, so code that
# consumes coordsMap must be prepared to find empty slots.
frameSlots = int(frameIds.max()) + 1 if frameIds.size else 0
coordsMap = [([], []) for _ in range(frameSlots)]

# Distribute every row to the slot of its frame; the 2D and the 3D point of a
# marker end up at the same position of the two lists.
for frameId, row in zip(frameIds, rawData):
    coordsMap[frameId][0].append((row[2], row[3]))
    coordsMap[frameId][1].append((row[4], row[5], row[6]))


# Camera calibration (already given): the 3x3 matrix of intrinsic parameters
# and the five distortion coefficients.
K = np.array(
    [
        [1667.50771, 0.0, 954.599045],
        [0.0, 1669.72683, 527.926123],
        [0.0, 0.0, 1.0],
    ]
)
dist = np.array(
    [0.116577217, -0.0928944623, 7.15149511e-05, -0.00180025974, -0.124761932]
)


# Faces of the cube. Every row lists the indices (into cube_pts, below) of the
# four corners that delimit one face.
cube_faces = np.asarray(
    [
        [0, 1, 2, 3],  # bottom face (z = 80.0)
        [1, 5, 6, 2],  # side face at x = 49.5
        [5, 4, 7, 6],  # top face (z = 178.994949)
        [4, 0, 3, 7],  # side face at x = -49.5
        [0, 4, 5, 1],  # side face at y = 49.5
        [3, 2, 6, 7],  # side face at y = -49.5
    ],
    dtype=np.int32,
)


# Corners of the cube in object space: the same four (x, y) positions are
# listed first at the lower height (indices 0-3) and then at the upper one
# (indices 4-7).
# NOTE(review): the original annotated the four positions with 15, 21, 3, 9,
# presumably the ids of the nearest markers -- confirm.
cube_pts = np.array(
    [
        (x, y, z)
        for z in (80.0, 178.994949)
        for x, y in ((-49.5, 49.5), (49.5, 49.5), (49.5, -49.5), (-49.5, -49.5))
    ],
    dtype=np.float32,
)

# Output video: same container/codec, frame rate and frame size for every run.
videoFormat = VideoWriter_fourcc("m", "p", "4", "v")
videoWriterPath = "../data/obj" + str(objectToTrack) + "_cube.mp4"
videoWriter = VideoWriter(
    videoWriterPath,
    videoFormat,  # Every output-video will be produced in this format.
    29.97,  # Every input-video has this frame rate.
    (1920, 1080),  # Every input-video has these shapes.
)

videoCapPath = f"../data/obj0{objectToTrack}.mp4"
vidcap = VideoCapture(videoCapPath)

# SOLVEPNP_IPPE needs at least four (coplanar) 2D-3D correspondences.
minPnPPoints = 4

# The cube is drawn on the *undistorted* frame, i.e. an image that follows the
# ideal pinhole model with matrix K. The cube therefore has to be projected
# with zero distortion; projecting with `dist` would place it where it belongs
# in the original, distorted image.
noDistortion = np.zeros(5)

try:
    success, frame = vidcap.read()

    # coordsMap[i] holds the correspondences of the i-th frame of the video.
    for points2d, points3d in coordsMap:
        if not success:
            break

        # Undistort the current frame using the K and dist matrices.
        frame_undistorted = undistort(frame, K, dist)

        # Estimate the camera pose from the marker correspondences. The 2D
        # points were measured on the original frame, hence `dist` is used.
        poseFound = False
        if len(points2d) >= minPnPPoints:
            poseFound, rvec, tvec = solvePnP(
                np.array(points3d, dtype=np.float64),
                np.array(points2d, dtype=np.float64),
                K,
                dist,
                flags=SOLVEPNP_IPPE,
            )

        # Without a valid pose the frame is written as is (no cube) rather
        # than aborting the whole video.
        if poseFound:
            # projectPoints takes the rotation vector directly, so no
            # conversion to a rotation matrix is needed.
            cube_pts_2d = projectPoints(cube_pts, rvec, tvec, K, noDistortion)[0]

            # Draw the outline of every face of the cube onto the image.
            for face in cube_faces:
                drawContours(
                    frame_undistorted,
                    (np.array([cube_pts_2d[face]], dtype=np.int32)),
                    -1,  # draw all contours.
                    (0, 128, 255),
                    thickness=3,
                )

        videoWriter.write(frame_undistorted)
        success, frame = vidcap.read()
finally:
    # Always finalize the output file and free the capture, even on errors.
    vidcap.release()
    videoWriter.release()
    destroyAllWindows()