In [214]:
import os
import pickle
import time

import cv_utils.core as cv_utils
import cv_datetime_utils.core as cv_datetime_utils
import numpy as np
import pandas as pd
import pose_tracking_3d
import simplejson

In [215]:
# Input/output configuration: both files live under the same data directory.
data_directory = "./data"

# 2D poses (input) and 3D pose tracks (output); both names carry the .pickle.xz extension
input_dataframe_filename, output_dataframe_filename = (
    "example_2d_poses_dataframe.pickle.xz",
    "example_3d_pose_tracks_dataframe.pickle.xz",
)

# Full paths used by the cells that read the input and write the output
input_dataframe_path, output_dataframe_path = (
    os.path.join(data_directory, filename)
    for filename in (input_dataframe_filename, output_dataframe_filename)
)

In [216]:
# Define model parameters

# Number of keypoints per pose. The 2D poses inspected later in this notebook
# have keypoint arrays of shape (18, 2).
NUM_KEYPOINTS = 18

# Used to centre the initial keypoint positions and to scale their initial error.
# NOTE(review): presumably the room dimensions along x/y/z in metres - confirm.
room_size = np.array([7.0, 8.0, 3.0])

pose_initialization_model = pose_tracking_3d.PoseInitializationModel(
    # one row per keypoint, each set to half of room_size
    initial_keypoint_position_means=np.tile(room_size / 2, (NUM_KEYPOINTS, 1)),
    # one zero 3-vector per keypoint
    initial_keypoint_velocity_means=np.zeros((NUM_KEYPOINTS, 3)),
    # half of the largest entry of room_size
    initial_keypoint_position_error=np.amax(room_size) / 2.0,
    initial_keypoint_velocity_error=2.0,
)

# NOTE(review): reference_delta_t=0.1 matches the 0.1 s spacing of the input
# timestamps shown below - presumably it is expressed in seconds; confirm.
keypoint_model = pose_tracking_3d.KeypointModel(
    position_observation_error=1.0,
    reference_delta_t=0.1,
    reference_position_transition_error=0.1,
    reference_velocity_transition_error=0.1,
)

pose_tracking_model = pose_tracking_3d.PoseTrackingModel(
    cost_threshold=1.0, num_missed_observations_threshold=10
)

# Ingest Data

In [217]:
# Ingest data
# NOTE: unpickling can execute arbitrary code; only load pickle files from a trusted source.
input_dataframe = pd.read_pickle(input_dataframe_path)

num_rows = input_dataframe.shape[0]

# Camera names are the top level of the column MultiIndex
camera_names = input_dataframe.columns.levels[0].tolist()

# Calibration data is read from the same data directory as the input dataframe
# (reuse the configured directory instead of repeating the "./data" literal)
camera_calibration_parameters = cv_utils.fetch_camera_calibration_data_from_local_drive_multiple_cameras(
    camera_names=camera_names, camera_calibration_data_directory=data_directory
)

# The first row (first timestamp) is used for the initial 3D reconstruction
initial_dataframe_row = input_dataframe.iloc[0]

In [7]:
# Preview the first rows for one camera (columns: poses, file, model, frame)
input_dataframe['camera01'].head()

Unnamed: 0,poses,file,model,frame
2018-07-04 18:20:00.000,"[[[1098.0, 216.58696, 0.81464326], [1164.0, 24...",video_2018-07-04-18-20-00.mp4,cmu,0
2018-07-04 18:20:00.100,"[[[1092.0, 221.86957, 0.92140007], [1158.0, 25...",video_2018-07-04-18-20-00.mp4,cmu,1
2018-07-04 18:20:00.200,"[[[1086.0, 227.15218, 0.8493447], [1152.0, 253...",video_2018-07-04-18-20-00.mp4,cmu,2
2018-07-04 18:20:00.300,"[[[1074.0, 227.15218, 0.74878925], [1140.0, 25...",video_2018-07-04-18-20-00.mp4,cmu,3
2018-07-04 18:20:00.400,"[[[924.0, 47.54348, 0.9911146], [954.0, 100.36...",video_2018-07-04-18-20-00.mp4,cmu,4


In [34]:
# First row of the dataframe: poses (plus file, model and frame number)
# for the first timestamp, one group of columns per camera (4 cameras)
initial_dataframe_row

camera01  poses    [[[1098.0, 216.58696, 0.81464326], [1164.0, 24...
          file                         video_2018-07-04-18-20-00.mp4
          model                                                  cmu
          frame                                                    0
camera02  poses    [[[618.0, 10.565217, 0.40926868], [618.0, 26.4...
          file                         video_2018-07-04-18-20-00.mp4
          model                                                  cmu
          frame                                                    0
camera03  poses    [[[1104.0, 105.652176, 0.8920398], [1134.0, 15...
          file                         video_2018-07-04-18-20-00.mp4
          model                                                  cmu
          frame                                                    0
camera04  poses    [[[618.0, 63.391304, 0.84008336], [612.0, 95.0...
          file                         video_2018-07-04-18-20-00.mp4
          model                   

# Prepare 2D reconstruction data to be consumed by 3D reconstruction process

In [218]:
# Convert the 2D pose data into a Poses2D object.
# The input is a single dataframe row holding the keypoint poses
# for one frame: the first timestamp, from all 4 cameras.

initial_poses_2d = pose_tracking_3d.Poses2D.from_dataframe_row(
    dataframe_row=initial_dataframe_row, camera_names=camera_names
)

In [219]:
# This is a Poses2D object.
# Its "pose_2d_list_list" attribute contains a list of Pose2D objects per camera
# for one timestamp - here the first frame of camera data for all 4 cameras.
initial_poses_2d

<pose_tracking_3d.core.Poses2D at 0x12f7d3490>

In [220]:
# Each pose is a Pose2D object.
# For each camera, there is a list of the poses found -
# in this example, the first two cameras pick up 3 people (poses)
# while cameras 3 and 4 pick up 4 people (poses).
initial_poses_2d.pose_2d_list_list

[[<pose_tracking_3d.core.Pose2D at 0x12f567590>,
  <pose_tracking_3d.core.Pose2D at 0x12f56eb10>,
  <pose_tracking_3d.core.Pose2D at 0x12f56e290>],
 [<pose_tracking_3d.core.Pose2D at 0x12f56e7d0>,
  <pose_tracking_3d.core.Pose2D at 0x12f56e610>,
  <pose_tracking_3d.core.Pose2D at 0x12f565710>],
 [<pose_tracking_3d.core.Pose2D at 0x12f61c590>,
  <pose_tracking_3d.core.Pose2D at 0x12f61c110>,
  <pose_tracking_3d.core.Pose2D at 0x12f61c0d0>,
  <pose_tracking_3d.core.Pose2D at 0x12f61c190>],
 [<pose_tracking_3d.core.Pose2D at 0x10e15a310>,
  <pose_tracking_3d.core.Pose2D at 0x12c460e10>,
  <pose_tracking_3d.core.Pose2D at 0x12c460390>,
  <pose_tracking_3d.core.Pose2D at 0x12c4602d0>]]

In [221]:
# Each Pose2D object has a keypoints attribute with 18 keypoints.
# A keypoint is one (x, y) pair of pixel coordinates
# from the camera image.
initial_poses_2d.pose_2d_list_list[0][0].keypoints.shape

(18, 2)

# 3D Reconstruction

In [222]:
# 3D reconstruction using the initial 2D poses (first frame of data from the 4 cameras)

initial_poses_3d = pose_tracking_3d.Poses3D.from_poses_2d(
    poses_2d=initial_poses_2d, cameras=camera_calibration_parameters
)

In [223]:
# initial_poses_3d is a Poses3D object; this shows its pose_3d_list_list attribute.
# 3 unique poses were identified.
# The 4th poses found by cameras 3 and 4 were dropped because they
# were not considered "best matches".
# TODO: why is this a nested list? When would you have more than one set of 3D poses?
initial_poses_3d.pose_3d_list_list

[[<pose_tracking_3d.core.Pose3D at 0x12f686a10>,
  <pose_tracking_3d.core.Pose3D at 0x12f60ff90>,
  <pose_tracking_3d.core.Pose3D at 0x12f685050>]]

In [224]:
# This is a Pose3D object: the first pose of the first inner list
initial_poses_3d.pose_3d_list_list[0][0]

<pose_tracking_3d.core.Pose3D at 0x12f686a10>

## 3D Pose Object

attributes of a 3D pose object:

- keypoints: 18 key points for one human body, 3 dimensions in space
- valid_keypoints: ?
- projection_error: ?
- tag: ?
- timestamp: unique identifier for the frame; in the example data there is 1 frame every 100 milliseconds (18:20:00.000, 18:20:00.100, ...)
- keypoint_std_devs: ?

Which of these are currently being used / called for the 3D pose tracking?

In [163]:
# jsonify code dana wrote

def pose_3d_keypoints_to_list(pose_3d_keypoints):
    """
    convert a keypoints array to nested lists to write to json

    input: 2 dimensional array (or nested sequence) of keypoint coordinates
    output: 2 dimensional list whose elements are native Python scalars
    """
    # ndarray.tolist() converts every element to a native Python scalar.
    # Calling list() on each row would leave NumPy scalars (e.g. np.float32)
    # inside the inner lists, which JSON encoders cannot serialize.
    return np.asarray(pose_3d_keypoints).tolist()

def pose_3d_valid_keypoints_to_string(pose_3d_valid_keypoints):
    """
    convert each valid-keypoint flag to its string form to write to json
    input: array (or other iterable) of per-keypoint flags
    output: list of strings, one str(flag) per element (e.g. 'True' / 'False')
    """
    return [str(keypoint_flag) for keypoint_flag in pose_3d_valid_keypoints]

def timestamp_pd_datetime_to_string(timestamp):
    """
    convert a pd datetime to string to write to json
    input: pandas datetime object (passed to
           cv_datetime_utils.convert_to_native_utc_naive)
    output: string formatted as 'YYYY-MM-DD HH:MM:SS.ffffff'
            (year-month-day, hours:minutes:seconds.microseconds)
    """
    # Convert the argument itself rather than a notebook-level variable,
    # so every caller gets the string for the timestamp it passed in.
    timestamp_datetime = cv_datetime_utils.convert_to_native_utc_naive(timestamp)
    timestamp_string = timestamp_datetime.strftime('%Y-%m-%d %H:%M:%S.%f')
    return timestamp_string

def pose_3d_to_dict(pose_3d_object):
    """convert a 3d pose object to a python dictionary
        input: 3d pose object (its keypoints, valid_keypoints, projection_error,
               tag, timestamp and keypoint_std_devs attributes are read)
        output: python dictionary with one key per attribute listed above
    """
    # convert data to objects that are writable to json
    keypoints = pose_3d_keypoints_to_list(pose_3d_object.keypoints)
    valid_keypoints = pose_3d_valid_keypoints_to_string(pose_3d_object.valid_keypoints)
    timestamp = timestamp_pd_datetime_to_string(pose_3d_object.timestamp)

    # NOTE(review): projection_error, tag and keypoint_std_devs are passed through
    # unconverted - if any of them is a NumPy value it will need converting
    # before it can be written to json; confirm their types.
    pose_3d_dict = {
        "keypoints": keypoints,
        "valid_keypoints": valid_keypoints,
        "projection_error": pose_3d_object.projection_error,
        "tag": pose_3d_object.tag,
        # use the string computed above for this pose
        "timestamp": timestamp,
        "keypoint_std_devs": pose_3d_object.keypoint_std_devs
    }
    return pose_3d_dict

def write_pose_3d_to_json(pose_3d_object):
    """
    serialize one pose 3d object and write it to the file '3d_pose.json'
    input: pose_3d_object
    output: None (the file is opened in 'w' mode, so existing content is replaced)
    """
    pose_as_dict = pose_3d_to_dict(pose_3d_object)

    # NOTE(review): the keypoints may contain NaN; depending on the simplejson
    # version and its allow_nan / ignore_nan settings this is either written as a
    # bare NaN token or rejected - confirm the desired behaviour.
    with open('3d_pose.json', 'w') as output_file:
        serialized_pose = simplejson.dumps(pose_as_dict, indent=4, sort_keys=True)
        output_file.write(serialized_pose)

In [164]:
# json dict for 1 3D pose object
pose_3d_object = initial_poses_3d.pose_3d_list_list[0][0]

In [165]:
write_pose_3d_to_json(pose_3d_object)

## 3D poses object

attributes of a 3D poses object:

- pose_3d_list_list: a list of the "best match" 3d poses for a given timestamp, in the form of 3d pose objects
- num_cameras_source_images: number of cameras for a given timestamp
- num_2d_poses_source_images: a list, for each camera, number of poses that were captured
- source_cameras: list of dictionaries, one dictionary per camera with camera metadata
- source_images: ?

In [226]:
# jsonify code dana wrote

def source_cameras_data_to_lists(source_cameras):
    """
    convert np arrays of camera calibration data to json writeable lists
    input: iterable of camera dictionaries, each holding arrays under the keys
    'camera_matrix', 'distortion_coefficients', 'rotation_vector' and
    'translation_vector'
    output: writable camera calibration data -
    (a list of dictionaries with those four keys mapping to lists instead of
    np arrays; any other keys are not copied)"""
    calibration_fields = (
        'camera_matrix',
        'distortion_coefficients',
        'rotation_vector',
        'translation_vector',
    )
    return [
        {field: camera[field].tolist() for field in calibration_fields}
        for camera in source_cameras
    ]

def poses_3d_to_dict(poses_3d_object):
    """convert a 3d poses object to a python dictionary
        input: 3d poses object
        output: python dictionary; the nested pose_3d_list_list is flattened
                into a single list of pose dictionaries
    """
    flattened_pose_dicts = [
        pose_3d_to_dict(pose_3d)
        for pose_collection in poses_3d_object.pose_3d_list_list
        for pose_3d in pose_collection
    ]

    # camera calibration arrays become plain lists
    cameras_as_lists = source_cameras_data_to_lists(poses_3d_object.source_cameras)

    return {
        "pose_3d_list_list": flattened_pose_dicts,
        "num_cameras_source_images": poses_3d_object.num_cameras_source_images,
        "num_2d_poses_source_images": poses_3d_object.num_2d_poses_source_images,
        "source_cameras": cameras_as_lists,
        "source_images": poses_3d_object.source_images
    }

def write_poses_3d_to_json(poses_3d_object):
    """
    serialize a poses 3d object and write it to the file '3d_poses.json'
    input: poses_3d_object
    output: None (the file is opened in 'w' mode, so existing content is replaced)
    """
    poses_as_dict = poses_3d_to_dict(poses_3d_object)

    with open('3d_poses.json', 'w') as output_file:
        serialized_poses = simplejson.dumps(poses_as_dict, indent=4, sort_keys=True)
        output_file.write(serialized_poses)

In [228]:
# Write the Poses3D object for the first timestamp to '3d_poses.json'
write_poses_3d_to_json(initial_poses_3d)

In [94]:
# The Pose3D objects that were converted above
initial_poses_3d.pose_3d_list_list

[[<pose_tracking_3d.core.Pose3D at 0x12b79bed0>,
  <pose_tracking_3d.core.Pose3D at 0x12b6fae90>,
  <pose_tracking_3d.core.Pose3D at 0x12bdd4f90>]]

In [151]:
# The Poses3D object whose json dictionary is inspected in the next cell
poses_3d_object = initial_poses_3d

In [142]:
# Build the dictionary here so the cell also runs on a fresh kernel:
# poses_3d_dict is otherwise only a local variable inside the jsonify functions.
poses_3d_dict = poses_3d_to_dict(poses_3d_object)
poses_3d_dict

{'pose_3d_list_list': [{'keypoints': [[3.254598617553711,
     1.168303370475769,
     1.4924794435501099],
    [3.356855630874634, 0.9134044647216797, 1.3636484146118164],
    [3.5265567302703857, 1.0338704586029053, 1.3853520154953003],
    [3.7678744792938232, 0.8904278874397278, 1.1950708627700806],
    [nan, nan, nan],
    [3.206874370574951, 0.856023371219635, 1.3625627756118774],
    [nan, nan, nan],
    [nan, nan, nan],
    [3.4976155757904053, 0.9732094407081604, 0.878951907157898],
    [nan, nan, nan],
    [nan, nan, nan],
    [3.322049140930176, 0.8554972410202026, 0.866292417049408],
    [nan, nan, nan],
    [nan, nan, nan],
    [3.2757222652435303, 1.2216941118240356, 1.5253654718399048],
    [nan, nan, nan],
    [3.3557536602020264, 1.1372450590133667, 1.517604112625122],
    [nan, nan, nan]],
   'valid_keypoints': ['True',
    'True',
    'True',
    'True',
    'False',
    'True',
    'False',
    'False',
    'True',
    'False',
    'False',
    'True',
    'False',


currently, we are generating a Poses3D object for each timestamp, which includes Pose3D objects for each camera

Could output to 1 json file per timestamp:

2018-07-04_18-20-00-000.json

{
    "camera01": [
        "Pose3D object 1",
        "Pose3D object 2"
    ],
    "camera02": [
        "Pose3D object 1",
        ...
    ]
}

# 3D Pose Tracking

In [None]:
# initialize 3D pose tracking

# The tracks are seeded with the 3D poses reconstructed from the first dataframe row
pose_tracks = pose_tracking_3d.Pose3DTracks.initialize(
    pose_initialization_model=pose_initialization_model,
    keypoint_model=keypoint_model,
    pose_tracking_model=pose_tracking_model,
    pose_3d_observations=initial_poses_3d,
)

# Update the tracks with every remaining row (row 0 was used for initialization).
# time.perf_counter() is a monotonic clock meant for measuring elapsed time;
# start_time and end_time are only meaningful as a difference.
start_time = time.perf_counter()
for row_index in range(1, num_rows):
    dataframe_row = input_dataframe.iloc[row_index]
    poses_2d = pose_tracking_3d.Poses2D.from_dataframe_row(dataframe_row, camera_names)
    poses_3d = pose_tracking_3d.Poses3D.from_poses_2d(poses_2d, camera_calibration_parameters)
    pose_tracks.update(poses_3d)
end_time = time.perf_counter()
elapsed_time = end_time - start_time

In [None]:
# Output

# The timed loop starts at row 1 (row 0 only initializes the tracks), so the
# per-frame figure is averaged over num_rows - 1 frames. max() avoids a
# division by zero when the input has a single row.
num_timed_frames = max(num_rows - 1, 1)

print(
    "{} tracks produced from {} frames in {:.1f} seconds: {:.1f} milliseconds per frame".format(
        pose_tracks.num_inactive_tracks() + pose_tracks.num_active_tracks(),
        num_rows,
        elapsed_time,
        1000 * elapsed_time / num_timed_frames,
    )
)

output_dataframe = pose_tracks.dataframe()

output_dataframe.to_pickle(output_dataframe_path)

# Report the path that was actually written, not just the bare file name
print("Output saved in {}".format(output_dataframe_path))
