In [34]:
import pyvista as pv
import meshio
import numpy as np
import psycopg2
import hdf5storage
import h5py
import pickle
import logging
from sys import getsizeof
from dotenv import dotenv_values
import matplotlib.pyplot as plt
import os

# Load the notebook settings from the .env file one directory above the working directory.
config = dotenv_values("../.env")
# Path of the combined source dataset, and the directory that receives the per-trial files.
originalDatasetPath, datasetSubmissionDir = (
    config["datasetFilepath"],
    config["submissionDatasetDirectory"],
)


In [35]:
# Save single trial of data into new hdf5 file for dataset submission
def save_trial_for_submission(trial_number, original_dataset_path, submission_directory):
    """
    Copy one trial out of the combined dataset into its own HDF5 file.

    Reads the slice belonging to ``trial_number`` from the "stateData",
    "inputData", "outputData" and "simulationTimes" datasets of the original
    file and writes them, under the same dataset names, to
    ``Trial_<trial_number>.hdf5`` inside ``submission_directory``. The rows of
    the output slice are reversed (flipped along axis 0) before being written.
    The shape of every slice and the destination path are printed.

    Args:
        trial_number (int): One-based trial number. Trial 1 is index 0 along the
            last axis of each dataset in the original file.
        original_dataset_path (str): Path to the combined HDF5 dataset.
        submission_directory (str): Directory the trial file is written into.

    Raises:
        ValueError: If ``trial_number`` is smaller than 1. Such a value would
            turn into a zero-or-negative index that does not refer to the
            requested trial, so it is rejected before any file is opened.
    """
    # Normalise once so the dataset index and the file name always agree.
    trial_number = int(trial_number)
    if trial_number < 1:
        raise ValueError(
            f"trial_number must be a positive, one-based trial number; got {trial_number}."
        )
    trial_index = trial_number - 1

    with h5py.File(original_dataset_path, 'r') as source_file:
        state_data = source_file["stateData"][:, :, trial_index]
        print("Shape of state_data: ", state_data.shape)
        input_data = source_file["inputData"][:, :, trial_index]
        print("Shape of input_data: ", input_data.shape)
        # Reverse the row order of the outputs before storing them.
        output_data = np.flip(source_file["outputData"][:, :, trial_index], axis=0)
        print("Shape of output_data: ", output_data.shape)
        simulation_times = source_file["simulationTimes"][:, trial_index]
        print("Shape of time: ", simulation_times.shape)

    # os.path.join avoids the doubled separator that plain string concatenation
    # produces when submission_directory already ends with a slash.
    submission_file_path = os.path.join(submission_directory, f"Trial_{trial_number}.hdf5")
    with h5py.File(submission_file_path, 'w') as submission_file:
        submission_file.create_dataset("stateData", data=state_data)
        submission_file.create_dataset("inputData", data=input_data)
        submission_file.create_dataset("outputData", data=output_data)
        submission_file.create_dataset("simulationTimes", data=simulation_times)
    print(f"Saved trial {trial_number} to {submission_file_path}")


In [36]:
# Number of trials to export; trial numbers are one-based (1 .. NUM_TRIALS).
NUM_TRIALS = 40
for trial_num in range(1, NUM_TRIALS + 1):
    save_trial_for_submission(trial_num, originalDatasetPath, datasetSubmissionDir)

Shape of state_data:  (243789, 1000)
Shape of input_data:  (6, 1000)
Shape of output_data:  (40, 1000)
Shape of time:  (1000,)
Saved trial 1 to /media/brdl/rompcBackup/datasetSubmission//Trial_1.hdf5
Shape of state_data:  (243789, 1000)
Shape of input_data:  (6, 1000)
Shape of output_data:  (40, 1000)
Shape of time:  (1000,)
Saved trial 2 to /media/brdl/rompcBackup/datasetSubmission//Trial_2.hdf5
Shape of state_data:  (243789, 1000)
Shape of input_data:  (6, 1000)
Shape of output_data:  (40, 1000)
Shape of time:  (1000,)
Saved trial 3 to /media/brdl/rompcBackup/datasetSubmission//Trial_3.hdf5
Shape of state_data:  (243789, 1000)
Shape of input_data:  (6, 1000)
Shape of output_data:  (40, 1000)
Shape of time:  (1000,)
Saved trial 4 to /media/brdl/rompcBackup/datasetSubmission//Trial_4.hdf5
Shape of state_data:  (243789, 1000)
Shape of input_data:  (6, 1000)
Shape of output_data:  (40, 1000)
Shape of time:  (1000,)
Saved trial 5 to /media/brdl/rompcBackup/datasetSubmission//Trial_5.hdf5


In [37]:
# Try reading in saved trial 
def read_whole_trial(trial_file_path):
    """
    Load every dataset stored in a single Trial<number>.hdf5 file.

    Args:
        trial_file_path (str): Location of the Trial<number>.hdf5 file to read.

    Returns:
        state_data (np.ndarray): (num_dofs, num_timesteps) array holding the state at every timestep of the trial.
                                 A column is laid out as [x_0, y_0, z_0, x_1, y_1, z_1, ..., x_(N-1), y_(N-1), z_(N-1)],
                                 with N = 81263 mesh nodes. Coordinates are in millimeters.
        input_data (np.ndarray): (num_inputs, num_timesteps) array holding the control inputs at every timestep.
                                 A column is laid out as [u_0, u_1, u_2, u_3, u_4, u_5]; the inputs are
                                 non-dimensionalized pressures.
        output_data (np.ndarray): (num_outputs, num_timesteps) array holding the outputs at every timestep.
                                  A column is laid out as [z_0, x_0, z_1, x_1, ..., z_(M-1), x_(M-1)], with M = 20
                                  control points placed along the robot's body. Coordinates are in millimeters.
        simulation_times (np.ndarray): (num_timesteps,) array with the simulation time, in seconds, of each timestep.

    Raises:
        FileNotFoundError: If no file exists at ``trial_file_path``.
    """
    if not os.path.isfile(trial_file_path):
        raise FileNotFoundError(f"The file '{trial_file_path}' does not exist.")

    dataset_names = ("stateData", "inputData", "outputData", "simulationTimes")
    with h5py.File(trial_file_path, 'r') as trial_file:
        # Pull each dataset fully into memory while the file is still open.
        state_data, input_data, output_data, simulation_times = (
            np.asarray(trial_file[name][:]) for name in dataset_names
        )

    # The time vector is always handed back one-dimensional.
    return state_data, input_data, output_data, simulation_times.ravel()



In [38]:
# Reload trial 38 from the submission directory and summarise what was read.
state_data, input_data, output_data, simulation_times = read_whole_trial(
    f"{datasetSubmissionDir}/Trial_38.hdf5"
)
print(f"State data shape: {state_data.shape}")
print(f"Input data shape: {input_data.shape}")
print(f"Output data shape: {output_data.shape}")
print(f"Simulation times shape: {simulation_times.shape}")

print(f"First 5 simulation times: {simulation_times[:5]}")

State data shape: (243789, 1000)
Input data shape: (6, 1000)
Output data shape: (40, 1000)
Simulation times shape: (1000,)
First 5 simulation times: [0.   0.01 0.02 0.03 0.04]


In [None]:
# Read in original dataset to compare
# Zero-based index of trial 38 along the last axis of the original datasets;
# this must be the same trial that was reloaded from Trial_38.hdf5.
original_trial_index = 37
with h5py.File(originalDatasetPath, 'r') as f:
    # Each print reports the shape of the array read on the line above it
    # (the *_original arrays), not the shape of the reloaded trial data.
    state_data_original = f["stateData"][:, :, original_trial_index]
    print("Shape of state_data: ", state_data_original.shape)
    input_data_original = f["inputData"][:, :, original_trial_index]
    print("Shape of input_data: ", input_data_original.shape)
    # Flip the outputs along axis 0, matching what is done when a trial is exported.
    output_data_original = np.flip(f["outputData"][:, :, original_trial_index], axis=0)
    print("Shape of output_data: ", output_data_original.shape)
    time_original = f["simulationTimes"][:, original_trial_index]
    print("Shape of time: ", time_original.shape)



Shape of state_data:  (243789, 1000)
Shape of input_data:  (6, 1000)
Shape of output_data:  (40, 1000)
Shape of time:  (1000,)


In [41]:
# Compare reopened data to original data for a single timestep by making sure theyre equal
# Compare the arrays reloaded from the trial file with the slices taken directly
# from the original dataset. Each check covers the whole array (all timesteps).
# NOTE(review): np.array_equal reports False when shapes differ or any element differs,
# and by default also when NaNs are present; a False can also come from stale
# *_original variables, so re-run the notebook top to bottom before trusting it.
comparisons = (
    ("State data equal:", state_data, state_data_original),
    ("Input data equal:", input_data, input_data_original),
    ("Output data equal:", output_data, output_data_original),
    ("Simulation times equal:", simulation_times, time_original),
)
for label, reloaded, original in comparisons:
    print(label, np.array_equal(reloaded, original))



State data equal: False
Input data equal: False
Output data equal: False
Simulation times equal: True
