In [2]:
import os
import sys
import h5py

import torch
import numpy as np
from pathlib import Path
from matplotlib import pyplot as plt

# READ FILES

In [3]:
def load_h5(path: Path) -> np.ndarray:
    """Load the array stored under the ``"data"`` key of an HDF5 file.

    Numeric data is returned as a ``float32`` array, transposed. Data that
    cannot be converted to floats is returned as a NumPy unicode array in
    its stored (untransposed) layout.

    Byte strings are decoded as UTF-8. This covers both fixed-length string
    datasets and variable-length ones, which arrive as ``bytes`` objects in
    an object array and would otherwise be rendered as ``"b'...'"``.

    Parameters
    ----------
    path : Path
        Location of the ``.h5`` file to read.

    Returns
    -------
    np.ndarray
        ``float32`` array (transposed) for numeric data, otherwise a
        unicode string array with the same shape as the stored dataset.

    Raises
    ------
    KeyError
        If the file has no ``"data"`` entry.
    """
    with h5py.File(path, "r") as f:
        # Read the whole dataset into memory; the file can then be closed.
        data = f["data"][:]

    try:
        # Attempt to convert the data to floats
        return np.array(data, dtype="float32").T  # Transpose for C-order
    except ValueError:
        # Conversion failed: keep the data as text. Decode element-wise so
        # that bytes (fixed- or variable-length) become clean strings rather
        # than their "b'...'" repr.
        as_text = [
            item.decode("utf-8") if isinstance(item, bytes) else str(item)
            for item in np.asarray(data).ravel()
        ]
        return np.array(as_text, dtype=str).reshape(np.shape(data))

In [7]:
# NOTE: machine-specific absolute path; point this at your local copy of the data.
data_dir = r"C:\Users\cesar\Desktop\Projects\FoundationModels\fimodemix\data\state_sde_full\expressions_3d\dimension_1dim-3\1"

batchdata_dir_path = Path(data_dir)
# os.listdir returns entries in arbitrary order; sort them so the listing
# (and anything built by iterating over it) is the same on every run.
file_names = sorted(os.listdir(data_dir))
file_names

['drift_functions_at_hypercube.h5',
 'f_strs.h5',
 'g_strs.h5',
 'hypercube_locations.h5',
 'init_condition_distr_parameters.h5',
 'obs_times.h5',
 'obs_values.h5',
 'scaled_diffusion_functions_at_hypercube.h5']

In [8]:
# Load every .h5 file into a dict keyed by the file name without its
# ".h5" suffix.
loaded_data = {}
for file_name in file_names:
    # Skip entries that are not .h5 files (e.g. OS metadata files), which
    # would otherwise abort the whole cell when h5py tries to open them.
    if not file_name.endswith(".h5"):
        continue
    file_path = batchdata_dir_path / file_name
    file_name_ = file_path.name.removesuffix(".h5")
    loaded_data[file_name_] = load_h5(file_path)

In [9]:
# Show the available entries: one key per loaded file, named after the file with ".h5" removed.
loaded_data.keys()

dict_keys(['drift_functions_at_hypercube', 'f_strs', 'g_strs', 'hypercube_locations', 'init_condition_distr_parameters', 'obs_times', 'obs_values', 'scaled_diffusion_functions_at_hypercube'])

In [10]:
# Pull the arrays used below out of the loaded-file dictionary.
obs_times, obs_values, f_strs = (
    loaded_data[key] for key in ('obs_times', 'obs_values', 'f_strs')
)

In [11]:
# Inspect the dimensions of the observations array (load_h5 transposes numeric data on read).
obs_values.shape

(15, 300, 128, 3, 2)

In [16]:
# First column of the expression-string array.
# NOTE(review): presumably the per-dimension expressions of the first sample — confirm.
f_strs[:,0]

array(['0.3297 * x_1', '-4.5810 * x_1',
       'x_0 * (0.8794 + 0.0535 * x_2 + -0.9622 * x_1)'], dtype='<U98')