# reader

> Utilities for reading orbit data from different file formats (.mat, .h5, .npy)

In [None]:
#| default_exp reader

In [None]:
#| hide
#| export
import h5py
from scipy.io import loadmat
import numpy as np
import pandas as pd
from typing import Optional, Any

In [None]:
#| hide
#| export
from unittest.mock import patch, MagicMock
from fastcore.test import test_eq

## Loading Data

In [None]:
#| export
def load_orbit_data(file_path: str,  # The path to the .mat, .h5, or .npy file (str or path-like).
                    variable_name: Optional[str] = None,  # Name of the variable in the .mat file, optional.
                    dataset_path: Optional[str] = None  # Path to the dataset in the .h5 file, optional.
                   ) -> Any:  # The loaded orbit data.
    """
    Load orbit data from MATLAB .mat files, HDF5 .h5 files, or NumPy .npy files.

    The format is selected from the file extension, compared
    case-insensitively. `variable_name` is required for .mat files and
    `dataset_path` is required for .h5 files; both are ignored otherwise.

    Raises `ValueError` when the extension is unsupported, when the argument
    required by the format is missing, or when the requested variable/dataset
    is not present in the file.
    """
    import os  # local import so this cell does not depend on an extra top-level import

    # Normalise path-like objects (e.g. pathlib.Path) to the plain path they wrap.
    path = os.fspath(file_path)
    lowered = path.lower()

    if lowered.endswith('.mat'):
        if variable_name is None:
            raise ValueError("variable_name must be provided for .mat files")
        mat = loadmat(path)
        if variable_name not in mat:
            raise ValueError(f"{variable_name} not found in {path}")
        return mat[variable_name]

    if lowered.endswith('.h5'):
        # Validate the argument before touching the file so a missing
        # dataset_path is always reported as such, not as an I/O failure.
        if dataset_path is None:
            raise ValueError("dataset_path must be provided for .h5 files")
        with h5py.File(path, 'r') as file:
            if dataset_path not in file:
                raise ValueError(f"{dataset_path} not found in {path}")
            # Copy into memory while the file is still open.
            return np.array(file[dataset_path])

    if lowered.endswith('.npy'):
        return np.load(path)

    raise ValueError("Unsupported file format. Please provide a .mat, .h5, or .npy file.")

In [None]:
#| test load_orbit_data
#| hide
# Fixtures handed back by the patched loaders below, so no real file is read.
mock_mat_data = {'Xarray': np.array([1, 2, 3])}
mock_h5_data = np.array([4, 5, 6])
mock_npy_data = np.array([7, 8, 9])

# Test for load_orbit_data with .mat file
# `loadmat` is patched under `__main__`, i.e. the namespace this notebook cell
# runs in, so the dict above stands in for the parsed MATLAB file.
with patch('__main__.loadmat', return_value=mock_mat_data) as mock_loadmat:
    result = load_orbit_data('test_data.mat', variable_name='Xarray')
    assert (result == mock_mat_data['Xarray']).all(), "MAT file loading failed or data mismatch"
    mock_loadmat.assert_called_once_with('test_data.mat')

# Test for load_orbit_data with .h5 file
# The object yielded by the `with h5py.File(...)` statement is replaced by a
# plain dict, which supports the `in` check and the item lookup the loader uses.
with patch('__main__.h5py.File') as mock_h5py:
    mock_file = MagicMock()
    mock_file.__enter__.return_value = {'/files/PERIODIC ORBITS': mock_h5_data}
    mock_h5py.return_value = mock_file
    result = load_orbit_data('test_data.h5', dataset_path='/files/PERIODIC ORBITS')
    assert (result == mock_h5_data).all(), "H5 file loading failed or data mismatch"

# Test for load_orbit_data with .npy file
# Patched on the `numpy` module itself, which is what `np.load` resolves to.
with patch('numpy.load', return_value=mock_npy_data) as mock_load:
    result = load_orbit_data('test_data.npy')
    assert (result == mock_npy_data).all(), "NPY file loading failed or data mismatch"
    mock_load.assert_called_once_with('test_data.npy')

In [None]:
#| export
def get_orbit_features(file_path: str,  # File to read (.mat, .h5, or .npy).
                       variable_name: Optional[str] = None,  # Variable to extract when reading a .mat file.
                       dataset_path: Optional[str] = None  # Dataset to extract when reading a .h5 file.
                      ) -> pd.DataFrame:  # One labelled column per orbit feature.
    """
    Read orbit features through `load_orbit_data` and return them as a DataFrame.

    The loaded data is labelled with ten feature names: orbit family, initial
    position (X, Y, Z), initial velocity (X, Y, Z), Jacobi constant, period
    and stability index.
    """
    axes = ('X', 'Y', 'Z')
    feature_names = [
        'Orbit Family',
        *(f'Initial Position {axis}' for axis in axes),
        *(f'Initial Velocity {axis}' for axis in axes),
        'Jacobi Constant', 'Period', 'Stability Index',
    ]

    raw = load_orbit_data(file_path, variable_name=variable_name, dataset_path=dataset_path)
    return pd.DataFrame(raw, columns=feature_names)

In [None]:
#| test get_orbit_features
#| hide
def test_get_orbit_features():
    """Check the DataFrame layout and the arguments forwarded to `load_orbit_data`."""
    feature_names = [
        'Orbit Family', 'Initial Position X', 'Initial Position Y', 'Initial Position Z',
        'Initial Velocity X', 'Initial Velocity Y', 'Initial Velocity Z',
        'Jacobi Constant', 'Period', 'Stability Index'
    ]
    # Two fake orbits standing in for whatever the loader would return
    rows = np.array([
        [1, 0, 0, 0, 1, 0, 0, 3.0, 2.0, 1.0],
        [2, 1, 1, 1, 0, 1, 0, 2.5, 1.5, 0.5]
    ])
    expected = pd.DataFrame(rows, columns=feature_names)

    # (file path, keyword arguments supplied by the caller) for each format
    cases = [
        ('dummy_path.mat', {'variable_name': 'dummy_var'}),
        ('dummy_path.h5', {'dataset_path': 'dummy_dataset'}),
        ('dummy_path.npy', {}),
    ]

    with patch('__main__.load_orbit_data', return_value=rows) as loader:
        for path, supplied in cases:
            frame = get_orbit_features(path, **supplied)
            test_eq(frame.equals(expected), True)

            # Arguments the caller omitted must be forwarded explicitly as None
            forwarded = {'variable_name': None, 'dataset_path': None, **supplied}
            loader.assert_called_once_with(path, **forwarded)

            # Clear the call record so the next case can assert "called once"
            loader.reset_mock()

# Run the test immediately when the cell executes
test_get_orbit_features()

## Save Data

In [None]:
def save_data(data: np.ndarray,  # Array to write to disk.
              file_name: str,  # Destination file name.
              file_type: str = 'hdf5'  # Output format: 'hdf5' or 'npy'.
             ) -> None:
    """
    Write a numpy array to disk as either an HDF5 file or a NumPy .npy file.

    For 'hdf5' the array is stored in a dataset named 'orbit_data' using gzip
    compression at level 9. Any other `file_type` than 'hdf5' or 'npy' raises
    `ValueError`.
    """
    if file_type == 'npy':
        np.save(file_name, data)
        return

    if file_type != 'hdf5':
        raise ValueError("Unsupported file type specified. Use 'hdf5' or 'npy'.")

    # 'w' creates the file, truncating any existing one
    with h5py.File(file_name, 'w') as handle:
        handle.create_dataset('orbit_data', data=data, compression='gzip', compression_opts=9)

In [None]:
#| test save_data
#| hide
# Test for NPY saving functionality
def test_save_data_npy():
    """Check that the 'npy' branch passes the file name and array to `numpy.save`."""
    payload = np.random.rand(5, 5)
    target = 'test_data.npy'

    # `numpy.save` is replaced by a mock, so nothing is written to disk
    with patch('numpy.save', autospec=True) as fake_save:
        save_data(payload, target, 'npy')
        fake_save.assert_called_once_with(target, payload)

test_save_data_npy()

# Test for handling invalid file type
def test_save_data_invalid_type():
    """Check that an unknown `file_type` is rejected with the documented message."""
    payload = np.random.rand(5, 5)
    expected_message = "Unsupported file type specified. Use 'hdf5' or 'npy'."

    try:
        save_data(payload, 'test_data.unknown', 'unknown')
    except ValueError as err:
        assert str(err) == expected_message, "Incorrect error message"
    else:
        # Reaching this branch means save_data accepted the bad type
        assert False, "ValueError expected but not raised"

test_save_data_invalid_type()


## Get Example Data

In [None]:
#| export
def get_example_orbit_data():
    """
    Load the bundled example orbits and return them with the axis order reversed.

    Reads the 'Xarray' variable from
    '../data/example_orbits_1_L2_S_200_EM_CR3BP.mat' through `load_orbit_data`.
    The path is relative, so it is resolved against the current working
    directory; the function is meant for demonstrations and tests where the
    file and variable are known in advance.

    :return: The loaded three-dimensional array transposed with axes (2, 1, 0).
    """
    example_file = "example_orbits_1_L2_S_200_EM_CR3BP.mat"

    # One level up from the working directory, inside its `data` folder
    relative_path = "../data/" + example_file

    orbits = load_orbit_data(relative_path, variable_name='Xarray')

    # Reverse the axis order: the last axis becomes the first
    return np.transpose(orbits, (2, 1, 0))

In [None]:
# Load the bundled example orbits; the trailing expression makes the notebook
# display the array's shape as the cell output.
data = get_example_orbit_data()
data.shape

(200, 6, 300)

In [None]:
#| hide
# Run nbdev's export step, which writes the cells marked `#| export` to the library module.
import nbdev; nbdev.nbdev_export()

JSONDecodeError: Expecting value: line 1 column 1 (char 0)