# Processing

> Utilities to resample, downsample, reorder, reshape, and time-stamp orbit arrays

In [28]:
#| default_exp processing

In [29]:
#| export
#| hide
from scipy.interpolate import interp1d
import numpy as np
from typing import Tuple, Any, List, Dict

In [30]:
#| hide
from fastcore.test import test_eq

## Resample by Interpolation

In [31]:
#| export
def resample_3d_array(data: np.ndarray,  # The original 3D array to be resampled.
                      axis: int,         # The axis along which to perform the interpolation.
                      target_size: int   # The new size of the axis after resampling.
                     ) -> np.ndarray:
    """
    Resample a 3D numpy array along a specified axis using linear interpolation.

    The samples along `axis` are treated as equally spaced on [0, 1], and the
    output holds `target_size` equally spaced samples on the same interval.

    Returns an array shaped like `data` except that `axis` has length
    `target_size`. Inexact (floating-point or complex) inputs keep their dtype;
    any other dtype (e.g. integers) is returned as float64 so that interpolated
    values are not truncated when stored.

    Raises `ValueError` if `axis` is not 0, 1, or 2.
    """
    if axis not in [0, 1, 2]:  # Validate the axis to ensure it's within the correct range.
        raise ValueError("Invalid axis. Axis must be 0, 1, or 2.")

    old_indices = np.linspace(0, 1, num=data.shape[axis])  # Normalised positions of the input samples.
    new_indices = np.linspace(0, 1, num=target_size)       # Normalised positions of the output samples.

    # Interpolation generally yields fractional values; writing them into an
    # integer buffer would silently truncate them, so promote such inputs.
    out_dtype = data.dtype if np.issubdtype(data.dtype, np.inexact) else np.float64

    new_shape = list(data.shape)
    new_shape[axis] = target_size
    new_data = np.empty(new_shape, dtype=out_dtype)

    # Views with the resampled axis moved last: one loop then serves every axis,
    # and writes through `dst` land directly in `new_data`.
    src = np.moveaxis(data, axis, -1)
    dst = np.moveaxis(new_data, axis, -1)
    for i in range(src.shape[0]):
        for j in range(src.shape[1]):
            interpolator = interp1d(old_indices, src[i, j], kind='linear')
            dst[i, j] = interpolator(new_indices)

    return new_data

In [32]:
#| test resample_3d_array
def test_resample_3d_array():
    # Input of shape (4, 2, 2): the integers 1..16 laid out in row-major order
    source = np.arange(1, 17).reshape(4, 2, 2)

    # Resample axis 0 from 4 samples down to 3
    result = resample_3d_array(source, axis=0, target_size=3)

    # The first and last slices are kept; the middle output sample sits halfway
    # between the 2nd and 3rd input slices, so it equals their mean
    expected = np.array([
        [[1, 2], [3, 4]],
        [[7, 8], [9, 10]],
        [[13, 14], [15, 16]],
    ])
    test_eq(result, expected)

# Invoke the test
test_resample_3d_array()

In [33]:
#| test resample_3d_array
#| hide
def test_resample_3d_array():
    # Synthetic (200, 6, 300) volume: every cell is the sum of its three grid coordinates
    x = np.linspace(0, 1, 200)
    y = np.linspace(0, 1, 6)
    z = np.linspace(0, 1, 300)
    data = np.array(np.meshgrid(x, y, z, indexing='ij')).sum(axis=0)

    # Length requested for whichever axis is being resampled
    target_size = 100

    for axis in range(3):
        resampled_data = resample_3d_array(data, axis, target_size)

        # Only the resampled axis may change length
        expected_shape = data.shape[:axis] + (target_size,) + data.shape[axis + 1:]
        test_eq(resampled_data.shape, expected_shape)

        # Walk positions along the original axis and compare the nearest original
        # slice with the resampled slice that maps to the same relative position
        last_index = data.shape[axis] - 1
        for position in np.linspace(0, last_index, target_size):
            source_idx = int(np.round(position))
            resampled_idx = int(np.round((position / last_index) * (target_size - 1)))
            source_mean = np.mean(np.take(data, indices=source_idx, axis=axis))
            resampled_mean = np.mean(np.take(resampled_data, indices=resampled_idx, axis=axis))
            # Slice means only need to agree within a loose absolute tolerance
            test_eq(np.isclose(resampled_mean, source_mean, atol=0.1), True)

# Invoke the test
test_resample_3d_array()

## Downsample by Average

In [34]:
#| export
def average_downsample_3d_array(data: np.ndarray,  # The original 3D array to be downsampled.
                                axis: int,         # The axis along which to perform the downsampling (0, 1, or 2).
                                target_size: int   # The desired size of the specified axis after downsampling.
                               ) -> np.ndarray:
    """
    Downsample a 3D numpy array along a specified axis using averaging.

    The axis is split into `target_size` consecutive blocks (block `i` covers
    indices `i * n // target_size` up to, but excluding,
    `(i + 1) * n // target_size`, where `n` is the original axis length) and
    each block is replaced by its mean.

    Returns an array shaped like `data` except that `axis` has length
    `target_size`. Inexact (floating-point or complex) inputs keep their dtype;
    any other dtype (e.g. integers) is returned as float64 so that the means
    are not truncated when stored.

    Raises `ValueError` if `axis` is not 0, 1, or 2, or if `target_size` is not
    between 1 and the current length of `axis` (a larger value would leave some
    blocks empty and produce NaN).
    """
    # Validate the axis to ensure it's within the correct range.
    if axis not in [0, 1, 2]:
        raise ValueError("Invalid axis. Axis must be 0, 1, or 2.")

    original_size = data.shape[axis]
    if not 1 <= target_size <= original_size:
        raise ValueError(
            f"Invalid target_size. Must be between 1 and {original_size}, got {target_size}."
        )

    # Means are generally fractional; storing them in an integer buffer would
    # silently truncate them, so promote such inputs.
    out_dtype = data.dtype if np.issubdtype(data.dtype, np.inexact) else np.float64

    new_shape = list(data.shape)
    new_shape[axis] = target_size
    new_data = np.empty(new_shape, dtype=out_dtype)

    # Views with the downsampled axis moved first: one loop then serves every
    # axis, and writes through `dst` land directly in `new_data`.
    src = np.moveaxis(data, axis, 0)
    dst = np.moveaxis(new_data, axis, 0)
    for i in range(target_size):
        # Integer arithmetic keeps block edges exact; a float product can round
        # just below an integer and shift a boundary by one sample.
        start_idx = (i * original_size) // target_size
        end_idx = ((i + 1) * original_size) // target_size
        dst[i] = np.mean(src[start_idx:end_idx], axis=0)

    return new_data

In [35]:
#| test average_downsample_3d_array
def test_average_downsample_3d_array():
    # Input of shape (4, 2, 2); axis 0 is the one being reduced
    source = np.array([
        [[3, 0.1], [2, 5]],
        [[1, 0.1], [2, 2]],
        [[0.3, 3], [4, 4]],
        [[0.2, 3], [4, 6]],
    ])

    # Halve axis 0: rows 0-1 form the first block, rows 2-3 the second
    result = average_downsample_3d_array(source, axis=0, target_size=2)

    # Hand-computed block means
    expected = np.array([
        [[2, 0.1], [2, 3.5]],   # mean of rows 0 and 1
        [[0.25, 3], [4, 5]],    # mean of rows 2 and 3
    ])
    test_eq(result, expected)

# Invoke the test
test_average_downsample_3d_array()

## Reorder Orbit with Time

In [36]:
#| export
def reorder_orbits(orbit_dataset: np.ndarray  # The original 3D numpy array representing the orbits.
                  ) -> np.ndarray:
    """
    Sort the time steps of every orbit so that its time row is in ascending order.

    Parameters:
    orbit_dataset (np.ndarray): 3D array indexed as (orbit, scalar, time step). Row 0 of
                                each orbit holds the time values; the remaining rows
                                (described as posx, posy, posz, velx, vely, velz) are
                                permuted together with it.

    Returns:
    np.ndarray: A new array with the same shape and dtype; the input is left untouched.
    """
    # Unpacking doubles as a check that the input really has three dimensions
    _n_orbits, _n_scalars, _n_steps = orbit_dataset.shape

    result = np.zeros_like(orbit_dataset)

    # Iterating a 3D array yields 2D views, so assigning into `target` fills `result`
    for target, orbit in zip(result, orbit_dataset):
        order = np.argsort(orbit[0])   # permutation that sorts this orbit's time row
        target[...] = orbit[:, order]  # apply the same permutation to every row

    return result

In [37]:
#| test reorder_orbits
def test_reorder_orbits():
    # Two orbits of shape (7, 4): row 0 is time, the other six rows
    # (posx, posy, posz, velx, vely, velz) are all zero
    data = np.zeros((2, 7, 4), dtype=int)
    data[0, 0] = [3, 1, 4, 2]  # shuffled time values, first orbit
    data[1, 0] = [2, 3, 1, 4]  # shuffled time values, second orbit

    # After reordering, both time rows must be ascending; the zero rows are unchanged
    expected_data = np.zeros((2, 7, 4), dtype=int)
    expected_data[:, 0] = [1, 2, 3, 4]

    reordered_data = reorder_orbits(data)

    assert np.array_equal(reordered_data, expected_data), f"Expected {expected_data}, but got {reordered_data}"

# Invoke the test
test_reorder_orbits()

## Reshaping Arrays

In [38]:
#| export
def pad_and_convert_to_3d(orbits: Dict[int, np.ndarray],     # Dictionary of orbits with numerical keys.
                          timesteps: int                     # Desired number of timesteps.
                         ) -> np.ndarray:                    # 3D numpy array of padded orbits.
    """
    Bring every orbit to exactly `timesteps` columns and stack the results.

    Orbits with more columns are cut to the first `timesteps`; shorter ones are
    extended at the end with `np.pad`'s default constant (zero) fill. The stacked
    array follows the dictionary's iteration order; the keys themselves are not
    returned.
    """
    def _fit_to_length(orbit: np.ndarray) -> np.ndarray:
        # Keep at most `timesteps` columns, then fill whatever is still missing
        keep = min(timesteps, orbit.shape[1])
        return np.pad(orbit[:, :keep], ((0, 0), (0, timesteps - keep)))

    return np.stack([_fit_to_length(orbit) for orbit in orbits.values()])

In [39]:
#| export
def segment_and_convert_to_3d(orbits: Dict[int, np.ndarray],  # Dictionary of orbits with numerical keys.
                              segment_length: int             # Desired length of each segment.
                             ) -> Tuple[np.ndarray,           # 3D numpy array of segments.
                                        List[int]]:           # List of IDs representing each new segment.
    """
    Cut each orbit into consecutive, non-overlapping segments of `segment_length`
    columns and stack all segments into one 3D array.

    Trailing columns that do not fill a whole segment are dropped. The returned
    ID list has one entry per segment: the dictionary key of the orbit it came from.
    """
    segments, segment_ids = [], []

    for orbit_id, orbit in orbits.items():
        # Only whole segments are kept
        n_complete = orbit.shape[1] // segment_length

        # Step through the start column of every whole segment
        for start in range(0, n_complete * segment_length, segment_length):
            segments.append(orbit[:, start:start + segment_length])

        # Every segment of this orbit carries the orbit's key
        segment_ids.extend([orbit_id] * n_complete)

    return np.stack(segments), segment_ids

## Add Time Vector

In [40]:
#| export
def add_time_vector_to_orbits(orbits: Dict[int, np.ndarray],  # Dictionary of orbits with numerical keys.
                              propagated_periods: List[float], # List of propagated periods for each orbit.
                              periods: List[float]            # List of periods for each orbit.
                             ) -> Dict[int, np.ndarray]:      # Dictionary of updated orbits with time vectors added.
    """
    Return a new dictionary in which every orbit gains a leading time row.

    For the orbit stored under `key`, the time row is evenly spaced from 0 to
    `propagated_periods[key] * periods[key]`, with one value per orbit column.
    Both lists are indexed directly by the dictionary key. The input dictionary
    and its arrays are not modified.
    """
    return {
        key: np.vstack([
            # Time samples spanning the whole propagated duration of this orbit
            np.linspace(0, propagated_periods[key] * periods[key], orbit.shape[1]),
            orbit,
        ])
        for key, orbit in orbits.items()
    }

In [41]:
#| hide
# Run nbdev's export step; presumably this writes the cells marked `#| export`
# to the module named by `#| default_exp` — confirm against the nbdev docs.
import nbdev; nbdev.nbdev_export()