# Processing

> Utilities to resample, reorder, reshape, and add time vectors to orbit arrays

In [1]:
#| default_exp processing

In [2]:
#| export
#| hide
from scipy.interpolate import interp1d
import numpy as np
from typing import Tuple, Any, List, Dict
from scipy.stats import kendalltau

In [3]:
#| hide
from fastcore.test import test_eq

## Resampling

### Downsample

In [4]:
#| export   
def downsample_3d_array(data: np.ndarray,     # The original 3D array to be downsampled.
                        axis: int,            # The axis along which to perform the downsampling.
                        hop: int = None,      # The interval at which to keep elements.
                        target_size: int = None  # The target size for the specified axis.
                       ) -> np.ndarray:
    """
    Thin out a 3D numpy array along one axis, either with a fixed stride (`hop`) or down to
    `target_size` entries. When both are given, `hop` takes precedence.
    """
    # --- Argument checks (same order as the error messages below) ---
    if axis not in [0, 1, 2]:
        raise ValueError("Invalid axis. Axis must be 0, 1, or 2.")
    if hop is not None and hop < 1:
        raise ValueError("Hop must be a positive integer greater than or equal to 1.")
    if target_size is not None and (target_size < 1 or target_size > data.shape[axis]):
        raise ValueError("Target size must be a positive integer and less than or equal to the size of the axis.")
    if hop is None and target_size is None:
        raise ValueError("Either hop or target_size must be specified.")

    def _every_nth(step):
        # Keep every `step`-th entry along `axis`; the other two axes are taken whole.
        selector = [slice(None)] * 3
        selector[axis] = slice(None, None, step)
        return data[tuple(selector)]

    # Fixed-stride mode.
    if hop is not None:
        return _every_nth(hop)

    # Target-size mode: start from the integer stride implied by the size ratio.
    thinned = _every_nth(max(data.shape[axis] // target_size, 1))
    kept = thinned.shape[axis]
    if kept == target_size:
        return thinned

    # The integer stride left too many entries: pick `target_size` evenly spaced ones among them.
    picks = np.round(np.linspace(0, kept - 1, target_size)).astype(int)
    return np.take(thinned, picks, axis=axis)

In [5]:
#| hide
#| test downsample_3d_array

# Original 3D array with shape (4, 2, 2)
data = np.array([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
    [[9, 10], [11, 12]],
    [[13, 14], [15, 16]]
])

# Downsampling from 4 to 2 along the first axis
target_size = 2

# Perform downsampling
downsampled_data = downsample_3d_array(data, axis=0, target_size=target_size)

# Expected results by selecting every 2nd slice (hop = 4 // 2 = 2 keeps indices 0 and 2)
expected_data = np.array([
    [[1, 2], [3, 4]],   # 1st slice
    [[9, 10], [11, 12]] # 3rd slice
])

# Check the downsampled data against expected data
test_eq(downsampled_data, expected_data)

### Interpolation

In [6]:
#| export
def resample_3d_array(data: np.ndarray,  # The original 3D array to be resampled.
                      axis: int,         # The axis along which to perform the interpolation.
                      target_size: int   # The new size of the axis after resampling.
                     ) -> np.ndarray:
    """
    Resample a 3D numpy array along a specified axis using linear interpolation.

    The samples along `axis` are treated as equally spaced on [0, 1] and re-evaluated at
    `target_size` equally spaced positions on the same interval, so the first and last
    slices are preserved.

    Inexact (floating-point) inputs keep their dtype. Any other dtype (e.g. integers) is
    returned as float64, so that interpolated values are not truncated.

    Raises `ValueError` if `axis` is not 0, 1, or 2.
    """
    if axis not in [0, 1, 2]:  # Validate the axis to ensure it's within the correct range.
        raise ValueError("Invalid axis. Axis must be 0, 1, or 2.")

    old_indices = np.linspace(0, 1, num=data.shape[axis])  # Normalised positions of the existing samples.
    new_indices = np.linspace(0, 1, num=target_size)       # Normalised positions of the requested samples.

    # interp1d interpolates every 1D line along `axis` in a single vectorised call.
    interpolator = interp1d(old_indices, data, kind='linear', axis=axis)
    new_data = interpolator(new_indices)

    # Never cast back to an integer dtype: that would drop the fractional part of the result.
    out_dtype = data.dtype if np.issubdtype(data.dtype, np.inexact) else np.float64
    return new_data.astype(out_dtype, copy=False)

In [7]:
#| hide
#| test resample_3d_array

# Original 3D array with shape (4, 2, 2)
data = np.array([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
    [[9, 10], [11, 12]],
    [[13, 14], [15, 16]]
])

# Resampling from 4 to 3 slices along the first axis
target_size = 3

# Perform resampling
resampled_data = resample_3d_array(data, axis=0, target_size=target_size)

# Expected results by true linear interpolation
expected_data = np.array([
    [[1, 2], [3, 4]],  # 1st slice
    [[7, 8], [9, 10]],  # Interpolation between 2nd and 3rd slices (mean in this case)
    [[13, 14], [15, 16]]  # 4th slice
])
# Check the resampled data against expected data
test_eq(resampled_data, expected_data)

In [8]:
#| hide
#| test resample_3d_array
def test_resample_3d_array():
    # Build a smooth (200, 6, 300) volume: every entry is the sum of its three grid coordinates.
    grid_x = np.linspace(0, 1, 200)
    grid_y = np.linspace(0, 1, 6)
    grid_z = np.linspace(0, 1, 300)
    data = np.array(np.meshgrid(grid_x, grid_y, grid_z, indexing='ij')).sum(axis=0)

    # Size requested for whichever axis is being resampled
    target_size = 100

    for axis in range(3):
        resampled_data = resample_3d_array(data, axis, target_size)

        # Only the resampled axis may change length
        expected_shape = list(data.shape)
        expected_shape[axis] = target_size
        test_eq(resampled_data.shape, tuple(expected_shape))

        # Walk target_size evenly spaced positions along the original axis and compare the
        # nearest original slice with the resampled slice at the matching relative position.
        last_index = data.shape[axis] - 1
        for position in np.linspace(0, last_index, target_size):
            source_idx = int(np.round(position))
            resampled_idx = int(np.round((position / last_index) * (target_size - 1)))
            original_slice = np.take(data, indices=source_idx, axis=axis)
            interpolated_slice = np.take(resampled_data, indices=resampled_idx, axis=axis)
            # Slice means only need to agree within a loose tolerance
            test_eq(np.isclose(np.mean(interpolated_slice), np.mean(original_slice), atol=0.1), True)

# Run the test immediately when the cell executes
test_resample_3d_array()

### Average

In [9]:
#| export
def average_downsample_3d_array(data: np.ndarray,  # The original 3D array to be downsampled.
                                axis: int,         # The axis along which to perform the downsampling (0, 1, or 2).
                                target_size: int   # The desired size of the specified axis after downsampling.
                               ) -> np.ndarray:
    """
    Downsample a 3D numpy array along a specified axis using averaging.

    The axis is cut into `target_size` consecutive blocks (block `i` spans indices
    `floor(i * n / target_size)` up to, but excluding, `floor((i + 1) * n / target_size)`,
    where `n` is the original axis length) and each block is replaced by its mean.

    Inexact (floating-point) inputs keep their dtype. Any other dtype (e.g. integers) is
    returned as float64, so that the averages are not truncated.

    Raises `ValueError` if `axis` is not 0, 1, or 2, or if `target_size` is not between 1
    and the current size of the axis.
    """
    # Validate the axis to ensure it's within the correct range.
    if axis not in [0, 1, 2]:
        raise ValueError("Invalid axis. Axis must be 0, 1, or 2.")

    original_size = data.shape[axis]
    # A target of 0 would divide by zero; a target above the axis size would yield empty blocks (NaN means).
    if target_size < 1 or target_size > original_size:
        raise ValueError("Target size must be a positive integer and less than or equal to the size of the axis.")

    # Define the shape and dtype of the new, downsampled data array.
    new_shape = list(data.shape)
    new_shape[axis] = target_size
    out_dtype = data.dtype if np.issubdtype(data.dtype, np.inexact) else np.float64
    new_data = np.empty(new_shape, dtype=out_dtype)

    for i in range(target_size):
        # Integer arithmetic keeps block edges exact (no floating-point rounding of i * n / target_size).
        start_idx = (i * original_size) // target_size
        end_idx = ((i + 1) * original_size) // target_size

        # Select the block on the input and the single output position, whatever the axis is.
        source = [slice(None)] * 3
        source[axis] = slice(start_idx, end_idx)
        destination = [slice(None)] * 3
        destination[axis] = i

        new_data[tuple(destination)] = np.mean(data[tuple(source)], axis=axis)

    return new_data

In [10]:
#| hide
#| test average_downsample_3d_array
def test_average_downsample_3d_array():
    # Small (4, 2, 2) input; the values are chosen so the pairwise means along axis 0 are easy to verify by hand
    samples = np.array([
        [[3, 0.1], [2, 5]],
        [[1, 0.1], [2, 2]],
        [[0.3, 3], [4, 4]],
        [[0.2, 3], [4, 6]]
    ])

    # Halve axis 0: 4 slices -> 2 slices
    requested_size = 2
    averaged = average_downsample_3d_array(samples, axis=0, target_size=requested_size)

    # Hand-computed block means
    expected = np.array([
        [[2, 0.1], [2, 3.5]],  # Mean of slices 0 and 1
        [[0.25, 3], [4, 5]]    # Mean of slices 2 and 3
    ])

    # The function output must equal the hand-computed means
    test_eq(averaged, expected)

# Run the test immediately when the cell executes
test_average_downsample_3d_array()

## Reorder Orbit with Time

In [11]:
#| export
def reorder_orbits(orbit_dataset: np.ndarray
                  ) -> Tuple[np.ndarray,      # 3D numpy array of reordered orbits.
                             np.ndarray,        # 2D numpy array of metric values.
                             List[str]]:        # List of metric labels.
    """
    Reorders the time steps of each orbit in the dataset such that the time values are always incrementally increasing.
    Returns the reordered dataset, a 2D array of metric values for each orbit, and a list of metric labels.

    `orbit_dataset` is indexed as (orbit, scalar, timestep) and row 0 of every orbit is used as
    its time vector. Samples that share the same time value keep their original relative order
    (stable sort), so the result and the metrics do not depend on the sort implementation.

    Metrics per orbit, in the order of the returned labels:
    - disorder_metric: sum of |sorted position - original position| over all samples.
    - correct_order: number of consecutive pairs whose time does not decrease.
    - inversions: number of pairs (j, k), j < k, with time[j] > time[k].
    - kendall_tau_distance: 1 - Kendall's tau between the original and sorted times
      (1.0 when tau is NaN).
    """
    num_orbits, num_scalars, num_timesteps = orbit_dataset.shape
    reordered_dataset = np.zeros_like(orbit_dataset)
    metrics_array = np.zeros((num_orbits, 4))  # Assuming four metrics
    metric_labels = ['disorder_metric', 'correct_order', 'inversions', 'kendall_tau_distance']
    original_positions = np.arange(num_timesteps)

    for i in range(num_orbits):
        # Extract the time steps and corresponding data for the current orbit
        orbit_data = orbit_dataset[i]
        time_steps = orbit_data[0]

        # Stable sort: equal time values must not be shuffled relative to each other
        sorted_indices = np.argsort(time_steps, kind='stable')
        sorted_times = time_steps[sorted_indices]

        # Calculate the disorder metric for the current orbit
        disorder_metric = np.sum(np.abs(sorted_indices - original_positions))
        correct_order = np.sum(np.diff(time_steps) >= 0)

        # Calculate the number of inversions: for each sample, count the later samples it exceeds
        inversions = sum(int(np.count_nonzero(time_steps[j] > time_steps[j + 1:]))
                         for j in range(num_timesteps - 1))

        # Calculate Kendall's tau distance
        tau, _ = kendalltau(time_steps, sorted_times)
        kendall_tau_distance = 1 - tau if not np.isnan(tau) else 1.0  # Handle NaN

        # Store the metrics in the array
        metrics_array[i] = [disorder_metric, correct_order, inversions, kendall_tau_distance]

        # Reorder every scalar row of the orbit with the same permutation and store it
        reordered_dataset[i] = orbit_data[:, sorted_indices]

    return reordered_dataset, metrics_array, metric_labels

In [12]:
#| hide
#| test reorder_orbits

# Two identical orbits of shape (7 scalars, 4 timesteps) whose time row is fully reversed
data = np.array([[[4, 3, 2, 1],  # Time steps for orbit 1
                  [0, 0, 0, 0],  # posx
                  [0, 0, 0, 0],  # posy
                  [0, 0, 0, 0],  # posz
                  [0, 0, 0, 0],  # velx
                  [0, 0, 0, 0],  # vely
                  [0, 0, 0, 0]], # velz

                  [[4, 3, 2, 1],  # Time steps for orbit 2
                  [0, 0, 0, 0],  # posx
                  [0, 0, 0, 0],  # posy
                  [0, 0, 0, 0],  # posz
                  [0, 0, 0, 0],  # velx
                  [0, 0, 0, 0],  # vely
                  [0, 0, 0, 0]]]) # velz

# After reordering, the time row of each orbit must be ascending
expected_data = np.array([[[1, 2, 3, 4],
                            [0, 0, 0, 0],
                            [0, 0, 0, 0],
                            [0, 0, 0, 0],
                            [0, 0, 0, 0],
                            [0, 0, 0, 0],
                            [0, 0, 0, 0]],

                          [[1, 2, 3, 4],
                            [0, 0, 0, 0],
                            [0, 0, 0, 0],
                            [0, 0, 0, 0],
                            [0, 0, 0, 0],
                            [0, 0, 0, 0],
                            [0, 0, 0, 0]]])

# Reorder the data
reordered_data, metrics_array, metric_labels = reorder_orbits(data)

# Aggregating the metrics to obtain average values
average_metrics = metrics_array.mean(axis=0)
average_metrics_dict = dict(zip(metric_labels, average_metrics))

# Check that the reordered data matches the expected data
assert np.array_equal(reordered_data, expected_data), f"Expected {expected_data}, but got {reordered_data}"

# Check the average disorder metric
expected_avg_disorder_metric = 8.0  # The disorder metric for each orbit is 8 (|3-0| + |2-1| + |1-2| + |0-3|), so the average is also 8
assert np.isclose(average_metrics_dict['disorder_metric'], expected_avg_disorder_metric), f"Expected {expected_avg_disorder_metric}, but got {average_metrics_dict['disorder_metric']}"

# Check the average correct order
expected_avg_correct_order = 0.0  # None of the time steps were originally in order
assert np.isclose(average_metrics_dict['correct_order'], expected_avg_correct_order), f"Expected {expected_avg_correct_order}, but got {average_metrics_dict['correct_order']}"

# Check the average number of inversions
expected_avg_inversions = 6.0  # There are 6 inversions in each orbit (every one of the 4*3/2 pairs is inverted)
assert np.isclose(average_metrics_dict['inversions'], expected_avg_inversions), f"Expected {expected_avg_inversions}, but got {average_metrics_dict['inversions']}"

# Check the average Kendall's tau distance
expected_avg_kendall_tau = 2.0  # Complete disagreement for both orbits: tau = -1, so 1 - tau = 2
assert np.isclose(average_metrics_dict['kendall_tau_distance'], expected_avg_kendall_tau), f"Expected {expected_avg_kendall_tau}, but got {average_metrics_dict['kendall_tau_distance']}"

## Reshaping Arrays

In [13]:
#| export
def pad_and_convert_to_3d(orbits: Dict[int, np.ndarray],     # Dictionary of orbits with numerical keys.
                          timesteps: int                     # Desired number of timesteps.
                         ) -> np.ndarray:                    # 3D numpy array of padded orbits.
    """
    Bring every orbit to exactly `timesteps` columns and stack the results into one 3D array.
    Longer orbits are cut after the first `timesteps` columns; shorter ones are zero-padded at the end.
    The dictionary keys are not used; orbits are stacked in dictionary iteration order.
    """
    def _fit_length(orbit):
        # Number of columns that can actually be taken from this orbit
        kept = min(timesteps, orbit.shape[1])
        # No padding on the row axis; only trailing padding on the time axis
        return np.pad(orbit[:, :kept], ((0, 0), (0, timesteps - kept)))

    return np.stack([_fit_length(orbit) for orbit in orbits.values()])

In [14]:
#| export
def segment_and_convert_to_3d(orbits: Dict[int, np.ndarray],  # Dictionary of orbits with numerical keys.
                              segment_length: int             # Desired length of each segment.
                             ) -> Tuple[np.ndarray,           # 3D numpy array of segments.
                                        List[int]]:           # List of IDs representing each new segment.
    """
    Cut every orbit into consecutive, non-overlapping windows of `segment_length` columns and
    stack all windows into one 3D array. Leftover columns that do not fill a whole window are
    dropped. The second return value gives, for each window, the key of the orbit it came from.
    """
    pieces = []
    owners = []

    for orbit_id, orbit in orbits.items():
        # Number of columns covered by whole windows
        usable = (orbit.shape[1] // segment_length) * segment_length

        for start in range(0, usable, segment_length):
            pieces.append(orbit[:, start:start + segment_length])
            owners.append(orbit_id)

    return np.stack(pieces), owners

In [15]:
#| test segment_and_convert_to_3d

# Orbit 1 has 12 timesteps (three complete segments of 4); orbit 2 has 7 (one complete segment, the last 3 columns are dropped)
orbits = {
    1: np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
                    [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
                    [25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36],
                    [37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48],
                    [49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60],
                    [61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72]]),
    2: np.array([[73, 74, 75, 76, 77, 78, 79],
                    [81, 82, 83, 84, 85, 86, 87],
                    [89, 90, 91, 92, 93, 94, 95],
                    [97, 98, 99, 100, 101, 102, 103],
                    [105, 106, 107, 108, 109, 110, 111],
                    [113, 114, 115, 116, 117, 118, 119]])
}
segment_length = 4

# Expected segments and IDs (one entry per segment; each ID is the key of the source orbit)
expected_segments = np.array([
    [[1, 2, 3, 4], [13, 14, 15, 16], [25, 26, 27, 28], [37, 38, 39, 40], [49, 50, 51, 52], [61, 62, 63, 64]],
    [[5, 6, 7, 8], [17, 18, 19, 20], [29, 30, 31, 32], [41, 42, 43, 44], [53, 54, 55, 56], [65, 66, 67, 68]],
    [[9, 10, 11, 12], [21, 22, 23, 24], [33, 34, 35, 36], [45, 46, 47, 48], [57, 58, 59, 60], [69, 70, 71, 72]],
    [[73, 74, 75, 76], [81, 82, 83, 84], [89, 90, 91, 92], [97, 98, 99, 100], [105, 106, 107, 108], [113, 114, 115, 116]]
])
expected_ids = [1, 1, 1, 2]

# Call the function
segments_3d, segment_ids = segment_and_convert_to_3d(orbits, segment_length)

# Use test_eq to check the results
test_eq(segments_3d.tolist(), expected_segments.tolist())
test_eq(segment_ids, expected_ids)

## Add Time Vector

In [16]:
#| export
def add_time_vector_to_orbits(orbits: Dict[int, np.ndarray],  # Dictionary of orbits with numerical keys.
                              propagated_periods: List[float], # List of propagated periods for each orbit.
                              periods: List[float]            # List of periods for each orbit.
                             ) -> Dict[int, np.ndarray]:      # Dictionary of updated orbits with time vectors added.
    """
    Return a new dictionary in which every orbit has a time row stacked on top of it.
    The time row runs evenly from 0 to `propagated_periods[key] * periods[key]`, with one value
    per column of the orbit; each orbit's key is used as the index into both lists.
    The input dictionary and its arrays are left untouched.
    """
    return {
        orbit_id: np.vstack([
            # Evenly spaced times covering the whole propagated span of this orbit
            np.linspace(0, propagated_periods[orbit_id] * periods[orbit_id], orbit.shape[1]),
            orbit,
        ])
        for orbit_id, orbit in orbits.items()
    }

## Interpolating Equal Times

In [22]:
#| export
def interpolate_equal_times(orbit_dataset: np.ndarray) -> np.ndarray:
    """
    Replace a leading run of repeated time values in each orbit with linearly interpolated samples.

    `orbit_dataset` is indexed as (orbit, scalar, timestep) and row 0 of every orbit is used as
    its time vector. For each orbit whose first two or more time values are equal:

    - If a later, different time value exists, the repeated samples and that first different
      sample are overwritten by a straight line (in time and in every other scalar) running
      from the first sample to the first different sample.
    - If all time values are equal, the time row becomes `linspace(0, 1)` over the whole orbit
      and every other scalar becomes a straight line from its first to its last value.

    Orbits without a repeated leading time value are only converted to float.
    Returns a new float array with the same shape as the input.
    """
    num_orbits, num_scalars, num_timesteps = orbit_dataset.shape
    processed_dataset = np.zeros_like(orbit_dataset, dtype=float)

    for i in range(num_orbits):
        orbit_data = orbit_dataset[i]
        time_steps = orbit_data[0]  # Extract time steps

        # Index of the first time value that differs from the initial one.
        # argmax returns 0 when nothing differs, i.e. when all values are equal.
        first_unequal = np.argmax(time_steps != time_steps[0])
        if first_unequal == 0:  # All values are equal
            first_unequal = num_timesteps

        new_orbit_data = orbit_data.astype(float)  # astype returns a copy; the input is not modified

        if first_unequal > 1:
            # True when a different time value exists to anchor the end of the interpolation
            has_anchor = first_unequal < num_timesteps

            if has_anchor:
                # Cover the repeated samples plus the first different one
                num_points = first_unequal + 1
                interp_times = np.linspace(time_steps[0], time_steps[first_unequal], num_points)
            else:
                # No extra sample exists, so the span is the whole orbit (one more point would overflow the row)
                num_points = num_timesteps
                interp_times = np.linspace(0, 1, num_points)

            new_orbit_data[0, :num_points] = interp_times

            # Interpolate other scalar values
            for j in range(1, num_scalars):
                if has_anchor:
                    f = interp1d([time_steps[0], time_steps[first_unequal]],
                                 [orbit_data[j, 0], orbit_data[j, first_unequal]],
                                 fill_value="extrapolate")
                    new_orbit_data[j, :num_points] = f(interp_times)
                else:
                    new_orbit_data[j, :num_points] = np.linspace(orbit_data[j, 0], orbit_data[j, -1], num_points)

        processed_dataset[i] = new_orbit_data

    return processed_dataset

In [23]:
# Testing with dummy data that has equal time values at the beginning
# (row 0 is the time vector: eight repeated zeros, then 1 and 2)
test_data = np.array([
    [0, 0, 0, 0, 0, 0, 0, 0, 1, 2],
    [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
    [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]
])

# Reshape to 3D array (1 orbit with 3 scalars and 10 timesteps)
test_data = test_data.reshape(1, 3, 10)

output = interpolate_equal_times(test_data)
# Show the processed time row of every orbit
print(output[:,0])


[[0.    0.125 0.25  0.375 0.5   0.625 0.75  0.875 1.    2.   ]]


In [19]:
#| hide
# Run nbdev's export step (presumably writes the `#| export` cells to the `processing` module named by `default_exp` — confirm against the nbdev docs).
import nbdev; nbdev.nbdev_export()