# statistics

> Utilities to subsample orbit arrays and compute summary statistics of their position and velocity components

In [1]:
#| default_exp stats

In [2]:
#| export
import numpy as np

# What is an orbit?

In [3]:
#| export
def select_and_remove(data, select_n):
    """
    Select elements from the first dimension of the data and remove the first index
    from the second dimension.
    
    Parameters:
    - data: A NumPy array with at least two dimensions (documented use is 3D).
    - select_n: Either an integer (Python int or NumPy integer) giving the number
                of elements to draw at random, without replacement, from the first
                dimension, or a sequence (list, tuple or NumPy array) of specific
                indices to select.
    
    Returns:
    - A new array with the selected elements and without the first index in the
      second dimension.
    
    Raises:
    - ValueError: if select_n is neither an integer nor a sequence of indices.
      np.random.choice also raises ValueError when more elements are requested
      than the first dimension holds, because sampling is done without replacement.
    """
    
    # NumPy integers (e.g. np.int64 coming from shape arithmetic) are not
    # instances of the built-in int, so they are accepted explicitly.
    if isinstance(select_n, (int, np.integer)):
        indices = np.random.choice(data.shape[0], select_n, replace=False)
    elif isinstance(select_n, (list, tuple, np.ndarray)):
        # Converting first means a tuple is treated as a list of row indices,
        # not as a multi-dimensional index.
        indices = np.asarray(select_n)
        if indices.size == 0:
            # An empty sequence becomes a float64 array, which NumPy refuses
            # to use as an index; cast so the result is simply empty.
            indices = indices.astype(np.intp)
    else:
        raise ValueError("select_n must be an integer or a list of integers")
    
    # Fancy indexing copies, so the caller's array is never modified.
    selected_data = data[indices]
    
    # Drop index 0 along axis 1 (the original comment calls this the time row).
    reshaped_data = np.delete(selected_data, 0, axis=1)
    
    return reshaped_data

In [4]:
#| export
def calculate_statistics_by_orbit(orbits):
    """
    Summarize every orbit separately.

    For each orbit and each of the six scalars (posx, posy, posz, velx, vely,
    velz) the min, mean, max and the 25th/50th/75th percentiles are computed
    over all time instants of that orbit.

    Parameters:
    - orbits: numpy array of shape [number_of_orbits, 6, number_of_time_instants]

    Returns:
    - A dictionary keyed by orbit index. Each value is a dictionary keyed by
      scalar name whose values are dictionaries with the keys
      'min', 'mean', 'max', '25%', '50%' and '75%'.
    """
    scalar_names = ('posx', 'posy', 'posz', 'velx', 'vely', 'velz')

    def summarize(series):
        # Summary of one scalar's time series for a single orbit.
        return {
            'min': np.min(series),
            'mean': np.mean(series),
            'max': np.max(series),
            '25%': np.percentile(series, 25),
            '50%': np.median(series),
            '75%': np.percentile(series, 75),
        }

    return {
        orbit_idx: {
            name: summarize(orbits[orbit_idx, row, :])
            for row, name in enumerate(scalar_names)
        }
        for orbit_idx in range(orbits.shape[0])
    }

In [5]:
#| export
def calculate_overall_statistics(orbits):
    """
    Summarize each scalar over the whole data set.

    For each of the six scalars (posx, posy, posz, velx, vely, velz) the values
    of all orbits and all time instants are pooled into one 1-D array, and the
    min, mean, max and the 25th/50th/75th percentiles of that pool are computed.

    Parameters:
    - orbits: numpy array of shape [number_of_orbits, 6, number_of_time_instants]

    Returns:
    - A dictionary keyed by scalar name whose values are dictionaries with the
      keys 'min', 'mean', 'max', '25%', '50%' and '75%'.
    """
    scalar_names = ('posx', 'posy', 'posz', 'velx', 'vely', 'velz')

    def summarize(row):
        # Pool every orbit's samples of this scalar into a single 1-D copy.
        pooled = orbits[:, row, :].flatten()
        return {
            'min': np.min(pooled),
            'mean': np.mean(pooled),
            'max': np.max(pooled),
            '25%': np.percentile(pooled, 25),
            '50%': np.median(pooled),  # the median is the 50th percentile
            '75%': np.percentile(pooled, 75),
        }

    return {name: summarize(row) for row, name in enumerate(scalar_names)}

In [6]:
#| export
def foo():
    """Placeholder that does nothing and returns None."""
    return None

In [7]:
#| hide
# Run nbdev's export step for this notebook.
import nbdev

nbdev.nbdev_export()