In [None]:
import pandas as pd
import numpy as np

import json

In [None]:
# Load the raw records from the local HDF5 file into a DataFrame
df = pd.read_hdf('sat-data.h5')
# Bare last expression: rendered as this cell's output
df

In [None]:
# Target dtype for each column the later cells rely on.
convert_dict = {
    "NORAD_CAT_ID": "object",
    # The unit must be spelled out: recent pandas versions reject the unit-less
    # "datetime64" dtype, and because errors='ignore' is used below that
    # failure would be swallowed, silently leaving EPOCH unconverted.
    "EPOCH": "datetime64[ns]",
    "SEMIMAJOR_AXIS": "float64",
    "ECCENTRICITY": "float64",
    "INCLINATION": "float64",
    "RA_OF_ASC_NODE": "float64",
    "ARG_OF_PERICENTER": "float64",
    "MEAN_ANOMALY": "float64"
}

# Convert columns using the dictionary. errors='ignore' keeps the original
# best-effort behaviour: a column that cannot be cast keeps its existing dtype
# instead of raising, so verify the result in the dtypes shown below.
df = df.astype(convert_dict, errors='ignore')

# Show the data types of all columns (last expression -> cell output)
df.dtypes

In [None]:
# Inspect every raw record whose NORAD_CAT_ID is '8832'
df.loc[df['NORAD_CAT_ID'].eq('8832')]

In [None]:
def deduplicate_by_date(
    data: pd.DataFrame,
    group_by_id_col: str,
    date_col: str,
    sort_ascending: bool = False,
) -> pd.DataFrame:
    """
    Keeps a single row per (group, calendar day), chosen by ordering on ``date_col``.

    For every distinct combination of ``group_by_id_col`` value and calendar day
    of ``date_col``, exactly one row survives: the latest one of that day by
    default, or the earliest one when ``sort_ascending`` is True.

    Args:
        data: A pandas DataFrame containing the data. It is not modified.
        group_by_id_col: The column to group the data by (e.g., satellite ID).
        date_col: The column holding the timestamps; it must be parseable by
            ``pd.to_datetime``.
        sort_ascending: False keeps the most recent row of each day, True keeps
            the earliest row of each day.

    Returns:
        A new DataFrame with the same columns as ``data``, ordered by
        (``group_by_id_col``, day) and carrying a fresh 0..n-1 index. Rows whose
        ID or timestamp is missing are left out.
    """
    # Stable sort: rows tied on date_col keep their input order, and the first
    # row encountered for each (id, day) pair is the one to keep.
    ordered = data.sort_values(
        date_col, ascending=sort_ascending, kind="stable"
    ).reset_index(drop=True)

    # The grouping keys live in a separate frame so that no helper column is
    # added to (and later dropped from) the data; a column that happens to be
    # called 'date' in the input therefore survives untouched. This also avoids
    # groupby.apply, whose handling of the grouping columns varies between
    # pandas versions.
    keys = pd.DataFrame({
        "id": ordered[group_by_id_col],
        "day": pd.to_datetime(ordered[date_col]).dt.normalize(),
    })

    # Missing keys are excluded, matching groupby's default of dropping them.
    keep = keys.notna().all(axis=1) & ~keys.duplicated(keep="first")

    # Present the survivors grouped by ID and then chronologically by day.
    final_order = keys[keep].sort_values(["id", "day"]).index
    return ordered.loc[final_order].reset_index(drop=True)

# Example usage. deduplicate_by_date does not modify its argument, so df is
# passed directly instead of paying for an extra full copy of the frame.
deduplicated_data = deduplicate_by_date(df, 'NORAD_CAT_ID', 'EPOCH', sort_ascending=False)


In [None]:
# Inspect the deduplicated records whose NORAD_CAT_ID is '8832'
deduplicated_data.loc[deduplicated_data['NORAD_CAT_ID'].eq('8832')]

In [None]:
# Display the deduplicated frame as this cell's output
deduplicated_data

In [None]:
def reshape_by_ws(data: pd.DataFrame, w: int = 20, step: int = 1) -> pd.DataFrame:
  """
  Cuts each satellite's time series into fixed-width sliding windows.

  The rows of every NORAD_CAT_ID are sorted by EPOCH and then sliced into
  windows of exactly ``w`` consecutive rows whose start positions are ``step``
  rows apart (0, step, 2*step, ...). A series of n rows therefore yields
  ``(n - w) // step + 1`` windows; with ``step < w`` the windows overlap, so a
  row can appear in several segments.

  Args:
    data: A pandas DataFrame with at least the columns:
      - NORAD_CAT_ID: The catalog ID used to group the rows.
      - EPOCH: The timestamp used to order the rows inside each group.
      All other columns are carried through unchanged.
    w: The slicing width (number of rows in each segment).
    step: The number of rows the window advances between segments (default=1).

  Returns:
    A new DataFrame with the columns of ``data`` plus ``segment_id``. Segment
    IDs count from 0 separately for each NORAD_CAT_ID, so a segment is
    identified by the pair (NORAD_CAT_ID, segment_id). IDs with fewer than
    ``w`` rows contribute nothing; if no ID qualifies, the result is empty but
    still has the ``segment_id`` column.

  Raises:
    ValueError: If ``w`` or ``step`` is less than 1.
  """

  if w < 1:
    raise ValueError("Slicing width (w) must be greater than or equal to 1")
  if step < 1:
    # A step of zero would never advance the window.
    raise ValueError("Slicing step (step) must be greater than or equal to 1")

  def segment_data(group):
    """Returns the list of windows for the rows of a single NORAD_CAT_ID."""
    ordered = group.sort_values(by='EPOCH')

    # Number of full-width windows that fit when advancing `step` rows at a time.
    n_windows = (len(ordered) - w) // step + 1

    # assign() returns a new frame, so tagging a window never writes into a
    # slice of the group.
    return [
      ordered.iloc[start:start + w].assign(segment_id=segment_id)
      for segment_id, start in enumerate(range(0, n_windows * step, step))
    ]

  segments = []
  for _, group in data.groupby('NORAD_CAT_ID'):
    # Series shorter than one window cannot produce a segment.
    if len(group) >= w:
      segments.extend(segment_data(group))

  if not segments:
    # pd.concat rejects an empty list, so build the empty result explicitly.
    empty = data.iloc[0:0].copy()
    empty['segment_id'] = pd.Series(dtype='int64')
    return empty

  return pd.concat(segments).reset_index(drop=True)

# Window the deduplicated records: width w=5 rows, advancing step=1 row at a time
reshaped_data = reshape_by_ws(deduplicated_data, w=5, step=1)


In [None]:
# Check the column dtypes of the windowed data
reshaped_data.dtypes

In [None]:
def normalize_data(data: pd.DataFrame) -> pd.DataFrame:
  """
  Z-score normalizes the six orbital-element columns of a DataFrame.

  Each of SEMIMAJOR_AXIS, ECCENTRICITY, INCLINATION, RA_OF_ASC_NODE,
  ARG_OF_PERICENTER and MEAN_ANOMALY is standardized with scikit-learn's
  StandardScaler, fitted on all rows of ``data`` at once.

  Args:
    data: A pandas DataFrame containing the six orbital-element columns; any
      further columns are passed through unchanged. It is not modified.

  Returns:
    A new DataFrame with the same rows and index as ``data``: the six
    normalized columns come first, followed by the remaining columns in sorted
    order.

  Raises:
    KeyError: If one of the six orbital-element columns is missing.
  """
  from sklearn.preprocessing import StandardScaler

  element_cols = [
    'SEMIMAJOR_AXIS', 'ECCENTRICITY', 'INCLINATION',
    'RA_OF_ASC_NODE', 'ARG_OF_PERICENTER', 'MEAN_ANOMALY',
  ]
  segment_vectors = data[element_cols]

  # fit_transform returns a bare array; re-attach the caller's index so the
  # column-wise concat below pairs every normalized row with its own record.
  # With a default 0..n-1 index instead, any input that is not indexed 0..n-1
  # would be misaligned and padded with NaN rows.
  normalized_vectors = pd.DataFrame(
    StandardScaler().fit_transform(segment_vectors),
    columns=segment_vectors.columns,
    index=data.index,
  )

  # Index.difference returns the non-normalized columns in sorted order.
  other_cols = data.columns.difference(normalized_vectors.columns)
  return pd.concat([normalized_vectors, data[other_cols]], axis=1)

# normalize_data builds and returns a new frame without touching its argument,
# so no defensive copy of reshaped_data is needed.
normalized_data = normalize_data(reshaped_data)

In [None]:
def label_data(data: pd.DataFrame, category_map_path: str = 'category-map.json') -> pd.DataFrame:
  """
  Attaches a one-hot maneuver label to every row based on its NORAD_CAT_ID.

  The JSON file maps each category name ("EW", "EW-NS" or "NM") to a list of
  catalog IDs. IDs are matched against the NORAD_CAT_ID column by exact
  equality, so their type in the JSON must match the values in the column.

  Args:
    data: A pandas DataFrame containing a NORAD_CAT_ID column. It is not
      modified.
    category_map_path: Path of the JSON file holding the category-to-IDs
      mapping.

  Returns:
    A new DataFrame equal to ``data`` plus a ``maneuver_category`` column. Each
    value is the length-3 one-hot numpy array of the row's category
    ("EW" -> [1, 0, 0], "EW-NS" -> [0, 1, 0], "NM" -> [0, 0, 1]), or the string
    'Unknown' when the ID is not listed in the JSON file.

  Raises:
    KeyError: If the JSON file contains a category name other than the three
      listed above.
  """
  # Load the maneuver categories from the JSON file
  with open(category_map_path, 'r') as f:
    maneuver_categories = json.load(f)

  cat_to_np = {
    "EW": np.array([1, 0, 0]),
    "EW-NS": np.array([0, 1, 0]),
    "NM": np.array([0, 0, 1]),
  }

  # Invert {category: [ids]} into {id: one-hot array}. All IDs of a category
  # share the same array object, so treat the labels as read-only.
  category_map = {
    cat_id: cat_to_np[category]
    for category, cat_ids in maneuver_categories.items()
    for cat_id in cat_ids
  }

  labels = data['NORAD_CAT_ID'].apply(lambda cat_id: category_map.get(cat_id, 'Unknown'))

  # assign() returns a new frame, leaving the caller's DataFrame untouched.
  return data.assign(maneuver_category=labels)

# Label a copy, so that normalized_data itself is left as it was
labeled_data = label_data(normalized_data.copy())

In [None]:
# Display the labeled frame as this cell's output
labeled_data

In [None]:
# Preview one window as a plain array: NORAD_CAT_ID '8832', segment 0, with the
# identifier and label columns removed
(
    labeled_data
    .loc[(labeled_data['NORAD_CAT_ID'] == '8832') & (labeled_data['segment_id'] == 0)]
    .drop(columns=['NORAD_CAT_ID', 'segment_id', 'maneuver_category'])
    .to_numpy()
)

In [None]:
fdf = labeled_data
fnp = []   # one 2-D array (rows x remaining columns) per segment
fnpy = []  # the maneuver_category values of the same segment, one per row
id_list = fdf['NORAD_CAT_ID'].unique()

# Columns removed before a segment is turned into an array
non_feature_cols = ['NORAD_CAT_ID', 'segment_id', 'maneuver_category', 'EPOCH']

# groupby(sort=False) yields groups in order of first appearance, i.e. the same
# order as iterating over .unique(), while partitioning the frame once instead
# of rebuilding a boolean mask over every row for each (satellite, segment).
for _, sat_rows in fdf.groupby('NORAD_CAT_ID', sort=False):
    for _, segment_rows in sat_rows.groupby('segment_id', sort=False):
        fnp.append(segment_rows.drop(columns=non_feature_cols).to_numpy())
        fnpy.append(segment_rows['maneuver_category'].to_numpy())

In [None]:
# Convert the list of per-segment arrays into a single NumPy array and write it
# to 'sat_data.npy'
fnp = np.array(fnp)
np.save('sat_data.npy', fnp)
# Same for the per-segment label arrays, written to 'sat-labels.npy'
fnpy = np.array(fnpy)
np.save('sat-labels.npy', fnpy)
