## Import libraries and dataset

In [1]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Processed elset partitions live under <project root>/data/01_processed;
# the project root is taken to be two directory levels above the working directory.
PROC_DIR = Path.cwd().parents[1].joinpath("data", "01_processed", "elset_history_aodr")
print(PROC_DIR)

c:\Users\ash\Desktop\wid-datathon\data\01_processed\elset_history_aodr


In [2]:
# Sample one daily partition per month: the 1st of January through August 2025.
days = [f"2025-{m:02d}-01" for m in range(1, 9)]
paths = [PROC_DIR.joinpath(f"epoch_date={d}") for d in days]

# Read every partition and stack them into one frame with a fresh 0..n-1 index.
df = pd.concat(map(pd.read_parquet, paths), ignore_index=True)
df.head()

Unnamed: 0,algorithm,apogee,argOfPerigee,bStar,classificationMarking,createdAt,createdBy,eccentricity,source,semiMajorAxis,...,inclination,idOnOrbit,idElset,epoch,agom,ballisticCoeff,uct,origin,origObjectId,ephemType
0,SGP4,10202.213,259.3559,0.000943,U,2025-01-02 00:05:10.959000+00:00,system.ob-ingest,0.184317,18th SPCS,8614.424,...,34.2561,5,,2025-01-01 01:49:54.598368+00:00,,,,,,
1,SGP4,10202.217,262.2564,0.000996,U,2025-01-02 06:05:08.465000+00:00,system.ob-ingest,0.184319,18th SPCS,8614.417,...,34.2564,5,,2025-01-01 17:17:08.960640+00:00,,,,,,
2,SGP4,9286.717,275.265,0.001633,U,2025-01-01 23:05:08.691000+00:00,system.ob-ingest,0.145307,18th SPCS,8108.493,...,32.8795,11,,2025-01-01 19:06:31.800960+00:00,,,,,,
3,SGP4,9673.868,173.3927,0.000874,U,2025-01-01 23:05:08.697000+00:00,system.ob-ingest,0.165118,18th SPCS,8302.909,...,32.9011,12,,2025-01-01 19:47:07.123200+00:00,,,,,,
4,SGP4,10594.496,34.2347,0.000585,U,2025-01-02 00:05:10.967000+00:00,system.ob-ingest,0.202279,18th SPCS,8812.011,...,34.2656,16,,2025-01-01 06:18:04.587840+00:00,,,,,,


In [3]:
# summarise the combined frame: column names, non-null counts and dtypes
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333598 entries, 0 to 333597
Data columns (total 30 columns):
 #   Column                 Non-Null Count   Dtype              
---  ------                 --------------   -----              
 0   algorithm              333574 non-null  object             
 1   apogee                 333574 non-null  float64            
 2   argOfPerigee           333598 non-null  float64            
 3   bStar                  333578 non-null  float64            
 4   classificationMarking  333598 non-null  string             
 5   createdAt              333598 non-null  datetime64[us, UTC]
 6   createdBy              333598 non-null  string             
 7   eccentricity           333598 non-null  float64            
 8   source                 333598 non-null  string             
 9   semiMajorAxis          333574 non-null  float64            
 10  satNo                  333595 non-null  Int64              
 11  revNo                  333574 non-null 

## Preprocessing

In [4]:
# View rows that have at least one non-null value in the sparse trailing columns
# (positions 24-29 of the freshly loaded frame).
# The columns are selected by name rather than by position so the cell does not
# depend on the parquet column order and still runs (selecting whatever is left)
# after some of these columns have been dropped from `df`.
cols = df.columns.intersection(
    ["agom", "ballisticCoeff", "uct", "origin", "origObjectId", "ephemType"],
    sort=False,  # keep the frame's own column order
)

# True for every row where any of the selected columns holds a value
mask_any = df[cols].notna().any(axis=1)
print(df.loc[mask_any, ['epoch', *cols]].to_string(index=False))


                           epoch      agom  ballisticCoeff   uct origin origObjectId  ephemType
2025-06-01 23:31:26.698368+00:00 -0.071285            0.01 false    DnD         2653        4.0
2025-06-01 23:53:01.665024+00:00  0.000000            0.00 false    DnD        20738        4.0
2025-06-01 23:20:47.330592+00:00  0.000000            0.00 false    DnD        30797        4.0
2025-06-01 00:42:28.727424+00:00 -0.049333            0.01 false    DnD          858        4.0
2025-06-01 06:38:04.380576+00:00  0.000000            0.00 false    DnD         7665        4.0
2025-06-01 09:32:11.224032+00:00  0.000000            0.00 false    DnD        16141        4.0
2025-06-01 18:20:48.742656+00:00  0.000000            0.00 false    DnD        18334        4.0
2025-06-01 06:20:49.874496+00:00  0.000000            0.00 false    DnD        20739        4.0
2025-06-01 02:00:05.084352+00:00  0.000000            0.00 false    DnD        21281        4.0
2025-06-01 08:17:13.270272+00:00  0.0000

In [5]:
# Drop idElset together with the six optional columns inspected above
# (presumably because they carry too few values to be useful — confirm).
# errors="ignore" lets the cell be re-run after the columns are already gone.
df = df.drop(
    labels=[
        "idElset",
        "agom",
        "ballisticCoeff",
        "uct",
        "origin",
        "origObjectId",
        "ephemType",
    ],
    axis="columns",
    errors="ignore",
)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333598 entries, 0 to 333597
Data columns (total 23 columns):
 #   Column                 Non-Null Count   Dtype              
---  ------                 --------------   -----              
 0   algorithm              333574 non-null  object             
 1   apogee                 333574 non-null  float64            
 2   argOfPerigee           333598 non-null  float64            
 3   bStar                  333578 non-null  float64            
 4   classificationMarking  333598 non-null  string             
 5   createdAt              333598 non-null  datetime64[us, UTC]
 6   createdBy              333598 non-null  string             
 7   eccentricity           333598 non-null  float64            
 8   source                 333598 non-null  string             
 9   semiMajorAxis          333574 non-null  float64            
 10  satNo                  333595 non-null  Int64              
 11  revNo                  333574 non-null 

## Orbit size and shape

The [European Space Agency](https://www.esa.int/Enabling_Support/Space_Transportation/Types_of_orbits) classifies orbits into the following types:

1. Low Earth Orbit (LEO) - comms and remote sensing systems

    Altitude is under 2000 km.

2. Medium Earth Orbit (MEO) - navigation, U.S. GPS

    Altitude range is between LEO and GEO.

3. Geostationary (GEO) Orbits - telecom, weather, and Earth observation

    The orbital period of GEO objects matches the Earth's rotation period (23 hours, 56 minutes, 4 seconds), which corresponds to an altitude of ~35,786 km.

4. Highly Elliptical Orbit (HEO) - comms, radio
    
    An HEO is oblong, with one end nearer the Earth and the other more distant. The eccentricity is greater than 0.1 and the apogee lies above GEO altitude.

### Altitude vs. radius of perigee

Perigee: the point in the orbit at which the object is nearest to the Earth.

Radius of perigee: the distance from the Earth's center to the object at perigee.

Altitude depends on which Earth radius is used: equatorial (6378.137 km) or mean (6371.0 km).
Since the Earth is an oblate spheroid, the choice can shift values by 10–20+ km. The classification below uses the equatorial radius.

In [6]:
# TASK: classify objects by orbit

# Reference geometry and class thresholds (all distances in km)
R_EARTH = 6378.137               # Earth's equatorial radius

# Geostationary shell
GEO_ALT = 35786.0                # altitude above Earth's surface
GEO_TOL = 200.0                  # half-width of the accepted ± band
GEO_RAD = GEO_ALT + R_EARTH      # radius from Earth's centre, ~42,164 km

# Radius-of-perigee (RP) limits for the altitude-based classes
LEO_MAX_RP = R_EARTH + 2000.0    # LEO ceiling: 2,000 km altitude
MEO_MIN_RP = LEO_MAX_RP          # MEO begins where LEO ends
MEO_MAX_RP = R_EARTH + 35000.0   # MEO ceiling: 35,000 km altitude

In [7]:
# Boolean masks, one per orbit class. The perigee/apogee columns are compared
# against radii (Earth radius + altitude), not against altitudes.

# LEO: perigee altitude below 2,000 km
leo = df["perigee"].lt(LEO_MAX_RP)

# MEO: perigee altitude from 2,000 km to 35,000 km, both ends included
meo = df["perigee"].between(MEO_MIN_RP, MEO_MAX_RP)

# GEO: perigee and apogee both lie in the GEO radius band (42,164 ± 200 km)
geo = (
    (df["perigee"] >= GEO_RAD - GEO_TOL) & (df["perigee"] <= GEO_RAD + GEO_TOL)
    & (df["apogee"] >= GEO_RAD - GEO_TOL) & (df["apogee"] <= GEO_RAD + GEO_TOL)
)

# HEO: eccentric (e > 0.1) with apogee beyond the GEO band
heo = df["eccentricity"].gt(0.1) & df["apogee"].gt(GEO_RAD + GEO_TOL)

In [8]:
# Label every row with the first class whose mask matches; the list order sets
# the precedence (GEO, then HEO, then LEO, then MEO). Rows matching no mask
# (e.g., supersynchronous circular, missing data, etc.) are labelled "Other".
# The labels are stored as an ordered categorical.
df["orbitType"] = pd.Categorical(
    np.select(
        condlist=[geo, heo, leo, meo],
        choicelist=["GEO", "HEO", "LEO", "MEO"],
        default="Other",
    ),
    categories=["LEO", "MEO", "GEO", "HEO", "Other"],
    ordered=True,
)

# check
print(df["orbitType"].value_counts(dropna=False))

orbitType
LEO      305823
GEO       10973
Other      7562
MEO        6358
HEO        2882
Name: count, dtype: int64


In [9]:
# Object counts per orbit type for each month (rows = month start, columns = orbit type).
#
# Rows are binned with pd.Grouper inside a single groupby rather than with
# groupby(...).resample(...): this skips the per-group resample path (whose
# .size() call appears to have emitted a warning when this cell was last run)
# and lets unstack() fill missing month/orbit combinations with 0 directly, so
# the counts never pass through float.
monthly_counts = (
    df.groupby(
        [
            pd.Grouper(key="epoch", freq="MS"),  # monthly bins at Month Start; tz-aware is fine
            "orbitType",
        ],
        observed=True,                           # set observed explicitly
    )
    .size()
    .unstack("orbitType", fill_value=0)          # columns = orbit types
    .astype("int64")
    .rename_axis(index="month", columns="orbitType")
    .rename(index=lambda ts: ts.strftime("%Y-%m-%d"))  # month labels as plain date strings
)
print(monthly_counts)

orbitType     LEO  MEO   GEO  HEO  Other
month                                   
2025-01-01  30292  752  1464  364   1055
2025-02-01  34645  616  1193  291    759
2025-03-01  40600  811  1439  402    886
2025-04-01  47200  991  1608  414   1094
2025-05-01  21482  880   863  381    758
2025-06-01  50076  964  1884  393   1245
2025-07-01  45277  785  1734  356   1136
2025-08-01  36251  559   788  281    629


  .size()
