In [0]:
# Download the GPS data (note: change the "GROUP" query parameter to fetch a different navigation constellation)
import requests

def download_gps_tle(save_path="gps-ops.tle", timeout=30):
    """
    Download the GPS Operational TLE data from CelesTrak and save it to disk.

    Args:
        save_path: Destination file path for the downloaded TLE text.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` can block indefinitely on a stalled connection.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    url = "https://celestrak.org/NORAD/elements/gp.php"
    params = {
        "GROUP": "gps-ops",
        "FORMAT": "tle"
    }
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()  # Raises an error for bad responses (4xx, 5xx)

    # Explicit encoding keeps the output independent of the platform default;
    # newline="" writes the payload's line endings exactly as received instead
    # of letting text mode translate them.
    with open(save_path, "w", encoding="utf-8", newline="") as f:
        f.write(response.text)

    print(f"GPS Operational TLE data saved to '{save_path}'.")

if __name__ == "__main__":
    # Write straight to the volume path that the scrubbing cell reads from;
    # the default save_path would leave the file in the working directory,
    # where the Spark read below never looks.
    download_gps_tle("/Volumes/workspace/default/spacedata/gps-ops.tle")


In [0]:
#Scrubbing gps data
%python
from pyspark.sql.functions import monotonically_increasing_id, row_number, floor, col
from pyspark.sql.window import Window

# Read the TLE file as a DataFrame
tle_lines = spark.read.text(
    "/Volumes/workspace/default/spacedata/gps-ops.tle"
).withColumnRenamed("value", "line")

# Add a unique id to each row
tle_lines = tle_lines.withColumn(
    "unique_id", monotonically_increasing_id()
)

# Add a row number to group every 3 lines
window = Window.orderBy("unique_id")
tle_with_rownum = tle_lines.withColumn(
    "row_num", row_number().over(window)
)

# Assign a group id for each TLE triplet
tle_with_group = tle_with_rownum.withColumn(
    "group_id", floor((col("row_num") - 1) / 3)
)

# Pivot the lines into columns: name, line1, line2
tle_with_group = tle_with_group.withColumn(
    "line_type", ((col("row_num") - 1) % 3)
)

tle_pivoted = tle_with_group.groupBy("group_id").pivot(
    "line_type", [0, 1, 2]
).agg({"line": "first"})

# Rename columns for clarity
tle_df = tle_pivoted.select(
    col("0").alias("name"),
    col("1").alias("line1"),
    col("2").alias("line2")
)

display(tle_df)

In [0]:
#Writing the data into a csv
%python
# Save the parsed TLE DataFrame as a CSV for use in Pandas/sgp4
tle_df.write.option("header", True).mode("overwrite").csv(
    "/Volumes/workspace/default/spacedata/gps-ops.csv"
)

In [0]:
#Writing the data into a delta table
%python
tle_df.write.format("delta").mode("overwrite").saveAsTable("default.gps_ops_tle")

In [0]:
%python
%pip install sgp4

In [0]:
%python
import pandas as pd
from sgp4.api import Satrec, jday

df = spark.read.format("delta").table("default.gps_ops_tle")
pdf = df.toPandas()

sat = Satrec.twoline2rv(
    pdf.loc[0, "line1"],
    pdf.loc[0, "line2"]
)

jd, fr = jday(2025, 8, 19, 0, 0, 0)
e, r, v = sat.sgp4(jd, fr)

print("Error code:", e)
print("Position (km):", r)
print("Velocity (km/s):", v)

In [0]:
#Parsing the gps data(TLE lines)
from pyspark.sql.functions import substring, trim

df = spark.read.format("delta").table("default.gps_ops_tle")

# Fixed-width fields to slice out of the element lines:
# (output column, source column, 1-based start position, width)
TLE_FIELD_SPECS = [
    ("norad_cat_id",     "line1",  3,  5),  # NORAD Catalog Number, columns 3-7
    ("inclination_deg",  "line2",  9,  8),  # Inclination, columns 9-16
    ("raan_deg",         "line2", 18,  8),  # RA of Ascending Node, columns 18-25
    ("eccentricity",     "line2", 27,  7),  # Eccentricity, columns 27-33
    ("arg_perigee_deg",  "line2", 35,  8),  # Argument of Perigee, columns 35-42
    ("mean_anomaly_deg", "line2", 44,  8),  # Mean Anomaly, columns 44-51
    ("mean_motion",      "line2", 53, 11),  # Mean Motion, columns 53-63
]

# Keep the raw columns and append one trimmed substring per field spec.
# The extracted values stay as text; no numeric cast is applied here.
parsed_df = df.select(
    "name",
    "line1",
    "line2",
    *[
        trim(substring(source, start, width)).alias(output)
        for output, source, start, width in TLE_FIELD_SPECS
    ],
)

parsed_df.write.format("delta").mode("overwrite").saveAsTable("default.gps_ops_tle_parsed")

In [0]:
# Render the parsed TLE DataFrame for visual inspection
display(parsed_df)

In [0]:
# Load the parsed GPS TLE table under the alias "gps"
df_gps = spark.read.table("workspace.default.gps_ops_tle_parsed").alias("gps")

In [0]:
# Left-join the orbital-elements table to the parsed GPS TLE table on the
# NORAD catalog id. Every orbital row is kept; the GPS-side columns are null
# for rows without a matching GPS satellite.
df_orbital = spark.table("workspace.default.parsed_orbital_elements").alias("orb")
df_gps = spark.table("workspace.default.gps_ops_tle_parsed").alias("gps")

norad_ids_match = df_orbital["NORAD_CAT_ID"] == df_gps["norad_cat_id"]
df_navigation = df_orbital.join(df_gps, on=norad_ids_match, how="left")

display(df_navigation)

In [0]:
from pyspark.sql.functions import when

df_orbital = spark.table("workspace.default.parsed_orbital_elements").alias("orb")
df_gps = spark.table("workspace.default.gps_ops_tle_parsed").alias("gps")

# Join condition: same NORAD catalog id on both sides
norad_ids_match = df_orbital["NORAD_CAT_ID"] == df_gps["norad_cat_id"]

# True when the left join found a GPS row for this object, False otherwise
has_gps_match = when(df_gps["norad_cat_id"].isNotNull(), True).otherwise(False)

df_flagged = (
    df_orbital
    .join(df_gps, on=norad_ids_match, how="left")
    .withColumn("is_gps_satellite", has_gps_match)
)

display(df_flagged)

In [0]:
%sql
-- List the name, data type and comment of every column of
-- workspace.default.parsed_orbital_elements_flagged.
-- NOTE(review): the cell that writes this table comes after this one in the
-- notebook, so on a fresh run this query may return no rows; confirm the order.
SELECT 
  `column_name`,
  `data_type`,
  `comment`
FROM 
  `information_schema`.`columns`
WHERE 
  `table_catalog` = 'workspace' 
  AND `table_schema` = 'default' 
  AND `table_name` = 'parsed_orbital_elements_flagged';

In [0]:
# Columns to remove before persisting. DataFrame.drop() is a no-op for any
# listed name that is not present in the schema.
# NOTE(review): presumably column-name matching is case-insensitive (Spark's
# default), in which case "norad_cat_id" may also remove the orbital-side
# NORAD_CAT_ID column; confirm this is intended.
columns_to_drop = ["mean_motion", "eccentricity_gps", "norad_cat_id"]
df_flagged = df_flagged.drop(*columns_to_drop)

# Persist the flagged DataFrame as a Delta table, replacing any prior contents
df_flagged.write.saveAsTable(
    "default.parsed_orbital_elements_flagged",
    format="delta",
    mode="overwrite",
)

In [0]:
%python

# Read the Delta table
df_gps = spark.read.table("workspace.default.gps_ops_tle_parsed")
# Writing the data into a CSV

# Save the parsed TLE DataFrame as a CSV for use in Pandas/sgp4
df_gps.write.option("header", True).mode("overwrite").csv(
    "/Volumes/workspace/default/spacedata/gps-ops"
)

In [0]:
# Render the parsed GPS TLE table for visual inspection
display(df_gps)

In [0]:
from pyspark.sql import SparkSession
import pandas as pd
import numpy as np
from sgp4.api import Satrec, jday

# Reuse the active Spark session, creating one if none exists
spark = SparkSession.builder.getOrCreate()

# Read the flagged orbital-elements table and pull it into pandas, since the
# SGP4 propagation below runs row by row on plain Python objects
df = spark.table("workspace.default.parsed_orbital_elements_flagged")
sat_df = df.toPandas()

# Partition the catalogue by the GPS flag: NAVSTAR satellites vs everything else
gps_flag = sat_df["is_gps_satellite"]
navstar_df = sat_df[gps_flag == True].copy()
others_df = sat_df[gps_flag == False].copy()

# Epoch at which every object is propagated: 2025-09-10 00:00:00
year, month, day, hour, minute, second = 2025, 9, 10, 0, 0, 0
jd, fr = jday(year, month, day, hour, minute, second)

def propagate(line1, line2):
    """Propagate one TLE to the notebook-level epoch (``jd``, ``fr``).

    Args:
        line1: First TLE element line.
        line2: Second TLE element line.

    Returns:
        A NumPy array holding the position vector [X, Y, Z] when SGP4 reports
        error code 0; ``None`` for any non-zero error code.
    """
    satellite = Satrec.twoline2rv(line1, line2)
    error_code, position, _velocity = satellite.sgp4(jd, fr)
    if error_code != 0:
        return None
    return np.array(position)

# Column layout shared by both position tables. Declaring it explicitly keeps
# the schema intact even when no object propagates successfully.
POSITION_COLUMNS = ["Satellite", "Object_ID", "Object_Type", "ORBIT_CLASS", "X", "Y", "Z"]


def _propagate_frame(frame):
    """Propagate every row of ``frame`` and return the positions as a DataFrame.

    ``frame`` must provide TLE_LINE1, TLE_LINE2, OBJECT_NAME, OBJECT_ID,
    OBJECT_TYPE and ORBIT_CLASS. Rows for which ``propagate`` returns ``None``
    are skipped. The result always has the POSITION_COLUMNS columns, with
    X/Y/Z stored as floats.
    """
    records = []
    for _, row in frame.iterrows():
        pos = propagate(row["TLE_LINE1"], row["TLE_LINE2"])
        if pos is None:
            continue  # SGP4 reported an error for this element set
        records.append({
            "Satellite": row["OBJECT_NAME"],
            "Object_ID": row["OBJECT_ID"],
            "Object_Type": row["OBJECT_TYPE"],
            "ORBIT_CLASS": row["ORBIT_CLASS"],
            "X": pos[0],
            "Y": pos[1],
            "Z": pos[2]
        })
    positions = pd.DataFrame(records, columns=POSITION_COLUMNS)
    # An empty result would otherwise carry object-dtype coordinate columns
    return positions.astype({"X": float, "Y": float, "Z": float})


# Propagate NAVSTAR sats
navstar_positions = _propagate_frame(navstar_df)

# Propagate other sats
other_positions = _propagate_frame(others_df)

# Nearest neighbors calculation
N_NEAREST = 5  # number of closest non-GPS objects reported per NAVSTAR satellite
NEAREST_COLUMNS = [
    "NAVSTAR", "Nearest_Sat", "Object_ID", "Distance_km", "Object_Type", "ORBIT_CLASS",
]

nearest_results = []
# With no propagated non-GPS objects there is nothing to rank; skipping the
# loop also avoids indexing X/Y/Z on a frame that may have no columns.
if not other_positions.empty:
    # The coordinates of the other objects do not change between NAVSTAR
    # satellites, so the (n_others, 3) array is extracted once.
    other_xyz = other_positions[["X", "Y", "Z"]].to_numpy(dtype=float)

    for _, nav in navstar_positions.iterrows():
        nav_pos = np.array([nav["X"], nav["Y"], nav["Z"]], dtype=float)

        # Straight-line distance from this NAVSTAR satellite to every other object
        distances = np.linalg.norm(other_xyz - nav_pos, axis=1)

        # assign() returns a new frame, so other_positions is not left holding
        # the distances of whichever NAVSTAR satellite was processed last
        nearest = other_positions.assign(Distance_km=distances).nsmallest(
            N_NEAREST, "Distance_km"
        )
        for _, near in nearest.iterrows():
            nearest_results.append({
                "NAVSTAR": nav["Satellite"],
                "Nearest_Sat": near["Satellite"],
                "Object_ID": near["Object_ID"],
                "Distance_km": near["Distance_km"],
                "Object_Type": near["Object_Type"],
                "ORBIT_CLASS": near["ORBIT_CLASS"],
            })

# Explicit columns keep the schema stable even when no neighbours were found
nearest_df = pd.DataFrame(nearest_results, columns=NEAREST_COLUMNS)

# Convert the pandas result to a Spark DataFrame and persist it as a Delta
# table (replacing any prior contents) so later runs can reuse it
nearest_spark_df = spark.createDataFrame(nearest_df)
nearest_spark_df.write.saveAsTable(
    "default.navstar_nearest_neighbors",
    format="delta",
    mode="overwrite",
)
display(nearest_spark_df)


In [0]:
%python
# Re-derive the GPS-flagged subset of sat_df as an independent copy
navstar_df = sat_df[sat_df["is_gps_satellite"] == True].copy()


In [0]:
# Render the GPS-flagged rows for visual inspection
display(navstar_df)