In [0]:
from pyspark.sql import SparkSession
import pandas as pd
import numpy as np
from sgp4.api import Satrec, jday

# Reuse the active Spark session, or start one if none exists
spark = SparkSession.builder.getOrCreate()

# Read the flagged orbital-elements table
df = spark.read.table("workspace.default.parsed_orbital_elements_flagged")

# SGP4 is driven row by row from Python, so pull the table into pandas
sat_df = df.toPandas()

# Partition on the GPS flag; a row whose flag is neither True nor False
# (e.g. missing) ends up in neither frame
gps_flag = sat_df["is_gps_satellite"]
navstar_df = sat_df.loc[gps_flag == True].copy()
others_df = sat_df.loc[gps_flag == False].copy()

# Propagation epoch 2025-09-10 00:00:00, converted by jday() into the
# (jd, fr) pair that propagate() reads
year, month, day, hour, minute, second = 2025, 9, 10, 0, 0, 0
jd, fr = jday(year, month, day, hour, minute, second)

def propagate(line1, line2):
    """Propagate one TLE to the notebook's target epoch with SGP4.

    Uses the module-level Julian date pair ``jd`` / ``fr``.

    Returns the position reported by ``Satrec.sgp4`` as a NumPy array
    ``[X, Y, Z]`` when its error code is 0, otherwise ``None``.
    """
    satrec = Satrec.twoline2rv(line1, line2)
    error_code, position, _velocity = satrec.sgp4(jd, fr)
    return np.array(position) if error_code == 0 else None

# Column layout shared by both position tables. Passing it explicitly keeps
# X/Y/Z present even when no satellite propagates successfully, so the
# nearest-neighbour step does not fail with a KeyError on an empty frame.
POSITION_COLUMNS = ["Satellite", "Object_ID", "Object_Type", "ORBIT_CLASS", "X", "Y", "Z"]


def propagate_positions(frame):
    """Propagate every TLE row of ``frame`` and tabulate the positions.

    Parameters
    ----------
    frame : pandas.DataFrame
        Must provide the columns OBJECT_NAME, OBJECT_ID, OBJECT_TYPE,
        ORBIT_CLASS, TLE_LINE1 and TLE_LINE2.

    Returns
    -------
    pandas.DataFrame
        One row per satellite for which ``propagate`` returned a position,
        with the columns listed in ``POSITION_COLUMNS``. Rows for which
        ``propagate`` returned ``None`` are skipped.
    """
    records = []
    for _, row in frame.iterrows():
        pos = propagate(row["TLE_LINE1"], row["TLE_LINE2"])
        if pos is None:
            continue
        records.append({
            "Satellite": row["OBJECT_NAME"],
            "Object_ID": row["OBJECT_ID"],
            "Object_Type": row["OBJECT_TYPE"],
            "ORBIT_CLASS": row["ORBIT_CLASS"],
            "X": pos[0],
            "Y": pos[1],
            "Z": pos[2],
        })
    return pd.DataFrame(records, columns=POSITION_COLUMNS)


# Propagate NAVSTAR sats
navstar_positions = propagate_positions(navstar_df)

# Propagate other sats
other_positions = propagate_positions(others_df)

# Nearest neighbors calculation
NEAREST_COUNT = 5  # number of closest objects kept per NAVSTAR satellite

# Loop-invariant: coordinates of every non-GPS object as an (n, 3) float array
other_xyz = other_positions[["X", "Y", "Z"]].to_numpy(dtype=float)

nearest_results = []
for _, nav in navstar_positions.iterrows():
    nav_pos = np.array([nav["X"], nav["Y"], nav["Z"]], dtype=float)

    # assign() returns a new frame, so other_positions itself is never
    # modified and re-running this cell starts from the same input.
    candidates = other_positions.assign(
        Distance_km=np.linalg.norm(other_xyz - nav_pos, axis=1)
    )

    for _, near in candidates.nsmallest(NEAREST_COUNT, "Distance_km").iterrows():
        nearest_results.append({
            "NAVSTAR": nav["Satellite"],
            "Nearest_Sat": near["Satellite"],
            "Object_ID": near["Object_ID"],
            "Distance_km": near["Distance_km"],
            "Object_Type": near["Object_Type"],
            "ORBIT_CLASS": near["ORBIT_CLASS"],
        })

nearest_df = pd.DataFrame(nearest_results)

# Convert the pandas result to a Spark DataFrame and display it.
# NOTE: the Delta write below is commented out, so this cell does not persist
# anything; uncomment it to save the table for reuse.
nearest_spark_df = spark.createDataFrame(nearest_df)
# nearest_spark_df.write.format("delta").mode("overwrite").saveAsTable("default.navstar_nearest_neighbors")
display(nearest_spark_df)


In [0]:
%python
%pip install skyfield

In [0]:
from skyfield.api import EarthSatellite, load
from datetime import datetime

ts = load.timescale()

def get_future_position(line1, line2, target_date_str):
    """Return the (x, y, z) position in km of a satellite at a given instant.

    Parameters
    ----------
    line1, line2 : str
        The two lines of the satellite's TLE.
    target_date_str : str
        Timestamp accepted by ``datetime.fromisoformat``, e.g.
        ``'2025-09-10T00:00:00'`` or ``'2025-09-10T00:00:00+00:00'``.
        A timestamp without a UTC offset is interpreted as UTC.

    Returns
    -------
    tuple
        The three components of ``sat.at(t).position.km``.
    """
    from datetime import timezone  # local import: the cell only imports `datetime`

    sat = EarthSatellite(line1, line2, "sat", ts)
    target_date = datetime.fromisoformat(target_date_str)
    if target_date.tzinfo is None:
        # Skyfield refuses naive datetimes (ValueError), so pin them to UTC.
        target_date = target_date.replace(tzinfo=timezone.utc)
    t = ts.utc(target_date)
    pos = sat.at(t).position.km
    return pos[0], pos[1], pos[2]


In [0]:
%python
from pyspark.sql import functions as F

# Read the flagged orbital-elements table (rebinds `df` as a Spark DataFrame)
df = spark.read.table("workspace.default.parsed_orbital_elements_flagged")

# Keep only the rows flagged as GPS (NAVSTAR) satellites
navstar_df = df.filter(F.col("is_gps_satellite") == True)

# Bring the object ID and both TLE lines to the driver as plain dicts
navstar_tle_columns = ("OBJECT_ID", "TLE_LINE1", "TLE_LINE2")
navstar_rows = navstar_df.select(*navstar_tle_columns).collect()
navstar_list = [
    {column: row[column] for column in navstar_tle_columns}
    for row in navstar_rows
]

print("NAVSTAR satellites:", navstar_list[:10])  # check first 10

In [0]:
%python
from pyspark.sql import functions as F

# Keep the rows explicitly flagged as non-GPS (uses `df` from the previous cell)
others_df = df.filter(F.col("is_gps_satellite") == False)

# Bring the object ID and both TLE lines to the driver as plain dicts
others_tle_columns = ("OBJECT_ID", "TLE_LINE1", "TLE_LINE2")
others_rows = others_df.select(*others_tle_columns).collect()
others_list = [
    {column: row[column] for column in others_tle_columns}
    for row in others_rows
]

In [0]:
from skyfield.api import EarthSatellite, load
from datetime import datetime

ts = load.timescale()

def get_future_position(line1, line2, target_date_str):
    """Return the (x, y, z) position in km of a satellite at a given instant.

    Parameters
    ----------
    line1, line2 : str
        The two lines of the satellite's TLE.
    target_date_str : str
        Timestamp accepted by ``datetime.fromisoformat``, e.g.
        ``'2025-09-10T00:00:00'`` or ``'2025-09-10T00:00:00+00:00'``.
        A timestamp without a UTC offset is interpreted as UTC.

    Returns
    -------
    tuple
        The three components of ``sat.at(t).position.km``.
    """
    from datetime import timezone  # local import: the cell only imports `datetime`

    sat = EarthSatellite(line1, line2, "sat", ts)
    target_date = datetime.fromisoformat(target_date_str)
    if target_date.tzinfo is None:
        # Skyfield refuses naive datetimes (ValueError), so pin them to UTC.
        target_date = target_date.replace(tzinfo=timezone.utc)
    t = ts.utc(target_date)
    pos = sat.at(t).position.km
    return pos[0], pos[1], pos[2]


In [0]:
%python
import numpy as np
from datetime import datetime
from pytz import utc

def distance_km(p1, p2):
    """Return the Euclidean norm of ``p1 - p2`` for two coordinate sequences."""
    separation = np.asarray(p1) - np.asarray(p2)
    return np.linalg.norm(separation)



target_date = "2025-09-07T00:00:00"

# Skyfield only accepts timezone-aware datetimes, so attach UTC and carry the
# offset in the ISO string handed to get_future_position().
target_date_dt = datetime.fromisoformat(target_date).replace(tzinfo=utc)
target_date_str = target_date_dt.isoformat()

CONJUNCTION_THRESHOLD_KM = 50  # pairs closer than this are recorded

# Propagate each non-GPS object once instead of once per NAVSTAR satellite.
others_at_target = [
    (
        other["OBJECT_ID"],
        get_future_position(other["TLE_LINE1"], other["TLE_LINE2"], target_date_str),
    )
    for other in others_list
]

conjunctions = []
for nav in navstar_list:
    nav_pos = get_future_position(nav["TLE_LINE1"], nav["TLE_LINE2"], target_date_str)
    for other_id, other_pos in others_at_target:
        d = distance_km(nav_pos, other_pos)
        if d < CONJUNCTION_THRESHOLD_KM:
            # navstar_list / others_list hold dicts, so IDs are read by key
            # (attribute access on a dict raises AttributeError).
            conjunctions.append((nav["OBJECT_ID"], other_id, d))

In [0]:
# Precompute positions at target date: one propagation per object, keyed by
# OBJECT_ID (a repeated ID keeps the position of its last occurrence).
# target_date_str carries an explicit UTC offset; the bare target_date string
# parses to a naive datetime, which Skyfield rejects.
positions = {
    sat["OBJECT_ID"]: get_future_position(sat["TLE_LINE1"], sat["TLE_LINE2"], target_date_str)
    for sat in navstar_list + others_list
}

In [0]:
import numpy as np

# Extract nav positions as an (n_nav, 3) array. reshape(-1, 3) keeps the array
# two-dimensional even when the list is empty, so the broadcasting below still
# works instead of raising IndexError.
nav_ids = [s["OBJECT_ID"] for s in navstar_list]
nav_positions = np.array([positions[i] for i in nav_ids], dtype=float).reshape(-1, 3)

# Extract other positions as an (n_other, 3) array
other_ids = [s["OBJECT_ID"] for s in others_list]
other_positions = np.array([positions[i] for i in other_ids], dtype=float).reshape(-1, 3)

# Compute pairwise distances using broadcasting:
# (n_nav, 1, 3) - (1, n_other, 3) -> (n_nav, n_other, 3)
diff = nav_positions[:, None, :] - other_positions[None, :, :]
distances = np.linalg.norm(diff, axis=2)  # shape = (len(nav), len(others))

# Find indices where distance < threshold
threshold = 50  # km
close_pairs = np.argwhere(distances < threshold)

# Build results as (nav_id, other_id, distance) tuples
conjunctions = [
    (nav_ids[i], other_ids[j], float(distances[i, j]))
    for i, j in close_pairs
]

In [0]:
# Print all conjunctions
# print("Conjunctions (NAVSTAR vs Others within threshold):")
# print("NAVSTAR_ID      OTHER_ID        DISTANCE_KM")
# print("------------------------------------------------")

# One left-aligned row per (NAVSTAR id, other id, distance) tuple
for conjunction in conjunctions:
    nav_id, other_id, d = conjunction
    print(f"{nav_id:<15} {other_id:<15} {d:10.3f}")


In [0]:
# Report how many objects fall in each group
for label, collection in (("NAVSTAR count:", navstar_list), ("Others count:", others_list)):
    print(label, len(collection))


In [0]:
# Smoke test: propagate the first NAVSTAR satellite.
# Uses target_date_str (explicit UTC offset) because the bare target_date
# string parses to a naive datetime, which Skyfield rejects.
test_nav = navstar_list[0]
print(get_future_position(test_nav["TLE_LINE1"], test_nav["TLE_LINE2"], target_date_str))

In [0]:
# Debug run on a small sample (2 NAVSTAR x 5 others) with a looser threshold;
# every pair's distance is printed so the numbers can be checked by eye.
conjunctions = []
DEBUG_THRESHOLD_KM = 500  # instead of 50 km

# Propagate the sampled "other" objects once, not once per NAVSTAR satellite.
# target_date_str carries a UTC offset; Skyfield rejects naive datetimes.
sample_others = [
    (
        other["OBJECT_ID"],
        get_future_position(other["TLE_LINE1"], other["TLE_LINE2"], target_date_str),
    )
    for other in others_list[:5]
]

for nav in navstar_list[:2]:
    nav_pos = get_future_position(nav["TLE_LINE1"], nav["TLE_LINE2"], target_date_str)
    for other_id, other_pos in sample_others:
        d = distance_km(nav_pos, other_pos)
        if d < DEBUG_THRESHOLD_KM:
            conjunctions.append((nav["OBJECT_ID"], other_id, d))
        print(nav["OBJECT_ID"], other_id, d)

In [0]:
# For every NAVSTAR satellite, find the single closest non-GPS object.
# Result: list of (navstar_id, closest_object_id, distance) tuples; when
# others_list is empty the entry is (navstar_id, None, inf).

# Propagate each non-GPS object once; the original nested loop re-propagated
# all of them for every NAVSTAR satellite. target_date_str carries a UTC
# offset, which Skyfield requires.
others_at_target = [
    (
        other["OBJECT_ID"],
        get_future_position(other["TLE_LINE1"], other["TLE_LINE2"], target_date_str),
    )
    for other in others_list
]

nearest = []
for nav in navstar_list:
    nav_pos = get_future_position(nav["TLE_LINE1"], nav["TLE_LINE2"], target_date_str)
    min_d = float("inf")
    closest_obj = None
    for other_id, other_pos in others_at_target:
        d = distance_km(nav_pos, other_pos)
        if d < min_d:  # strict '<' keeps the first object on ties
            min_d = d
            closest_obj = other_id
    nearest.append((nav["OBJECT_ID"], closest_obj, min_d))



In [0]:
# Show the first 10 entries of `nearest` (list order, not sorted by distance)
first_ten = nearest[:10]
for obj in first_ten:
    print(f"NAVSTAR {obj[0]} nearest is {obj[1]} at distance {obj[2]:,.1f} km")

In [0]:
from pyspark.sql import SparkSession

# Load Delta table
# (rebinds the notebook-level `spark` and `df` names used by earlier cells)
spark = SparkSession.builder.getOrCreate()
df = spark.read.table("workspace.default.parsed_orbital_elements_flagged")

# Collect into Python objects
# collect() pulls every row of the table to the driver; the next cell reads
# OBJECT_ID / TLE_LINE1 / TLE_LINE2 from these rows by attribute.
all_objects = df.collect()


In [0]:
target_id = "1997-035A"   # NAVSTAR object ID
# Explicit UTC offset: Skyfield rejects the naive datetime that a bare
# "YYYY-MM-DDTHH:MM:SS" string parses to.
target_date = "2025-09-10T00:00:00+00:00"
threshold_km = 5000  # search radius in km; shrink it to tighten what counts as "near"

# Find NAVSTAR position; fail with a clear message if the ID is not in the table
navstar = next((obj for obj in all_objects if obj.OBJECT_ID == target_id), None)
if navstar is None:
    raise ValueError(f"OBJECT_ID {target_id!r} not found in all_objects")
nav_pos = get_future_position(navstar.TLE_LINE1, navstar.TLE_LINE2, target_date)

# Compare with all others, keeping (object_id, distance) for those in range
neighbors = []
for other in all_objects:
    if other.OBJECT_ID == target_id:
        continue
    other_pos = get_future_position(other.TLE_LINE1, other.TLE_LINE2, target_date)
    d = distance_km(nav_pos, other_pos)
    if d < threshold_km:
        neighbors.append((other.OBJECT_ID, d))

# Sort by distance, nearest first
neighbors.sort(key=lambda pair: pair[1])

# Print results
if neighbors:
    print(f"Objects near {target_id} (within {threshold_km} km):")
    for obj, d in neighbors:
        print(f"  {obj} at {d:.2f} km")
else:
    print(f"No objects found near {target_id} within {threshold_km} km.")


In [0]:
%python
import numpy as np

def distance_km(p1, p2):
    """Return the Euclidean norm of ``p1 - p2`` for two coordinate sequences."""
    return np.linalg.norm(np.subtract(p1, p2))

# Explicit UTC offset: Skyfield rejects the naive datetime that a bare
# "YYYY-MM-DDTHH:MM:SS" string parses to.
target_date = "2025-09-07T00:00:00+00:00"

# Propagate each non-GPS object once; the original nested loop re-propagated
# all of them for every NAVSTAR satellite.
others_at_target = [
    (
        other["OBJECT_ID"],
        get_future_position(other["TLE_LINE1"], other["TLE_LINE2"], target_date),
    )
    for other in others_list
]

# Maps NAVSTAR OBJECT_ID -> (closest OBJECT_ID, distance); the value is
# (None, inf) when others_list is empty.
closest_objects = {}

for nav in navstar_list:
    nav_pos = get_future_position(nav["TLE_LINE1"], nav["TLE_LINE2"], target_date)

    min_dist = float("inf")
    min_obj = None

    for other_id, other_pos in others_at_target:
        d = distance_km(nav_pos, other_pos)

        if d < min_dist:  # strict '<' keeps the first object on ties
            min_dist = d
            min_obj = other_id

    closest_objects[nav["OBJECT_ID"]] = (min_obj, min_dist)

# Print results
for nav_id, (obj_id, dist) in closest_objects.items():
    print(f"{nav_id} → closest object: {obj_id} at {dist:.2f} km")

In [0]:
%python
import pandas as pd

# Flatten closest_objects ({nav_id: (obj_id, dist)}) into one record per NAVSTAR
result_columns = ("NAVSTAR_ID", "Closest_Object_ID", "Distance_km")
results_df = pd.DataFrame([
    dict(zip(result_columns, (navstar_id, *closest)))
    for navstar_id, closest in closest_objects.items()
])

# Render the frame as a table
display(results_df)

In [0]:
# Install the openpyxl library
%pip install openpyxl

In [0]:
%python
import pandas as pd

# Read the Excel file
# NOTE(review): this rebinds results_df, replacing the frame built from
# closest_objects earlier in the notebook with the workbook's contents.
# The plotting cells below read NAVSTAR_ID, Closest_Object_ID and Distance_km
# from it — presumably the workbook has those columns; confirm.
results_df = pd.read_excel("/Volumes/workspace/default/spacedata/NAVSTAR_data.xlsx")

# Display the DataFrame as a table
display(results_df)

In [0]:
%python
%pip install networkx
import networkx as nx
import matplotlib.pyplot as plt

# Build graph: one edge per NAVSTAR satellite to its closest object,
# weighted by the separation distance
G = nx.Graph()

for navstar_id, closest_id, dist_km in zip(
    results_df["NAVSTAR_ID"], results_df["Closest_Object_ID"], results_df["Distance_km"]
):
    G.add_edge(navstar_id, closest_id, weight=dist_km)

# Draw network with two colors
pos = nx.spring_layout(G, seed=42)  # fixed seed -> same layout on every run
plt.figure(figsize=(14,8))

# Define node colors: NAVSTAR nodes blue, everything else coral.
# The ID set is built once instead of rebuilding the column array per node.
navstar_ids = set(results_df["NAVSTAR_ID"])
node_colors = ["skyblue" if node in navstar_ids else "lightcoral" for node in G.nodes()]

nx.draw(G, pos, with_labels=True, node_size=800, font_size=7, node_color=node_colors, edge_color="gray")
labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos, edge_labels={k:f"{v:.0f} km" for k,v in labels.items()}, font_size=6)

plt.title("NAVSTAR Satellites and Their Closest Objects")
plt.show()

In [0]:
%python
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(16, 6))
bars = ax.bar(
    results_df["NAVSTAR_ID"],
    results_df["Distance_km"],
    color="skyblue",
    edgecolor="black",
)

# Write each closest-object ID vertically, slightly above the top of its bar
label_offset = 50  # in y-axis data units
for bar, obj_id, d in zip(bars, results_df["Closest_Object_ID"], results_df["Distance_km"]):
    bar_center = bar.get_x() + bar.get_width()/2
    ax.text(bar_center, d + label_offset, obj_id,
            ha="center", va="bottom", fontsize=7, rotation=90)

plt.xticks(rotation=75, ha="right")
ax.set_ylabel("Closest Object Distance (km)")
ax.set_title("Closest Object Distances & IDs for NAVSTAR Satellites")
fig.tight_layout()
plt.show()

In [0]:
# Miss-distance calculation: smallest sampled separation between two satellites
import numpy as np
from datetime import datetime, timedelta

def distance_km(p1, p2):
    """Return the Euclidean norm of ``p1 - p2`` for two coordinate sequences."""
    first, second = (np.asarray(point) for point in (p1, p2))
    return np.linalg.norm(first - second)

def compute_miss_distance(tle1_a, tle2_a, tle1_b, tle2_b, center_date, hours=24, step_minutes=10):
    """
    Compute the smallest sampled separation between two satellites given TLEs.

    Both satellites are propagated with ``get_future_position`` on a fixed time
    grid from ``center_date - hours`` to ``center_date + hours`` and the
    smallest distance seen on that grid is returned. The closest approach
    between two samples is not resolved, so the result is an upper bound at
    the chosen step size.

    tle1_a, tle2_a : TLE lines for sat A
    tle1_b, tle2_b : TLE lines for sat B
    center_date    : center of propagation window (string "YYYY-MM-DDTHH:MM:SS",
                     optionally with a UTC offset; without one it is taken as UTC)
    hours          : half-width of window to propagate around (default ±24h)
    step_minutes   : propagation step size (default 10 min)

    Returns ``(min_dist, min_time)``: the smallest distance and the
    timezone-aware datetime of the sample at which it occurred.

    Raises ValueError if ``step_minutes`` is not positive (the loop would never
    advance) or ``hours`` is negative (the window would be empty).
    """
    from datetime import timezone  # local import: the cell imports only datetime/timedelta

    if step_minutes <= 0:
        raise ValueError("step_minutes must be positive")
    if hours < 0:
        raise ValueError("hours must be non-negative")

    center = datetime.fromisoformat(center_date)
    if center.tzinfo is None:
        # Skyfield rejects naive datetimes, so treat an offset-less input as UTC.
        center = center.replace(tzinfo=timezone.utc)

    half_window = timedelta(hours=hours)
    step = timedelta(minutes=step_minutes)
    start = center - half_window
    end = center + half_window

    min_dist = float("inf")
    min_time = None

    t = start
    while t <= end:
        # isoformat() keeps the UTC offset, so the helper parses an aware datetime
        t_str = t.isoformat()
        pos_a = get_future_position(tle1_a, tle2_a, t_str)
        pos_b = get_future_position(tle1_b, tle2_b, t_str)

        d = distance_km(pos_a, pos_b)
        if d < min_dist:
            min_dist = d
            min_time = t
        t += step

    return min_dist, min_time


In [0]:
# Example pair, chosen explicitly rather than through leftover loop variables:
# the last entries of navstar_list / others_list, which is what `nav` and
# `other` held after the closest-object loop. Both lists hold dicts, so the
# TLE lines are read by key (attribute access raises AttributeError).
example_nav = navstar_list[-1]
example_other = others_list[-1]

miss_d, miss_t = compute_miss_distance(example_nav["TLE_LINE1"], example_nav["TLE_LINE2"],
                                       example_other["TLE_LINE1"], example_other["TLE_LINE2"],
                                       "2025-09-07T00:00:00")
print(f"Miss distance: {miss_d:.3f} km at {miss_t}")
