In [0]:
%pip install skyfield

In [0]:
from skyfield.api import EarthSatellite, load
import pandas as pd
from datetime import datetime

# Load Skyfield timescale; `ts.utc(...)` is used below to turn UTC calendar
# fields into the time objects that `EarthSatellite.at` is evaluated at.
ts = load.timescale()

def get_eci_from_tle(tle1, tle2, when):
    """
    Compute Earth-Centered Inertial (ECI) coordinates from a TLE.

    Parameters
    ----------
    tle1, tle2 : str
        The two lines of the element set, passed straight to ``EarthSatellite``.
    when : datetime or str
        Evaluation epoch. A string must look like "YYYY-MM-DD HH:MM:SS",
        optionally followed by ".<fractional seconds>". The calendar fields
        are handed to ``ts.utc`` unchanged, so the value is interpreted as
        UTC; any tzinfo attached to a datetime is not applied.

    Returns
    -------
    tuple
        (x, y, z) in kilometers, taken from ``sat.at(t).position.km``.

    Raises
    ------
    ValueError
        If a string ``when`` does not match the expected format.
    """
    sat = EarthSatellite(tle1, tle2)

    if isinstance(when, str):
        # Parse whole seconds with strptime and keep the fraction separately
        # instead of throwing it away.
        whole, _, tail = when.partition(".")
        dt = datetime.strptime(whole, "%Y-%m-%d %H:%M:%S")
        # Leading digits after the "." are the fraction; any trailing suffix
        # is ignored, as it was before.
        frac_digits = tail[:len(tail) - len(tail.lstrip("0123456789"))]
        frac_seconds = float("0." + frac_digits) if frac_digits else 0.0
    else:
        dt = when
        # datetime carries microseconds; pandas Timestamp additionally
        # exposes nanoseconds. Plain datetimes have no `nanosecond` attribute.
        frac_seconds = (getattr(dt, "microsecond", 0) / 1e6
                        + getattr(dt, "nanosecond", 0) / 1e9)

    # Sub-second precision matters here: a satellite in low Earth orbit moves
    # several kilometers per second, so truncating the epoch to whole seconds
    # shifts the computed position by a comparable amount.
    t = ts.utc(dt.year, dt.month, dt.day, dt.hour, dt.minute,
               dt.second + frac_seconds)
    pos = sat.at(t).position.km
    return pos[0], pos[1], pos[2]

# Pull the conjunction table into pandas for row-by-row propagation
df = spark.read.table("workspace.default.cdms_data_profile_v2").toPandas()

eci_results = []
for _, row in df.iterrows():
    tca = row["TCA"]

    # Object 1 uses the TLE_LINE* columns, object 2 the tle*_2 columns;
    # both are evaluated at the same TCA.
    x1, y1, z1 = get_eci_from_tle(row["TLE_LINE1"], row["TLE_LINE2"], when=tca)
    x2, y2, z2 = get_eci_from_tle(row["tle1_2"], row["tle2_2"], when=tca)

    # Keyword order fixes the column order of the resulting frame
    eci_results.append(dict(
        sat_1_id=row["SAT_1_ID"],
        eci_1_x=x1,
        eci_1_y=y1,
        eci_1_z=z1,
        sat_2_id=row["SAT_2_ID"],
        eci_2_x=x2,
        eci_2_y=y2,
        eci_2_z=z2,
    ))

# One record per input row -> one DataFrame built in a single call
eci_df = pd.DataFrame(eci_results)
display(eci_df)


In [0]:
# Drop rows that are exact duplicates of an earlier row; no `subset` is given,
# so every column takes part in the comparison.
eci_df = eci_df.drop_duplicates()

In [0]:
# pandas -> Spark, so the result can be written through the Spark writer
spark_eci_df = spark.createDataFrame(eci_df)

# Persist as a Delta table; "overwrite" replaces whatever the table held before
(
    spark_eci_df.write
    .format("delta")
    .mode("overwrite")
    .saveAsTable("workspace.default.eci_coordinates")
)

In [0]:
# Render the `df` DataFrame currently bound in the notebook namespace
display(df)

In [0]:
%python
from skyfield.api import EarthSatellite, load
import pandas as pd
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Load Skyfield timescale (rebinds `ts`, which an earlier cell also creates,
# so this cell does not depend on that one having run)
ts = load.timescale()

def get_trajectory_from_tle(tle1, tle2, when, window_minutes=45, step_seconds=60):
    """
    Generate an ECI trajectory sampled around a given epoch.

    Parameters
    ----------
    tle1, tle2 : str
        The two lines of the element set, passed straight to ``EarthSatellite``.
    when : datetime or str
        Center epoch. A string must look like "YYYY-MM-DD HH:MM:SS",
        optionally followed by ".<fractional seconds>". Calendar fields are
        handed to ``ts.utc`` unchanged, i.e. interpreted as UTC.
    window_minutes : int
        Half-width of the sampling window, in minutes.
    step_seconds : int
        Spacing between samples, in seconds.

    Returns
    -------
    tuple of lists
        (x, y, z) in kilometers. Samples start at ``when - window_minutes``
        and advance by ``step_seconds``; the end point ``when + window_minutes``
        is included when the step divides the window evenly.

    Raises
    ------
    ValueError
        If a string ``when`` does not match the expected format.
    """
    sat = EarthSatellite(tle1, tle2)

    if isinstance(when, str):
        # Parse whole seconds with strptime and keep the fraction separately
        # instead of throwing it away.
        whole, _, tail = when.partition(".")
        dt = datetime.strptime(whole, "%Y-%m-%d %H:%M:%S")
        # Leading digits after the "." are the fraction; any trailing suffix
        # is ignored, as it was before.
        frac_digits = tail[:len(tail) - len(tail.lstrip("0123456789"))]
        frac_seconds = float("0." + frac_digits) if frac_digits else 0.0
    else:
        dt = when
        # datetime carries microseconds; pandas Timestamp additionally
        # exposes nanoseconds. Plain datetimes have no `nanosecond` attribute.
        frac_seconds = (getattr(dt, "microsecond", 0) / 1e6
                        + getattr(dt, "nanosecond", 0) / 1e9)

    half_window = window_minutes * 60
    x, y, z = [], [], []
    for delta in range(-half_window, half_window + 1, step_seconds):
        # Offsets are whole seconds, so the sub-second part of the center
        # epoch is the same for every sample and is re-attached below.
        t_step = dt + timedelta(seconds=delta)
        t = ts.utc(t_step.year, t_step.month, t_step.day,
                   t_step.hour, t_step.minute, t_step.second + frac_seconds)
        pos = sat.at(t).position.km
        x.append(pos[0])
        y.append(pos[1])
        z.append(pos[2])
    return x, y, z

# Load the conjunction table into pandas
df = spark.read.table("workspace.default.cdms_data_profile_v2").toPandas()

# --- 3D figure setup ---
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection="3d")

# One blue/red trajectory pair per conjunction row
for _, row in df.iterrows():
    tca = row["TCA"]

    # Sample both objects around the same TCA
    x1, y1, z1 = get_trajectory_from_tle(row["TLE_LINE1"], row["TLE_LINE2"], tca)
    x2, y2, z2 = get_trajectory_from_tle(row["tle1_2"], row["tle2_2"], tca)

    ax.plot(x1, y1, z1, color="blue", label=f"Sat 1 ({row['SAT_1_ID']})")
    ax.plot(x2, y2, z2, color="red", label=f"Sat 2 ({row['SAT_2_ID']})")

    # Mark the middle sample of each track; with the default window and step
    # that sample sits at zero offset from the TCA.
    mid1 = len(x1) // 2
    mid2 = len(x2) // 2
    ax.scatter(x1[mid1], y1[mid1], z1[mid1], color="blue", s=50, marker="o")
    ax.scatter(x2[mid2], y2[mid2], z2[mid2], color="red", s=50, marker="x")

# Axis labels, title and a legend placed below the axes
ax.set_xlabel("X (km)")
ax.set_ylabel("Y (km)")
ax.set_zlabel("Z (km)")
ax.set_title("3D Trajectories around TCA")
ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), ncol=2)
plt.show()

In [0]:
%python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Read back the stored ECI positions
df = spark.read.table("workspace.default.eci_coordinates").toPandas()

# Per-axis separation between the two objects (same units as the stored columns)
dx = df["eci_1_x"] - df["eci_2_x"]
dy = df["eci_1_y"] - df["eci_2_y"]
dz = df["eci_1_z"] - df["eci_2_z"]

# Euclidean separation
df["miss_distance"] = np.sqrt(dx**2 + dy**2 + dz**2)

# Parameters (approximation)
sigma = 100  # [m] uncertainty (assumed)
radius = 5   # [m] combined hard-body radius
A = np.pi * radius**2  # effective collision area

# Collision probability (simplified): area ratio times a Gaussian fall-off.
# The x1000 converts the separation to the metre scale used by sigma/radius.
miss_m = df["miss_distance"] * 1000
df["Pc"] = (A / (2 * np.pi * sigma**2)) * np.exp(-(miss_m**2) / (2 * sigma**2))

# Histogram of Pc, log-scaled counts
fig, ax = plt.subplots(figsize=(8, 6))
ax.hist(df["Pc"], bins=50, color="skyblue", edgecolor="black", alpha=0.7)
ax.set_xlabel("Collision Probability (Pc)")
ax.set_ylabel("Frequency")
ax.set_title("Collision Probability Distribution from ECI Dataset")
ax.set_yscale("log")
ax.grid(True, linestyle="--", alpha=0.6)
plt.show()


In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load data
df = spark.read.table("workspace.default.eci_coordinates").toPandas()

# Compute relative distance [km]
df["miss_distance_km"] = np.sqrt(
    (df["eci_1_x"] - df["eci_2_x"])**2 +
    (df["eci_1_y"] - df["eci_2_y"])**2 +
    (df["eci_1_z"] - df["eci_2_z"])**2
)

# Parameters (approximated)
sigma = 100  # [m] position uncertainty
radius = 5   # [m] combined hard-body radius
A = np.pi * radius**2  # [m²] effective collision cross-section

# Collision probability (simplified Gaussian); x1000 converts km -> m so the
# distance is on the same scale as sigma
df["Pc"] = (A / (2 * np.pi * sigma**2)) * np.exp(- (df["miss_distance_km"]*1000)**2 / (2*sigma**2))

# Create a label for sat pairs. Cast to str first so the concatenation also
# works when the ID columns are numeric rather than text.
df["sat_pair"] = df["sat_1_id"].astype(str) + " vs " + df["sat_2_id"].astype(str)

# Rank once and reuse the result for both the annotations and the table
top_n = 10  # number of riskiest pairs to annotate and list
top_risk = df.sort_values("Pc", ascending=False).head(top_n)

# Scatter plot Pc vs Miss Distance with sat pairs
plt.figure(figsize=(10,7))
plt.scatter(df["miss_distance_km"], df["Pc"], alpha=0.7, c="crimson", edgecolors="black")
plt.xlabel("Miss Distance [km]")
plt.ylabel("Collision Probability (Pc)")
plt.title("Collision Probability vs Miss Distance (by Satellite Pair)")
plt.yscale("log")
plt.grid(True, linestyle="--", alpha=0.6)

# Annotate top N riskiest pairs
for _, row in top_risk.iterrows():
    plt.annotate(row["sat_pair"], 
                 (row["miss_distance_km"], row["Pc"]), 
                 textcoords="offset points", xytext=(5,5), fontsize=8,
                 bbox=dict(facecolor='white', alpha=0.7, edgecolor='none'))

plt.show()

# Show table of riskiest pairs (same rows that were annotated above)
risk_table = top_risk[["sat_pair", "miss_distance_km", "Pc"]]
display(risk_table)