In [0]:
%python
import pandas as pd
from datetime import datetime, timedelta

# Source file: ELSET snapshot CSV stored in the workspace volume
file_path = '/Volumes/workspace/default/spacedata/ELSET_data_08092025.csv'

# Load the CSV with pandas and preview its leading rows
df = pd.read_csv(file_path)
display(df.head())

# Promote the pandas frame to a Spark DataFrame so it can be saved as a table
df_spark = spark.createDataFrame(df)

# The table name embeds the UTC timestamp (date and time of day) of one day ago
yesterday = datetime.utcnow() + timedelta(days=-1)
table_name = f"gp_latest_{yesterday.strftime('%Y%m%d_%H%M%S')}s"

# Persist the Spark DataFrame as a Delta table under that name
df_spark.write.saveAsTable(table_name, format="delta", mode="overwrite")

In [0]:
%python
import requests
import pandas as pd
from io import StringIO
from datetime import datetime

import os
import warnings

# Space-Track credentials are read from the environment so that no secret is
# stored in the notebook source or its revision history. Provide them as
# SPACETRACK_USERNAME / SPACETRACK_PASSWORD before running this cell.
# NOTE: the password that used to be hard-coded here has been exposed in
# source and should be rotated.
USERNAME = os.environ.get("SPACETRACK_USERNAME")
PASSWORD = os.environ.get("SPACETRACK_PASSWORD")
if not USERNAME or not PASSWORD:
    # Warn next to the configuration instead of leaving the reader to diagnose
    # a failed login further down the notebook.
    warnings.warn(
        "SPACETRACK_USERNAME / SPACETRACK_PASSWORD are not set; "
        "the Space-Track login will fail."
    )

# Space-Track query: latest 30 days of GP data in CSV format
# (EPOCH filter is ">now-30", URL-encoded; rows ordered by NORAD_CAT_ID, then EPOCH).
URL = (
    "https://www.space-track.org/basicspacedata/query/class/gp"
    "/EPOCH/%3Enow-30"
    "/orderby/NORAD_CAT_ID,EPOCH"
    "/format/csv"
)

def fetch_latest_tle(timeout=(10, 300)):
    """Download the Space-Track GP query result and persist it as a Delta table.

    Logs in with the module-level ``USERNAME`` / ``PASSWORD``, downloads the
    module-level ``URL`` as CSV, parses it with pandas, and writes the result
    to a Delta table named ``gp_latest_<UTC timestamp>s``.

    Args:
        timeout: ``requests`` timeout applied to the login and the download,
            either a single number of seconds or a
            ``(connect_seconds, read_seconds)`` tuple. Without a timeout a
            stalled connection would block the notebook indefinitely.

    Returns:
        pandas.DataFrame: the parsed CSV payload.

    Raises:
        requests.HTTPError: if the login or the download returns an error status.
        requests.Timeout: if either request exceeds ``timeout``.
    """
    # Local import: the surrounding cell only imports `datetime` from the module.
    from datetime import timezone

    login_url = "https://www.space-track.org/ajaxauth/login"

    # The context manager closes the session's pooled connections even when
    # one of the requests raises.
    with requests.Session() as session:
        # Login
        resp = session.post(
            login_url,
            data={"identity": USERNAME, "password": PASSWORD},
            timeout=timeout,
        )
        resp.raise_for_status()

        # Download CSV
        resp = session.get(URL, timeout=timeout)
        resp.raise_for_status()
        csv_text = resp.text

    # Convert to DataFrame
    df = pd.read_csv(StringIO(csv_text))
    print(f"Downloaded {len(df)} records")

    # Save as Delta table; the name carries the current UTC timestamp.
    # datetime.now(timezone.utc) formats identically to the deprecated utcnow().
    df_spark = spark.createDataFrame(df)
    table_name = f"gp_latest_{datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')}s"
    df_spark.write.format("delta").mode("overwrite").saveAsTable(table_name)

    return df

# Run the download when this code is executed as the main module.
# NOTE(review): inside a notebook kernel `__name__` is normally "__main__", so
# this guard presumably fires every time the cell runs — confirm. The next cell
# calls fetch_latest_tle() again, which would repeat the download and write a
# second timestamped table.
if __name__ == "__main__":
    fetch_latest_tle()

In [0]:
# Call fetch_latest_tle() (defined in the previous cell) and keep the pandas
# DataFrame it returns. Note that the call itself performs a fresh download and
# Delta-table write before returning.
df = fetch_latest_tle()
# Dump the DataFrame to "gp_latest.csv" (a path relative to the working
# directory), omitting the index column.
df.to_csv("gp_latest.csv", index=False)

In [0]:
%python
# Read the exported CDM CSV from the volume; the first row supplies the column names.
df_csv = (
    spark.read
    .format("csv")
    .option("header", "true")
    .load("/Volumes/workspace/default/spacedata/cdms_export.csv")
)

# Store it as the Delta table default.cdms_export_delta, overwriting existing data.
df_csv.write.saveAsTable("default.cdms_export_delta", format="delta", mode="overwrite")

In [0]:
%python
# Read the filtered CDM snapshot CSV from the volume; the first row supplies the column names.
df_csv = (
    spark.read
    .format("csv")
    .option("header", "true")
    .load("/Volumes/workspace/default/spacedata/cdm_filtered_snapshot.csv")
)

# Store it as the Delta table default.cdms_filtered_delta, overwriting existing data.
df_csv.write.saveAsTable("default.cdms_filtered_delta", format="delta", mode="overwrite")

In [0]:
import requests, pandas as pd, time
def get_cdms_with_retry(session, url, max_retries=3, delay=2):
    """Fetch a JSON CDM listing and return it as a DataFrame, retrying on HTTP 500.

    Args:
        session: object exposing ``get(url)`` whose result provides
            ``status_code``, ``raise_for_status()`` and ``json()``
            (for example a logged-in ``requests.Session``).
        url: query URL that returns JSON.
        max_retries: total number of attempts made before giving up.
        delay: seconds to wait before the second attempt; doubled after
            every further HTTP 500.

    Returns:
        pandas.DataFrame with a fresh 0..n-1 index. When a ``TCA`` column is
        present it is parsed to datetimes (unparseable values become NaT) and
        the rows are sorted by it. When ``MISS_DISTANCE`` is present it is
        converted to numbers (unparseable values become NaN). An empty
        response yields an empty DataFrame.

    Raises:
        RuntimeError: if every attempt returned HTTP 500.
        Exception: whatever ``raise_for_status()`` raises for other error
            statuses (``requests.HTTPError`` for a real session).
    """
    for attempt in range(1, max_retries + 1):
        resp = session.get(url)
        if resp.status_code == 500:
            if attempt < max_retries:
                print(f"[{attempt}/{max_retries}] Error 500. Retrying in {delay}s...")
                time.sleep(delay)
                delay *= 2
            else:
                # No further attempt follows, so do not sleep or promise a retry.
                print(f"[{attempt}/{max_retries}] Error 500. No retries left.")
            continue

        resp.raise_for_status()
        data = resp.json()
        # A single JSON object is treated as a one-row result.
        if isinstance(data, dict):
            data = [data]
        df = pd.DataFrame(data)

        if "MISS_DISTANCE" in df:
            df["MISS_DISTANCE"] = pd.to_numeric(df["MISS_DISTANCE"], errors="coerce")
        if "TCA" in df:
            df["TCA"] = pd.to_datetime(df["TCA"], errors="coerce")
            # Sort only when the column exists: an empty response has no
            # columns at all and would otherwise raise KeyError here.
            df = df.sort_values("TCA")
        return df.reset_index(drop=True)

    # RuntimeError is still caught by callers that use `except Exception`.
    raise RuntimeError("Failed to fetch CDMs after multiple retries.")
# Usage
import os

session = requests.Session()

# Space-Track credentials are read from the environment so that no secret is
# stored in the notebook source. Provide SPACETRACK_USERNAME and
# SPACETRACK_PASSWORD before running this cell.
# NOTE: the password that used to be hard-coded here has been exposed in
# source and should be rotated.
USERNAME = os.environ.get("SPACETRACK_USERNAME")
PASSWORD = os.environ.get("SPACETRACK_PASSWORD")
if not USERNAME or not PASSWORD:
    # Fail before any network call is attempted with missing credentials.
    raise RuntimeError(
        "Set SPACETRACK_USERNAME and SPACETRACK_PASSWORD before running this cell."
    )

# Log in and surface HTTP-level login failures right away instead of letting
# them show up later as a failed CDM query.
login_resp = session.post(
    "https://www.space-track.org/ajaxauth/login",
    data={'identity': USERNAME, 'password': PASSWORD},
    timeout=30,
)
login_resp.raise_for_status()

cdm_url = "https://www.space-track.org/basicspacedata/query/class/cdm_public/orderby/TCA asc/format/json"
try:
    df_cdms = get_cdms_with_retry(session, cdm_url)
    print("Retrieved", len(df_cdms), "CDMs")
except Exception as exc:
    # Report why the full query failed before falling back, rather than
    # discarding the original error.
    print("Full CDM query failed:", repr(exc))
    print("Error persists. Trying fallback: limit 5...")
    # Fallback: same query restricted to 5 rows.
    cdm_url_small = cdm_url.replace("format/json", "limit/5/format/json")
    df_cdms = get_cdms_with_retry(session, cdm_url_small)
    print("Retrieved", len(df_cdms), "CDMs")
df_cdms.head()

In [0]:
%python
# Write the df_cdms pandas DataFrame (built by the CDM download cell) to
# "cdms_export.csv", omitting the index column.
# NOTE(review): the path is relative to the working directory, whereas an
# earlier cell reads /Volumes/workspace/default/spacedata/cdms_export.csv —
# confirm whether these are meant to be the same file, and whether this cell
# should run before that one.
df_cdms.to_csv("cdms_export.csv", index=False)

In [0]:
# Load the parsed orbital elements table.
df_orbital = spark.read.table("workspace.default.parsed_orbital_elements")

# Export it to the volume as CSV with a header row, replacing any previous export.
# NOTE(review): Spark's CSV writer normally creates a directory of part files at
# this path rather than a single .csv file — confirm that is what consumers expect.
(
    df_orbital.write
    .option("header", True)
    .mode("overwrite")
    .csv("/Volumes/workspace/default/spacedata/parsed_orbital_elements_export.csv")
)

In [0]:
# Inputs: parsed orbital elements and the filtered CDM table.
df_orbital = spark.read.table("workspace.default.parsed_orbital_elements")
df_cdms = spark.read.table("default.cdms_filtered_delta")

# Left join: every orbital-elements row is kept and matched to CDM rows whose
# SAT_1_ID equals its NORAD_CAT_ID.
df_joined = df_orbital.join(
    df_cdms,
    on=df_orbital["NORAD_CAT_ID"] == df_cdms["SAT_1_ID"],
    how="left",
)

display(df_joined)