**Import libraries**

In [0]:
import pandas as pd
import requests
from pyspark.sql import SparkSession
from pyspark.sql.types import StructType, StructField, StringType, DoubleType
from datetime import datetime

**Logging**

In [0]:
# Spark session shared by the logging and ingestion cells in this notebook.
spark = (
    SparkSession.builder
    .appName("Neo Ingestion")
    .getOrCreate()
)

def log_event(level, message):
    """Append one log record to the ``neo_logs`` Delta table.

    Args:
        level: Severity label stored in the ``level`` column.
        message: Text stored in the ``message`` column.

    The ``timestamp`` column is filled with ``datetime.now()`` at call time.
    """
    columns = ["timestamp", "level", "message"]
    row = (datetime.now(), level, message)
    entry_df = spark.createDataFrame([row], columns)
    entry_df.write.format("delta").mode("append").saveAsTable("neo_logs")

**Bronze layer - Ingestion: fetch data from the NASA NEO feed API**

In [0]:
def _fetch_json(url, params, timeout):
    """GET ``url`` with ``params`` and return the decoded JSON body.

    Raises ``requests.HTTPError`` on a 4xx/5xx response so that an API
    failure (for example a rejected key or rate limiting) is reported as
    such instead of surfacing later as a missing-key error or as silently
    defaulted values.
    """
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json()


def extract_neo_data(start_date="2015-09-07", end_date="2015-09-08",
                     api_key="DEMO_KEY", timeout=30):
    """Fetch near-earth-object data from the NASA NEO API as Spark DataFrames.

    The feed endpoint is queried once for the date range, then the per-object
    endpoint is queried once for every object returned by the feed to read
    its ``orbital_data``.

    Args:
        start_date: Feed ``start_date`` query parameter.
        end_date: Feed ``end_date`` query parameter.
        api_key: API key sent with every request.
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A tuple ``(df_neos, df_approaches, df_orbit)``:
          * ``df_neos`` - one row per object listed in the feed (id, name,
            absolute magnitude, min/max estimated diameter in meters,
            hazardous flag).
          * ``df_approaches`` - one row per close-approach entry, keyed by
            ``neo_id``.
          * ``df_orbit`` - one row per object with its orbital elements;
            fields absent from the API response are stored as 0.

    Raises:
        requests.HTTPError: If any API call returns an error status.
        KeyError: If the response lacks an expected field.
        ValueError: If the feed contains no objects for the date range.
    """
    feed_url = "https://api.nasa.gov/neo/rest/v1/feed"
    orbit_url = "https://api.nasa.gov/neo/rest/v1/neo/{}"

    feed = _fetch_json(
        feed_url,
        {"start_date": start_date, "end_date": end_date, "api_key": api_key},
        timeout,
    )

    # 1. Flatten the feed into NEO rows and close-approach rows.
    neos = []
    approaches = []
    for objects in feed["near_earth_objects"].values():
        for obj in objects:
            diameter_m = obj["estimated_diameter"]["meters"]
            neos.append({
                "id": obj["id"],
                "name": obj["name"],
                "absolute_magnitude_h": obj["absolute_magnitude_h"],
                "estimated_diameter_min": diameter_m["estimated_diameter_min"],
                "estimated_diameter_max": diameter_m["estimated_diameter_max"],
                "is_potentially_hazardous": obj["is_potentially_hazardous_asteroid"],
            })

            for approach in obj["close_approach_data"]:
                velocity = approach["relative_velocity"]
                approaches.append({
                    "neo_id": obj["id"],
                    "close_approach_date": approach["close_approach_date"],
                    "epoch_date_close_approach": approach["epoch_date_close_approach"],
                    "relative_velocity_km_h": float(velocity["kilometers_per_hour"]),
                    "relative_velocity_km_s": float(velocity["kilometers_per_second"]),
                    "miss_distance_km": float(approach["miss_distance"]["kilometers"]),
                    "orbiting_body": approach["orbiting_body"],
                })

    # The NEO and approach frames below rely on schema inference, which needs
    # at least one row, so fail early with a clear message instead.
    if not neos:
        raise ValueError(
            f"NEO feed returned no objects for {start_date} to {end_date}"
        )

    # 2. Fetch orbital details for each NEO.
    records = []
    for neo in neos:
        neo_id = neo["id"]
        detail = _fetch_json(orbit_url.format(neo_id), {"api_key": api_key}, timeout)
        orbital = detail.get("orbital_data", {})
        period_days = float(orbital.get("orbital_period", 0))
        records.append({
            "orbit_id": neo_id,
            "orbit_name": neo["name"],
            "orbital_period_days": period_days,
            "orbital_period_years": period_days / 365.25,
            "semi_major_axis": float(orbital.get("semi_major_axis", 0)),
            "eccentricity": float(orbital.get("eccentricity", 0)),
            "inclination": float(orbital.get("inclination", 0)),
        })

    # 3. Convert to Spark DataFrames once, after all rows have been collected.
    orbit_schema = StructType([
        StructField("orbit_id", StringType(), True),
        StructField("orbit_name", StringType(), True),
        StructField("orbital_period_days", DoubleType(), True),
        StructField("orbital_period_years", DoubleType(), True),
        StructField("semi_major_axis", DoubleType(), True),
        StructField("eccentricity", DoubleType(), True),
        StructField("inclination", DoubleType(), True),
    ])
    df_orbit = spark.createDataFrame(pd.DataFrame(records), schema=orbit_schema)
    df_neos = spark.createDataFrame(pd.DataFrame(neos))
    df_approaches = spark.createDataFrame(pd.DataFrame(approaches))

    return df_neos, df_approaches, df_orbit


**Bronze layer - Load: write the neos, approaches and neo_orbits tables**

In [0]:
def write_bronze(df_neos, df_approaches, df_orbit):
    """Overwrite the three bronze Delta tables with the given DataFrames.

    Args:
        df_neos: Frame saved as table ``neos``.
        df_approaches: Frame saved as table ``approaches``.
        df_orbit: Frame saved as table ``neo_orbits``.
    """
    targets = (
        ("neos", df_neos),
        ("approaches", df_approaches),
        ("neo_orbits", df_orbit),
    )
    for table_name, frame in targets:
        writer = frame.write.format("delta")
        writer.mode("overwrite").saveAsTable(table_name)

**Run the ingestion: extract the data and build the bronze tables**

In [0]:
# Run the bronze ingestion end to end, logging start, success, or failure.
try:
    log_event("INFO", "Neo ingestion started")
    neos_df, approaches_df, orbits_df = extract_neo_data()
    write_bronze(neos_df, approaches_df, orbits_df)
    log_event("SUCCESS", "Neo ingestion completed")
except Exception as exc:
    # Record the failure, then re-raise the same exception with its original
    # traceback so the error still propagates out of the cell.
    log_event("ERROR", f"Neo ingestion failed: {exc}")
    raise
