**Import libraries**

In [0]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import when, col, lit, to_date, log10, round
from datetime import datetime, timedelta

**Logging**

In [0]:
# Shared session handle used by every cell below; getOrCreate() returns the
# already-running session when there is one instead of starting a second.
spark = (
    SparkSession.builder
    .appName("Neo Analysis")
    .getOrCreate()
)

# Logging 
def log_event(level, message):
    """Append a single log record to the 'neo_logs' Delta table.

    Args:
        level: Value stored in the "level" column (this notebook passes
            "INFO", "SUCCESS" and "ERROR").
        message: Value stored in the "message" column.

    The "timestamp" column is filled with datetime.now() at call time.
    Uses the module-level `spark` session.
    """
    column_names = ["timestamp", "level", "message"]
    record = (datetime.now(), level, message)

    # One-row DataFrame holding just this event.
    entry_df = spark.createDataFrame([record], column_names)

    # Append so that earlier log rows are kept.
    writer = entry_df.write.format("delta").mode("append")
    writer.saveAsTable("neo_logs")


**Gold layer function: build the table and add the objects/aggregations required for analytics**

In [0]:
def create_gold():
    """Build the gold-layer 'neo_analysis' Delta table from 'neo_approaches'.

    Steps:
      1. Read the silver table 'neo_approaches' via the module-level `spark`.
      2. Keep only rows where estimated_diameter_min, estimated_diameter_max,
         orbital_period_years and miss_distance_km are all non-null and
         in_50yr_window is true.
      3. Project the analytics columns, rounding the numeric measures to
         2 decimal places and renaming some of them.
      4. Overwrite the 'neo_analysis' table (Delta format, mergeSchema on).

    Returns:
        None. The result is written to the 'neo_analysis' table.
    """
    df_silver = spark.table("neo_approaches")

    # Filter BEFORE projecting: the conditions use the silver column names
    # (estimated_diameter_min / estimated_diameter_max), which the projection
    # below renames. Filtering first keeps every reference resolvable against
    # the frame it is applied to, instead of relying on the analyzer to look
    # through the select. Rounding does not change null-ness, so the rows
    # kept are the same.
    df_valid = df_silver.filter(
        col("estimated_diameter_min").isNotNull()
        & col("estimated_diameter_max").isNotNull()
        & col("orbital_period_years").isNotNull()
        & col("miss_distance_km").isNotNull()
    ).filter(col("in_50yr_window") == True)

    df_gold = df_valid.select(
        "neo_id",
        "name",
        "close_approach_date",
        round(col("estimated_diameter_min"), 2).alias("diameter_min_m"),
        round(col("estimated_diameter_max"), 2).alias("diameter_max_m"),
        round(col("absolute_magnitude_h"), 2).alias("absolute_magnitude_h"),
        round(col("relative_velocity_km_h"), 2).alias("velocity_km_h"),
        round(col("relative_velocity_km_s"), 2).alias("velocity_km_s"),
        "miss_distance_km",
        # NOTE(review): despite the "_abs_" in the name, this is only the
        # rounded miss_distance_km; no abs() is applied. Confirm intent.
        round(col("miss_distance_km"), 2).alias("miss_distance_abs_km"),
        "size_avg",
        "size_category",
        "size_category_label",
        "in_50yr_window",
        round(col("orbital_period_days"), 2).alias("orbital_period_days"),
        round(col("orbital_period_years"), 2).alias("orbital_period_years"),
        "is_potentially_hazardous",
        "hazard_level",
        "hazard_category",
        "distance_category",
        "log_diameter_max",
        "log_miss_distance",
    )

    # Full refresh of the gold table on every run.
    df_gold.write.format("delta").mode("overwrite").option("mergeSchema", "true").saveAsTable("neo_analysis")


**Call pipeline process function to build and load 'neo_analysis' table**

In [0]:
# Run the gold-layer load, bracketing it with start/finish entries in the log table.
try:
    log_event("INFO", "Neo load started")

    create_gold()

    log_event("SUCCESS", "Neo load completed")
except Exception as e:
    # Recording the failure is best-effort: if writing the log row itself
    # fails, report that on stdout and still surface the original pipeline
    # error rather than the logging error.
    try:
        log_event("ERROR", f"Neo load failed: {str(e)}")
    except Exception as log_error:
        print(f"Could not write failure to neo_logs: {log_error}")
    # Bare raise re-raises the exception being handled with its traceback.
    raise