In [0]:
%sql
-- View of the IATA codes of bronze airport rows where at least one of
-- latitude, longitude, airport, city, state or country is NULL.
CREATE OR REPLACE VIEW aviation_project.bronze.airports_nulls AS
SELECT IATA_CODE
FROM aviation_project.bronze.airports
WHERE latitude IS NULL
   OR longitude IS NULL
   OR airport IS NULL
   OR city IS NULL
   OR state IS NULL
   OR country IS NULL

In [0]:
# Load the view of airports with missing fields as a Spark DataFrame.
df = spark.table("aviation_project.bronze.airports_nulls")

In [0]:
# Collect the rows to the driver and keep only the IATA code of each one.
iata_codes = [record["IATA_CODE"] for record in df.collect()]

In [0]:
# Print the collected IATA codes for a quick visual check.
print(iata_codes)

In [0]:
import requests
import time

def fetch_airport_data(iata_codes, sleep_seconds=0.5):
    """
    Fetch airport details for a list of IATA codes from airport-data.com API.

    Codes are looked up one at a time. A lookup that fails (exception, non-200
    HTTP status, empty or unusable payload) is reported with ``print`` and
    skipped, so a single bad code never aborts the whole run.

    :param iata_codes: iterable of IATA codes (e.g. ["JFK", "LAX"])
    :param sleep_seconds: delay between consecutive API calls to avoid rate limits
    :return: list of airport records (dict); each record is the decoded JSON
        payload plus an ``iata_code`` key holding the code that was requested
    """
    base_url = "https://airport-data.com/api/ap_info.json"
    # Example request: https://airport-data.com/api/ap_info.json?iata=DEF
    results = []

    for position, iata in enumerate(iata_codes):
        # Rate-limit protection: pause only *between* requests. There is
        # nothing to wait for before the first call or after the last one.
        if position:
            time.sleep(sleep_seconds)

        try:
            response = requests.get(base_url, params={"iata": iata}, timeout=10)

            if response.status_code != 200:
                print(f"Failed for {iata}, status: {response.status_code}")
                continue

            data = response.json()

            # An empty payload is treated as "airport not found".
            if not data:
                print(f"No data found for IATA: {iata}")
                continue

            # Only a JSON object can be tagged with the requested code below.
            if not isinstance(data, dict):
                print(f"Unexpected payload for {iata}: {type(data).__name__}")
                continue

            # NOTE(review): the payload appears to carry its own `status`
            # field (the notebook's schema declares one). An explicit non-200
            # value is assumed to mean the lookup failed even though the HTTP
            # call succeeded - confirm against the API documentation.
            payload_status = data.get("status")
            if payload_status is not None and str(payload_status) != "200":
                print(f"No data found for IATA: {iata} (API status: {payload_status})")
                continue

            data["iata_code"] = iata  # keep original code
            results.append(data)

        except Exception as e:
            # Deliberately broad: this is a best-effort batch, so any failure
            # for one code is logged and the loop moves on to the next code.
            print(f"Error fetching {iata}: {e}")

    return results


In [0]:

# Look up every collected IATA code through the API helper, using its default pause between calls.
airport_data = fetch_airport_data(iata_codes)

In [0]:
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Explicit schema for the API records: every field is nullable, all are
# strings except the integer `status`. Field order is preserved as listed.
_api_string_columns = (
    "icao",
    "iata",
    "iata_code",
    "name",
    "location",
    "country",
    "country_code",
    "longitude",
    "latitude",
    "link",
)

api_schema = StructType(
    [StructField(column, StringType(), True) for column in _api_string_columns]
    + [StructField("status", IntegerType(), True)]
)




In [0]:
# Turn the list of API records into a DataFrame using the explicit schema,
# then print a sample of the rows.
api_df = spark.createDataFrame(data=airport_data, schema=api_schema)
api_df.show()

In [0]:
from pyspark.sql.functions import col, split, trim

# `location` is split on commas: the first piece becomes CITY and the second
# becomes STATE (both trimmed). The remaining columns are renamed to the
# upper-case names used below, and the coordinates are cast to double.
location_parts = split(col("location"), ",")

final_df = api_df.select(
    col("iata_code").alias("IATA_CODE"),
    col("name").alias("AIRPORT"),
    trim(location_parts[0]).alias("CITY"),
    trim(location_parts[1]).alias("STATE"),
    col("country").alias("COUNTRY"),
    col("latitude").cast("double").alias("LATITUDE"),
    col("longitude").cast("double").alias("LONGITUDE"),
)
final_df.show()

In [0]:
# Persist the result as a Delta table; "overwrite" mode replaces the table's contents on each run.
(
    final_df.write
    .mode("overwrite")
    .format("delta")
    .saveAsTable("aviation_project.silver.airports_api")
)

In [0]:
%sql
-- Inspect the rows written to the silver API table.
SELECT *
FROM aviation_project.silver.airports_api

In [0]:
%sql
-- API-sourced airport rows whose IATA code also exists in silver.airports.
-- NOTE(review): the FROM clause previously read `from a join ...`, with no
-- table behind alias `a`. The selected columns match
-- aviation_project.silver.airports_api, so `a` is bound to that table here -
-- confirm this is the intended source.
select a.IATA_CODE, a.AIRPORT, a.CITY, a.STATE, a.COUNTRY, a.LATITUDE, a.LONGITUDE
from aviation_project.silver.airports_api a
join aviation_project.silver.airports b on a.IATA_CODE = b.IATA_CODE
