# Querying the Airport Arrival Demand Chart fly.faa.gov page
There is some interesting data available via [this](https://www.fly.faa.gov/aadc/) endpoint about the status of flights, broken down by airport. See [here](https://www.faa.gov/air_traffic/flight_info/aeronav/aero_data/Loc_ID_Search/Encodes_Decodes/) and [here](https://www.faa.gov/documentLibrary/media/Order/7350.9BB_LID_dtd_7-14-22.pdf) for locator identifier code reference. 

In [None]:
# HTTP
import requests

# Some string manipulation
import string

# HTML/XML
from bs4 import BeautifulSoup

In [None]:
# Attempts to retrieve airline name from three letter code via https://www.faa.gov/air_traffic/publications/atpubs/cnt_html/chap3_section_3.html or other sources
# TODO
def airline_name_from_code(target_carrier):
  """Placeholder for the three-letter carrier code -> airline name lookup.

  Not implemented yet: the argument is ignored and None is always returned.
  """
  return None

In [None]:
# Attempts to retrieve city and state given FAA airport locator ID, IATA or ICAO code. 
# First try https://www.faa.gov/air_traffic/flight_info/aeronav/aero_data/Loc_ID_Search/Encodes_Decodes/ and then https://www.iata.org/en/publications/directories/code-search/ 
def airport_city_from_lid(target_lid):
  """Resolve an airport code to a readable location string.

  The FAA locator ID search is queried first. If it yields no usable match,
  the IATA code search page is scraped instead.

  Args:
    target_lid: Airport code as a string (compared case-insensitively).

  Returns:
    "City, State" when the FAA lookup matches, the city name alone when only
    the IATA lookup matches, or "Unknown" when neither lookup succeeds.

  Raises:
    requests.RequestException: On connection failure, timeout, or an HTTP
      error status from either endpoint (via raise_for_status()).
    ValueError: If the FAA endpoint answers with a body that is not JSON.
  """
  lid = target_lid.upper()

  # Without a timeout requests waits forever on a stalled server
  request_timeout = 30
  browser_user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36"

  # We will be sending a POST request to this url
  faa_lid_url = "https://nfdc.faa.gov/nfdcApps/controllers/PublicDataController/getLidData"
  faa_req_headers = {
      "User-Agent" : browser_user_agent,
      "Referer" : "https://www.fly.faa.gov/aadc/",
      "Origin" : "https://www.faa.gov"
  }

  # Dictionary for post form data:
  faa_lid_data = {
      "dataType" : "LIDENCODESDECODES",
      "start" : "0", # Starting index
      "length" : "1000", # Max we can request back at a time
      "sortcolumn" : "locator_id",
      "sortdir" : "asc",
      "searchval" : lid,
      "r" : "" # Not sure what this does, seems to be set to some sort of hash when inspecting requests made in browser. Seems to work fine without it or left blank
  }

  # TODO: Handle if there are more than 1000 results to our query. Shouldn't happen but possible
  resp = requests.post(url = faa_lid_url, headers = faa_req_headers, data = faa_lid_data, timeout = request_timeout)
  resp.raise_for_status()
  result_dict = resp.json()

  # No results will mean no totalrows key in the response. Anything that is not the expected
  # dict / list shape is treated as "no results" so that we fall through to the IATA lookup.
  faa_rows = []
  if isinstance(result_dict, dict) and "totalrows" in result_dict:
    faa_rows = result_dict.get("data")
    if not isinstance(faa_rows, list):
      faa_rows = []

  for current_result in faa_rows:
    # An airport should have both a tie-in facility and ARTCC. If there are multiple results which match our LID
    # and have a tie-in and ARTCC, trust the city + state of the first we encounter.
    try:
      if (current_result.get("locator_id").upper() == lid and "tie_facility" in current_result
          and "artcc_id" in current_result):
        return string.capwords(current_result["city"]) + ", " + string.capwords(current_result["state"])
    except (AttributeError, KeyError, TypeError):
      # Malformed row (missing or non-string field): skip it and keep looking
      continue

  # No usable FAA result. Try the IATA website; let requests build and encode the query string
  iata_req_url = "https://www.iata.org/en/publications/directories/code-search/"
  iata_req_headers = { "User-Agent" : browser_user_agent }

  resp = requests.get(url = iata_req_url, params = { "airport.search" : lid }, headers = iata_req_headers, timeout = request_timeout)
  resp.raise_for_status()

  # HTML response. There should only be one <table> (of class "datatable"). No results if it doesn't exist
  soup = BeautifulSoup(resp.content, "html.parser")
  result_table = soup.find("table", class_ = "datatable")
  if result_table is not None:

    # First column of the first body row should be the city. A table without a body, row or cell
    # makes one of the chained lookups hit None, which we treat as "no result"
    try:
      city_name = result_table.tbody.tr.find("td").text.strip()
    except AttributeError:
      city_name = ""

    if city_name:
      return string.capwords(city_name)

  # In the case it is an ICAO number, I have chosen to use https://airportsbase.org/ as my reference
  # TODO

  # Failed all lookups
  return "Unknown"

In [None]:
# Call /api/airports/ to get a list of available airports we can query. Airports are given in FAA locator ID, IATA, or ICAO airport code format
def list_aadc_airports():
  """Fetch the list of airports that the AADC API can be queried for.

  Returns:
    The decoded JSON body of /aadc/api/airports, or an empty list when the
    body is empty (including a JSON null).

  Raises:
    requests.RequestException: On connection failure, timeout, or an HTTP
      error status (via raise_for_status()).
    ValueError: If the response body is not JSON.
  """

  # URL and headers
  airport_list_url = "https://www.fly.faa.gov/aadc/api/airports"
  req_headers = {
      "User-Agent" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36",
      "Referer" : "https://www.fly.faa.gov/aadc/"
  }

  # Without a timeout requests waits forever on a stalled server
  resp = requests.get(url = airport_list_url, headers = req_headers, timeout = 30)
  resp.raise_for_status()

  # Any empty / null body collapses to an empty list so callers can always iterate the result
  return resp.json() or []

# Querying a specific airport 
Calling `/api/airports/LOCATOR_ID` returns a JSON object with the following structure:


```
{
   "name":"PHX",
   "totalFlightCount":"639",
   "cancelledFlightCount":2,
   "dateTime":"261740",
   "month":"9",
   "day":"26",
   "year":"2022",
   "defaultAarRate":"74",
   "control":"No GDP",
   "rates":[
      "72",
      ...
   ],
   "fixes":[
      "EAGUL",
      "PINNG",
      "HYDRR",
      "BRUSR",
      "OTHER",
      ...
   ],
   "timeBuckets":[
      {
         "day":"26",
         "time":"1600",
         "counts":[
            {
               "type":"STATUS",
               "name":"Past Dept Time",
               "count":"0"
            },
            {
               "type":"STATUS",
               "name":"Departing",
               "count":"0"
            },
            {
               "type":"STATUS",
               "name":"EDCT Issued",
               "count":"0"
            },
            {
               "type":"STATUS",
               "name":"Irregular",
               "count":"0"
            },
            {
               "type":"STATUS",
               "name":"Flight Active",
               "count":"0"
            },
            {
               "type":"STATUS",
               "name":"Arrived",
               "count":"12"
            },
            {
               "type":"CENTER",
               "name":"ZAB",
               "count":"2"
            },
            ...,
            {
               "type":"FIX",
               "name":"EAGUL",
               "count":"5"
            },
            ...
         ],
         "flights":[
            {
               "acid":"AAL2574",
               "type":"A319",
               "origin":"STL",
               "etd":"A26/1312",
               "destination":"PHX",
               "eta":"A26/1601",
               "ete":"163",
               "departureCenter":"ZKC",
               "majorAirline":"AAL"
            },
            ...
         ]
      },
      {
         "day":"26",
         "time":"1615", # New dictionary for each 15 minute increment...
         ...
```



In [None]:
# Call /api/airports/LOCATOR_ID for the given airport. 
def query_airport_aadc(target_id):
  """Fetch the AADC data for a single airport from /aadc/api/airports/LOCATOR_ID.

  Args:
    target_id: Airport code as a string (compared case-insensitively).

  Returns:
    The decoded JSON dictionary when its "name" field matches target_id,
    otherwise an empty dict.

  Raises:
    requests.RequestException: On connection failure, timeout, or an HTTP
      error status (via raise_for_status()).
    ValueError: If the response body is not JSON.
  """
  airport_id = target_id.upper()

  # URL and headers
  req_url = "https://www.fly.faa.gov/aadc/api/airports/{}".format(airport_id)
  req_headers = {
      "User-Agent" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36",
      "Referer" : "https://www.fly.faa.gov/aadc/"
  }

  # Without a timeout requests waits forever on a stalled server
  resp = requests.get(url = req_url, headers = req_headers, timeout = 30)
  resp.raise_for_status()
  resp_dict = resp.json()

  # Verify that the "name" field we receive matches the ID we entered. A body that is not a
  # dictionary, or has no string "name", counts as a mismatch rather than an error
  if isinstance(resp_dict, dict):
    reported_name = resp_dict.get("name")
    if isinstance(reported_name, str) and reported_name.upper() == airport_id:
      return resp_dict

  # Otherwise return empty dict
  return {}

# Aggregating cancelled flights, EDCTs
The total and cancelled flight counts can be found at the top level of the dictionary returned by `/aadc/api/airports/LID`. EDCTs issued and other per-status counts are available in the `counts` list of each entry in `timeBuckets`.

In [None]:
"""
Lists general counts garnered from the first level of the result dictionary and the "counts" sub dictionaries. Returns a dictionary of structure:
{
  "lid" : "AIRPORT_LID",
  "date" : "MM/DD/YYYY",
  "total_flights" : TOTAL_NUM_FLIGHTS,
  "cancelled" : TOTAL_NUM_CANCELLED,
  "irreg" : TOTAL_NUM_IRREGULAR,
  "edcts" : TOTAL_NUM_EDCT,
}
"""
import traceback

def get_daily_airport_arrival_counts(aadc_result):
  """Summarise one airport's AADC result into daily totals.

  Args:
    aadc_result: Dictionary shaped like the result of query_airport_aadc.

  Returns:
    An empty dict when aadc_result has no "name". Otherwise a dict with the
    keys "lid", "date" (month/day/year), "total_flights", "cancelled",
    "irreg" and "edcts". This is best effort: if a value cannot be read as
    expected a message is printed and the keys gathered so far are returned,
    so "total_flights" / "cancelled" may be absent.
  """

  # Check we were passed a dictionary with at least an airport name
  if not aadc_result.get("name"):
    print("Unknown AADC result dictionary passed, unable to generate stats.")
    return {}

  # Dict we will return. Initialize the values we will be incrementing to 0
  daily_stats = {"irreg" : 0, "edcts" : 0}

  # Counter names we tally, mapped to the key they are accumulated under
  tallied_counters = {"Irregular" : "irreg", "EDCT Issued" : "edcts"}

  try:
    # First copy items from first level of dictionary
    daily_stats["lid"] = aadc_result.get("name")
    daily_stats["date"] = "{}/{}/{}".format(aadc_result.get("month"), aadc_result.get("day"), aadc_result.get("year"))
    daily_stats["total_flights"] = int(aadc_result.get("totalFlightCount"))
    daily_stats["cancelled"] = int(aadc_result.get("cancelledFlightCount"))

    # Now step through timeBuckets->counts dicts to tally up irregulars and EDCT's issued.
    # A missing list is treated as empty so one incomplete bucket does not drop the rest
    for current_bucket in aadc_result.get("timeBuckets") or []:
      for current_counter in current_bucket.get("counts") or []:
        stat_key = tallied_counters.get(current_counter.get("name"))
        if stat_key:
          daily_stats[stat_key] += int(current_counter.get("count"))

  # A value was missing or not of the expected type (e.g. int(None), or a non-dict bucket)
  except (AttributeError, TypeError, ValueError):
    print("Failed to read airport {} AADC result dictionary as expected. Information may be missing".format(aadc_result.get("name")))

  return daily_stats

# Aggregating Airline and Origin Stats
Within the flights list for each timebucket, relatively detailed information for each flight is given:


```
"flights":[
            {
               "acid":"AAL2574",
               "type":"A319",
               "origin":"STL",
               "etd":"A26/1312",
               "destination":"PHX",
               "eta":"A26/1601",
               "ete":"163",
               "departureCenter":"ZKC",
               "majorAirline":"AAL"
            },
            ...
         ]
```

The function below tallies the origin airports and major airlines of arriving flights, given a result from `query_airport_aadc`.

In [None]:
"""
Takes a dictionary from query_airport_aadc as input
A dictionary of the following structure is returned:
{
  "lid" : "AIRPORT_LID",
  "date" : "MM/DD/YYYY",
  "total_flights" : TOTAL_NUM_FLIGHTS,
  "airlines" : 
  {
    "AIRLINE_NAME" : NUM_OF_FLIGHTS,
    ...
  },
  "origins" : 
  {
    "ORIGIN_AIRPORT" : NUM_OF_FLIGHTS_FROM,
    ...
  }
}
"""
def get_daily_arrival_airlines_origins(aadc_result):
  """Count arriving flights per major airline and per origin airport.

  Args:
    aadc_result: Dictionary shaped like the result of query_airport_aadc.

  Returns:
    An empty dict when aadc_result has no "name". Otherwise a dict with the
    keys "lid", "date" (month/day/year), "total_flights", "airlines"
    (airline -> flight count) and "origins" (origin -> flight count). This is
    best effort: if a value cannot be read as expected a message is printed
    and whatever was gathered so far is returned.
  """

  # Check we were passed a dictionary with at least an airport name
  if not aadc_result.get("name"):
    print("Unknown AADC result dictionary passed, unable to list airlines and origin airports.")
    return {}

  # Returned dictionary
  master_dict = {
      "lid" : "",
      "date" : "",
      "total_flights" : 0,
      "airlines" : {},
      "origins" : {}
  }
  airline_counts = master_dict["airlines"]
  origin_counts = master_dict["origins"]

  try:
    # Get info from first level of the AADC result dict
    master_dict["lid"] = aadc_result.get("name")
    master_dict["date"] = "{}/{}/{}".format(aadc_result.get("month"), aadc_result.get("day"), aadc_result.get("year"))
    master_dict["total_flights"] = int(aadc_result.get("totalFlightCount"))

    # Step through timeBuckets->flights (a list of dictionaries). A missing list is treated
    # as empty so one incomplete bucket does not drop the buckets that follow it
    for current_bucket in aadc_result.get("timeBuckets") or []:
      for current_flight in current_bucket.get("flights") or []:

        current_origin = current_flight.get("origin")
        current_airline = current_flight.get("majorAirline")

        # Update origin and airline counts accordingly
        origin_counts[current_origin] = origin_counts.get(current_origin, 0) + 1
        airline_counts[current_airline] = airline_counts.get(current_airline, 0) + 1

  # A value was missing or not of the expected type (e.g. int(None), or a non-dict flight)
  except (AttributeError, TypeError, ValueError):
    print("Failed to read airport {} AADC result dictionary as expected. Information may be missing".format(aadc_result.get("name")))

  return master_dict

# POC and Testing

In [None]:
# Testing get_daily_arrival_airlines_origins to list the most common origin airports and airlines for today's arrivals

# Create master dicts for origins and airlines
origins_masterdict = {}
airlines_masterdict = {}
airport_list = list_aadc_airports()

for current_airport in airport_list:

  aadc_result = query_airport_aadc(current_airport)

  # query_airport_aadc returns {} when the response does not match the requested airport,
  # in which case there is nothing to tally for it
  if not aadc_result:
    continue

  current_origins_airlines = get_daily_arrival_airlines_origins(aadc_result)

  # .get() because an empty dict comes back when the result could not be recognised
  current_origins = current_origins_airlines.get("origins", {})
  current_airlines = current_origins_airlines.get("airlines", {})

  # Loop through this airport's dict of arrival origins. Add the count to the appropriate master dict value
  for current_origin, count_read in current_origins.items():
    origins_masterdict[current_origin] = origins_masterdict.get(current_origin, 0) + count_read

  # Same for airlines of arriving flights
  for current_airline, count_read in current_airlines.items():
    airlines_masterdict[current_airline] = airlines_masterdict.get(current_airline, 0) + count_read

# Sort both tallies by count, largest first (dicts keep insertion order)
sorted_origins = dict(sorted(origins_masterdict.items(), key = lambda origin_pair : origin_pair[1], reverse = True))
sorted_airlines = dict(sorted(airlines_masterdict.items(), key = lambda airline_pair : airline_pair[1], reverse = True))

print("Sorting origin airports:")
for origin, count in sorted_origins.items():
  # NOTE: airport_city_from_lid performs network lookups, so this issues requests for every origin printed
  print("{} arrivals from {} ({})".format(count, origin, airport_city_from_lid(origin)))

print("-"*20)
print("Sorting arriving airlines:")
for airline, count in sorted_airlines.items():
  print("{} arrivals via {}".format(count, airline))

In [None]:
# Testing above methods to list stats about daily arrival totals and cancellations
from datetime import date

airport_list = list_aadc_airports()

# Hold list of general stats dicts
masterstats = []

for current_port in airport_list:
  query_result = query_airport_aadc(current_port)
  if query_result:
    current_stats = get_daily_airport_arrival_counts(query_result)
    if current_stats:
      masterstats.append(current_stats)

# totals. A stats dict may be partial (counts missing) when its source could not be fully read,
# so missing counts are treated as 0 instead of crashing on int(None)
totalflights = 0
cancelledflights = 0

for airport in masterstats:
  totalflights += int(airport.get("total_flights") or 0)
  cancelledflights += int(airport.get("cancelled") or 0)

# Guard against dividing by zero when no flights were reported at all
if totalflights == 0:
  daily_cancel_rate = 0.0
else:
  daily_cancel_rate = round((float(cancelledflights) / float(totalflights)) * 100, 4)

print("Today's date: {}".format(date.today()))
print("Total flights today: {}".format(totalflights))
print("Cancelled flights today: {} ({}%)".format(cancelledflights, daily_cancel_rate))
print("Sorting airports by cancelled flights...")

# Sort by number cancelled, largest first
sorted_by_cancelled = sorted(masterstats, key = lambda item: item.get("cancelled") or 0, reverse = True)
for airport in sorted_by_cancelled:
  airport_cancelled = airport.get("cancelled") or 0
  airport_total = airport.get("total_flights") or 0

  if airport_total == 0:
    cancel_rate = 0.0000
  else:
    cancel_rate = round((float(airport_cancelled) / float(airport_total)) * 100, 4)
  print("{} cancellations at {} ({}). {}% daily rate".format(airport_cancelled, airport.get("lid"), airport_city_from_lid(airport.get("lid")), cancel_rate))

Today's date: 2022-11-15
Total flights today: 28380
Cancelled flights today: 366 (1.2896%)
Sorting airports by cancelled flights...
15 cancellations at PBI (West Palm Beach, Florida). 7.0755% daily rate
14 cancellations at BNA (Nashville, Tennessee). 3.4398% daily rate
14 cancellations at TEB (Teterboro, New Jersey). 5.2632% daily rate
13 cancellations at LAS (Las Vegas). 1.7735% daily rate
12 cancellations at AHN (Athens, Georgia). 11.6505% daily rate
11 cancellations at CMH (Columbus, Ohio). 6.4706% daily rate
11 cancellations at HPN (White Plains, New York). 8.3969% daily rate
11 cancellations at IAD (Washington, Dist. Of Columbia). 2.4017% daily rate
11 cancellations at MDW (Chicago, Illinois). 3.0556% daily rate
11 cancellations at RDU (Raleigh/durham, North Carolina). 4.3651% daily rate
10 cancellations at CYVR (Unknown). 3.003% daily rate
10 cancellations at DAL (Dallas, Texas). 2.8249% daily rate
9 cancellations at CYUL (Unknown). 3.01% daily rate
9 cancellations at ORD (Chicag