In [None]:
# HTTP 
import requests

# XML 
from bs4 import BeautifulSoup

# Airport Delay Status Endpoint
The Air Traffic Control System Command Center [FAQ](https://www.fly.faa.gov/FAQ/faq.jsp) contains the following at the bottom, under "Developer's Questions":


"I am building a web site, and would like access to your delay data. Is there some way for me to receive the data from you?

- Our airport status information is available in XML format. [Here](https://www.fly.faa.gov/AirportStatus.dtd) is the DTD. [Here](https://www.fly.faa.gov/flyfaa/xmlAirportStatus.jsp) is the status information."

This JSP file ends up redirecting to https://nasstatus.faa.gov/api/airport-status-information which holds the XML content. We can parse this content according to the DTD (document type definition) file. As an example, here is the content of airport-status-information at the time of writing:


```
<AIRPORT_STATUS_INFORMATION>
  <Update_Time>Tue Oct 18 03:10:16 2022 GMT</Update_Time>
  <Dtd_File>http://www.fly.faa.gov/AirportStatus.dtd</Dtd_File>
  <Delay_type>
    <Name>General Arrival/Departure Delay Info</Name>
    <Arrival_Departure_Delay_List>
      <Delay>
        <ARPT>DFW</ARPT>
        <Reason>TM INITIATIVES:NON-RNAV</Reason>
        <Arrival_Departure Type="Departure">
          <Min>15 minutes</Min>
          <Max>29 minutes</Max>
          <Trend>Decreasing</Trend>
        </Arrival_Departure>
      </Delay>
    </Arrival_Departure_Delay_List>
  </Delay_type>
</AIRPORT_STATUS_INFORMATION>
```






This first batch of **parse_** methods uses the DTD information to parse the different types of delay lists we can encounter. The relevant lines of the DTD file are included as a comment at the top of each method. 
TODO: Parse DTD on the fly instead of hardcoding.

In [None]:
"""
<!ELEMENT CTOP_List                    (CTOP+)>
<!ELEMENT CTOP                         (Program_Name, Reason, CTOP_Start_Time, CTOP_End_Time, Avg, FCA_Data)>
<!ELEMENT FCA_Data                     (FCA+)>
<!ELEMENT FCA                          (FCA_Name, FCA_Start_DateTime, FCA_End_DateTime, (Line|Polygon|Circle), Floor, Ceiling)>
Also see: https://www.fly.faa.gov/What_s_New/XML_is_changing.jsp 
"""
def parse_ctop_list(ctop_list_tag):
  """
  Placeholder parser for CTOP_List elements.

  Parameters:
    ctop_list_tag: the tag containing the CTOP list (currently unused)

  Returns:
    An empty list; CTOP parsing is not implemented yet.
  """
  # TODO: loop through the CTOP elements and, for each one, through its
  # child FCA elements, collecting one entry per CTOP.
  return []

In [None]:
"""
<!ELEMENT Airport_Closure_List         (Airport*)>
<!ELEMENT Airport                      (ARPT, Reason, Start, Reopen)>
"""
def parse_closure_list(closure_list_tag):
  """
  Collect every airport closure found under the given tag.

  Parameters:
    closure_list_tag: a BeautifulSoup tag whose descendants include the
      "airport" elements to read

  Returns:
    A list with one dict per "airport" element, holding the stripped text of
    its "arpt", "reason", "start" and "reopen" children under those same keys.
  """
  # The dict keys are identical to the child element names, in DTD order
  closure_fields = ("arpt", "reason", "start", "reopen")

  return [
      { field : airport.find(field).text.strip() for field in closure_fields }
      for airport in closure_list_tag.find_all("airport")
  ]

In [None]:
"""
<!ELEMENT Ground_Stop_List             (Program*)>
<!ELEMENT Program                      (ARPT, Reason, End_Time)>
"""
def parse_ground_stop_list(groundstop_list_tag):
  """
  Collect every ground stop program found under the given tag.

  Parameters:
    groundstop_list_tag: a BeautifulSoup tag whose descendants include the
      "program" elements to read

  Returns:
    A list with one dict per "program" element, with keys "arpt", "reason"
    and "endtime" holding the stripped text of the "arpt", "reason" and
    "end_time" children.
  """
  # Output key -> child element name (only the end time is spelled differently)
  element_for_key = {
      "arpt" : "arpt",
      "reason" : "reason",
      "endtime" : "end_time"
  }

  return [
      { key : program.find(element).text.strip() for key, element in element_for_key.items() }
      for program in groundstop_list_tag.find_all("program")
  ]

In [None]:
"""
<!ELEMENT Ground_Delay_List            (Ground_Delay*)>
<!ELEMENT Ground_Delay                 (ARPT, Reason, Avg, Max)>
"""
def parse_ground_delay_list(grounddelay_list_tag):
  """
  Collect every ground delay found under the given tag.

  Parameters:
    grounddelay_list_tag: a BeautifulSoup tag whose descendants include the
      "ground_delay" elements to read

  Returns:
    A list with one dict per "ground_delay" element, holding the stripped
    text of its "arpt", "reason", "avg" and "max" children under those keys.
  """
  # The dict keys are identical to the child element names, in DTD order
  delay_fields = ("arpt", "reason", "avg", "max")

  collected = []
  for delay_tag in grounddelay_list_tag.find_all("ground_delay"):
    collected.append({ field : delay_tag.find(field).text.strip() for field in delay_fields })
  return collected

In [None]:
"""
<!ELEMENT Airspace_Flow_List           (Airspace_Flow*)>
<!ELEMENT Airspace_Flow                (CTL_Element, Reason, FCA_Start_DateTime, FCA_End_DateTime, AFP_StartTime, AFP_EndTime, (Line|Polygon|Circle), Avg, Floor, Ceiling)>

<!ELEMENT Line                         (Point,Point+)>
<!ELEMENT Polygon                      (PointsList)>
<!ELEMENT Circle                       (Center)>
<!ELEMENT PointsList                   (Point,Point+)>
<!ELEMENT Center                       EMPTY>
<!ELEMENT Point                        EMPTY>
<!ATTLIST Point Lat                    CDATA #REQUIRED>
<!ATTLIST Point Long                   CDATA #REQUIRED>
<!ATTLIST Center Lat                   CDATA #REQUIRED>
<!ATTLIST Center Long                  CDATA #REQUIRED>
<!ATTLIST Circle Radius                CDATA #REQUIRED>
"""
def parse_flow_list(flow_list_tag):
  """
  Placeholder parser for Airspace_Flow_List elements.

  Parameters:
    flow_list_tag: the tag containing the airspace flow list (currently unused)

  Returns:
    An empty list; airspace flow parsing is not implemented yet.
  """
  # TODO: parse the Airspace_Flow elements
  no_flows = []
  return no_flows

In [None]:
"""
<!ELEMENT Arrival_Departure_Delay_List (Delay*)>
<!ELEMENT Delay                        (ARPT, Reason, Arrival_Departure, Arrival_Departure?)>

<!ELEMENT Arrival_Departure            (Min, Max, Trend)>
<!ATTLIST Arrival_Departure Type       (Arrival|Departure) #REQUIRED>
"""
def parse_ad_delay_list(ad_list_tag):
  """
  Collect every general arrival/departure delay found under the given tag.

  Parameters:
    ad_list_tag: a BeautifulSoup tag whose descendants include the "delay"
      elements to read

  Returns:
    A list with one dict per "delay" element:
      "arpt", "reason"   - stripped text of those child elements
      "arrival_delay"    - {"min", "max", "trend"} taken from the
                           "arrival_departure" child whose type attribute is
                           "Arrival"; left as {} when there is none
      "departure_delay"  - the same for type "Departure"
  """
  # Normalized value of the type attribute -> key of the sub-dictionary it fills
  slot_for_type = {
      "arrival" : "arrival_delay",
      "departure" : "departure_delay"
  }
  detail_fields = ("min", "max", "trend")

  parsed_delays = []
  for delay_tag in ad_list_tag.find_all("delay"):

    entry = {
        "arpt" : delay_tag.find("arpt").text.strip(),
        "reason" : delay_tag.find("reason").text.strip(),
        "arrival_delay" : {},
        "departure_delay" : {}
    }

    # A delay carries one or two arrival_departure structures; the type attribute is matched case-insensitively
    for direction_tag in delay_tag.find_all("arrival_departure"):
      slot = slot_for_type.get(direction_tag.get("type").strip().lower())
      if slot is None:
        # Any other type value is ignored
        continue
      for field in detail_fields:
        entry[slot][field] = direction_tag.find(field).text.strip()

    parsed_delays.append(entry)

  return parsed_delays

The above methods all support **get_airport_status_info**, the method which actually calls the endpoint:

In [None]:
# Main method which calls /api/airport-status-information
def get_airport_status_info(timeout = 30):
  """
  Call /api/airport-status-information and convert the XML response to a dict.

  Parameters:
    timeout: seconds to wait for the server before giving up; passed straight
      to requests.get, so that a stalled connection cannot hang the notebook

  Returns:
    {
      "update_time"    : text of the Update_Time element,
      "delays_by_type" : { lowercased list element name : list of parsed delays }
    }
    or None (after printing a message) when the update time cannot be read.

  Raises:
    requests.exceptions.RequestException: if the request fails or times out,
      or (via raise_for_status) if the server answers with an error status.
  """

  master_delay_dict = {
      "update_time" : "",
      "delays_by_type" : {}
  }

  # As per the DTD, these are the possible delay list types and the helper method that parses each one
  list_parsers = {
      "ctop_list" : parse_ctop_list,
      "airport_closure_list" : parse_closure_list,
      "ground_stop_list" : parse_ground_stop_list,
      "ground_delay_list" : parse_ground_delay_list,
      "airspace_flow_list" : parse_flow_list,
      "arrival_departure_delay_list" : parse_ad_delay_list
  }

  # Get XML content
  req_headers = { "User-Agent" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36" }
  response = requests.get(url = "https://nasstatus.faa.gov/api/airport-status-information", headers = req_headers, timeout = timeout)
  response.raise_for_status()

  # Every lookup below uses lowercase element names, which is how the "lxml" (HTML) parser exposes them
  soup = BeautifulSoup(response.content, "lxml")

  # First grab the update time. A missing element shows up as an AttributeError on None; anything else
  # (including KeyboardInterrupt) is deliberately left to propagate
  try:
    master_delay_dict["update_time"] = soup.airport_status_information.update_time.text.strip()
  except AttributeError:
    print("Error reading update time from AIRPORT_STATUS_INFORMATION. Check XML structure for changes")
    return

  # Loop through Delay_type elements
  for cur_delay_type in soup.find_all("delay_type"):

    # We expect 2 children: a "name" element (just a description of the list, disregarded) followed by one of the
    # list elements above. find_next_sibling(True) returns the next sibling *tag*, so whitespace between the
    # elements of a pretty-printed response is skipped instead of being mistaken for the list element
    name_tag = cur_delay_type.find("name")
    list_tag = name_tag.find_next_sibling(True) if name_tag is not None else None
    if list_tag is None:
      print("Encountered a Delay_type element with no delay list after its Name element. Skipping it. Check XML structure for changes")
      continue
    list_type = list_tag.name

    # Parse the list with the correct helper method
    parser = list_parsers.get(list_type)
    if parser is None:
      print("Encountered an unknown delay list type: {}. Will be appended with no data. Check XML structure for changes".format(list_type))
      cur_delay_type_list = []
    else:
      cur_delay_type_list = parser(cur_delay_type)

    # The list type is the key name and the list of delays is the value. Should the response ever contain several
    # lists of the same type, their delays are merged under the one key rather than the last list replacing the others
    master_delay_dict["delays_by_type"].setdefault(list_type, []).extend(cur_delay_type_list)

  return master_delay_dict



In [None]:
# Fetch the current airport status and print the resulting dictionary.
# get_airport_status_info() returns None when the update time cannot be read, in which case "None" is printed.
print(get_airport_status_info())

{'update_time': 'Tue Oct 18 17:47:35 2022 GMT', 'delays_by_type': {'arrival_departure_delay_list': [{'arpt': 'LAS', 'reason': 'RWY:Construction', 'arrival_delay': {}, 'departure_delay': {'min': '16 minutes', 'max': '30 minutes', 'trend': 'Increasing'}}], 'airport_closure_list': [{'arpt': 'SBA', 'reason': '!SBA 10/062 SBA AD AP CLSD DLY 0700-1200 2210180700-2210211200', 'start': 'Oct 18 at 07:00 UTC.', 'reopen': 'Oct 21 at 12:00 UTC.'}]}}
