# I/O
- Input: Select list of airports IDs
- Output: Unique list of flight IDs which departed from the selected airports

# Process
- Airports + date range --> Flights search 

# To do
- Pull flight IDs departing from select 
- Query ultimate list of flight identifiers to send to the cloud function.


In [1]:
import os
import sys

# if notebook is being run from src directory, change to root directory
if os.getcwd().split(os.sep)[-1] == 'invoke-app':
    try:
        sys.path.remove('')
        sys.path.append('../')
    except: pass
print(os.getcwd())

/workspaces/flight-ml-eta/1-data-ingest-service/invoke-app


In [2]:
import os
import json
import logging
import requests
from dotenv import load_dotenv
from datetime import datetime as dt
from datetime import timedelta 
import pandas as pd

from google.oauth2 import service_account
from google.cloud import storage
from google.cloud import firestore

from src.auth.FlightAware import FlightAwareAPI
# from src.io.read import *

FA_client = FlightAwareAPI()

identifier = 'AAL1707'

lookback_hours = 7*24
lookfoward_hours = 2*24

# ------ DERIVED TIMESTAMPS ------      
current_time_raw = dt.utcnow()

current_time = current_time_raw.strftime('%Y-%m-%dT%H:%M:%SZ')
query_start = (current_time_raw - timedelta(hours=lookback_hours)).strftime('%Y-%m-%dT%H:%M:%SZ')
query_end = (current_time_raw + timedelta(hours=lookfoward_hours)).strftime('%Y-%m-%dT%H:%M:%SZ')

In [4]:

from urllib.parse import urlparse, parse_qs
import time

def extract_cursor(next_link):
    # Parse the URL from the 'next' link
    parsed_url = urlparse(next_link)
    # Extract the query parameters as a dictionary
    query_params = parse_qs(parsed_url.query)
    # The cursor is under the 'cursor' key
    cursor = query_params.get('cursor', [None])[0]
    return cursor



def paginate_api(api, endpoint, params, data_key, max_pages=10):
    """
    Generic API pagination function.

    :param api: API class instance with a query method
    :param endpoint: API endpoint string
    :param params: Dictionary of parameters to pass to the API
    :param data_key: Key in the response that contains the data to extract
    :param max_pages: Maximum number of pages to fetch
    :return: List of extracted data from all pages
    """
    data = []
    for i in range(max_pages):
        response = api.query(endpoint, **params)
        data.extend(response.get(data_key, []))
        cursor = extract_cursor(response['links']['next']) if 'next' in response.get('links', {}) else None
        print(f'result set {i} finished')
        time.sleep(7)
        if not cursor:
            break
        params['cursor'] = cursor
    return data

def get_airport_departures(api, airport_id, start_datetime, end_datetime, max_pages=100):
    endpoint = f'/airports/{airport_id}/flights/departures'
    params = {'airline': 'AAL', 'end': end_datetime, 'start': start_datetime}

    departures = paginate_api(api, endpoint, params, 'departures', max_pages)
    return pd.json_normalize(departures)

def get_airport_flights(api, airport_id, start_datetime, end_datetime, max_pages=100):
    endpoint = f'/airports/{airport_id}/flights'
    params = {'airline': 'AAL', 'end': end_datetime, 'start': start_datetime}
    
    departures = paginate_api(api, endpoint, params, 'departures', max_pages)
    return pd.json_normalize(departures)

# Usage
flights_df = get_airport_flights(FA_client, 'KDFW', query_start, query_end)



result set 0 finished
result set 1 finished
result set 2 finished
result set 3 finished
result set 4 finished
result set 5 finished
result set 6 finished
result set 7 finished
result set 8 finished
result set 9 finished
result set 10 finished
result set 11 finished
result set 12 finished
result set 13 finished
result set 14 finished
result set 15 finished
result set 16 finished
result set 17 finished
result set 18 finished
result set 19 finished
result set 20 finished
result set 21 finished
result set 22 finished
result set 23 finished
result set 24 finished
result set 25 finished
result set 26 finished
result set 27 finished
result set 28 finished
result set 29 finished
result set 30 finished
result set 31 finished
result set 32 finished
result set 33 finished
result set 34 finished
result set 35 finished
result set 36 finished
result set 37 finished
result set 38 finished
result set 39 finished
result set 40 finished
result set 41 finished
result set 42 finished
result set 43 finishe

In [5]:
flights_df

Unnamed: 0,ident,ident_icao,ident_iata,actual_runway_off,actual_runway_on,fa_flight_id,operator,operator_icao,operator_iata,flight_number,...,origin.city,origin.airport_info_url,destination.code,destination.code_icao,destination.code_iata,destination.code_lid,destination.timezone,destination.name,destination.city,destination.airport_info_url
0,AAL175,AAL175,AA175,18L,,AAL175-1703101160-airline-248p,AAL,AAL,AA,175,...,Dallas-Fort Worth,/airports/KDFW,RJTT,RJTT,HND,,Asia/Tokyo,Tokyo Int'l (Haneda),Ota,/airports/RJTT
1,AAL2345,AAL2345,AA2345,17C,,AAL2345-1703101369-schedule-1102p,AAL,AAL,AA,2345,...,Dallas-Fort Worth,/airports/KDFW,KCVG,KCVG,CVG,CVG,America/New_York,Cincinnati/Northern Kentucky International Air...,Hebron,/airports/KCVG
2,AAL1662,AAL1662,AA1662,17C,,AAL1662-1703100844-airline-1367p,AAL,AAL,AA,1662,...,Dallas-Fort Worth,/airports/KDFW,KPHL,KPHL,PHL,PHL,America/New_York,Philadelphia Intl,Philadelphia,/airports/KPHL
3,AAL1739,AAL1739,AA1739,17C,,AAL1739-1703099884-airline-1865p,AAL,AAL,AA,1739,...,Dallas-Fort Worth,/airports/KDFW,KECP,KECP,ECP,ECP,America/Chicago,Northwest Florida Beaches Intl,Panama City,/airports/KECP
4,AAL1093,AAL1093,AA1093,18L,,AAL1093-1703101160-airline-223p,AAL,AAL,AA,1093,...,Dallas-Fort Worth,/airports/KDFW,KDEN,KDEN,DEN,DEN,America/Denver,Denver Intl,Denver,/airports/KDEN
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1495,AAL1322,AAL1322,AA1322,18L,34R,AAL1322-1702852989-airline-4892p,AAL,AAL,AA,1322,...,Dallas-Fort Worth,/airports/KDFW,KDEN,KDEN,DEN,DEN,America/Denver,Denver Intl,Denver,/airports/KDEN
1496,AAL2457,AAL2457,AA2457,18L,24L,AAL2457-1702852552-schedule-157p,AAL,AAL,AA,2457,...,Dallas-Fort Worth,/airports/KDFW,KLAX,KLAX,LAX,LAX,America/Los_Angeles,Los Angeles Intl,Los Angeles,/airports/KLAX
1497,AAL70,AAL70,AA70,17C,25R,AAL70-1702851682-airline-3177p,AAL,AAL,AA,70,...,Dallas-Fort Worth,/airports/KDFW,EDDF,EDDF,FRA,,Europe/Berlin,Frankfurt Int'l,Frankfurt am Main,/airports/EDDF
1498,AAL2283,AAL2283,AA2283,17C,22L,AAL2283-1702852989-airline-4836p,AAL,AAL,AA,2283,...,Dallas-Fort Worth,/airports/KDFW,KLIT,KLIT,LIT,LIT,America/Chicago,Clinton National,Little Rock,/airports/KLIT


In [8]:
flights_df['ident'].value_counts()

ident
AAL1685    4
AAL2749    4
AAL175     3
AAL2881    3
AAL1704    3
          ..
AAL2917    1
AAL320     1
AAL2367    1
AAL2989    1
AAL2283    1
Name: count, Length: 568, dtype: int64

In [9]:
flights_df['destination.city'].value_counts()

destination.city
Los Angeles    43
Phoenix        36
Miami          35
Chicago        32
San Antonio    32
               ..
Roatan          1
Huntsville      1
Lexington       1
Lubbock         1
Little Rock     1
Name: count, Length: 153, dtype: int64

In [10]:
import pygwalker as pyg
from IPython.display import HTML
HTML(pyg.walk(flights_df))


Box(children=(HTML(value='<div id="ifr-pyg-0" style="height: auto">\n    <head>\n        <meta http-equiv="Con…

TypeError: 'PygWalker' object is not subscriptable