Code to retrieve data via API


Include libraries

In [37]:
from datetime import datetime, timedelta
import pandas as pd
import requests
from pandas import json_normalize

Define list of airports we are interested in:

based on https://de.wikipedia.org/wiki/Liste_der_Verkehrsflugh%C3%A4fen_in_Deutschland

note: this one needs IATA codes, not icao!

In [38]:
iatalistD=["FDH", "PAD", "FMO", "FKB", "DRS", "DTM", "LEJ", "NRN", "BRE", "HHN", "NUE", "HAJ", "SXF", "CGN", "STR", "HAM", "TXL", "DUS", "MUC", "FRA"]
print(iatalistD)

['FDH', 'PAD', 'FMO', 'FKB', 'DRS', 'DTM', 'LEJ', 'NRN', 'BRE', 'HHN', 'NUE', 'HAJ', 'SXF', 'CGN', 'STR', 'HAM', 'TXL', 'DUS', 'MUC', 'FRA']


Start date : we can't get more than one year in the past :-(

In [39]:
# time range to retrieve (note: not sure yet if here stopdate is exclusive or not)
startdate="2023-12-19"


In [42]:
# Convert the startdate string to a datetime object
start_date_obj = datetime.strptime(startdate, "%Y-%m-%d")

# Calculate the stopdate as one calendar month after the startdate
stop_date_obj = start_date_obj + timedelta(days=2) # max query length in aviation-edge API is <31 days...

# Convert the stopdate back to a string in the same format as startdate
stopdate = stop_date_obj.strftime("%Y-%m-%d")

stopdate

'2023-12-21'

Use API key for the request

In [41]:
from google.colab import userdata
apikey=userdata.get('avedgekey')

Read URL, put the resulting json into a pandas data frame

In [49]:
url = f'https://aviation-edge.com/v2/public/flightsHistory?key={apikey}&code=HAM&type=departure&date_from={startdate}&date_to={stopdate}&limit=20'

print(url)
response = requests.get(url)
dictr = response.json()


https://aviation-edge.com/v2/public/flightsHistory?key=ebff7f-543e40&code=HAM&type=departure&date_from=2023-12-19&date_to=2023-12-21&limit=20


In [None]:
#print(dictr)

In [51]:
    if len(dictr) > 3:
      # Initialize an empty list to store the extracted data
      data = []

      for entry in dictr:
          departure = entry.get('departure', {}).get('icaoCode')
          firstseen = entry.get('departure', {}).get('scheduledTime')
          arrival = entry.get('arrival', {}).get('icaoCode')
          lastseen = entry.get('arrival', {}).get('scheduledTime')
          callsign = entry.get('flight', {}).get('icaoNumber')
          # print(f"icao24: {icao24}, icaoCode: {icaoCode} takeoffplan: {takeoffplan}, firstseen: {firstseen}, arrival: {arrival}, landingplan: {landingplan}, lastseen: {lastseen}, callsign: {callsign}")

          # Append the extracted data as a dictionary to the list
          data.append({
              'firstseen': firstseen,
              'departure': departure.upper(),
              'lastseen': lastseen,
              'arrival': arrival.upper(),
              'callsign': callsign
          })


      # Create a pandas DataFrame from the list of dictionaries
      pdAll = pd.DataFrame(data)

      # Convert 'firstseen' column to datetime objects
      pdAll['firstseen'] = pd.to_datetime(pdAll['firstseen'])

      # Extract year, month, and day as extra "day" column
      pdAll['day'] = pdAll['firstseen'].dt.strftime('%Y-%m-%d')

      # sort by day
      pdSorted = pdAll.sort_values(by=['day'])
      pdSorted.tail()



In [52]:
print(f'retrieved {len(pdAll)} flights departing at the selected airport')

retrieved 899 flights departing at the selected airport


Output the (beginning and the )end of the retrieved dataframe

In [53]:
pdAll.tail()

Unnamed: 0,firstseen,departure,lastseen,arrival,callsign,day
894,2023-12-21 21:00:00,EDDH,2023-12-22t06:30:00.000,OMDB,qfa8062,2023-12-21
895,2023-12-21 21:00:00,EDDH,2023-12-22t06:30:00.000,OMDB,uae62,2023-12-21
896,2023-12-21 21:30:00,EDDH,2023-12-21t22:55:00.000,LFSB,ezy1184,2023-12-21
897,2023-12-21 11:00:00,EDDH,2023-12-21t15:38:00.000,LCPH,sua711,2023-12-21
898,2023-12-21 18:05:00,EDDH,2023-12-21t19:03:00.000,EHAM,vjt763,2023-12-21


Sort the data frame by departure time

In [54]:
pdSorted = pdAll.sort_values(by=['day'])
pdSorted.tail()


Unnamed: 0,firstseen,departure,lastseen,arrival,callsign,day
683,2023-12-21 09:50:00,EDDH,2023-12-21t11:00:00.000,EIDW,aal8004,2023-12-21
682,2023-12-21 09:40:00,EDDH,2023-12-21t10:30:00.000,EKCH,bcy646,2023-12-21
681,2023-12-21 09:40:00,EDDH,2023-12-21t10:30:00.000,EKCH,tha6233,2023-12-21
688,2023-12-21 09:50:00,EDDH,2023-12-21t11:05:00.000,EDDM,sia2161,2023-12-21
898,2023-12-21 18:05:00,EDDH,2023-12-21t19:03:00.000,EHAM,vjt763,2023-12-21


Mount google drive to save the resulting csv file

In [55]:
from google.colab import drive
drive.mount('/content/drive')

import os

# where to save the data (and making shure that the directory exist)
folder_path = "/content/drive/My Drive/2024KomplexeNetze/DatenAE"
if not os.path.exists(folder_path):
    os.makedirs(folder_path)


Mounted at /content/drive


Last step: save the retrieved data into csv, with a name defined by start and stop date.

In [56]:
outputfile=startdate+"_"+stopdate+".csv"
file_path = os.path.join(folder_path, outputfile)
pdSorted.to_csv(file_path, index=False)  # ((saving without the DataFrame index))

In [57]:
print(f'Output file has been generated in {file_path}')

Output file has been generated in /content/drive/My Drive/2024KomplexeNetze/DatenAE/2023-12-19_2023-12-21.csv


--------------------------------------
Note: if we want, we could also get some airport info from provided by traffic, like geo coordinates, city, name.

TO DO: check the list of airports in the published covid-19 dataset - are they the same airports?
