# Collect flights data

In [2]:
# !pip install pyperclip

In [3]:
import datetime
import os

import pandas as pd
import pyperclip
import requests

First, create the empty `flights_df` DataFrame.

In [4]:
# Cities of interest and the ICAO code used for each one, kept in parallel order.
german_cities = ['Berlin', 'Frankfurt', 'Munich', 'Hanover', 'Düsseldorf']
airport_icao = ['EDDB', 'EDDF', 'EDDM', 'EDDV', 'EDDL']
city_icao = pd.DataFrame(
    list(zip(german_cities, airport_icao)),
    columns=['city', 'icao'],
)

# Tomorrow's date as a 'YYYY-MM-DD' string.
tomorrow_date = (datetime.date.today() + datetime.timedelta(days=1)).isoformat()

# Empty frame whose only job is to fix the column layout of the collected flights.
flights_df = pd.DataFrame(
    columns=[
        'city',
        'dep/arr',
        'flight_number',
        'status',
        'scheduled_arrival_localtime',
        'departure_city',
        'departure_airport_icao',
        'departure_airport_iata',
        'scheduled_departure_localtime',
        'arrival_city',
        'arrival_airport_icao',
        'arrival_airport_iata',
        'airlines_name',
        'aircraft_model',
    ]
)
flights_df

Unnamed: 0,city,dep/arr,flight_number,status,scheduled_arrival_localtime,departure_city,departure_airport_icao,departure_airport_iata,scheduled_departure_localtime,arrival_city,arrival_airport_icao,arrival_airport_iata,airlines_name,aircraft_model


To collect data about flights landing in our cities of interest, I signed up for [RapidAPI](https://rapidapi.com/). I then selected Flight Data APIs and went to the most popular one, the 'AeroDataBox API Documentation'. Fortunately, accessing flight landing data is free.

You can now go to the Endpoints section of the API and test it. <br>
An endpoint is like a communication channel. Each endpoint can provide a different type of information. AeroDataBox’s endpoints are pretty self-explanatory: you can use one endpoint to search airports by location, another one to search airports by free text, etc. <br>

Each endpoint has its own relevant parameters: to search airports by location, you have to pass a pair of coordinates, a radius distance and a maximum number of airports to be returned. You can fill the parameters directly on the browser and hit the “Test Endpoint” button to check whether your request is successful. <br>

An even more useful feature is to select the programming language and library you are using and let RapidAPI craft the code for you; in my case that is '(Python) Requests'. Then I just copy-pasted the code snippet into my `api_call` function.

In [5]:
def api_call(city_icao, date_tomorrow, timeout=30):
    """Request the flights at one airport from the AeroDataBox API on RapidAPI.

    The request covers the window from 08:00 to 20:00 on the given date and
    asks for both directions (departures and arrivals).

    Parameters
    ----------
    city_icao : str
        ICAO code of the airport, inserted into the request path. Note that
        inside this function the name shadows the module-level ``city_icao``
        DataFrame.
    date_tomorrow : str
        Date string placed in front of the ``T08:00`` / ``T20:00`` suffixes.
    timeout : float, optional
        Seconds to wait for the server before ``requests`` raises a timeout
        error, so a stalled connection cannot hang the notebook.

    Returns
    -------
    requests.Response
        The raw response. The status code is printed but not checked here.
    """
    url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{city_icao}/{date_tomorrow}T08:00/{date_tomorrow}T20:00"

    querystring = {
        "withLeg": "true",
        "direction": "Both",
        "withCancelled": "true",
        "withCodeshared": "false",
        "withCargo": "false",
        "withPrivate": "false",
        "withLocation": "false",
    }

    headers = {
        'x-rapidapi-host': "aerodatabox.p.rapidapi.com",
        # Keep the real key out of the notebook: it is read from the
        # RAPIDAPI_KEY environment variable, with the original placeholder
        # as the fallback.
        'x-rapidapi-key': os.environ.get("RAPIDAPI_KEY", "your-key"),
    }

    response = requests.get(url, headers=headers, params=querystring, timeout=timeout)
    print(response.status_code)
    return response

In [6]:
def _tidy_direction(records, direction, city):
    """Flatten one direction's flight records into the tidy column layout.

    ``direction`` is ``'departure'`` or ``'arrival'``; it selects the source
    fields to keep and is also written to the ``dep/arr`` column.
    """
    source_cols = [
        'number',
        'status',
        f'{direction}.scheduledTimeLocal',
        f'{direction}.airport.name',
        f'{direction}.airport.icao',
        f'{direction}.airport.iata',
        'airline.name',
        'aircraft.model',
    ]
    # NOTE(review): in the sample output further down the notebook the
    # *_city / *_airport_* columns are empty for every row, so the selected
    # airport fields may not be where the API puts the remote airport.
    # Confirm against the API response schema.
    frame = pd.json_normalize(records)

    # Fields absent from every record never become columns; add them as empty
    # columns so the selection below cannot fail.
    for col in source_cols:
        if col not in frame.columns:
            frame[col] = None

    frame = frame.loc[:, source_cols].copy()
    frame.columns = [
        'flight_number',
        'status',
        f'scheduled_{direction}_localtime',
        f'{direction}_city',
        f'{direction}_airport_icao',
        f'{direction}_airport_iata',
        'airlines_name',
        'aircraft_model',
    ]
    # Inserting at position 0 twice yields the order: city, dep/arr, <rest>.
    frame.insert(0, 'dep/arr', direction)
    frame.insert(0, 'city', city)
    return frame


def flight_data_cleanup(resp, city):
    """Turn one API response into a tidy DataFrame of departures and arrivals.

    Parameters
    ----------
    resp : object with a ``json()`` method
        Response whose JSON body is a mapping that may hold ``'departures'``
        and ``'arrivals'`` lists. A missing or null entry is treated as an
        empty list instead of raising ``KeyError``.
    city : str
        Label written to the ``city`` column of every row.

    Returns
    -------
    pandas.DataFrame
        Departure rows followed by arrival rows. Each part keeps its own
        0-based index, so index values repeat across the two parts.
    """
    payload = resp.json()  # parse the body once and reuse it for both directions

    departures = _tidy_direction(payload.get('departures') or [], 'departure', city)
    arrivals = _tidy_direction(payload.get('arrivals') or [], 'arrival', city)

    flights = pd.concat([departures, arrivals])
    print(f'Departures: {len(departures)} Arrivals: {len(arrivals)} Flights: {len(flights)}')

    return flights

In [7]:
# Start from the column-only schema of flights_df (zero rows) so that
# re-running this cell rebuilds the table instead of appending the same
# flights a second time.
collected_frames = [flights_df.iloc[0:0]]
running_total = 0

for city, icao in zip(city_icao['city'], city_icao['icao']):
    print(city, icao)
    response = api_call(icao, tomorrow_date)
    fly = flight_data_cleanup(response, city)

    collected_frames.append(fly)
    running_total += len(fly)
    print(len(fly), running_total)

# One concat at the end avoids re-copying the growing frame on every iteration.
flights_df = pd.concat(collected_frames)

Berlin EDDB
200
Departures: 174 Arrivals: 176 Flights: 350
350 350
Frankfurt EDDF
200
Departures: 376 Arrivals: 425 Flights: 801
801 1151
Munich EDDM
200
Departures: 297 Arrivals: 282 Flights: 579
579 1730
Hanover EDDV
200
Departures: 24 Arrivals: 22 Flights: 46
46 1776
Düsseldorf EDDL
200
Departures: 159 Arrivals: 151 Flights: 310
310 2086


In [8]:
# Show the combined table of flights collected for all cities.
flights_df

Unnamed: 0,city,dep/arr,flight_number,status,scheduled_arrival_localtime,departure_city,departure_airport_icao,departure_airport_iata,scheduled_departure_localtime,arrival_city,arrival_airport_icao,arrival_airport_iata,airlines_name,aircraft_model
0,Berlin,departure,U2 5831,Unknown,,,,,2022-04-29 08:00+02:00,,,,easyJet,Airbus A320
1,Berlin,departure,EW 13,Unknown,,,,,2022-04-29 08:00+02:00,,,,Eurowings,Airbus A319
2,Berlin,departure,EW 9049,Unknown,,,,,2022-04-29 08:10+02:00,,,,Eurowings,Airbus A319
3,Berlin,departure,FR 196,Unknown,,,,,2022-04-29 08:15+02:00,,,,Ryanair,Boeing 737-800
4,Berlin,departure,LH 1927,Unknown,,,,,2022-04-29 08:20+02:00,,,,Lufthansa,Airbus A320
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
146,Düsseldorf,arrival,JU 324,Expected,2022-04-29 19:50+02:00,,,,,,,,Air Serbia,Airbus A319
147,Düsseldorf,arrival,AF 1006,Expected,2022-04-29 19:55+02:00,,,,,,,,Air France,Embraer 190
148,Düsseldorf,arrival,X3 6643,Expected,2022-04-29 19:55+02:00,,,,,,,,TUIfly,Boeing 737-800 (winglets)
149,Düsseldorf,arrival,XQ 1730,Expected,2022-04-29 19:55+02:00,,,,,,,,SunExpress,Boeing 737-800 (winglets)


Now, to check a particular city's flight information:

In [9]:
# Boolean mask selecting the rows collected for Frankfurt.
frankfurt_check = flights_df['city'].eq('Frankfurt')
flights_df[frankfurt_check]

Unnamed: 0,city,dep/arr,flight_number,status,scheduled_arrival_localtime,departure_city,departure_airport_icao,departure_airport_iata,scheduled_departure_localtime,arrival_city,arrival_airport_icao,arrival_airport_iata,airlines_name,aircraft_model
0,Frankfurt,departure,LH 1456,Expected,,,,,2022-04-29 08:00+02:00,,,,Lufthansa,Airbus A320 (Sharklets)
1,Frankfurt,departure,LH 6,Expected,,,,,2022-04-29 08:00+02:00,,,,Lufthansa,Airbus A320 (Sharklets)
2,Frankfurt,departure,LH 812,Expected,,,,,2022-04-29 08:00+02:00,,,,Lufthansa,Airbus A320 (Sharklets)
3,Frankfurt,departure,LH 900,Expected,,,,,2022-04-29 08:00+02:00,,,,Lufthansa,Airbus A321 (Sharklets)
4,Frankfurt,departure,XQ 141,Expected,,,,,2022-04-29 08:00+02:00,,,,SunExpress,Boeing 737-800
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
420,Frankfurt,arrival,LH 119,Expected,2022-04-29 20:00+02:00,,,,,,,,Lufthansa,Airbus A320 (sharklets)
421,Frankfurt,arrival,LH 1301,Expected,2022-04-29 20:00+02:00,,,,,,,,Lufthansa,Airbus A320 (sharklets)
422,Frankfurt,arrival,LH 687,Expected,2022-04-29 20:00+02:00,,,,,,,,Lufthansa,Airbus A321
423,Frankfurt,arrival,LH 851,Expected,2022-04-29 20:00+02:00,,,,,,,,Lufthansa,Airbus A320 (sharklets)
