# Data Sources

In [98]:
import requests 
import pandas as pd
import json
from datetime import datetime, timedelta
import math
from itertools import product
import wbgapi as wb
import io

In [2]:
with open('keys.json', 'r') as KeysFile:
    data = json.load(KeysFile)

aviationStackAPIKey = data["Aviation Stack API KEY"]

## Country Data

In [3]:
def getCountryCodes(country: str):
    urlCountry = f"https://api.aviationstack.com/v1/countries?access_key={aviationStackAPIKey}"
    queryStringCountry = {"search": country}
    responseCountry = requests.get(urlCountry, params=queryStringCountry)
    responseCountry = responseCountry.json()
    responseCountry = responseCountry.get('data')
    codes = [c.get('country_iso2') for c in responseCountry]
    return codes

In [4]:
def getAirportsByCountry(code: str):
    urlAirport = f"https://api.aviationstack.com/v1/airports?access_key={aviationStackAPIKey}"

    offset = 0
    all_data = []
    
    while True:
        queryStringAirport = {"offset": offset, "country_iso": code, "type": "international"}
        responseAirport = requests.get(urlAirport, params=queryStringAirport)
        responseAirport = responseAirport.json()
        
        currentData = responseAirport.get('data', [])
        currentData = [airP for airP in currentData if airP.get('country_iso2') == code]
        all_data.extend(currentData)
        
        if offset + 100 >= responseAirport.get('pagination').get('total'):
            break
        
        offset += 100
    
    airports_df = pd.DataFrame(columns=['IATA', 'CountryCode', 'AirportName', 'Country'])
    
    for case in all_data:
        new_case = {'IATA': case.get('iata_code', ''),
                    'CountryCode': case.get('country_iso2', ''),
                    'AirportName': case.get('airport_name', ''),
                    'Country': case.get('country_name', '')}
        airports_df.loc[len(airports_df)] = new_case
        
    return airports_df


## Historical Flight Data

In [5]:
def getHistoricalFlights(departureIATA: str=None, arrivalIATA: str=None, date: str=None):
    url = f"https://api.aviationstack.com/v1/flights?access_key={aviationStackAPIKey}"
    queryString = {"limit": 100, "dep_iata": departureIATA, "arr_iata": arrivalIATA, "flight_date": date}
    queryString = {key: value for key, value in queryString.items() if value is not None}
    print(queryString)
    response = requests.get(url, params=queryString)
    if response.status_code != 200:
        print(response)
        return None

    response = response.json()
    response = response['data']
    flights_df = pd.DataFrame(columns=['Date', 'DepartureIATA', 'DepartureTime', 'ArrivalIATA', 'ArrivalTime', 'Aircraft', 'AirlineName', 'AirlineIATA', 'FlightNumberIATA'])
    for flight in response:
        if isinstance(flight, dict) and flight.get('flight', {}).get('codeshared') is None:
            new_case = {
                'Date': date,
                'DepartureIATA': flight.get('departure', {}).get('iata', None),
                'DepartureTime': flight.get('departure', {}).get('scheduled', None),
                'ArrivalIATA': flight.get('arrival', {}).get('iata', None),
                'ArrivalTime': flight.get('arrival', {}).get('scheduled', None),
                'Aircraft': flight.get('aircraft', {}).get('iata', None) if flight.get('aircraft') is not None else None,
                'AirlineName': flight.get('airline', {}).get('name', None),
                'AirlineIATA': flight.get('airline', {}).get('iata', None),
                'FlightNumberIATA': flight.get('flight', {}).get('iata', None)
            }
            flights_df.loc[len(flights_df)] = new_case

    return flights_df

### Get Country Codes for US Brazil Portugal Canada Italy France

In [20]:
US_codes = getCountryCodes('United States')
Brazil_codes = getCountryCodes('Brazil')
Portugal_codes = getCountryCodes('Portugal')
Mexico_codes = getCountryCodes('Mexico')
Italy_codes = getCountryCodes('Italy')
France_codes = getCountryCodes('France')

In [22]:
print(f"Country codes:\nUS: {US_codes}\nBrazil: {Brazil_codes}\nPortugal: {Portugal_codes}\nMexico: {Mexico_codes}\nItaly: {Italy_codes}\nFrance: {France_codes}")

Country codes:
US: ['UM', 'US']
Brazil: ['BR']
Portugal: ['PT']
Mexico: ['MX']
Italy: ['IT']
France: ['FR', 'MQ']


In [52]:
US_airports = ['JFK', 'ATL', 'DTW', 'LAX']
Italy_airports = ['FCO']
France_airports = ['CDG']
Brazil_airports = ['GRU']
Portugal_airports = ['LIS']
Mexico_airports = ['CUN']

In [53]:
US_Italy = list(product(US_airports, Italy_airports))
US_France = list(product(US_airports, France_airports))
US_Brazil = list(product(US_airports, Brazil_airports))
US_Portugal = list(product(US_airports, Portugal_airports))
US_Mexico = list(product(US_airports, Mexico_airports))
airport_combinations = US_Italy + US_France + US_Brazil + US_Portugal + US_Mexico

In [None]:
dates = [datetime(2024, 1, 1) + timedelta(days=i) for i in range(366)]
dates = [i.strftime("%Y-%m-%d") for i in dates]

In [71]:
getHistoricalFlights(departureIATA='JFK', arrivalIATA='CDG', date='2024-04-03')

{'limit': 100, 'dep_iata': 'JFK', 'arr_iata': 'CDG', 'flight_date': '2024-04-03'}


Unnamed: 0,Date,DepartureIATA,DepartureTime,ArrivalIATA,ArrivalTime,Aircraft,AirlineName,AirlineIATA,FlightNumberIATA
0,2024-04-03,JFK,2024-04-03T17:35:00+00:00,CDG,2024-04-04T06:55:00+00:00,B772,American Airlines,AA,AA44
1,2024-04-03,JFK,2024-04-03T16:30:00+00:00,CDG,2024-04-04T05:55:00+00:00,B77W,Air France,AF,AF1
2,2024-04-03,JFK,2024-04-03T17:30:00+00:00,CDG,2024-04-04T06:45:00+00:00,A359,Air France,AF,AF3
3,2024-04-03,JFK,2024-04-03T18:30:00+00:00,CDG,2024-04-04T08:05:00+00:00,B77W,Air France,AF,AF5
4,2024-04-03,JFK,2024-04-03T21:30:00+00:00,CDG,2024-04-04T11:05:00+00:00,B772,Air France,AF,AF7
5,2024-04-03,JFK,2024-04-03T23:15:00+00:00,CDG,2024-04-04T12:40:00+00:00,B77W,Air France,AF,AF9
6,2024-04-03,JFK,2024-04-03T17:07:00+00:00,CDG,2024-04-04T06:55:00+00:00,,JetBlue Airways,B6,B61407
7,2024-04-03,JFK,2024-04-03T19:30:00+00:00,CDG,2024-04-04T09:15:00+00:00,,Delta Air Lines,DL,DL262
8,2024-04-03,JFK,2024-04-03T22:30:00+00:00,CDG,2024-04-04T12:10:00+00:00,A333,Delta Air Lines,DL,DL264
9,2024-04-03,JFK,2024-04-03T08:05:00+00:00,CDG,2024-04-03T21:35:00+00:00,B764,Delta Air Lines,DL,DL266


In [54]:
365*len(airport_combinations)

7300

In [None]:
df_all_flights = pd.DataFrame(columns=['Date', 'DepartureIATA', 'DepartureTime', 'ArrivalIATA', 'ArrivalTime', 'Aircraft', 'AirlineName', 'AirlineIATA', 'FlightNumberIATA'])
for city_pair in airport_combinations:
    for current_date in dates:
        temp_df = getHistoricalFlights(departureIATA=city_pair[0], arrivalIATA=city_pair[1], date=current_date)
        df_all_flights = pd.concat([temp_df, df_all_flights], ignore_index=True)


{'limit': 100, 'dep_iata': 'BOS', 'arr_iata': 'FCO', 'flight_date': '2024-12-01'}


Unnamed: 0,Date,DepartureIATA,DepartureTime,ArrivalIATA,ArrivalTime,Aircraft,AirlineName,AirlineIATA,FlightNumberIATA
0,2024-12-01,BOS,2024-12-01T17:20:00+00:00,FCO,2024-12-02T07:15:00+00:00,A332,ITA Airways,AZ,AZ615
1,2024-12-01,BOS,2024-12-01T17:55:00+00:00,FCO,2024-12-02T07:55:00+00:00,A339,Delta Air Lines,DL,DL112


## Future Flights

In [8]:
def getFutureFlights(departureIATA: str=None, arrivalIATA: str=None, date: str=None, airlineIATA: str=None, flightNumber: str=None):
    url = f"https://api.aviationstack.com/v1/flightsFuture?access_key={aviationStackAPIKey}"
    if not ((departureIATA is None) ^ (arrivalIATA is None)):
        return "Invalid Request"
    
    if departureIATA:
        queryString = {
            "iataCode": departureIATA,
            "type": "departure",
            "date": date,
            "airline_iata": airlineIATA,
            "flight_number":flightNumber
        }
    
    else:
        queryString = {
            "iataCode": arrivalIATA,
            "type": "arrival",
            "date": date,
            "airline_iata": airlineIATA,
            "flight_number":flightNumber
        }
    
    queryString = {key: value for key, value in queryString.items() if value is not None}
    response = requests.get(url, params=queryString)
    if response.status_code != 200:
        print(response)
        return None
    response = response.json()
    response = response['data']
    response = [flight for flight in response if "codeshared" not in flight.keys()]
    df = pd.DataFrame(columns=['Date', 'Weekday', 'DepartureIATA', 'DepartureTime', 'ArrivalIATA', 'ArrivalTime', 'Aircraft', 'AirlineIATA', 'FlightNumberIATA'])
    
    for flight in response:
        new_case = {'Date': date, 
                    'Weekday': flight['weekday'],
                    'DepartureIATA': flight['departure']['iataCode'],
                    'DepartureTime': flight['departure']['scheduledTime'],
                    'ArrivalIATA': flight['arrival']['iataCode'],
                    'ArrivalTime': flight['arrival']['scheduledTime'],
                    'Aircraft': flight['aircraft']['modelCode'],
                    'AirlineIATA': flight['airline']['iataCode'],
                    'FlightNumberIATA': flight['flight']['iataNumber']}
        df.loc[len(df)] = new_case

    return df

In [None]:
getFutureFlights(departureIATA="ATL", date="2024-11-29", airlineIATA="DL")

## OECD API

In [None]:
url = 'https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_KEI@DF_KEI,4.0/.M.PRVM.IX.BTE..?startPeriod=2024-01&dimensionAtObservation=AllDimensions&format=csvfilewithlabels'
response = requests.get(url).content

In [100]:
rawData = pd.read_csv(io.StringIO(response.decode('utf-8')))

In [104]:
rawData.head()

Unnamed: 0,STRUCTURE,STRUCTURE_ID,STRUCTURE_NAME,ACTION,REF_AREA,Reference area,FREQ,Frequency of observation,MEASURE,Measure,...,OBS_VALUE,Observation value,OBS_STATUS,Observation status,UNIT_MULT,Unit multiplier,DECIMALS,Decimals,BASE_PER,Base period
0,DATAFLOW,OECD.SDD.STES:DSD_KEI@DF_KEI(4.0),Key short-term economic indicators,I,LVA,Latvia,M,Monthly,PRVM,Production volume,...,113.460356,,A,Normal value,0,Units,1,One,2015,
1,DATAFLOW,OECD.SDD.STES:DSD_KEI@DF_KEI(4.0),Key short-term economic indicators,I,LVA,Latvia,M,Monthly,PRVM,Production volume,...,114.689613,,A,Normal value,0,Units,1,One,2015,
2,DATAFLOW,OECD.SDD.STES:DSD_KEI@DF_KEI(4.0),Key short-term economic indicators,I,AUT,Austria,M,Monthly,PRVM,Production volume,...,121.2025,,A,Normal value,0,Units,1,One,2015,
3,DATAFLOW,OECD.SDD.STES:DSD_KEI@DF_KEI(4.0),Key short-term economic indicators,I,BEL,Belgium,M,Monthly,PRVM,Production volume,...,127.456847,,A,Normal value,0,Units,1,One,2015,
4,DATAFLOW,OECD.SDD.STES:DSD_KEI@DF_KEI(4.0),Key short-term economic indicators,I,BEL,Belgium,M,Monthly,PRVM,Production volume,...,127.042578,,A,Normal value,0,Units,1,One,2015,
