# Data Sources

In [1]:
import requests
import pandas as pd
import json
from datetime import datetime, timedelta
import math
from itertools import product
import io

In [None]:
with open('keys.json', 'r') as KeysFile:
    data = json.load(KeysFile)

aviationStackAPIKey = data["Aviation Stack API KEY"]

## Country Data

In [None]:
def getCountryCodes(country: str):
    urlCountry = f"https://api.aviationstack.com/v1/countries?access_key={aviationStackAPIKey}"
    queryStringCountry = {"search": country}
    responseCountry = requests.get(urlCountry, params=queryStringCountry)
    responseCountry = responseCountry.json()
    responseCountry = responseCountry.get('data')
    codes = [c.get('country_iso2') for c in responseCountry]
    return codes

In [None]:
def getAirportsByCountry(code: str):
    urlAirport = f"https://api.aviationstack.com/v1/airports?access_key={aviationStackAPIKey}"

    offset = 0
    all_data = []

    while True:
        queryStringAirport = {"offset": offset, "country_iso": code, "type": "international"}
        responseAirport = requests.get(urlAirport, params=queryStringAirport)
        responseAirport = responseAirport.json()

        currentData = responseAirport.get('data', [])
        currentData = [airP for airP in currentData if airP.get('country_iso2') == code]
        all_data.extend(currentData)

        if offset + 100 >= responseAirport.get('pagination').get('total'):
            break

        offset += 100

    airports_df = pd.DataFrame(columns=['IATA', 'CountryCode', 'AirportName', 'Country'])

    for case in all_data:
        new_case = {'IATA': case.get('iata_code', ''),
                    'CountryCode': case.get('country_iso2', ''),
                    'AirportName': case.get('airport_name', ''),
                    'Country': case.get('country_name', '')}
        airports_df.loc[len(airports_df)] = new_case

    return airports_df


## Historical Flight Data

In [None]:
def getHistoricalFlights(departureIATA: str=None, arrivalIATA: str=None, date: str=None):
    url = f"https://api.aviationstack.com/v1/flights?access_key={aviationStackAPIKey}"
    queryString = {"limit": 100, "dep_iata": departureIATA, "arr_iata": arrivalIATA, "flight_date": date}
    queryString = {key: value for key, value in queryString.items() if value is not None}
    print(queryString)
    response = requests.get(url, params=queryString)
    if response.status_code != 200:
        print(response)
        return None

    response = response.json()
    response = response['data']
    flights_df = pd.DataFrame(columns=['Date', 'DepartureIATA', 'DepartureTime', 'ArrivalIATA', 'ArrivalTime', 'Aircraft', 'AirlineName', 'AirlineIATA', 'FlightNumberIATA'])
    for flight in response:
        if isinstance(flight, dict) and flight.get('flight', {}).get('codeshared') is None:
            new_case = {
                'Date': date,
                'DepartureIATA': flight.get('departure', {}).get('iata', None),
                'DepartureTime': flight.get('departure', {}).get('scheduled', None),
                'ArrivalIATA': flight.get('arrival', {}).get('iata', None),
                'ArrivalTime': flight.get('arrival', {}).get('scheduled', None),
                'Aircraft': flight.get('aircraft', {}).get('iata', None) if flight.get('aircraft') is not None else None,
                'AirlineName': flight.get('airline', {}).get('name', None),
                'AirlineIATA': flight.get('airline', {}).get('iata', None),
                'FlightNumberIATA': flight.get('flight', {}).get('iata', None)
            }
            flights_df.loc[len(flights_df)] = new_case

    return flights_df

### Get Country Codes for US Brazil Portugal Canada Italy France

In [None]:
US_codes = getCountryCodes('United States')
Brazil_codes = getCountryCodes('Brazil')
Portugal_codes = getCountryCodes('Portugal')
Mexico_codes = getCountryCodes('Mexico')
Italy_codes = getCountryCodes('Italy')
France_codes = getCountryCodes('France')

In [None]:
print(f"Country codes:\nUS: {US_codes}\nBrazil: {Brazil_codes}\nPortugal: {Portugal_codes}\nMexico: {Mexico_codes}\nItaly: {Italy_codes}\nFrance: {France_codes}")

Country codes:
US: ['UM', 'US']
Brazil: ['BR']
Portugal: ['PT']
Mexico: ['MX']
Italy: ['IT']
France: ['FR', 'MQ']


In [None]:
US_airports = ['JFK', 'ATL', 'DTW', 'LAX']
Italy_airports = ['FCO']
France_airports = ['CDG']
Brazil_airports = ['GRU']
Portugal_airports = ['LIS']
Mexico_airports = ['CUN']

In [None]:
US_Italy = list(product(US_airports, Italy_airports))
US_France = list(product(US_airports, France_airports))
US_Brazil = list(product(US_airports, Brazil_airports))
US_Portugal = list(product(US_airports, Portugal_airports))
US_Mexico = list(product(US_airports, Mexico_airports))
airport_combinations = US_Italy + US_France + US_Brazil + US_Portugal + US_Mexico

In [None]:
dates = [datetime(2024, 1, 1) + timedelta(days=i) for i in range(366)]
dates = [i.strftime("%Y-%m-%d") for i in dates]

In [None]:
getHistoricalFlights(departureIATA='JFK', arrivalIATA='CDG', date='2024-04-03')

{'limit': 100, 'dep_iata': 'JFK', 'arr_iata': 'CDG', 'flight_date': '2024-04-03'}


Unnamed: 0,Date,DepartureIATA,DepartureTime,ArrivalIATA,ArrivalTime,Aircraft,AirlineName,AirlineIATA,FlightNumberIATA
0,2024-04-03,JFK,2024-04-03T17:35:00+00:00,CDG,2024-04-04T06:55:00+00:00,B772,American Airlines,AA,AA44
1,2024-04-03,JFK,2024-04-03T16:30:00+00:00,CDG,2024-04-04T05:55:00+00:00,B77W,Air France,AF,AF1
2,2024-04-03,JFK,2024-04-03T17:30:00+00:00,CDG,2024-04-04T06:45:00+00:00,A359,Air France,AF,AF3
3,2024-04-03,JFK,2024-04-03T18:30:00+00:00,CDG,2024-04-04T08:05:00+00:00,B77W,Air France,AF,AF5
4,2024-04-03,JFK,2024-04-03T21:30:00+00:00,CDG,2024-04-04T11:05:00+00:00,B772,Air France,AF,AF7
5,2024-04-03,JFK,2024-04-03T23:15:00+00:00,CDG,2024-04-04T12:40:00+00:00,B77W,Air France,AF,AF9
6,2024-04-03,JFK,2024-04-03T17:07:00+00:00,CDG,2024-04-04T06:55:00+00:00,,JetBlue Airways,B6,B61407
7,2024-04-03,JFK,2024-04-03T19:30:00+00:00,CDG,2024-04-04T09:15:00+00:00,,Delta Air Lines,DL,DL262
8,2024-04-03,JFK,2024-04-03T22:30:00+00:00,CDG,2024-04-04T12:10:00+00:00,A333,Delta Air Lines,DL,DL264
9,2024-04-03,JFK,2024-04-03T08:05:00+00:00,CDG,2024-04-03T21:35:00+00:00,B764,Delta Air Lines,DL,DL266


In [None]:
365*len(airport_combinations)

7300

In [None]:
df_all_flights = pd.DataFrame(columns=['Date', 'DepartureIATA', 'DepartureTime', 'ArrivalIATA', 'ArrivalTime', 'Aircraft', 'AirlineName', 'AirlineIATA', 'FlightNumberIATA'])
for city_pair in airport_combinations:
    for current_date in dates:
        temp_df = getHistoricalFlights(departureIATA=city_pair[0], arrivalIATA=city_pair[1], date=current_date)
        df_all_flights = pd.concat([temp_df, df_all_flights], ignore_index=True)


{'limit': 100, 'dep_iata': 'BOS', 'arr_iata': 'FCO', 'flight_date': '2024-12-01'}


Unnamed: 0,Date,DepartureIATA,DepartureTime,ArrivalIATA,ArrivalTime,Aircraft,AirlineName,AirlineIATA,FlightNumberIATA
0,2024-12-01,BOS,2024-12-01T17:20:00+00:00,FCO,2024-12-02T07:15:00+00:00,A332,ITA Airways,AZ,AZ615
1,2024-12-01,BOS,2024-12-01T17:55:00+00:00,FCO,2024-12-02T07:55:00+00:00,A339,Delta Air Lines,DL,DL112


## Future Flights

In [None]:
def getFutureFlights(departureIATA: str=None, arrivalIATA: str=None, date: str=None, airlineIATA: str=None, flightNumber: str=None):
    url = f"https://api.aviationstack.com/v1/flightsFuture?access_key={aviationStackAPIKey}"
    if not ((departureIATA is None) ^ (arrivalIATA is None)):
        return "Invalid Request"

    if departureIATA:
        queryString = {
            "iataCode": departureIATA,
            "type": "departure",
            "date": date,
            "airline_iata": airlineIATA,
            "flight_number":flightNumber
        }

    else:
        queryString = {
            "iataCode": arrivalIATA,
            "type": "arrival",
            "date": date,
            "airline_iata": airlineIATA,
            "flight_number":flightNumber
        }

    queryString = {key: value for key, value in queryString.items() if value is not None}
    response = requests.get(url, params=queryString)
    if response.status_code != 200:
        print(response)
        return None
    response = response.json()
    response = response['data']
    response = [flight for flight in response if "codeshared" not in flight.keys()]
    df = pd.DataFrame(columns=['Date', 'Weekday', 'DepartureIATA', 'DepartureTime', 'ArrivalIATA', 'ArrivalTime', 'Aircraft', 'AirlineIATA', 'FlightNumberIATA'])

    for flight in response:
        new_case = {'Date': date,
                    'Weekday': flight['weekday'],
                    'DepartureIATA': flight['departure']['iataCode'],
                    'DepartureTime': flight['departure']['scheduledTime'],
                    'ArrivalIATA': flight['arrival']['iataCode'],
                    'ArrivalTime': flight['arrival']['scheduledTime'],
                    'Aircraft': flight['aircraft']['modelCode'],
                    'AirlineIATA': flight['airline']['iataCode'],
                    'FlightNumberIATA': flight['flight']['iataNumber']}
        df.loc[len(df)] = new_case

    return df

In [None]:
getFutureFlights(departureIATA="ATL", date="2024-11-29", airlineIATA="DL")

## OECD API

https://data-explorer.oecd.org/vis?fs[0]=Topic%2C1%7CEconomy%23ECO%23%7CShort-term%20economic%20statistics%23ECO_STS%23&pg=0&fc=Topic&bp=true&snb=54&vw=tb&df[ds]=dsDisseminateFinalDMZ&df[id]=DSD_KEI%40DF_KEI&df[ag]=OECD.SDD.STES&df[vs]=4.0&dq=.M.PRVM.IX.BTE..&lom=LASTNPERIODS&lo=5&to[TIME_PERIOD]=false&ly[cl]=TIME_PERIOD&ly[rw]=REF_AREA

https://www.oecd.org/en/data/insights/data-explainers/2024/09/api.html

In [8]:
urlConsumerConfidence= 'https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_STES@DF_CS,4.0/.M.ES......?startPeriod=2024-01&endPeriod=2024-09&dimensionAtObservation=AllDimensions&format=csvfilewithlabels'
responseConsumerConfidence = requests.get(urlConsumerConfidence).content
urlKeyEconomic = 'https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_KEI@DF_KEI,4.0/.M.PRVM.IX.BTE..?startPeriod=2024-01&endPeriod=2024-09&dimensionAtObservation=AllDimensions&format=csvfilewithlabels'
responseKeyEconomic = requests.get(urlKeyEconomic).content
urlUnemployment = 'https://sdmx.oecd.org/public/rest/data/OECD.SDD.TPS,DSD_LFS@DF_IALFS_UNE_M,1.0/..._Z.Y._T.Y_GE15..M?startPeriod=2024-01&endPeriod=2024-09&dimensionAtObservation=AllDimensions&format=csvfilewithlabels'
responseUnemployment = requests.get(urlUnemployment).content
urlConsumerBaro ='https://sdmx.oecd.org/public/rest/data/OECD.SDD.STES,DSD_STES@DF_CSBAR,4.0/.M.......?startPeriod=2024-01&endPeriod=2024-09&dimensionAtObservation=AllDimensions&format=csvfilewithlabels'
responseConsumerBaro = requests.get(urlConsumerBaro).content

In [9]:
df_KeyEconomic = pd.read_csv(io.StringIO(responseKeyEconomic.decode('utf-8')))
df_Unemployment = pd.read_csv(io.StringIO(responseUnemployment.decode('utf-8')))
df_ConsumerConfidence = pd.read_csv(io.StringIO(responseConsumerConfidence.decode('utf-8')))
df_ConsumerBaro = pd.read_csv(io.StringIO(responseConsumerBaro.decode('utf-8')))

In [68]:
print(df_KeyEconomic.columns)

Index(['You have exceeded the number of requests for data downloads or very large data ranges permitted in the OECD Data API. Please contact us through the OECD Data Explorer (https://data-explorer.oecd.org) feedback form (indicating your IP address) to request more details or exceptions.'], dtype='object')


In [10]:
df_KeyEconomic = df_KeyEconomic[df_KeyEconomic['Reference area'].isin(['Italy', 'France', 'Mexico', 'Korea', 'Portugal'])]
df_Unemployment = df_Unemployment[df_Unemployment['Reference area'].isin(['Italy', 'France', 'Mexico', 'Korea', 'Portugal'])]
df_ConsumerConfidence = df_ConsumerConfidence[df_ConsumerConfidence['Reference area'].isin(['Italy', 'France', 'Mexico', 'Korea', 'Portugal'])]
df_ConsumerBaro = df_ConsumerBaro[df_ConsumerBaro['Reference area'].isin(['Italy', 'France', 'Mexico', 'Korea', 'Portugal'])]

In [11]:
df_Unemployment[['Reference area', 'TIME_PERIOD', 'OBS_VALUE']].sort_values('Reference area')

Unnamed: 0,Reference area,TIME_PERIOD,OBS_VALUE
185,France,2024-03,7.4
344,France,2024-09,7.5
182,France,2024-06,7.5
183,France,2024-05,7.5
184,France,2024-04,7.4
180,France,2024-08,7.5
186,France,2024-02,7.5
187,France,2024-01,7.5
181,France,2024-07,7.5
157,Italy,2024-07,6.3


In [12]:
df_ConsumerConfidence[['Reference area', 'TIME_PERIOD', 'OBS_VALUE']].sort_values('Reference area')

Unnamed: 0,Reference area,TIME_PERIOD,OBS_VALUE
243,France,2024-09,-26.6
69,France,2024-01,-21.4
70,France,2024-02,-29.6
71,France,2024-03,-29.1
72,France,2024-04,-34.4
73,France,2024-05,-30.4
74,France,2024-06,-32.6
75,France,2024-07,-38.5
76,France,2024-08,-27.7
210,Italy,2024-08,-18.0


In [13]:
df_ConsumerConfidence.info()

<class 'pandas.core.frame.DataFrame'>
Index: 45 entries, 5 to 243
Data columns (total 34 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   STRUCTURE                 45 non-null     object 
 1   STRUCTURE_ID              45 non-null     object 
 2   STRUCTURE_NAME            45 non-null     object 
 3   ACTION                    45 non-null     object 
 4   REF_AREA                  45 non-null     object 
 5   Reference area            45 non-null     object 
 6   FREQ                      45 non-null     object 
 7   Frequency of observation  45 non-null     object 
 8   MEASURE                   45 non-null     object 
 9   Measure                   45 non-null     object 
 10  UNIT_MEASURE              45 non-null     object 
 11  Unit of measure           45 non-null     object 
 12  ACTIVITY                  45 non-null     object 
 13  Economic activity         45 non-null     object 
 14  ADJUSTMENT      

In [14]:
df_Unemployment.info()

<class 'pandas.core.frame.DataFrame'>
Index: 45 entries, 6 to 353
Data columns (total 34 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   STRUCTURE                 45 non-null     object 
 1   STRUCTURE_ID              45 non-null     object 
 2   STRUCTURE_NAME            45 non-null     object 
 3   ACTION                    45 non-null     object 
 4   REF_AREA                  45 non-null     object 
 5   Reference area            45 non-null     object 
 6   MEASURE                   45 non-null     object 
 7   Measure                   45 non-null     object 
 8   UNIT_MEASURE              45 non-null     object 
 9   Unit of measure           45 non-null     object 
 10  TRANSFORMATION            45 non-null     object 
 11  Transformation            45 non-null     object 
 12  ADJUSTMENT                45 non-null     object 
 13  Adjustment                45 non-null     object 
 14  SEX             

In [33]:
df_final = pd.merge(df_KeyEconomic[['Reference area', 'TIME_PERIOD', 'OBS_VALUE']], df_Unemployment[['Reference area', 'TIME_PERIOD', 'OBS_VALUE']], on=['Reference area', 'TIME_PERIOD'])
df_final = df_final.rename(columns={'OBS_VALUE_x': 'Production', 
                                  'OBS_VALUE_y': 'Unemployment'})
print(df_final)

   Reference area TIME_PERIOD  Production  Unemployment
0          France     2024-09   98.350045      7.500000
1          France     2024-08   99.105151      7.500000
2          France     2024-07   98.202947      7.500000
3          France     2024-06   97.898943      7.500000
4          France     2024-05   97.163451      7.500000
5          France     2024-04   99.242443      7.400000
6          France     2024-03   98.654049      7.400000
7          France     2024-02   98.752115      7.500000
8           Italy     2024-09   98.956049      6.000000
9           Italy     2024-08   99.377138      6.100000
10          Italy     2024-07   99.377138      6.300000
11          Italy     2024-06  100.324590      6.700000
12          Italy     2024-05   99.903500      6.700000
13          Italy     2024-04   99.377138      6.700000
14          Italy     2024-03  100.429862      6.900000
15          Italy     2024-02  100.956224      7.300000
16          Korea     2024-02  118.848752      2

In [34]:
# Merge with consumer confidence data
df_final = pd.merge(df_final, 
                   df_ConsumerConfidence[['Reference area', 'TIME_PERIOD', 'OBS_VALUE']], 
                   on=['Reference area', 'TIME_PERIOD'])

df_final.to_csv('economic_indicators.csv', index=False)

print(df_final.to_csv)

<bound method NDFrame.to_csv of    Reference area TIME_PERIOD  Production  Unemployment  OBS_VALUE
0          France     2024-09   98.350045      7.500000     -26.60
1          France     2024-08   99.105151      7.500000     -27.70
2          France     2024-07   98.202947      7.500000     -38.50
3          France     2024-06   97.898943      7.500000     -32.60
4          France     2024-05   97.163451      7.500000     -30.40
5          France     2024-04   99.242443      7.400000     -34.40
6          France     2024-03   98.654049      7.400000     -29.10
7          France     2024-02   98.752115      7.500000     -29.60
8           Italy     2024-09   98.956049      6.000000     -14.80
9           Italy     2024-08   99.377138      6.100000     -18.00
10          Italy     2024-07   99.377138      6.300000     -14.20
11          Italy     2024-06  100.324590      6.700000     -16.40
12          Italy     2024-05   99.903500      6.700000     -19.30
13          Italy     2024-04 

In [35]:
# Merge with consumer confidence data
df_final = pd.merge(df_final, 
                   df_ConsumerBaro[['Reference area', 'TIME_PERIOD', 'OBS_VALUE']], 
                   on=['Reference area', 'TIME_PERIOD'])

df_final.to_csv('economic_indicators.csv', index=False)

print(df_final.to_csv)

<bound method NDFrame.to_csv of    Reference area TIME_PERIOD  Production  Unemployment  OBS_VALUE_x  \
0          France     2024-09   98.350045      7.500000       -26.60   
1          France     2024-08   99.105151      7.500000       -27.70   
2          France     2024-07   98.202947      7.500000       -38.50   
3          France     2024-06   97.898943      7.500000       -32.60   
4          France     2024-05   97.163451      7.500000       -30.40   
5          France     2024-04   99.242443      7.400000       -34.40   
6          France     2024-03   98.654049      7.400000       -29.10   
7          France     2024-02   98.752115      7.500000       -29.60   
8           Italy     2024-09   98.956049      6.000000       -14.80   
9           Italy     2024-08   99.377138      6.100000       -18.00   
10          Italy     2024-07   99.377138      6.300000       -14.20   
11          Italy     2024-06  100.324590      6.700000       -16.40   
12          Italy     2024-05   

In [36]:
df_final = df_final.rename(columns={
    'OBS_VALUE_x': 'Confidence',
    'OBS_VALUE_y': 'Baro'
})

df_final.to_csv('economic_indicators.csv', index=False)

print(df_final)

   Reference area TIME_PERIOD  Production  Unemployment  Confidence      Baro
0          France     2024-09   98.350045      7.500000      -26.60  0.200768
1          France     2024-08   99.105151      7.500000      -27.70  0.356471
2          France     2024-07   98.202947      7.500000      -38.50 -0.123837
3          France     2024-06   97.898943      7.500000      -32.60 -0.123683
4          France     2024-05   97.163451      7.500000      -30.40  0.279064
5          France     2024-04   99.242443      7.400000      -34.40 -0.324518
6          France     2024-03   98.654049      7.400000      -29.10  0.154772
7          France     2024-02   98.752115      7.500000      -29.60 -0.446832
8           Italy     2024-09   98.956049      6.000000      -14.80  0.145720
9           Italy     2024-08   99.377138      6.100000      -18.00 -0.224698
10          Italy     2024-07   99.377138      6.300000      -14.20  0.278341
11          Italy     2024-06  100.324590      6.700000      -16