In [1]:
import datetime
from IPython.display import JSON
import pandas as pd
import requests
import API_keys
import awswrangler as wr
import sqlalchemy

In [2]:
def get_cities_dataframe():
    cities = pd.read_csv('data/cities.csv')
    return cities[:1]

In [69]:
cities = get_cities_dataframe()

In [53]:
jsons = get_jsons_from_AerodataboxAPI(cities['icao'].iloc[0])

In [54]:
JSON(jsons[0])

<IPython.core.display.JSON object>

In [55]:
dic = create_dictionary_from_json(jsons)

In [81]:
df = create_dataframe_from_dictionaries(dic)

In [82]:
df = df.assign(CityCountry = "FF")

In [83]:
df["Arrival (Local time)"] = pd.to_datetime(df["Arrival (Local time)"])

In [85]:
df

Unnamed: 0,From,Arrival (Local time),Arrival (UTC time),Terminal,Airline,Flight number,Aircraft model,CityCountry
0,Moscow,2021-11-27 01:35:00,2021-11-26 22:35,Unknown,Aeroflot,SU 2134,Airbus A321,FF
1,Cape Town,2021-11-27 05:35:00,2021-11-27 02:35,Unknown,Turkish,TK 45,Airbus A330-300,FF
2,Nur-Sultan (Astana),2021-11-27 06:10:00,2021-11-27 03:10,Unknown,Turkish,TK 355,Boeing 737-800,FF
3,Istanbul,2021-11-27 06:35:00,2021-11-27 03:35,Unknown,Turkish,TK 583,Airbus A330-200,FF
4,Muscat,2021-11-27 06:05:00,2021-11-27 03:05,Unknown,Turkish,TK 775,Boeing 737,FF
...,...,...,...,...,...,...,...,...
424,Tashkent,2021-11-27 20:10:00,2021-11-27 17:10,Unknown,Uzbekistan Airways,HY 273,Boeing 787,FF
425,Moscow,2021-11-27 16:00:00,2021-11-27 13:00,Unknown,Turkish,TK 414,Airbus A321,FF
426,Moscow,2021-11-27 19:45:00,2021-11-27 16:45,Unknown,Turkish,TK 418,Airbus A321,FF
427,Ufa,2021-11-27 15:30:00,2021-11-27 12:30,Unknown,Pobeda,DP 977,Boeing 737-800,FF


In [63]:
big_data = pd.DataFrame()

In [64]:
big_data = big_data.append(df,ignore_index=True)

In [65]:
big_data

Unnamed: 0,From,Arrival (Local time),Arrival (UTC time),Terminal,Airline,Flight number,Aircraft model,CityCountry
0,Moscow,2021-11-27 01:35,2021-11-26 22:35,Unknown,Aeroflot,SU 2134,Airbus A321,FF
1,Cape Town,2021-11-27 05:35,2021-11-27 02:35,Unknown,Turkish,TK 45,Airbus A330-300,FF
2,Nur-Sultan (Astana),2021-11-27 06:10,2021-11-27 03:10,Unknown,Turkish,TK 355,Boeing 737-800,FF
3,Istanbul,2021-11-27 06:35,2021-11-27 03:35,Unknown,Turkish,TK 583,Airbus A330-200,FF
4,Muscat,2021-11-27 06:05,2021-11-27 03:05,Unknown,Turkish,TK 775,Boeing 737,FF
...,...,...,...,...,...,...,...,...
424,Tashkent,2021-11-27 20:10,2021-11-27 17:10,Unknown,Uzbekistan Airways,HY 273,Boeing 787,FF
425,Moscow,2021-11-27 16:00,2021-11-27 13:00,Unknown,Turkish,TK 414,Airbus A321,FF
426,Moscow,2021-11-27 19:45,2021-11-27 16:45,Unknown,Turkish,TK 418,Airbus A321,FF
427,Ufa,2021-11-27 15:30,2021-11-27 12:30,Unknown,Pobeda,DP 977,Boeing 737-800,FF


In [3]:
def get_jsons_from_AerodataboxAPI(icao):
    today = datetime.date.today()
    tomorrow = str(today + datetime.timedelta(days=1))
    
    times = [["00:00","11:59"],["12:00","23:59"]]
    jsons = []
    for time in times:
        url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{icao}/{tomorrow}T{time[0]}/{tomorrow}T{time[1]}"

        querystring = {"withLeg":"true","direction":"Arrival","withCancelled":"false","withCodeshared":"true","withCargo":"false","withPrivate":"false","withLocation":"false"}

        headers = {
            'x-rapidapi-host': "aerodatabox.p.rapidapi.com",
            'x-rapidapi-key': API_keys.Airports_key
            }

        jsons.append(requests.request("GET", url, headers=headers, params=querystring).json())
    return jsons

In [8]:
def create_dictionary_from_json(jsons):
    dictionary = {"From_city":[],"Arrival_Local_time":[], "Arrival_UTC_time":[],"Terminal":[],"Airline":[],"Flight_number":[],"Aircraft_model":[]}
    for j in jsons:
        for flight in j['arrivals']:
            try:
                dictionary["From_city"].append(flight['departure']["airport"]['name'])
            except:
                dictionary["From_city"].append("Unknown")
            try:
                dictionary["Arrival_Local_time"].append(flight['arrival']["scheduledTimeLocal"][:-6])
            except:
                dictionary["Arrival_Local_time"].append("Unknown")
            try:
                dictionary["Arrival_UTC_time"].append(flight['arrival']["scheduledTimeUtc"][:-1])
            except:
                dictionary["Arrival_UTC_time"].append("Unknown")
            try:
                dictionary["Terminal"].append(flight['arrival']["terminal"])
            except:
                dictionary["Terminal"].append("Unknown")
            try:
                dictionary["Airline"].append(flight['airline']["name"])
            except:
                dictionary["Airline"].append("Unknown")
            try:
                dictionary["Flight_number"].append(flight['number'])
            except:
                dictionary["Flight_number"].append("Unknown")
            try:
                dictionary["Aircraft_model"].append(flight['aircraft']["model"])
            except:
                dictionary["Aircraft_model"].append("Unknown")
    return dictionary

In [9]:
def create_dataframe_from_dictionaries(dictionary):
    flights = pd.DataFrame.from_dict(dictionary)
    return flights

In [10]:
def get_all_info(cities):
    big_table = pd.DataFrame()
    for index, city in cities.iterrows():
        jsons = get_jsons_from_AerodataboxAPI(city['icao'])
        dictionary = create_dictionary_from_json(jsons)
        dataframe = create_dataframe_from_dictionaries(dictionary)
        dataframe = dataframe.assign(CityCountry = city['CityCountry'])
        dataframe = dataframe.assign(icao = city['icao'])
        dataframe["Arrival_Local_time"] = pd.to_datetime(dataframe["Arrival_Local_time"])
        dataframe["Arrival_UTC_time"] = pd.to_datetime(dataframe["Arrival_UTC_time"])
    big_table = big_table.append(dataframe,ignore_index=True)
    return big_table

In [11]:
big_table = get_all_info(get_cities_dataframe())

In [12]:
big_table

Unnamed: 0,From_city,Arrival_Local_time,Arrival_UTC_time,Terminal,Airline,Flight_number,Aircraft_model,CityCountry,icao
0,Málaga,2021-11-27 00:05:00,2021-11-26 21:05:00,Unknown,Turkish,TK 1304,Airbus A321,"Istanbul, TR",LTFM
1,Bucharest,2021-11-27 00:05:00,2021-11-26 21:05:00,Unknown,Turkish,TK 1046,Airbus A321,"Istanbul, TR",LTFM
2,Skopje,2021-11-27 00:10:00,2021-11-26 21:10:00,Unknown,Turkish,TK 1006,Boeing 737-800,"Istanbul, TR",LTFM
3,Paris,2021-11-27 00:10:00,2021-11-26 21:10:00,Unknown,Turkish,TK 1828,Airbus A330-300,"Istanbul, TR",LTFM
4,Athens,2021-11-27 00:10:00,2021-11-26 21:10:00,Unknown,Turkish,TK 1844,Airbus A321,"Istanbul, TR",LTFM
...,...,...,...,...,...,...,...,...,...
424,İzmir,2021-11-27 23:50:00,2021-11-27 20:50:00,Unknown,Turkish,TK 2337,Boeing 777-300ER,"Istanbul, TR",LTFM
425,Belgrade,2021-11-27 23:55:00,2021-11-27 20:55:00,Unknown,Turkish,TK 1084,Boeing 737,"Istanbul, TR",LTFM
426,Hatay,2021-11-27 23:55:00,2021-11-27 20:55:00,Unknown,Turkish,TK 2255,Airbus A321,"Istanbul, TR",LTFM
427,Baku,2021-11-27 23:55:00,2021-11-27 20:55:00,Unknown,Turkish,TK 335,Airbus A321,"Istanbul, TR",LTFM


In [17]:
def create_mysql_table(big_table):
    schema="gans"
    host="127.0.0.1"
    user="root"
    password=API_keys.mysqlPassword
    port=3306
    con = f'mysql+pymysql://{user}:{password}@{host}:{port}/{schema}'
    big_table.to_sql('flights',con=con,if_exists='append',index=False)

In [19]:
create_mysql_table(big_table)

## Create pipeline

In [None]:
p = (get_cities_dataframe()
     .pipe(clean_unit_price)
     .pipe(tofloat,col_to_float)
     .pipe(change_col_name,old,new)
     .pipe(delete_orderlines_not_in_orders)
     .pipe(create_total_sum)
     .pipe(deleting_outliers,orders)
)