In [None]:
# import api keys
from api_keys import *

## Flights API

In [None]:
from datetime import datetime, timedelta

import requests
from IPython.display import JSON

# ICAO code of the airport whose flights are requested.
airport_icoa = "EDDB"

# Query window: from the current hour to nine hours ahead.
# The clock is read once so both ends of the window derive from the same instant.
# NOTE: the two names below are inverted -- `to_local_time` holds the window
# START and `from_local_time` the window END. The names are kept unchanged;
# the URL uses them in start/end order.
# NOTE(review): datetime.now() is the local time of this machine; the variable
# names suggest the endpoint expects the airport's local time -- confirm.
now = datetime.now()
to_local_time = now.strftime('%Y-%m-%dT%H:00')
from_local_time = (now + timedelta(hours=9)).strftime('%Y-%m-%dT%H:00')
url = f"https://aerodatabox.p.rapidapi.com/flights/airports/icao/{airport_icoa}/{to_local_time}/{from_local_time}"

querystring = {"withLeg":"true","withCancelled":"true","withCodeshared":"true","withCargo":"true","withPrivate":"false","withLocation":"false"}

headers = {
    'x-rapidapi-host': "aerodatabox.p.rapidapi.com",
    'x-rapidapi-key': flight_api_key
    }

# A timeout keeps the cell from hanging forever on a stalled connection.
response = requests.request("GET", url, headers=headers, params=querystring, timeout=30)
# Fail here on an HTTP error instead of passing an error payload to later cells.
response.raise_for_status()

response.json()

In [None]:
### Option 1
# Decode the flights response and keep only its list of arrival records.
flights_payload = response.json()
arrivals_berlin = flights_payload['arrivals']

def get_flight_info(flight_json, icao_code=None):
    """Flatten one arrival record into a plain dict (one future table row).

    Parameters
    ----------
    flight_json : dict
        One element of the ``arrivals`` list. ``departure.airport.name``,
        ``arrival.scheduledTimeLocal`` and ``status`` are required; a missing
        one raises ``KeyError``. ``arrival.terminal`` and ``aircraft.model``
        are optional.
    icao_code : str, optional
        ICAO code stored in the row. Defaults to the notebook-level
        ``airport_icoa`` when omitted.

    Returns
    -------
    dict
        Keys ``dep_airport``, ``sched_arr_loc_time``, ``terminal``, ``status``,
        ``aircraft`` and ``icao_code``. Optional fields are ``None`` when the
        record does not carry them.
    """
    def _optional(section, field):
        # Only "key absent" (KeyError) or "section is not subscriptable"
        # (TypeError, e.g. the section is None) mean "missing". Anything else
        # is a real problem and must not be swallowed.
        try:
            return flight_json[section][field]
        except (KeyError, TypeError):
            return None

    if icao_code is None:
        # Fall back to the airport queried by the flights request cell.
        icao_code = airport_icoa

    return {
        'dep_airport': flight_json['departure']['airport']['name'],
        'sched_arr_loc_time': flight_json['arrival']['scheduledTimeLocal'],
        'terminal': _optional('arrival', 'terminal'),
        'status': flight_json['status'],
        'aircraft': _optional('aircraft', 'model'),
        'icao_code': icao_code
    }


import pandas as pd

# Flatten every raw arrival record, then collect the rows into a DataFrame.
flight_rows = list(map(get_flight_info, arrivals_berlin))
arrivals_berlin = pd.DataFrame(flight_rows)
arrivals_berlin

## Weather API

In [None]:
from IPython.display import JSON

city = "Berlin"
country = "DE"

# HTTPS keeps the API key out of clear-text traffic, and `params` lets requests
# build and URL-encode the query string instead of a hand-assembled f-string.
response = requests.get(
    'https://api.openweathermap.org/data/2.5/forecast',
    params={
        'q': f'{city},{country}',
        'appid': OWM_key,
        'units': 'metric',
        'lang': 'en',
    },
    timeout=30,
)

# Show the HTTP status so a failed request is visible before parsing.
response.status_code

In [None]:
forecast_api = response.json()['list']
# look for the fields that could be relevant:
# better field descriptions https://www.weatherbit.io/api/weather-forecast-5-day


def _optional_float(record, *path):
    """Return the value found by following ``path`` through ``record`` as a float.

    Returns 0 when a key is absent, an intermediate value is not subscriptable,
    or the value cannot be converted to float. Any other exception propagates.
    """
    try:
        for key in path:
            record = record[key]
        return float(record)
    except (KeyError, TypeError, ValueError):
        return 0


weather_info = []

for forecast_3h in forecast_api:
    weather_info.append({
        # datetime utc
        'datetime': forecast_3h['dt_txt'],
        # temperature
        'temperature': forecast_3h['main']['temp'],
        # wind
        'wind': forecast_3h['wind']['speed'],
        # probability of precipitation
        'prob_perc': _optional_float(forecast_3h, 'pop'),
        # rain: the 'rain' entry may be absent from a forecast step
        'rain_qty': _optional_float(forecast_3h, 'rain', '3h'),
        # snow: the 'snow' entry may be absent from a forecast step
        'snow': _optional_float(forecast_3h, 'snow', '3h'),
        # join key shared with the cities table
        'municipality_iso_country': city + ',' + country,
    })

weather_data = pd.DataFrame(weather_info)
weather_data.head()

## Population data

In [None]:
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import unicodedata

# Wikipedia article titles of the cities to scrape, in table order.
cities = [
    'Berlin',
    'Paris',
    'Amsterdam',
    'Barcelona',
    'Rome',
    'Lisbon',
    'Prague',
    'Vienna',
    'Madrid',
]

def City_info(soup):
    """Extract selected infobox facts from a parsed city page.

    Parameters
    ----------
    soup : BeautifulSoup
        Parsed page. Only ``soup.h1`` and ``soup.select_one`` are used.

    Returns
    -------
    dict
        Always contains ``city`` (text of the page's ``<h1>``). The keys
        ``mayor``, ``city_size``, ``elevation``, ``city_population``,
        ``urban_population``, ``metro_population``, ``lat`` and ``long`` are
        present only when the corresponding element is found on the page.
    """
    def _clean(text):
        # NFKD replaces compatibility characters such as non-breaking spaces
        # with their plain equivalents.
        return unicodedata.normalize('NFKD', text)

    ret_dict = {'city': soup.h1.get_text()}

    # Each selector is evaluated once. A missing label or a missing value cell
    # simply leaves the key out of the result.
    mayor_label = soup.select_one('.mergedrow:-soup-contains("Mayor")>.infobox-label')
    if mayor_label is not None:
        mayor_cell = mayor_label.find_next_sibling()
        if mayor_cell is not None:
            ret_dict['mayor'] = _clean(mayor_cell.get_text())

    size_label = soup.select_one('.mergedrow:-soup-contains("City")>.infobox-label')
    if size_label is not None:
        size_cell = size_label.find_next_sibling('td')
        if size_cell is not None:
            ret_dict['city_size'] = _clean(size_cell.get_text())

    elevation_cell = soup.select_one('.mergedtoprow:-soup-contains("Elevation")>.infobox-data')
    if elevation_cell is not None:
        ret_dict['elevation'] = _clean(elevation_cell.get_text())

    population_row = soup.select_one('.mergedtoprow:-soup-contains("Population")')
    if population_row is not None:
        # find_next is the current name of the deprecated findNext alias.
        population_cell = population_row.find_next('td')
        if population_cell is not None:
            ret_dict['city_population'] = population_cell.get_text()

    urban_link = soup.select_one('.infobox-label>[title^=Urban]')
    if urban_link is not None:
        urban_cell = urban_link.find_next('td')
        if urban_cell is not None:
            ret_dict['urban_population'] = urban_cell.get_text()

    metro_link = soup.select_one('.infobox-label>[title^=Metro]')
    if metro_link is not None:
        metro_cell = metro_link.find_next('td')
        if metro_cell is not None:
            ret_dict['metro_population'] = metro_cell.get_text()

    latitude = soup.select_one('.latitude')
    if latitude is not None:
        ret_dict['lat'] = latitude.get_text()

    longitude = soup.select_one('.longitude')
    if longitude is not None:
        ret_dict['long'] = longitude.get_text()

    return ret_dict

# Download and parse each city's Wikipedia page, one row per city.
# The loop variables have their own names so they do not overwrite the `city`
# and `url` globals used by the weather and flights cells.
list_of_city_info = []
for city_name in cities:
    city_url = 'https://en.wikipedia.org/wiki/{}'.format(city_name)
    # The second positional argument of requests.get is `params`, so the parser
    # name must not go there. It belongs to BeautifulSoup below.
    web = requests.get(city_url, timeout=30)
    # Stop on an HTTP error rather than scraping an error page.
    web.raise_for_status()
    soup = bs(web.content, 'html.parser')
    list_of_city_info.append(City_info(soup))
df_cities = pd.DataFrame(list_of_city_info)
df_cities

In [None]:
# Join key shared with the airports and weather tables, in the form
# "<municipality>,<two-letter country code>". The entries are positional and
# must follow the row order of `df_cities`.
df_cities['municipality_iso_country'] = [
    'Berlin,DE',
    'Paris,FR',
    'Amsterdam,NL',
    'Barcelona,ES',
    'Rome,IT',
    'Lisbon,PT',
    # Was 'Prague,CZE'; every other key uses a two-letter country code.
    'Prague,CZ',
    'Vienna,AT',
    'Madrid,ES'
]

## Airports data

In [None]:
import pandas as pd

# Columns kept from the raw airports file.
airport_columns = [
    'name',
    'latitude_deg',
    'longitude_deg',
    'iso_country',
    'iso_region',
    'municipality',
    'gps_code',
    'iata_code',
]

# Large airports only, with `gps_code` exposed under the name `icao_code`.
large_airports = (
    pd.read_csv('airports.csv')
    .query('type == "large_airport"')
    .filter(items=airport_columns)
    .rename(columns={'gps_code': 'icao_code'})
)

# Add the "<municipality>,<iso_country>" join key.
airports_cities = large_airports.assign(
    municipality_iso_country=large_airports['municipality'] + ',' + large_airports['iso_country']
)
airports_cities.head()

## Check the tables

In [None]:
# Preview the first rows of the arrivals table.
arrivals_berlin.head()

In [None]:
# Preview the first rows of the weather forecast table.
weather_data.head()

In [None]:
# Preview the first rows of the scraped cities table.
df_cities.head()

In [None]:
# Preview the first rows of the airports table.
airports_cities.head()

In [None]:
# airports_cities.merge(arrivals_berlin, on='icao_code', how='inner').merge(weather_data, on='municipality_iso_country', how='inner').head()

## Update data into database

First, run this script in MySQL to create the database and its tables:

In [None]:
'''
-- IF EXISTS lets the script run on a server where `gans` was never created.
DROP DATABASE IF EXISTS gans;
CREATE DATABASE IF NOT EXISTS gans;
USE gans;

DROP TABLE IF EXISTS cities;
CREATE TABLE IF NOT EXISTS cities (
    city VARCHAR(200),
    mayor TEXT,
    city_size TEXT,
    elevation TEXT,
    city_population TEXT,
    urban_population TEXT,
    metro_population TEXT,
    latitude TEXT,
    longitude TEXT,
    municipality_iso_country varchar(200),
    PRIMARY KEY(municipality_iso_country)
);

DROP TABLE IF EXISTS airports;
CREATE TABLE IF NOT EXISTS airports(
    name text,
    latitude_deg float,
    longitude_deg float,
    iso_country varchar(10),
    iso_region varchar(10),
    municipality text,
    icao_code varchar(4),
    iata_code varchar(6),
    municipality_iso_country varchar(200),
    primary key(icao_code)
    -- foreign key (municipality_iso_country) references cities(municipality_iso_country)
);


DROP TABLE IF EXISTS weather;
CREATE TABLE IF NOT EXISTS weather (
    weather_id int auto_increment,
    datetime datetime,
    temperature float,
    wind float,
    prob_perc float,
    rain_qty float,
    -- float, like rain_qty: the notebook stores snow amounts as floats
    snow float,
    municipality_iso_country varchar(200),
    primary key(weather_id),
    foreign key (municipality_iso_country) references cities(municipality_iso_country)
);

DROP TABLE IF EXISTS arrivals;
CREATE TABLE IF NOT EXISTS arrivals(
    arrivals_id int auto_increment,
    dep_airport text,
    sched_arr_loc_time datetime,
    terminal text,
    status text,
    aircraft text,
    icao_code varchar(4),
    primary key (arrivals_id)
    -- foreign key (icao_code) references airports(icao_code)
);


USE gans;
SELECT * FROM arrivals;
SELECT * FROM weather;
SELECT * FROM cities;
SELECT * FROM airports;
'''


### `sqlalchemy`

#### Establish the connection

In [None]:
import pandas as pd
import sqlalchemy

## If you are running locally:

In [None]:
# my local database
import os
from urllib.parse import quote_plus

schema="gans"
host="127.0.0.1"
user="root"
# Read the password from the GANS_DB_PASSWORD environment variable so the real
# one need not be saved in the notebook. The placeholder is kept as the fallback.
password=os.environ.get("GANS_DB_PASSWORD", "password")
port=3306
# Credentials are URL-escaped so characters such as '@' or '/' in a password
# cannot break the connection string.
con = f'mysql+pymysql://{quote_plus(user)}:{quote_plus(password)}@{host}:{port}/{schema}'

## If you are running on the AWS instance:

In [None]:
# my AWS instance
import os
from urllib.parse import quote_plus

schema="gans"
# Fill in the endpoint of the AWS database instance. While this is empty the
# connection string below has no host, so run only ONE of the two connection
# cells (local or AWS).
host=""
user="admin"
# Read the password from the GANS_DB_PASSWORD environment variable so the real
# one need not be saved in the notebook. The placeholder is kept as the fallback.
password=os.environ.get("GANS_DB_PASSWORD", "password")
port=3306
# Credentials are URL-escaped so characters such as '@' or '/' in a password
# cannot break the connection string.
con = f'mysql+pymysql://{quote_plus(user)}:{quote_plus(password)}@{host}:{port}/{schema}'

#### Update the tables

In [None]:
# Rename the scraped coordinate columns to the names used by the `cities`
# table, then append the rows.
cities_for_db = df_cities.rename(columns={'lat': 'latitude', 'long': 'longitude'})
cities_for_db.to_sql('cities', con=con, if_exists='append', index=False)

In [None]:
# Keep only airports without any missing value, then append them to `airports`.
airports_for_db = airports_cities.dropna()
airports_for_db.to_sql('airports', if_exists='append', con=con, index=False)

In [None]:
# Parse the forecast timestamps into real datetimes, then append to `weather`.
weather_for_db = weather_data.assign(datetime=pd.to_datetime(weather_data['datetime']))
weather_for_db.to_sql('weather', if_exists='append', con=con, index=False)

In [None]:
import numpy as np

# Missing values (absent terminal or aircraft model) are stored as 'unknown'.
# fillna states this directly. The previous `.replace({np.nan}, ...)` passed a
# set, which is not a documented `to_replace` type.
# NOTE(review): if the scheduled-time strings carry a UTC offset, pd.to_datetime
# yields timezone-aware values -- confirm how they end up in the DATETIME column.
(
arrivals_berlin
    .fillna('unknown')
    .assign(sched_arr_loc_time = lambda x: pd.to_datetime(x['sched_arr_loc_time']))
    .to_sql('arrivals', if_exists='append', con=con, index=False))