# TIME ZONE CODE

In [386]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import numpy as np
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

## Airports Time scraping


The data of the time zone registred for each city is obtained from the web Zeitverschiebung ('https://www.zeitverschiebung.net/'), where it could be calculated the time difference between departure and arrival cities.

First of all, we will get the airports time zone data from the API of Aviation Edge Database ('https://aviation-edge.com/').

In [387]:
#API route
aed_city = 'https://aviation-edge.com/v2/public/cityDatabase?key=25034e-9edde7&codeIataCity='
#Flight to be requested (example)
city_code = 'MAD'
url = (f'{aed_city}{city_code}')
print(url)
html_city = requests.get(url).json()
html_city

https://aviation-edge.com/v2/public/cityDatabase?key=25034e-9edde7&codeIataCity=MAD


[{'GMT': '1',
  'cityId': 4421,
  'codeIataCity': 'MAD',
  'codeIso2Country': 'ES',
  'geonameId': 3117735,
  'latitudeCity': 40.416691,
  'longitudeCity': -3.700345,
  'nameCity': 'Madrid',
  'timezone': 'Europe/Madrid'}]

In [388]:
html_city_dic = html_city[0]
html_city_dic

{'GMT': '1',
 'cityId': 4421,
 'codeIataCity': 'MAD',
 'codeIso2Country': 'ES',
 'geonameId': 3117735,
 'latitudeCity': 40.416691,
 'longitudeCity': -3.700345,
 'nameCity': 'Madrid',
 'timezone': 'Europe/Madrid'}

In [389]:
city_df = pd.DataFrame(html_city_dic, index=[0])
city_df

Unnamed: 0,GMT,cityId,codeIataCity,codeIso2Country,geonameId,latitudeCity,longitudeCity,nameCity,timezone
0,1,4421,MAD,ES,3117735,40.416691,-3.700345,Madrid,Europe/Madrid


In [390]:
timezone = city_df.loc[0,'timezone']
print(timezone)

Europe/Madrid


Afterwards, we need to do web scraping in Zeitverschiebung to get the time difference between cities.

import os

os.environ['PATH'] = f'{os.environ["PATH"]}:{os.getcwd()}/drivers'
    
os.environ['PATH']

In [391]:
driver = webdriver.Chrome('/home/viki/Bootcamp/drivers/chromedriver')
driver.get('https://www.zeitverschiebung.net/es/')

In [392]:
#Click cookies button
cookies_button = driver.find_element_by_class_name("css-flk0bs")
driver.execute_script("arguments[0].click();", cookies_button)


In [393]:
departure_timezone = 'Europe/Madrid'
arrival_timezone = 'America/Anchorage'

#Introduce departure time zone for the search
departure_box = driver.find_element_by_id('diff_first')
departure_box.send_keys(departure_timezone)
wait = WebDriverWait(driver, 10)
#box = wait.until(EC.expected_conditions.element_to_be_clickable(departure_box))
el = WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, "//*[@class='form-control']")))
el.send_keys(u'\ue007')
#wait = WebDriverWait(driver, 10)
#box = wait.until(EC.element_to_be_clickable((By.ID, 'diff_first')))
#departure_box.send_keys(Keys.ARROW_DOWN)
#box.send_keys(u'\ue007')


ElementNotInteractableException: Message: element not interactable
  (Session info: chrome=89.0.4389.114)


In [None]:
#Introduce arrival time zone for the search                                
arrival_box = driver.find_element_by_id('diff_second')
arrival_box.send_keys(arrival_timezone)

departure_box.send_keys(Keys.ARROW_DOWN)
arrival_box.send_keys(u'\ue007')


In [None]:
#Get hour difference data

hour_diff = driver.find_element_by_class_name('difference-stunden.text-center').text
hour_diff

In [None]:
type(hour_diff)

In [None]:
hour_diff[1]

In [None]:
hour_diff = re.sub(r'[^-\d]', '', hour_diff[0])
hour_diff

In [None]:
#Click display forecast button
display_button = driver.find_element_by_class_name("btn.btn-default.btn-sm")

display_button.click()

In [None]:
#Get the information of the weather in 7 days, and introduce it in a list
table = driver.find_element_by_class_name('panel.panel-primary.article')

rows = table.find_elements_by_tag_name('table')

weather_raw = []
for row in rows:
    cells = row.find_elements_by_tag_name('td')
    weather_raw.append(cells[1].text)

print(weather_raw)

In [None]:
#Divide the list in a list of list, by days
weather_raw_list = [[x] for x in weather_raw]
print(weather_raw_list)

In [None]:
#Divide each sublist by the different weather features
weather_raw_divided = [re.split('[,.]', y) for x in weather_raw_list for y in x]
print(weather_raw_divided)

In [None]:
'''
Function to order the data as we need and fill the empty data with 0. 
This will help when the data will be introduce in a dataset 
The data will be ordered as follows:
1 - Forecast
2 - High temperature
3 - Low temperature
4 - Probability of precipitation
5 - Wind
6 - Barometric pressure



'''

def reorder_func(day_weather_list):
    reordered_weather = [day_weather_list[0], day_weather_list[1], day_weather_list[2]]
    for i in range(2, len(day_weather_list)):
        if 'precipitation' in day_weather_list[i]:
            reordered_weather.append(day_weather_list[i])
    if len(reordered_weather)< 4:
        reordered_weather.append('0')

    for i in range(2, len(day_weather_list)):
        if 'wind' in day_weather_list[i]:
            reordered_weather.append(day_weather_list[i])
    if len(reordered_weather)< 5:
        reordered_weather.append('0')

    for i in range(2, len(day_weather_list)):
        if 'pressure' in day_weather_list[i]:
            reordered_weather.append(day_weather_list[i])
    if len(reordered_weather)< 6:
        reordered_weather.append('0')

    return reordered_weather

In [None]:
ordered_weather =[]
for x in weather_raw_divided:
    sublist = reorder_func(x)
    ordered_weather.append(sublist)
print(ordered_weather)

In [None]:
#Introduce all the data in a dataset
cols = ['Forecast', 'High temperature (ºC)', 'Low temperature (ºC)', 
        'Probability of precipitation (%)', 'Wind', 'Barometric pressure (mb)']
weather_df = pd.DataFrame(ordered_weather, columns = cols)
display(weather_df)

In [None]:
#Clean the data in Forecast
def not_day(text):
    short_text = ' '.join(text.split(' ')[:-1])
    return short_text
weather_df['Forecast'] = weather_df['Forecast'].apply(not_day)

In [None]:
#Clean High temperature, Low temperature, Probability of precipitation and Barometric pressure
#Keeping only numbers
def only_num(text):
    result = (re.findall('\d+', text))
    return ''.join(result)

weather_df['High temperature (ºC)'] = weather_df['High temperature (ºC)'].apply(only_num)
weather_df['Low temperature (ºC)'] = weather_df['Low temperature (ºC)'].apply(only_num)
weather_df['Probability of precipitation (%)'] = weather_df['Probability of precipitation (%)'].apply(only_num)
weather_df['Barometric pressure (mb)'] = weather_df['Barometric pressure (mb)'].apply(only_num)
display(weather_df)