# FLIGHTS INFORMATION CODE

In [200]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import re
import numpy as np
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By

## Flightradar24 scraping


The data related to the flights requested is obtained from the web Flightradar24 ('https://www.flightradar24.com/'). We will use selenium to proceed to keep the data.

In [201]:
#Flightradar flight search route
flight_web = 'https://www.flightradar24.com/data/flights'
#Flight to be requested (example)
flight = 'AZ1795'
#Date of the flight defined
day = '01'
month = 'May'
year = '2021'

date_search = f'{day} {month[0:3]} {year}'
date_search


'01 May 2021'

In [202]:
#Define the driver
driver = webdriver.Chrome('/home/viki/Bootcamp/drivers/chromedriver')
driver.get(flight_web)

In [203]:
#Click cookies button
cookies_button = driver.find_element_by_class_name("btn.btn-blue")
driver.execute_script("arguments[0].click();", cookies_button)

In [204]:
flight_box = driver.find_element_by_id('searchFlight')
flight_box.send_keys(flight)
WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.CLASS_NAME, "tt-dataset-aircraftList")))
flight_selection = driver.find_element_by_class_name('tt-dataset-aircraftList').click()
#flight_box = driver.find_element_by_id('searchFlight')
#flight_box.send_keys(Keys.ENTER)
                            

In [222]:
table = driver.find_element_by_css_selector('tbody')
rows = table.find_elements_by_class_name('data-row')
flight_raw = []
for row in rows:
    cells = row.find_elements_by_tag_name('td')
    for cell in cells:
        flight_raw.append(str(cell.text))

print(flight_raw)

['—\n02 May 2021\n-\nScheduled\nSTD\n17:20\nATD\n—\nSTA\n18:25\nFROM\nRome (FCO)\nTO\nPalermo (PMO)', '', '', '', '', '', '', '', '', '', '', '', '', '', '—\n01 May 2021\n-\nScheduled\nSTD\n17:20\nATD\n—\nSTA\n18:25\nFROM\nRome (FCO)\nTO\nPalermo (PMO)', '', '', '', '', '', '', '', '', '', '', '', '', '', '—\n30 Apr 2021\n-\nScheduled\nSTD\n17:20\nATD\n—\nSTA\n18:25\nFROM\nRome (FCO)\nTO\nPalermo (PMO)', '', '', '', '', '', '', '', '', '', '', '', '', '', '—\n29 Apr 2021\n-\nScheduled\nSTD\n17:20\nATD\n—\nSTA\n18:25\nFROM\nRome (FCO)\nTO\nPalermo (PMO)', '', '', '', '', '', '', '', '', '', '', '', '', '', '—\n28 Apr 2021\n-\nScheduled\nSTD\n17:20\nATD\n—\nSTA\n18:25\nFROM\nRome (FCO)\nTO\nPalermo (PMO)', '', '', '', '', '', '', '', '', '', '', '', '', '', '—\n27 Apr 2021\n-\nScheduled\nSTD\n17:20\nATD\n—\nSTA\n18:25\nFROM\nRome (FCO)\nTO\nPalermo (PMO)', '', '', '', '', '', '', '', '', '', '', '', '', '', '—\n26 Apr 2021\n-\nScheduled\nSTD\n17:20\nATD\n—\nSTA\n18:25\nFROM\nRome (FCO)\n

In [None]:
flight_lists = [flight_raw[x:x+14] for x in range(0, len(flight_raw), 14)]
print(flight_lists)

In [None]:
flight_df_raw = pd.DataFrame(flight_lists)
display(flight_df_raw)

In [None]:
cols = ['Date', 'From', 'To', 'Aircraft', 'Flight Time', 'Scheduled Time Departure', 
        'Actual Time Departure', 'Scheduled Time Arrival', 'Status']
flight_df_clean = flight_df_raw[[2,3,4,5,6,7,8,9,11]]
flight_df_clean.columns = cols
flight_df_clean

In [None]:
flight_df_clean[['Departure city', 'Departure code']] = flight_df_clean['From'].str.split('(',expand=True)
flight_df_clean[['Arrival city', 'Arrival code']] = flight_df_clean['To'].str.split('(',expand=True)
flight_df_clean

In [None]:
flight_df_clean['Departure code'] = flight_df_clean['Departure code'].map(lambda x: x.rstrip(')'))
flight_df_clean['Arrival code'] = flight_df_clean['Arrival code'].map(lambda x: x.rstrip(')'))
flight_df_clean

In [None]:
flight_df = flight_df_clean[['Date', 'Departure city', 'Departure code', 'Arrival city', 'Arrival code', 
                             'Aircraft', 'Flight Time', 'Scheduled Time Departure', 'Actual Time Departure', 
                             'Scheduled Time Arrival', 'Status']]
flight_df

## Airports information (dataframe)

There are three different dataframes that contains airports information. Let's see the information that is contained in each one.

In [None]:
    input_ele = driver.find_element_by_css_selector('input#autoCampusId');
    input_ele.sendKeys(campus_name)

    WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.CSS_SELECTOR , "div.ac_results li"))

    auto_suggest_list = driver.find_elements_by_css_selector('div.ac_results li')

    for item in auto_suggest_list:

        if(item.text.strip().upper() != campus_name.strip().upper()):
            input_ele.sendKeys(Keys.DOWN)
        else:
            input_ele.sendKeys(Keys.ENTER)
            break

In [None]:
airports = pd.read_csv('data/airports-extended.csv', header = None)
airports

In [None]:
airports1 = pd.read_csv('data/airports (1).csv')
airports1

In [None]:
world_airports = pd.read_csv('data/world-airports.csv')
world_airports

In [None]:
world_airports.columns

We will work with world_airports dataframe, as it is the one that have more information and better ordered.

In [None]:
#Search of the city of departure
departure_code = flight_df.loc['iataCode','departure']
print(departure_code)

In [None]:
#Location of the airport with that iata code in word_airports dataframe 
departure_airport = world_airports.loc[world_airports['iata_code'] == departure_code]
departure_airport

In [None]:
departure_airport.columns

In [None]:
departure_city = departure_airport.loc[departure_airport.index[0] ,'municipality']
#departure_city = departure_airport.iloc[0]['municipality']
print(departure_city)

In [None]:
#Search of the city of arrival
arrival_code = flight_df.loc['iataCode','arrival']
print(arrival_code)

In [None]:
#Location of the airport with that iata code in word_airports dataframe 
arrival_airport = world_airports.loc[world_airports['iata_code'] == arrival_code]
arrival_airport