In [1]:
import requests
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException

In [2]:
def get_flight_info(flight):
    """
    Read the summary line of one flight card.

    Parameters
    ----------
    flight : element exposing ``find_element_by_xpath`` / ``find_elements_by_xpath``
        The card of a single found flight.

    Returns
    -------
    dict
        ``{'departure': ..., 'arrival': ..., 'duration': ...}`` holding the
        text of the matching elements. Newlines are removed from the arrival
        text only; the departure text is returned as found.
    """
    time_xpath = './/span[@class="sc-cfWELz iIRsqM"]'

    # The first matching box is read as the departure, the second as the arrival.
    endpoints = flight.find_elements_by_xpath('.//div[@class="sc-hAXbOi jocxib flight-information"]')
    departure_text = endpoints[0].find_element_by_xpath(time_xpath).text
    arrival_text = endpoints[1].find_element_by_xpath(time_xpath).text
    duration_text = flight.find_element_by_xpath('.//span[@class="sc-hCaUpS cTjSBD"]').text

    return {
        'departure': departure_text,
        'arrival': ''.join(arrival_text.split('\n')),
        'duration': duration_text,
    }
    

In [3]:
def _endpoint_info(endpoint):
    """Read city code, time and airport name from one departure/arrival box."""
    return {
        'city': endpoint.find_element_by_xpath('.//div[@class="iataCode"]//span').text,
        'time': endpoint.find_element_by_xpath('.//div[@class="iataCode"]//span[@class="time"]').text,
        # 'ariport-name' is the spelling used by the original selector; kept verbatim.
        'airport': endpoint.find_element_by_xpath('.//span[@class="ariport-name"]').text,
    }


def get_stops_info(flight):
    """
    Function to get the details of every leg of a found flight.

    Clicks the link inside ``flight`` that opens the itinerary detail view,
    waits (up to 5 seconds) for a time label to be present, reads every leg
    section from the page through the module-level ``driver``, and finally
    clicks the detail view's close button.

    Parameters
    ----------
    flight : element exposing ``find_element_by_xpath``
        The card of a single found flight.

    Returns
    -------
    list of dict
        One entry per leg with the keys ``departure`` and ``arrival`` (each a
        dict with ``city``, ``time``, ``airport``), ``duration``, ``airplane``,
        ``connection`` and ``connection_time``. The two connection fields are
        empty strings for the last leg.

    Raises
    ------
    Whatever the wait or the element lookups raise (the wait's timeout error
    is raised before anything is scraped). ``IndexError`` if the page shows
    fewer connection blocks than non-final legs.
    """
    flight.find_element_by_xpath('.//a[@class="sc-bRBYWo bhewGY"]').click()
    # Block until the detail view has rendered at least one time label.
    WebDriverWait(driver, 5).until(
        EC.presence_of_element_located((By.XPATH, '//div[@class="iataCode"]//span[@class="time"]'))
    )

    try:
        stops = driver.find_elements_by_xpath('//section[@class="sc-jWxkHr fPpyxb"]')

        # Connection blocks are only looked up when there is more than one leg.
        connections = []
        if len(stops) > 1:
            connections = driver.find_elements_by_xpath('//div[@class="sc-bYTsla hjShjO"]')

        last_index = len(stops) - 1
        stops_info = []
        for s, stop in enumerate(stops):
            departure = _endpoint_info(stop.find_element_by_xpath('.//div[@class="sc-geAPOV biWURZ"]'))
            flight_time = stop.find_element_by_xpath('.//div[@class="sc-bJTOcE iJlaOT"]//span[@class="time"]').text
            airplane = stop.find_element_by_xpath('.//span[@class="airplane-code"]').text
            arrival = _endpoint_info(stop.find_element_by_xpath('.//div[@class="sc-PLyBE iVAtbp"]'))

            stop_info = {
                'departure': departure,
                'arrival': arrival,
                'duration': flight_time,
                'airplane': airplane,
                'connection': '',
                'connection_time': ''
            }

            # Every leg except the last one is followed by a connection block.
            if s < last_index:
                connection = connections[s]
                stop_info['connection'] = connection.find_element_by_xpath('.//span[@class="connection-text"]').text
                stop_info['connection_time'] = connection.find_element_by_xpath('.//span[@class="time"]').text

            stops_info.append(stop_info)
    finally:
        # Always dismiss the detail view, even when scraping a leg failed, so
        # it does not stay open in front of the results for later clicks.
        driver.find_element_by_xpath('//button[@class="MuiButtonBase-root MuiIconButton-root sc-fBuWsC cNJQnZ"]').click()

    return stops_info
    
    
    
    

In [4]:
def get_flares(flight):
    """
    Collect the amounts displayed after expanding a flight card.

    Clicks the expandable element inside ``flight`` and then reads, through
    the module-level ``driver`` (i.e. from the whole page, not only from
    ``flight``), the text of the amount label in every option box.

    Parameters
    ----------
    flight : element exposing ``find_element_by_xpath``
        The card of a single found flight.

    Returns
    -------
    list of str
        The displayed amount of each option box, in page order; empty when no
        box is found.
    """
    toggle = flight.find_element_by_xpath('.//div[@class="sc-hizQCF mUhbx"]')
    toggle.click()

    amount_xpath = './/span[@class="sc-ileJJU bxvQhO displayAmount"]'
    return [
        option.find_element_by_xpath(amount_xpath).text
        for option in driver.find_elements_by_xpath('//div[@class="sc-dTsoBL izUUnZ"]')
    ]
    

In [5]:
def scrap_flights(driver):
    """
    Scrape every flight card currently present on the results page.

    Parameters
    ----------
    driver : object exposing ``find_elements_by_xpath``
        Used here only to locate the flight cards; the per-flight helpers
        receive the card itself.

    Returns
    -------
    list of dict
        One record per card: the result of ``get_flight_info`` extended with
        the keys ``'stops'`` (from ``get_stops_info``) and ``'flares'`` (from
        ``get_flares``), filled in that order.
    """
    cards = driver.find_elements_by_xpath('//li[@class="sc-dvpmds cfVFEa"]')
    print(f'Found {len(cards)} flights.')

    scraped = []
    for card in cards:
        record = get_flight_info(card)
        # Keyword arguments are evaluated left to right: stops first, then flares.
        record.update(stops=get_stops_info(card), flares=get_flares(card))
        scraped.append(record)
    return scraped

In [6]:
# Search-results page for the BOG -> MAD round trip encoded in the query string.
url = 'https://www.latamairlines.com/cl/es/ofertas-vuelos?dataFlight=%7B%22tripTypeSelected%22%3A%7B%22label%22%3A%22Ida%20y%20Vuelta%22%2C%22value%22%3A%22RT%22%7D%2C%22cabinSelected%22%3A%7B%22label%22%3A%22Economy%22%2C%22value%22%3A%22Economy%22%7D%2C%22passengerSelected%22%3A%7B%22adultQuantity%22%3A1%2C%22childrenQuantity%22%3A0%2C%22infantQuantity%22%3A0%7D%2C%22originSelected%22%3A%7B%22id%22%3A%22BOG_CO_AIRPORT%22%2C%22name%22%3A%22El%20Dorado%20Intl.%22%2C%22city%22%3A%22Bogot%C3%A1%22%2C%22cityIsoCode%22%3A%22BOG%22%2C%22country%22%3A%22Colombia%22%2C%22iata%22%3A%22BOG%22%2C%22latitude%22%3A4.70159%2C%22longitude%22%3A-74.1469%2C%22timezone%22%3A-5%2C%22tz%22%3A%22America%2FBogota%22%2C%22type%22%3A%22AIRPORT%22%2C%22countryAlpha2%22%3A%22CO%22%2C%22airportIataCode%22%3A%22BOG%22%7D%2C%22destinationSelected%22%3A%7B%22id%22%3A%22MAD_ES_AIRPORT%22%2C%22name%22%3A%22Barajas%20Intl.%22%2C%22city%22%3A%22Madrid%22%2C%22cityIsoCode%22%3A%22MAD%22%2C%22country%22%3A%22Espa%C3%B1a%22%2C%22iata%22%3A%22MAD%22%2C%22latitude%22%3A40.471926%2C%22longitude%22%3A-3.56264%2C%22timezone%22%3A1%2C%22tz%22%3A%22Europe%2FMadrid%22%2C%22type%22%3A%22AIRPORT%22%2C%22countryAlpha2%22%3A%22ES%22%2C%22airportIataCode%22%3A%22MAD%22%7D%2C%22dateGoSelected%22%3A%222021-08-20T17%3A00%3A00.000Z%22%2C%22dateReturnSelected%22%3A%222021-08-28T17%3A00%3A00.000Z%22%2C%22redemption%22%3Afalse%7D&sort=RECOMMENDED'

# Drive the Brave binary through chromedriver, in an incognito window.
# NOTE(review): the browser path is machine-specific; adjust it per environment.
options = webdriver.ChromeOptions()
options.binary_location = r'C:\Program Files (x86)\BraveSoftware\Brave-Browser\Application\brave.exe'
options.add_argument('--incognito')
driver = webdriver.Chrome(executable_path='chromedriver.exe', options=options)

delay = 10
# Default value so later cells can still reference flights_info after a timeout.
flights_info = []
try:
    driver.get(url)
    # Wait until at least one flight card is present before scraping.
    vuelo = WebDriverWait(driver, delay).until(EC.presence_of_element_located((By.XPATH, '//li[@class="sc-dvpmds cfVFEa"]')))
    flights_info = scrap_flights(driver)
except TimeoutException:
    print('Page take too long to load.')
finally:
    # quit() ends the whole WebDriver session (browser and driver process);
    # running it in `finally` releases them even when scraping raises.
    driver.quit()

Found 3 flights.


In [7]:
# Show the scraped flight records as this cell's output.
flights_info

[{'departure': '13:00',
  'arrival': '13:55+1',
  'duration': '17 h 55 min',
  'stops': [{'departure': {'city': 'BOG',
     'time': '13:00',
     'airport': 'El Dorado Intl.'},
    'arrival': {'city': 'GRU', 'time': '21:10', 'airport': 'Guarulhos Intl.'},
    'duration': '6 h 10 min',
    'airplane': 'Airbus A321',
    'connection': 'Conexión Sao Paulo',
    'connection_time': '2 h'},
   {'departure': {'city': 'GRU',
     'time': '23:10',
     'airport': 'Guarulhos Intl.'},
    'arrival': {'city': 'MAD', 'time': '13:55', 'airport': 'Barajas Intl.'},
    'duration': '9 h 45 min',
    'airplane': 'Boeing B787-9',
    'connection': '',
    'connection_time': ''}],
  'flares': ['$333.678', '$369.068', '$425.693', '$1.480.622']},
 {'departure': '17:56',
  'arrival': '17:10+1',
  'duration': '16 h 14 min',
  'stops': [{'departure': {'city': 'BOG',
     'time': '17:56',
     'airport': 'El Dorado Intl.'},
    'arrival': {'city': 'LIM', 'time': '20:55', 'airport': 'J Chavez Intl.'},
    'durat