In [1]:
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver import Edge, EdgeOptions
from selenium.webdriver.common.by import By

from datetime import timedelta as td
from datetime import datetime as dt
from time import sleep
from os import getcwd

import pandas as pd

# Lift the row cap on DataFrame display so full result tables are shown untruncated.
pd.options.display.max_rows = None

In [9]:
def scraper(browser, departures:list, arrivals:list) -> pd.DataFrame:
    """Collect the flights for every departure/arrival combination.

    Parameters
    ----------
    browser
        Object exposing ``get_flights(_from, _to)`` and returning a DataFrame
        for that route.
    departures, arrivals
        Place names to combine; every departure is paired with every arrival.

    Returns
    -------
    pd.DataFrame
        The per-route frames stacked together, with two extra columns
        (``from`` and ``to``) identifying the route of each row. The index is
        renumbered 0..n-1 so every row has a unique label. An empty DataFrame
        is returned when there is no route to query.
    """
    route_frames = []

    for _from in departures:
        for _to in arrivals:
            tempdf = browser.get_flights(_from, _to)
            tempdf[["from", "to"]] = _from, _to
            route_frames.append(tempdf)

    # pd.concat raises on an empty list, so keep the "nothing scraped" case explicit.
    if not route_frames:
        return pd.DataFrame()

    # One concat at the end instead of re-copying the accumulated frame per route;
    # ignore_index avoids repeated labels (0, 1, 2, ... once per route), which would
    # make any later label-based drop remove rows from unrelated routes.
    return pd.concat(route_frames, ignore_index=True)

def handle_with(flights:pd.DataFrame, columns:list, hotel_prices:list=["hotels_price"]) -> pd.DataFrame:
    """Name the scraped columns, discard unusable rows and convert prices to integers.

    Parameters
    ----------
    flights
        Raw scraped frame whose positional columns are labelled 0, 1, 2, ...
        It is not modified; a cleaned copy is returned.
    columns
        Names given to the positional columns, in order (column ``i`` becomes
        ``columns[i]``). Must include ``"where"`` and ``"how_much"``.
    hotel_prices
        Additional price columns to convert. When non-empty, rows with an
        empty ``where`` or a missing ``hotels_price`` are removed, as are rows
        whose ``hotels_price`` text contains the letter ``"h"``.

    Returns
    -------
    pd.DataFrame
        Cleaned rows sorted by ``how_much``, without duplicates, indexed 0..n-1.
    """

    def _strip_currency(price):
        # "R$1,234" -> "1234"; falsy values (e.g. "") are passed through unchanged.
        return price.replace("R$", "").replace(",", "") if price else price

    # rename() returns a new frame, so the caller's data is left untouched.
    # Resetting the index guarantees unique labels: the scraped frame may repeat
    # labels once per route, and filtering must never act on more than one row
    # per label.
    cleaned = flights.rename(columns=dict(enumerate(columns))).reset_index(drop=True)

    if hotel_prices:
        has_hotel = (cleaned["where"] != "") & cleaned["hotels_price"].notna()
        cleaned = cleaned[has_hotel]
        cleaned = cleaned[~cleaned["hotels_price"].str.contains("h")]

    for column in ["how_much", *hotel_prices]:
        cleaned = cleaned[cleaned[column].notna()]
        # assign() builds a new frame, avoiding writes into a filtered slice.
        cleaned = cleaned.assign(**{column: cleaned[column].map(_strip_currency).astype(int)})

    # Drop duplicates before renumbering so the final index has no gaps.
    return cleaned.sort_values("how_much").drop_duplicates().reset_index(drop=True)

In [3]:
class Flights(Edge):
    """Edge driver with helpers for scraping the Google Travel "Explore" page.

    Creating an instance launches Edge with the ``-inprivate`` argument,
    maximises the window and loads https://www.google.com/travel/explore.
    """

    def __init__(self):
        _options = EdgeOptions()
        _options.add_argument("-inprivate")

        super().__init__(options=_options)
        self.maximize_window()

        # Shared explicit wait (20 s timeout) used by the lookup helpers.
        self.webwait = WebDriverWait(self, 20)
        self.get("https://www.google.com/travel/explore")

    @staticmethod
    def _xpath_literal(text:str) -> str:
        """Return ``text`` as an XPath string literal.

        XPath 1.0 has no escape character, so a value containing both quote
        kinds has to be assembled with ``concat()``. Text without an
        apostrophe yields the plain ``'text'`` form.
        """
        if "'" not in text:
            return f"'{text}'"
        if '"' not in text:
            return f'"{text}"'
        pieces = (f"'{piece}'" for piece in text.split("'"))
        return "concat(" + ", \"'\", ".join(pieces) + ")"

    def findby_txt(self, tag:str, text:str) -> WebElement:
        """Return the first ``tag`` element whose text node contains ``text``."""
        return self.find_element(By.XPATH, f"//{tag}[contains(text(), {self._xpath_literal(text)})]")

    def findby_arialabel(self, tag:str, text:str) -> WebElement:
        """Wait until the ``tag`` element whose aria-label equals ``text`` is clickable and return it."""
        return self.webwait.until(
            EC.element_to_be_clickable((By.XPATH, f"//{tag}[@aria-label={self._xpath_literal(text)}]")))

    def js_click(self, web_element:WebElement) -> None:
        """Click ``web_element`` through JavaScript instead of a native click."""
        self.execute_script("arguments[0].click()", web_element)

    def wait(self) -> None:
        """Pause until the "Loading results" indicator is no longer displayed.

        Sleeps 2 seconds first, then re-checks the indicator once per second.
        There is no upper bound on the waiting time, and ``find_element``
        raises if the indicator element is not present in the page.
        """
        sleep(2)

        while self.find_element(By.XPATH, "//div[@aria-label='Loading results']").is_displayed():
            sleep(1)

    def print(self, imgname:str) -> None:
        """Save a screenshot of the ``role='region'`` element as ``imgs/<imgname>.png`` under the working directory."""
        from os import makedirs  # local import: only this method needs it

        folder = f"{getcwd()}/imgs"
        # WebElement.screenshot returns False instead of raising when the file
        # cannot be written, so a missing folder would otherwise lose the image silently.
        makedirs(folder, exist_ok=True)
        self.find_element(By.XPATH, "//div[@role='region']").screenshot(f"{folder}/{imgname}.png")

    def zoom(self, direction:str="in", times:int=1) -> None:
        """Click the "Zoom <direction>" button ``times`` times, then wait for the results to reload."""
        for _ in range(times):
            self.find_element(By.XPATH, f"//button[@aria-label='Zoom {direction}']").click()
        self.wait()

    def get_flights(self, _from:str, _to:str) -> pd.DataFrame:
        """Fill in origin and destination and return the listed results.

        Returns a DataFrame with one row per ``li`` found inside the first
        clickable ``ol``. Each row holds the lines of that item's text, with
        every non-ASCII character replaced by ``"?"``. Columns are positional
        (0, 1, 2, ...).
        """
        for w, where in [("from", _from), ("to", _to)]:
            place = self.findby_arialabel("input", f"Where {w}?")

            self.wait()
            place.clear()
            place.send_keys(where)
            self.findby_arialabel("li", where).click()

        self.wait()
        self.zoom()
        try:
            self.print(f"{_from} __ {_to} __")
        except Exception as error:
            # The screenshot is best-effort and must not abort the scrape, but
            # interrupts (KeyboardInterrupt/SystemExit) are no longer swallowed.
            # Note: this is the builtin print, not the screenshot method above.
            print(f"Screenshot skipped for {_from} -> {_to}: {error!r}")

        results = self.webwait.until(EC.element_to_be_clickable((By.TAG_NAME, "ol")))
        return pd.DataFrame([
            flight.text.encode("ascii", "replace").decode("ascii").split("\n")
            for flight in results.find_elements(By.TAG_NAME, "li")
        ])

    def show_calendar(self, kind:str) -> None:
        """Click the ``data-min='0'`` element, then the span containing ``kind``, and wait for loading."""
        self.js_click(self.find_element(By.XPATH, "//div[@data-min='0']"))
        self.js_click(self.findby_txt("span", kind))
        self.wait()

    def __del__(self):
        # quit() closes every window and ends the session, so a separate close()
        # is unnecessary. The finalizer can run on a driver that was already shut
        # down (or never fully created); failures here are deliberately ignored.
        try:
            self.quit()
        except Exception:
            pass

# Start the Edge session shared by every scraping cell in this notebook.
# Instantiating Flights opens a browser window and loads the Explore page.
browser = Flights()

In [4]:
# Search inputs: every departure is combined with every arrival by scraper().
# Each string is typed into the "Where from?" / "Where to?" box and must match
# the aria-label of the suggestion that is clicked afterwards.
departures = ["Rio de Janeiro", "São Paulo"]
arrivals = ["Europe", "Southern Europe", "Northern Europe", "Western Europe", "Eastern Europe"]

##### Round Trip | Flexible Dates

In [21]:
# Round trip, flexible dates: scrape once per trip length and stack the results.
flexible_frames = []

for date in ["1 week", "2 weeks", "All"]:
    browser.show_calendar("Flexible")

    for text in [date, "May", "Done"]:
        browser.js_click(browser.findby_txt("span", text))

    flights = handle_with(scraper(browser, departures, arrivals),
        ["where", "when", "how_much", "stops", "flight_time", "hotels_price", "need_car"])

    # `when` holds both ends of the trip separated by "???" (get_flights replaces
    # non-ASCII characters with "?"; presumably that was the date-range separator).
    # The lambda argument is named `span` so it does not shadow the datetime alias `dt`.
    flights[["when", "until"]] = flights.when.apply(lambda span: pd.Series(span.split("???")))
    flexible_frames.append(flights)

# Concatenate once, with fresh row labels, instead of growing an empty frame per iteration.
allflights = pd.concat(flexible_frames, ignore_index=True)

##### Round Trip | Specific Dates

In [None]:
# Round trip, specific dates: one search per (departure date, trip length) pair.
DATE_FORMAT = r"%Y-%m-%d"
specific_frames = []

for date in ["2025-04-18", "2025-04-19"]:
    for days in [7, 14]:

        dt_departure = date
        dt_return = (dt.strptime(dt_departure, DATE_FORMAT) + td(days)).strftime(DATE_FORMAT)
        browser.show_calendar("Specific")

        # Pick the outbound day first, then the return day, in the calendar grid.
        for flight_date in [dt_departure, dt_return]:
            browser.find_element(By.XPATH, f'//div[@data-iso="{flight_date}"]').click()

        browser.js_click(browser.findby_txt("span", "Done"))
        browser.wait()

        flights = handle_with(scraper(browser, departures, arrivals),
            ["where", "how_much", "stops", "flight_time", "hotels_price", "need_car"])

        # assign() adds the two constant columns without relying on .loc enlargement.
        flights = flights.assign(when=dt_departure, until=dt_return)
        specific_frames.append(flights)

# Concatenate once, with fresh row labels, instead of growing an empty frame per iteration.
allflights = pd.concat(specific_frames, ignore_index=True)

In [5]:
# Click the span containing "Round trip", then the one containing "One way"
# (presumably opening the trip-type menu and selecting the one-way option).
for trip_label in ("Round trip", "One way"):
    trip_span = browser.findby_txt("span", trip_label)
    browser.js_click(trip_span)

# Let the results reload before the next cell runs.
browser.wait()

##### One Way | Flexible Dates

In [6]:
# One way, flexible dates: choose "May" in the flexible calendar and confirm.
browser.show_calendar("Flexible")

for label in ("May", "Done"):
    label_span = browser.findby_txt("span", label)
    browser.js_click(label_span)

# No hotel price column here, hence the empty list passed as the last argument.
one_way_columns = ["where", "when", "how_much", "stops", "flight_time", "need_car"]
flights = handle_with(
    scraper(browser, departures, arrivals),
    one_way_columns,
    [],
)

##### One Way | Specific Dates

In [None]:
# One way, specific dates: one search per departure date.
base_date = dt.strptime("2025-05-22", r"%Y-%m-%d")
# Offsets (in days) added to the base date; a single 7-day offset gives 2025-05-29.
departure_dates = [(base_date + td(days)).strftime(r"%Y-%m-%d") for days in [7]]
one_way_frames = []

for date in departure_dates:
    browser.show_calendar("Specific")

    browser.find_element(By.XPATH, f'//div[@data-iso="{date}"]').click()
    browser.js_click(browser.findby_txt("span", "Done"))
    browser.wait()

    flights = scraper(browser, departures, arrivals)
    flights["when"] = date

    one_way_frames.append(flights)

# Clean the accumulated results of every date. Previously only the last
# iteration's frame was passed on, discarding the earlier ones. Fresh row labels
# keep rows from different dates from sharing an index label.
allflights = handle_with(pd.concat(one_way_frames, ignore_index=True),
    ["where", "how_much", "stops", "flight_time", "need_car"], [])

In [18]:
# End the WebDriver session through the public API rather than invoking the
# finalizer by hand; quit() closes every window the driver opened.
browser.quit()