In [3]:
from urllib.parse import quote

import requests
import selenium
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By

In [45]:
def create_google_flights_url(origin, destination, start_date, end_date=None, seats=1, airline=None):
    """Build a Google Flights search URL from a natural-language query.

    Args:
        origin: Departure city or airport code, inserted verbatim into the query.
        destination: Arrival city or airport code, inserted verbatim into the query.
        start_date: Departure date text (e.g. "2025-03-15").
        end_date: Return date text. When None or empty, a one-way query is built;
            otherwise a round-trip query is built.
        seats: Number of seats; rendered as "seats" when greater than 1, else "seat".
        airline: Optional airline name, appended as " on <airline>" when truthy.

    Returns:
        str: The base URL followed by the percent-encoded query as the ``q`` value.
    """
    seat_text = f"{seats} seat{'s' if seats > 1 else ''}"

    if end_date:
        query = f"Round trip flights to {destination} from {origin} on {start_date} through {end_date} {seat_text}"
    else:
        # Any falsy end_date (None, "") means one-way, so `query` is always bound.
        query = f"One way flights to {destination} from {origin} on {start_date} {seat_text}"
    if airline:
        query += f" on {airline}"

    base_url = "https://www.google.com/travel/flights?q="
    # Percent-encode the whole query (spaces become %20) so characters such as
    # '&', '#' or '/' in the arguments cannot break out of the q parameter.
    return f"{base_url}{quote(query, safe='')}"

def _parse_flight_card(flight):
    """Extract one flight's details from a result-card element into a dict.

    Every field falls back to None when its element is not found, so a card
    with missing pieces does not abort the whole scrape.
    """
    flight_info = {}

    # Airline Information
    airline_div = flight.find('div', class_='sSHqwe')
    airline_span = airline_div.span if airline_div else None
    flight_info['airline'] = airline_span.text.strip() if airline_span else None

    # Flight Times: located via the span whose aria-label contains 'Leaves'
    time_container = flight.find('span', {'aria-label': lambda x: x and 'Leaves' in x})
    times = [span.text for span in time_container.find_all('span', role='text')] if time_container else []
    # Slice to two items so extra matching spans cannot break the unpacking.
    flight_info['departure_time'], flight_info['arrival_time'] = times[:2] if len(times) >= 2 else (None, None)

    # Duration
    duration_div = flight.find('div', class_='gvkrdb')
    flight_info['duration'] = duration_div.text if duration_div else None

    # Airports: the first three characters are taken as the code, the rest as the name
    airport_full = [div.span.text.strip() for div in flight.find_all('div', class_='QylvBf') if div.span]
    airport_codes = [text[0:3] for text in airport_full]
    airport_names = [text[3:] for text in airport_full]
    flight_info['departure_airport'] = f"{airport_codes[0]} ({airport_names[0]})" if airport_full else None
    flight_info['arrival_airport'] = f"{airport_codes[1]} ({airport_names[1]})" if len(airport_full) > 1 else None

    # Stops Information: only nonstop flights are recognised; anything else is None
    flight_info['stops'] = 'Nonstop' if flight.find('span', {'aria-label': 'Nonstop flight.'}) else None

    # CO2 Emissions
    co2_div = flight.find('div', class_='PtgtFe')
    percentage_div = flight.find('div', class_='N6PNV')
    flight_info['co2_emissions'] = {
        'amount': co2_div.text if co2_div else None,
        'percentage': percentage_div.text if percentage_div else None
    }

    # Price: the currency is the second whitespace-separated token of the price
    # span's aria-label. Both keys are always set so every record has the same shape.
    price_span = flight.select_one('.YMlIz.FpEdX span')
    label_tokens = (price_span.get('aria-label') or '').split() if price_span else []
    flight_info['price'] = price_span.text.strip() if price_span else None
    flight_info['currency'] = label_tokens[1] if len(label_tokens) > 1 else None

    return flight_info


def gfscrape(origin, destination, start_date, end_date=None):
    """Fetch a Google Flights search page, parse the listed flights and print them.

    Args:
        origin: Departure city or airport code.
        destination: Arrival city or airport code.
        start_date: Departure date text.
        end_date: Optional return date text, passed to create_google_flights_url.

    Returns:
        list[dict]: One dict per parsed flight card, or an empty list when the
        results list element is not present in the response.

    Raises:
        requests.RequestException: If the request times out or returns an
            HTTP error status.
    """
    url = create_google_flights_url(origin, destination, start_date, end_date)
    print(f'Search link: {url}')
    print("\n")

    # A timeout keeps the notebook cell from hanging forever; raise_for_status
    # surfaces HTTP errors instead of parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    top_flights = soup.find('ul', class_='Rk10dc')
    if top_flights is None:
        print("No flight results list found in the response.")
        return []

    flights = [_parse_flight_card(card) for card in top_flights.find_all('div', class_='KhL0De')]

    # Print results
    for flight in flights:
        for key, value in flight.items():
            print(f"{key.upper()}: {value}")
        print("\n" + "="*50 + "\n")

    return flights

In [62]:
# Example search: one-way flights from Chicago to Denver (DEN) on 2025-03-15.
# gfscrape prints the search link followed by each parsed flight.
url = gfscrape("Chicago", "DEN", "2025-03-15")

Search link: https://www.google.com/travel/flights?q=One%20way%20flights%20to%20DEN%20from%20Chicago%20on%202025-03-15%201%20seat


AIRLINE: Frontier
DEPARTURE_TIME: 6:40 AM
ARRIVAL_TIME: 8:32 AM
DURATION: 2 hr 52 min
DEPARTURE_AIRPORT: MDW (Chicago Midway International Airport)
ARRIVAL_AIRPORT: DEN (Denver International Airport)
STOPS: Nonstop
CO2_EMISSIONS: {'amount': '112 kg CO2e', 'percentage': '-15% emissions'}
PRICE: $39
CURRENCY: US


AIRLINE: Frontier
DEPARTURE_TIME: 6:50 AM
ARRIVAL_TIME: 8:53 AM
DURATION: 3 hr 3 min
DEPARTURE_AIRPORT: ORD (Chicago O'Hare International Airport)
ARRIVAL_AIRPORT: DEN (Denver International Airport)
STOPS: Nonstop
CO2_EMISSIONS: {'amount': '100 kg CO2e', 'percentage': '-24% emissions'}
PRICE: $39
CURRENCY: US


AIRLINE: Southwest
DEPARTURE_TIME: 2:45 PM
ARRIVAL_TIME: 4:30 PM
DURATION: 2 hr 45 min
DEPARTURE_AIRPORT: ORD (Chicago O'Hare International Airport)
ARRIVAL_AIRPORT: DEN (Denver International Airport)
STOPS: Nonstop
CO2_EMISSIONS: {'amount':