In [34]:
import selenium
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import requests

In [32]:
def create_google_flights_url(origin, destination, start_date, end_date=None, seats=1, airline=None):

    if end_date is None:
        query = f"Flights to {destination} from {origin} on {start_date} {seats} seat{'s' if seats > 1 else ''}"
    elif end_date:
        query = f"Flights to {destination} from {origin} on {start_date} through {end_date} {seats} seat{'s' if seats > 1 else ''}"
    if airline:
        query += f" on {airline}"
    
    # Format the query into the URL
    base_url = "https://www.google.com/travel/flights?q="
    query = query.replace(" ", "%20")  # URL encode spaces as %20
    return f"{base_url}{query}"

def gfscrape(origin, destination, start_date, end_date=None):

    url = create_google_flights_url(origin, destination, start_date, end_date)
    soup = BeautifulSoup(requests.get(url).content, 'html.parser')

    for flight in soup.find_all('div', class_='KhL0De'):
        flight_info = {}
        
        # Airline Information
        flight_info['airline'] = flight.find('div', class_='sSHqwe').span.text.strip()
        
        # Flight Times (using aria-labels for accuracy)
        time_container = flight.find('span', {'aria-label': lambda x: x and 'Leaves' in x})
        times = [span.text for span in time_container.find_all('span', role='text')]
        flight_info['departure_time'], flight_info['arrival_time'] = times if len(times) >= 2 else (None, None)
        
        # Duration (specific class combination)
        duration_div = flight.find('div', class_='gvkrdb')
        flight_info['duration'] = duration_div.text if duration_div else None
        
        # Airports (with proper code separation)
        airport_codes = [div.span.text.strip() for div in flight.find_all('div', class_='QylvBf')]
        airport_names = [div.find('span', class_='eoY5cb').text 
                        for div in flight.find_all('div', {'jsname': 'bN97Pc'}) 
                        if div.find('span', class_='eoY5cb')]
        
        flight_info['departure_airport'] = f"{airport_codes[0]} ({airport_names[0]})" if airport_codes and airport_names else None
        flight_info['arrival_airport'] = f"{airport_codes[1]} ({airport_names[1]})" if len(airport_codes) > 1 and len(airport_names) > 1 else None
        
        # Stops Information
        flight_info['stops'] = 'Nonstop' if flight.find('span', {'aria-label': 'Nonstop flight.'}) else None
        
        # CO2 Emissions (specific class targeting)
        co2_div = flight.find('div', class_='PtgtFe')
        percentage_div = flight.find('div', class_='N6PNV')
        flight_info['co2_emissions'] = {
            'amount': co2_div.text if co2_div else None,
            'percentage': percentage_div.text if percentage_div else None
        }
        
        # Price (more specific selector)
        price_span = flight.find('span', class_='YMlIz', attrs={'aria-label': True})
        if price_span:
            flight_info['price'] = price_span.text.strip()
            flight_info['currency'] = price_span['aria-label'].split()[1]  # Extract currency
        else:
            flight_info['price'] = None
        
        flights.append(flight_info)

    # Print results
    for flight in flights:
        for key, value in flight.items():
            print(f"{key.upper()}: {value}")
        print("\n" + "="*50 + "\n")

In [33]:
url = gfscrape(
    origin="LAS",
    destination="IND",
    start_date="2025-03-24"
)

https://www.google.com/travel/flights?q=Flights%20to%20IND%20from%20LAS%20on%202025-03-24%201%20seat
AIRLINE: Spirit
DEPARTURE_TIME: 10:05 PM
ARRIVAL_TIME: 4:37 AM+1
DURATION: 3 hr 32 min
DEPARTURE_AIRPORT: LASHarry Reid International Airport (10:05 PM on Mon, Mar 24)
ARRIVAL_AIRPORT: INDIndianapolis International Airport (4:37 AM on Tue, Mar 25)
STOPS: Nonstop
CO2_EMISSIONS: {'amount': '211 kg CO2e', 'percentage': '-21% emissions'}
PRICE: None


AIRLINE: Allegiant
DEPARTURE_TIME: 9:04 AM
ARRIVAL_TIME: 3:37 PM
DURATION: 3 hr 33 min
DEPARTURE_AIRPORT: LASHarry Reid International Airport (9:04 AM on Mon, Mar 24)
ARRIVAL_AIRPORT: INDIndianapolis International Airport (3:37 PM on Mon, Mar 24)
STOPS: Nonstop
CO2_EMISSIONS: {'amount': '189 kg CO2e', 'percentage': '-29% emissions'}
PRICE: None


AIRLINE: Southwest
DEPARTURE_TIME: 6:00 AM
ARRIVAL_TIME: 12:30 PM
DURATION: 3 hr 30 min
DEPARTURE_AIRPORT: LASHarry Reid International Airport (6:00 AM on Mon, Mar 24)
ARRIVAL_AIRPORT: INDIndianapolis