In [None]:
import os
import re
import time
from datetime import datetime

import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [None]:
# Start a Chrome session, create the explicit-wait helper shared by the
# following cells (20 second timeout), then open the airline's home page.
HOME_URL = "https://www.buddhaair.com/"

driver = webdriver.Chrome()
wait = WebDriverWait(driver, timeout=20)
driver.get(HOME_URL)

In [None]:
# Choose the "One Way" trip type as soon as its control is clickable.
one_way_locator = (By.ID, "one_way")
one_way = wait.until(EC.element_to_be_clickable(one_way_locator))
one_way.click()

print("✔ Trip type: One Way")

In [None]:
# Nationality: open the dropdown, pause briefly, then pick the "Nepal" entry.
nationality_locator = (By.NAME, "strNationality")
nepal_locator = (By.ID, "Nepal")

nationality_input = wait.until(EC.element_to_be_clickable(nationality_locator))
nationality_input.click()
time.sleep(1)  # short fixed pause after opening the dropdown

nepal_option = wait.until(EC.element_to_be_clickable(nepal_locator))
nepal_option.click()

print("✔ Nationality selected: Nepal")




In [None]:
# Travellers: click the traveller-count input (optional step); no value is
# changed here, so the form keeps whatever count it already shows.
traveller_locator = (By.XPATH, "//div[contains(@class,'traveller-no')]//input")
traveller_dropdown = wait.until(EC.element_to_be_clickable(traveller_locator))
traveller_dropdown.click()

print("✔ Travellers confirmed: 1 Adult (default)")


In [None]:
def _choose_sector(field_name, panel_id, airport_code):
    """Open a sector field and click the airport entry with the given code.

    Args:
        field_name: ``name`` attribute of the input that opens the airport list.
        panel_id: ``id`` of the <div> that holds the airport options.
        airport_code: text of the <em> inside the option to pick (e.g. "KTM").

    Returns:
        Tuple ``(input_element, option_element)`` of the two clicked elements.

    Raises:
        Whatever ``wait.until`` raises when an element does not become
        clickable within the wait's timeout.
    """
    sector_input = wait.until(EC.element_to_be_clickable((By.NAME, field_name)))
    sector_input.click()

    # The option is the <a> that directly wraps the <em> showing the code.
    option_xpath = (
        f"//div[@id='{panel_id}']//em[normalize-space()='{airport_code}']/parent::a"
    )
    sector_option = wait.until(EC.element_to_be_clickable((By.XPATH, option_xpath)))
    sector_option.click()
    return sector_input, sector_option


# ---------------------------
# FROM: Kathmandu (KTM)
# ---------------------------
from_input, from_option = _choose_sector("strSectorFrom", "from-airplane", "KTM")
print("✔ From selected: Kathmandu (KTM)")


# ---------------------------
# TO: Biratnagar (BIR)
# ---------------------------
to_input, to_option = _choose_sector("strSectorTo", "to-airplane", "BIR")
print("✔ To selected: Biratnagar (BIR)")


In [None]:

# ---------------------------
# CLICK DEPARTURE DATE INPUT
# ---------------------------
date_input = wait.until(
    EC.element_to_be_clickable((By.NAME, "date"))
)
date_input.click()

time.sleep(1)  # allow calendar to render

# ---------------------------
# CLICK TODAY'S DATE
# mx-datepicker marks today with class 'today'
# ---------------------------
today_date = wait.until(
    EC.element_to_be_clickable(
        (By.CSS_SELECTOR, ".mx-calendar-content .today")
    )
)
today_date.click()

print("✔ Departure date set to today")

# ---------------------------
# CLICK SEARCH FLIGHTS BUTTON
# ---------------------------
search_button = wait.until(
    EC.element_to_be_clickable(
        (By.XPATH, "//button[contains(@class,'btn-search')]")
    )
)
search_button.click()

print("✔ Search Flights clicked")

# ---------------------------
# WAIT FOR THE RESULTS PAGE
# First the URL must switch to the search route, then at least one
# flight-card section must be present in the DOM.
# ---------------------------
wait.until(EC.url_contains("/search/flight"))
print("✔ Redirected to:", driver.current_url)

wait.until(
    EC.presence_of_element_located(
        (By.XPATH, "//section[@id='flight-card']")
    )
)

print("✔ Flight results loaded")

In [None]:
# Block until at least one flight-card section is present in the DOM.
flight_card_locator = (By.XPATH, "//section[@id='flight-card']")
wait.until(EC.presence_of_element_located(flight_card_locator))

print("✔ Flight results loaded")

In [103]:
# Extract one record per flight card on the results page, show a short
# summary, and save the records to a timestamped CSV.
# Uses `driver` from the setup cell and `re`, `time`, `datetime`, `pd`, `By`
# from the imports cell.

MISSING = "N/A"  # placeholder stored for any field that could not be parsed


def _parse_flight_text(card_text):
    """Parse the visible text of one flight card with regular expressions.

    Args:
        card_text: the text of a flight-card element.

    Returns:
        Dict with keys 'Flight No', 'Departure Time', 'Arrival Time',
        'Duration', 'Prices' and 'Baggage' (in that order). A field that is
        not found holds ``MISSING``.
    """
    def first_match(pattern):
        found = re.search(pattern, card_text)
        return found.group() if found else MISSING

    # The first two HH:MM values in the text are taken as departure/arrival.
    clock_times = re.findall(r'\b\d{1,2}:\d{2}\b', card_text)
    has_both_times = len(clock_times) >= 2

    # Every "NPR <amount>" occurrence is kept, joined with " | ".
    fares = re.findall(r'NPR\s*[\d,]+\.?\d*', card_text)

    return {
        'Flight No': first_match(r'U4\s*\d+'),
        'Departure Time': clock_times[0] if has_both_times else MISSING,
        'Arrival Time': clock_times[1] if has_both_times else MISSING,
        'Duration': first_match(r'\b\d+m\b'),
        'Prices': " | ".join(fares) if fares else MISSING,
        'Baggage': first_match(r'\b\d+\s*KG\b'),
    }


def _fill_from_elements(container, flight_data):
    """Best-effort fallback: fill missing fields from child elements, in place.

    Only runs for fields the text parser left as ``MISSING``. Lookup errors
    are deliberately ignored so the field simply stays ``MISSING``.
    """
    if flight_data['Flight No'] == MISSING:
        try:
            flight_elem = container.find_element(By.XPATH, ".//p[contains(., 'U4')]")
            flight_data['Flight No'] = flight_elem.text.strip()
        except Exception:
            # No matching <p> (or element went stale): keep the placeholder.
            pass

    if flight_data['Departure Time'] == MISSING:
        try:
            # First time-like span becomes departure, the next one arrival.
            for span in container.find_elements(By.TAG_NAME, "span"):
                text = span.text.strip()
                if re.match(r'\d{1,2}:\d{2}', text) and len(text) == 5:
                    if flight_data['Departure Time'] == MISSING:
                        flight_data['Departure Time'] = text
                    else:
                        flight_data['Arrival Time'] = text
                        break
        except Exception:
            # Span lookup failed: keep the placeholders.
            pass


# Fixed pause to let the results finish rendering before reading them.
time.sleep(5)

flights = []

# Every flight card is a <section id="flight-card">.
flight_containers = driver.find_elements(By.XPATH, "//section[@id='flight-card']")

print(f"Found {len(flight_containers)} flight containers")

for idx, container in enumerate(flight_containers):
    try:
        # Scroll the card into view and give it a moment before reading text.
        driver.execute_script("arguments[0].scrollIntoView(true);", container)
        time.sleep(0.5)

        flight_data = _parse_flight_text(container.text)
        _fill_from_elements(container, flight_data)

        # Add metadata
        flight_data['From'] = "Kathmandu"
        flight_data['To'] = "Biratnagar"
        flight_data['Scraped Date'] = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        flight_data['Route'] = "KTM-BIR"

        flights.append(flight_data)

        print(f"✓ Flight {idx+1}: {flight_data.get('Flight No', 'N/A')} | "
              f"{flight_data.get('Departure Time', 'N/A')} → {flight_data.get('Arrival Time', 'N/A')} | "
              f"{flight_data.get('Prices', 'N/A')[:30]}...")

    except Exception as e:
        # One bad card must not abort the whole scrape.
        print(f"⚠ Error with flight {idx+1}: {str(e)}")
        continue

# Built unconditionally so `df` exists for later cells even when nothing
# was extracted (it is then an empty frame).
df = pd.DataFrame(flights)

# Display results
if flights:
    print(f"\nSuccessfully extracted {len(flights)} flights!")

    for i, flight in enumerate(flights[:3], 1):  # Show first 3
        print(f"\nFlight {i}:")
        for key, value in flight.items():
            if value != "N/A":
                print(f"  {key}: {value}")

    # Save to CSV
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"flights_extracted_{timestamp}.csv"
    df.to_csv(filename, index=False)
    print(f"\n✔ Data saved to {filename}")

else:
    print("❌ No flights extracted!")

    # Diagnostic fallback: dump page lines that look like flight info so the
    # selectors can be fixed by hand.
    print("\nTrying final approach - looking at page structure...")

    # Get all text and filter for flight info
    all_text = driver.find_element(By.TAG_NAME, "body").text
    lines = all_text.split('\n')

    flight_lines = [line for line in lines if 'U4' in line or 'NPR' in line or re.search(r'\d{1,2}:\d{2}', line)]

    print(f"\nFound {len(flight_lines)} potential flight info lines:")
    for line in flight_lines[:20]:  # Show first 20
        print(f"  {line}")

Found 7 flight containers
✓ Flight 1: U4 701 | 08:00 → 08:35 | NPR 5,399.99 | NPR 5,576.99...
✓ Flight 2: U4 705 | 12:00 → 12:35 | NPR 5,399.99 | NPR 5,576.99...
✓ Flight 3: U4 707 | 13:45 → 14:20 | NPR 5,399.99 | NPR 5,576.99...
✓ Flight 4: U4 709 | 15:00 → 15:35 | NPR 5,399.99 | NPR 5,576.99...
✓ Flight 5: U4 711 | 16:55 → 17:30 | NPR 5,399.99 | NPR 5,576.99...
✓ Flight 6: U4 713 | 18:05 → 18:40 | NPR 5,399.99 | NPR 5,576.99...
✓ Flight 7: U4 715 | 19:00 → 19:35 | NPR 5,399.99 | NPR 5,576.99...

Successfully extracted 7 flights!

Flight 1:
  Flight No: U4 701
  Departure Time: 08:00
  Arrival Time: 08:35
  Duration: 35m
  Prices: NPR 5,399.99 | NPR 5,576.99
  From: Kathmandu
  To: Biratnagar
  Scraped Date: 2026-01-07 16:11:02
  Route: KTM-BIR

Flight 2:
  Flight No: U4 705
  Departure Time: 12:00
  Arrival Time: 12:35
  Duration: 35m
  Prices: NPR 5,399.99 | NPR 5,576.99
  From: Kathmandu
  To: Biratnagar
  Scraped Date: 2026-01-07 16:11:03
  Route: KTM-BIR

Flight 3:
  Flight No: U

In [None]:
# Display the scraped flights table; `df` is created by the extraction cell.
df