## 1. Set Up Environment

In [2]:
from bs4 import BeautifulSoup as bs
from datetime import date
from datetime import timedelta
import pandas as pd
import re
import requests
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

## 2. Expedia Scraper

In [6]:
# Site to scrape and the local chromedriver executable used to drive Chrome
url = "https://www.expedia.ca/"
chrome_driver = "C:/ChromeDrive/chromedriver"

# Build the Chrome options in one place. Each setting removes one of the
# hints Chrome normally exposes when it is being driven by automation.
chrome_options = webdriver.ChromeOptions()

# 1) switch off the Blink "AutomationControlled" feature flag
chrome_options.add_argument("--disable-blink-features=AutomationControlled")

# 2) do not pass the "enable-automation" command-line switch
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])

# 3) do not load the automation extension
chrome_options.add_experimental_option("useAutomationExtension", False)

# Launch the browser session shared by the scraper functions below
browser = webdriver.Chrome(chrome_driver, options=chrome_options)

In [7]:
# Lookup table translating the human-readable choices accepted by
# expedia_flight() into what the page needs:
#   * trip types map to the suffix of the "FlightSearchForm_..." tab id
#   * cabin classes map to the key presses sent to the cabin-class button
#     (N x DOWN to reach the entry, then ENTER to confirm it)
flight_inputs = {
    'Roundtrip': 'ROUND_TRIP',
    'One-way': 'ONE_WAY',
    'Multi-city': 'MULTI_CITY',
    'Economy': (Keys.DOWN,) * 1 + (Keys.ENTER,),
    'Premium economy': (Keys.DOWN,) * 2 + (Keys.ENTER,),
    'Business class': (Keys.DOWN,) * 3 + (Keys.ENTER,),
    'First class': (Keys.DOWN,) * 4 + (Keys.ENTER,),
}


def _wait_for(browser, xpath, timeout=5):
    """Wait up to *timeout* seconds for an element matching *xpath* and return it."""
    return WebDriverWait(browser, timeout).until(
        EC.presence_of_element_located((By.XPATH, xpath)))


def _fill_location(browser, button_label, input_stid, code):
    """Open the location picker labelled *button_label*, type *code*, take the first suggestion."""
    _wait_for(browser, '//button[@aria-label="' + button_label + '"]').click()
    field = _wait_for(browser, '//input[@data-stid="' + input_stid + '"]')
    field.click()
    time.sleep(1)
    field.send_keys(code)
    time.sleep(1)  # presumably lets the suggestion list populate before selecting
    field.send_keys(Keys.DOWN, Keys.ENTER)


def _page_calendar(browser, months):
    """Page the open calendar forward (*months* > 0) or backward (*months* < 0)."""
    if months == 0:
        return
    if months > 0:
        paging_xpath = ('//button[@data-stid="date-picker-paging"][2]'
                        ' | //button[@data-stid="uitk-calendar-navigation-controls-next-button"]')
    else:
        paging_xpath = ('//button[@data-stid="date-picker-paging"][1]'
                        ' | //button[@data-stid="uitk-calendar-navigation-controls-previous-button"]')
    # Look the button up on every call so the caller never depends on a
    # variable that was only bound on an earlier code path.
    paging_button = _wait_for(browser, paging_xpath)
    for _ in range(abs(months)):
        time.sleep(1)
        paging_button.click()
        time.sleep(1)


def _pick_day(browser, when):
    """Click the calendar cell for the date *when* on the currently shown page."""
    day = str(when.day)
    day_xpath = ('//button[@data-day="' + day + '" and contains(@aria-label, "'
                 + when.strftime('%b') + '")]'
                 ' | //div[starts-with(text(), "' + day + '")]')
    # find_element(By.XPATH, ...) is available on both old and new Selenium
    # releases, unlike the removed find_element_by_xpath shortcut.
    browser.find_element(By.XPATH, day_xpath).click()
    time.sleep(2)


def expedia_flight(browser=browser, type_='Roundtrip', class_='Economy', leave="YVR", going="TPE",
                   depart=None, return_=None):
    """Fill in and submit the flight search form on the site stored in ``url``.

    Args:
        browser: Selenium WebDriver to drive; defaults to the module-level ``browser``.
        type_: trip type, a key of ``flight_inputs`` ('Roundtrip', 'One-way',
            'Multi-city'). Only 'Roundtrip' and 'One-way' are implemented; any
            other valid key prints 'something broke' once the calendar is open.
        class_: cabin class, a key of ``flight_inputs`` ('Economy',
            'Premium economy', 'Business class', 'First class').
        leave: text typed into the "Leaving from" field (first suggestion is taken).
        going: text typed into the "Going to" field (first suggestion is taken).
        depart: departure ``date``; ``None`` means today (resolved at call time).
        return_: return ``date``; ``None`` means tomorrow (resolved at call time).
            It is validated for every trip type but only clicked for 'Roundtrip'.

    Returns:
        None. On invalid dates or an unsupported trip type a message is printed
        and the function returns early.

    Raises:
        KeyError: if ``type_`` or ``class_`` is not a key of ``flight_inputs``.
        selenium TimeoutException: propagated from WebDriverWait when an
            expected element does not appear within 5 seconds.
    """
    # Resolve date defaults per call; a default of date.today() in the
    # signature would be frozen at the moment the function was defined.
    today = date.today()
    if depart is None:
        depart = today
    if return_ is None:
        return_ = today + timedelta(days=1)

    # Handle out of bound dates
    if (depart - today).days < 0:
        print("depart date cannot be earlier the current date")
        return
    if (return_ - depart).days < 0:
        print("return date cannot be earlier than depart date")
        return
    if (depart - today).days > 330 or (return_ - today).days > 330:
        print("Selected dates are too far in the future")
        return

    # Change the property of the navigator value for webdriver to undefined.
    # NOTE(review): this runs against the page loaded *before* browser.get(url);
    # confirm the override is still in effect after the navigation.
    browser.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
    browser.get(url)

    ### Select "Flights" tab
    _wait_for(browser, '//a[@aria-controls="search_form_product_selector_flights"]').click()

    ### Select "type" of flight
    _wait_for(browser, '//a[@aria-controls="FlightSearchForm_' + flight_inputs[type_] + '"]').click()

    ### Select ticket "class": open the drop-down, then send the key presses
    class_element = _wait_for(browser, '//button[@id="cabin_class"]')
    class_element.click()
    time.sleep(1)
    class_element.send_keys(flight_inputs[class_])

    ### Select Leaving from and Going to locations
    _fill_location(browser, "Leaving from", "origin_select-menu-input", leave)
    _fill_location(browser, "Going to", "destination_select-menu-input", going)

    ### Select Dates
    # The site pre-fills the date button with a date two weeks from today, so
    # match the button on that month instead of a hard-coded month name.
    display_date_depart = today + timedelta(days=14)
    date_xpath = ('//button[starts-with(@aria-label, "Date") and contains(@aria-label, "'
                  + display_date_depart.strftime('%b') + '")]')
    _wait_for(browser, date_xpath).click()

    if type_ not in ('Roundtrip', 'One-way'):
        print('something broke')
        return

    # Number of calendar pages between the pre-filled month and the depart month
    month_diff_depart = ((depart.year - display_date_depart.year) * 12
                         + (depart.month - display_date_depart.month))
    _page_calendar(browser, month_diff_depart)
    _pick_day(browser, depart)

    if type_ == 'Roundtrip':
        # Pages from the depart month to the return month; never negative
        # because return_ >= depart was checked above.
        month_diff_return = (return_.year - depart.year) * 12 + (return_.month - depart.month)
        _page_calendar(browser, month_diff_return)
        _pick_day(browser, return_)

    # Click done on the calendar to save the selection and close it
    _wait_for(browser, '//button[@data-stid="apply-date-picker"] | //button[contains(text(), "Done")]').click()

    ### Click "Search" and wait for results
    search_element = _wait_for(browser, '//button[@id="search_button"]')
    time.sleep(1)
    search_element.click()
    time.sleep(15)  # fixed pause to let the results page load

In [8]:
# Example run: one-way search departing 2023-10-05. return_ is still passed
# because expedia_flight() validates it against depart for every trip type.
expedia_flight(
    type_='One-way',
    depart=date(2023, 10, 5),
    return_=date(2023, 11, 17),
)