## 1. Set Up Environment

In [19]:
from bs4 import BeautifulSoup as bs
from datetime import date
from datetime import timedelta
import pandas as pd
import re
import requests
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

## 2. Expedia Scraper

In [20]:
# Where the scraper starts, and which chromedriver binary drives the browser.
url = "https://www.expedia.ca/"
chrome_driver = "C:/ChromeDrive/chromedriver"

# Options object handed to every Chrome session the scraper opens.
chrome_options = webdriver.ChromeOptions()

# Command-line flag: switch off the "AutomationControlled" Blink feature.
chrome_options.add_argument("--disable-blink-features=AutomationControlled")

# Experimental options: leave the automation extension off and drop the
# "enable-automation" switch from the launch switches.
chrome_options.add_experimental_option("useAutomationExtension", False)
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])

In [43]:
# Lookup table used by expedia_flight():
#   * trip-type name  -> suffix of the matching "FlightSearchForm_*" tab
#   * cabin-class name -> key presses sent to the cabin-class control
#     (N x DOWN followed by ENTER, where N is the 1-based position below).
flight_inputs = {
    'Roundtrip': 'ROUND_TRIP',
    'One-way': 'ONE_WAY',
    'Multi-city': 'MULTI_CITY',
}
flight_inputs.update(
    (cabin_name, (Keys.DOWN,) * presses + (Keys.ENTER,))
    for presses, cabin_name in enumerate(
        ('Economy', 'Premium economy', 'Business class', 'First class'), start=1
    )
)


# Trip types this scraper can actually drive (flight_inputs also lists 'Multi-city').
_TRIP_TYPES = ('Roundtrip', 'One-way')
_CABIN_CLASSES = ('Economy', 'Premium economy', 'Business class', 'First class')
# Furthest a depart/return date may lie in the future, in days.
_MAX_DAYS_AHEAD = 330

# Calendar paging buttons; each XPath is a union covering two page layouts.
_PREV_MONTH_XPATH = ('//button[@data-stid="date-picker-paging"][1]'
                     ' | //button[@data-stid="uitk-calendar-navigation-controls-previous-button"]')
_NEXT_MONTH_XPATH = ('//button[@data-stid="date-picker-paging"][2]'
                     ' | //button[@data-stid="uitk-calendar-navigation-controls-next-button"]')


def _check_request(type_, class_, depart, return_, trav):
    """Return an error message for an unusable request, or None if it is acceptable."""
    if type_ not in _TRIP_TYPES:
        return f"unsupported flight type: {type_!r} (expected one of {_TRIP_TYPES})"
    if class_ not in _CABIN_CLASSES:
        return f"unsupported ticket class: {class_!r} (expected one of {_CABIN_CLASSES})"

    today = date.today()
    if (depart - today).days < 0:
        return "depart date cannot be earlier the current date"
    if (return_ - depart).days < 0:
        return "return date cannot be earlier than depart date"
    if (depart - today).days > _MAX_DAYS_AHEAD or (return_ - today).days > _MAX_DAYS_AHEAD:
        return "Selected dates are too far in the future"

    if len(trav) != 4 or not all(isinstance(n, int) and n >= 0 for n in trav):
        return ("trav must hold 4 non-negative whole numbers: "
                "[adults, children, infants on lap, infants in seat]")
    adults, children, lap, seat = trav
    if adults < 1:
        return "must have at least 1 adult selected"
    # With non-negative counts and at least one adult, the total alone bounds
    # the adult and child counts as well.
    if sum(trav) > 6:
        return "total travellers can't exceed 6"
    if lap > 0 and seat > 0:
        return "infants must be either ALL on lap, or ALL on seat"
    if lap > 4 or seat > 4:
        return "infants cannot exceed 4"
    return None


def _prompt_ages(label, count, youngest, oldest):
    """Ask on stdin for *count* ages; return them as strings, or None on bad input."""
    ages = []
    for num in range(1, count + 1):
        print(f"Enter the age of {label} {num} (ages: {youngest}-{oldest})")
        answer = input().strip()
        try:
            age = int(answer)
        except ValueError:
            age = None
        if age is None or not youngest <= age <= oldest:
            print(f"{label} age must be a whole number between {youngest} and {oldest}")
            return None
        # Normalised text form, because the age drop-downs are selected by value.
        ages.append(str(age))
    return ages


def _wait_for(browser, xpath, timeout=5):
    """Return the element matching *xpath* once it is present in the DOM."""
    return WebDriverWait(browser, timeout).until(EC.presence_of_element_located((By.XPATH, xpath)))


def _choose_airport(browser, button_label, input_stid, code):
    """Open one airport picker, type *code* and accept the first suggestion."""
    _wait_for(browser, f'//button[@aria-label="{button_label}"]').click()
    field = _wait_for(browser, f'//input[@data-stid="{input_stid}"]')
    field.click()
    time.sleep(1)
    field.send_keys(code)
    time.sleep(1)  # pause before picking from the suggestion list
    field.send_keys(Keys.DOWN, Keys.ENTER)


def _turn_calendar_pages(browser, months):
    """Page the open calendar forward (*months* > 0) or back (*months* < 0)."""
    paging_xpath = _NEXT_MONTH_XPATH if months > 0 else _PREV_MONTH_XPATH
    for _ in range(abs(months)):
        time.sleep(1)
        # Re-locate the button for every click rather than reusing one handle.
        _wait_for(browser, paging_xpath).click()
        time.sleep(1)


def _click_day(browser, day):
    """Click *day* on the calendar page that is currently displayed."""
    day_xpath = (f'//button[@data-day="{day.day}" and contains(@aria-label, "{day.strftime("%b")}")]'
                 f' | //div[starts-with(text(), "{day.day}")]')
    browser.find_element(By.XPATH, day_xpath).click()
    time.sleep(2)


def _click_increase(browser, group):
    """Press the "Increase the number of <group>" control once."""
    increase_xpath = f'//*[starts-with(@aria-label, "Increase the number of {group}")]/..'
    browser.find_element(By.XPATH, increase_xpath).click()
    time.sleep(1)


def _add_travellers_with_ages(browser, group, select_id_prefix, ages):
    """Add one traveller of *group* per entry in *ages* and set each age drop-down."""
    for index, age in enumerate(ages):
        _click_increase(browser, group)
        Select(browser.find_element(By.ID, f"{select_id_prefix}-{index}")).select_by_value(age)
        time.sleep(1)


def _fill_search_form(browser, type_, class_, leave, going, depart, return_,
                      trav, child_ages, infant_ages):
    """Drive the flight search form from the landing page through to "Search"."""
    adults, _children, lap, _seat = trav

    # "Flights" tab, then the trip-type tab inside it.
    _wait_for(browser, '//a[@aria-controls="search_form_product_selector_flights"]').click()
    _wait_for(browser, f'//a[@aria-controls="FlightSearchForm_{flight_inputs[type_]}"]').click()

    # Ticket class: open the control and send the key presses for the class.
    cabin = _wait_for(browser, '//button[@id="cabin_class"]')
    cabin.click()
    time.sleep(1)
    cabin.send_keys(flight_inputs[class_])

    _choose_airport(browser, "Leaving from", "origin_select-menu-input", leave)
    _choose_airport(browser, "Going to", "destination_select-menu-input", going)

    # Dates. The original comment says Expedia pre-fills a date two weeks out;
    # the button is matched on that date's month abbreviation instead of a
    # fixed "Sep".
    # NOTE(review): assumes the button label shows the month in '%b' form - confirm.
    shown = date.today() + timedelta(days=14)
    _wait_for(browser, '//button[starts-with(@aria-label, "Date") and '
                       f'contains(@aria-label, "{shown.strftime("%b")}")]').click()
    _turn_calendar_pages(browser, (depart.year - shown.year) * 12 + (depart.month - shown.month))
    _click_day(browser, depart)
    if type_ == 'Roundtrip':
        # The calendar is now on the depart month, so page relative to it.
        _turn_calendar_pages(browser, (return_.year - depart.year) * 12 + (return_.month - depart.month))
        _click_day(browser, return_)
    _wait_for(browser, '//button[@data-stid="apply-date-picker"] | //button[contains(text(), "Done")]').click()

    # Travellers.
    _wait_for(browser, '//button[@data-stid="open-room-picker"]').click()
    time.sleep(1)
    # Only adults - 1 clicks are made; presumably the form starts with one adult.
    for _ in range(adults - 1):
        _click_increase(browser, "adults")
    _add_travellers_with_ages(browser, "children",
                              "age-traveler_selector_children_age_selector", child_ages)
    # Validation guarantees at most one infant group is non-zero.
    _add_travellers_with_ages(browser, "infants on lap" if lap > 0 else "infants in seat",
                              "age-traveler_selector_infant_age_selector", infant_ages)
    _wait_for(browser, '//button[@id="travelers_selector_done_button"]').click()

    # Submit and give the results page time to load.
    search = _wait_for(browser, '//button[@id="search_button"]')
    time.sleep(1)
    search.click()
    time.sleep(10)


def expedia_flight(type_='Roundtrip', class_='Economy', leave="YVR", going="TPE",
                   depart=None, return_=None, trav=None):
    """Fill in and submit the Expedia flight search form in a Chrome session.

    Parameters
    ----------
    type_ : 'Roundtrip' or 'One-way'.
    class_ : 'Economy', 'Premium economy', 'Business class' or 'First class'.
    leave, going : text typed into the "Leaving from" / "Going to" pickers;
        the first suggestion is accepted.
    depart : datetime.date, defaults to today (evaluated at call time).
    return_ : datetime.date, defaults to the day after ``depart``. It is
        validated for every trip type but only selected for 'Roundtrip'.
    trav : [adults, children, infants on lap, infants in seat],
        defaults to [1, 0, 0, 0].

    Ages of children and infants are requested interactively via ``input()``.

    Returns
    -------
    The open ``webdriver.Chrome`` session once the search has been submitted
    (the caller should ``quit()`` it when done), or ``None`` when the request
    is rejected; in that case the reason is printed and no browser is started.

    Any exception raised while driving the page closes the browser and is
    re-raised.
    """
    # Resolve defaults per call: a default evaluated at definition time would
    # go stale in a long-running session.
    depart = date.today() if depart is None else depart
    return_ = depart + timedelta(days=1) if return_ is None else return_
    trav = [1, 0, 0, 0] if trav is None else list(trav)

    # Reject bad requests before any prompt or browser launch.
    problem = _check_request(type_, class_, depart, return_, trav)
    if problem is not None:
        print(problem)
        return None
    adults, children, lap, seat = trav

    child_ages = []
    if children > 0:
        child_ages = _prompt_ages("child", children, 2, 17)
        if child_ages is None:
            return None

    # For every infant on lap, there must be a traveller (age 12+) for them.
    lap_capable = adults + sum(1 for age in child_ages if int(age) >= 12)
    if lap_capable < lap:
        print("Not enough age 12+ travellers selected for infants on lap")
        return None

    infant_ages = []
    if lap + seat > 0:
        infant_ages = _prompt_ages("infant", lap + seat, 0, 1)
        if infant_ages is None:
            return None

    browser = webdriver.Chrome(chrome_driver, options=chrome_options)
    try:
        # Change the property of the navigator value for webdriver to undefined.
        # NOTE(review): this runs before the page is requested; confirm the
        # override is still in effect after browser.get().
        browser.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
        browser.get(url)
        _fill_search_form(browser, type_, class_, leave, going, depart, return_,
                          trav, child_ages, infant_ages)
    except BaseException:
        # Do not leave a stray Chrome session behind when a step fails.
        browser.quit()
        raise
    return browser

In [None]:
# Example run: one-way business-class search for one adult with one lap infant.
expedia_flight(
    type_='One-way',
    class_='Business class',
    depart=date(2023, 10, 5),
    return_=date(2023, 11, 17),
    trav=[1, 0, 1, 0],
)