## 1. Set Up Environment

In [1]:
from bs4 import BeautifulSoup as bs
from datetime import date
from datetime import datetime
from datetime import timedelta
import pandas as pd
import pyodbc
import re
import requests
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time

## 2. Skyscanner Scraper

### 2.1 Change Some Preliminary Options For ChromeDriver To Try And Avoid Bot Detection

In [2]:
# Build the ChromeOptions shared by every browser session started in this
# notebook. Each tweak below tries to make the automated browser look less
# like a bot.
chrome_options = webdriver.ChromeOptions()

# Disable the Blink "AutomationControlled" feature flag.
chrome_options.add_argument("--disable-blink-features=AutomationControlled")

# Run in incognito mode.
chrome_options.add_argument("--incognito")

# Drop the "enable-automation" switch from the switches passed to Chrome.
chrome_options.add_experimental_option("excludeSwitches", ["enable-automation"])

# Turn off the automation extension.
chrome_options.add_experimental_option("useAutomationExtension", False)

# User agents applied one after another when a session starts.
# The literals are joined with implicit string concatenation: a backslash
# continuation *inside* a string literal would embed the next line's
# indentation in the user agent as a long run of spaces.
useragentarray = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
]

# Path to the chromedriver executable handed to webdriver.Chrome.
chrome_driver = "C:/ChromeDrive/chromedriver"

### 2.2 Create The Main Function To Scrape Skyscanner Flights

In [3]:
# Maps the lower-case cabin-class name accepted by skyscanner_flight to the
# visible text of the matching option in the cabin-class dropdown.
_CABIN_LABELS = (
    'Economy',
    'Premium economy',
    'Business Class',
    'First Class',
)
flight_inputs = {label.lower(): label for label in _CABIN_LABELS}


def _prompt_child_ages(count):
    """Ask on stdin for the age of each of *count* children.

    Re-prompts until the answer is a whole number in the 0-15 range the
    prompt advertises. Ages are returned as strings because they are later
    passed to ``Select.select_by_value``.
    """
    ages = []
    for number in range(1, count + 1):
        while True:
            print(f"Enter the age of child {number} (ages: 0-15)")
            answer = input().strip()
            try:
                age = int(answer)
            except ValueError:
                age = -1
            if 0 <= age <= 15:
                ages.append(str(age))
                break
            print("age must be a whole number between 0 and 15")
    return ages


def _enter_airport(browser, input_xpath, query):
    """Type *query* into the location input at *input_xpath* and confirm it.

    Waits up to 5 seconds for the input to be present, clears it, types the
    query, then sends DOWN + ENTER (presumably picking the first entry of the
    suggestion list -- confirm against the live page).
    """
    field = WebDriverWait(browser, 5).until(
        EC.presence_of_element_located((By.XPATH, input_xpath)))
    field.clear()  # must be *called*; a bare `field.clear` does nothing
    field.click()
    time.sleep(1)
    field.send_keys(query)
    time.sleep(1)
    field.send_keys(Keys.DOWN, Keys.ENTER)


def skyscanner_flight(class_='economy', leave="tvr", going="tpe",
                      depart=None, return_=None,
                      trav=(1, 0), direct=False):
    """Drive a Chrome session that fills in the Skyscanner search form.

    Invalid input is reported with ``print`` and the function returns
    ``None`` without starting a browser. The function always returns
    ``None``; the browser is closed on success and on failure.

    Args:
        class_: Key of ``flight_inputs`` naming the cabin class.
        leave: Text typed into the origin input.
        going: Text typed into the destination input.
        depart: Departure ``date`` (a ``datetime`` is reduced to its date).
            ``None`` means today, evaluated at call time.
        return_: Return ``date`` (a ``datetime`` is reduced to its date).
            ``None`` means tomorrow, evaluated at call time.
        trav: Sequence ``(adults, children)``. At least 1 adult, at most 8
            of each, at most 16 in total.
        direct: Currently unused.

    Raises:
        KeyError: If ``class_`` is not a key of ``flight_inputs``.
        selenium.common.exceptions.TimeoutException: If an expected form
            element does not appear within 5 seconds.
    """
    ### Resolve defaults at call time ##############################################################################
    # Defaults written as `date.today()` in the signature are evaluated only
    # once, when the function is defined, and go stale in a long-lived kernel.
    today = date.today()
    if depart is None:
        depart = today
    if return_ is None:
        return_ = today + timedelta(days=1)
    # Subtracting a date from a datetime raises TypeError, so normalise first.
    if isinstance(depart, datetime):
        depart = depart.date()
    if isinstance(return_, datetime):
        return_ = return_.date()

    ### Handle out of bound dates ###################################################################################
    if (depart - today).days < 0:
        print("depart date cannot be earlier the current date")
        return
    if (return_ - depart).days < 0:
        print("return date cannot be earlier than depart date")
        return
    if (depart - today).days > 330 or (return_ - today).days > 330:
        print("Selected dates are too far in the future")
        return

    ### Handle invalid traveller counts #############################################################################
    adults, children = trav[0], trav[1]
    if adults < 1:
        print("must have at least 1 adult selected")
        return
    if children < 0:
        print("number of children cannot be negative")
        return
    if sum(trav) > 16 or adults > 8 or children > 8:
        print("total travellers can't exceed 16")
        return

    # Look the label up before launching Chrome so an unknown class fails
    # fast with the same KeyError instead of after a browser has started.
    cabin_label = flight_inputs[class_]

    ### If there are children selected ##############################################################################
    child_ages = _prompt_child_ages(children) if children > 0 else []

    ### Request the webpage #########################################################################################
    url = "https://www.skyscanner.ca/"
    browser = webdriver.Chrome(chrome_driver, options=chrome_options)
    try:
        # Change the property of the navigator value for webdriver to undefined
        browser.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
        browser.implicitly_wait(0.5)

        # Apply each configured user agent in turn and load the page under it.
        for user_agent in useragentarray:
            browser.execute_cdp_cmd("Network.setUserAgentOverride", {"userAgent": user_agent})
            print(browser.execute_script("return navigator.userAgent;"))
            browser.get(url)

        ### Select ticket "class" and Travellers ####################################################################
        trav_class_xpath = '//button[@aria-label="Select number of travelers and cabin class"]'
        trav_class_element = WebDriverWait(browser, 5).until(
            EC.presence_of_element_located((By.XPATH, trav_class_xpath)))
        trav_class_element.click()
        time.sleep(1)
        cabin_select = Select(browser.find_element(By.ID, "search-controls-cabin-class-dropdown"))
        cabin_select.select_by_visible_text(cabin_label)
        time.sleep(1)

        add_adult_xpath = '//button[@aria-controls="adult-nudger"][2]'
        add_child_xpath = '//button[@aria-controls="children-nudger"][2]'

        # Only the extra adults are clicked in (presumably the form starts
        # with one adult already selected -- confirm against the live page).
        for _ in range(adults - 1):
            browser.find_element(By.XPATH, add_adult_xpath).click()
            time.sleep(1)

        # Add each child, then pick that child's age from its dropdown.
        for index, age in enumerate(child_ages, start=1):
            browser.find_element(By.XPATH, add_child_xpath).click()
            time.sleep(1)
            age_select = Select(browser.find_element(
                By.XPATH, f'//select[@aria-label="Age of child {index}"]'))
            age_select.select_by_value(age)
            time.sleep(1)

        ### Select Leaving from and Going to locations ##############################################################
        _enter_airport(browser, '//input[@aria-controls="originInput-menu"]', leave)
        _enter_airport(browser, '//input[@aria-controls="destinationInput-menu"]', going)
    finally:
        # Always release the browser, even when a wait times out above.
        browser.quit()

In [None]:
# Example run: business class from 'ord' to 'ark' with 2 adults and 1 child.
skyscanner_flight(
    class_='business class',
    leave='ord',
    going='ark',
    trav=[2, 1],
)