## Import libraries:

In [291]:
from datetime import datetime, timedelta
from pathlib import Path
import os
import pandas as pd
import time
import requests
import json
import random

## Define functions:

In [620]:
def IsFilenameAlreadyExisting(filename):
    data_files_existing = os.listdir(directory_actual) + os.listdir(directory_plan)
    if filename in data_files_existing:
        return True
    return False
        
def pauseRandomlyLong():
    delay = round(random.randint(31, 53) / float(17),2)
    time.sleep(delay)

def pauseRandomlyShort():
    delay = round(random.randint(17, 31) / float(17),2)
    time.sleep(delay)
    
def GetIcelandairURL(dest, date_out_str):
    url= f"""https://www.icelandair.com/api/instantSearch/v1/bestPrice/byDay/return/multipleReturnsPerDeparture?
    departure=KEF
    &arrival={dest}
    &locale=is-IS
    &period=0
    &tripDuration=1
    &tripDurationFlexibility=21
    &fromDate={date_out_str}
    &fallbackToRouteCurrency=true
    """
    return url.replace("\n", "").replace(" ", "")

def SampleDataFromPlay(dataFromRequest, samplingList):
    data_home = dataFromRequest.json()['data']['lowestPrices']['homebound']
    data_out = dataFromRequest.json()['data']['lowestPrices']['outbound']

    for out in data_out:
        for home in data_home:
            if home['date'] <= out['date']: continue

            cost = int(home['price']) + int(out['price'])

            samplingList.append(
                {'Airline': 'Play',
                 'C_Date': today_str,
                 'DateOut': out['date'],
                 'DateBack': home['date'],
                 'Destination': dest,
                 'Price': cost})
            
    return samplingList

def SampleDataFromIcelandair(samplingList):
    dates_back = sorted(response.json()['inbound'].keys())

    for date_back_str in dates_back:
        samplingList.append(
        {'Airline': 'Icelandair',
         'C_Date': today_str,
         'DateOut': date_out_str,
         'DateBack': date_back_str,
         'Destination': dest,
         'Price': response.json()['inbound'][date_back_str]['totalFareAmount']})
    
    return samplingList


## Initialize directories:

In [474]:
dir_play = os.path.join('PlaneTicketData', 'Play')
dir_iceair = os.path.join('PlaneTicketData', 'Icelandair')

Path('PlaneTicketData').mkdir(parents=True, exist_ok=True)
Path(dir_play).mkdir(parents=True, exist_ok=True)
Path(dir_iceair).mkdir(parents=True, exist_ok=True)

## Steal prices from Play:

In [619]:
today = datetime.now()
today_str = today.strftime('%Y-%m-%d')

random.seed(today)

numberOfWeeks = 12
dests = ['ALC', 'AMS', 'BCN', 'BER',
         'CDG', 'CPH', 'STN', 'TFS']

target_filename = f"PlaneTicketPrice_Play_{today_str}.csv"
target_filename_path = os.path.join('PlaneTicketData', 'Play') 

if target_filename not in os.listdir(target_filename_path):

    prices = []

    for dest in dests:

        url = "https://flyplay.com/api/graphql"
        data = {
          "operationName": "SearchFormCalendarPrices",
          "variables": {
            "origin": "KEF",
            "destination": dest,
            "startingDate": {
              "month": today.month,
              "year": today.year
            },
            "currency": "ISK",
            "numberOfWeeks": numberOfWeeks,
            "roundTrip": True
          },
          "query": "query SearchFormCalendarPrices($origin: String!, $destination: String!, $startingDate: InputStartingDate!, $currency: String!, $numberOfWeeks: Int, $roundTrip: Boolean!) {\n  lowestPrices(\n    origin: $origin\n    destination: $destination\n    startingDate: $startingDate\n    currency: $currency\n    numberOfWeeks: $numberOfWeeks\n    roundTrip: $roundTrip\n  ) {\n    outbound {\n      date\n      price\n      __typename\n    }\n    homebound {\n      date\n      price\n      __typename\n    }\n    __typename\n  }\n}\n"
        }

        response = requests.post(url, json=data)

        if (response.status_code == 200):
            print(f"Connection to {dest} established.")

            prices = SampleDataFromPlay(response, prices)
            
            print(f"Data sampling for {dest} completed.")

        pauseRandomlyLong()
    
    if prices:
        df = pd.DataFrame(prices)
        df.to_csv(os.path.join(target_filename_path, target_filename), index=False)
        print(f'Data has been saved to file: {target_filename}')
    else:
        print("Data sampling failed.")

else:
    print('File already exists.')

Connection to ALC established.
Data sampling for ALC completed.
Connection to AMS established.
Data sampling for AMS completed.
Connection to BCN established.
Data sampling for BCN completed.
Connection to BER established.
Data sampling for BER completed.
Connection to CDG established.
Data sampling for CDG completed.
Connection to CPH established.
Data sampling for CPH completed.
Connection to STN established.
Data sampling for STN completed.
Connection to TFS established.
Data sampling for TFS completed.
Data has been saved to file: PlaneTicketPrice_Play_2021-10-16.csv


## Steal prices from Icelandair:

In [623]:
today = datetime.now()
today_str = today.strftime('%Y-%m-%d')

number_of_days = 21
random.seed(today)

dests = ['AMS', 'BER', 'BOS', 'ORD', 'DEN',
         'DUB', 'FRA', 'CPH', 'LHR', 'MAN', 
         'MUC', 'NYC', 'MCO', 'OSL', 'CDG', 
         'SEA', 'ARN', 'TFS', 'YTO', 'IAD',]

target_filename = f"PlaneTicketPrice_Icelandair_{today_str}.csv"
target_filename_path = os.path.join('PlaneTicketData', 'Icelandair') 

if target_filename not in os.listdir(target_filename_path):

    startTime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"Data sampling started at {startTime}.")
    
    prices = []

    for dest in dests:
        
        for i in range(0, number_of_days):
            date_out = today + timedelta(days=i)
            date_out_str = date_out.strftime('%Y-%m-%d')

            url = GetIcelandairURL(dest, date_out_str)
            
            response = requests.get(url)
            pauseRandomlyShort()
            
            if (response.status_code == 200):
                
                print(f"{date_out_str}: Connection to {dest} established ({i+1}/{number_of_days})")
                
                prices = SampleDataFromIcelandair(prices)
            
        print(f"Data sampling for {dest} completed.")
        pauseRandomlyLong()
    
    endTime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"Data sampling finished at {endTime}.")
    if prices:
        df = pd.DataFrame(prices)
        df.to_csv(os.path.join(target_filename_path, target_filename), index=False)
        print(f'Data has been saved to file: {target_filename}')
    else:
        print("No data was sampled in this run.")

else:
    print('File already exists.')

Data sampling started at 2021-10-16 17:05:38.
2021-10-16: Connection to BOS established (1/21)
2021-10-17: Connection to BOS established (2/21)
2021-10-18: Connection to BOS established (3/21)
2021-10-19: Connection to BOS established (4/21)
2021-10-20: Connection to BOS established (5/21)
2021-10-21: Connection to BOS established (6/21)
2021-10-22: Connection to BOS established (7/21)
2021-10-23: Connection to BOS established (8/21)
2021-10-24: Connection to BOS established (9/21)
2021-10-25: Connection to BOS established (10/21)
2021-10-26: Connection to BOS established (11/21)
2021-10-27: Connection to BOS established (12/21)
2021-10-28: Connection to BOS established (13/21)
2021-10-29: Connection to BOS established (14/21)
2021-10-30: Connection to BOS established (15/21)
2021-10-31: Connection to BOS established (16/21)
2021-11-01: Connection to BOS established (17/21)
2021-11-02: Connection to BOS established (18/21)
2021-11-03: Connection to BOS established (19/21)
2021-11-04: C