
### Python Script: TripAdvisor Integration with Azure ML

Load these necessayr packages 

In [3]:
# Loading relevant data packages
import pandas as pd
import requests
import json
import csv
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime
%matplotlib inline
pd.set_option('display.max_columns', None)


This initiates a flight search API request to TripAdvisor's API using the specified parameters like source and destination airport codes, departure date, itinerary type, and more. The API request is constructed with the provided inputs and sent with necessary headers for authentication.

In [14]:
# API parameters
api_key = "04add5b7cdmshd4b470181d45798p1428bbjsn3f4be7e144f4"
api_host = "tripadvisor16.p.rapidapi.com"
url = "https://tripadvisor16.p.rapidapi.com/api/v1/flights/searchFlights"

# API customer inputs (this will come from UI when ready)

sourceAirportCode = 'EWR'
destinationAirportCode = 'DAL'
outDate= "2024-06-22"
itineraryType= "ONE_WAY"
sortOrder= 'PRICE'
classOfService= "ECONOMY"
pageNumber= '1'
currencyCode= 'USD'

#sourceAirportCode = "BOI"
#destinationAirportCode = "TPA"
#outDate = "2024-06-22"
#itineraryType = "ROUND_TRIP"
#sortOrder = "PRICE"
#returnDate = "2024-06-29"
#numAdults = "1"
#numSeniors = "0"
#classOfService = "ECONOMY"
#pageNumber = "1" # for now fetch just first page to save on query number

# build API inputs
querystring = {"sourceAirportCode":sourceAirportCode,"destinationAirportCode":destinationAirportCode,\
               "date":outDate, "itineraryType":itineraryType,"sortOrder":sortOrder,\
               "classOfService":classOfService,"pageNumber":pageNumber, "currencyCode":currencyCode}

headers = {"X-RapidAPI-Key": api_key,\
           "X-RapidAPI-Host": api_host}

In [15]:
# fetch data from API
response = requests.get(url, headers=headers, params=querystring)

In [16]:
# convert json to python dict
response_text = json.loads(response.text)

The following extracts flight details from a flight search API response, handling default inputs denoted by "0" where necessary. It iterates through each flight, extracting information such as departure and arrival times, airline details, flight numbers, origin and destination airports, flight duration, and distance. The extracted data is structured into a pandas DataFrame for further analysis, assuming a single purchase link per flight and defaulting certain fields like arrival delay and cancellation status.

In [17]:
# Extract flights information
flights_data = []

for flight in response_text['data']['flights']:
    segments = flight['segments']
    for segment in segments:
        legs = segment['legs']
        for leg in legs:
            departure_time = datetime.fromisoformat(leg['departureDateTime'])
            arrival_time = datetime.fromisoformat(leg['arrivalDateTime'])
            air_time_minutes = (arrival_time - departure_time).total_seconds() / 60
            
            departure_datetime = datetime.fromisoformat(leg['departureDateTime'])
            fl_date = departure_datetime.strftime("%Y-%m-%d")
            
            flight_info = {
                'FL_DATE': str(fl_date),
                'AIRLINE': str(flight['purchaseLinks'][0]['partnerSuppliedProvider']['displayName']), 
                'AIRLINE_CODE': leg['operatingCarrier']['code'],
                'DOT_CODE': "0",
                'FL_NUMBER': leg['flightNumber'],
                'ORIGIN': leg['originStationCode'],
                'DEST': leg['destinationStationCode'],
                'AIR_TIME': air_time_minutes,
                'DISTANCE': leg['distanceInKM'],
                #'total_price': flight['purchaseLinks'][0]['totalPrice'],  # Assuming only one purchase link per flight
                'ARR_DELAY': "0",
                'CANCELLED': "0"
                
            }
            flights_data.append(flight_info)

# Create DataFrame
flights_df = pd.DataFrame(flights_data)
flights_df

Unnamed: 0,FL_DATE,AIRLINE,AIRLINE_CODE,DOT_CODE,FL_NUMBER,ORIGIN,DEST,AIR_TIME,DISTANCE,ARR_DELAY,CANCELLED
0,2024-06-22,Alaska Airlines,AS,0,477,EWR,SEA,372.0,3858.7183,0,0
1,2024-06-23,Alaska Airlines,AS,0,588,SEA,DAL,235.0,2687.8003,0,0
2,2024-06-22,Alaska Airlines,AS,0,281,EWR,SEA,370.0,3858.7183,0,0
3,2024-06-23,Alaska Airlines,AS,0,588,SEA,DAL,235.0,2687.8003,0,0
4,2024-06-22,Alaska Airlines,AS,0,299,EWR,SEA,363.0,3858.7183,0,0
5,2024-06-23,Alaska Airlines,AS,0,588,SEA,DAL,235.0,2687.8003,0,0


### THE COLUMNS BELOW ARE NEEDED FOR THE INPUT ###

This is a check to ensure the columns "DOT_CODE", "ARR_DELAY", and "CANCELLED" have a default value of "0" and are in a string format. All ensure to check all columns are there.

In [18]:
flights_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 11 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   FL_DATE       6 non-null      object 
 1   AIRLINE       6 non-null      object 
 2   AIRLINE_CODE  6 non-null      object 
 3   DOT_CODE      6 non-null      object 
 4   FL_NUMBER     6 non-null      int64  
 5   ORIGIN        6 non-null      object 
 6   DEST          6 non-null      object 
 7   AIR_TIME      6 non-null      float64
 8   DISTANCE      6 non-null      float64
 9   ARR_DELAY     6 non-null      object 
 10  CANCELLED     6 non-null      object 
dtypes: float64(2), int64(1), object(8)
memory usage: 656.0+ bytes


In [20]:
#api key connecting to the Azure ML model 
ml_api_key = 'MJ62jbpd130GjJbvoMrI7hHS3PCzIPVm'

The  following is code that calls the Azure ML model that was deployed from Rutvij's device. The model is a Boosted Decision Tree Regression which takes in relevant columns and predits the arrival delay of a route. The scored labels output is the predicted arrival delay. 

In [21]:
import urllib.request
import json
import os
import ssl

def allowSelfSignedHttps(allowed):
    # bypass the server certificate verification on client side
    if allowed and not os.environ.get('PYTHONHTTPSVERIFY', '') and getattr(ssl, '_create_unverified_context', None):
        ssl._create_default_https_context = ssl._create_unverified_context

allowSelfSignedHttps(True) # this line is needed if you use self-signed certificate in your scoring service.

flights_df


# Convert dataframe to a list of dictionaries
data_to_send = flights_df.to_dict(orient='records')

data = {
    "Inputs": {
        "input1": data_to_send
    },
    "GlobalParameters": {}
}

body = str.encode(json.dumps(data))

url = 'http://d16ea803-be49-45c8-a407-8b01a550f87e.eastus2.azurecontainer.io/score'

# Replace this with the primary/secondary key or AMLToken for the endpoint
api_key = ml_api_key # You need to define ml_api_key

if not api_key:
    raise Exception("A key should be provided to invoke the endpoint")


headers = {'Content-Type':'application/json', 'Authorization':('Bearer '+ api_key)}

req = urllib.request.Request(url, body, headers)

try:
    response = urllib.request.urlopen(req)
    result = response.read()
    print(result)
    # Decode bytes to string
    json_string = result.decode('utf-8')
    # Convert string to dictionary
    data = json.loads(json_string)
    # Extract the list of dictionaries from the JSON
    results = data['Results']['WebServiceOutput0']
    # Convert the list of dictionaries to DataFrame
    result_df = pd.DataFrame(results)
    result_df
except urllib.error.HTTPError as error:
    print("The request failed with status code: " + str(error.code))
    print(error.info())
    print(error.read().decode("utf8", 'ignore'))

b'{"Results": {"WebServiceOutput0": [{"FL_DATE": "2024-06-22", "AIRLINE": "Alaska Airlines", "AIRLINE_CODE": "AS", "DOT_CODE": 0, "FL_NUMBER": 477, "ORIGIN": "EWR", "DEST": "SEA", "AIR_TIME": 372.0, "DISTANCE": 3858.7183, "ARR_DELAY": 0.0, "CANCELLED": 0, "Scored Labels": -17.374024449168747}, {"FL_DATE": "2024-06-23", "AIRLINE": "Alaska Airlines", "AIRLINE_CODE": "AS", "DOT_CODE": 0, "FL_NUMBER": 588, "ORIGIN": "SEA", "DEST": "DAL", "AIR_TIME": 235.0, "DISTANCE": 2687.8003, "ARR_DELAY": 0.0, "CANCELLED": 0, "Scored Labels": -16.68689525299471}, {"FL_DATE": "2024-06-22", "AIRLINE": "Alaska Airlines", "AIRLINE_CODE": "AS", "DOT_CODE": 0, "FL_NUMBER": 281, "ORIGIN": "EWR", "DEST": "SEA", "AIR_TIME": 370.0, "DISTANCE": 3858.7183, "ARR_DELAY": 0.0, "CANCELLED": 0, "Scored Labels": -17.374024449168747}, {"FL_DATE": "2024-06-23", "AIRLINE": "Alaska Airlines", "AIRLINE_CODE": "AS", "DOT_CODE": 0, "FL_NUMBER": 588, "ORIGIN": "SEA", "DEST": "DAL", "AIR_TIME": 235.0, "DISTANCE": 2687.8003, "ARR_

In [22]:
#formatted response as a dataframe 
result_df

Unnamed: 0,FL_DATE,AIRLINE,AIRLINE_CODE,DOT_CODE,FL_NUMBER,ORIGIN,DEST,AIR_TIME,DISTANCE,ARR_DELAY,CANCELLED,Scored Labels
0,2024-06-22,Alaska Airlines,AS,0,477,EWR,SEA,372.0,3858.7183,0.0,0,-17.374024
1,2024-06-23,Alaska Airlines,AS,0,588,SEA,DAL,235.0,2687.8003,0.0,0,-16.686895
2,2024-06-22,Alaska Airlines,AS,0,281,EWR,SEA,370.0,3858.7183,0.0,0,-17.374024
3,2024-06-23,Alaska Airlines,AS,0,588,SEA,DAL,235.0,2687.8003,0.0,0,-16.686895
4,2024-06-22,Alaska Airlines,AS,0,299,EWR,SEA,363.0,3858.7183,0.0,0,-17.374024
5,2024-06-23,Alaska Airlines,AS,0,588,SEA,DAL,235.0,2687.8003,0.0,0,-16.686895
