In [1]:
import json
import os
import re

import pandas as pd

In [2]:
from amadeus import Client, ResponseError

# API credentials are read from the environment so that secrets never live in
# the notebook (or its version history). Export AMADEUS_CLIENT_ID and
# AMADEUS_CLIENT_SECRET before starting the kernel; a missing variable fails
# fast with a KeyError naming it.
# NOTE(review): any key pair that was previously committed in this cell should
# be treated as leaked and rotated.
amadeus = Client(
    client_id=os.environ['AMADEUS_CLIENT_ID'],
    client_secret=os.environ['AMADEUS_CLIENT_SECRET'],
)

In [3]:
# Search flight offers from AKL to SYD departing 2024-04-12 for one adult.
try:
    response = amadeus.shopping.flight_offers_search.get(
        originLocationCode='AKL',
        destinationLocationCode='SYD',
        departureDate='2024-04-12',
        adults=1)
except ResponseError as error:
    # Show the API error, then re-raise: swallowing it would leave `response`
    # undefined (or stale from an earlier run) and the failure would only
    # surface later, in an unrelated cell.
    print(error)
    raise

In [4]:
# ISO 8601 duration of the form P[nD][T[nH][nM][nS]], e.g. 'PT3H40M' or 'P1DT2H'.
_ISO_DURATION_RE = re.compile(
    r'^P(?:(?P<days>\d+)D)?'
    r'(?:T(?:(?P<hours>\d+)H)?(?:(?P<minutes>\d+)M)?(?:(?P<seconds>\d+(?:\.\d+)?)S)?)?$'
)


def parse_duration(duration_str):
    """
    Parses a duration string in ISO 8601 format and returns the total duration in minutes.

    Supports the day, hour, minute and second designators (e.g. 'PT45M',
    'PT3H40M', 'P1DT2H30M', 'PT1H30M15S'). Any component may be omitted.

    Args:
        duration_str: Duration such as 'PT3H40M'.

    Returns:
        int: Total whole minutes; a seconds component is truncated to full minutes.

    Raises:
        ValueError: If the string is not a duration in the supported form.
    """
    match = _ISO_DURATION_RE.match(duration_str)
    if match is None:
        raise ValueError(f"Unsupported ISO 8601 duration: {duration_str!r}")

    # Components that are absent from the string count as zero.
    parts = match.groupdict(default='0')
    whole_seconds = int(float(parts['seconds']))
    return (
        int(parts['days']) * 24 * 60
        + int(parts['hours']) * 60
        + int(parts['minutes'])
        + whole_seconds // 60
    )

# Map a departure hour onto a named part of the day
def time_of_day(hour):
    """
    Return the part-of-day label for an hour value.

    Hours in [0, 6) are 'Early Morning', [6, 12) 'Morning', [12, 17)
    'Afternoon' and [17, 21) 'Evening'. Any other value is 'Night'.
    """
    # (inclusive start, exclusive end, label) for each named band.
    day_parts = (
        (0, 6, 'Early Morning'),
        (6, 12, 'Morning'),
        (12, 17, 'Afternoon'),
        (17, 21, 'Evening'),
    )
    for start, stop, label in day_parts:
        if start <= hour < stop:
            return label
    # Everything outside the bands above falls through to the night label.
    return 'Night'
    
# Classify a flight by its duration as Short-haul, Medium-haul, or Long-haul
def categorize_flight_haul(duration):
    """
    Return the haul category for a duration given in minutes.

    Up to and including 180 minutes is 'Short-haul', up to and including
    360 minutes is 'Medium-haul', and anything else is 'Long-haul'.
    """
    # Inclusive upper bounds in minutes, checked from shortest to longest.
    haul_limits = ((180, 'Short-haul'), (360, 'Medium-haul'))
    return next(
        (label for limit, label in haul_limits if duration <= limit),
        'Long-haul',
    )
    
def is_overnight_flight(row):
    """
    Determine if a flight is an overnight flight.
    A flight is considered overnight if it arrives on the next day after it departs,
    i.e. the calendar date of 'ArrivalDateTime' is later than that of
    'DepartureDateTime'.

    The dates are compared exactly as recorded in the two timestamps; no
    timezone conversion is applied. Comparing dates directly (instead of
    inferring a day change from clock hours and duration) also covers short
    flights that cross midnight and trips longer than 24 hours.

    Args:
        row: Mapping/Series with datetime-like 'DepartureDateTime' and
            'ArrivalDateTime' entries (anything exposing `.date()`).

    Returns:
        bool: True when the arrival date is after the departure date.
    """
    departure_date = row['DepartureDateTime'].date()
    arrival_date = row['ArrivalDateTime'].date()
    return arrival_date > departure_date

def _fare_categories(totals):
    """Label each price 'Low', 'Medium' or 'High' by tercile, tolerating tied prices."""
    labels = ['Low', 'Medium', 'High']
    # Plain terciles first. When many identical prices make the bin edges
    # coincide, pd.qcut raises ValueError, so retry on first-occurrence ranks,
    # which are distinct by construction.
    for values in (totals, totals.rank(method='first')):
        try:
            return pd.qcut(values, q=3, labels=labels)
        except ValueError:
            continue
    # Too few prices to form terciles at all: leave the category missing.
    return pd.Series(
        pd.Categorical([None] * len(totals), categories=labels, ordered=True),
        index=totals.index,
    )


def parse_all_data(flight_data):
    """
    Flatten a list of flight-offer dicts into four DataFrames.

    Every itinerary of every offer gets a sequential ItineraryID and every
    segment a sequential FlightID (both starting at 1).

    Args:
        flight_data: Iterable of offer dicts with "itineraries",
            "travelerPricings" and "price" entries. None or an empty
            iterable yields four empty, correctly-labelled frames.

    Returns:
        tuple: (segments_data, prices_data, flights_data, fares_data)
            - segments_data: one row per segment.
            - prices_data: one row per itinerary carrying its offer's price,
              plus a tercile 'FareCategory' and 'TotalZScore'.
            - flights_data: one row per itinerary with derived columns
              (time of day, weekend flag, haul, non-stop, overnight).
            - fares_data: one row per matching fare detail per segment, plus
              'FlightsPerItinerary' (distinct segments in the itinerary).
    """
    segment_columns = [
        "ItineraryID", "FlightID", "Duration", "Departure", "Arrival",
        "CarrierCode", "DepartureDateTime", "ArrivalDateTime",
        "NumberOfStops", "AircraftCode",
    ]
    fare_columns = [
        "ItineraryID", "FlightID", "Cabin", "Class", "FareBasis",
        "IncludedCheckedBagsQuantity",
    ]
    flight_columns = [
        "ItineraryID", "Departure", "Arrival", "DepartureDateTime",
        "ArrivalDateTime", "TotalDuration", "NumberOfStops",
    ]
    price_columns = ["ItineraryID", "Currency", "Total", "Base"]

    # Initialize data holders
    segment_rows = []
    price_rows = []
    flight_rows = []
    fare_rows = []

    itinerary_id_counter = 1
    flight_id_counter = 1

    for offer in flight_data or []:
        for itinerary in offer["itineraries"]:
            # Parse itinerary information
            segments = itinerary["segments"]
            itinerary_duration = parse_duration(itinerary["duration"])
            # Parse segment information for each segment in the itinerary
            for segment in segments:
                aircraft_code = segment["aircraft"]["code"] if "aircraft" in segment else "Unknown"
                segment_duration_minutes = parse_duration(segment["duration"])
                segment_rows.append({
                    "ItineraryID": itinerary_id_counter,
                    "FlightID": flight_id_counter,
                    "Duration": segment_duration_minutes,
                    "Departure": segment["departure"]["iataCode"],
                    "Arrival": segment["arrival"]["iataCode"],
                    "CarrierCode": segment["carrierCode"],
                    "DepartureDateTime": segment["departure"]["at"],
                    "ArrivalDateTime": segment["arrival"]["at"],
                    "NumberOfStops": segment["numberOfStops"],
                    "AircraftCode": aircraft_code
                })

                # Add fare details for each segment if available
                # (one row per traveler whose fare detail references this segment)
                for traveler_pricing in offer["travelerPricings"]:
                    for fare_detail in traveler_pricing["fareDetailsBySegment"]:
                        if fare_detail["segmentId"] == segment["id"]:
                            fare_rows.append({
                                "ItineraryID": itinerary_id_counter,
                                "FlightID": flight_id_counter,
                                "Cabin": fare_detail["cabin"],
                                "Class": fare_detail["class"],
                                "FareBasis": fare_detail["fareBasis"],
                                "IncludedCheckedBagsQuantity": fare_detail.get("includedCheckedBags", {}).get("quantity", 0)
                            })

                flight_id_counter += 1

            # Parse flight information based on the whole itinerary
            flight_rows.append({
                "ItineraryID": itinerary_id_counter,
                "Departure": segments[0]["departure"]["iataCode"],
                "Arrival": segments[-1]["arrival"]["iataCode"],
                "DepartureDateTime": segments[0]["departure"]["at"],
                "ArrivalDateTime": segments[-1]["arrival"]["at"],
                "TotalDuration": itinerary_duration,
                "NumberOfStops": len(segments) - 1,
            })

            # Prices for each itinerary (the offer-level price is repeated per itinerary)
            price_rows.append({
                "ItineraryID": itinerary_id_counter,
                "Currency": offer["price"]["currency"],
                "Total": offer["price"]["total"],
                "Base": offer["price"]["base"],
            })

            itinerary_id_counter += 1

    # Nothing to parse: return empty frames that still expose every column,
    # instead of failing on a missing column further down.
    if not flight_rows:
        return (
            pd.DataFrame(columns=segment_columns),
            pd.DataFrame(columns=price_columns + ['FareCategory', 'TotalZScore']),
            pd.DataFrame(columns=flight_columns + [
                'Departure_TimeOfDay', 'IsWeekend_Departure', 'FlightHaul',
                'IsNonStop', 'IsOverNightFlight',
            ]),
            pd.DataFrame(columns=fare_columns + ['FlightsPerItinerary']),
        )

    segments_data = pd.DataFrame(segment_rows, columns=segment_columns)
    segments_data['DepartureDateTime'] = pd.to_datetime(segments_data['DepartureDateTime'])
    segments_data['ArrivalDateTime'] = pd.to_datetime(segments_data['ArrivalDateTime'])

    flights_data = pd.DataFrame(flight_rows, columns=flight_columns)
    flights_data['DepartureDateTime'] = pd.to_datetime(flights_data['DepartureDateTime'])
    flights_data['ArrivalDateTime'] = pd.to_datetime(flights_data['ArrivalDateTime'])
    flights_data['Departure_TimeOfDay'] = flights_data['DepartureDateTime'].dt.hour.apply(time_of_day)
    # dayofweek: Monday=0 ... Sunday=6, so >= 5 means Saturday or Sunday
    flights_data['IsWeekend_Departure'] = flights_data['DepartureDateTime'].dt.dayofweek >= 5
    flights_data['FlightHaul'] = flights_data['TotalDuration'].apply(categorize_flight_haul)
    flights_data['IsNonStop'] = flights_data['NumberOfStops'] == 0
    flights_data['IsOverNightFlight'] = flights_data.apply(is_overnight_flight, axis=1)

    prices_data = pd.DataFrame(price_rows, columns=price_columns)
    prices_data['Total'] = prices_data['Total'].astype(float)
    prices_data['Base'] = prices_data['Base'].astype(float)
    prices_data['FareCategory'] = _fare_categories(prices_data['Total'])
    prices_data['TotalZScore'] = (prices_data['Total'] - prices_data['Total'].mean()) / prices_data['Total'].std()

    # Passing `columns` keeps the frame well-formed even if no fare detail matched.
    fares_data = pd.DataFrame(fare_rows, columns=fare_columns)
    # Count distinct segments: a plain row count would be multiplied by the
    # number of travelers, since each traveler contributes a row per segment.
    fares_data['FlightsPerItinerary'] = fares_data.groupby('ItineraryID')['FlightID'].transform('nunique')

    return segments_data, prices_data, flights_data, fares_data

# Flatten the offers returned by the search into the four analysis tables.
segments_data, prices_data, flights_data, fares_data = parse_all_data(response.data)

In [5]:
# Display the per-segment fare details table.
fares_data

Unnamed: 0,ItineraryID,FlightID,Cabin,Class,FareBasis,IncludedCheckedBagsQuantity,FlightsPerItinerary
0,1,1,ECONOMY,O,OEESLZ0X,0,1
1,2,2,ECONOMY,C,CLOW1,0,1
2,3,3,ECONOMY,C,CLOW1,0,1
3,4,4,ECONOMY,S,SBAGN21,1,1
4,5,5,ECONOMY,S,SBAGN21,1,1
...,...,...,...,...,...,...,...
174,97,175,ECONOMY,V,HBAGNF,1,2
175,98,176,ECONOMY,H,HBAGNF,1,2
176,98,177,ECONOMY,V,HBAGNF,1,2
177,99,178,ECONOMY,Y,YSE00DNN,2,2


In [6]:
# Show five randomly sampled rows of the price table (rows differ between runs).
prices_data.sample(5)

Unnamed: 0,ItineraryID,Currency,Total,Base,FareCategory,TotalZScore
87,88,EUR,551.89,493.0,High,0.848975
2,3,EUR,183.38,93.0,Low,-0.807781
59,60,EUR,283.7,209.0,Medium,-0.35676
26,27,EUR,272.52,156.0,Low,-0.407023
68,69,EUR,301.31,205.0,High,-0.277588


In [7]:
# Preview the first ten segment rows.
segments_data.head(10)

Unnamed: 0,ItineraryID,FlightID,Duration,Departure,Arrival,CarrierCode,DepartureDateTime,ArrivalDateTime,NumberOfStops,AircraftCode
0,1,1,220,AKL,SYD,LA,2024-04-12 07:25:00,2024-04-12 09:05:00,0,789
1,2,2,225,AKL,SYD,JQ,2024-04-12 15:55:00,2024-04-12 17:40:00,0,320
2,3,3,230,AKL,SYD,JQ,2024-04-12 06:15:00,2024-04-12 08:05:00,0,320
3,4,4,215,AKL,SYD,NZ,2024-04-12 12:50:00,2024-04-12 14:25:00,0,32Q
4,5,5,220,AKL,SYD,NZ,2024-04-12 16:20:00,2024-04-12 18:00:00,0,32Q
5,6,6,220,AKL,SYD,NZ,2024-04-12 19:50:00,2024-04-12 21:30:00,0,32Q
6,7,7,220,AKL,SYD,NZ,2024-04-12 07:00:00,2024-04-12 08:40:00,0,32Q
7,8,8,215,AKL,SYD,NZ,2024-04-12 09:00:00,2024-04-12 10:35:00,0,77W
8,9,9,215,AKL,OOL,JQ,2024-04-12 07:00:00,2024-04-12 08:35:00,0,320
9,9,10,90,OOL,SYD,JQ,2024-04-12 12:45:00,2024-04-12 14:15:00,0,321


In [35]:
# Preview the first ten itinerary-level rows, including the derived columns.
flights_data.head(10)

Unnamed: 0,ItineraryID,Departure,Arrival,DepartureDateTime,ArrivalDateTime,TotalDuration,NumberOfStops,Departure_TimeOfDay,IsWeekend_Departure,FlightHaul,IsNonStop,IsOverNightFlight
0,1,AKL,SYD,2024-04-12 07:25:00,2024-04-12 09:05:00,220,0,Morning,False,Medium-haul,True,False
1,2,AKL,SYD,2024-04-12 15:55:00,2024-04-12 17:40:00,225,0,Afternoon,False,Medium-haul,True,False
2,3,AKL,SYD,2024-04-12 06:15:00,2024-04-12 08:05:00,230,0,Morning,False,Medium-haul,True,False
3,4,AKL,SYD,2024-04-12 12:50:00,2024-04-12 14:25:00,215,0,Afternoon,False,Medium-haul,True,False
4,5,AKL,SYD,2024-04-12 16:20:00,2024-04-12 18:00:00,220,0,Afternoon,False,Medium-haul,True,False
5,6,AKL,SYD,2024-04-12 19:50:00,2024-04-12 21:30:00,220,0,Evening,False,Medium-haul,True,False
6,7,AKL,SYD,2024-04-12 07:00:00,2024-04-12 08:40:00,220,0,Morning,False,Medium-haul,True,False
7,8,AKL,SYD,2024-04-12 09:00:00,2024-04-12 10:35:00,215,0,Morning,False,Medium-haul,True,False
8,9,AKL,SYD,2024-04-12 15:30:00,2024-04-12 22:15:00,525,1,Afternoon,False,Long-haul,False,False
9,10,AKL,SYD,2024-04-12 07:00:00,2024-04-12 22:15:00,1035,1,Morning,False,Long-haul,False,False


In [None]:
# from datetime import datetime

# # Define the datetime format
# fmt = '%Y-%m-%dT%H:%M:%S'

# # Parse the datetime strings
# start = datetime.strptime('2024-04-05T23:30:00', fmt)
# end = datetime.strptime('2024-04-06T00:20:00', fmt)

# # Calculate the duration in minutes
# duration = (end - start).total_seconds() / 60
# duration

In [8]:
# Inspect the raw offer dict at index 6 of the API response.
response.data[6]

{'type': 'flight-offer',
 'id': '7',
 'source': 'GDS',
 'instantTicketingRequired': False,
 'nonHomogeneous': False,
 'oneWay': False,
 'lastTicketingDate': '2024-03-22',
 'lastTicketingDateTime': '2024-03-22',
 'numberOfBookableSeats': 7,
 'itineraries': [{'duration': 'PT3H40M',
   'segments': [{'departure': {'iataCode': 'AKL',
      'terminal': 'I',
      'at': '2024-04-12T07:00:00'},
     'arrival': {'iataCode': 'SYD',
      'terminal': '1',
      'at': '2024-04-12T08:40:00'},
     'carrierCode': 'NZ',
     'number': '101',
     'aircraft': {'code': '32Q'},
     'operating': {'carrierCode': 'NZ'},
     'duration': 'PT3H40M',
     'id': '118',
     'numberOfStops': 0,
     'blacklistedInEU': False}]}],
 'price': {'currency': 'EUR',
  'total': '200.67',
  'base': '155.00',
  'fees': [{'amount': '0.00', 'type': 'SUPPLIER'},
   {'amount': '0.00', 'type': 'TICKETING'}],
  'grandTotal': '200.67',
  'additionalServices': [{'amount': '45.00', 'type': 'CHECKED_BAGS'}]},
 'pricingOptions': {'