### Import JSONs from Folders

In [1]:
# 1) Sift Through JSONS and import all of them into pandas DF 

# Data location
data_location = "/Users/dunya/Desktop/capstone_bfm/airport_hubs"



In [2]:
import os     
from pprintpp import pprint

# LINK => https://stackoverflow.com/questions/19932130/iterate-through-folders-then-subfolders-and-print-filenames-with-path-to-text-f                                                                                                               

# Gather list of file paths into array

def list_files(dir):                                                                                                  
    r = []                                              # list to store paths                                                                    
    subdirs = [x[0] for x in os.walk(dir)]              # collect subfolders    => AIRPORT_HUBS                                                   
    for subdir in subdirs:                                                                                            
        files = os.walk(subdir).__next__()[2]           # collect subsubfolders => DATES                                                     
        if (len(files) > 0):                                                                                          
            for file in files:                                                                                        
                r.append(os.path.join(subdir, file))    # append file paths to list                                                                      
    return r  

In [3]:
# Get array of file paths
path_list = list_files(data_location)

# Check file path length
# (3 Dates) x (5 Destinations) x (30 Hubs) => 450 JSONs
pprint(len(path_list))

pprint(path_list[1])

450
'/Users/dunya/Desktop/capstone_bfm/airport_hubs/DXB/2023-11-22T00:00:00/BOM.json'


### Define Helper Methods/Functions

In [4]:
# Helper to create dictionary of the description lists from BFM

def make_dict(obj_list):
    new_dict = {}
    for obj in obj_list:
        reference_num = obj["id"]
        new_dict[f"{reference_num}"] = obj
    return new_dict

In [5]:
# Helper to append Schedules to Legs

def append_schedules(leg_list, sched_list):

    # Make dictionaries so its easier to access
    schedule_dict = make_dict(sched_list)

    new_leg_list = []
    for leg in leg_list:
        leg_sched_ref = leg["schedules"][0]["ref"]  # get reference number
        new_leg = schedule_dict[str(leg_sched_ref)] # key into schedules using ref
        new_leg_list.append(new_leg)                # append
    return new_leg_list

In [6]:
# Helper to append Legs to Itineraries

import copy

def append_legs(itinerary_list, leg_list):

    # Make dictionaries so its easier to access
    leg_dict = make_dict(leg_list)

    # Copy itinerary list to add leg info to
    new_itinerary_list = copy.deepcopy(itinerary_list)

    for itin in new_itinerary_list:
        itin_leg_ref = itin["legs"][0]["ref"]        # get reference number
        leg_info = leg_dict[str(itin_leg_ref)]       # key into legs using ref
        itin["legs"] = [leg_info]                    # attach leg info to itin

    return new_itinerary_list

### Extract Relevant Features from JSONs

In [7]:
# Function to extract relevant features from combined BFM Descriptions

def extract_features(itinerary_list):
    prepped_itineraries = []
    for itin in itinerary_list:
        # pprint(itin) # => Original Itinerary
        prepped_itinerary = {}
        # Get features from each item for "legs" portion
        destination_iata = itin["legs"][0]["arrival"]["airport"]
        destination_time = itin["legs"][0]["arrival"]["time"]
        destination_country = itin["legs"][0]["arrival"]["country"]
        origin_iata = itin["legs"][0]["departure"]["airport"]
        origin_time = itin["legs"][0]["departure"]["time"]
        origin_country = itin["legs"][0]["departure"]["country"]
        flight_duration = itin["legs"][0]["elapsedTime"]
        flight_frequency = itin["legs"][0]["frequency"]
        flight_distance = itin["legs"][0]["totalMilesFlown"]
        stop_count = itin["legs"][0]["stopCount"]
        aircraft_type = itin["legs"][0]["carrier"]["equipment"]["code"] # I think

        # Get features from each item for "pricing" portion
        total_price = itin["pricingInformation"][0]["fare"]["totalFare"]["totalPrice"]
        total_tax = itin["pricingInformation"][0]["fare"]["totalFare"]["totalTaxAmount"]
        validating_carrier = itin["pricingInformation"][0]["fare"]["validatingCarrierCode"]
        booking_code = itin["pricingInformation"][0]["fare"]["passengerInfoList"][0]["passengerInfo"]["fareComponents"][0]["segments"][0]["segment"]["bookingCode"]
        cabin_code = itin["pricingInformation"][0]["fare"]["passengerInfoList"][0]["passengerInfo"]["fareComponents"][0]["segments"][0]["segment"]["cabinCode"]
        seats_available = itin["pricingInformation"][0]["fare"]["passengerInfoList"][0]["passengerInfo"]["fareComponents"][0]["segments"][0]["segment"]["seatsAvailable"]

        # Organize into new object of relevant features
        prepped_itinerary["origin"] = origin_iata
        prepped_itinerary["origin_country"] = origin_country
        prepped_itinerary["departure_time"] = origin_time 
        prepped_itinerary["destination"] = destination_iata
        prepped_itinerary["destination_country"] = destination_country
        prepped_itinerary["arrival_time"] = destination_time
        prepped_itinerary["duration"] = flight_duration
        prepped_itinerary["distance"] = flight_distance
        prepped_itinerary["frequency"] = flight_frequency
        prepped_itinerary["num_stops"] = stop_count
        prepped_itinerary["aircraft_type"] = aircraft_type
        prepped_itinerary["validating_carrier"] = validating_carrier
        prepped_itinerary["booking_code"] = booking_code
        prepped_itinerary["cabin_code"] = cabin_code
        prepped_itinerary["num_seats_available"] = seats_available
        prepped_itinerary["total_tax"] = total_tax
        prepped_itinerary["total_price"] = total_price

        # Append to list
        prepped_itineraries.append(prepped_itinerary)
    return prepped_itineraries

### Function to read JSON files and Reformat using methods above

In [8]:
import json

# Read in the JSON data
test_path = path_list[1]
  
# Helper to read text file
def read_text_file(file_path):
    with open(file_path, 'r') as f:
        return f.read()
   
# Function to iterate through all files
def load_json(path):

    # Extract Origin, Destination, and Timestamp from File Path    
    path_components = path.split('/')
    origin = path_components[6]
    destination = path_components[8].split('.')[0]  # remove ".json"
            # pprint((origin, destination, timestamp))
    timestamp = path_components[7]                  # PROBABLY ONLY NEED THIS


    # Read File Path
    string_data = read_text_file(path)
    data = json.loads(string_data)                  # maybe put loads() method into read_text_file()

    # Get necessary Descs lists
    schedule_descs = data["groupedItineraryResponse"]["scheduleDescs"]
    leg_descs = data["groupedItineraryResponse"]["legDescs"]
    itinerary_descs = data["groupedItineraryResponse"]["itineraryGroups"][0]["itineraries"]

    # Append schedules to legs
    new_legs = append_schedules(leg_descs, schedule_descs)

    # Append legs to itineraries
    new_itins = append_legs(itinerary_descs, new_legs)

    # Check Length and Format
            # pprint(new_itins)
            # print(len(new_itins))

    # Extract Relevant Features
    nice_itins = extract_features(new_itins)
            # pprint(nice_itins)
    
    # Add Timestamp to Features
    for nice in nice_itins:
        nice["timestamp"] = timestamp

    return nice_itins

### Iterate Through Path List, apply Load JSON to get Info

In [9]:
import numpy as np

# Iterate Path List
prepared_data = []
for path in path_list:
    info = load_json(path)
            # print(f"\nPREPARED JSON FOR PATH {path}: \n")
            # pprint(info)
    prepared_data.append(info)

# Flatten prepared data since it is list of lists
print("Prepared Data Length BEFORE: ", len(prepared_data))
flattened = np.concatenate( prepared_data , axis=0 )   # flatten list elements in prepared_data
print("Prepared Data Length AFTER: ", len(flattened))

('DXB', 'KWI', '2023-11-22T00:00:00')

PREPARED JSON FOR PATH /Users/dunya/Desktop/capstone_bfm/airport_hubs/DXB/2023-11-22T00:00:00/KWI.json: 

[
    {
        'aircraft_type': '77W',
        'arrival_time': '02:15:00+03:00',
        'booking_code': 'K',
        'cabin_code': 'Y',
        'departure_time': '01:25:00+04:00',
        'destination': 'KWI',
        'destination_country': 'KW',
        'distance': 530,
        'duration': 110,
        'frequency': 'SMTWTFS',
        'num_seats_available': 7,
        'num_stops': 0,
        'origin': 'DXB',
        'origin_country': 'AE',
        'timestamp': '2023-11-22T00:00:00',
        'total_price': 128.5,
        'total_tax': 65.5,
        'validating_carrier': 'FZ',
    },
    {
        'aircraft_type': '7M8',
        'arrival_time': '19:45:00+03:00',
        'booking_code': 'K',
        'cabin_code': 'Y',
        'departure_time': '19:00:00+04:00',
        'destination': 'KWI',
        'destination_country': 'KW',
        'distance'

TypeError: Object of type ndarray is not JSON serializable

### Save Prepped Data as JSON

In [17]:
# Save File
with open('prepared_data.json', 'w') as outf:
    outf.write(json.dumps(list(flattened)))

### Commented Raw BFM Payload

In [19]:
# FROM legDescs (# Relevant Features, # Optional Features)

{
    "id": 6,
    "elapsedTime": 185,
    "schedules": [
        {
            "ref": 5
        }
    ]
}

{'id': 6, 'elapsedTime': 185, 'schedules': [{'ref': 5}]}

In [20]:
# FROM itineraryGroups -> itineraries (# Relevant Features, # Optional Features)

{
    "id": 1,
    "pricingSource": "ADVJR1",
    "legs": [
        {
            "ref": 6
        }
    ],
    "pricingInformation": [
        {
            "pricingSubsource": "HPIS",
            "fare": {
                "validatingCarrierCode": "AI",
                "vita": true,
                "eTicketable": true,
                "lastTicketDate": "2023-11-22",
                "lastTicketTime": "11:45",
                "governingCarriers": "AI",
                "passengerInfoList": [
                    {
                        "passengerInfo": {
                            "passengerType": "ADT",
                            "passengerNumber": 1,
                            "nonRefundable": false,
                            "fareComponents": [
                                {
                                    "ref": 2,
                                    "beginAirport": "DXB",
                                    "endAirport": "BOM",
                                    "segments": [
                                        {
                                            "segment": {
                                                "bookingCode": "K",
                                                "cabinCode": "Y",
                                                "mealCode": "M",
                                                "seatsAvailable": 9,
                                                "availabilityBreak": True
                                            }
                                        }
                                    ]
                                }
                            ],
                            "taxes": [],
                            "taxSummaries": [],
                            "currencyConversion": {
                                "from": "AED",
                                "to": "USD",
                                "exchangeRateUsed": 0.27229691
                            },
                            "passengerTotalFare": {
                                "totalFare": 349.9,
                                "totalTaxAmount": 33.9,
                                "currency": "USD",
                                "baseFareAmount": 1160,
                                "baseFareCurrency": "AED",
                                "equivalentAmount": 316,
                                "equivalentCurrency": "USD",
                                "constructionAmount": 315.83,
                                "constructionCurrency": "NUC",
                                "exchangeRateOne": 3.67275
                            },
                            "baggageInformation": [
                                {
                                    "provisionType": "A",
                                    "airlineCode": "AI",
                                    "segments": [
                                        {
                                            "id": 0
                                        }
                                    ],
                                    "allowance": {
                                        "ref": 3
                                    }
                                }
                            ]
                        }
                    }
                ],
                "totalFare": {
                    "totalPrice": 349.9,
                    "totalTaxAmount": 33.9,
                    "currency": "USD",
                    "baseFareAmount": 1160,
                    "baseFareCurrency": "AED",
                    "constructionAmount": 315.83,
                    "constructionCurrency": "NUC",
                    "equivalentAmount": 316,
                    "equivalentCurrency": "USD"
                },
                "validatingCarriers": [
                    {
                        "ref": 4
                    }
                ]
            }
        }
    ],
    "diversitySwapper": {
        "weighedPrice": 121.992
    }
}

NameError: name 'true' is not defined