In [2]:
# import necessary libraries
import pandas as pd
import json
import os
import concurrent.futures
import time


from actions.done.CreateZFWMessageAction import CreateZFWMessageAction
from actions.done.UpdateEstimatesAction import UpdateEstimatesAction
from actions.done.CalculateWeightAndTrimAction import CalculateWeightAndTrimAction
from actions.done.CreateLoadsheetAction import CreateLoadsheetAction
from actions.done.UpdateFuelDataAction import UpdateFuelDataAction

In [3]:
def load_config(path):
    """Read a JSON configuration file and return its parsed content.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON documents are UTF-8; pinning the encoding keeps the result independent
    # of the platform's locale default.
    with open(path, 'r', encoding='utf-8') as file:
        return json.load(file)

# Relative path of the shared pandas settings file.
config_path = '../pandas_config.json'
config = load_config(config_path)
parquet_config = config['parquet']

# Notebook-wide constants: parquet engine and compression come from the config,
# FILE_PATH is the root folder for data files.
ENGINE, COMPRESSION = parquet_config['engine'], parquet_config['compression']
FILE_PATH = "../data"

In [4]:
# Read processed_data_combined.parquet from the data folder with the configured engine.
df = pd.read_parquet(
    FILE_PATH + "/data_parquet/processed_data_combined.parquet",
    engine=ENGINE,
)

In [5]:
# Move the current index into a regular column (it is selected as "index" and
# renamed to row_id in the following cells).
df = df.reset_index()

In [6]:
# Only these columns are needed for the weight/fuel extraction.
KEEP_COLUMNS = ["flight_id", "action_name","entry_details", "index"]
df = df.loc[:, KEEP_COLUMNS]

In [7]:
# The former index becomes row_id; process_flight sorts each flight's rows by it.
df = df.rename(columns={"index": "row_id"})

In [8]:
# Derive the supported action names from the module files in actions/done (the
# imports at the top load each parser from a module of the same name) and keep
# only rows whose action has such a module.
# splitext drops just the trailing extension, unlike str.replace, which would
# also remove a ".py" occurring in the middle of a file name.
ACTIONS = [
    os.path.splitext(filename)[0]
    for filename in os.listdir("actions/done")
    if filename.endswith(".py")
]
df = df[df['action_name'].isin(ACTIONS)]

In [9]:
flight_ids = df['flight_id'].unique()
# Index by flight_id and sort the index: label lookups on a sorted (monotonic)
# index can use binary search, whereas an unsorted index with repeated labels is
# scanned in full on every lookup. Row order inside a flight does not matter
# here because process_flight re-sorts each flight's rows by row_id.
df = df.set_index('flight_id').sort_index()

In [14]:
def _append_if_changed(history, value):
    """Append ``value`` to ``history`` unless it is None or repeats the latest entry."""
    if value is None:
        return
    if not history or value != history[-1]:
        history.append(value)


def process_flight(flight, df):
    """Extract the change history of weight and fuel figures for one flight.

    The flight's rows are processed in ascending ``row_id`` order. Each row's
    ``entry_details`` is handed to the parser matching its ``action_name``; every
    value a parser returns is appended to the corresponding history list unless it
    is None or equal to the most recent entry of that list. Rows with any other
    action name are ignored.

    Args:
        flight: Flight identifier; must be a label of ``df``'s index.
        df: DataFrame indexed by flight id with the columns ``row_id``,
            ``action_name`` and ``entry_details``.

    Returns:
        A 6-tuple ``(flight, estimated_zfws, actual_zfws, actual_fuels,
        actual_tows, min_take_off_fuels)`` where every element after the first is
        a list.

    Raises:
        KeyError: If ``flight`` is not present in the index of ``df``.
    """
    # Selecting with a list always yields a DataFrame, even when the flight has a
    # single row (a scalar label would give a Series, which cannot be sorted
    # "by" a column).
    subset_df = df.loc[[flight]].sort_values(by="row_id", ascending=True)
    actions = subset_df['action_name'].values
    entry_details = subset_df["entry_details"].values
    temp_estimated_zfws = []
    temp_actual_zfws = []
    temp_actual_fuels = []
    temp_actual_tows = []
    temp_min_take_off_fuels = []

    for entry, action in zip(entry_details, actions):
        if action == "CreateZFWMessageAction":
            estimated_zfw, actual_zfw = CreateZFWMessageAction(entry)
            _append_if_changed(temp_estimated_zfws, estimated_zfw)
            _append_if_changed(temp_actual_zfws, actual_zfw)
        elif action == "UpdateEstimatesAction":
            # This parser yields only an estimated ZFW. The previous version also
            # re-checked an actual_zfw left over from an earlier row, which failed
            # with UnboundLocalError when this action came first for a flight.
            estimated_zfw = UpdateEstimatesAction(entry)
            _append_if_changed(temp_estimated_zfws, estimated_zfw)
        elif action == "CalculateWeightAndTrimAction":
            actual_zfw = CalculateWeightAndTrimAction(entry)
            _append_if_changed(temp_actual_zfws, actual_zfw)
        elif action == "CreateLoadsheetAction":
            actual_tow, actual_zfw, estimated_zfw, actual_fuel = CreateLoadsheetAction(entry)
            _append_if_changed(temp_actual_zfws, actual_zfw)
            _append_if_changed(temp_estimated_zfws, estimated_zfw)
            _append_if_changed(temp_actual_fuels, actual_fuel)
            _append_if_changed(temp_actual_tows, actual_tow)
        elif action == "UpdateFuelDataAction":
            actual_fuel, minimum_tof = UpdateFuelDataAction(entry)
            _append_if_changed(temp_actual_fuels, actual_fuel)
            _append_if_changed(temp_min_take_off_fuels, minimum_tof)

    return flight, temp_estimated_zfws, temp_actual_zfws, temp_actual_fuels, temp_actual_tows, temp_min_take_off_fuels

def collect_results(result):
    """File one flight's extracted histories into the module-level result dicts.

    Args:
        result: The 6-tuple returned by ``process_flight``: the flight id followed
            by the estimated-ZFW, actual-ZFW, actual-fuel, actual-TOW and
            minimum-take-off-fuel lists.

    Side effects:
        Sets the flight's key in the globals ``estimated_zfws``, ``actual_zfws``,
        ``actual_take_off_fuels``, ``actual_tows`` and ``minimum_take_off_fuels``.
    """
    flight, est_history, zfw_history, fuel_history, tow_history, min_fuel_history = result
    stores_and_histories = (
        (estimated_zfws, est_history),
        (actual_zfws, zfw_history),
        (actual_take_off_fuels, fuel_history),
        (actual_tows, tow_history),
        (minimum_take_off_fuels, min_fuel_history),
    )
    for store, history in stores_and_histories:
        store[flight] = history

# Wall-clock timer around the whole extraction pass.
start = time.time()

# Fresh result stores keyed by flight id; collect_results fills them.
estimated_zfws, actual_zfws, actual_take_off_fuels, actual_tows, minimum_take_off_fuels = (
    {}, {}, {}, {}, {}
)

# Submit one task per flight to a thread pool and store each result as soon as
# its future completes.
with concurrent.futures.ThreadPoolExecutor() as executor:
    futures = [
        executor.submit(process_flight, flight_id, df)
        for flight_id in flight_ids
    ]
    for future in concurrent.futures.as_completed(futures):
        collect_results(future.result())

end = time.time()
print(end - start)


91.49910807609558


In [21]:
# Flatten each {flight: history} dictionary into a list of (flight, history) pairs.
estimated_zfws_tuples = list(estimated_zfws.items())
actual_zfws_tuples = list(actual_zfws.items())
actual_take_off_fuels_tuples = list(actual_take_off_fuels.items())
minimum_take_off_fuels_tuples = list(minimum_take_off_fuels.items())
actual_tows_tuples = list(actual_tows.items())

# One two-column frame per quantity, all keyed by flight_number.
estimated_zfws_df = pd.DataFrame(estimated_zfws_tuples, columns=['flight_number', 'estimated_zfws'])
actual_zfws_df = pd.DataFrame(actual_zfws_tuples, columns=['flight_number', 'actual_zfws'])
actual_take_off_fuels_df = pd.DataFrame(actual_take_off_fuels_tuples, columns=['flight_number', 'actual_take_off_fuels'])
minimum_take_off_fuels_df = pd.DataFrame(minimum_take_off_fuels_tuples, columns=['flight_number', 'minimum_take_off_fuels'])
actual_tows_df = pd.DataFrame(actual_tows_tuples, columns=['flight_number', 'actual_tows'])

# Outer-join the frames on flight_number so a flight present in any of them is kept.
weights = (
    estimated_zfws_df
    .merge(actual_zfws_df, on='flight_number', how="outer")
    .merge(actual_take_off_fuels_df, on='flight_number', how="outer")
    .merge(minimum_take_off_fuels_df, on='flight_number', how="outer")
    .merge(actual_tows_df, on='flight_number', how="outer")
)


In [22]:
# Display the merged per-flight table: one row per flight_number, one list-valued
# column per tracked quantity.
weights

Unnamed: 0,flight_number,estimated_zfws,actual_zfws,actual_take_off_fuels,minimum_take_off_fuels,actual_tows
0,AB-1070-2024-1-5-BOM,"[45446, 63200]","[61795, 62383, 6231070, 62332, 62038, 62551, 6...",[13659],[],[75454]
1,AB-1070-2024-15-5-BOM,[],[45446],[],[],[]
2,AB-1070-2024-16-5-BOM,[],[45446],[],[],[]
3,AB-1070-2024-18-5-BOM,[],[45438],[],[],[]
4,AB-1070-2024-19-5-BOM,[],[45438],[],[],[]
...,...,...,...,...,...,...
14805,ZY-99-2024-4-5-REC,[13734],"[13656, 13734, 13746]",[],[],[]
14806,ZY-99-2024-6-5-VCP,[45241],[45241],[],[],[]
14807,ZY-9900-2024-6-5-VCP,[162000],"[122604, 142814, 153124, 162000, 122584, 12267...",[67487],[],[]
14808,ZY-9902-2024-6-5-VCP,[162000],"[142814, 162000, 161410, 120514, 141720, 13830...",[24540],[],[]


In [23]:
# Write the result next to the input data. The path is built from FILE_PATH, as
# for the read above, instead of repeating the "../data" literal.
weights.to_parquet(
    f"{FILE_PATH}/data_parquet/weights.parquet",
    engine=ENGINE,
    compression=COMPRESSION,
)