In [1]:
import pandas as pd
from tqdm.auto import tqdm
import actions
from pathlib import Path
from utils import *
import re
from tqdm.auto import tqdm
import json

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Define the file paths
parquet_file_abcd = Path("../data/ABCD_tripfiles.parquet")
parquet_file_abcd_conv = Path("../data/ABCD_tripfiles_conv.parquet")
parquet_file_mnop = Path("../data/MNOP_tripfiles.parquet")
parquet_file_mnop_conv = Path("../data/MNOP_tripfiles_conv.parquet")
parquet_file_zyxw = Path("../data/ZYXW_tripfiles.parquet")
parquet_file_zyxw_conv = Path("../data/ZYXW_tripfiles_conv.parquet")
print(
    parquet_file_abcd,
    parquet_file_abcd_conv,
    parquet_file_mnop,
    parquet_file_mnop_conv,
    parquet_file_zyxw,
    parquet_file_zyxw_conv,
)

..\data\ABCD_tripfiles.parquet ..\data\ABCD_tripfiles_conv.parquet ..\data\MNOP_tripfiles.parquet ..\data\MNOP_tripfiles_conv.parquet ..\data\ZYXW_tripfiles.parquet ..\data\ZYXW_tripfiles_conv.parquet


In [3]:
df_abcd = pd.read_parquet(parquet_file_abcd)
df_mnop = pd.read_parquet(parquet_file_mnop)
df_zyxw = pd.read_parquet(parquet_file_zyxw)

In [4]:
print(round(df_abcd.memory_usage(deep=True).sum() / 1024**2, 2), "MB")
print(round(df_mnop.memory_usage(deep=True).sum() / 1024**2, 2), "MB")
print(round(df_zyxw.memory_usage(deep=True).sum() / 1024**2, 2), "MB")

1214.94 MB
2219.62 MB
585.54 MB


In [5]:
action_names = set(df_abcd.action_name.unique())
print(len(action_names))
action_names.update(set(df_mnop.action_name.unique()))
print(len(action_names))
action_names.update(set(df_zyxw.action_name.unique()))
print(len(action_names))

49
62
67


In [6]:
action_extractors = {
    "CalculateWeightAndTrimAction": None,
    "CheckinMsgProcessor": None,
    "CreateLoadsheetAction": None,
    "CreateZFWMessageAction": None,
    "SendFuelOrderAction": None,
    "SendLoadsheetAction": None,
    "SetActualBagWeightIndicatorAction": None,
    "StorePaxDataAction": None,
    "StoreRegistrationAndConfigurationAc": None,
    "UpdateFuelDataAction": None,
    "RampFinalAction": actions.RampFinalAction.extract,
    "CreateLoadingInstructionAction": None,
    "CopyPaxDataAction": None,
    "CreateBaggageLoadItemsAction": None,
    "EstimateStorePaxDataAction": actions.EstimateStorePaxDataAction.extract,
    "SendLoadingInstructionAction": None,
    "SetCKIPaxDistributionAction": actions.SetCKIPaxDistributionAction.extract,
    "SpecialPaxWeightAction": None,
    "StoreAircraftDataAction": None,
    "StoreCKIAverageWeightAction": None,
    "StorePaxDataGuiAction": None,
    "TdmCreateLoadingInstructionAction": actions.TdmCreateLoadingInstructionAction.extract,
    "TransferCargoAction": None,
    "TransferCheckinDataAction": None,
    "UpdateCargoMailEstimatesAction": None,
    "UpdateEstimatesAction": actions.UpdateEstimatesAction.extract,
    "UpdateLastEzfwSentAction": None,
    "UpdateLoadTableAction": None,
}

In [7]:
def extract_df(df: pd.DataFrame):
    tqdm.pandas()
    for action_name, extractor in action_extractors.items():

        if extractor is not None:
            print(action_name)

            df[f"data_{action_name}"] = df[df.action_name == action_name][
                "entry_details"
            ].progress_apply(extractor)
    return df


df_abcd_conv = extract_df(df_abcd)
df_abcd_conv.to_parquet(parquet_file_abcd_conv, engine="pyarrow", compression="brotli")

df_mnop_conv = extract_df(df_mnop)
df_mnop_conv.to_parquet(parquet_file_mnop_conv, engine="pyarrow", compression="brotli")

df_zyxw_conv = extract_df(df_zyxw)
df_zyxw_conv.to_parquet(parquet_file_zyxw_conv, engine="pyarrow", compression="brotli")

RampFinalAction


100%|██████████| 3804/3804 [00:00<00:00, 28133.46it/s]


EstimateStorePaxDataAction


100%|██████████| 5069/5069 [00:00<00:00, 38836.29it/s]


{"estimated_Y": "189", "estimated_Jump": "NULL", "estimated_Standby": "0", "estimated_Male": "121", "estimated_Female": "59", "estimated_Child": "9", "estimated_Infant": "4", "estimated_Bags": "135", "estimated_BWgt": "1644.00", "estimated_Average_BWgt": "12.18"}
{"estimated_Y": "162", "estimated_Jump": "NULL", "estimated_Standby": "0", "estimated_Male": "83", "estimated_Female": "68", "estimated_Child": "11", "estimated_Infant": "2", "estimated_Bags": "0", "estimated_BWgt": "0.00", "estimated_Average_BWgt": "NULL"}
{"estimated_Y": "151", "estimated_Jump": "NULL", "estimated_Standby": "0", "estimated_Male": "99", "estimated_Female": "45", "estimated_Child": "7", "estimated_Infant": "6", "estimated_Bags": "102", "estimated_BWgt": "1149.00", "estimated_Average_BWgt": "11.26"}
{"estimated_Y": "164", "estimated_Jump": "NULL", "estimated_Standby": "0", "estimated_Male": "92", "estimated_Female": "63", "estimated_Child": "9", "estimated_Infant": "2", "estimated_Bags": "110", "estimated_BWgt"

0it [00:00, ?it/s]


UpdateEstimatesAction


100%|██████████| 1870/1870 [00:00<00:00, 10524.68it/s]


RampFinalAction


100%|██████████| 5612/5612 [00:00<00:00, 36020.40it/s]

EstimateStorePaxDataAction



100%|██████████| 4633/4633 [00:00<00:00, 773852.51it/s]


SetCKIPaxDistributionAction


0it [00:00, ?it/s]


UpdateEstimatesAction


100%|██████████| 174/174 [00:00<00:00, 10875.30it/s]


In [None]:
df = df_mnop


x = df[
    (df.action_name == "TdmCreateLoadingInstructionAction")
    # & (~df["entry_details"].isnull())
    & (
        ~df["entry_details"].str.contains(
            "com.systemone.lc2.common.dto.SingleAttributeDTO", na=False
        )
    )
    & (
        ~df["entry_details"].str.contains("STATUS LOADING_INSTRUCTION", na=False)
    )  # STATUS LOZYING_INSTRUCTION also included
    & (~df["entry_details"].str.contains("STATUS LOZYING_INSTRUCTION", na=False))
    & (
        ~df["entry_details"].str.contains("Email receivers", na=False)
    )  # don't include this
    & (
        ~df["entry_details"].str.contains("Telex receivers", na=False)
    )  # don't include this
]["entry_details"].unique()

In [None]:
print(x[2])

IndexError: index 2 is out of bounds for axis 0 with size 0

In [13]:
def extract(message: str):
    typos = {
        "BAG_LOZY_ITEMS_GEN": "BAG_LOAD_ITEMS_GEN",
        "LOZYING_INSTRUCTION": "LOADING_INSTRUCTION",
        "LOZYSHEET": "LOADSHEET",
    }  # Fix typos in the messages from zyxw
    for key, value in typos.items():
        message = message.replace(key, value)

    if "com.systemone.lc2.common.dto.SingleAttributeDTO" in message:
        return None
    if "STATUS LOADING_INSTRUCTION" in message:
        """ 
        Example Message: 'STATUS LOADING_INSTRUCTION 1 FUEL 2 AIRCRAFT_CONFIG 1 EZFW 1 
        CARGO_FINAL 1 CARGO_TRANSFER 1 OFP 2 CABIN_CONFIG 1 AUTO_MODE_ACTIVE 1 AUTOMATION_STARTED 0 
        BAG_LOAD_ITEMS_GEN 1 EZFW_COUNTER 1 REGISTRATION 1 REGISTRATION_CHANGE 1 FUEL_ORDER 2'
        
        All the keys are separated by a space and the values are the next element in the list.
        """
        keys = [
            "LOADING_INSTRUCTION",
            "FUEL",
            "AIRCRAFT_CONFIG",
            "EZFW",
            "CARGO_FINAL",
            "CARGO_TRANSFER",
            "OFP",
            "CABIN_CONFIG",
            "AUTO_MODE_ACTIVE",
            "AUTOMATION_STARTED",
            "BAG_LOAD_ITEMS_GEN",
            "EZFW_COUNTER",
            "REGISTRATION",
            "REGISTRATION_CHANGE",
            "FUEL_ORDER",
            "DGR_ITEMS",
            "CHECK_IN_FINAL",
            "OFFBLOCK",
            "AIRBORNE",
            "BAG_ULD_ORD",
            "CALC_HIST_DATA",
        ]

        # Initialize dictionary with None for all keys
        data = {key: None for key in keys}

        # Split the message into parts
        parts = message.split()

        # Iterate through parts and extract key-value pairs
        i = 0
        while i < len(parts):
            if parts[i] in keys:
                key = parts[i]
                if i + 1 < len(parts) and re.match(r"^-?\d+(\.\d+)?$", parts[i + 1]):
                    value = parts[i + 1]
                    data[key] = int(value)
                    i += 2
                else:
                    i += 1
            else:
                i += 1

        return  json.dumps(data)
    
    if "Telex receivers" in message or "Email receivers" in message: 
       return None #not relevant enough
    raise NotImplementedError("This message is not supported yet")

In [11]:
tqdm.pandas()

In [14]:
foo = df_mnop[df_mnop.action_name == "TdmCreateLoadingInstructionAction"][
    "entry_details"
].progress_apply(extract)

100%|██████████| 1182/1182 [00:00<00:00, 38129.45it/s]


In [27]:
foo[foo.str.contains("LOADING_INSTRUCTION", na=False)].count()

394

In [90]:
print(extract(x[0]))

{
  "Telex receivers": "ASRV1ZY",
  "Message type": "LOADING_INSTRUCTION",
  "Subject": "ZY2111/03MAY BVH-MCZ; Loading Instruction Edition 01\r",
  "Attachment file name": null,
  "Compartments": [],
  "Notes": null
}


In [None]:

x["data_UpdateEstimatesAction"] = x[x.action_name == "UpdateEstimatesAction"][
    "entry_details"
].progress_apply(extract)

AttributeError: 'numpy.ndarray' object has no attribute 'action_name'

In [None]:
x = df_abcd.copy()