In [1]:
import pandas as pd
from tqdm.auto import tqdm
import actions
from pathlib import Path
from utils import *
import re
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Define the file paths
parquet_file_abcd = Path("../data/ABCD_tripfiles.parquet")
parquet_file_abcd_conv = Path("../data/ABCD_tripfiles_conv.parquet")
parquet_file_mnop = Path("../data/MNOP_tripfiles.parquet")
parquet_file_mnop_conv = Path("../data/MNOP_tripfiles_conv.parquet")
parquet_file_zyxw = Path("../data/ZYXW_tripfiles.parquet")
parquet_file_zyxw_conv = Path("../data/ZYXW_tripfiles_conv.parquet")
print(
    parquet_file_abcd,
    parquet_file_abcd_conv,
    parquet_file_mnop,
    parquet_file_mnop_conv,
    parquet_file_zyxw,
    parquet_file_zyxw_conv,
)

..\data\ABCD_tripfiles.parquet ..\data\ABCD_tripfiles_conv.parquet ..\data\MNOP_tripfiles.parquet ..\data\MNOP_tripfiles_conv.parquet ..\data\ZYXW_tripfiles.parquet ..\data\ZYXW_tripfiles_conv.parquet


In [3]:
df_abcd = pd.read_parquet(parquet_file_abcd)
df_mnop = pd.read_parquet(parquet_file_mnop)
df_zyxw = pd.read_parquet(parquet_file_zyxw)

In [4]:
print(round(df_abcd.memory_usage(deep=True).sum() / 1024**2, 2), "MB")
print(round(df_mnop.memory_usage(deep=True).sum() / 1024**2, 2), "MB")
print(round(df_zyxw.memory_usage(deep=True).sum() / 1024**2, 2), "MB")

1252.74 MB
2278.75 MB
608.15 MB


In [5]:
action_names = set(df_abcd.action_name.unique())
print(len(action_names))
action_names.update(set(df_mnop.action_name.unique()))
print(len(action_names))
action_names.update(set(df_zyxw.action_name.unique()))
print(len(action_names))

49
62
67


In [6]:
import actions.EstimateStorePaxDataAction
import actions.UpdateFuelDataAction


action_extractors = {
    "CalculateWeightAndTrimAction": None,
    "CheckinMsgProcessor": None,
    "CreateLoadsheetAction": None,
    "CreateZFWMessageAction": None,
    "SendFuelOrderAction": None,
    "SendLoadsheetAction": None,
    "SetActualBagWeightIndicatorAction": None,
    "StorePaxDataAction": None,
    "StoreRegistrationAndConfigurationAc": None,
    "UpdateFuelDataAction": actions.UpdateFuelDataAction.extract,
    "RampFinalAction": actions.RampFinalAction.extract,
    "CreateLoadingInstructionAction": None,
    "CopyPaxDataAction": None,
    "CreateBaggageLoadItemsAction": None,
    "EstimateStorePaxDataAction": actions.EstimateStorePaxDataAction.extract,
    "SendLoadingInstructionAction": None,
    "SetCKIPaxDistributionAction": actions.SetCKIPaxDistributionAction.extract,
    "SpecialPaxWeightAction": None,
    "StoreAircraftDataAction": None,
    "StoreCKIAverageWeightAction": None,
    "StorePaxDataGuiAction": None,
    "TdmCreateLoadingInstructionAction": None,
    "TransferCargoAction": None,
    "TransferCheckinDataAction": None,
    "UpdateCargoMailEstimatesAction": None,
    "UpdateEstimatesAction": actions.UpdateEstimatesAction.extract,
    "UpdateLastEzfwSentAction": None,
    "UpdateLoadTableAction": None,
}

In [7]:
def extract_df(df: pd.DataFrame):
    tqdm.pandas()
    for action_name, extractor in action_extractors.items():

        if extractor is not None:
            print(action_name)

            df[f"data_{action_name}"] = df[df.action_name == action_name][
                "entry_details"
            ].progress_apply(extractor)
    return df


df_abcd_conv = extract_df(df_abcd)
df_abcd_conv.to_parquet(parquet_file_abcd_conv, engine="pyarrow", compression="brotli")

df_mnop_conv = extract_df(df_mnop)
df_mnop_conv.to_parquet(parquet_file_mnop_conv, engine="pyarrow", compression="brotli")

df_zyxw_conv = extract_df(df_zyxw)
df_zyxw_conv.to_parquet(parquet_file_zyxw_conv, engine="pyarrow", compression="brotli")

UpdateFuelDataAction


100%|██████████| 3583/3583 [00:00<00:00, 102304.28it/s]

RampFinalAction



100%|██████████| 3804/3804 [00:00<00:00, 63692.89it/s]

EstimateStorePaxDataAction



100%|██████████| 5069/5069 [00:00<00:00, 138842.34it/s]


SetCKIPaxDistributionAction


0it [00:00, ?it/s]


UpdateEstimatesAction


100%|██████████| 1870/1870 [00:00<00:00, 17236.58it/s]


UpdateFuelDataAction


100%|██████████| 4845/4845 [00:00<00:00, 156189.93it/s]


RampFinalAction


100%|██████████| 5612/5612 [00:00<00:00, 61926.30it/s]


EstimateStorePaxDataAction


100%|██████████| 4633/4633 [00:00<00:00, 1163326.77it/s]

SetCKIPaxDistributionAction



0it [00:00, ?it/s]


UpdateEstimatesAction


100%|██████████| 174/174 [00:00<00:00, 17407.49it/s]


UpdateFuelDataAction


100%|██████████| 125/125 [00:00<00:00, 128691.21it/s]


RampFinalAction


100%|██████████| 65/65 [00:00<00:00, 68311.14it/s]


EstimateStorePaxDataAction


100%|██████████| 8681/8681 [00:00<00:00, 1101120.54it/s]


SetCKIPaxDistributionAction


100%|██████████| 11598/11598 [00:00<00:00, 76867.89it/s]


UpdateEstimatesAction


0it [00:00, ?it/s]


In [8]:
df = df_abcd


x = df[
    (df.action_name == "UpdateEstimatesAction")
    # & (~df["entry_details"].isnull())
    # & (
    #     ~df["entry_details"].str.contains(
    #         "com.onesystem.lc2.estimateshandling.dto.EstimateWeightsDTO", na=False
    #     )
    # )
    # & (
    #     ~df["entry_details"].str.contains(
    #         "com.systemone.lc2.estimateshandling.dto.EstimateWeightsDTO", na=False
    #     )
    # )
    & (
        ~df["entry_details"].str.contains("Pax Weight =", na=False)
    )  # don't include this
]["entry_details"].unique()

In [9]:
"""TOTAL Pax: 6    Y: 6  Jump: 0    StandBy: NULL  Male: 2  Female: 2  Child: 2  Infant: 0  Total bag: 6  Total bag weight: 72.0 KG  Baggage weight type: HISTORIC\r\n 

Distribution        : CKI_DISTRIBUTION         \r\n 
Section             : 0A                       0C                       \r\n 
Capacity            : Y72                      Y102                     \r\n 
Distribution        : Y0                       Y0"""

'TOTAL Pax: 6    Y: 6  Jump: 0    StandBy: NULL  Male: 2  Female: 2  Child: 2  Infant: 0  Total bag: 6  Total bag weight: 72.0 KG  Baggage weight type: HISTORIC\r\n \n\nDistribution        : CKI_DISTRIBUTION         \r\n \nSection             : 0A                       0C                       \r\n \nCapacity            : Y72                      Y102                     \r\n \nDistribution        : Y0                       Y0'

In [10]:
foo: set = set()

In [11]:
for message in x:
    items = message.split(" ")
    for item in items:
        if not item.isdigit():
            foo.add(item)

In [12]:
foo

{'',
 '377.0',
 '458.0',
 '12785.0',
 '176.0',
 '2130.0',
 '2000.0',
 '194.0',
 '16237.0',
 '262.0',
 '15446.0',
 '18699.0',
 '104.0',
 '53500.0',
 '132.0',
 '764.0',
 '16064.0',
 '18923.0',
 '16500.0',
 '13965.0',
 '414.0',
 '44951.0',
 '45047.0',
 '15185.0',
 '14585.0',
 '16116.0',
 '16052.0',
 '2500.0',
 '8.0',
 '12990.0',
 '16754.0',
 '1365.0',
 '16370.0',
 '810.0',
 '16524.0',
 '13775.0',
 '15708.0',
 '12619.0',
 '15400.0',
 '6463.0',
 '12650.0',
 '11030.0',
 '13280.0',
 '151.0',
 '12610.0',
 '14029.0',
 '58400.0',
 '12450.0',
 '80.0',
 '16534.0',
 '12105.0',
 '16541.0',
 '51000.0',
 '14250.0',
 '16223.0',
 '55800.0',
 '14652.0',
 '15534.0',
 '16517.0',
 '55500.0',
 '16739.0',
 '52500.0',
 '794.0',
 '13875.0',
 '14065.0',
 '378.0',
 '3014.0',
 '14595.0',
 '131.0',
 '16526.0',
 '14954.0',
 '11397.0',
 '18702.0',
 '17716.0',
 '64200.0',
 '17769.0',
 '14239.0',
 '25.0',
 '12338.0',
 '675.0',
 '13985.0',
 '15026.0',
 '11555.0',
 '247.0',
 '43.0',
 '720.0',
 '14155.0',
 '287.0',
 '2112

In [13]:

x["data_UpdateEstimatesAction"] = x[x.action_name == "UpdateEstimatesAction"][
    "entry_details"
].progress_apply(extract)

AttributeError: 'numpy.ndarray' object has no attribute 'action_name'

In [None]:
x = df_abcd.copy()