In [1]:
import pandas as pd
import numpy as np
import random
import re
from tqdm import tqdm

# **Load ABCD Airline File**

In [2]:
df = pd.read_parquet("../data/ABCD_tripfiles.parquet", engine="pyarrow")
df.head()

Unnamed: 0,id,creation_time,airline_code,flight_number,flight_date,departure_airport,user_name,action_name,header_line,entry_details
0,33902381,2024-05-01 09:14:42,AB,2359,1,BLR,service-acco,ASMMsgProcessor,"2024-05-01 09:14:42,420 INFO [3f326d134d9de45...","<?xml version=""1.0"" encoding=""UTF-8""?>\r\n<ns2..."
1,33910427,2024-05-01 10:28:12,AB,2104,2,BLR,service-acco,ASMMsgProcessor,"2024-05-01 10:28:12,745 INFO [de24be75947007f...","<?xml version=""1.0"" encoding=""UTF-8""?>\r\n<ns2..."
2,33910380,2024-05-01 10:28:11,AB,2109,2,BLR,service-acco,ASMMsgProcessor,"2024-05-01 10:28:11,301 INFO [df3a1cc7d574161...","<?xml version=""1.0"" encoding=""UTF-8""?>\r\n<ns2..."
3,33909790,2024-05-01 10:28:12,AB,2110,2,BOM,service-acco,ASMMsgProcessor,"2024-05-01 10:28:12,189 INFO [9e66dfb1ffe9048...","<?xml version=""1.0"" encoding=""UTF-8""?>\r\n<ns2..."
4,33902706,2024-05-01 09:14:41,AB,2127,2,BOM,service-acco,ASMMsgProcessor,"2024-05-01 09:14:41,884 INFO [829566f527b588f...","<?xml version=""1.0"" encoding=""UTF-8""?>\r\n<ns2..."


In [3]:
len(df)

709613

In [4]:
df.columns

Index(['id', 'creation_time', 'airline_code', 'flight_number', 'flight_date',
       'departure_airport', 'user_name', 'action_name', 'header_line',
       'entry_details'],
      dtype='object')

# **Actions**

In [5]:
unique_actions = df.action_name.unique()
len(unique_actions)

49

In [6]:
unique_actions

array(['ASMMsgProcessor', 'AcceptTransitDataAction',
       'AcceptTransitLoadAction', 'AcceptTransitNotocAction',
       'AssignLCCAction', 'AssignLoadplanAction',
       'AssignUnassignViewAction', 'CalculateWeightAndTrimAction',
       'CargoFinalAction', 'ChangeFlightLegStateAction',
       'CheckinMsgProcessor', 'CloseLegAction',
       'CreatePostDepartureMessagesAction',
       'CreateLoadingInstructionAction', 'CreateLoadsheetAction',
       'CreateZFWMessageAction', 'CreateBaggageLoadItemsAction',
       'ClearFlightsAction', 'InternalCreateLoadingInstructionAct',
       'InternalCreateLoadsheetAction', 'FlightPlanFiguresInMsgProcessor',
       'RampFinalAction', 'CrewMsgProcessor', 'PAXBOOKINGINMsgProcessor',
       'EstimateStorePaxDataAction', 'ReopenLegAction',
       'GetCabinConfigurationsAction', 'FuelDataInitializer',
       'SendPostDepartureMessagesAction',
       'SetActualBagWeightIndicatorAction', 'TransferCheckinDataAction',
       'StoreAircraftDataAction', 'Spe

In [7]:
df.action_name.value_counts()

action_name
CalculateWeightAndTrimAction           231990
StorePaxDataAction                     150871
CheckinMsgProcessor                    133782
UpdateLoadTableAction                   23554
ASMMsgProcessor                         22438
AssignLCCAction                         18162
UpdateFlightAction                       9468
PAXBOOKINGINMsgProcessor                 8878
ChangeFlightLegStateAction               8836
StoreRegistrationAndConfigurationAc      8036
UpdateSupplementaryInfoAction            7936
UpdateCrewDataAction                     7173
TransferCheckinDataAction                6586
CreateLoadingInstructionAction           6408
CrewMsgProcessor                         5444
EstimateStorePaxDataAction               5069
CreateLoadsheetAction                    5055
CreateZFWMessageAction                   5005
RampFinalAction                          3804
UpdateFuelDataAction                     3583
CreatePostDepartureMessagesAction        3406
AssignUnassignViewActi

In [8]:
df.user_name.value_counts()

user_name
service-acco    574407
human           135206
Name: count, dtype: int64

# **Parse Entry Details**

## Standard Inputs

In [9]:
standard_df = df[['id', 'creation_time', 'airline_code', 'flight_number', 'flight_date',
       'departure_airport', 'user_name', 'action_name']]

standard_df

Unnamed: 0,id,creation_time,airline_code,flight_number,flight_date,departure_airport,user_name,action_name
0,33902381,2024-05-01 09:14:42,AB,2359,1,BLR,service-acco,ASMMsgProcessor
1,33910427,2024-05-01 10:28:12,AB,2104,2,BLR,service-acco,ASMMsgProcessor
2,33910380,2024-05-01 10:28:11,AB,2109,2,BLR,service-acco,ASMMsgProcessor
3,33909790,2024-05-01 10:28:12,AB,2110,2,BOM,service-acco,ASMMsgProcessor
4,33902706,2024-05-01 09:14:41,AB,2127,2,BOM,service-acco,ASMMsgProcessor
...,...,...,...,...,...,...,...,...
709608,33875192,2024-05-01 04:07:01,AB,2372,30,GAU,service-acco,StorePaxDataAction
709609,33875261,2024-05-01 04:08:01,AB,2372,30,GAU,service-acco,StorePaxDataAction
709610,33875264,2024-05-01 04:08:01,AB,2372,30,GAU,service-acco,StorePaxDataAction
709611,33875306,2024-05-01 04:09:01,AB,2372,30,GAU,service-acco,StorePaxDataAction


## CalculateWeightAndTrimAction           

### Calculates the weight and balance (trim) of the aircraft to ensure safe operation.

In [10]:
calc_weight_and_trim_action_df = df[df.action_name == "CalculateWeightAndTrimAction"]
calc_weight_and_trim_action_df.head(10)

Unnamed: 0,id,creation_time,airline_code,flight_number,flight_date,departure_airport,user_name,action_name,header_line,entry_details
46743,34496931,2024-05-07 09:16:01,AB,2384,7,BBI,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:16:01,477 INFO [7698c4068c8ec2a...",START_WI weight : 45102.00 KG ...
46744,34496930,2024-05-07 09:16:01,AB,2384,7,BBI,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:16:01,477 INFO [7698c4068c8ec2a...",com.onesystem.lc2.common.dto.SingleAttributeDT...
46745,34496925,2024-05-07 09:16:01,AB,2562,7,CCU,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:16:01,217 INFO [ea50cb2e6df087f...",START_WI weight : 44170.00 KG ...
46746,34496924,2024-05-07 09:16:01,AB,2562,7,CCU,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:16:01,216 INFO [ea50cb2e6df087f...",com.onesystem.lc2.common.dto.SingleAttributeDT...
46747,34496840,2024-05-07 09:15:01,AB,2108,7,AMD,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:15:01,749 INFO [74c9c1cee35afd3...",START_WI weight : 44562.00 KG ...
46748,34496039,2024-05-07 09:15:01,AB,2108,7,AMD,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:15:01,749 INFO [74c9c1cee35afd3...",com.onesystem.lc2.common.dto.SingleAttributeDT...
46749,34496761,2024-05-07 09:14:01,AB,2384,7,BBI,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:14:01,390 INFO [8c8bbee3ced523c...",START_WI weight : 45102.00 KG ...
46750,34496760,2024-05-07 09:14:01,AB,2384,7,BBI,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:14:01,389 INFO [8c8bbee3ced523c...",com.onesystem.lc2.common.dto.SingleAttributeDT...
46751,34496279,2024-05-07 09:14:00,AB,2562,7,CCU,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:14:00,896 INFO [b1d4bc9078c7f47...",START_WI weight : 44170.00 KG ...
46752,34496278,2024-05-07 09:14:00,AB,2562,7,CCU,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:14:00,896 INFO [b1d4bc9078c7f47...",com.onesystem.lc2.common.dto.SingleAttributeDT...


### parse random sample

In [11]:
def write_to_file(text: str, fileneame: str):
    with open(fileneame, "w") as file:
        file.write(text)

In [12]:
# CalculateWeightAndTrimAction random sample
random_index = random.randint(0, len(calc_weight_and_trim_action_df) - 1)
random_sample = str(calc_weight_and_trim_action_df.iloc[random_index].entry_details)
pd.set_option('display.max_colwidth', None)
random_sample

'com.onesystem.lc2.common.dto.SingleAttributeDTO[ id = NULL  deleted = false  legId = 544314 ]'

In [13]:
write_to_file(random_sample, "message.txt")

In [14]:
extracted_data = None

if "onesystem" in random_sample:
    extracted_data = random_sample
else:
    pattern = re.compile(r'(\w+(\s\w+)*)\s*:\s*([^\s]+)')

    # find all matches
    matches = pattern.findall(random_sample)

    # extracted keys and values
    extracted_data = {match[0].strip(): match[2] for match in matches}

print(extracted_data)

com.onesystem.lc2.common.dto.SingleAttributeDTO[ id = NULL  deleted = false  legId = 544314 ]


### parse all

In [15]:
# create columns and store information from entry_deatails in them
for idx, row in tqdm(calc_weight_and_trim_action_df.iterrows(), total=calc_weight_and_trim_action_df.shape[0], desc="Processing rows"):
    details = row["entry_details"]

    # skip system row because of lacking information
    if "onesystem" in details:
        pass
    else:
        # pattern to extract infos from string
        pattern = re.compile(r'(\w+(\s\w+)*)\s*:\s*([^\s]+)')
        matches = pattern.findall(details)
        extracted_data = {match[0].strip(): match[2] for match in matches}
        
        # iterate all rows
        for key, value in extracted_data.items():
            # add columns from entry_details keys if not exist
            if key not in calc_weight_and_trim_action_df.columns:
                calc_weight_and_trim_action_df[str(key)] = np.nan
            # write data in entry_details key columns
            calc_weight_and_trim_action_df.at[idx, key] = value

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  calc_weight_and_trim_action_df[str(key)] = np.nan
  calc_weight_and_trim_action_df.at[idx, key] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  calc_weight_and_trim_action_df[str(key)] = np.nan
  calc_weight_and_trim_action_df.at[idx, key] = value
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy

In [16]:
calc_weight_and_trim_action_df.head(2)

Unnamed: 0,id,creation_time,airline_code,flight_number,flight_date,departure_airport,user_name,action_name,header_line,entry_details,...,TAIL_TIPPING_INDEX_EXCEEDED,FWD_MOVABLE_PAX,AFT_MOVABLE_PAX,INDEX_OUT_OF_BALANCE,LOAD_TO_AFT,LOAD_TO_FWD,ESTIMATED_TRAFFIC_LOAD,ESTIMATED_ZFW,DELTA_ZFW,ZFW_TOLERANCE_EXCEEDED
46743,34496931,2024-05-07 09:16:01,AB,2384,7,BBI,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:16:01,477 INFO [7698c4068c8ec2ad] [AB2384 /07 BBI | service-acco | CalculateWeightAndTrimAction | | Thread-63646 (ActiveMQ-client-global-threads)] Saved:",START_WI weight : 45102.00 KG START_WI index : 37.23 \r\n DO_WI weight : 45601.00 KG DO_WI index : 35.98 \r\n PAX_WI weight : 13540.00 KG PAX_WI index : 1.43 \r\n TOTAL_DEADLOAD_WI weight : 47415.00 KG TOTAL_DEADLOAD_WI index : 47.95 \r\n TOTAL_LOAD_WI : 1814.00 KG TOTAL_TRAFFIC_LOAD : 15354.00 KG \r\n FUEL_INDEX : 4.65 \r\n AZFW : 60955.00 KG ATOW : 71631.00 KG \r\n ALAW : 67568.00 KG ATXW : 71829.00 KG \r\n AFT_LIMIT_ZFW : 68.35 FWD_LIMIT_ZFW : 24.85 \r\n AFT_LIMIT_TOW : 77.89 FWD_LIMIT_TOW : 29.40 \r\n AFT_LIMIT_LAW : NULL FWD_LIMIT_LAW : NULL \r\n LIZFW : 49.37 LITOW : 54.02 \r\n LILAW : 54.83 MAC_AT_ZFW : 21.64 \r\n MAC_AT_TOW : 22.86 MAC_AT_LAW : 23.29 \r\n DEADLOAD_MAC : 21.42 \r\n UNDERLOAD : 1390.00 KG \r\n LIMITING_WEIGHT : LIMITING_WEIGHT_LAW \r\n ALLOWED TOW : 73021.00 KG ALLOWED ZFW : 60955.00 KG \r\n ALLOWED LAW : 67568.00 KG ALLOWED TXW : 71829.00 KG \r\n STABTO : NULL \r\n OPTIMAL_TRIM : 56.45 \r\n IDEAL_ADDITIONAL_LOAD_AFT : 459.51 KG IDEAL_ADDITIONAL_LOAD_FWD : -459.51 KG \r\n TAIL_TIPPING_WI weight : 60963.39 KG TAIL_TIPPING_WI index : 100.18 \r\n TAIL_TIPPING_INDEX_EXCEEDED : false \r\n FWD_MOVABLE_PAX : NULL AFT_MOVABLE_PAX : NULL \r\n INDEX_OUT_OF_BALANCE : NULL \r\n LOAD_TO_AFT : NULL LOAD_TO_FWD : NULL \r\n ESTIMATED_TRAFFIC_LOAD : 15354.00 KG ESTIMATED_ZFW : 60955.00 KG \r\n DELTA_ZFW : -245.00 KG ZFW_TOLERANCE_EXCEEDED : NULL,...,False,,,,,,15354.0,60955.0,-245.0,
46744,34496930,2024-05-07 09:16:01,AB,2384,7,BBI,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:16:01,477 INFO [7698c4068c8ec2ad] [AB2384 /07 BBI | service-acco | CalculateWeightAndTrimAction | | Thread-63646 (ActiveMQ-client-global-threads)] Received:",com.onesystem.lc2.common.dto.SingleAttributeDTO[ id = NULL deleted = false legId = 545343 ],...,,,,,,,,,,


In [17]:
calc_weight_and_trim_action_df.columns

Index(['id', 'creation_time', 'airline_code', 'flight_number', 'flight_date',
       'departure_airport', 'user_name', 'action_name', 'header_line',
       'entry_details', 'START_WI weight', 'START_WI index', 'DO_WI weight',
       'DO_WI index', 'PAX_WI weight', 'PAX_WI index',
       'TOTAL_DEADLOAD_WI weight', 'TOTAL_DEADLOAD_WI index', 'TOTAL_LOAD_WI',
       'TOTAL_TRAFFIC_LOAD', 'FUEL_INDEX', 'AZFW', 'ATOW', 'ALAW', 'ATXW',
       'AFT_LIMIT_ZFW', 'FWD_LIMIT_ZFW', 'AFT_LIMIT_TOW', 'FWD_LIMIT_TOW',
       'AFT_LIMIT_LAW', 'FWD_LIMIT_LAW', 'LIZFW', 'LITOW', 'LILAW',
       'MAC_AT_ZFW', 'MAC_AT_TOW', 'MAC_AT_LAW', 'DEADLOAD_MAC', 'UNDERLOAD',
       'LIMITING_WEIGHT', 'ALLOWED TOW', 'ALLOWED ZFW', 'ALLOWED LAW',
       'ALLOWED TXW', 'STABTO', 'OPTIMAL_TRIM', 'IDEAL_ADDITIONAL_LOAD_AFT',
       'IDEAL_ADDITIONAL_LOAD_FWD', 'TAIL_TIPPING_WI weight',
       'TAIL_TIPPING_WI index', 'TAIL_TIPPING_INDEX_EXCEEDED',
       'FWD_MOVABLE_PAX', 'AFT_MOVABLE_PAX', 'INDEX_OUT_OF_BALANCE',
  

In [18]:
calc_weight_and_trim_action_df.head()

Unnamed: 0,id,creation_time,airline_code,flight_number,flight_date,departure_airport,user_name,action_name,header_line,entry_details,...,TAIL_TIPPING_INDEX_EXCEEDED,FWD_MOVABLE_PAX,AFT_MOVABLE_PAX,INDEX_OUT_OF_BALANCE,LOAD_TO_AFT,LOAD_TO_FWD,ESTIMATED_TRAFFIC_LOAD,ESTIMATED_ZFW,DELTA_ZFW,ZFW_TOLERANCE_EXCEEDED
46743,34496931,2024-05-07 09:16:01,AB,2384,7,BBI,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:16:01,477 INFO [7698c4068c8ec2ad] [AB2384 /07 BBI | service-acco | CalculateWeightAndTrimAction | | Thread-63646 (ActiveMQ-client-global-threads)] Saved:",START_WI weight : 45102.00 KG START_WI index : 37.23 \r\n DO_WI weight : 45601.00 KG DO_WI index : 35.98 \r\n PAX_WI weight : 13540.00 KG PAX_WI index : 1.43 \r\n TOTAL_DEADLOAD_WI weight : 47415.00 KG TOTAL_DEADLOAD_WI index : 47.95 \r\n TOTAL_LOAD_WI : 1814.00 KG TOTAL_TRAFFIC_LOAD : 15354.00 KG \r\n FUEL_INDEX : 4.65 \r\n AZFW : 60955.00 KG ATOW : 71631.00 KG \r\n ALAW : 67568.00 KG ATXW : 71829.00 KG \r\n AFT_LIMIT_ZFW : 68.35 FWD_LIMIT_ZFW : 24.85 \r\n AFT_LIMIT_TOW : 77.89 FWD_LIMIT_TOW : 29.40 \r\n AFT_LIMIT_LAW : NULL FWD_LIMIT_LAW : NULL \r\n LIZFW : 49.37 LITOW : 54.02 \r\n LILAW : 54.83 MAC_AT_ZFW : 21.64 \r\n MAC_AT_TOW : 22.86 MAC_AT_LAW : 23.29 \r\n DEADLOAD_MAC : 21.42 \r\n UNDERLOAD : 1390.00 KG \r\n LIMITING_WEIGHT : LIMITING_WEIGHT_LAW \r\n ALLOWED TOW : 73021.00 KG ALLOWED ZFW : 60955.00 KG \r\n ALLOWED LAW : 67568.00 KG ALLOWED TXW : 71829.00 KG \r\n STABTO : NULL \r\n OPTIMAL_TRIM : 56.45 \r\n IDEAL_ADDITIONAL_LOAD_AFT : 459.51 KG IDEAL_ADDITIONAL_LOAD_FWD : -459.51 KG \r\n TAIL_TIPPING_WI weight : 60963.39 KG TAIL_TIPPING_WI index : 100.18 \r\n TAIL_TIPPING_INDEX_EXCEEDED : false \r\n FWD_MOVABLE_PAX : NULL AFT_MOVABLE_PAX : NULL \r\n INDEX_OUT_OF_BALANCE : NULL \r\n LOAD_TO_AFT : NULL LOAD_TO_FWD : NULL \r\n ESTIMATED_TRAFFIC_LOAD : 15354.00 KG ESTIMATED_ZFW : 60955.00 KG \r\n DELTA_ZFW : -245.00 KG ZFW_TOLERANCE_EXCEEDED : NULL,...,False,,,,,,15354.0,60955.0,-245.0,
46744,34496930,2024-05-07 09:16:01,AB,2384,7,BBI,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:16:01,477 INFO [7698c4068c8ec2ad] [AB2384 /07 BBI | service-acco | CalculateWeightAndTrimAction | | Thread-63646 (ActiveMQ-client-global-threads)] Received:",com.onesystem.lc2.common.dto.SingleAttributeDTO[ id = NULL deleted = false legId = 545343 ],...,,,,,,,,,,
46745,34496925,2024-05-07 09:16:01,AB,2562,7,CCU,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:16:01,217 INFO [ea50cb2e6df087f1] [AB2562 /07 CCU | service-acco | CalculateWeightAndTrimAction | | Thread-63644 (ActiveMQ-client-global-threads)] Saved:",START_WI weight : 44170.00 KG START_WI index : 39.79 \r\n DO_WI weight : 44669.00 KG DO_WI index : 41.22 \r\n PAX_WI weight : 6485.00 KG PAX_WI index : 7.38 \r\n TOTAL_DEADLOAD_WI weight : 45174.00 KG TOTAL_DEADLOAD_WI index : 44.25 \r\n TOTAL_LOAD_WI : 505.00 KG TOTAL_TRAFFIC_LOAD : 6990.00 KG \r\n FUEL_INDEX : 6.22 \r\n AZFW : 51659.00 KG ATOW : 61047.00 KG \r\n ALAW : 55103.00 KG ATXW : 61245.00 KG \r\n AFT_LIMIT_ZFW : 55.25 FWD_LIMIT_ZFW : 27.05 \r\n AFT_LIMIT_TOW : 63.79 FWD_LIMIT_TOW : 21.32 \r\n AFT_LIMIT_LAW : NULL FWD_LIMIT_LAW : NULL \r\n LIZFW : 51.63 LITOW : 57.85 \r\n LILAW : 52.63 MAC_AT_ZFW : 22.91 \r\n MAC_AT_TOW : 24.76 MAC_AT_LAW : 23.14 \r\n DEADLOAD_MAC : 19.65 \r\n UNDERLOAD : 13855.00 KG \r\n LIMITING_WEIGHT : LIMITING_WEIGHT_LAW \r\n ALLOWED TOW : 74902.00 KG ALLOWED ZFW : 51659.00 KG \r\n ALLOWED LAW : 55103.00 KG ALLOWED TXW : 61245.00 KG \r\n STABTO : NULL \r\n OPTIMAL_TRIM : 43.35 \r\n IDEAL_ADDITIONAL_LOAD_AFT : -537.73 KG IDEAL_ADDITIONAL_LOAD_FWD : 537.73 KG \r\n TAIL_TIPPING_WI weight : 59070.00 KG TAIL_TIPPING_WI index : 77.84 \r\n TAIL_TIPPING_INDEX_EXCEEDED : false \r\n FWD_MOVABLE_PAX : NULL AFT_MOVABLE_PAX : NULL \r\n INDEX_OUT_OF_BALANCE : NULL \r\n LOAD_TO_AFT : NULL LOAD_TO_FWD : NULL \r\n ESTIMATED_TRAFFIC_LOAD : 6990.00 KG ESTIMATED_ZFW : 51659.00 KG \r\n DELTA_ZFW : -1441.00 KG ZFW_TOLERANCE_EXCEEDED : NULL,...,False,,,,,,6990.0,51659.0,-1441.0,
46746,34496924,2024-05-07 09:16:01,AB,2562,7,CCU,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:16:01,216 INFO [ea50cb2e6df087f1] [AB2562 /07 CCU | service-acco | CalculateWeightAndTrimAction | | Thread-63644 (ActiveMQ-client-global-threads)] Received:",com.onesystem.lc2.common.dto.SingleAttributeDTO[ id = NULL deleted = false legId = 545512 ],...,,,,,,,,,,
46747,34496840,2024-05-07 09:15:01,AB,2108,7,AMD,service-acco,CalculateWeightAndTrimAction,"2024-05-07 09:15:01,749 INFO [74c9c1cee35afd36] [AB2108 /07 AMD | service-acco | CalculateWeightAndTrimAction | | Thread-63647 (ActiveMQ-client-global-threads)] Saved:",START_WI weight : 44562.00 KG START_WI index : 37.43 \r\n DO_WI weight : 45061.00 KG DO_WI index : 38.86 \r\n PAX_WI weight : 12950.00 KG PAX_WI index : 12.09 \r\n TOTAL_DEADLOAD_WI weight : 46605.00 KG TOTAL_DEADLOAD_WI index : 35.67 \r\n TOTAL_LOAD_WI : 1544.00 KG TOTAL_TRAFFIC_LOAD : 14494.00 KG \r\n FUEL_INDEX : 6.00 \r\n AZFW : 59555.00 KG ATOW : 69096.00 KG \r\n ALAW : 66523.00 KG ATXW : 69349.00 KG \r\n AFT_LIMIT_ZFW : 66.45 FWD_LIMIT_ZFW : 21.65 \r\n AFT_LIMIT_TOW : 74.66 FWD_LIMIT_TOW : 25.20 \r\n AFT_LIMIT_LAW : NULL FWD_LIMIT_LAW : NULL \r\n LIZFW : 47.76 LITOW : 53.76 \r\n LILAW : 54.63 MAC_AT_ZFW : 21.07 \r\n MAC_AT_TOW : 22.87 MAC_AT_LAW : 23.28 \r\n DEADLOAD_MAC : 15.53 \r\n UNDERLOAD : 2435.00 KG \r\n LIMITING_WEIGHT : LIMITING_WEIGHT_LAW \r\n ALLOWED TOW : 71531.00 KG ALLOWED ZFW : 59555.00 KG \r\n ALLOWED LAW : 66523.00 KG ALLOWED TXW : 69349.00 KG \r\n STABTO : NULL \r\n OPTIMAL_TRIM : 54.55 \r\n IDEAL_ADDITIONAL_LOAD_AFT : 440.70 KG IDEAL_ADDITIONAL_LOAD_FWD : -440.70 KG \r\n TAIL_TIPPING_WI weight : 59800.83 KG TAIL_TIPPING_WI index : 97.89 \r\n TAIL_TIPPING_INDEX_EXCEEDED : false \r\n FWD_MOVABLE_PAX : NULL AFT_MOVABLE_PAX : NULL \r\n INDEX_OUT_OF_BALANCE : NULL \r\n LOAD_TO_AFT : NULL LOAD_TO_FWD : NULL \r\n ESTIMATED_TRAFFIC_LOAD : 14494.00 KG ESTIMATED_ZFW : 59555.00 KG \r\n DELTA_ZFW : -1745.00 KG ZFW_TOLERANCE_EXCEEDED : NULL,...,False,,,,,,14494.0,59555.0,-1745.0,


In [19]:
# drop columns for join with defined standard df (cols alread exist there)
calc_weight_and_trim_action_df = calc_weight_and_trim_action_df.drop(columns=['creation_time', 'airline_code', 'flight_number', 'flight_date',
       'departure_airport', 'user_name', 'action_name', 'header_line',
       'entry_details'])

In [20]:
# merge calc_weight_and_trim_action_df with the standard df with left join
standard_df = pd.merge(standard_df, calc_weight_and_trim_action_df, on='id', how='left')
standard_df.columns

Index(['id', 'creation_time', 'airline_code', 'flight_number', 'flight_date',
       'departure_airport', 'user_name', 'action_name', 'START_WI weight',
       'START_WI index', 'DO_WI weight', 'DO_WI index', 'PAX_WI weight',
       'PAX_WI index', 'TOTAL_DEADLOAD_WI weight', 'TOTAL_DEADLOAD_WI index',
       'TOTAL_LOAD_WI', 'TOTAL_TRAFFIC_LOAD', 'FUEL_INDEX', 'AZFW', 'ATOW',
       'ALAW', 'ATXW', 'AFT_LIMIT_ZFW', 'FWD_LIMIT_ZFW', 'AFT_LIMIT_TOW',
       'FWD_LIMIT_TOW', 'AFT_LIMIT_LAW', 'FWD_LIMIT_LAW', 'LIZFW', 'LITOW',
       'LILAW', 'MAC_AT_ZFW', 'MAC_AT_TOW', 'MAC_AT_LAW', 'DEADLOAD_MAC',
       'UNDERLOAD', 'LIMITING_WEIGHT', 'ALLOWED TOW', 'ALLOWED ZFW',
       'ALLOWED LAW', 'ALLOWED TXW', 'STABTO', 'OPTIMAL_TRIM',
       'IDEAL_ADDITIONAL_LOAD_AFT', 'IDEAL_ADDITIONAL_LOAD_FWD',
       'TAIL_TIPPING_WI weight', 'TAIL_TIPPING_WI index',
       'TAIL_TIPPING_INDEX_EXCEEDED', 'FWD_MOVABLE_PAX', 'AFT_MOVABLE_PAX',
       'INDEX_OUT_OF_BALANCE', 'LOAD_TO_AFT', 'LOAD_TO_FWD',
    

In [21]:
standard_df.head()

Unnamed: 0,id,creation_time,airline_code,flight_number,flight_date,departure_airport,user_name,action_name,START_WI weight,START_WI index,...,TAIL_TIPPING_INDEX_EXCEEDED,FWD_MOVABLE_PAX,AFT_MOVABLE_PAX,INDEX_OUT_OF_BALANCE,LOAD_TO_AFT,LOAD_TO_FWD,ESTIMATED_TRAFFIC_LOAD,ESTIMATED_ZFW,DELTA_ZFW,ZFW_TOLERANCE_EXCEEDED
0,33902381,2024-05-01 09:14:42,AB,2359,1,BLR,service-acco,ASMMsgProcessor,,,...,,,,,,,,,,
1,33910427,2024-05-01 10:28:12,AB,2104,2,BLR,service-acco,ASMMsgProcessor,,,...,,,,,,,,,,
2,33910380,2024-05-01 10:28:11,AB,2109,2,BLR,service-acco,ASMMsgProcessor,,,...,,,,,,,,,,
3,33909790,2024-05-01 10:28:12,AB,2110,2,BOM,service-acco,ASMMsgProcessor,,,...,,,,,,,,,,
4,33902706,2024-05-01 09:14:41,AB,2127,2,BOM,service-acco,ASMMsgProcessor,,,...,,,,,,,,,,


## StorePaxDataAction        
### Stores passenger data, ensuring all relevant information is captured in the system.             

In [354]:
store_pax_data_df = df[(df["action_name"] == "StorePaxDataAction")]
store_pax_data_df.head(2)

Unnamed: 0,id,creation_time,airline_code,flight_number,flight_date,departure_airport,user_name,action_name,header_line,entry_details
558742,33810692,2024-04-30 13:11:25,AB,2439,1,BOM,human,StorePaxDataAction,"2024-04-30 13:11:25,230 INFO [d5c1b88577a18f08] [AB2439 /01 BOM | human | StorePaxDataAction | | default task-13] Received:",com.onesystem.lc2.paxactuals.dto.PaxDataDTO [ id = 2271338 ]\r\n Baggage weight type: HISTORIC \r\nBLR \r\n Y Jump Standby Male Female Child Infant Bags BWgt Average \r\nLoadsheet 174 NULL 0 174 0 0 0 0 0.00 KG NULL \r\n\r\n Distribution : STANDARD_DISTRIBUTION \r\n Section : 0A 0B 0C 0D 0E 0F \r\n Capacity : Y12 Y36 Y30 Y30 Y30 Y36 \r\n Distribution : Y0 Y0 Y0 Y0 Y0 Y0
558743,33810699,2024-04-30 13:11:25,AB,2439,1,BOM,human,StorePaxDataAction,"2024-04-30 13:11:25,232 INFO [d5c1b88577a18f08] [AB2439 /01 BOM | human | StorePaxDataAction | | default task-13] Saved:",TOTAL Pax: 174 Y: 174 Jump: 0 StandBy: NULL Male: 174 Female: 0 Child: 0 Infant: 0 Total bag: 0 Total bag weight: 0.0 KG Baggage weight type: HISTORIC\r\n Distribution : STANDARD_DISTRIBUTION \r\n Section : 0A 0B 0C 0D 0E 0F \r\n Capacity : Y12 Y36 Y30 Y30 Y30 Y36 \r\n Distribution : Y12 Y36 Y30 Y30 Y30 Y36 \r\n Undistributed pax : 0 \r\nSTATUS AIRCRAFT_CONFIG 1 EZFW 1 CARGO_TRANSFER 1 CABIN_CONFIG 1 CALC_HIST_DATA 1 AUTO_MODE_ACTIVE 1 AUTOMATION_STARTED 0 EZFW_COUNTER 1 REGISTRATION 1 REGISTRATION_CHANGE 5


In [333]:
random_sample = df[df["action_name"] == "StorePaxDataAction"].iloc[random.randint(0, len(df[df["action_name"] == "StorePaxDataAction"]))]

write_to_file(str(random_sample["entry_details"]), "message2.txt")
print("---")
print(random_sample["user_name"])
print(random_sample["header_line"])
print(random_sample["entry_details"])

---
service-acco
2024-05-03 05:06:00,778 INFO  [fa41de1cf6d49697] [AB2486 /03 IXZ | service-acco | StorePaxDataAction                  |                                | Thread-61777 (ActiveMQ-client-global-threads)]  Received:
com.onesystem.lc2.paxactuals.dto.PaxDataDTO [ id = 2295980 ]
 Baggage weight type: ACTUAL         
MAA            
                Y              Jump           Standby        Male           Female         Child          Infant         Bags           BWgt           Average        
Checkin         170            NULL           NULL           90             71             9              0              139            1466.00 KG     10.55 KG       
BLR            
                Y              Jump           Standby        Male           Female         Child          Infant         Bags           BWgt           Average        
Checkin         14             NULL           NULL           4              9              1              0              12             138.

In [358]:
def add_to_df(df, extracted_data):
    for key, value in extracted_data.items():
            # add columns from entry_details keys if not exist
            if key not in df.columns:
                df[str(key)] = np.nan
            # write data in entry_details key columns
            df.at[idx, key] = value

# create columns and store information from entry_deatails in them
for idx, row in tqdm(store_pax_data_df.iterrows(), total=store_pax_data_df.shape[0], desc="Processing rows"):
    details = row["entry_details"]

    # skip system row because of lacking information
    if "com.onesystem.lc2.paxactuals.dto.PaxDataDTO" in details:
        # pattern to extract infos from string
        pattern = re.compile(
        r'(?P<type>Checkin|Loadsheet)\s+'
        r'(?P<Y>\d+|\w+)\s+'
        r'(?P<Jump>\d+|\w+)\s+'
        r'(?P<Standby>\d+|\w+)\s+'
        r'(?P<Male>\d+|\w+)\s+'
        r'(?P<Female>\d+|\w+)\s+'
        r'(?P<Child>\d+|\w+)\s+'
        r'(?P<Infant>\d+|\w+)\s+'
        r'(?P<Bags>\d+|\w+)\s+'
        r'(?P<BWgt>[0-9.]+|\w+)\s+'
        #r'(?P<Average>[0-9.]+|\w+)'
    )
        # find match from pattern
        match = pattern.search(details)
        

        # add to teh data frame if match
        if match:
            extracted_data = match.groupdict()
            add_to_df(store_pax_data_df, extracted_data)

        else:
            print("No match found")
            print(details)
            break

    elif "TOTAL Pax" in details:
        pattern = re.compile(
            r'TOTAL Pax:\s*(?P<TOTAL_Pax>\d+)\s*'
            r'Y:\s*(?P<Y>\d+)\s*'
            r'Jump:\s*(?P<Jump>\d+|NULL)\s*'
            r'StandBy:\s*(?P<Standby>\d+|NULL)\s*'
            r'Male:\s*(?P<Male>\d+|NULL)\s*'
            r'Female:\s*(?P<Female>\d+|NULL)\s*'
            r'Child:\s*(?P<Child>\d+|NULL)\s*'
            r'Infant:\s*(?P<Infant>\d+|NULL)\s*'
            r'Total bag:\s*(?P<Bags>\d+)\s*'
            r'Total bag weight:\s*(?P<BWgt>[\d.]+ KG)\s*'
            r'Baggage weight type:\s*(?P<Baggage_weight_type>\w+)'
        )

        # Extracting matches
        match = pattern.search(details)

        # Extracted key-value pairs
        if match:
            extracted_data = match.groupdict()
            #print(extracted_data)
            add_to_df(store_pax_data_df, extracted_data)
        else:
            print("----")
            print("No match found")
            print(details)
            break
                
    else:
        break
        print("unexpected format")
        print(details)

Processing rows: 100%|██████████| 150871/150871 [00:17<00:00, 8399.23it/s]


In [356]:
store_pax_data_df.columns

Index(['id', 'creation_time', 'airline_code', 'flight_number', 'flight_date',
       'departure_airport', 'user_name', 'action_name', 'header_line',
       'entry_details', 'type', 'Y', 'Jump', 'Standby', 'Male', 'Female',
       'Child', 'Infant', 'Bags', 'BWgt', 'TOTAL_Pax', 'Baggage_weight_type'],
      dtype='object')

In [357]:
store_pax_data_df.head()

Unnamed: 0,id,creation_time,airline_code,flight_number,flight_date,departure_airport,user_name,action_name,header_line,entry_details,...,Jump,Standby,Male,Female,Child,Infant,Bags,BWgt,TOTAL_Pax,Baggage_weight_type
558742,33810692,2024-04-30 13:11:25,AB,2439,1,BOM,human,StorePaxDataAction,"2024-04-30 13:11:25,230 INFO [d5c1b88577a18f08] [AB2439 /01 BOM | human | StorePaxDataAction | | default task-13] Received:",com.onesystem.lc2.paxactuals.dto.PaxDataDTO [ id = 2271338 ]\r\n Baggage weight type: HISTORIC \r\nBLR \r\n Y Jump Standby Male Female Child Infant Bags BWgt Average \r\nLoadsheet 174 NULL 0 174 0 0 0 0 0.00 KG NULL \r\n\r\n Distribution : STANDARD_DISTRIBUTION \r\n Section : 0A 0B 0C 0D 0E 0F \r\n Capacity : Y12 Y36 Y30 Y30 Y30 Y36 \r\n Distribution : Y0 Y0 Y0 Y0 Y0 Y0,...,,0.0,174,0,0,0,0,0.00,,
558743,33810699,2024-04-30 13:11:25,AB,2439,1,BOM,human,StorePaxDataAction,"2024-04-30 13:11:25,232 INFO [d5c1b88577a18f08] [AB2439 /01 BOM | human | StorePaxDataAction | | default task-13] Saved:",TOTAL Pax: 174 Y: 174 Jump: 0 StandBy: NULL Male: 174 Female: 0 Child: 0 Infant: 0 Total bag: 0 Total bag weight: 0.0 KG Baggage weight type: HISTORIC\r\n Distribution : STANDARD_DISTRIBUTION \r\n Section : 0A 0B 0C 0D 0E 0F \r\n Capacity : Y12 Y36 Y30 Y30 Y30 Y36 \r\n Distribution : Y12 Y36 Y30 Y30 Y30 Y36 \r\n Undistributed pax : 0 \r\nSTATUS AIRCRAFT_CONFIG 1 EZFW 1 CARGO_TRANSFER 1 CABIN_CONFIG 1 CALC_HIST_DATA 1 AUTO_MODE_ACTIVE 1 AUTOMATION_STARTED 0 EZFW_COUNTER 1 REGISTRATION 1 REGISTRATION_CHANGE 5,...,0.0,,174,0,0,0,0,0.0 KG,174.0,HISTORIC
558744,33841078,2024-04-30 17:09:32,AB,2439,1,BOM,human,StorePaxDataAction,"2024-04-30 17:09:32,622 INFO [12d698c50f0fceff] [AB2439 /01 BOM | human | StorePaxDataAction | | default task-42] Received:",com.onesystem.lc2.paxactuals.dto.PaxDataDTO [ id = 2271338 ]\r\n Baggage weight type: STANDARD \r\nBLR \r\n Y Jump Standby Male Female Child Infant Bags BWgt Average \r\nLoadsheet 174 NULL 0 174 0 0 0 100 1500.00 KG 15.00 KG \r\n\r\n Distribution : STANDARD_DISTRIBUTION \r\n Section : 0A 0B 0C 0D 0E 0F \r\n Capacity : Y12 Y36 Y30 Y30 Y30 Y36 \r\n Distribution : Y12 Y36 Y30 Y30 Y30 Y36,...,,0.0,174,0,0,0,100,1500.00,,
558745,33841888,2024-04-30 17:59:01,AB,2340,1,BLR,service-acco,StorePaxDataAction,"2024-04-30 17:59:01,241 INFO [1f59c1d0102e8969] [AB2340 /01 BLR | service-acco | StorePaxDataAction | | Thread-60540 (ActiveMQ-client-global-threads)] Received:",com.onesystem.lc2.paxactuals.dto.PaxDataDTO [ id = 2270799 ]\r\n Baggage weight type: HISTORIC \r\nBOM \r\n Y Jump Standby Male Female Child Infant Bags BWgt Average \r\nLoadsheet 164 NULL 0 111 49 4 1 0 0.00 KG NULL \r\n\r\n Distribution : STANDARD_DISTRIBUTION \r\n Section : 0A 0B 0C 0D 0E 0F \r\n Capacity : Y18 Y36 Y36 Y36 Y30 Y30 \r\n Distribution : Y0 Y0 Y0 Y0 Y0 Y0,...,,0.0,111,49,4,1,0,0.00,,
558746,33841897,2024-04-30 17:59:01,AB,2340,1,BLR,service-acco,StorePaxDataAction,"2024-04-30 17:59:01,243 INFO [1f59c1d0102e8969] [AB2340 /01 BLR | service-acco | StorePaxDataAction | | Thread-60540 (ActiveMQ-client-global-threads)] Saved:",TOTAL Pax: 164 Y: 164 Jump: 0 StandBy: NULL Male: 111 Female: 49 Child: 4 Infant: 1 Total bag: 0 Total bag weight: 0.0 KG Baggage weight type: HISTORIC\r\n Distribution : STANDARD_DISTRIBUTION \r\n Section : 0A 0B 0C 0D 0E 0F \r\n Capacity : Y18 Y36 Y36 Y36 Y30 Y30 \r\n Distribution : Y16 Y32 Y32 Y32 Y26 Y26 \r\n Undistributed pax : 0 \r\nSTATUS AIRCRAFT_CONFIG 1 CARGO_TRANSFER 1 CABIN_CONFIG 1 CALC_HIST_DATA 1 AUTO_MODE_ACTIVE 1 AUTOMATION_STARTED 0 REGISTRATION 1 REGISTRATION_CHANGE 3,...,0.0,,111,49,4,1,0,0.0 KG,164.0,HISTORIC
