In [15]:
import pandas as pd
from typing import Dict, List, Tuple
import json
from tqdm.auto import tqdm
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
import numbers

In [16]:
parquet_file_abcd_conv = Path("../data/ABCD_tripfiles_conv.parquet")
parquet_file_mnop_conv = Path("../data/MNOP_tripfiles_conv.parquet")
parquet_file_zyxw_conv = Path("../data/ZYXW_tripfiles_conv.parquet")

parquet_file_abcd_conv_test = Path("../data/ABCD_tripfiles_conv_test.parquet")
parquet_file_mnop_conv_test = Path("../data/MNOP_tripfiles_conv_test.parquet")
parquet_file_zyxw_conv_test = Path("../data/ZYXW_tripfiles_conv_test.parquet")

In [17]:
# ab_conv_test = pd.read_parquet(parquet_file_abcd_conv_test)
# mn_conv_test = pd.read_parquet(parquet_file_mnop_conv_test)
# zy_conv_test = pd.read_parquet(parquet_file_zyxw_conv_test)

In [18]:
ab_conv = pd.read_parquet(parquet_file_abcd_conv)
mn_conv = pd.read_parquet(parquet_file_mnop_conv)
zy_conv = pd.read_parquet(parquet_file_zyxw_conv)

In [19]:
class Weight:
    def __init__(self, key, desc=None):
        self.key = key
        self.description = desc

    def get_key(self):
        return self.key

    def get_description(self):
        return self.description

In [20]:
class WeightCluster:
    def __init__(self, name: str, weights: Dict[Weight, str]):
        self._name = name
        self._weights = weights

    def get_weights(self):
        return self._weights

In [21]:
class Action:
    def __init__(self, name: str, weights: Dict[Weight, str]):
        self._name = name
        self._weights = weights

    def get_name(self):
        return self._name

    def get_weights(self):
        return self._weights

    def get_weight_items(
        self,
    ):
        return self._weights.items()

In [59]:
EZFW = Weight(key="EZFW", desc="Estimated Zero Fuel Weight")
AZFW = Weight(key="AZFW", desc="Actual Zero Fuel Weight")

ETOW = Weight(key="ETOW", desc="Estimated Takeoff Weight")
ATOW = Weight(key="ATOW", desc="Actual Takeoff Weight")

ETTL = Weight(key="ETTL", desc="Estimated Traffic Load")
ATTL = Weight(key="ATTL", desc="Actual Total Traffic Load")

DOW = Weight(key="DOW", desc="Dry Operating Weight")
MEW = Weight(key="MEW", desc="Manufacturers Empty Weight")

TOF = Weight(key="TOF", desc="Take Off Fuel")
TF = Weight(key="TF", desc="Trip Fuel")

ALAW = Weight(key="ALAW", desc="Actual Landing Weight")

PAXW = Weight(key="PAXW", desc="Passenger Weight")
BAGW = Weight(key="BAGW", desc="Baggage Weight")
CARW = Weight(key="CARW", desc="Cargo Weight")

In [58]:
LOADSHEETACTION = WeightCluster(
    name="LOADSHEETACTION",
    weights={
        ATTL: "TOTAL TRAFFIC LOAD",
        DOW: "DRY OPERATING WEIGHT",
        AZFW: "ZERO FUEL WEIGHT ACTUAL",
        TOF: "TAKE OFF FUEL",
        ATOW: "TAKE OFF WEIGHT ACTUAL",
        TF: "TRIP",
        ALAW: "LANDING WEIGHT ACTUAL",
    },
)

CALCULATEWEIGHTANDTRIMACTION = WeightCluster(
    name="CALCULATEWEIGHTANDTRIMACTION",
    weights={
        MEW: "START_WI weight",
        DOW: "DO_WI weight",
        PAXW: "PAX_WI weight",
        AZFW: "AZFW",
        EZFW: "ESTIMATED_ZFW",
        ATOW: "ATOW",
        ALAW: "ALAW",
        ETTL: "ESTIMATED_TRAFFIC_LOAD",
        ATTL: "TOTAL_TRAFFIC_LOAD",
    },
)

CreateZFWMessageAction = WeightCluster(
    name="CreateZFWMessageAction",
    weights={
        DOW: "dryOperatingWeight",
        AZFW: "actualZFW",
        CARW: "cargoWeight",
        BAGW: "baggageWeight",
        PAXW: "paxWeight",
        DOW: "basicWeight",
    },
)

NameError: name 'CARW' is not defined

In [56]:
import pprint

pprint.pprint(
    ab_conv[
        ab_conv.action_name == "CreateZFWMessageAction"
    ].data_CreateZFWMessageAction.iloc[1]
)

('{"airline": "AB", "arrivalStation": "BOM", "departureStation": "COK", '
 '"flightDateLocal": "2024-04-30", "revisionNumber": "2", '
 '"dryOperatingWeight": "44584", "actualZFW": "59000", "cargoWeight": "700", '
 '"baggageWeight": "1800", "paxWeight": "11850", "basicWeight": "44170"}')


In [24]:
foo = [
    # Action(name="RampFinalAction", weights={EZFW: "EZFW"}), # EZFW is not a value just a status
    Action(
        name="CalculateWeightAndTrimAction",
        weights=CALCULATEWEIGHTANDTRIMACTION.get_weights(),
    ),
    Action(name="CreateZFWMessageAction", weights=CreateZFWMessageAction.get_weights()),
    Action(name="CreateLoadsheetAction", weights=LOADSHEETACTION.get_weights()),
    Action(name="SendLoadsheetAction", weights=LOADSHEETACTION.get_weights()),
]

In [41]:
s = 1
try:
    eval(s)
except:
    raise ValueError("value is not a number")

ValueError: value is not a number

In [44]:
# Recursive function to find the value for a given key
def find_value(data: dict | list, key: str):
    if isinstance(data, dict):
        for k, v in data.items():
            if k == key:
                if v is None:
                    return None

                if isinstance(v, numbers.Number):
                    return v
                if isinstance(v, str):
                    if v.lower() == "null":
                        return None

                    try:
                        return eval(v)
                    except:
                        raise ValueError(
                            "Value not a number string or a number", key, v
                        )

                return None
            else:
                found = find_value(v, key)
                if found is not None:
                    return found
    elif isinstance(data, list):
        for item in data:
            found = find_value(item, key)
            if found is not None:
                return found
    return None


# Function to apply the recursive search to JSON data
def extract_key(json_str: str, key: str):
    data = json.loads(json_str)
    return find_value(data, key)

In [51]:
df = ab_conv.copy()
df = df.replace({None: pd.NA})

In [52]:
for action in foo:
    for weight, key in action.get_weight_items():

        mask = (df.action_name == action.get_name()) & (
            ~df[f"data_{action.get_name()}"].isna()
        )

        # Use loc to update the DataFrame directly
        df.loc[mask, weight.get_key()] = df.loc[
            mask, f"data_{action.get_name()}"
        ].apply(lambda x: extract_key(x, key))

In [53]:
df

Unnamed: 0,id,creation_time,airline_code,flight_number,flight_date,departure_airport,user_name,action_name,header_line,entry_details,...,DOW,PAXW,AZFW,EZFW,ATOW,ALAW,ETTL,ATTL,TOF,TF
0,33902381,2024-05-01 09:14:42,AB,2359,1,BLR,service-acco,ASMMsgProcessor,"2024-05-01 09:14:42,420 INFO [3f326d134d9de45...","<?xml version=""1.0"" encoding=""UTF-8""?>\r\n<ns2...",...,,,,,,,,,,
1,33910427,2024-05-01 10:28:12,AB,2104,2,BLR,service-acco,ASMMsgProcessor,"2024-05-01 10:28:12,745 INFO [de24be75947007f...","<?xml version=""1.0"" encoding=""UTF-8""?>\r\n<ns2...",...,,,,,,,,,,
2,33910380,2024-05-01 10:28:11,AB,2109,2,BLR,service-acco,ASMMsgProcessor,"2024-05-01 10:28:11,301 INFO [df3a1cc7d574161...","<?xml version=""1.0"" encoding=""UTF-8""?>\r\n<ns2...",...,,,,,,,,,,
3,33909790,2024-05-01 10:28:12,AB,2110,2,BOM,service-acco,ASMMsgProcessor,"2024-05-01 10:28:12,189 INFO [9e66dfb1ffe9048...","<?xml version=""1.0"" encoding=""UTF-8""?>\r\n<ns2...",...,,,,,,,,,,
4,33902706,2024-05-01 09:14:41,AB,2127,2,BOM,service-acco,ASMMsgProcessor,"2024-05-01 09:14:41,884 INFO [829566f527b588f...","<?xml version=""1.0"" encoding=""UTF-8""?>\r\n<ns2...",...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
709608,33875192,2024-05-01 04:07:01,AB,2372,30,GAU,service-acco,StorePaxDataAction,"2024-05-01 04:07:01,675 INFO [90c6642a105b86f...",TOTAL Pax: 131 Y: 131 Jump: 0 StandBy: ...,...,,,,,,,,,,
709609,33875261,2024-05-01 04:08:01,AB,2372,30,GAU,service-acco,StorePaxDataAction,"2024-05-01 04:08:01,279 INFO [76f51797d6d0b42...",com.onesystem.lc2.paxactuals.dto.PaxDataDTO [ ...,...,,,,,,,,,,
709610,33875264,2024-05-01 04:08:01,AB,2372,30,GAU,service-acco,StorePaxDataAction,"2024-05-01 04:08:01,281 INFO [76f51797d6d0b42...",TOTAL Pax: 131 Y: 131 Jump: 0 StandBy: ...,...,,,,,,,,,,
709611,33875306,2024-05-01 04:09:01,AB,2372,30,GAU,service-acco,StorePaxDataAction,"2024-05-01 04:09:01,465 INFO [ef79e34fa812d85...",com.onesystem.lc2.paxactuals.dto.PaxDataDTO [ ...,...,,,,,,,,,,


In [22]:
# for name, keys in weights_to_extract.items():
#     print(name, keys)
#     for key, value in keys["keys"].items():