#### Dynamic Attributes Completness Ratio

In [53]:
import pandas as pd
import ast

##### Dirigo

In [3]:
truck_df_dirigo = pd.read_csv('../Dirigo/OCEL_Truck.csv')
cargo_df_dirigo = pd.read_csv('../Dirigo/OCEL_Cargo.csv')
pickupplan_df_dirigo = pd.read_csv('../Dirigo/OCEL_PickupPlan.csv')

In [None]:
def get_dynamic_attributes(df, object_type_name):
    exclude_cols = {"Object_id", "Timestamp", "Ocel_changed_field"}
    dynamic_attrs = []

    for col in df.columns:
        if col not in exclude_cols:
            # For each object, count how many non-null records exist for this attribute [col] at different timestamps
            changes_per_object = df[df[col].notna()].groupby("Object_id")["Timestamp"].nunique()
            if changes_per_object.gt(1).any():
                dynamic_attrs.append(col)

    return pd.DataFrame({
        "Object Type": object_type_name,
        "Dynamic Attribute": dynamic_attrs
    })


In [14]:
#truck_df_dirigo[truck_df_dirigo['Truck Status'].notna()].groupby("Object_id")["Timestamp"].nunique()

In [8]:
truck_dynamic_df = get_dynamic_attributes(truck_df_dirigo, "Truck")
truck_dynamic_df

Unnamed: 0,Object Type,Dynamic Attribute
0,Truck,Pickup Plan ID
1,Truck,Cargo ID
2,Truck,Scheduled Pickup Weight
3,Truck,Truck Status
4,Truck,Truck Weight


In [9]:
cargo_dynamic_df = get_dynamic_attributes(cargo_df_dirigo, "Cargo")
cargo_dynamic_df

Unnamed: 0,Object Type,Dynamic Attribute
0,Cargo,Cargo stock weight(scheduled)


In [17]:
pickup_dynamic_df = pd.DataFrame({
    "Object Type": "Pickup Plan",
    "Dynamic Attribute": [
        "Num of trucks",
        "Total pickup weight",
        "CargoID"
    ]
})

In [20]:
dirigo_dynamic_attributes_df = pd.concat(
    [truck_dynamic_df, cargo_dynamic_df, pickup_dynamic_df],
    ignore_index=True
)

In [21]:
dirigo_dynamic_attributes_df

Unnamed: 0,Object Type,Dynamic Attribute
0,Truck,Pickup Plan ID
1,Truck,Cargo ID
2,Truck,Scheduled Pickup Weight
3,Truck,Truck Status
4,Truck,Truck Weight
5,Cargo,Cargo stock weight(scheduled)
6,Pickup Plan,Num of trucks
7,Pickup Plan,Total pickup weight
8,Pickup Plan,CargoID


In [24]:
dirigo_object_type_to_df = {
    "Truck": truck_df_dirigo,
    "Cargo": cargo_df_dirigo,
    "Pickup Plan": pickupplan_df_dirigo
}


In [25]:
dirigo_object_type_to_df

{'Truck':      Object_id            Timestamp Ocel_changed_field Pickup Plan ID  \
 0         tr46  2024-03-04 00:00:00                NaN            NaN   
 1         tr28  2024-03-04 00:00:00                NaN            NaN   
 2          tr5  2024-03-04 00:00:00                NaN            NaN   
 3         tr27  2024-03-04 00:00:00                NaN            NaN   
 4         tr41  2024-03-04 00:00:00                NaN            NaN   
 ...        ...                  ...                ...            ...   
 1445      tr29  2024-05-03 15:22:06       Truck Weight            NaN   
 1446      tr49  2024-05-03 15:33:13       Truck Weight            NaN   
 1447      tr29  2024-05-03 15:43:38       Truck Weight            NaN   
 1448      tr49  2024-05-03 15:50:06       Truck Status            NaN   
 1449      tr29  2024-05-03 15:56:08       Truck Status            NaN   
 
      Cargo ID     LPT  Axles  Scheduled Pickup Weight Truck Status  \
 0         NaN  271FSE    6.0 

In [39]:
dirigo_record_counts = []

for _, row in dirigo_dynamic_attributes_df.iterrows():
    obj_type = row["Object Type"]
    attr = row["Dynamic Attribute"]
    df = dirigo_object_type_to_df[obj_type]

    count = df[(df[attr].notna()) & (df[attr]!=0.0) &(df["Timestamp"].notna())].shape[0]
    
    dirigo_record_counts.append({
        "Object Type": obj_type,
        "Dynamic Attribute": attr,
        "Logged Records (Dirigo)": count
    })

In [40]:
dirigo_record_counts

[{'Object Type': 'Truck',
  'Dynamic Attribute': 'Pickup Plan ID',
  'Logged Records (Dirigo)': 200},
 {'Object Type': 'Truck',
  'Dynamic Attribute': 'Cargo ID',
  'Logged Records (Dirigo)': 200},
 {'Object Type': 'Truck',
  'Dynamic Attribute': 'Scheduled Pickup Weight',
  'Logged Records (Dirigo)': 200},
 {'Object Type': 'Truck',
  'Dynamic Attribute': 'Truck Status',
  'Logged Records (Dirigo)': 450},
 {'Object Type': 'Truck',
  'Dynamic Attribute': 'Truck Weight',
  'Logged Records (Dirigo)': 400},
 {'Object Type': 'Cargo',
  'Dynamic Attribute': 'Cargo stock weight(scheduled)',
  'Logged Records (Dirigo)': 120},
 {'Object Type': 'Pickup Plan',
  'Dynamic Attribute': 'Num of trucks',
  'Logged Records (Dirigo)': 100},
 {'Object Type': 'Pickup Plan',
  'Dynamic Attribute': 'Total pickup weight',
  'Logged Records (Dirigo)': 100},
 {'Object Type': 'Pickup Plan',
  'Dynamic Attribute': 'CargoID',
  'Logged Records (Dirigo)': 100}]

In [74]:
dirigo_df = pd.DataFrame(dirigo_record_counts)
dirigo_df
# dirigo_logged_sum = dirigo_df.groupby("Object Type")["Logged Records (Dirigo)"].sum().reset_index()

Unnamed: 0,Object Type,Dynamic Attribute,Logged Records (Dirigo)
0,Truck,Pickup Plan ID,200
1,Truck,Cargo ID,200
2,Truck,Scheduled Pickup Weight,200
3,Truck,Truck Status,450
4,Truck,Truck Weight,400
5,Cargo,Cargo stock weight(scheduled),120
6,Pickup Plan,Num of trucks,100
7,Pickup Plan,Total pickup weight,100
8,Pickup Plan,CargoID,100


In [69]:
expected_record_counts = [
    {"Object Type": "Truck", "Dynamic Attribute": "Pickup Plan ID", "Expected Records": 200},
    {"Object Type": "Truck", "Dynamic Attribute": "Cargo ID", "Expected Records": 200},
    {"Object Type": "Truck", "Dynamic Attribute": "Scheduled Pickup Weight", "Expected Records": 200},
    {"Object Type": "Truck", "Dynamic Attribute": "Truck Status", "Expected Records": 450},
    {"Object Type": "Truck", "Dynamic Attribute": "Truck Weight", "Expected Records": 400},
    {"Object Type": "Cargo", "Dynamic Attribute": "Cargo stock weight(scheduled)", "Expected Records": 120},
    {"Object Type": "Pickup Plan", "Dynamic Attribute": "Num of trucks", "Expected Records": 100},
    {"Object Type": "Pickup Plan", "Dynamic Attribute": "Total pickup weight", "Expected Records": 100},
    {"Object Type": "Pickup Plan", "Dynamic Attribute": "CargoID", "Expected Records": 100},
]
expected_df = pd.DataFrame(expected_record_counts)
expected_df

Unnamed: 0,Object Type,Dynamic Attribute,Expected Records
0,Truck,Pickup Plan ID,200
1,Truck,Cargo ID,200
2,Truck,Scheduled Pickup Weight,200
3,Truck,Truck Status,450
4,Truck,Truck Weight,400
5,Cargo,Cargo stock weight(scheduled),120
6,Pickup Plan,Num of trucks,100
7,Pickup Plan,Total pickup weight,100
8,Pickup Plan,CargoID,100


In [75]:
# dacr_df = pd.merge(
#     dirigo_logged_sum,          
#     expected_sum_df,             
#     on="Object Type"
# )

# dacr_df["DACR (%)"] = ((dacr_df["Logged Records (Dirigo)"] / dacr_df["Expected Records"])*100).round(2)

# dacr_df

##### ACEL

In [50]:
acel_Events_df = pd.read_csv('../ACEL/ACEL_Events.csv')

In [51]:
prefix_to_type = {
    "tr": "Truck",
    "cr": "Cargo",
    "pcp": "Pickup Plan"
}

In [65]:
truck_status_values = {"available", "occupied"}

cleaned_acel_records = []

for _, row in acel_Events_df.iterrows():
    timestamp = row["Timestamp"]
    try:
        changes = ast.literal_eval(row["ObjectChanges"])
    except (ValueError, SyntaxError):
        continue

    for change in changes:
        obj_id = change.get("ObjectId", "").lower()
        attr = change.get("Attribute")
        val = change.get("NewValue")

        obj_type = None
        for prefix, ot in prefix_to_type.items():
            if obj_id.startswith(prefix):
                obj_type = ot
                break
        if obj_type is None:
            continue

        # exclude lifecycle entries
        if attr == "lifecycle":
            if obj_type == "Truck" and isinstance(val, str) and val.lower() in truck_status_values:
                cleaned_acel_records.append((obj_type, "Truck Status", val, timestamp))
        else:
            # Include all other attributes
            cleaned_acel_records.append((obj_type, attr, val, timestamp))

In [66]:
df_acel_cleaned = pd.DataFrame(cleaned_acel_records, columns=[
    "Object Type", "Dynamic Attribute", "Value", "Timestamp"
])
df_acel_cleaned

Unnamed: 0,Object Type,Dynamic Attribute,Value,Timestamp
0,Pickup Plan,CargoID,Cr2,2024-04-29 08:04:44
1,Pickup Plan,Num of trucks,2,2024-04-29 08:04:44
2,Pickup Plan,Total pickup weight,10611.3,2024-04-29 08:04:44
3,Cargo,Cargo stock weight(scheduled),90032.0,2024-04-29 08:04:44
4,Pickup Plan,CargoID,Cr9,2024-04-29 08:12:32
...,...,...,...,...
1595,Truck,Cargo ID,Cr10,2024-05-03 15:09:23
1596,Truck,Truck Weight,12150.9,2024-05-03 15:17:37
1597,Truck,Truck Weight,12136.1,2024-05-03 15:22:06
1598,Truck,Truck Weight,17593.0,2024-05-03 15:33:13


In [71]:
acel_logged_per_attr = (
    df_acel_cleaned
    .groupby(["Object Type", "Dynamic Attribute"])
    .size()
    .reset_index(name="Logged Records (ACEL)")
)

In [72]:
acel_logged_per_attr

Unnamed: 0,Object Type,Dynamic Attribute,Logged Records (ACEL)
0,Cargo,Cargo stock weight(scheduled),100
1,Pickup Plan,CargoID,100
2,Pickup Plan,Num of trucks,100
3,Pickup Plan,Total pickup weight,100
4,Truck,Cargo ID,200
5,Truck,Pickup Plan ID,200
6,Truck,Scheduled Pickup Weight,200
7,Truck,Truck Status,200
8,Truck,Truck Weight,400


##### DOCEL

In [85]:
docel_events_df = pd.read_csv("../DOCEL/DOCEL_Events.csv")

docel_event_timestamp_map = dict(zip(docel_events_df["Event_id"], docel_events_df["Timestamp"]))
docel_event_timestamp_map

{'Lodge_Pcp44': '2024-04-29 08:04:44',
 'Lodge_Pcp87': '2024-04-29 08:12:32',
 'Lodge_Pcp48': '2024-04-29 08:31:44',
 'Lodge_Pcp55': '2024-04-29 08:32:30',
 'Lodge_Pcp69': '2024-04-29 08:39:46',
 'Lodge_Pcp18': '2024-04-29 08:46:20',
 'Lodge_Pcp21': '2024-04-29 08:58:14',
 'Lodge_Pcp74': '2024-04-29 09:07:13',
 'Lodge_Pcp23': '2024-04-29 09:07:35',
 'Lodge_Pcp39': '2024-04-29 09:10:45',
 'Lodge_Pcp41': '2024-04-29 09:19:03',
 'Lodge_Pcp29': '2024-04-29 09:25:16',
 'Lodge_Pcp36': '2024-04-29 09:29:40',
 'Lodge_Pcp81': '2024-04-29 09:29:57',
 'Lodge_Pcp22': '2024-04-29 09:31:44',
 'Lodge_Pcp28': '2024-04-29 09:46:28',
 'Lodge_Pcp59': '2024-04-29 09:47:00',
 'Lodge_Pcp7': '2024-04-29 09:49:45',
 'Lodge_Pcp64': '2024-04-29 09:53:26',
 'Lodge_Pcp4': '2024-04-29 09:57:41',
 'Lodge_Pcp5': '2024-04-29 09:58:47',
 'Lodge_Pcp82': '2024-04-29 10:00:12',
 'Lodge_Pcp98': '2024-04-29 10:07:21',
 'Lodge_Pcp86': '2024-04-29 10:08:38',
 'Lodge_Pcp67': '2024-04-29 10:19:50',
 'Lodge_Pcp72': '2024-04-29 

In [88]:
# Truck
truck_PickupPlan_df = pd.read_csv("../DOCEL/DOCEL_TruckPickupplanID.csv")
truck_Cargo_df = pd.read_csv("../DOCEL/DOCEL_TruckCargoID.csv")
truck_scheduled_weight_df = pd.read_csv("../DOCEL/DOCEL_TruckScheduledPickupWeight.csv")
truck_weight_df = pd.read_csv("../DOCEL/DOCEL_TruckWeight.csv")
truck_status_df = pd.read_csv("../DOCEL/DOCEL_TruckStatus.csv")


# Cargo
cargo_stock_df = pd.read_csv("../DOCEL/DOCEL_CargoStock.csv")

# Pickup Plan
pickup_cargo_df = pd.read_csv("../DOCEL/DOCEL_PickupplanCargo.csv")
pickup_num_trucks_df = pd.read_csv("../DOCEL/DOCEL_PickupplanNotrs.csv")
pickup_total_weight_df = pd.read_csv("../DOCEL/DOCEL_PickupplanTotalWeight.csv")

In [98]:
prefix_to_type = {
    "tr": "Truck",
    "cr": "Cargo",
    "pcp": "Pickup Plan"
}


In [104]:
def extract_docel_tuples(df, value_col_name, dynamic_attr, event_map):
    records = []
    
    for _, row in df.iterrows():
        event_id = row.get("eventID")
        object_id = str(row.get("objectID")).lower()
        timestamp = event_map.get(event_id)

        # Infer object type from objectID
        object_type = "Unknown"
        for prefix, inferred_type in prefix_to_type.items():
            if object_id.startswith(prefix):
                object_type = inferred_type
                break

        if object_type != "Unknown" and pd.notna(timestamp):
            records.append({
                "Object Type": object_type,
                "Dynamic Attribute": dynamic_attr,
                "Value": row[value_col_name],
                "Timestamp": timestamp
            })
    
    return records


In [105]:
pickup_cargo_df

Unnamed: 0,PcpCrID,Cargo ID,objectID,eventID
0,CargoID_0,Cr2,Pcp44,Lodge_Pcp44
1,CargoID_1,Cr9,Pcp87,Lodge_Pcp87
2,CargoID_2,Cr20,Pcp48,Lodge_Pcp48
3,CargoID_3,Cr13,Pcp55,Lodge_Pcp55
4,CargoID_4,Cr7,Pcp69,Lodge_Pcp69
...,...,...,...,...
95,CargoID_95,Cr20,Pcp37,Lodge_Pcp37
96,CargoID_96,Cr14,Pcp75,Lodge_Pcp75
97,CargoID_97,Cr15,Pcp62,Lodge_Pcp62
98,CargoID_98,Cr17,Pcp50,Lodge_Pcp50


In [122]:
docel_records = []

# Truck
docel_records += extract_docel_tuples(truck_PickupPlan_df, "Pickup Plan ID", "Pickup Plan ID", docel_event_timestamp_map)
docel_records += extract_docel_tuples(truck_Cargo_df, "Cargo ID", "Cargo ID", docel_event_timestamp_map)
docel_records += extract_docel_tuples(truck_weight_df, "Truck Weight", "Truck Weight", docel_event_timestamp_map)
docel_records += extract_docel_tuples(truck_status_df, "Truck Status", "Truck Status", docel_event_timestamp_map)
docel_records += extract_docel_tuples(truck_scheduled_weight_df, "Scheduled Pickup Weight", "Scheduled Pickup Weight", docel_event_timestamp_map)

# Cargo
docel_records += extract_docel_tuples(cargo_stock_df, "Cargo stock weight(scheduled)", "Cargo stock weight(scheduled)", docel_event_timestamp_map)

# Pickup Plan
docel_records += extract_docel_tuples(pickup_cargo_df, "Cargo ID",  "Cargo ID", docel_event_timestamp_map)
docel_records += extract_docel_tuples(pickup_num_trucks_df, "Num of trucks",  "Num of trucks", docel_event_timestamp_map)
docel_records += extract_docel_tuples(pickup_total_weight_df, "Total pickup weight",  "Total pickup weight", docel_event_timestamp_map)


In [124]:
df_docel_full = pd.DataFrame(docel_records)
docel_logged_per_attr = (
    df_docel_full
    .groupby(["Object Type", "Dynamic Attribute"])
    .size()
    .reset_index(name="Logged Records (DOCEL)")
)

In [125]:
docel_logged_per_attr

Unnamed: 0,Object Type,Dynamic Attribute,Logged Records (DOCEL)
0,Cargo,Cargo stock weight(scheduled),100
1,Pickup Plan,Cargo ID,100
2,Pickup Plan,Num of trucks,100
3,Pickup Plan,Total pickup weight,100
4,Truck,Cargo ID,200
5,Truck,Pickup Plan ID,200
6,Truck,Scheduled Pickup Weight,200
7,Truck,Truck Status,200
8,Truck,Truck Weight,400


##### DACR comparisons

In [126]:
docel_logged_per_attr.loc[
    (docel_logged_per_attr["Object Type"] == "Pickup Plan") &
    (docel_logged_per_attr["Dynamic Attribute"] == "Cargo ID"),
    "Dynamic Attribute"
] = "CargoID"
docel_logged_per_attr

Unnamed: 0,Object Type,Dynamic Attribute,Logged Records (DOCEL)
0,Cargo,Cargo stock weight(scheduled),100
1,Pickup Plan,CargoID,100
2,Pickup Plan,Num of trucks,100
3,Pickup Plan,Total pickup weight,100
4,Truck,Cargo ID,200
5,Truck,Pickup Plan ID,200
6,Truck,Scheduled Pickup Weight,200
7,Truck,Truck Status,200
8,Truck,Truck Weight,400


In [127]:
merged_df = pd.merge(expected_df, acel_logged_per_attr,
                     on=["Object Type", "Dynamic Attribute"], how="left")

merged_df = pd.merge(merged_df, docel_logged_per_attr,
                     on=["Object Type", "Dynamic Attribute"], how="left")

# Then merge with Dirigo
merged_df = pd.merge(merged_df, dirigo_df,
                     on=["Object Type", "Dynamic Attribute"], how="left")

merged_df["DACR (ACEL) %"] = (merged_df["Logged Records (ACEL)"] / merged_df["Expected Records"] * 100).round(2)
merged_df["DACR (DOCEL) %"] = (merged_df["Logged Records (DOCEL)"] / merged_df["Expected Records"] * 100).round(2)
merged_df["DACR (Dirigo) %"] = (merged_df["Logged Records (Dirigo)"] / merged_df["Expected Records"] * 100).round(2)

merged_df

Unnamed: 0,Object Type,Dynamic Attribute,Expected Records,Logged Records (ACEL),Logged Records (DOCEL),Logged Records (Dirigo),DACR (ACEL) %,DACR (DOCEL) %,DACR (Dirigo) %
0,Truck,Pickup Plan ID,200,200,200,200,100.0,100.0,100.0
1,Truck,Cargo ID,200,200,200,200,100.0,100.0,100.0
2,Truck,Scheduled Pickup Weight,200,200,200,200,100.0,100.0,100.0
3,Truck,Truck Status,450,200,200,450,44.44,44.44,100.0
4,Truck,Truck Weight,400,400,400,400,100.0,100.0,100.0
5,Cargo,Cargo stock weight(scheduled),120,100,100,120,83.33,83.33,100.0
6,Pickup Plan,Num of trucks,100,100,100,100,100.0,100.0,100.0
7,Pickup Plan,Total pickup weight,100,100,100,100,100.0,100.0,100.0
8,Pickup Plan,CargoID,100,100,100,100,100.0,100.0,100.0
