In [7]:
import pandas as pd
import ast

In [4]:
def calculate_e2o_denominator(
    lodge_event_count: int,
    lodge_objects_per_event: int,
    other_event_types_count: int,
    other_event_instances_per_type: int
):
    """Return the expected number of event-to-object (E2O) links.

    The result is the sum of two products:

    * ``lodge_event_count * lodge_objects_per_event``
    * ``other_event_types_count * other_event_instances_per_type``

    The second product adds exactly one link per instance of the other
    event types.

    Args:
        lodge_event_count: Number of lodge events.
        lodge_objects_per_event: Objects linked to each lodge event.
        other_event_types_count: Number of event types other than lodge.
        other_event_instances_per_type: Instances of each of those types.

    Returns:
        The total expected link count, used as the completeness denominator.
    """
    return (
        lodge_event_count * lodge_objects_per_event
        + other_event_types_count * other_event_instances_per_type
    )

# Expected E2O link count shared by every dataset section below:
# 100 lodge events x 2 objects each, plus 7 other event types x 200 instances each.
denominator = calculate_e2o_denominator(
    lodge_event_count=100, lodge_objects_per_event=2,
    other_event_types_count=7, other_event_instances_per_type=200,
)

print(f"E2O completeness denominator: {denominator}")


E2O completeness denominator: 1600


##### Dirigo

In [5]:
# Load the Dirigo event-to-object table.
dirigo_e2o = pd.read_csv('../Dirigo/OCEL_E2O.csv')
# Numerator = number of distinct (Event_id, Object_id) pairs in that table.
numerator = len(dirigo_e2o.drop_duplicates(subset=['Event_id', 'Object_id']))

numerator


1600

In [14]:
# Dirigo E2O completeness ratio: distinct pairs found / pairs expected.
# The pair count is recomputed from `dirigo_e2o` here rather than read from the
# shared `numerator` global, because the ACEL and DOCEL sections rebind
# `numerator`; reading it would make this ratio depend on which cell ran last.
dirigo_numerator = dirigo_e2o[['Event_id', 'Object_id']].drop_duplicates().shape[0]
e2o_dirigo = dirigo_numerator / denominator
print(f"E2OCR (Dirigo): {e2o_dirigo:.2%}")

E2OCR (Dirigo): 100.00%


##### ACEL

In [9]:
# Load the ACEL events table. Its `Event_id` and `Objects` columns are the ones
# read when the (Event_id, Object_id) pairs are built.
# NOTE: despite the `_e2o` suffix, this frame is read from ACEL_Events.csv.
acel_e2o = pd.read_csv('../ACEL/ACEL_Events.csv')

In [13]:
# Expand every ACEL event into (Event_id, Object_id) pairs. The `Objects` value
# is parsed with ast.literal_eval and the parsed value is iterated for object ids.
e2o_pairs = []
acel_unparsed_events = []  # Event_ids whose `Objects` value was present but unusable

# Iterate the two needed columns directly instead of DataFrame.iterrows().
for event_id, objects_raw in zip(acel_e2o['Event_id'], acel_e2o['Objects']):
    if pd.isna(objects_raw):
        # No value recorded for this event: it contributes no links.
        continue

    try:
        # NOTE(review): a value that parses to a plain string is iterated
        # character by character (unchanged from before) - confirm `Objects`
        # always holds a list literal.
        object_ids = list(ast.literal_eval(objects_raw))
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # Malformed literal or non-iterable result: still skipped (best effort),
        # but remembered so the drop is visible instead of silent.
        acel_unparsed_events.append(event_id)
        continue

    e2o_pairs.extend((event_id, obj_id) for obj_id in object_ids)

if acel_unparsed_events:
    print(
        f"Warning: skipped {len(acel_unparsed_events)} ACEL event(s) with an "
        f"unparseable 'Objects' value, e.g. {acel_unparsed_events[:5]}"
    )

df_e2o = pd.DataFrame(e2o_pairs, columns=['Event_id', 'Object_id']).drop_duplicates()
numerator = len(df_e2o)
numerator

1600

In [15]:
# ACEL E2O completeness ratio: distinct pairs found / pairs expected.
# NOTE(review): `numerator` is a shared global that the Dirigo and DOCEL
# sections also assign; this cell is only correct when run right after the
# ACEL pair-extraction cell.
e2o_acel = numerator / denominator
print(f"E2OCR (ACEL): {e2o_acel:.2%}")

E2OCR (ACEL): 100.00%


##### DOCEL

In [16]:
# Load the DOCEL events table. The pair-building cell reads `Event_id` plus the
# object columns `PickupPlan`, `Cargo` and `Truck` from it.
# NOTE: despite the `_e2o` suffix, this frame is read from DOCEL_Events.csv.
docel_e2o = pd.read_csv('../DOCEL/DOCEL_Events.csv')

In [18]:
# Expand every DOCEL event into (Event_id, Object_id) pairs. Each object column
# is parsed with ast.literal_eval and the parsed value is iterated for object ids.
e2o_pairs = []
docel_unparsed_cells = []  # (Event_id, column) whose value was present but unusable
object_columns = ['PickupPlan', 'Cargo', 'Truck']

# Columns absent from the CSV contribute no links; report them rather than
# ignoring them silently.
present_columns = [col for col in object_columns if col in docel_e2o.columns]
missing_columns = [col for col in object_columns if col not in docel_e2o.columns]
if missing_columns:
    print(f"Warning: DOCEL object column(s) not found and ignored: {missing_columns}")

# Row-major iteration over just the needed columns keeps the original pair order.
row_values = zip(docel_e2o['Event_id'], *(docel_e2o[col] for col in present_columns))
for event_id, *values in row_values:
    for field, value in zip(present_columns, values):
        if pd.isna(value):
            # Empty cell: this event has no object of this type.
            continue
        try:
            object_ids = list(ast.literal_eval(value))
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Malformed literal or non-iterable result: still skipped (best
            # effort), but remembered so the drop is visible instead of silent.
            docel_unparsed_cells.append((event_id, field))
            continue
        e2o_pairs.extend((event_id, obj_id) for obj_id in object_ids)

if docel_unparsed_cells:
    print(
        f"Warning: skipped {len(docel_unparsed_cells)} DOCEL cell(s) with an "
        f"unparseable object list, e.g. {docel_unparsed_cells[:5]}"
    )

df_e2o = pd.DataFrame(e2o_pairs, columns=['Event_id', 'Object_id']).drop_duplicates()
numerator = len(df_e2o)

In [19]:
# DOCEL E2O completeness ratio: distinct pairs found / pairs expected.
# NOTE(review): `numerator` is a shared global that the Dirigo and ACEL
# sections also assign; this cell is only correct when run right after the
# DOCEL pair-extraction cell.
e2o_docel = numerator / denominator
print(f"E2OCR (DOCEL): {e2o_docel:.2%}")

E2OCR (DOCEL): 100.00%
