In [52]:
import re
from typing import Optional
import pandas as pd

## Summary of Actions


- The Simulator adds the container to the realm of the Optimizer: 2024-11-14 10:26:30 INFO adding TO TO_CO_TFTU000001, EMT 2024-11-14 10:26:28+01:00
- The Simulator lets your own Vehicle Optimizer run every certain amount of time and the logs show the job sequence per Straddle Carrier per Optimization run:
    - 2024-11-14 10:27:07 INFO SC001 schedule 1:TO_CO_TFTU000018#CO_TFTU000018#PICK,2:TO_CO_TFTU000018#CO_TFTU000018#DROP
    - 2024-11-14 10:27:07 INFO SC002 schedule 1:TO_CO_TFTU000023#CO_TFTU000023#PICK,2:TO_CO_TFTU000023#CO_TFTU000023#DROP

- Additional information regarding what a Straddle Carrier does it listed as follows:
    - 2024-11-14 10:27:07 INFO SC001 starting TO_CO_TFTU000018#CO_TFTU000018#PICK: travel 2024-11-14 10:27:00+01:00
    - 2024-11-14 10:27:30+01:00, action 2024-11-14 10:27:30+01:00 - 2024-11-14 10:28:30+01:00 (to pick up the container, the Straddle Carrier has to travel (30 seconds) and pick up the container (60 seconds).
    - 2024-11-14 10:27:10 INFO SC001 (TO: TO_CO_TFTU000018, CO: CO_TFTU000018, PICK) driving to QC003; 31 s; 172693 mm (Straddle Carrier is driving to a location, driving time and distance is shown – distance is measured in Manhattan Distance)
    - 2024-11-14 10:27:10 DEBUG location QC001: using lane 0 for CO CO_TFTU000001 (shows when a Straddle Carrier is using one of the available and limited spaces in a location)
    - 2024-11-14 10:27:10 INFO SC004 (TO: TO_CO_TFTU000001, CO: CO_TFTU000001, PICK) working at QC001; 60 s (the straddle carrier is picking up the container)
    - 2024-11-14 10:28:07 DEBUG location QC001: freeing lane 0 for CO CO_TFTU000001 (Straddle Carrier has freed up the limited space in a location)
    - 2024-11-14 10:28:07 INFO SC004 (TO: TO_CO_TFTU000001, CO: CO_TFTU000001, PICK) finished at QC001 (Straddle Carrier has finished picking up the container)
    - 2024-11-14 10:28:07 DEBUG finished expected schedule_element TO_CO_TFTU000001#CO_TFTU000001#PICK (Straddle Carrier has finished the logical element of the PICK which we call “schedule_element”)

In [16]:
log_file_path = "../data/logger_all.log"

In [17]:
log_lines = open(log_file_path, "r").readlines()

In [114]:
datetime_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}"
log_level_pattern = r"[A-Z]+"
vehicle_id_pattern = r"SC\d{3}"
to_id_pattern = r"TO_CO_TFTU\d{6}"
co_id_pattern = r"CO_TFTU\d{6}"
vehicle_status_pattern = r"(finished|working|waited)"
action_pattern = r"(PICK|DROP)"
tz_aware_datetime_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}[+-]\d{2}:\d{2}"
location_pattern = r"((QC)\d{3}|(RAIL)\d{3}.\d{2,3}|(WS|YARD)\d{3}.\d{2})"

log_patterns = {
    "container_submission": re.compile(
        rf"(?P<log_time>{datetime_pattern})\s+"
        rf"(?P<log_level>{log_level_pattern})\s+"
        rf"adding TO (?P<to_id>{to_id_pattern}),\s+"
        rf"EMT (?P<submission_time>{tz_aware_datetime_pattern})"
    ),
    "schedule": re.compile(
        rf"(?P<log_time>{datetime_pattern})\s+"
        rf"(?P<log_level>{log_level_pattern})\s+"
        rf"(?P<vehicle_id>{vehicle_id_pattern})\s+"
        rf"schedule\s+(\d:({to_id_pattern})#({co_id_pattern})#({action_pattern}),?)+"
    ),
    "travel_action": re.compile(
        rf"(?P<log_time>{datetime_pattern})\s+"  # Log timestamp
        rf"(?P<log_level>{log_level_pattern})\s+"  # Log level (INFO, ERROR, etc.)
        rf"(?P<vehicle_id>{vehicle_id_pattern})\s+starting\s+"  # Vehicle ID
        rf"(?P<to_id>{to_id_pattern})#"  # TO ID
        rf"(?P<co_id>{co_id_pattern})#"  # CO ID
        rf"(?P<action>{action_pattern}):\s+travel\s+"  # Action type
        rf"(?P<travel_start_time>{tz_aware_datetime_pattern})\s*-\s*"  # Travel start time
        rf"(?P<travel_end_time>{tz_aware_datetime_pattern}),\s*action\s+"  # Travel end time
        rf"(?P<action_start_time>{tz_aware_datetime_pattern})\s*-\s*"  # Action start time
        rf"(?P<action_end_time>{tz_aware_datetime_pattern})"  # Action end time
    ),
    "driving": re.compile(
        rf"(?P<log_time>{datetime_pattern})\s+"  # Log timestamp
        rf"(?P<log_level>{log_level_pattern})\s+"  # Log level (INFO, WARN, etc.)
        rf"(?P<vehicle_id>{vehicle_id_pattern})\s+"  # Vehicle ID
        rf"\(TO:\s+(?P<to_id>{to_id_pattern}),\s+"  # TO ID
        rf"CO:\s+(?P<co_id>{co_id_pattern}),\s+"  # CO ID
        rf"(?P<action>{action_pattern})\)\s+"  # Action type (e.g., PICK, DROP)
        rf"driving to\s+(?P<location_name>{location_pattern});\s+"  # Location name
        rf"(?P<duration_in_s>\d+)\s+s;\s+"  # Duration in seconds
        rf"(?P<distance_in_mm>\d+)\s+mm"  # Distance in millimeters
    ),
    "lane_usage": re.compile(
        rf"(?P<log_time>{datetime_pattern})\s+"  # Log timestamp
        rf"(?P<log_level>{log_level_pattern})\s+"  # Log level (DEBUG, INFO, etc.)
        rf"location\s+(?P<location_name>{location_pattern}):\s+"  # Location name
        rf"(?P<action>(using|freeing))\s+"
        rf"lane\s+(?P<lane_number>\d+)\s+for CO\s+"  # Lane number
        rf"(?P<co_id>{co_id_pattern})"  # CO ID
    ),
    "action": re.compile(
        rf"(?P<log_time>{datetime_pattern})\s+"  # Log timestamp
        rf"(?P<log_level>{log_level_pattern})\s+"  # Log level (INFO, DEBUG, etc.)
        rf"(?P<vehicle_id>{vehicle_id_pattern})\s+"  # Vehicle ID
        rf"\(TO:\s+(?P<to_id>{to_id_pattern}),\s+"  # TO ID
        rf"CO:\s+(?P<co_id>{co_id_pattern}),\s+"  # CO ID
        rf"(?P<action>{action_pattern})\)\s+"  # Action type (PICK, DROP)
        rf"(?P<status>{vehicle_status_pattern})\s+at\s+"  # Vehicle status (finished, working, waited)
        rf"(?P<location_name>{location_pattern})"  # Location name
        rf"(;\s+(?P<duration_in_s>\d+)\s+s)?"  # Duration in seconds
    ),
    "schedule_element": re.compile(
        rf"(?P<log_time>{datetime_pattern})\s+"  # Log timestamp
        rf"(?P<log_level>{log_level_pattern})\s+"  # Log level (DEBUG, INFO, etc.)
        rf"finished expected schedule_element\s+"  # Fixed phrase
        rf"(?P<to_id>{to_id_pattern})#"  # TO ID
        rf"(?P<co_id>{co_id_pattern})#"  # CO ID
        rf"(?P<action>{action_pattern})"  # Action (PICK, DROP, etc.)
    )
}


def parse_log(log: str) -> tuple[Optional[str], Optional[dict]]:
    log_pattern = None
    extracted_data = None
    for log_pattern_name, regex_compiler in log_patterns.items():
        # Check if the log string matches current regex pattern
        match = regex_compiler.match(log)
        if match:
            # Extract named groups if there's a match
            log_pattern = log_pattern_name
            extracted_data = match.groupdict()
            if log_pattern == "schedule":
                schedules = re.findall(rf"\d:(?P<to_id>{to_id_pattern})#(?P<co_id>{co_id_pattern})#(?P<action>{action_pattern})", log)
                extracted_data = {
                    **extracted_data,
                    "schedules": schedules
                }

            break

    return log_pattern, extracted_data

In [115]:
relevant_logs = {log_pattern: [] for log_pattern in log_patterns}
parsed_logs = []

for log_line in log_lines:
    pattern, data = parse_log(log_line)
    if pattern:
        relevant_logs[pattern].append(data)
        parsed_logs.append((pattern, data))

In [116]:
for pattern, logs in relevant_logs.items():
    print(pattern, len(logs))

container_submission 286
schedule 8225
travel_action 572
driving 572
lane_usage 968
action 1229
schedule_element 572


In [117]:
relevant_logs["schedule"][0]

{'log_time': '2024-11-14 10:27:07',
 'log_level': 'INFO',
 'vehicle_id': 'SC001',
 'schedules': [('TO_CO_TFTU000018', 'CO_TFTU000018', 'PICK', 'PICK'),
  ('TO_CO_TFTU000018', 'CO_TFTU000018', 'DROP', 'DROP')]}

In [82]:
sample_container_info = [d for (pattern, d) in parsed_logs if d.get("co_id", None) == "CO_TFTU000023" or d.get("to_id", None) == "TO_CO_TFTU000023"]
sample_container_info

[{'log_time': '2024-11-14 10:26:30',
  'log_level': 'INFO',
  'to_id': 'TO_CO_TFTU000023',
  'submission_time': '2024-11-14 10:31:28+01:00'},
 {'log_time': '2024-11-14 10:27:07',
  'log_level': 'INFO',
  'vehicle_id': 'SC002',
  'to_id': 'TO_CO_TFTU000023',
  'co_id': 'CO_TFTU000023',
  'action': 'PICK',
  'travel_start_time': '2024-11-14 10:27:00+01:00',
  'travel_end_time': '2024-11-14 10:27:29+01:00',
  'action_start_time': '2024-11-14 10:27:29+01:00',
  'action_end_time': '2024-11-14 10:28:29+01:00'},
 {'log_time': '2024-11-14 10:27:10',
  'log_level': 'INFO',
  'vehicle_id': 'SC002',
  'to_id': 'TO_CO_TFTU000023',
  'co_id': 'CO_TFTU000023',
  'action': 'PICK',
  'location_name': 'QC003',
  'duration_in_s': '29',
  'distance_in_mm': '162693'},
 {'log_time': '2024-11-14 10:30:07',
  'log_level': 'DEBUG',
  'location_name': 'QC003',
  'action': 'using',
  'lane_number': '0',
  'co_id': 'CO_TFTU000023'},
 {'log_time': '2024-11-14 10:30:07',
  'log_level': 'INFO',
  'vehicle_id': 'SC0

In [83]:
pd.DataFrame(sample_container_info)

Unnamed: 0,log_time,log_level,to_id,submission_time,vehicle_id,co_id,action,travel_start_time,travel_end_time,action_start_time,action_end_time,location_name,duration_in_s,distance_in_mm,lane_number,status
0,2024-11-14 10:26:30,INFO,TO_CO_TFTU000023,2024-11-14 10:31:28+01:00,,,,,,,,,,,,
1,2024-11-14 10:27:07,INFO,TO_CO_TFTU000023,,SC002,CO_TFTU000023,PICK,2024-11-14 10:27:00+01:00,2024-11-14 10:27:29+01:00,2024-11-14 10:27:29+01:00,2024-11-14 10:28:29+01:00,,,,,
2,2024-11-14 10:27:10,INFO,TO_CO_TFTU000023,,SC002,CO_TFTU000023,PICK,,,,,QC003,29.0,162693.0,,
3,2024-11-14 10:30:07,DEBUG,,,,CO_TFTU000023,using,,,,,QC003,,,0.0,
4,2024-11-14 10:30:07,INFO,TO_CO_TFTU000023,,SC002,CO_TFTU000023,PICK,,,,,QC003,158.0,,,waited
5,2024-11-14 10:30:07,INFO,TO_CO_TFTU000023,,SC002,CO_TFTU000023,PICK,,,,,QC003,60.0,,,working
6,2024-11-14 10:31:07,DEBUG,,,,CO_TFTU000023,freeing,,,,,QC003,,,0.0,
7,2024-11-14 10:31:07,INFO,TO_CO_TFTU000023,,SC002,CO_TFTU000023,PICK,,,,,QC003,,,,finished
8,2024-11-14 10:31:07,DEBUG,TO_CO_TFTU000023,,,CO_TFTU000023,PICK,,,,,,,,,
9,2024-11-14 10:31:07,INFO,TO_CO_TFTU000023,,SC002,CO_TFTU000023,DROP,2024-11-14 10:36:03+01:00,2024-11-14 10:38:28+01:00,2024-11-14 10:38:28+01:00,2024-11-14 10:39:28+01:00,,,,,


In [118]:
event_log = []
for pattern, log_data in parsed_logs:
    if pattern == "container_submission":
        co_id = log_data["to_id"].removeprefix("TO_")
        submission_start = {
            "to_id": log_data["to_id"],
            "co_id": co_id,
            "action": "submission",
            "timestamp": log_data["submission_time"],
            "lifecycle": "start"
        }
        submission_end = {
            "to_id": log_data["to_id"],
            "co_id": co_id,
            "action": "submission",
            "timestamp": log_data["submission_time"],
            "lifecycle": "complete"
        }
        event_log.extend([submission_start, submission_end])
    elif pattern == "schedule":
        for schedule_data in log_data["schedules"]:
            to_id, co_id, action = schedule_data[:3]
            event_log.append({
                "to_id": to_id,
                "co_id": co_id,
                "vehicle_id": None,
                "action": "pick container" if action == "PICK" else "drop container",
                "timestamp": log_data["log_time"],
                "lifecycle": "schedule"
            })
    elif pattern == "travel_action":
        travel_start = {
            "to_id": log_data["to_id"],
            "co_id": log_data["co_id"],
            "vehicle_id": log_data["vehicle_id"],
            "action": "deploy vehicle to pick container" if log_data["action"] == "PICK" else "deploy vehicle to drop container",
            "timestamp": log_data["travel_start_time"],
            "lifecycle": "start"
        }
        travel_end = {
            "to_id": log_data["to_id"],
            "co_id": log_data["co_id"],
            "vehicle_id": log_data["vehicle_id"],
            "action": "deploy vehicle to pick container" if log_data["action"] == "PICK" else "deploy vehicle to drop container",
            "timestamp": log_data["travel_start_time"],
            "lifecycle": "complete"
        }
        action_start = {
            "to_id": log_data["to_id"],
            "co_id": log_data["co_id"],
            "vehicle_id": log_data["vehicle_id"],
            "action": "pick container" if log_data["action"] == "PICK" else "drop container",
            "timestamp": log_data["action_start_time"],
            "lifecycle": "start"
        }
        action_end = {
            "to_id": log_data["to_id"],
            "co_id": log_data["co_id"],
            "vehicle_id": log_data["vehicle_id"],
            "action": "pick container" if log_data["action"] == "PICK" else "drop container",
            "timestamp": log_data["action_end_time"],
            "lifecycle": "complete"
        }
        event_log.extend([travel_start, travel_end, action_start, action_end])



In [123]:
event_log_df = pd.DataFrame(event_log)
event_log_df.groupby("co_id", as_index=False).apply(lambda x: x.sort_values('timestamp'))

  event_log_df.groupby("co_id", as_index=False).apply(lambda x: x.sort_values('timestamp'))


Unnamed: 0,Unnamed: 1,to_id,co_id,action,timestamp,lifecycle,vehicle_id
0,0,TO_CO_TFTU000001,CO_TFTU000001,submission,2024-11-14 10:26:28+01:00,start,
0,1,TO_CO_TFTU000001,CO_TFTU000001,submission,2024-11-14 10:26:28+01:00,complete,
0,162,TO_CO_TFTU000001,CO_TFTU000001,deploy vehicle to pick container,2024-11-14 10:27:00+01:00,start,SC004
0,163,TO_CO_TFTU000001,CO_TFTU000001,deploy vehicle to pick container,2024-11-14 10:27:00+01:00,complete,SC004
0,116,TO_CO_TFTU000001,CO_TFTU000001,pick container,2024-11-14 10:27:07,schedule,
...,...,...,...,...,...,...,...
285,27304,TO_CO_TFTU000286,CO_TFTU000286,pick container,2024-11-14 11:33:48+01:00,complete,SC019
285,26469,TO_CO_TFTU000286,CO_TFTU000286,submission,2024-11-14 11:34:28+01:00,complete,
285,26468,TO_CO_TFTU000286,CO_TFTU000286,submission,2024-11-14 11:34:28+01:00,start,
285,28399,TO_CO_TFTU000286,CO_TFTU000286,drop container,2024-11-14 11:35:34+01:00,start,SC019


In [60]:
vehicles_meta_df = pd.read_excel("../data/VOSimu-InputInformation.xlsx", sheet_name="Vehicles")
locations_meta_df = pd.read_excel("../data/VOSimu-InputInformation.xlsx", sheet_name="Locations")
co_meta_df = pd.read_excel("../data/VOSimu-InputInformation.xlsx", sheet_name="ContainerOrders")

Pipeline Overview:

A vehicle is assigned to a specific container. The vehicle drive from its original location to pick up the container. Once picked up, the container is taken all way to its destination and dropped by the vehicle.