In [42]:
from datetime import datetime
import plotly.express as px
import pandas as pd

def load_logs(log_file: str):
    """Load detection events from a log file without any timestamp repair.

    Only lines containing the text "detected at" are used. For each of
    them the bee id is the integer found between the first "=" and the
    following space, and the timestamp is the last space-separated token,
    parsed strictly as ``%H:%M:%S``.

    Args:
        log_file: Path of the log file to read.

    Returns:
        A DataFrame with one row per detection line and the columns
        ``bee_id`` and ``timestamp`` (a ``datetime``).

    Raises:
        IndexError: If a detection line contains no "=".
        ValueError: If the bee id is not an integer or the timestamp does
            not match ``%H:%M:%S``.
    """
    with open(log_file, "r") as handle:
        detections = [row for row in handle if "detected at" in row]

    records = []
    for row in detections:
        id_field = row.split("=")[1]
        clock_field = row.split(" ")[-1]
        records.append({
            "bee_id": int(id_field.split(" ")[0]),
            "timestamp": datetime.strptime(clock_field.strip(), "%H:%M:%S"),
        })

    return pd.DataFrame(records)

def parse_log(log, verbose=True) -> pd.DataFrame:
    """Parse detection events from a log file, repairing malformed timestamps.

    Only lines containing "detected at" are used. The bee id is the integer
    between the first "=" and the following space; the timestamp is the last
    space-separated token of the line. Timestamps with missing colons or
    missing digits are patched up before being parsed as ``%H:%M:%S``; rows
    whose timestamp is too short or still unparseable are dropped.

    Args:
        log: Path of the log file to read.
        verbose: When true, print a message for every dropped row.

    Returns:
        A DataFrame with the columns ``bee_id`` (int), ``timestamp``
        (``HH:MM:SS`` string) and ``timestamp was edited`` (bool). If no row
        survives, an empty DataFrame with those same columns is returned.

    Raises:
        IndexError: If a detection line contains no "=".
        ValueError: If the text after "=" does not start with an integer.
    """
    columns = ["bee_id", "timestamp", "timestamp was edited"]

    with open(log, "r") as f:
        lines = f.readlines()

    logs = []

    for line in lines:
        if "detected at" not in line:
            continue

        bee_id = int(line.split("=")[1].split(" ")[0])
        timestamp_to_parse = line.split(" ")[-1].strip()

        ####################
        # clean timestamps #
        ####################

        if len(timestamp_to_parse) < 5:
            # there just isn't enough to parse
            if verbose:
                print("Dropping row with insufficient timestamp:", timestamp_to_parse)
            continue

        edited_timestamp = False

        if ":" not in timestamp_to_parse:
            # add missing colons: "HHMMSS" -> "HH:MM:SS"
            timestamp_to_parse = timestamp_to_parse[:2] + ":" + timestamp_to_parse[2:4] + ":" + timestamp_to_parse[4:]
            edited_timestamp = True

        if timestamp_to_parse.count(":") == 1:
            # Exactly one colon: rebuild everything after it as "MM:SS".
            edited_timestamp = True
            chars_after_colon = timestamp_to_parse.split(":")[1]
            if len(chars_after_colon) == 3:
                # two minute digits + one second digit: pad the seconds with a zero
                chars_after_colon = chars_after_colon[:2] + ":" + chars_after_colon[2] + "0"
            elif len(chars_after_colon) == 2:
                # add missing seconds
                chars_after_colon = chars_after_colon + ":00"
            elif len(chars_after_colon) == 1:
                # add missing seconds and minutes
                # NOTE(review): to get here the part before the colon has at
                # least three characters, so strptime below rejects the result
                # and the row is dropped - confirm that is the intent.
                chars_after_colon = "0:00" + chars_after_colon
            else:
                # four or more characters (or none): split after the first two
                chars_after_colon = chars_after_colon[:2] + ":" + chars_after_colon[2:]

            timestamp_to_parse = timestamp_to_parse.split(":")[0] + ":" + chars_after_colon

        # parse (anything beyond "HH:MM:SS" is discarded first)
        timestamp_to_parse = timestamp_to_parse[:8]
        try:
            timestamp = datetime.strptime(timestamp_to_parse, "%H:%M:%S")
        except ValueError:
            if verbose:
                print("Unable to parse timestamp:", timestamp_to_parse)
            continue

        logs.append({
            "bee_id": bee_id,
            "timestamp": timestamp,
            "timestamp was edited": edited_timestamp
        })

    # Explicit columns keep the schema stable when every row was dropped;
    # without them an empty frame has no "timestamp" column to format.
    df = pd.DataFrame(logs, columns=columns)
    if not df.empty:
        df["timestamp"] = df["timestamp"].dt.strftime('%H:%M:%S')
    return df


def plot_timestamps_per_BI(timestamps, renderer="browser", render_mode="svg"):
    """Show a scatter plot of detection timestamps per bee id.

    Args:
        timestamps: Data with the columns ``timestamp``, ``bee_id`` and
            ``timestamp was edited`` (the shape returned by ``parse_log``).
        renderer: Plotly display renderer passed to ``fig.show`` (for
            example "browser").
        render_mode: How ``px.scatter`` draws the points: "svg", "webgl" or
            "auto". This used to receive the ``renderer`` value, which is
            not a valid render mode; "svg" keeps the drawing mode the
            figure effectively had before.
    """
    fig = px.scatter(
        timestamps,
        x="timestamp",
        y="bee_id",
        color="timestamp was edited",
        title="Timestamps per BI",
        render_mode=render_mode,
    )
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False,
                     zeroline=True, zerolinecolor='black', zerolinewidth=3)
    fig.update_layout(height=1000, plot_bgcolor='white')
    fig.show(renderer=renderer)

In [45]:
# Read every raw line of the 12pm log into `lines`.
with open("./logs/12pm_log.txt", "r") as f:
    lines = list(f)

In [48]:
# Parse the 12pm log quietly (no per-row drop messages) and display the result.
logs = parse_log(log="./logs/12pm_log.txt", verbose=False)
logs

Unnamed: 0,bee_id,timestamp,timestamp was edited
0,30,12:00:05,True
1,30,12:00:05,True
2,34,12:00:50,True
3,34,12:00:50,True
4,35,12:00:52,False
...,...,...,...
4461,24,13:00:41,False
4462,16,13:00:43,False
4463,16,13:00:43,False
4464,16,13:00:45,False


In [49]:
# Plot the parsed detections using the function's default renderer.
plot_timestamps_per_BI(timestamps=logs)