## Timestamp Analysis

### TODO:
- Look into time: 26, 15, 21*, 31*, 23, 05
- Look into date: 33, 30, 24, 25, 21*, 31*
- Look into mono: 32, 02, 15, 11

In [None]:
import json
import pandas as pd

from datetime import datetime, timedelta, timezone
from os import listdir, path

# Camera database (JSON); its top-level keys double as per-camera directory names.
CAMERA_DB_PATH = "./metadata/cameras.json"

# One metadata directory per method being compared. The first entry is the one
# whose directory listing decides which files get analysed.
META_PATHS = [
  "./metadata/FULL-0801",
  "./metadata/0801-1152",
  "./metadata/0801-1152-pp0",
  "./metadata/0801-1152-crop",
  "./metadata/0801-1152-pp0-crop",
]

# Fixed UTC-03:00 offset applied to every timestamp conversion.
TZ_BR = timezone(timedelta(hours=-3))

# Human-readable layout used when printing timestamps.
DATETIME_STR_FORMAT = '%Y-%m-%d %H:%M:%S'

# Reference date (2023-01-08, midnight, UTC-03:00) that timestamps are checked against.
DEFAULT_DATETIME_2023 = datetime(2023, 1, 8, tzinfo=TZ_BR)

# Sentinel instant (2025-01-01, midnight, UTC-03:00) counted as a time error.
# NOTE(review): presumably written by the extraction step when no time could be
# read -- confirm against the code that produces the metadata.
ERROR_DATETIME = datetime(2025, 1, 1, tzinfo=TZ_BR)
ERROR_TIMESTAMP = int(ERROR_DATETIME.timestamp())

In [None]:
def correct_date(dt):
  """Tell whether `dt` lies on (or right next to) the reference date.

  The year and month must equal those of DEFAULT_DATETIME_2023, while the
  day of month may differ from the reference day by at most one.

  Args:
    dt: object exposing `year`, `month` and `day` (e.g. a datetime).

  Returns:
    True when the date is accepted, False otherwise.
  """
  reference = DEFAULT_DATETIME_2023
  same_year_month = (dt.year, dt.month) == (reference.year, reference.month)
  # "< 2" tolerates the day before and the day after the reference day.
  return same_year_month and abs(reference.day - dt.day) < 2

In [None]:
def fix_dates(mseeks):
  """Force the calendar date of every timestamp to 2023-01-08.

  Each timestamp is interpreted in TZ_BR, its year/month/day are overwritten
  and the time of day is kept.

  Args:
    mseeks: iterable of sequences of `(timestamp, frame)` pairs.

  Returns:
    A new list of lists of `[timestamp, frame]` with the re-dated timestamps;
    frames are passed through untouched.
  """
  def _pin_date(ts):
    moved = datetime.fromtimestamp(ts, tz=TZ_BR).replace(year=2023, month=1, day=8)
    return int(moved.timestamp())

  return [[[_pin_date(ts), f] for ts, f in s] for s in mseeks]

In [None]:
def remove_bad_times(mseeks):
  """Drop entries whose timestamp equals ERROR_TIMESTAMP and re-date the rest.

  Besides filtering, every kept timestamp is interpreted in TZ_BR and has its
  calendar date overwritten with 2023-01-08 (time of day preserved).

  Args:
    mseeks: iterable of sequences of `(timestamp, frame)` pairs.

  Returns:
    A new list of lists of `[timestamp, frame]`; the inner lists may be
    shorter than the inputs because sentinel entries are skipped.
  """
  def _pin_date(ts):
    moved = datetime.fromtimestamp(ts, tz=TZ_BR).replace(year=2023, month=1, day=8)
    return int(moved.timestamp())

  return [
    [[_pin_date(ts), f] for ts, f in s if ts != ERROR_TIMESTAMP]
    for s in mseeks
  ]

In [None]:
def timestamp_str(ts):
  """Format a POSIX timestamp as text, using TZ_BR and DATETIME_STR_FORMAT."""
  local_dt = datetime.fromtimestamp(ts, tz=TZ_BR)
  return local_dt.strftime(DATETIME_STR_FORMAT)

def datetime_str(dt):
  """Format `dt` as text using DATETIME_STR_FORMAT (no timezone conversion is done)."""
  formatted = dt.strftime(DATETIME_STR_FORMAT)
  return formatted

In [None]:
# Load the camera database. JSON text is UTF-8, so decode it explicitly rather
# than depending on the platform's default text encoding.
with open(CAMERA_DB_PATH, "r", encoding="utf-8") as f:
  camera_data = json.load(f)

In [None]:
# One record per (method, metadata file) pair, holding the error counters
# computed below. It is filled for every entry of META_PATHS.
error_data = []

for io_dir in sorted(camera_data.keys()):
  vid_dir_paths = [path.join(dir_path, io_dir) for dir_path in META_PATHS]
  # The first method's directory decides which files are analysed; the same
  # file name is then opened in every other method's directory.
  input_files = sorted([f for f in listdir(vid_dir_paths[0]) if f.endswith("json")])
  print("processing", io_dir)

  for io_file in input_files:
    input_file_paths = [path.join(file_path, io_file) for file_path in vid_dir_paths]

    # Read each method's file inside a context manager so the handle is closed
    # immediately instead of staying open until it is garbage collected.
    video_data = []
    for input_file_path in input_file_paths:
      with open(input_file_path, "r", encoding="utf-8") as fo:
        video_data.append(json.load(fo))

    mcontinuous = [vd["continuous"] for vd in video_data]

    # "seek" holds (timestamp, frame) pairs, one list per method.
    mseeks = [vd["seek"] for vd in video_data]
    # mseeks = fix_dates(mseeks) # TRIAL
    # mseeks = remove_bad_times(mseeks) # TRIAL

    mdatetimes = [[datetime.fromtimestamp(ts, tz=TZ_BR) for (ts, _) in s] for s in mseeks]
    mtimestamps = [[ts for (ts, _) in s] for s in mseeks]
    mframes = [[f for (_, f) in s] for s in mseeks]
    stamp_counts = [len(s) for s in mseeks]

    # check if all methods have same number of timestamps
    len_errors = [len(s) != len(mseeks[0]) for s in mseeks]
    if any(len_errors):
      error_lengths = [len(s) for s in mseeks]
      print(io_file, "lengths", error_lengths, mseeks)

    # check if the timestamp equals the ERROR_TIMESTAMP sentinel
    time_errors = [[ts == ERROR_TIMESTAMP for ts in tss] for tss in mtimestamps]
    time_error_counts = [sum(te) for te in time_errors]

    if any(time_error_counts):
      time_values = [[timestamp_str(ts) for ts in tss if ts == ERROR_TIMESTAMP] for tss in mtimestamps]
      print(io_file, "time", time_error_counts, time_values)

    # check if date is rejected by correct_date (reference date 2023/01/08)
    date_errors = [[not correct_date(dt) for dt in dts] for dts in mdatetimes]
    date_error_counts = [sum(dte) for dte in date_errors]

    if any(date_error_counts):
      date_values = [[datetime_str(dt) for dt in dts if not correct_date(dt)] for dts in mdatetimes]
      print(io_file, "date", date_error_counts, date_values)

    # check if all methods have analyzed the same frames
    # Every method is compared against every method (itself included, which adds
    # zero); zip() stops at the shorter list, so extra entries are not compared.
    frame_errors = [[[f0 != f1 for f0, f1 in zip(fs0, fs1)] for fs1 in mframes] for fs0 in mframes]
    frame_error_counts = [sum([sum(fe) for fe in fes]) for fes in frame_errors]

    if any(frame_error_counts):
      frame_values = [[[(f0, f1) for f0, f1 in zip(fs0, fs1) if f0 != f1] for fs1 in mframes] for fs0 in mframes]
      print(io_file, "frame", frame_error_counts, frame_values)

    # check mismatch between pairs of stamps (same all-against-all scheme as frames)
    stamp_errors = [[[ts0 != ts1 for ts0, ts1 in zip(tss0, tss1)] for tss1 in mtimestamps] for tss0 in mtimestamps]
    stamp_error_counts = [sum([sum(pe) for pe in pes]) for pes in stamp_errors]

    if any(stamp_error_counts):
      stamp_values = [[[(timestamp_str(ts0), timestamp_str(ts1)) for ts0, ts1 in zip(tss0, tss1) if ts0 != ts1] for tss1 in mtimestamps] for tss0 in mtimestamps]
      print(io_file, "mismatch", stamp_error_counts, stamp_values)

    # check monotonicity: a timestamp smaller than its predecessor is an error
    # (equal consecutive timestamps are accepted)
    mono_errors = [[ts1 < ts0 for ts0, ts1 in zip(tss[:-1], tss[1:])] for tss in mtimestamps]
    mono_error_counts = [sum(me) for me in mono_errors]

    if any(mono_error_counts):
      mono_values = [[(timestamp_str(ts0), timestamp_str(ts1)) for ts0, ts1 in zip(tss[:-1], tss[1:]) if ts1 < ts0] for tss in mtimestamps]
      print(io_file, "monotonicity", mono_error_counts, mono_values)

    # append error data
    for i, p in enumerate(META_PATHS):
      error_data.append({
        "method": p.replace("./metadata/", ""),
        "name": io_file.replace(".json", ""),
        "camera": io_dir,
        "continuous": mcontinuous[i],
        "time-error": time_error_counts[i],
        "date-error": date_error_counts[i],
        "frame-error": frame_error_counts[i],
        "stamp-error": stamp_error_counts[i],
        "mono-error": mono_error_counts[i],
        "stamp-count": stamp_counts[i]
      })

In [None]:
# Turn the collected per-(method, file) records into a table.
error_df = pd.DataFrame.from_records(error_data)

# Identifier columns; they are excluded whenever counters are summed.
non_error = ["method", "name", "camera"]

# Distinct values of each identifier column, in the order listed in non_error.
methods, files, cameras = (error_df[col].unique() for col in non_error)

In [None]:
# Split the records per method and print, for each method, the summed counters
# together with their share.
by_method = {m: error_df[error_df["method"] == m] for m in methods}

for m, df in by_method.items():
  nrows = len(df.index)
  ncols = len(df.columns)
  nstamps = df["stamp-count"].sum()
  print(m, ":", nrows, "records,", nstamps, "timestamps")

  # Flag records whose time/date error count exceeds 75%, 50% and 25% of 17.
  # NOTE(review): 17 looks like the expected number of timestamps per record -- confirm.
  # Every column is inserted at the same position, so later ones end up first.
  threshold_cols = []
  for met in ["time", "date"]:
    for pct in [75, 50, 25]:
      limit = int(pct/100 * 17)
      flag_col = f"{met}-error_{pct}"
      df.insert(ncols, flag_col, (df[f"{met}-error"] > limit).values)
      threshold_cols.append(flag_col)

  error_sums = df.drop(columns=non_error).sum().to_frame(name="sums")
  error_sums["pct"] = error_sums["sums"] / nstamps

  # "continuous" and the threshold flags are per-record values, so their share
  # is taken over the number of records instead of the number of timestamps.
  for per_record_row in ["continuous"] + threshold_cols:
    error_sums.loc[per_record_row, "pct"] = error_sums.loc[per_record_row, "sums"] / nrows

  print(error_sums, "\n")

In [None]:
# One sub-table per (method, camera) combination.
by_method_camera = [error_df[(error_df["method"] == m) & (error_df["camera"] == c)] for m in methods for c in cameras]

# Single-row frames are gathered here and concatenated once after the loop;
# concatenating inside the loop would copy the growing table on every pass.
camera_frames = []

for df in by_method_camera:
  m = df["method"].values[0]
  c = df["camera"].values[0]
  nstamps = df["stamp-count"].sum()

  # Summed counters plus the same values divided by the number of timestamps
  # (stored under the "-pct" suffix).
  msum = df.drop(columns=non_error).sum()
  mpct = (msum / nstamps).add_suffix("-pct")
  mrow = pd.concat((pd.Series([m,c], index=["method","camera"]), msum, mpct))
  # "stamp-count-pct" would be stamp-count divided by itself, so it is dropped.
  mframe = pd.DataFrame([mrow.values], columns=mrow.index.values).drop(columns=["stamp-count-pct"])

  camera_frames.append(mframe)

# pd.concat rejects an empty list, so fall back to an empty frame in that case.
by_camera_df = pd.concat(camera_frames) if camera_frames else pd.DataFrame()

In [None]:
# First ten rows when ordered by descending "time-error-pct".
by_camera_df.sort_values("time-error-pct", ascending=False).head(10)

In [None]:
# First five rows when ordered by descending "date-error-pct".
by_camera_df.sort_values("date-error-pct", ascending=False).head(5)

In [None]:
# First ten rows when ordered by descending "stamp-error-pct".
by_camera_df.sort_values("stamp-error-pct", ascending=False).head(10)

In [None]:
# First ten rows when ordered by descending "mono-error-pct".
by_camera_df.sort_values("mono-error-pct", ascending=False).head(10)

In [None]:
# Keep only the rows of the "0801-1152-pp0" method, then discard the method
# label and the raw date/frame error counters.
by_camera_pp0_df = (
  by_camera_df[by_camera_df["method"] == "0801-1152-pp0"]
  .drop(columns=["method", "date-error", "frame-error"])
)

In [None]:
# First ten pp0 rows when ordered by descending "time-error-pct".
by_camera_pp0_df.sort_values("time-error-pct", ascending=False).head(10)

In [None]:
# First ten pp0 rows when ordered by descending "date-error-pct".
by_camera_pp0_df.sort_values("date-error-pct", ascending=False).head(10)

In [None]:
# First ten pp0 rows when ordered by descending "mono-error-pct".
by_camera_pp0_df.sort_values("mono-error-pct", ascending=False).head(10)