## Timestamp

In [None]:
import cv2
import json
import numpy as np
import re
import torch

from datetime import datetime
from os import listdir, makedirs, path
from transformers import TrOCRProcessor, VisionEncoderDecoderModel

# JSON database whose top-level keys are used as per-camera directory names.
CAMERA_DB_PATH = "./metadata/cameras.json"

# Root of the input videos and root of the per-video JSON metadata output.
VIDEO_PATH = "../../vids/0801-1152"
VIDEO_DATA_PATH = "./metadata/0801-1152-crop-64"

# Hugging Face model id used for reading the on-screen timestamp.
OCR_MODEL = 'microsoft/trocr-large-printed'

torch.cuda.empty_cache()
# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = torch.device("cuda")
else:
  device = torch.device("cpu")

processor = TrOCRProcessor.from_pretrained(OCR_MODEL)
model = VisionEncoderDecoderModel.from_pretrained(OCR_MODEL)
model = model.to(device)

In [None]:
# Load the camera database; its keys drive the per-camera directory loop.
with open(CAMERA_DB_PATH, "r") as f:
  camera_data = json.loads(f.read())

In [None]:
# Full "DD MM YYYY HH MM SS" overlay text with loose separators.
# NOTE(review): inside the character class, ` -/` is a *range* (space through
# '/'), so '.', ',' etc. are accepted as separators too -- presumably
# deliberate leniency towards OCR noise; confirm.
DATETIME_PATTERN = r'([0-9]{1,2})[ -/:]([0-9]{1,2})[ -/:](202[0-9])[ -/:]([0-9]{1,2})[ -/:]?([0-9]{1,2})[ -/:]?([0-9]{1,2})'
# Fallback used when only a clock time "HH:MM:SS" can be found.
TIME_PATTERN = r'([0-9]{2}):([0-9]{2}):([0-9]{2})'
DATETIME_FORMAT = '%d%m%Y%H%M%S%z'

# Sentinel (day, month, year, hour, minute, second) for unreadable text.
ERROR_DATETIME = ["01", "01", "2025", "00", "00", "00"]

def string_to_timestamp(datetime_string):
  """Convert OCR'd overlay text into a Unix timestamp (int seconds).

  The text is matched against DATETIME_PATTERN first; if that fails,
  TIME_PATTERN is tried and the date defaults to 08/01/2023. The matched
  fields are zero-padded, the year is forced to 2023, a leading "8" in the
  minutes/seconds is rewritten to "3", and the result is parsed as
  day-month-year with a fixed -03:00 UTC offset.

  Args:
    datetime_string: raw text produced by the OCR step.

  Returns:
    The parsed timestamp, or the ERROR_DATETIME sentinel timestamp
    (2025-01-01 00:00:00 -03:00) when no pattern matches or the matched
    fields do not form a valid date.
  """
  error_timestamp = int(
    datetime.strptime("".join(ERROR_DATETIME) + "-0300", DATETIME_FORMAT).timestamp()
  )

  # Presumably characters the OCR confuses with the digit zero.
  datetime_string = re.sub(r"[@CDOQcdo]", "0", datetime_string)

  found = re.search(DATETIME_PATTERN, datetime_string)
  if found is not None:
    matches = list(found.groups())
  else:
    found = re.search(TIME_PATTERN, datetime_string)
    if found is None:
      # Return the sentinel directly: running it through the normalisation
      # below would rewrite its year to 2023 and make the failure
      # indistinguishable from a real reading.
      return error_timestamp
    matches = ["08", "01", "2023"] + list(found.groups())

  # Zero-pad every field to two digits (this also trims the year to "2x").
  matches = [('00'+m)[-2:] for m in matches]
  # Rebuild a four-digit year, then pin it to 2023.
  matches[2] = ('20'+matches[2])[-4:]
  matches[2] = re.sub(r"202[0-9]", r"2023", matches[2])
  # Minutes/seconds cannot start with 8; treat it as a misread 3.
  matches[4] = re.sub(r"8([0-9])", r"3\1", matches[4])
  matches[5] = re.sub(r"8([0-9])", r"3\1", matches[5])
  with_utc_offset = "".join(matches) + "-0300"

  try:
    dt = datetime.strptime(with_utc_offset, DATETIME_FORMAT)
  except ValueError:
    # Fields matched the regex but are not a valid date/time (e.g. 31/02).
    return error_timestamp

  return int(dt.timestamp())

In [None]:
class Stamp:
  """A timestamp paired with the video position (in seconds) it was read at."""

  def __init__(self, timestamp, seconds):
    self.timestamp, self.seconds = timestamp, seconds

  def stamp(self):
    """Return the pair as a ``[timestamp, seconds]`` list."""
    return [self.timestamp, self.seconds]

  def __str__(self):
    # Same text as the list form, e.g. "[1673202645, 12]".
    return str(self.stamp())

In [None]:
def get_frames(vid, frame, n=7, step=1):
  """Read ``n`` frames spaced ``step`` apart, roughly centred on ``frame``.

  Args:
    vid: an opened ``cv2.VideoCapture``.
    frame: index of the frame the window is centred on.
    n: number of frames to read.
    step: distance, in frames, between consecutive reads.

  Returns:
    A list of ``n`` images in read order. The success flag of ``vid.read()``
    is not checked, so an entry is whatever image value the failed read
    returned.
  """
  frame_count = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
  start = int(frame) - (n // 2) * step
  # Keep the whole window inside the video. Clamp against the end first and
  # against zero last, so a video shorter than the window starts at frame 0
  # instead of a negative position.
  start = max(0, min(start, frame_count - n * step))

  frames = []
  for i in range(n):
    vid.set(cv2.CAP_PROP_POS_FRAMES, start + i * step)
    _, image = vid.read()
    frames.append(image)
  return frames

In [None]:
def get_max_count(txts):
  """Return the most frequent item of ``txts``.

  Ties go to the item that appears first. An empty input raises IndexError.
  """
  tally = {}
  for candidate in txts:
    tally.setdefault(candidate, 0)
    tally[candidate] += 1
  # The sort is stable, so equally frequent items keep first-seen order.
  ranked = sorted(tally, key=tally.get, reverse=True)
  return ranked[0]

In [None]:
def ocr(imgs, groups=1):
  """Run OCR on ``imgs`` and return one majority-voted string per group.

  The decoded texts are reshaped into ``groups`` equally sized rows (so
  ``len(imgs)`` must be divisible by ``groups``) and the most frequent text
  of each row is returned.
  """
  encoded = processor(images=imgs, return_tensors="pt")
  batch = encoded.pixel_values.to(device)
  token_ids = model.generate(batch)
  texts = processor.batch_decode(token_ids, skip_special_tokens=True)

  per_group = np.array(texts).reshape(groups, -1)
  winners = []
  for group_texts in per_group:
    winners.append(get_max_count(group_texts))
  return winners

In [None]:
def crop_frame(frame, crop_x0, crop_x1, crop_y0, crop_y1):
  """Return rows ``crop_y0:crop_y1`` and columns ``crop_x0:crop_x1`` of ``frame``."""
  rows = slice(crop_y0, crop_y1)
  cols = slice(crop_x0, crop_x1)
  return frame[rows, cols]

In [None]:
# 1x1 rectangular structuring element shared by the erode/dilate pass below.
morph_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, 1))

def clean_frame(frame, crop_x0, crop_x1, crop_y0, crop_y1):
  """Crop ``frame`` and return a binarised, inverted, 3-channel version.

  Steps: crop -> grayscale -> binary threshold at 190 -> invert ->
  erode then dilate with ``morph_kernel`` -> back to three channels.
  """
  region = frame[crop_y0:crop_y1, crop_x0:crop_x1]
  # NOTE(review): frames read through cv2.VideoCapture are normally BGR, so
  # RGB2GRAY swaps the red/blue weights here -- confirm whether intended.
  gray = cv2.cvtColor(region, cv2.COLOR_RGB2GRAY)
  _, binary = cv2.threshold(gray, 190, 255, cv2.THRESH_BINARY)
  inverted = cv2.bitwise_not(binary)
  eroded = cv2.erode(inverted, morph_kernel)
  opened = cv2.dilate(eroded, morph_kernel)
  return cv2.cvtColor(opened, cv2.COLOR_GRAY2RGB)

In [None]:
def get_stamps(vid, keyframes, prefun=crop_frame):
  """OCR the timestamp overlay around each keyframe and return Stamp objects.

  Args:
    vid: an opened ``cv2.VideoCapture``.
    keyframes: list of frame indices to stamp.
    prefun: preprocessing callable ``(frame, x0, x1, y0, y1) -> image``
      applied to every sampled frame before OCR.

  Returns:
    One ``Stamp(timestamp, seconds)`` per keyframe, where ``seconds`` is the
    keyframe index floor-divided by the video FPS.
  """
  frame_w = vid.get(cv2.CAP_PROP_FRAME_WIDTH)
  frame_h = vid.get(cv2.CAP_PROP_FRAME_HEIGHT)
  fps = vid.get(cv2.CAP_PROP_FPS)

  # Overlay region: starts at 58% of the width and spans 40% of it;
  # starts at 4% of the height and spans another 4%.
  crop_x0 = int(0.58 * frame_w)
  crop_x1 = crop_x0 + int(0.4 * frame_w)
  crop_y0 = int(0.04 * frame_h)
  crop_y1 = crop_y0 + int(0.04 * frame_h)

  frame_seconds = []
  imgs = []
  for keyframe in keyframes:
    frame_seconds.append(int(keyframe // fps))
    # Five samples, two frames apart, feed the per-keyframe majority vote.
    for sample in get_frames(vid, keyframe, n=5, step=2):
      imgs.append(prefun(sample, crop_x0, crop_x1, crop_y0, crop_y1))

  dt_str = ocr(imgs, groups=len(keyframes))

  stamps = []
  for text, second in zip(dt_str, frame_seconds):
    stamps.append(Stamp(string_to_timestamp(text), second))
  return stamps

In [None]:
# Walk every camera directory, OCR the on-screen clock at up to 65 keyframes
# of each not-yet-processed video, and write one JSON metadata file per video.
init_dt_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

for io_dir in sorted(camera_data.keys()):
  input_dir_path = path.join(VIDEO_PATH, io_dir)
  output_dir_path = path.join(VIDEO_DATA_PATH, io_dir)
  input_files = sorted([f for f in listdir(input_dir_path) if f.endswith("mp4")])
  makedirs(output_dir_path, exist_ok=True)

  print(io_dir)
  for io_file in input_files:
    input_file_path = path.join(input_dir_path, io_file)
    file_data_out_path = path.join(output_dir_path, io_file.replace("mp4", "json"))

    # Output already exists from a previous run: nothing to do.
    if path.exists(file_data_out_path):
      continue

    print("processing:", io_file)

    vid = cv2.VideoCapture(input_file_path)
    try:
      fps = vid.get(cv2.CAP_PROP_FPS)
      if not fps > 0:
        continue

      length_frames = vid.get(cv2.CAP_PROP_FRAME_COUNT)
      last_frame = length_frames - 1
      length_seconds_fps = int(length_frames // fps)

      # A video shorter than one second yields zero keyframes, which would
      # divide by zero below; skip it like an unreadable file.
      if length_seconds_fps < 1:
        print("skipping (shorter than one second):", io_file)
        continue

      # One keyframe per second, capped at 64 intervals, OCR'd in batches of
      # about 16. The batch size is bumped by one when it divides the
      # keyframe count evenly so that the final index (== num_keyframes,
      # i.e. the last frame) still falls inside a batch.
      num_keyframes = min(64, length_seconds_fps)
      keyframes_per_group = min(16, length_seconds_fps)
      if num_keyframes % keyframes_per_group == 0:
        keyframes_per_group += 1

      stamps = []
      for group_start in range(0, num_keyframes, keyframes_per_group):
        group_end = min(group_start + keyframes_per_group, num_keyframes + 1)
        kfs = [int(i * last_frame / num_keyframes) for i in range(group_start, group_end)]
        stamps.extend(get_stamps(vid, kfs, prefun=crop_frame))

      time_start = stamps[0].timestamp
      time_end = stamps[-1].timestamp
      file_data = {
        "name": io_file,
        "camera": io_dir,
        "length_frames": int(length_frames),
        "length_seconds": length_seconds_fps,
        "time_start": time_start,
        "time_end": time_end,
        # True when the OCR'd time span matches the FPS-derived duration
        # to within two seconds.
        "continuous": abs((time_end - time_start) - length_seconds_fps) < 2,
        "seek": [s.stamp() for s in stamps],
      }
    finally:
      # Release the capture on every path, including skips and errors.
      vid.release()

    with open(file_data_out_path, "w") as f:
      json.dump(file_data, f)

end_dt_str = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

print("DONE!", init_dt_str, end_dt_str)

## Timestamp Post-Processing

In [None]:
import json

from datetime import datetime, timedelta, timezone
from os import listdir, path

# Inputs: camera database and the directory of per-video JSON metadata.
CAMERA_DB_PATH = "./metadata/cameras.json"
VIDEO_DATA_PATH = "./metadata/0801-1152-crop-64"

# Outputs, written next to the per-camera metadata directories.
VIDEO_DB_PATH = path.join(VIDEO_DATA_PATH, "videos.json")
SEEK_DB_PATH = path.join(VIDEO_DATA_PATH, "seek.json")

# Fixed UTC-03:00 offset used for every timestamp in this section.
TZ_BR = timezone(offset=timedelta(hours=-3))

# Sentinel for unreadable OCR, plus the accepted window
# (7 Jan 2023 23:00 to 9 Jan 2023 01:00, both at -03:00).
ERROR_DATETIME = datetime.strptime("01012025000000-0300", '%d%m%Y%H%M%S%z')
MIN_DATETIME = datetime.strptime("07012023230000-0300", '%d%m%Y%H%M%S%z')
MAX_DATETIME = datetime.strptime("09012023010000-0300", '%d%m%Y%H%M%S%z')

# Integer Unix-timestamp forms of the three datetimes above.
ERROR_TIMESTAMP, MIN_TIMESTAMP, MAX_TIMESTAMP = [
  int(moment.timestamp())
  for moment in (ERROR_DATETIME, MIN_DATETIME, MAX_DATETIME)
]

In [None]:
# Load the camera database; its keys drive the per-camera directory loop.
with open(CAMERA_DB_PATH, "r") as f:
  camera_data = json.loads(f.read())

In [None]:
def _reduce_seek(seek_data):
  """Clean a list of ``[timestamp, seconds]`` pairs from a non-continuous video.

  Error-sentinel readings are dropped, timestamps outside
  [MIN_TIMESTAMP, MAX_TIMESTAMP] keep their time of day but are moved to
  8 Jan 2023, duplicate timestamps are merged (their seconds averaged), and
  points that advance exactly one timestamp-second per video-second from the
  previously kept point are removed as redundant.

  Returns:
    A list of ``(timestamp, seconds)`` tuples ordered by ``seconds``; empty
    when no usable reading remains.
  """
  seconds_by_ts = {}
  for ts, s in seek_data:
    if ts == ERROR_TIMESTAMP:
      continue

    if ts < MIN_TIMESTAMP or ts > MAX_TIMESTAMP:
      mdt = datetime.fromtimestamp(ts, tz=TZ_BR).replace(year=2023, month=1, day=8)
      ts = int(mdt.timestamp())

    seconds_by_ts.setdefault(ts, []).append(s)

  # average non-unique timestamps
  unique_ts_seek = sorted(
    [(ts, sum(ss) // len(ss)) for ts, ss in seconds_by_ts.items()],
    key=lambda x: x[1],
  )

  non_redundant_seek = unique_ts_seek[:1]
  for ts, s in unique_ts_seek[1:]:
    ts0, s0 = non_redundant_seek[-1]
    # Always keep the final point so the range end is preserved.
    if ((ts - ts0) != (s - s0)) or ts == unique_ts_seek[-1][0]:
      non_redundant_seek.append((ts, s))
  return non_redundant_seek


# camera -> [(first_ts, last_ts, video file)], newest range end first
camera_ranges = {}
# video file -> reduced seek points
file_seeks = {}
# video file -> full metadata with the reduced seek points
videos_info = {}

for io_dir in sorted(camera_data.keys()):
  input_dir_path = path.join(VIDEO_DATA_PATH, io_dir)
  input_files = sorted([f for f in listdir(input_dir_path) if f.endswith("json")])

  camera_ranges[io_dir] = []

  for io_file in input_files:
    input_file_path = path.join(input_dir_path, io_file)
    video_file_name = io_file.replace("json", "mp4")

    with open(input_file_path, "r") as f:
      video_data = json.load(f)

    if video_data["continuous"]:
      # A continuous video is fully described by its first and last points.
      seek_data_sorted = sorted(video_data["seek"], key=lambda x: x[1])
      seek = seek_data_sorted[:1] + seek_data_sorted[-1:]
    else:
      seek = _reduce_seek(video_data["seek"])

    # Every reading was unusable: there is no range to record, and indexing
    # the empty list would raise IndexError.
    if not seek:
      print("skipping (no usable timestamps):", io_file)
      continue

    camera_ranges[io_dir].append((seek[0][0], seek[-1][0], video_file_name))
    file_seeks[video_file_name] = seek

    video_data["seek"] = seek
    videos_info[video_file_name] = video_data

  camera_ranges[io_dir].sort(key=lambda x: x[1], reverse=True)
  # TODO: calculate and print overlapping ranges

In [None]:
# Compact JSON, with a line break after every "]]," so each camera's ranges end a line.
print("]],\n".join(json.dumps(camera_ranges, separators=(',',':')).split("]],")))

In [None]:
# Compact JSON, with a line break after every "]]," so each video's seek list ends a line.
print("]],\n".join(json.dumps(file_seeks, separators=(',',':')).split("]],")))

In [None]:
# Compact JSON, with a line break after every "]]}," so each video record ends a line.
print("]]},\n".join(json.dumps(videos_info, separators=(',',':')).split("]]},")))

In [None]:
# Bundle the per-camera ranges and per-video seek points for the seek database.
seek_info = dict(ranges=camera_ranges, seeks=file_seeks)

In [None]:
# Persist the per-video metadata, indented for readability.
with open(VIDEO_DB_PATH, "w") as f:
  f.write(json.dumps(videos_info, indent=2, separators=(',',':')))

In [None]:
# Persist the ranges/seeks bundle as compact JSON.
with open(SEEK_DB_PATH, "w") as f:
  f.write(json.dumps(seek_info, separators=(',',':')))

### Recursive Stamping

In [None]:
def stamp_center(vid, stamp_0, stamp_1):
  """Stamp the midpoint between two stamps whose clocks disagree.

  Args:
    vid: an opened ``cv2.VideoCapture``.
    stamp_0: earlier ``Stamp``.
    stamp_1: later ``Stamp``.

  Returns:
    ``[]`` when the stamps are at most one video-second apart, or when the
    timestamp difference matches the video-time difference to within one
    second; otherwise a one-element list holding the ``Stamp`` read at the
    midpoint.
  """
  diff_seconds = stamp_1.seconds - stamp_0.seconds
  diff_timestamp = stamp_1.timestamp - stamp_0.timestamp

  if diff_seconds <= 1 or abs(diff_seconds - diff_timestamp) <= 1:
    return []

  # Midpoint in video time, converted to a whole frame index.
  center_seconds = stamp_0.seconds + diff_seconds / 2
  center_frame = int(center_seconds * vid.get(cv2.CAP_PROP_FPS))

  # get_stamps takes a list of keyframes and returns a list of stamps.
  stamp_c = get_stamps(vid, [center_frame])[0]

  # TODO: recursion into both halves is not enabled yet.
  left_center = []  # stamp_center(vid, stamp_0, stamp_c)
  right_center = []  # stamp_center(vid, stamp_c, stamp_1)

  return left_center + [stamp_c] + right_center