In [1]:
import glob
import json

log_folder = "assets/logs/"
# glob returns matches in arbitrary, filesystem-dependent order. Sorting keeps
# the list order (and therefore the participant index taken from it in a later
# cell via enumerate(logs)) stable across runs and machines.
log_files = sorted(glob.glob(log_folder + "*.json"))

logs = []
for log_file in log_files:
  # The context manager closes each handle promptly. The encoding is pinned
  # because the logs contain non-ASCII text and the platform default encoding
  # is not guaranteed to be UTF-8.
  with open(log_file, "r", encoding="utf-8") as log_fp:
    logs.append(json.load(log_fp))

In [2]:
def process_logs(logs: list[dict]) -> list[dict]:
  """Collapse a raw session log into one record per non-"state" entry.

  Every entry whose ``"event"`` is not ``"state"`` becomes a record with the
  keys ``pid``, ``timestamp`` (offset from the first entry's timestamp),
  ``type`` (the entry's ``"event"``), ``name``, ``value`` and ``states``.
  Each ``"state"`` entry is appended, unmodified, to the ``states`` list of
  the most recently created record.

  ``name`` and ``value`` are derived as follows:

  * ``name`` starts as the entry's ``"key"`` and ``value`` as ``None``.
  * If the key contains ``"-"`` it is split on it: the first piece becomes
    the name and the second piece the value (any further pieces are dropped).
  * A ``"scroll"`` event named ``"chart_container"`` takes its value from
    ``log["data"]["chartIndex"]``.
  * An entry named ``"prefer_chart"`` takes its value from the first ``data``
    item of the entry immediately after it, as
    ``{"chartIndex": <second dot-separated piece of "path">, "preferred": <"value">}``.
    If there is no following entry the value is left unchanged.

  Args:
    logs: Entries in chronological order. Each needs ``"event"``; non-state
      entries also need ``"key"``, ``"pid"`` and ``"timestamp"``.

  Returns:
    The list of records described above; empty when ``logs`` is empty.
    ``"state"`` entries that occur before the first non-state entry are
    skipped because there is no record to attach them to.
  """
  if not logs:
    return []

  start_time = logs[0]["timestamp"]
  processed_logs = []

  for i, log in enumerate(logs):
    if log["event"] == "state":
      # Attach to the latest record; nothing to attach to if none exists yet.
      if processed_logs:
        processed_logs[-1]["states"].append(log)
      continue

    name = log["key"]
    value = None
    split_name = name.split("-")
    if len(split_name) > 1:
      name = split_name[0]
      value = split_name[1]

    if log["event"] == "scroll" and name == "chart_container":
      value = log["data"]["chartIndex"]

    # The preference itself is read from the entry right after this one, so
    # only look ahead when such an entry exists (e.g. not on a truncated log).
    if name == "prefer_chart" and i + 1 < len(logs):
      change = logs[i + 1]["data"][0]
      value = {
        "chartIndex": change["path"].split(".")[1],
        "preferred": change["value"],
      }

    processed_logs.append(
      {
        "pid": log["pid"],
        "timestamp": log["timestamp"] - start_time,
        "type": log["event"],
        "name": name,
        "value": value,
        "states": [],
      }
    )

  return processed_logs


def _split_at_load_demo(log: list[dict]) -> list[list[dict]]:
  """Cut ``log`` into consecutive pieces, starting a new piece at every "load_demo" entry."""
  pieces = []
  start_idx = 0

  for i, entry in enumerate(log):
    if entry["key"] == "load_demo":
      pieces.append(log[start_idx:i])
      start_idx = i

  pieces.append(log[start_idx:])
  return pieces


def _last_session_for(sessions: list[list[dict]], filename: str, participant_id: int) -> list[dict]:
  """Return copies of the last session whose second entry stores ``filename``, tagged with the pid."""
  matching = [
    session
    for session in sessions
    # The second entry is inspected, so a session needs at least two entries.
    if len(session) > 1
    and session[1]["key"] == "data_store"
    and session[1]["data"][0]["value"] == filename
  ]
  if not matching:
    # Same exception type the bare ``[-1]`` lookup produced, with a usable message.
    raise IndexError(f"no session found for {filename!r} (participant {participant_id})")

  return [{**entry, "pid": f"P{participant_id}"} for entry in matching[-1]]


def extract_logs(log: list[dict], participant_id: int, min_length: int = 50):
  """Pick one participant's cars and movies sessions out of their raw log.

  The log is cut at every entry whose ``"key"`` is ``"load_demo"``. Pieces
  with ``min_length`` entries or fewer are discarded. Of the remaining pieces,
  the last one whose second entry has key ``"data_store"`` and first data
  value ``"cars_ko.json"`` is the cars session; likewise ``"movies_ko.json"``
  for the movies session.

  Args:
    log: One participant's raw log entries; every entry needs a ``"key"``.
    participant_id: Number used to build the ``"pid"`` value ``f"P{participant_id}"``.
    min_length: A piece must have more than this many entries to be kept.

  Returns:
    ``(cars, movies)``: two lists of new dicts, each a copy of the original
    entry with ``"pid"`` set. The input entries are not modified.

  Raises:
    IndexError: If no kept piece matches one of the two datasets.
  """
  sessions = [
    piece for piece in _split_at_load_demo(log) if len(piece) > min_length
  ]

  cars = _last_session_for(sessions, "cars_ko.json", participant_id)
  movies = _last_session_for(sessions, "movies_ko.json", participant_id)

  return cars, movies

In [3]:
# Run extract_logs once per participant and unpack both sessions from that
# single result, instead of extracting twice and discarding half each time.
extracted_logs = [extract_logs(log, idx) for idx, log in enumerate(logs)]
cars_logs = [cars for cars, _ in extracted_logs]
movies_logs = [movies for _, movies in extracted_logs]

In [4]:
# Turn every participant's cars and movies session into per-event records.
processed_cars_logs = list(map(process_logs, cars_logs))
processed_movies_logs = list(map(process_logs, movies_logs))

In [5]:
from pprint import pprint


# Inspect the processed cars session of the first participant in the list.
first_cars_session = processed_cars_logs[0]
pprint(first_cars_session)

[{'name': 'load_demo',
  'pid': 'P0',
  'states': [{'data': [{'path': 'filename', 'value': 'cars_ko.json'}],
              'event': 'state',
              'key': 'data_store',
              'pid': 'P0',
              'timestamp': 1732523449104,
              'version': 1},
             {'data': [{'path': 'filename', 'value': 'cars_ko.json'}],
              'event': 'state',
              'key': 'session_store',
              'pid': 'P0',
              'timestamp': 1732523449118,
              'version': 1},
             {'data': [{'path': 'charts.0',
                        'value': {'preferred': False,
                                  'timestamp': 1732523449149,
                                  'title': '연비'}},
                       {'path': 'charts.1',
                        'value': {'preferred': False,
                                  'timestamp': 1732523449151,
                                  'title': '실린더 수'}},
                       {'path': 'currentChartIndex', 'value': 