In [27]:
import glob
import json

log_folder = "assets/logs/"
# glob returns paths in arbitrary order; sort them so that the list position
# (later used as the participant index) is the same on every run.
log_files = sorted(glob.glob(log_folder + "*.json"))
log_files = [log for log in log_files if "p4" in log]

# Read each file inside a context manager so the handle is closed right away,
# and decode explicitly as UTF-8 because the logs contain non-ASCII text and
# the platform default encoding may differ.
logs = []
for log_file in log_files:
  with open(log_file, "r", encoding="utf-8") as log_fp:
    logs.append(json.load(log_fp))

In [28]:
def process_logs(logs: list[dict]) -> list[dict]:
  """Collapse a raw session log into one record per interaction event.

  Every event whose ``event`` field is not ``"state"`` becomes a record with
  the keys ``pid``, ``timestamp`` (offset from the first event in ``logs``),
  ``type``, ``name``, ``value`` and ``states``. Each ``"state"`` event is
  appended, unmodified, to the ``states`` list of the closest preceding
  record.

  Args:
    logs: Raw events in recorded order. ``event`` and ``timestamp`` are read
      from the first event and from every non-state event; ``key`` and
      ``pid`` are read from non-state events; ``data`` is read only for
      ``chart_container`` scroll events and for the event that follows a
      ``prefer_chart`` event.

  Returns:
    The list of interaction records; an empty list when ``logs`` is empty.
  """
  if not logs:
    return []

  start_time = logs[0]["timestamp"]
  processed_logs = []

  for i, log in enumerate(logs):
    if log["event"] == "state":
      # A state event seen before any interaction has no record to attach
      # to, so it is skipped instead of indexing an empty list.
      if processed_logs:
        processed_logs[-1]["states"].append(log)
      continue

    name = log["key"]
    value = None
    # Keys shaped like "<name>-<value>" carry their value inline; only the
    # first two dash-separated parts are used.
    split_name = name.split("-")
    if len(split_name) > 1:
      name = split_name[0]
      value = split_name[1]

    if log["event"] == "scroll" and name == "chart_container":
      value = log["data"]["chartIndex"]

    # For prefer_chart the toggled chart is taken from the first data item of
    # the next event. When prefer_chart is the very last event there is
    # nothing to read, so the value is left as it is.
    if name == "prefer_chart" and i + 1 < len(logs):
      change = logs[i + 1]["data"][0]
      value = {
        "chartIndex": change["path"].split(".")[1],
        "preferred": change["value"],
      }

    processed_logs.append(
      {
        "pid": log["pid"],
        "timestamp": log["timestamp"] - start_time,
        "type": log["event"],
        "name": name,
        "value": value,
        "states": [],
      }
    )

  return processed_logs


def extract_logs(log: list[dict], participant_id: int, min_events: int = 50):
  """Pick the last cars session and the last movies session out of one log.

  The log is cut in front of every event whose ``key`` is ``"load_demo"``.
  Segments with ``min_events`` events or fewer are discarded. A remaining
  segment belongs to a dataset when its second event has the key
  ``"data_store"`` and that event's first data item has the dataset's file
  name as its ``value``.

  Args:
    log: Raw events of one log file, in recorded order.
    participant_id: Number used to build the ``"P<id>"`` label written to the
      ``pid`` field of every returned event.
    min_events: A segment must contain more than this many events to be
      considered. Defaults to 50, the previously hard-coded threshold.

  Returns:
    A ``(cars, movies)`` tuple. Each element is the last matching segment as
    a list of shallow event copies with ``pid`` set; ``log`` is not modified.

  Raises:
    IndexError: If no segment matches ``cars_ko.json`` or ``movies_ko.json``.
  """
  pid = f"P{participant_id}"

  # Cut the log in front of every "load_demo" event.
  sessions = []
  start_idx = 0
  for i, event in enumerate(log):
    if event["key"] == "load_demo":
      sessions.append(log[start_idx:i])
      start_idx = i
  sessions.append(log[start_idx:])

  sessions = [session for session in sessions if len(session) > min_events]

  def last_session_for(filename):
    """Return the last session that loaded `filename`, tagged with the pid."""
    matches = [
      session
      for session in sessions
      # The length guard keeps session[1] valid when min_events is below 1.
      if len(session) > 1
      and session[1]["key"] == "data_store"
      and session[1]["data"][0]["value"] == filename
    ]
    if not matches:
      raise IndexError(
        f"no session with more than {min_events} events loads {filename} for {pid}"
      )
    return [{**event, "pid": pid} for event in matches[-1]]

  cars = last_session_for("cars_ko.json")
  movies = last_session_for("movies_ko.json")

  return cars, movies

In [29]:
# Split each log file once and then separate the (cars, movies) pairs, so the
# splitting and filtering work is not repeated for the second dataset.
extracted_logs = [extract_logs(log, idx) for idx, log in enumerate(logs)]
cars_logs = [cars for cars, _movies in extracted_logs]
movies_logs = [movies for _cars, movies in extracted_logs]

In [30]:
# Convert every extracted session into interaction records, one list per
# entry of cars_logs / movies_logs.
processed_cars_logs = list(map(process_logs, cars_logs))
processed_movies_logs = list(map(process_logs, movies_logs))


In [31]:
# Display the processed movie-session logs (one inner list per loaded log file).
processed_movies_logs

[[{'pid': 'P0',
   'timestamp': 0,
   'type': 'click',
   'name': 'load_demo',
   'value': None,
   'states': [{'timestamp': 1732604986700,
     'key': 'data_store',
     'event': 'state',
     'data': [{'path': 'filename', 'value': 'movies_ko.json'}],
     'version': 1,
     'pid': 'P0'},
    {'timestamp': 1732604986719,
     'key': 'session_store',
     'event': 'state',
     'data': [{'path': 'filename', 'value': 'movies_ko.json'}],
     'version': 1,
     'pid': 'P0'},
    {'timestamp': 1732604987060,
     'key': 'session_store',
     'event': 'state',
     'data': [{'path': 'charts.0',
       'value': {'title': '개봉일',
        'preferred': False,
        'timestamp': 1732604986462}},
      {'path': 'charts.1',
       'value': {'title': 'IMDB 평점',
        'preferred': False,
        'timestamp': 1732604986465}},
      {'path': 'currentChartIndex', 'value': 0}],
     'version': 1,
     'pid': 'P0'},
    {'timestamp': 1732604987115,
     'key': 'interaction_store',
     'event': 'stat

In [32]:
def extract_charts_from_log(log_data):
  """Rebuild the list of charts from the state updates stored in a log.

  Walks every data item of every state of every event. An item whose path is
  ``charts.<idx>`` replaces the chart stored for ``<idx>``; an item whose
  path starts with ``charts.<idx>.preferred`` updates the ``preferred`` field
  of a chart that has already been seen. All other ``charts.<idx>.*`` paths
  are ignored.

  Args:
    log_data: Iterable of event dicts. Events without ``states`` and states
      without ``data`` are skipped.

  Returns:
    The charts ordered by their numeric index. The chart dicts are shallow
    copies, so ``log_data`` is left unmodified.
  """
  # Charts keyed by the index segment of the path (kept as a string).
  charts = {}

  for event in log_data:
    for state in event.get("states", []):
      for data_item in state.get("data", []):
        path = data_item.get("path", "")
        if not path.startswith("charts."):
          continue

        # The "charts." prefix guarantees at least two path segments.
        path_parts = path.split(".")
        chart_idx = path_parts[1]

        # The result is ordered with int(), so an index that is not an
        # integer cannot be placed; skip it instead of failing at the end.
        try:
          int(chart_idx)
        except ValueError:
          continue

        # Field-level update: only "preferred" is tracked, and only for a
        # chart that was already recorded.
        if len(path_parts) > 2:
          if path_parts[2] == "preferred" and chart_idx in charts:
            charts[chart_idx]["preferred"] = data_item["value"]
          continue

        # Whole-chart update. Store a copy so that later "preferred" writes
        # do not leak back into the caller's log data.
        if "value" in data_item:
          chart = data_item["value"]
          charts[chart_idx] = dict(chart) if isinstance(chart, dict) else chart

  return [charts[idx] for idx in sorted(charts, key=int)]


def process_charts(chart_list):
  """Reduce each chart to the summary fields written to the session file.

  For every chart the result holds its ``title``, ``n_fields`` (the number of
  ``&`` characters in the title plus one), ``preferred`` (``False`` when
  absent) and ``generatedBy`` (``None`` when absent).
  """
  return [
    {
      "title": chart["title"],
      "n_fields": chart["title"].count("&") + 1,
      "preferred": chart.get("preferred", False),
      "generatedBy": chart.get("generatedBy", None),
    }
    for chart in chart_list
  ]


# Example usage:
# raw_charts = extract_charts_from_log(log_data)
# final_charts = process_charts(raw_charts)


In [33]:
# Number of interaction records in the first processed movie session.
print(len(processed_movies_logs[0]))

c = extract_charts_from_log(processed_movies_logs[0])
cc = process_charts(c)

# Write through a context manager so the file is flushed and closed before the
# cell finishes. ensure_ascii=False emits non-ASCII titles verbatim, so the
# encoding is set to UTF-8 explicitly instead of relying on the platform default.
with open("assets/sessions/p4-movies.json", "w", encoding="utf-8") as session_file:
  json.dump(cc, session_file, indent=2, ensure_ascii=False)

108
