In [20]:
import glob
import json
import re

log_folder = "assets/sessions/"


def participant_number(path):
  """Return the participant number N from a log path whose file name starts with ``p<N>-``.

  Only the file name is inspected, so a "p" elsewhere in the directory part of
  the path cannot confuse the ordering.

  Raises:
    ValueError: if the file name does not start with ``p<digits>-``.
  """
  match = re.search(r"(?:^|[/\\])p(\d+)-[^/\\]*$", path)
  if match is None:
    raise ValueError(f"cannot find a participant number in {path!r}")
  return int(match.group(1))


def load_log(path):
  """Parse one JSON session log and close the file handle afterwards."""
  # Binary mode lets json decode the bytes itself (UTF-8/16/32) instead of relying
  # on the platform's default text encoding.
  with open(path, "rb") as handle:
    return json.load(handle)


# Sort numerically so that p2 comes before p10.
log_files = sorted(glob.glob(log_folder + "*movies.json"), key=participant_number)
logs = [load_log(log_file) for log_file in log_files]

In [21]:
# Sanity check: number of session logs loaded (one per entry in log_files).
len(logs)

14

In [22]:
# MARKS
# SVG path strings that the chart cell passes as the range of its shape scale,
# one per n_fields value (1, 2, 3).
# `one` is a single outline built from two arcs of radius 10.
one = "M20 10 A10 10 0 1 1 0 10 A10 10 0 1 1 20 10"
# `two` holds two closed sub-paths (each ends with Z) — they appear to be two circles.
two = "M12 6C12 9.31371 9.31371 12 6 12C2.68629 12 0 9.31371 0 6C0 2.68629 2.68629 0 6 0C9.31371 0 12 2.68629 12 6Z M20 14C20 17.3137 17.3137 20 14 20C10.6863 20 8 17.3137 8 14C8 10.6863 10.6863 8 14 8C17.3137 8 20 10.6863 20 14Z"
# `three` holds three closed sub-paths — they appear to be three circles.
three = "M10 14C10 16.7614 7.76142 19 5 19C2.23858 19 0 16.7614 0 14C0 11.2386 2.23858 9 5 9C7.76142 9 10 11.2386 10 14Z M20 14C20 16.7614 17.7614 19 15 19C12.2386 19 10 16.7614 10 14C10 11.2386 12.2386 9 15 9C17.7614 9 20 11.2386 20 14Z M15 5.40002C15 8.16145 12.7614 10.4 10 10.4C7.23858 10.4 5 8.16145 5 5.40002C5 2.6386 7.23858 0.400024 10 0.400024C12.7614 0.400024 15 2.6386 15 5.40002Z"

In [23]:
# Show the sorted file list to confirm the participant ordering.
log_files

['assets/sessions/p1-movies.json',
 'assets/sessions/p2-movies.json',
 'assets/sessions/p3-movies.json',
 'assets/sessions/p4-movies.json',
 'assets/sessions/p5-movies.json',
 'assets/sessions/p6-movies.json',
 'assets/sessions/p7-movies.json',
 'assets/sessions/p8-movies.json',
 'assets/sessions/p9-movies.json',
 'assets/sessions/p10-movies.json',
 'assets/sessions/p11-movies.json',
 'assets/sessions/p12-movies.json',
 'assets/sessions/p13-movies.json',
 'assets/sessions/p14-movies.json']

In [24]:
# Number of entries recorded in each session log, in participant order.
list(map(len, logs))

[35, 40, 29, 41, 48, 29, 50, 50, 64, 13, 67, 49, 55, 17]

In [25]:
# Flatten all sessions into one list of records. Each record carries the participant
# label ("P1", "P2", ...) and the entry's position within its session, followed by the
# entry's own fields. The last entry of every session is left out.
pid_logs = [
  {"pid": f"P{participant_no}", "idx": chart_idx, **entry}
  for participant_no, session in enumerate(logs, start=1)
  for chart_idx, entry in enumerate(session[:-1])
]

In [26]:
import altair as alt
import pandas as pd

# One row per flattened log entry; the columns come from the record keys.
df = pd.DataFrame(pid_logs)
# Display the frame as the cell output.
df

Unnamed: 0,pid,idx,title,n_fields,preferred,generatedBy
0,P1,0,IMDB 투표수,1,False,
1,P1,1,개봉일,1,False,
2,P1,2,로튼 토마토 평점,1,False,scroll
3,P1,3,IMDB 평점,1,False,scroll
4,P1,4,영화 관람 등급,1,False,scroll
...,...,...,...,...,...,...
568,P14,11,개봉일 & 로튼 토마토 평점,2,False,scroll
569,P14,12,제작 예산 ($) & 영화 관람 등급,2,False,scroll
570,P14,13,제작 예산 ($) & 로튼 토마토 평점,2,False,scroll
571,P14,14,제작 예산 ($) & IMDB 평점,2,False,scroll


In [27]:
# Allow up to 100 rows in DataFrame output before pandas truncates it.
pd.set_option("display.max_rows", 100)

# Rows without a generatedBy value at idx 0 or 1 are labelled "scroll".
# Missing values at any other idx are left untouched.
needs_default = df["generatedBy"].isna() & df["idx"].isin([0, 1])
df.loc[needs_default, "generatedBy"] = "scroll"

df

Unnamed: 0,pid,idx,title,n_fields,preferred,generatedBy
0,P1,0,IMDB 투표수,1,False,scroll
1,P1,1,개봉일,1,False,scroll
2,P1,2,로튼 토마토 평점,1,False,scroll
3,P1,3,IMDB 평점,1,False,scroll
4,P1,4,영화 관람 등급,1,False,scroll
...,...,...,...,...,...,...
568,P14,11,개봉일 & 로튼 토마토 평점,2,False,scroll
569,P14,12,제작 예산 ($) & 영화 관람 등급,2,False,scroll
570,P14,13,제작 예산 ($) & 로튼 토마토 평점,2,False,scroll
571,P14,14,제작 예산 ($) & IMDB 평점,2,False,scroll


In [70]:
import altair as alt
from numpy import size

# Order the y axis by the participant number found in the data ("P2" before "P10")
# rather than assuming a fixed number of participants.
participant_order = sorted(df["pid"].unique().tolist(), key=lambda pid: int(pid[1:]))

# Axis ticks at every fifth chart index, up to the largest index present.
idx_ticks = list(range(0, df["idx"].max() + 1, 5))

# Expression that holds for rows whose `preferred` field is true. The `== True`
# comparison builds an Altair expression and must not be rewritten as `is True`.
is_preferred = alt.datum.preferred == True  # noqa: E712

alt.Chart(df).mark_point(
  filled=True,
  # NOTE(review): the tiny size and the negative offsets presumably compensate for the
  # custom mark paths being drawn in a 0-20 coordinate box — confirm.
  size=1,
  xOffset=-5,
  yOffset=-5,
).encode(
  x=alt.X("idx:O", title="Chart Index", axis=alt.Axis(values=idx_ticks)),
  y=alt.Y("pid:N", title="Participants", sort=participant_order),
  # One custom SVG mark per number of attributes (1, 2, 3).
  shape=alt.Shape(
    "n_fields:O",
    scale=alt.Scale(domain=[1, 2, 3], range=[one, two, three]),
    title="Number of Attributes",
    legend=alt.Legend(
      title="Number of Attributes",
      symbolSize=5,
      symbolOffset=20,
      labelOffset=0,
      labelAlign="left",
    ),
  ),
  # Preferred charts get a red outline; all others get no stroke.
  stroke=alt.condition(
    is_preferred,
    alt.value("#C53130"),
    alt.value(None),
  ),
  # Outline width: 2 for preferred charts, 0 otherwise.
  strokeWidth=alt.condition(
    is_preferred,
    alt.value(2),
    alt.value(0),
  ),
  color=alt.Color(
    "generatedBy:N",
    title="Generated By",
    scale=alt.Scale(domain=["scroll", "append"], range=["#888888", "#F58518"]),
  ),
).properties(
  width=800,
)
