In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import tikzplotlib


plt.rc("text", usetex=True)
plt.rc("text.latex", preamble="""\\usepackage{amsmath}""")
plt.style.use("fivethirtyeight")
# set background color to white of the figure and axes
plt.rcParams["figure.facecolor"] = "white"
plt.rcParams["axes.facecolor"] = "white"

# def tikzplotlib_fix_ncols(obj):
#     """
#     workaround for matplotlib 3.6 renamed legend's _ncol to _ncols, which breaks tikzplotlib
#     """
#     if hasattr(obj, "_ncols"):
#         obj._ncol = obj._ncols
#     for child in obj.get_children():
#         tikzplotlib_fix_ncols(child)


# https://github.com/nschloe/tikzplotlib/issues/567
# from matplotlib.lines import Line2D
# from matplotlib.legend import Legend

# Line2D._us_dashSeq = property(lambda self: self._dash_pattern[1])
# Line2D._us_dashOffset = property(lambda self: self._dash_pattern[0])
# Legend._ncol = property(lambda self: self._ncols)

import seaborn as sns

palette = sns.color_palette('pastel').as_hex()
def adjust_lightness(color, amount=1.1):
    import matplotlib.colors as mc
    import colorsys

    try:
        c = mc.cnames[color]
    except:
        c = color
    c = colorsys.rgb_to_hls(*mc.to_rgb(c))
    return colorsys.hls_to_rgb(c[0], np.maximum(0, np.minimum(1, amount * c[1])), c[2])

plt.rcParams.update({'errorbar.capsize': 2})

In [2]:
df=pd.read_csv("./private/OnSET SUS (responses).csv")
df

Unnamed: 0,User ID,User display name,Timestamp,Participant ID,I think that I would like to use OnSET frequently.,I found OnSET unnecessarily complex.,I thought OnSET was easy to use.,I think that I would need the support of a technical person to be able to use OnSET.,I found the various functions in OnSET were well integrated.,I thought there was too much inconsistency in OnSET.,...,I needed to learn a lot of things before I could get going with OnSET.,Background,Task 0 Time,Task 0 Success,Task 1 Time,Task 1 Success,Task 2 Time,Task 2 Success,Task 3 Time,Task 3 Success
0,,Anonymous user,2025-06-03T10:42:18+02:00,IT28,3,1 - Strongly Disagree,4,4,3,1 - Strongly Disagree,...,2,CS Background (e.g. studying informatics),08:30,Correct Links; Correct Nodes,03:40,Correct Links; Correct Nodes,02:40,Fully Correct; Correct Nodes; Correct Links,00:40,Correct Nodes; Correct Links; Fully Correct
1,,Anonymous user,2025-06-03T12:00:40+02:00,SR01,4,2,4,1 - Strongly Disagree,5 - Strongly agree,2,...,1 - Strongly Disagree,CS Background (e.g. studying informatics),01:40,Fully Correct; Correct Nodes; Correct Links,00:40,Fully Correct; Correct Nodes; Correct Links,04:30,Fully Correct; Correct Nodes; Correct Links,01:50,Fully Correct; Correct Nodes; Correct Links
2,,Anonymous user,2025-06-03T14:57:47+02:00,SI01,1 - Strongly Disagree,1 - Strongly Disagree,2,1 - Strongly Disagree,1 - Strongly Disagree,1 - Strongly Disagree,...,1 - Strongly Disagree,CS Background (e.g. studying informatics),03:50,Fully Correct; Correct Nodes; Correct Links,01:00,Fully Correct; Correct Nodes; Correct Links,02:30,Fully Correct; Correct Nodes; Correct Links,01:30,Fully Correct; Correct Nodes; Correct Links
3,,Anonymous user,2025-06-04T13:52:37+02:00,RR15,4,1 - Strongly Disagree,4,1 - Strongly Disagree,5 - Strongly agree,2,...,2,CS Background (e.g. studying informatics),04:00,Fully Correct; Correct Nodes; Correct Links,00:30,Fully Correct; Correct Nodes; Correct Links,09:00,Correct Nodes,01:00,Fully Correct; Correct Nodes; Correct Links
4,,Anonymous user,2025-06-07T16:31:02+02:00,MH02,3,2,4,3,3,2,...,2,Non-technical background,03:00,Fully Correct; Correct Nodes; Correct Links,00:40,Fully Correct; Correct Nodes; Correct Links,05:00,Fully Correct; Correct Nodes; Correct Links,08:00,Fully Correct; Correct Nodes; Correct Links


In [3]:
list(enumerate(df.columns))

[(0, 'User ID'),
 (1, 'User display name'),
 (2, 'Timestamp'),
 (3, 'Participant ID'),
 (4, ' I think that I would like to use OnSET frequently.'),
 (5, ' I found OnSET unnecessarily complex.'),
 (6, 'I thought OnSET was easy to use.'),
 (7,
  'I think that I would need the support of a technical person to be able to use OnSET.'),
 (8, 'I found the various functions in OnSET were well integrated.'),
 (9, 'I thought there was too much inconsistency in OnSET.'),
 (10,
  'I would imagine that most people would learn to use OnSET very quickly.'),
 (11, 'I found OnSET very cumbersome to use.'),
 (12, 'I felt very confident using OnSET.'),
 (13,
  'I needed to learn a lot of things before I could get going with OnSET.'),
 (14, 'Background'),
 (15, 'Task 0 Time'),
 (16, 'Task 0 Success'),
 (17, 'Task 1 Time'),
 (18, 'Task 1 Success'),
 (19, 'Task 2 Time'),
 (20, 'Task 2 Success'),
 (21, 'Task 3 Time'),
 (22, 'Task 3 Success')]

In [4]:
likert_cols = {
    "Recommendable": 4,
    "Satisfaction": 10,
    "Confidence": 12,
}
positive_col_ids = [i * 2 + 4 for i in range(5)]
negative_col_ids = [col + 1 for col in positive_col_ids]

sus_cols = positive_col_ids + negative_col_ids

task_times_cols = [i * 2 + 15 for i in range(4)]
task_successes_cols = [col + 1 for col in task_times_cols]
df.iloc[:, sus_cols] = (
    df.iloc[:, sus_cols]
    .map(lambda x: pd.to_numeric(x[0]) if isinstance(x, str) else pd.to_numeric(x))
    .astype(int)
)
df = df.infer_objects()
df["sus"] = (
    df.iloc[:, positive_col_ids].to_numpy()
    - 1
    + 5
    - df.iloc[:, negative_col_ids].to_numpy()
).sum(axis=1) * 2.5
df

Unnamed: 0,User ID,User display name,Timestamp,Participant ID,I think that I would like to use OnSET frequently.,I found OnSET unnecessarily complex.,I thought OnSET was easy to use.,I think that I would need the support of a technical person to be able to use OnSET.,I found the various functions in OnSET were well integrated.,I thought there was too much inconsistency in OnSET.,...,Background,Task 0 Time,Task 0 Success,Task 1 Time,Task 1 Success,Task 2 Time,Task 2 Success,Task 3 Time,Task 3 Success,sus
0,,Anonymous user,2025-06-03T10:42:18+02:00,IT28,3,1,4,4,3,1,...,CS Background (e.g. studying informatics),08:30,Correct Links; Correct Nodes,03:40,Correct Links; Correct Nodes,02:40,Fully Correct; Correct Nodes; Correct Links,00:40,Correct Nodes; Correct Links; Fully Correct,67.5
1,,Anonymous user,2025-06-03T12:00:40+02:00,SR01,4,2,4,1,5,2,...,CS Background (e.g. studying informatics),01:40,Fully Correct; Correct Nodes; Correct Links,00:40,Fully Correct; Correct Nodes; Correct Links,04:30,Fully Correct; Correct Nodes; Correct Links,01:50,Fully Correct; Correct Nodes; Correct Links,82.5
2,,Anonymous user,2025-06-03T14:57:47+02:00,SI01,1,1,2,1,1,1,...,CS Background (e.g. studying informatics),03:50,Fully Correct; Correct Nodes; Correct Links,01:00,Fully Correct; Correct Nodes; Correct Links,02:30,Fully Correct; Correct Nodes; Correct Links,01:30,Fully Correct; Correct Nodes; Correct Links,55.0
3,,Anonymous user,2025-06-04T13:52:37+02:00,RR15,4,1,4,1,5,2,...,CS Background (e.g. studying informatics),04:00,Fully Correct; Correct Nodes; Correct Links,00:30,Fully Correct; Correct Nodes; Correct Links,09:00,Correct Nodes,01:00,Fully Correct; Correct Nodes; Correct Links,90.0
4,,Anonymous user,2025-06-07T16:31:02+02:00,MH02,3,2,4,3,3,2,...,Non-technical background,03:00,Fully Correct; Correct Nodes; Correct Links,00:40,Fully Correct; Correct Nodes; Correct Links,05:00,Fully Correct; Correct Nodes; Correct Links,08:00,Fully Correct; Correct Nodes; Correct Links,67.5


In [5]:
sus= df["sus"].mean()
sus

np.float64(72.5)

In [6]:
task_times_cols

[15, 17, 19, 21]

In [7]:
df.iloc[:, task_times_cols] = (
    df.iloc[:, task_times_cols]
    .map(lambda x: pd.to_timedelta(f"00:{x}"))
    .map(lambda x: x.total_seconds() / 60)
)
task_times_mean = df.iloc[:, task_times_cols].mean(axis=0)
task_times_totals = df.iloc[:, task_times_cols].sum(axis=1)

task_full_correct = (
    df.iloc[:, task_successes_cols]
    .map(lambda cor: 1 if "Fully Correct" in cor else 0)
    .mean(axis=0)
)
task_times_totals

0         15.5
1     8.666667
2     8.833333
3         14.5
4    16.666667
dtype: object

In [24]:
task_df = pd.DataFrame(
    {
        "Task": [f"Task {i}" for i in range(task_times_mean.shape[0])],
        "Time": task_times_mean.to_list(),
        "Success Rate": task_full_correct.to_list(),
    }
)
task_df["Time (delta)"] = task_df["Time"].apply(lambda x: pd.to_timedelta(x, unit="m"))
task_df["Time (min)"] = task_df["Time (delta)"].apply(lambda x: f"{x.components.minutes}:{x.components.seconds}")


with open("../tables/task_results.tex", "w") as f:
    f.write(
        task_df[["Task","Success Rate", "Time (min)"]].to_latex(
            float_format="%.2f",
            caption=f"Task results for the OnSET user study on DBpedia with $n={df.shape[0]}$.",
            label="tab:task_results",
            index=False,
            formatters={},
        )
    )
task_df

Unnamed: 0,Task,Time,Success Rate,Time (delta),Time (min)
0,Task 0,4.2,0.8,0 days 00:04:12,4:12
1,Task 1,1.3,0.8,0 days 00:01:18,1:18
2,Task 2,4.733333,0.8,0 days 00:04:43.999999998,4:43
3,Task 3,2.6,1.0,0 days 00:02:36,2:36
