In [None]:
import json
from collections import defaultdict
from os import listdir
from os.path import isfile, join
from pprint import pprint
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns

# English translations of the German adjective labels found in the
# questionnaire result files. Both the "right" and the "left" label of a
# category are looked up here when the column names of the tables are built.
# Entries are listed as pairs of opposites.
right_left_mapping = {
    "schnell": "fast",
    "langsam": "slow",
    "effizient": "efficient",
    "ineffizient": "inefficient",
    "vorhersagbar": "predictable",
    "unberechenbar": "unpredictable",
    "unterstützend": "supportive",
    "behindernd": "obstructive",
    "hilfreich": "helpful",
    "nicht hilfreich": "not helpful",
    "lohnend": "rewarding",
    "nicht lohnend": "not rewarding",
    "mühelos": "easy",
    "mühevoll": "difficult",
    "logisch": "logical",
    "unlogisch": "illogical",
    "einleuchtend": "plausible",
    "nicht einleuchtend": "not plausible",
    "passend": "suitable",
    "unpassend": "inappropriate",
    "intelligent": "intelligent",
    "unintelligent": "unintelligent",
    "gut aufbereitet": "well prepared",
    "schlecht aufbereitet": "poorly prepared",
    # "genau" means precise/exact; it was previously translated with the
    # opposite polarity ("ambiguous"), which mislabelled the plotted column.
    "genau": "precise",
    "ungenau": "imprecise",
}

In [None]:
def parse_json_file(path: str) -> dict:
    """Read the file at ``path`` and return its parsed JSON content.

    The file is decoded as UTF-8 explicitly instead of relying on the
    platform's default encoding, so non-ASCII text (for example German
    umlauts in questionnaire labels) is read identically on every system.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The decoded JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)


def results_per_user(
    results_path: str = "dev/evaluation/results/",
) -> dict[str, list[dict]]:
    """Load every result file in a directory and group the contents by user.

    The user id is the part of the *file name* before the first underscore.
    Deriving it from the file name rather than from the full path keeps the
    grouping correct when a directory in ``results_path`` contains an
    underscore.

    Args:
        results_path: Directory holding the result files. Defaults to the
            evaluation results directory used by this notebook.

    Returns:
        A mapping from user id to the list of parsed result documents found
        for that user.
    """
    grouped: dict[str, list[dict]] = defaultdict(list)

    # listdir() returns entries in arbitrary order; sorting makes the order of
    # users (and therefore of the table rows built from them) reproducible.
    for filename in sorted(listdir(results_path)):
        path = join(results_path, filename)
        if not isfile(path):
            # Skip sub-directories and other non-file entries.
            continue

        user = filename.split("_")[0]
        grouped[user].append(parse_json_file(path))

    return grouped

# Load all result files once. NOTE: this assignment rebinds the name
# `results_per_user` from the loader function to the mapping it returns, so
# from here on the name refers to the data and is no longer callable.
results_per_user = results_per_user()
# Show the loaded mapping as the cell output.
results_per_user

In [None]:
def category_results(
    results_per_user: dict[str, list[dict]],
) -> dict[str, list[tuple[str, int]]]:
    """Collect, per questionnaire section, the AI-minus-traditional reply differences.

    For every user the result with mode "AI" and the result with mode
    "traditional" are paired section by section and category by category
    (by position). Each pair contributes one ``(right_label, difference)``
    entry to the list stored under the section's name.

    Args:
        results_per_user: Mapping from user id to that user's result
            documents. Each document is read through its "mode" and
            "questionnaire" keys; sections through "name" and "categories";
            categories through "right" and "reply".

    Returns:
        Mapping from section name to a list of ``(right_label, difference)``
        tuples, where ``difference`` is the AI reply minus the traditional
        reply.

    Raises:
        IndexError: If a user has no result for one of the two modes. The
            message names the user and the missing mode.
    """
    differences: dict[str, list[tuple[str, int]]] = defaultdict(list)

    for user, results in results_per_user.items():
        # Pick the first result of each mode, failing with a message that
        # identifies the incomplete participant instead of a bare
        # "list index out of range".
        by_mode: dict[str, dict] = {}
        for mode in ("AI", "traditional"):
            match = next((r for r in results if r["mode"] == mode), None)
            if match is None:
                raise IndexError(
                    f"user {user!r} has no questionnaire with mode {mode!r}"
                )
            by_mode[mode] = match

        for ai_section, trad_section in zip(
            by_mode["AI"]["questionnaire"], by_mode["traditional"]["questionnaire"]
        ):
            for ai_c, trad_c in zip(
                ai_section["categories"], trad_section["categories"]
            ):
                # Positive difference: the AI reply was the larger one.
                # Negative difference: the traditional reply was the larger one.
                differences[ai_section["name"]].append(
                    (ai_c["right"], ai_c["reply"] - trad_c["reply"])
                )

    return differences

# Compute the per-section differences. NOTE: this assignment rebinds the name
# `category_results` from the function to the mapping it returns.
category_results = category_results(results_per_user)
# Show the computed mapping as the cell output.
category_results

In [None]:
import pandas as pd


def dataframes(
    results_per_user: dict[str, list[dict]],
) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Build per-user tables of the traditional replies, the AI replies and their difference.

    Every user contributes one row to each table. The first column is
    "user"; every further column belongs to one questionnaire category and
    is named after the English translation of its right label, a slash and
    a line break, and the English translation of its left label.

    Args:
        results_per_user: Mapping from user id to that user's result
            documents. Each document is read through its "mode" and
            "questionnaire" keys; sections through "categories"; categories
            through "right", "left" and "reply".

    Returns:
        A tuple ``(traditional, ai, improvement)`` of DataFrames, in that
        order. ``improvement`` holds the AI reply minus the traditional
        reply.

    Raises:
        IndexError: If a user has no result for one of the two modes. The
            message names the user and the missing mode.
        KeyError: If a category label has no entry in ``right_left_mapping``.
    """
    ai_rows: list[dict] = []
    traditional_rows: list[dict] = []
    improvement_rows: list[dict] = []

    for user, results in results_per_user.items():
        # Pick the first result of each mode, failing with a message that
        # identifies the incomplete participant.
        by_mode: dict[str, dict] = {}
        for mode in ("AI", "traditional"):
            match = next((r for r in results if r["mode"] == mode), None)
            if match is None:
                raise IndexError(
                    f"user {user!r} has no questionnaire with mode {mode!r}"
                )
            by_mode[mode] = match

        ai_replies = {"user": user}
        traditional_replies = {"user": user}
        improvement_replies = {"user": user}

        # Sections and categories of the two questionnaires are paired by
        # position.
        for ai_section, trad_section in zip(
            by_mode["AI"]["questionnaire"], by_mode["traditional"]["questionnaire"]
        ):
            for ai_c, trad_c in zip(
                ai_section["categories"], trad_section["categories"]
            ):
                right = right_left_mapping[ai_c["right"]]
                left = right_left_mapping[ai_c["left"]]
                # The line break keeps the two-part label compact when it is
                # used as a tick label in the plots.
                column = f"{right}/\n{left}"

                ai_replies[column] = ai_c["reply"]
                traditional_replies[column] = trad_c["reply"]
                improvement_replies[column] = ai_c["reply"] - trad_c["reply"]

        ai_rows.append(ai_replies)
        traditional_rows.append(traditional_replies)
        improvement_rows.append(improvement_replies)

    return (
        pd.DataFrame.from_records(traditional_rows),
        pd.DataFrame.from_records(ai_rows),
        pd.DataFrame.from_records(improvement_rows),
    )


# Unpack in the order dataframes() returns them: traditional, AI, improvement.
trad, ai, improvement = dataframes(results_per_user)
# Show the table of traditional replies as the cell output.
trad

In [None]:
# Show the table of AI replies as the cell output.
ai

In [None]:
# Show the table of AI-minus-traditional differences as the cell output.
improvement

In [None]:
def plot_table(df, path: str, title: str, x_label: str, y_label: str):
    """Draw one box plot per column of ``df`` and save the figure to ``path``.

    The frame is melted to long format, so every column it contains becomes
    one box on the x-axis. The y-axis carries fixed ticks from -4 to 4 and a
    solid reference line at 0.

    Args:
        df: Table whose columns are the categories to plot.
        path: File the figure is written to.
        title: Title shown above the plot.
        x_label: Label of the x-axis (also the name of the melted category
            column).
        y_label: Name of the plotted quantity. The y-axis is labelled
            "<y_label> Score"; the suffix is omitted when ``y_label`` already
            ends in "Score" so the axis never reads "Score Score".
    """
    melted_df = df.melt(var_name=x_label, value_name=y_label)

    # Scale the figure width with the number of boxes; a width below 1 is
    # clamped because matplotlib rejects non-positive figure sizes.
    plt.figure(figsize=(max(len(df.columns) - 1, 1), 5))
    medianprops = dict(linestyle="-", linewidth=1.5, color="blue")
    sns.boxplot(
        data=melted_df,
        x=x_label,
        y=y_label,
        color="white",
        medianprops=medianprops,
        fill=True,
        width=0.5,  # Box width
        linewidth=1.5,  # Thicker box borders
        linecolor="black",
    )

    y_axis_label = y_label if y_label.endswith("Score") else y_label + " Score"

    plt.title(title, fontsize=14)
    plt.xlabel(x_label, fontsize=12)
    plt.ylabel(y_axis_label, fontsize=12)
    plt.grid(axis="y", linestyle="--", alpha=0.7)
    plt.xticks(rotation=45)  # Keep the long category labels readable
    plt.yticks(range(-4, 5))
    plt.tight_layout()
    plt.axhline(y=0, lw=1, color="k")  # Reference line at zero

    plt.savefig(path)

In [None]:
# Improvement box plots for the columns at positions 1-7 of `improvement`
# (position 0 holds the "user" column).
plot_table(
    df=improvement.iloc[:, 1:8],
    path="thesis/figures/first_trial_results.svg",
    title="Improvement Scores by Category",
    x_label="Category",
    y_label="Improvement",
)

In [None]:
# Improvement box plots for the columns from position 8 onwards of
# `improvement`.
plot_table(
    df=improvement.iloc[:, 8:],
    path="thesis/figures/second_trial_results.svg",
    title="Improvement Scores by Category",
    x_label="Category",
    y_label="Improvement",
)

In [None]:
# Box plots of the AI replies for every column except the leading "user"
# column.
plot_table(
    df=ai.iloc[:, 1:],
    path="thesis/figures/ai_results.svg",
    title="Results for the AI approach",
    x_label="Category",
    y_label="Score",
)

In [None]:
# Box plots of the traditional replies for every column except the leading
# "user" column.
plot_table(
    df=trad.iloc[:, 1:],
    path="thesis/figures/traditional_results.svg",
    title="Results for the traditional approach",
    x_label="Category",
    y_label="Score",
)

In [None]:
def plot_comparison(
    ai_df, traditional_df, path: str, title: str, x_label: str, y_label: str
):
    """Draw grouped box plots comparing two tables column by column and save them.

    Both frames are melted to long format, tagged with their origin in a
    "Table" column ("AI" or "Traditional") and combined, so that every
    category on the x-axis shows one box per origin.

    Args:
        ai_df: Table of replies for the AI approach.
        traditional_df: Table of replies for the traditional approach.
        path: File the figure is written to.
        title: Title shown above the plot.
        x_label: Label of the x-axis (also the name of the melted category
            column).
        y_label: Label of the y-axis (also the name of the melted value
            column).
    """
    # Bring both tables into long format and remember where each row came from.
    melted_ai_df = ai_df.melt(var_name=x_label, value_name=y_label)
    melted_ai_df["Table"] = "AI"

    melted_traditional_df = traditional_df.melt(
        var_name=x_label, value_name=y_label
    )
    melted_traditional_df["Table"] = "Traditional"

    # Both melted frames carry their own 0..n-1 index; renumbering avoids
    # duplicate index labels in the combined frame.
    combined_df = pd.concat(
        [melted_ai_df, melted_traditional_df], ignore_index=True
    )
    medianprops = dict(linestyle="-", linewidth=1.5, color="black")

    # Scale the figure width with the number of categories.
    plt.figure(figsize=(len(ai_df.columns) + 2, 5))
    sns.boxplot(
        data=combined_df,
        x=x_label,
        medianprops=medianprops,
        y=y_label,
        width=0.6,
        hue="Table",  # One box per origin table
        palette="Set1",
        linewidth=1.5,  # Thicker box borders
    )

    plt.title(title, fontsize=14)
    plt.xlabel(x_label, fontsize=12)
    plt.ylabel(y_label, fontsize=12)
    plt.grid(axis="y", linestyle="--", alpha=0.7)
    plt.legend(bbox_to_anchor=(0.4, 0.2))
    plt.xticks(rotation=45)  # Keep the long category labels readable
    plt.tight_layout()

    plt.savefig(path)


In [None]:
# Compare AI and traditional replies for the columns at positions 1-7
# (position 0 holds the "user" column).
plot_comparison(
    ai_df=ai.iloc[:, 1 : 8],
    traditional_df=trad.iloc[:, 1 : 8],
    path="thesis/figures/first_comparison.svg",
    title="Comparison of AI and Traditional Approach",
    x_label="Category",
    y_label="Score",
)

In [None]:
# Compare AI and traditional replies for the columns from position 8 onwards.
plot_comparison(
    ai_df=ai.iloc[:, 8 : ],
    traditional_df=trad.iloc[:, 8 : ],
    path="thesis/figures/second_comparison.svg",
    title="Comparison of AI and Traditional Approach",
    x_label="Category",
    y_label="Score",
)