# TASD Overleaf Generator

In [2]:
import json

# Annotation files to summarise; one LaTeX table is printed per file.
name_list = [
    "../../11_annotations/crowd/tasd/trainset_crowd_tasd_noconflict.jsonl",
    "../../11_annotations/students/tasd/trainset_students_tasd_noconflict.jsonl",
    "../../11_annotations/llm/tasd/tasd_llm_trainset_low.jsonl",
    "../../11_annotations/experts/tasd_experts_trainset_low.jsonl",
    "../../11_annotations/text_only/updatet_labels_original.jsonl",
]

# ===== CONFIG =====
include_conflict = False  # <-- SWITCH: set True to add a "conflict" column
categories = ['ambiente', 'essen', 'gesamteindruck', 'preis', 'service']
# ==================

# Define polarity order and labels
if include_conflict:
    polarities = ['positive', 'negative', 'neutral', 'conflict']
else:
    polarities = ['positive', 'negative', 'neutral']

# Map German polarity names to the lowercase English keys used in `polarities`.
# The values must be lowercase: a capitalised value would never match a key of
# `stats` and the label would be dropped from the counts without any warning.
pol_map = {
    'Positiv': 'positive',
    'Negativ': 'negative',
    'Neutral': 'neutral',
    'Konflikt': 'conflict'
}

for name in name_list:
    print(f"% ===== FILE: {name} ===== \n\n")
    file_name = name
    # Initialize stats: stats[category][polarity] -> explicit/implicit counters
    stats = {cat: {pol: {'explicit': 0, 'implicit': 0}
                   for pol in polarities} for cat in categories}

    # Read JSONL file; every line carries a "labels" list of
    # (category, polarity, phrase) triples.
    with open(file_name, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines, e.g. a trailing newline at EOF
            entry = json.loads(line)
            for cat, pol, phrase in entry["labels"]:
                # Unknown spellings fall back to their lowercase form so that
                # 'Positive' / 'POSITIVE' are counted like 'positive'.
                pol_eng = pol_map.get(pol, str(pol).lower())
                if cat in stats and pol_eng in stats[cat]:
                    # A phrase of "NULL" marks an implicit aspect mention.
                    if phrase == "NULL":
                        stats[cat][pol_eng]['implicit'] += 1
                    else:
                        stats[cat][pol_eng]['explicit'] += 1

    # Prepare totals for each column
    col_totals = {pol: {'explicit': 0, 'implicit': 0} for pol in polarities}
    total_totals = {'explicit': 0, 'implicit': 0}

    # ===== LaTeX OUTPUT =====
    print(r"\begin{table}[h!]")
    print(r"\centering")
    print(
        r"\caption{Counts of polarity triplets by category, with explicit/implicit split}")
    print(r"\label{tab:triplet_counts}")

    # Define column alignment: one label column, then an explicit/implicit
    # pair per polarity plus one pair for the row total.
    col_format = "l" + "cc" * len(polarities) + "cc"
    print(r"\begin{tabular}{" + col_format + "}")

    # First header row
    first_header = [" "]
    for pol in polarities:
        first_header.append(r"\multicolumn{2}{c}{\textbf{" + pol + "}}")
    first_header.append(r"\multicolumn{2}{c}{\textbf{Total}}")
    print(" & ".join(first_header) + r" \\ \hline")

    # Second header row
    second_header = [r"\textbf{Aspect Category}"]
    for _ in polarities + ["Total"]:
        second_header += [r"\textbf{Expl.}", r"\textbf{Impl.}"]
    print(" & ".join(second_header) + r"\\")
    print(r"\midrule")

    # Data rows
    for cat in categories:
        row = [cat]
        total_explicit = 0
        total_implicit = 0
        for pol in polarities:
            e = stats[cat][pol]['explicit']
            i = stats[cat][pol]['implicit']
            row += [str(e), str(i)]
            total_explicit += e
            total_implicit += i
            col_totals[pol]['explicit'] += e
            col_totals[pol]['implicit'] += i
        total_totals['explicit'] += total_explicit
        total_totals['implicit'] += total_implicit
        row += [str(total_explicit), str(total_implicit)]
        # Rows are emitted comma-separated on purpose; for a LaTeX row use
        # print(" & ".join(row) + r" \\") instead.
        print(",".join(row))

    # Totals row
    print(r"\hline")
    totals_row = [r"Total"]
    for pol in polarities:
        totals_row.append(str(col_totals[pol]['explicit']))
        totals_row.append(str(col_totals[pol]['implicit']))
    totals_row.append(str(total_totals['explicit']))
    totals_row.append(str(total_totals['implicit']))
    # Same comma-separated convention as the data rows above.
    print(",".join(totals_row))
    print(r"\bottomrule")
    print(r"\end{tabular}")
    print(r"\end{table}")
    print("\n\n")


% ===== FILE: ../../11_annotations/crowd/tasd/trainset_crowd_tasd_noconflict.jsonl ===== 


\begin{table}[h!]
\centering
\caption{Counts of polarity triplets by category, with explicit/implicit split}
\label{tab:triplet_counts}
\begin{tabular}{lcccccccc}
  & \multicolumn{2}{c}{\textbf{positive}} & \multicolumn{2}{c}{\textbf{negative}} & \multicolumn{2}{c}{\textbf{neutral}} & \multicolumn{2}{c}{\textbf{Total}} \\ \hline
\textbf{Aspect Category} & \textbf{Expl.} & \textbf{Impl.} & \textbf{Expl.} & \textbf{Impl.} & \textbf{Expl.} & \textbf{Impl.} & \textbf{Expl.} & \textbf{Impl.}\\
\midrule
ambiente,81,3,26,8,0,0,107,11
essen,232,19,134,26,21,3,387,48
gesamteindruck,26,77,16,62,1,1,43,140
preis,8,2,20,16,2,1,30,19
service,128,10,79,34,3,1,210,45
\hline
Total,475,111,275,146,27,6,777,263
\bottomrule
\end{tabular}
\end{table}



% ===== FILE: ../../11_annotations/students/tasd/trainset_students_tasd_noconflict.jsonl ===== 


\begin{table}[h!]
\centering
\caption{Counts of polarity triplets 

# ACSA Overleaf Generator

In [3]:
import json

# Annotation files to summarise; one table is printed per file.
name_list = [
    "../../11_annotations/crowd/acsa/trainset_crowd_acsa_noconflict.jsonl",
    "../../11_annotations/students/acsa/trainset_students_acsa_noconflict.jsonl",
    "../../11_annotations/students/acsa/trainset_students_acsa_redo_noconflict.jsonl",
    "../../11_annotations/llm/acsa/acsa_llm_trainset_low.jsonl",
    "../../11_annotations/experts/acsa_experts_trainset_low.jsonl",
    "../../11_annotations/text_only/updatet_labels_original_acsa.jsonl",
]

# ===== CONFIG =====
include_conflict = False  # <-- SWITCH: set True to keep "Conflict" column
categories = ['ambiente', 'essen', 'gesamteindruck', 'preis', 'service']
# ==================

# Polarity columns in display order; "conflict" is appended only when enabled.
polarities = ['positive', 'negative', 'neutral']
if include_conflict:
    polarities.append('conflict')

# German polarity names and the English column keys they correspond to.
pol_map = {
    'Positiv': 'positive',
    'Negativ': 'negative',
    'Neutral': 'neutral',
    'Konflikt': 'conflict'
}

for name in name_list:
    print(f"% ===== FILE: {name} =====\n\n")

    # One zeroed counter per (category, polarity) cell.
    stats = {cat: dict.fromkeys(polarities, 0) for cat in categories}

    # Each JSONL line carries a "labels" list of (category, polarity) pairs.
    with open(name, "r", encoding="utf-8") as f:
        for line in f:
            entry = json.loads(line)
            for cat, pol in entry["labels"]:
                pol_eng = pol_map.get(pol, pol.lower())
                # Labels outside the configured categories/polarities are ignored.
                if cat not in stats or pol_eng not in stats[cat]:
                    continue
                stats[cat][pol_eng] += 1

    # ===== LaTeX OUTPUT =====
    # One label column, one column per polarity, one column for the row total.
    col_format = "l" + "c" * (len(polarities) + 1)
    for latex_line in (
        r"\begin{table}[h!]",
        r"\centering",
        r"\caption{Counts of polarity tuples by category}",
        r"\label{tab:tuple_counts}",
        r"\begin{tabular}{" + col_format + "}",
    ):
        print(latex_line)

    # Header row
    header = [r"\textbf{Aspect Category}"]
    header.extend(r"\textbf{" + pol.capitalize() + "}" for pol in polarities)
    header.append(r"\textbf{Total}")
    print(" & ".join(header) + r" \\ \hline")

    # Data rows; column and grand totals are accumulated along the way.
    col_totals = dict.fromkeys(polarities, 0)
    total_total = 0
    for cat in categories:
        counts = [stats[cat][pol] for pol in polarities]
        for pol, val in zip(polarities, counts):
            col_totals[pol] += val
        total = sum(counts)
        total_total += total
        row = [cat] + [str(val) for val in counts] + [str(total)]
        # Every row is printed twice: as a LaTeX row and as a CSV line.
        print(" & ".join(row) + r" \\")
        print(",".join(row))

    # Totals row
    print(r"\hline")
    totals_row = ["Total", *(str(col_totals[pol]) for pol in polarities), str(total_total)]
    print(" & ".join(totals_row) + r" \\")
    print(",".join(totals_row))
    for latex_line in (r"\bottomrule", r"\end{tabular}", r"\end{table}", "\n\n"):
        print(latex_line)
    


% ===== FILE: ../../11_annotations/crowd/acsa/trainset_crowd_acsa_noconflict.jsonl =====


\begin{table}[h!]
\centering
\caption{Counts of polarity tuples by category}
\label{tab:tuple_counts}
\begin{tabular}{lcccc}
\textbf{Aspect Category} & \textbf{Positive} & \textbf{Negative} & \textbf{Neutral} & \textbf{Total} \\ \hline
ambiente & 104 & 34 & 4 & 142 \\
ambiente,104,34,4,142
essen & 240 & 158 & 39 & 437 \\
essen,240,158,39,437
gesamteindruck & 157 & 120 & 16 & 293 \\
gesamteindruck,157,120,16,293
preis & 15 & 51 & 6 & 72 \\
preis,15,51,6,72
service & 174 & 161 & 3 & 338 \\
service,174,161,3,338
\hline
Total & 690 & 524 & 68 & 1282 \\
Total,690,524,68,1282
\bottomrule
\end{tabular}
\end{table}



% ===== FILE: ../../11_annotations/students/acsa/trainset_students_acsa_noconflict.jsonl =====


\begin{table}[h!]
\centering
\caption{Counts of polarity tuples by category}
\label{tab:tuple_counts}
\begin{tabular}{lcccc}
\textbf{Aspect Category} & \textbf{Positive} & \textbf{Negative} & \t

# ACSA Checker 
- Flags any category or polarity that differs from the predefined sets
- Flags entries that contain opposite polarities (positive and negative)

In [16]:
import json

# ===== CONFIG =====
# Every checked file lives below the same annotation root (relative to this notebook).
_ANNOTATION_ROOT = "../../11_annotations"
name_list = [
    f"{_ANNOTATION_ROOT}/{relative_path}"
    for relative_path in (
        "crowd/acsa/trainset_crowd_acsa_noconflict.jsonl",
        "students/acsa/trainset_students_acsa_noconflict.jsonl",
        "students/acsa/trainset_students_acsa_redo_noconflict.jsonl",
        "llm/acsa/acsa_llm_trainset_low.jsonl",
        "experts/acsa_experts_trainset_low.jsonl",
        "ground_truth/acsa_testset_low.jsonl",
    )
]

# Predefined label vocabulary the checker validates against.
categories = [
    "ambiente",
    "essen",
    "gesamteindruck",
    "preis",
    "service",
]
polarities = [
    "positive",
    "negative",
    "neutral",
    "conflict",
]
# ==================


def check_files(files, valid_categories=None, valid_polarities=None):
    """Print a report of suspicious labels found in JSONL annotation files.

    Each non-blank line of a file is parsed as one JSON object. Objects with
    a non-empty "labels" list are inspected; every label is expected to be a
    sequence whose first two items are (category, polarity).

    Reported findings (each followed by the offending raw line):
      * [Malformed Label]     - label is not a sequence of at least two items
      * [Invalid Category]    - category is not in ``valid_categories``
      * [Invalid Polarity]    - lowercased polarity is not in ``valid_polarities``
      * [Conflict Found]      - polarity is "conflict" or "konflikt" (any case)
      * [Opposite Polarities] - the entry holds both a positive and a negative
                                label, regardless of category

    Args:
        files: Iterable of paths to JSONL files.
        valid_categories: Allowed category names. Defaults to the module-level
            ``categories`` list.
        valid_polarities: Allowed lowercase polarity names. Defaults to the
            module-level ``polarities`` list.

    Returns:
        None. All findings are written to stdout.

    Raises:
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Fall back to the notebook-level configuration when nothing is passed in.
    if valid_categories is None:
        valid_categories = categories
    if valid_polarities is None:
        valid_polarities = polarities

    for file in files:
        print(f"\n=== Checking {file} ===")
        with open(file, "r", encoding="utf-8") as f:
            for line_no, line in enumerate(f, start=1):
                raw = line.strip()
                if not raw:
                    continue  # tolerate blank lines, e.g. a trailing newline at EOF
                entry = json.loads(raw)

                # skip entries with no labels (missing key, null, or empty list)
                labels = entry.get("labels") if isinstance(entry, dict) else None
                if not labels:
                    continue

                # normalized polarities seen in this entry
                entry_pols = []

                for label in labels:
                    if not isinstance(label, (list, tuple)) or len(label) < 2:
                        # A checker should surface malformed labels, not hide them.
                        print(f"[Malformed Label] line {line_no}: {label!r} in {file}")
                        print(raw)
                        continue
                    cat, pol = label[0], label[1]

                    # Check for invalid categories
                    if cat not in valid_categories:
                        print(f"[Invalid Category] line {line_no}: {cat} in {file}")
                        print(raw)

                    # Normalize polarity for checking; non-strings are kept as-is
                    # so they are reported as invalid instead of raising.
                    pol_norm = pol.lower() if isinstance(pol, str) else pol

                    # Check for invalid polarities
                    if pol_norm not in valid_polarities:
                        print(f"[Invalid Polarity] line {line_no}: {pol} in {file}")
                        print(raw)

                    # Check for any 'conflict' variants (English or German spelling)
                    if pol_norm in ("conflict", "konflikt"):
                        print(f"[Conflict Found] line {line_no}: {pol} in {file}")
                        print(raw)

                    entry_pols.append(pol_norm)

                # Check for opposite polarities within same text
                if "positive" in entry_pols and "negative" in entry_pols:
                    print(f"[Opposite Polarities] line {line_no} in {file}")
                    print(raw)

# Run the checks on every file in name_list; findings are printed to stdout per file.
check_files(name_list)



=== Checking ../../11_annotations/crowd/acsa/trainset_crowd_acsa_noconflict.jsonl ===
[Opposite Polarities] line 47 in ../../11_annotations/crowd/acsa/trainset_crowd_acsa_noconflict.jsonl
{"id": "46", "original_id": "109", "text": "Der georderte vegane Bagel war köstlich was wir von den lieblos angerichteten Käsespätzle leider nicht behaupten konnten.", "labels": [["essen", "negative"], ["essen", "positive"]]}
[Opposite Polarities] line 69 in ../../11_annotations/crowd/acsa/trainset_crowd_acsa_noconflict.jsonl
{"id": "68", "original_id": "170", "text": "Vom Ambiente her natürlich toll, RESTAURANT_NAME eben, alles sehr sauber...aber der Empfang war mehr als unfreundlich, der Kellner am Tisch dafür wieder sehr aber nachdem wir von zwei servicekräften sehr unfreundlich begrüßt wurden wären wir fast wieder gegangen, essen war dann leider auch nicht toll, die Burger,waren sehr trocken, würden nicht nochmal wieder kommen...leider", "labels": [["ambiente", "positive"], ["service", "positive"