In [None]:
!pip install -U kaleido==0.2.1
!pip install -U "plotly>=6.1.1"




In [None]:
# Mount Google Drive at /content/drive (Colab only). All data and output
# paths used in the cells below are located under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import os
import json
import itertools
import random
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import regex as re
import numpy as np
import math



# Fehleranalyse

In [None]:

import os
import pandas as pd
from glob import glob
import plotly.graph_objects as go


def erroranalyse_relation_new(folder_path, relation_title=None):
    """Show an interactive error-analysis table for one relation folder.

    Each ``*.csv`` in ``folder_path`` is treated as one shot setting. Its
    predictions are stored under the text after the last underscore of the
    file's base name (e.g. ``..._3shot.csv`` -> ``3shot``); only columns whose
    key ends in ``shot`` are evaluated. A prediction counts as correct when its
    whitespace-split tokens equal those of ``answer_token``.

    The table lists every row whose template (last line of ``prompt`` in the
    first file whose name contains ``1shot``) has at least one wrong prediction
    in any shot setting. Correct cells are green, wrong cells red. The first
    cell of the gold column shows the total number of samples, the first cell
    of each shot column the error count over the listed rows.

    Args:
        folder_path: Directory with the per-shot result CSVs. Every CSV needs
            the columns ``answer_token`` and ``predicted_top1_token``; the
            ``1shot`` file additionally needs ``prompt``.
        relation_title: Title suffix for the figure; defaults to the base name
            of ``folder_path``.

    Raises:
        FileNotFoundError: If ``folder_path`` contains no CSV file.
        KeyError: If no template column is available (e.g. no ``1shot`` file)
            or a CSV lacks a required column.
    """
    csv_files = sorted(glob(os.path.join(folder_path, "*.csv")))
    if not csv_files:
        raise FileNotFoundError(f"Keine CSV-Dateien in {folder_path} gefunden!")

    merged_data = {}
    for file in csv_files:
        shot_label = os.path.basename(file).split(".")[0]
        # utf-8-sig also reads plain UTF-8 and strips a leading BOM, so the
        # first column name is never mangled (same as the export helper).
        df = pd.read_csv(file, encoding="utf-8-sig")

        if "answer_token" not in merged_data:
            merged_data["answer_token"] = df["answer_token"]

        if "last_template" not in merged_data and "1shot" in shot_label:
            # The template is the last line of the (stripped) prompt.
            merged_data["last_template"] = (
                df["prompt"].apply(lambda x: x.strip().split("\n")[-1])
            )

        merged_data[shot_label.split("_")[-1]] = df["predicted_top1_token"]

    merged_df = pd.DataFrame(merged_data)

    # Column order: gold answer, template (if present), shots by ascending number.
    ordered_cols = ["answer_token"]
    if "last_template" in merged_df.columns:
        ordered_cols.append("last_template")

    shot_cols = sorted(
        [c for c in merged_df.columns if c.endswith("shot")],
        key=lambda x: int(x.replace("shot", ""))
    )
    ordered_cols.extend(shot_cols)
    merged_df = merged_df[ordered_cols]

    def tokenize(s):
        # str() makes NaN comparable: it becomes the single token "nan".
        return str(s).split()

    def is_wrong(pred, gold):
        return tokenize(pred) != tokenize(gold)

    gold_col = "answer_token"
    pred_cols = shot_cols

    # A row is "wrong" if any shot setting misses the gold answer.
    row_wrong = merged_df.apply(
        lambda r: any(is_wrong(r[c], r[gold_col]) for c in pred_cols),
        axis=1
    )

    total_samples = len(merged_df)

    group_col_candidates = ["prompt", "last_template", "template"]
    group_col = next((c for c in group_col_candidates if c in merged_df.columns), None)
    if group_col is None:
        raise KeyError("Keine geeignete Template-/Prompt-Spalte gefunden!")

    # Keep all rows that share a template with at least one wrong row.
    prompts_with_error = merged_df.loc[row_wrong, group_col].unique()
    wrong_df = merged_df[merged_df[group_col].isin(prompts_with_error)]

    # Column-wise zip instead of iterrows(): same counts, no per-row Series.
    error_counts = {
        col: sum(is_wrong(v, g) for v, g in zip(wrong_df[col], wrong_df[gold_col]))
        for col in pred_cols
    }

    # First table column: the original row index, with an empty summary cell.
    values = [[""] + list(wrong_df.index)]
    colors = [["white"] * (len(wrong_df) + 1)]

    def col_vals(col):
        # The first entry of every column is a summary cell, the rest the data.
        body = [
            " ".join(tokenize(v)) if col in pred_cols else v
            for v in wrong_df[col]
        ]
        if col == gold_col:
            header = f"Token/Subtoken: {total_samples}"
            return [header] + body
        if col in pred_cols:
            return [f"Error: {error_counts[col]}"] + body
        return [""] + body

    def col_cols(col):
        # Green for a correct prediction, red for a wrong one, white otherwise.
        if col in pred_cols:
            body = [
                "#f8d7da" if is_wrong(v, g) else "#d4edda"
                for v, g in zip(wrong_df[col], wrong_df[gold_col])
            ]
        else:
            body = ["white"] * len(wrong_df)
        return ["white"] + body

    for c in wrong_df.columns:
        values.append(col_vals(c))
        colors.append(col_cols(c))

    title = relation_title or os.path.basename(folder_path)
    fig = go.Figure(data=[go.Table(
        header=dict(
            values=["Index"] + list(wrong_df.columns),
            fill_color="#333333",
            font=dict(color="white", size=12)
        ),
        cells=dict(
            values=values,
            fill_color=colors,
            font=dict(color="black", size=11),
            height=25
        )
    )])

    fig.update_layout(
        # 25 px per row plus title margin, capped at 600 px.
        height=min(600, 40 + 25 * (len(wrong_df) + 1)),
        title=f"Erroranalyse: {title}",
        margin=dict(l=0, r=0, t=40, b=0)
    )
    fig.show()


In [None]:
!pip install -q -U plotly==6.1.1 kaleido==0.2.1 pandas

import os, re, pandas as pd, plotly.graph_objects as go
from glob import glob

def export_compact_table_with_errors_f(folder_path, rows_to_show, out_name,
                                     out_dir="/content/drive/MyDrive/master_thesis/ploted_data_en",
                                     only_errors=False,
                                     show_title=False):
    """Export a compact, colour-coded prediction table as PDF and PNG.

    Each ``*.csv`` in ``folder_path`` is one shot setting. Its predictions are
    stored under the text after the last underscore of the file's base name;
    columns whose key ends in ``shot`` are ordered by the first number in the
    key. A prediction is correct when its whitespace-split tokens equal those
    of ``answer_token`` (green cell), otherwise wrong (red cell).

    Args:
        folder_path: Directory with the per-shot result CSVs. Every CSV needs
            ``answer_token`` and ``predicted_top1_token``; the first file whose
            name contains ``1shot`` supplies the "Template" column from the
            last line of its ``prompt`` column.
        rows_to_show: Index labels of the rows to render, in display order.
            Labels that are not available are skipped silently; a label given
            twice is rendered twice.
        out_name: Figure title (if ``show_title``) and, with every run of
            characters outside ``[0-9A-Za-z_-]`` replaced by ``_``, the file
            stem of the exported images.
        out_dir: Target directory; created if missing.
        only_errors: If True, rows are selected from (and errors are counted
            over) only those rows with at least one wrong prediction.
        show_title: If True, ``out_name`` is drawn as the figure title.

    Raises:
        FileNotFoundError: If ``folder_path`` contains no CSV file.

    Notes:
        The "Error" numbers in the shot headers are counted over all candidate
        rows, not just the displayed ones; "Count" in the gold header is the
        total number of rows. ``fig.write_image`` needs a static image backend
        (this notebook installs kaleido for that).
    """
    csv_files = sorted(glob(os.path.join(folder_path, "*.csv")))
    if not csv_files:
        # Fail early with a clear message instead of an opaque KeyError later.
        raise FileNotFoundError(f"Keine CSV-Dateien in {folder_path} gefunden!")

    def tok(value):
        # str() makes NaN comparable: it becomes the single token "nan".
        return str(value).split()

    merged = {}
    for csv_path in csv_files:
        shot = os.path.basename(csv_path).split(".")[0]
        shot_df = pd.read_csv(csv_path, encoding="utf-8-sig")
        merged.setdefault("answer_token", shot_df["answer_token"])
        if "last_template" not in merged and "1shot" in shot:
            merged["last_template"] = shot_df["prompt"].str.strip().str.split("\n").str[-1]
        merged[shot.split("_")[-1]] = shot_df["predicted_top1_token"]

    df = pd.DataFrame(merged)

    shot_cols = sorted(
        [c for c in df if c.endswith("shot")],
        key=lambda s: int(re.search(r"\d+", s).group())
    )

    if only_errors:
        wrong = df.apply(
            lambda r: any(tok(r[c]) != tok(r["answer_token"]) for c in shot_cols),
            axis=1
        )
        base = df[wrong]
    else:
        base = df

    total_gold_tokens = len(df)

    # Errors per shot setting over all candidate rows (not only the shown ones).
    error_counts = {
        c: sum(tok(v) != tok(g) for v, g in zip(base[c], base["answer_token"]))
        for c in shot_cols
    }
    rename_map = {c: f"{c.replace('shot', '-Shot')}<br>Error: {error_counts[c]}" for c in shot_cols}

    sub = base.loc[[i for i in rows_to_show if i in base.index]]
    sub = sub.rename(columns={"answer_token": "Gold Token", "last_template": "Template"})
    sub = sub.rename(columns=rename_map)

    # The Template column only exists when a "1shot" file was found.
    text_cols = ["Gold Token"] + (["Template"] if "Template" in sub.columns else [])
    shot_headers = list(rename_map.values())
    sub = sub[text_cols + shot_headers]

    colors, col_values = [], []
    for header in sub.columns:
        if header in shot_headers:
            colors.append(["#d4edda" if tok(v) == tok(g) else "#f8d7da"
                           for v, g in zip(sub[header], sub["Gold Token"])])
            col_values.append([" ".join(tok(v)) for v in sub[header]])
        else:
            colors.append(["white"] * len(sub))
            col_values.append(sub[header].tolist())

    fig = go.Figure(go.Table(
        header=dict(
            values=[f"Gold Token<br>Count: {total_gold_tokens}"] + list(sub.columns[1:]),
            fill_color="#333",
            font=dict(color="white", size=12)
        ),
        cells=dict(values=col_values, fill_color=colors,
                   font=dict(color="black", size=11), height=26)
    ))

    # 150 px per text column, 90 px per shot column, 50 px per table row.
    w = 150 * len(text_cols) + 90 * len(rename_map)
    h = 50 * (len(sub) + 1) + 40

    if show_title:
        fig.update_layout(width=w, height=h, title=out_name, margin=dict(l=0, r=0, t=40, b=0))
    else:
        fig.update_layout(width=w, height=h, margin=dict(l=0, r=0, t=20, b=0))

    os.makedirs(out_dir, exist_ok=True)
    safe = re.sub(r"[^0-9A-Za-z_\-]+", "_", out_name).strip("_")
    pdf = os.path.join(out_dir, f"{safe}.pdf")
    png = os.path.join(out_dir, f"{safe}.png")
    fig.write_image(pdf, width=w, height=h, scale=1)
    fig.write_image(png, width=w, height=h, scale=1)
    print("✔︎ PDF :", pdf)
    print("✔︎ PNG :", png)
    fig.show()


# Factual

###**EN**

### Personenbezogene Fakten


#### person mother

In [None]:

# Hand-picked index labels of the merged person_mother table to export.
rows = [
    236, 237, 238,
    349, 350, 351, 352, 353, 354,
    104, 105, 106, 107,
]

export_compact_table_with_errors_f(
    folder_path="/content/drive/MyDrive/master_thesis/data/factual_data/few_shots_final/result_new/logits/permutation_0/person_mother",
    rows_to_show=rows,
    out_name="person_mother_Englisch",
    only_errors=False,
    show_title=False,
)

✔︎ PDF : /content/drive/MyDrive/master_thesis/ploted_data_en/person_mother_Englisch.pdf
✔︎ PNG : /content/drive/MyDrive/master_thesis/ploted_data_en/person_mother_Englisch.png


In [None]:

# Interactive error table for the person_mother relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/person_mother"
    ),
)

#### person father

In [None]:
# Interactive error table for the person_father relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/person_father"
    ),
)

#### person occupation

In [None]:
# Hand-picked index labels of the merged person_occupation table to export.
rows = [
    0, 3, 4, 6, 53,
]

export_compact_table_with_errors_f(
    folder_path="/content/drive/MyDrive/master_thesis/data/factual_data/few_shots_final/result_new/logits/permutation_0/person_occupation",
    rows_to_show=rows,
    out_name="person_occupation_Englisch",
    only_errors=False,
    show_title=False,
)

✔︎ PDF : /content/drive/MyDrive/master_thesis/ploted_data_en/person_occupation_Englisch.pdf
✔︎ PNG : /content/drive/MyDrive/master_thesis/ploted_data_en/person_occupation_Englisch.png


In [None]:
# Interactive error table for the person_occupation relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/person_occupation"
    ),
)

#### person university

In [None]:
# Hand-picked index labels of the merged person_university table to export.
rows = [
    141, 142, 143,
    120, 121, 122, 123, 124,
]

export_compact_table_with_errors_f(
    folder_path="/content/drive/MyDrive/master_thesis/data/factual_data/few_shots_final/result_new/logits/permutation_0/person_university",
    rows_to_show=rows,
    out_name="person_university_Englisch",
    only_errors=False,
    show_title=False,
)

✔︎ PDF : /content/drive/MyDrive/master_thesis/ploted_data_en/person_university_Englisch.pdf
✔︎ PNG : /content/drive/MyDrive/master_thesis/ploted_data_en/person_university_Englisch.png


In [None]:
# Interactive error table for the person_university relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/person_university"
    ),
)

####person plays instrument

In [None]:


# Interactive error table for the person_plays_instrument relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/person_plays_instrument"
    ),
)

#### person band lead singer

In [None]:
# Interactive error table for the person_band_lead_singer relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/person_band_lead_singer"
    ),
)

####person_plays_position_in_sport

In [None]:

# Interactive error table for the person_plays_position_in_sport relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/person_plays_position_in_sport"
    ),
)

####person plays pro sport

In [None]:
# Interactive error table for the person_plays_pro_sport relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/person_plays_pro_sport"
    ),
)

#### superhero person

In [None]:
# Interactive error table for the superhero_person relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/superhero_person"
    ),
)

### Orts- und länderbezogene Fakten

####landmark on continent

In [None]:
# Hand-picked index labels of the merged landmark_on_continent table to export.
rows = [0, 2, 43, 403, 169]

# `export_compact_table` is not defined anywhere in this notebook, so this
# cell raised a NameError on a fresh kernel. The export helper defined above
# accepts the same keyword arguments and is used instead.
export_compact_table_with_errors_f(
    "/content/drive/MyDrive/master_thesis/data/factual_data/"
    "few_shots_final/result_new/logits/permutation_0/landmark_on_continent",
    rows_to_show=rows,
    out_name="landmark_on_continent_englisch",
    only_errors=False,
    show_title=False,
)

✔︎ PDF : /content/drive/MyDrive/master_thesis/ploted_data_en/landmark_on_continent_englisch.pdf
✔︎ PNG : /content/drive/MyDrive/master_thesis/ploted_data_en/landmark_on_continent_englisch.png


In [None]:
# Interactive error table for the landmark_on_continent relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/landmark_on_continent"
    ),
)

####landmark in country

In [None]:
# Reference (landmark, country) pairs, kept as a comment: as bare, unevenly
# indented expressions they made this cell fail with an IndentationError.
# NOTE(review): presumably the few-shot demonstration pairs for this relation
# - confirm.
#   ("Eiffel Tower", "France"), ("Statue of Liberty", "United States"),
#   ("Christ the Redeemer", "Brazil"), ("Big Ben", "United Kingdom"),
#   ("Great Wall of China", "China"), ("Sydney Opera House", "Australia"),
#   ("Taj Mahal", "India"), ("Mount Fuji", "Japan"),
#   ("Burj Khalifa", "United Arab Emirates"), ("Colosseum", "Italy")

erroranalyse_relation_new("/content/drive/MyDrive/master_thesis/data/factual_data/few_shots_final/result_new/logits/permutation_0/landmark_in_country")

####food from country

In [None]:
# Reference (food, country) pairs, kept as a comment: the pasted fragment had
# an unbalanced parenthesis and made this cell fail with a SyntaxError.
# NOTE(review): presumably the few-shot demonstration pairs for this relation
# - confirm.
#   ("Souvlaki", "Greece"),
#   ("Ratatouille", "France"),
#   ("Kebab", "Turkey"),
#   ("Khachapuri", "Georgia"),
#   ("Brigadeiro", "Brazil"),
#   ("Gazpacho", "Spain"),
#   ("Moules-frites", "Belgium"),
#   ("Poffertjes", "Netherlands"),
#   ("Tajine", "Morocco"),
#   ("Picarones", "Peru")

erroranalyse_relation_new("/content/drive/MyDrive/master_thesis/data/factual_data/few_shots_final/result_new/logits/permutation_0/food_from_country")

####city in country

In [None]:
  normalize("city_in_country"): [
        ("Paris", "France"),
        ("Tokyo", "Japan"),
        ("Berlin", "Germany"),
        ("Toronto", "Canada"),
        ("Madrid", "Spain"),
        ("Lisbon", "Portugal"),
        ("Cairo", "Egypt"),
        ("Rome", "Italy"),
        ("Seoul", "South Korea"),
        ("Athens", "Greece")
    ],

In [None]:
# Hand-picked index labels of the merged city_in_country table to export.
rows = [0, 1, 13, 14, 15]

# `export_compact_table` is not defined anywhere in this notebook, so this
# cell raised a NameError on a fresh kernel. The export helper defined above
# accepts the same keyword arguments and is used instead.
export_compact_table_with_errors_f(
    "/content/drive/MyDrive/master_thesis/data/factual_data/"
    "few_shots_final/result_new/logits/permutation_0/city_in_country",
    rows_to_show=rows,
    out_name="city_in_country_englisch",
    only_errors=False,
    show_title=False,
)

✔︎ PDF : /content/drive/MyDrive/master_thesis/ploted_data_en/city_in_country_englisch.pdf
✔︎ PNG : /content/drive/MyDrive/master_thesis/ploted_data_en/city_in_country_englisch.png


In [None]:
# Interactive error table for the city_in_country relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/city_in_country"
    ),
)

####country capital city

In [None]:
 normalize("country_capital_city"): [
    ("Norway", "Oslo"), ("Finland", "Helsinki"), ("Denmark", "Copenhagen"),
    ("Netherlands", "Amsterdam"), ("Switzerland", "Bern"),
    ("Austria", "Vienna"), ("Belgium", "Brussels"), ("Czech Republic", "Prague"),
    ("Ireland", "Dublin"), ("Portugal", "Lisbon")
    ],

In [None]:
# Interactive error table for the country_capital_city relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/country_capital_city"
    ),
)

####country language

In [None]:
# Interactive error table for the country_language relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/country_language"
    ),
)

####country largest city

In [None]:
# Interactive error table for the country_largest_city relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/country_largest_city"
    ),
)

####country currency

In [None]:
# Interactive error table for the country_currency relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/country_currency"
    ),
)

###Unternehmens- & Produktbezogene Fakten

#### product by company

In [None]:
# Interactive error table for the product_by_company relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/product_by_company"
    ),
)

#### company ceo

In [None]:
# Interactive error table for the company_ceo relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/company_ceo"
    ),
)

####company hq

In [None]:
# Interactive error table for the company_hq relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/company_hq"
    ),
)

###Historische/Zeit- & sonstige Fakten

####president birth year

In [None]:
# Interactive error table for the presidents_birth_year relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/presidents_birth_year"
    ),
)

#### president election year

In [None]:
# Interactive error table for the presidents_election_year relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/presidents_election_year"
    ),
)

####star constellation

In [None]:
# Majority-label bias: rows selected to illustrate it for star_constellation.
rows = [42, 43, 44, 45, 46, 47, 48, 49, 50]

# `export_compact_table` is not defined anywhere in this notebook, so this
# cell raised a NameError on a fresh kernel. The export helper defined above
# accepts the same keyword arguments and is used instead.
export_compact_table_with_errors_f(
    "/content/drive/MyDrive/master_thesis/data/factual_data/"
    "few_shots_final/result_new/logits/permutation_0/star_constellation",
    rows_to_show=rows,
    out_name="star_constellation_englisch",
    only_errors=False,
    show_title=False,
)

✔︎ PDF : /content/drive/MyDrive/master_thesis/ploted_data_en/star_constellation_englisch.pdf
✔︎ PNG : /content/drive/MyDrive/master_thesis/ploted_data_en/star_constellation_englisch.png


In [None]:
# Interactive error table for the star_constellation relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/star_constellation"
    ),
)

#### pokemon evolutions

In [None]:
# Interactive error table for the pokemon_evolutions relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/pokemon_evolutions"
    ),
)

####superhero archnemesis

In [None]:
# Interactive error table for the superhero_archnemesis relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/factual_data/"
        "few_shots_final/result_new/logits/permutation_0/superhero_archnemesis"
    ),
)

# Linguistic

###EN

####Adj Antonym

In [None]:
# Hand-picked index labels of the merged adj_antonym table to export.
# NOTE(review): label 19 is listed twice, so that row is rendered twice
# - confirm this is intended.
rows = [
    14, 13, 23, 7, 75, 19,
    80, 81, 104, 19, 85, 86,
]

export_compact_table_with_errors_f(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/linguistic_data/"
        "few_shots_final/result_10_accuracy/logits/permutation_0/adj_antonym"
    ),
    rows_to_show=rows,
    out_name="adj_antonym_Englisch",
    only_errors=False,
    show_title=False,
)

✔︎ PDF : /content/drive/MyDrive/master_thesis/ploted_data_en/adj_antonym_Englisch.pdf
✔︎ PNG : /content/drive/MyDrive/master_thesis/ploted_data_en/adj_antonym_Englisch.png


In [None]:
# Interactive error table for the adj_antonym relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/linguistic_data/"
        "few_shots_final/result_10_accuracy/logits/permutation_0/adj_antonym"
    ),
)

####Adj Comparative

In [None]:
# Hand-picked index labels of the merged adj_comparative table to export.
rows = [
    0, 1, 14, 10, 42, 43,
    76, 77, 67, 68, 33, 34,
]

export_compact_table_with_errors_f(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/linguistic_data/"
        "few_shots_final/result_10_accuracy/logits/permutation_0/adj_comparative"
    ),
    rows_to_show=rows,
    out_name="adj_comparative_Englisch",
    only_errors=False,
    show_title=False,
)

✔︎ PDF : /content/drive/MyDrive/master_thesis/ploted_data_en/adj_comparative_Englisch.pdf
✔︎ PNG : /content/drive/MyDrive/master_thesis/ploted_data_en/adj_comparative_Englisch.png


In [None]:
# Interactive error table for the adj_comparative relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/linguistic_data/"
        "few_shots_final/result_10_accuracy/logits/permutation_0/adj_comparative"
    ),
)


####Adj Superlative

In [None]:
# Hand-picked index labels of the merged adj_superlative table to export.
rows = [
    3, 5, 6, 27, 28, 29, 62,
    63, 80, 81, 91, 92, 47, 48,
]

export_compact_table_with_errors_f(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/linguistic_data/"
        "few_shots_final/result_10_accuracy/logits/permutation_0/adj_superlative"
    ),
    rows_to_show=rows,
    out_name="adj_superlative_Englisch",
    only_errors=False,
    show_title=False,
)

✔︎ PDF : /content/drive/MyDrive/master_thesis/ploted_data_en/adj_superlative_Englisch.pdf
✔︎ PNG : /content/drive/MyDrive/master_thesis/ploted_data_en/adj_superlative_Englisch.png


In [None]:
# Interactive error table for the adj_superlative relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/linguistic_data/"
        "few_shots_final/result_10_accuracy/logits/permutation_0/adj_superlative"
    ),
)



####verb_past_tense

In [None]:
# Hand-picked index labels of the merged verb_past_tense table to export.
rows = [
    2, 18,
]

export_compact_table_with_errors_f(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/linguistic_data/"
        "few_shots_final/result_10_accuracy/logits/permutation_0/verb_past_tense"
    ),
    rows_to_show=rows,
    out_name="verb_past_tense_Englisch",
    only_errors=False,
    show_title=False,
)

✔︎ PDF : /content/drive/MyDrive/master_thesis/ploted_data_en/verb_past_tense_Englisch.pdf
✔︎ PNG : /content/drive/MyDrive/master_thesis/ploted_data_en/verb_past_tense_Englisch.png


####word first letter

In [None]:
# Hand-picked index labels of the merged word_first_letter table to export.
rows = [
    2, 9, 28, 99,
]

export_compact_table_with_errors_f(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/linguistic_data/"
        "few_shots_final/result_10_accuracy/logits/permutation_0/word_first_letter"
    ),
    rows_to_show=rows,
    out_name="word_first_letter_Englisch",
    only_errors=False,
    show_title=False,
)

✔︎ PDF : /content/drive/MyDrive/master_thesis/ploted_data_en/word_first_letter_Englisch.pdf
✔︎ PNG : /content/drive/MyDrive/master_thesis/ploted_data_en/word_first_letter_Englisch.png


In [None]:
# Interactive error table for the word_first_letter relation.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/linguistic_data/"
        "few_shots_final/result_10_accuracy/logits/permutation_0/word_first_letter"
    ),
)


####word_last_letter

In [None]:
# Interactive error table for the word_last_letter relation.
# NOTE(review): the other linguistic cells read from result_10_accuracy, this
# one reads from result_new - confirm this is the intended folder.
erroranalyse_relation_new(
    folder_path=(
        "/content/drive/MyDrive/master_thesis/data/linguistic_data/"
        "few_shots_final/result_new/logits/permutation_0/word_last_letter"
    ),
)

