In [16]:
import sys
import os

sys.path.append("..")

DATABASE_DIR_MANIFESTOS = "../../data/manifestos/chroma/openai"
DATABASE_DIR_DEBATES = "../../data/debates/chroma/openai"
TEST_DATA_PATH = "../../streamlit_app/example_prompts.csv"

# Here we define the experimental run! This is important for the filenames, etc!

In [17]:
experiment_name = "2_llm"
experiment_run_name = "2b_mixtral8x7b"

In [18]:
import numpy as np
import pandas as pd
from tqdm.auto import tqdm


from database.vector_database import VectorDatabase
from models.embedding import ManifestoBertaEmbeddings
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_mistralai.chat_models import ChatMistralAI
from langchain_anthropic import ChatAnthropic
from langchain_google_genai import ChatGoogleGenerativeAI


from datasets import Dataset, DatasetDict, load_from_disk

In [19]:
def format_response(response):
    q = response[next(iter(response))]["question"]
    c = {party: response[party]["context"] for party in response.keys()}
    d = {
        source: {party: response[party]["docs"][source]
                 for party in response.keys()}
        for source in response[list(response.keys())[0]]["docs"].keys()
    }
    a = {party: response[party]["answer"] for party in response.keys()}
    response = {"question": q, "context": c, "docs": d, "answer": a}
    return response

# Create chain

In [20]:
from models.generation import generate_chain

# Select an embedding_model
########################################################################
# embedding_model = ManifestoBertaEmbeddings()
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")


# Select an LLM
#########################################################################
# LARGE_LANGUAGE_MODEL = ChatOpenAI(
#     model_name="gpt-3.5-turbo", max_tokens=300, temperature=0
# )

LARGE_LANGUAGE_MODEL = ChatMistralAI(
    name="open-mixtral-8x7b", max_tokens=300, temperature=0
)

# LARGE_LANGUAGE_MODEL = ChatAnthropic(
#     model_name="claude-3-haiku-20240307", max_tokens=300, temperature=0
# )


db_manifestos = VectorDatabase(
    data_path="../data/manifestos",
    embedding_model=embedding_model,
    database_directory=DATABASE_DIR_MANIFESTOS,
    source_type="manifestos",
)

db_debates = VectorDatabase(
    data_path="../data/debates",
    embedding_model=embedding_model,
    database_directory=DATABASE_DIR_DEBATES,
    source_type="debates",
)

chain = generate_chain(
    [db_manifestos, db_debates],
    llm=LARGE_LANGUAGE_MODEL,
    language="Deutsch",
    k=3,
)

reloaded database
reloaded database


# Create dataset with question, context, and answer for chain

In [22]:
recreate_dataset = True
if recreate_dataset:
    # Create dataset with question, context and answer

    # Load all test questions
    # df_test_simple = pd.read_csv(os.path.join(TEST_DATA_DIR, "simple_questions.csv"))
    # df_test_complex = pd.read_csv(os.path.join(TEST_DATA_DIR, "complex_questions.csv"))
    df_test = pd.read_csv(TEST_DATA_PATH, delimiter=";")

    # Here we select, which parties we want to evaluate
    parties = ["afd", "spd", "linke", "gruene", "fdp", "cdu"]

    # Here we select the questions that should be evaluated
    questions = df_test["Deutsch"]

    # Dictionary to save results
    dataset_dict = {
        party: {"question": [], "contexts": [], "answer": []} for party in parties
    }

    # Loop through all questions and get chain answer
    for question in tqdm(questions):
        print(question)
        response = chain.invoke(question)
        response = format_response(response)

        # for pary in parties:
        for party in parties:
            contexts = [
                doc.page_content for doc in response["docs"]["manifestos"][party]
            ]
            answer = response["answer"][party]

            dataset_dict[party]["question"].append(question)
            dataset_dict[party]["contexts"].append(contexts)
            dataset_dict[party]["answer"].append(answer)

    dataset = DatasetDict(
        {party: Dataset.from_dict(dataset_dict[party]) for party in parties}
    )

    # Save
    dataset.save_to_disk(
        f"Experiments/{experiment_name}/dataset_{experiment_run_name}")

  0%|          | 0/24 [00:00<?, ?it/s]

Sollen die Militärausgaben erhöht werden?
Sollte es ein Selbstbestimmungsrecht für Trans-Personen geben?
Sollen wir in Europa mehr für den Natur- und Artenschutz tun?
Wie soll die Landwirtschaft unterstützt werden? 
Wie soll Europa Handel mit anderen Ländern treiben? (z.B. Handelsabkommen)
Wie sollen Flüchtlinge in Europa verteilt werden? 
Wie stehen die Parteien zur gleichgeschlechtlichen Ehe?
Wie sollten wir Fischfang betreiben damit das Meer und die Tiere geschützt werden?
Was wollen die Parteien tun um das Tierwohl in der Viehhaltung zu verbessern? 
Wie sollte die Zusammenarbeit mit den USA aussehen?
Wie können wir Jungendarbeitslosigkeit reduzieren? 
Wie sehen die Pläne zum Erreichen der Klimaneutralität aus?
Wie soll das Verhältnis zu China aussehen im Bezug auf Protektionismus?
Wie sollen Kameras und Technik dabei helfen Verbrechen zu stoppen ohne dass unsere Privatsphäre gestört wird?
Sollten mehr Länder der EU beitreten?
Wie sollen europäische high-tech Innovationen gefördert 

Saving the dataset (0/1 shards):   0%|          | 0/24 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/24 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/24 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/24 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/24 [00:00<?, ? examples/s]

Saving the dataset (0/1 shards):   0%|          | 0/24 [00:00<?, ? examples/s]

In [23]:
dataset = load_from_disk(f"Experiments/{experiment_name}/dataset_{experiment_run_name}")

# Import Ragas and translate prompts to German

In [24]:
import ragas
from ragas.metrics import faithfulness, answer_relevancy, context_relevancy


# llm used for adaptation
openai_model_adaption = ChatOpenAI(model_name="gpt-4")
ragas.adapt(
    metrics=[faithfulness, answer_relevancy, context_relevancy],
    language="german",
    llm=openai_model_adaption,
)

# Compute metrics with ragas

In [25]:
from ragas.run_config import RunConfig

# Configuration for ragas.evaluate
run_config = RunConfig()
run_config.max_retries = 3
run_config.max_timeout = 60  # in s
run_config.max_wait = 20  # in s

num_questions = len(dataset["afd"])
questions = [dataset["fdp"][i]["question"] for i in range(num_questions)]

results_dict = {}
# Define which parties should be evaluated
parties = ["afd", "spd", "linke", "gruene", "fdp", "cdu"]

# Define which metrics should be evaluated
list_of_metrics = [faithfulness, answer_relevancy]

for i in tqdm(range(num_questions)):
    results_dict.update({questions[i]: {}})
    print(f"Question: {questions[i]}")
    for party in parties:
        print(f"Evaluating party: {party}")
        ragas_score = ragas.evaluate(
            dataset[party].select([i]), metrics=list_of_metrics, run_config=run_config
        )
        results_dict[questions[i]].update({party: ragas_score})
    print(results_dict[questions[i]])

  0%|          | 0/24 [00:00<?, ?it/s]

Question: Sollen die Militärausgaben erhöht werden?
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': 0.7500, 'answer_relevancy': 0.8317}, 'spd': {'faithfulness': 0.7500, 'answer_relevancy': 0.8528}, 'linke': {'faithfulness': 1.0000, 'answer_relevancy': 0.8607}, 'gruene': {'faithfulness': 0.8333, 'answer_relevancy': 0.0000}, 'fdp': {'faithfulness': 0.8000, 'answer_relevancy': 0.0000}, 'cdu': {'faithfulness': 0.8000, 'answer_relevancy': 0.8277}}
Question: Sollte es ein Selbstbestimmungsrecht für Trans-Personen geben?
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

  value = np.nanmean(self.scores[cn])


Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

  value = np.nanmean(self.scores[cn])


{'afd': {'faithfulness': 0.5000, 'answer_relevancy': 0.0000}, 'spd': {'faithfulness': 1.0000, 'answer_relevancy': 0.0000}, 'linke': {'faithfulness': 1.0000, 'answer_relevancy': 0.8762}, 'gruene': {'faithfulness': 0.0000, 'answer_relevancy': 0.8022}, 'fdp': {'faithfulness': nan, 'answer_relevancy': 0.0000}, 'cdu': {'faithfulness': nan, 'answer_relevancy': 0.0000}}
Question: Sollen wir in Europa mehr für den Natur- und Artenschutz tun?
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

  value = np.nanmean(self.scores[cn])


Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': 0.5000, 'answer_relevancy': 0.9049}, 'spd': {'faithfulness': 1.0000, 'answer_relevancy': 0.9115}, 'linke': {'faithfulness': 1.0000, 'answer_relevancy': 0.8687}, 'gruene': {'faithfulness': 0.6667, 'answer_relevancy': 0.8959}, 'fdp': {'faithfulness': nan, 'answer_relevancy': 0.8887}, 'cdu': {'faithfulness': 0.1667, 'answer_relevancy': 0.8780}}
Question: Wie soll die Landwirtschaft unterstützt werden? 
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': 1.0000, 'answer_relevancy': 0.8690}, 'spd': {'faithfulness': 1.0000, 'answer_relevancy': 0.9052}, 'linke': {'faithfulness': 0.5714, 'answer_relevancy': 0.0000}, 'gruene': {'faithfulness': 1.0000, 'answer_relevancy': 0.8971}, 'fdp': {'faithfulness': 0.6667, 'answer_relevancy': 0.8963}, 'cdu': {'faithfulness': 1.0000, 'answer_relevancy': 0.9078}}
Question: Wie soll Europa Handel mit anderen Ländern treiben? (z.B. Handelsabkommen)
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': 0.7000, 'answer_relevancy': 0.8724}, 'spd': {'faithfulness': 0.3750, 'answer_relevancy': 0.8863}, 'linke': {'faithfulness': 1.0000, 'answer_relevancy': 0.7945}, 'gruene': {'faithfulness': 1.0000, 'answer_relevancy': 0.8812}, 'fdp': {'faithfulness': 1.0000, 'answer_relevancy': 0.8464}, 'cdu': {'faithfulness': 1.0000, 'answer_relevancy': 0.8815}}
Question: Wie sollen Flüchtlinge in Europa verteilt werden? 
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': 0.4286, 'answer_relevancy': 0.9218}, 'spd': {'faithfulness': 0.4000, 'answer_relevancy': 0.8956}, 'linke': {'faithfulness': 1.0000, 'answer_relevancy': 0.8767}, 'gruene': {'faithfulness': 1.0000, 'answer_relevancy': 0.8995}, 'fdp': {'faithfulness': 0.4000, 'answer_relevancy': 0.9287}, 'cdu': {'faithfulness': 0.8000, 'answer_relevancy': 0.8864}}
Question: Wie stehen die Parteien zur gleichgeschlechtlichen Ehe?
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

  value = np.nanmean(self.scores[cn])


Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': nan, 'answer_relevancy': 0.0000}, 'spd': {'faithfulness': 1.0000, 'answer_relevancy': 0.9023}, 'linke': {'faithfulness': 0.8000, 'answer_relevancy': 0.0000}, 'gruene': {'faithfulness': 1.0000, 'answer_relevancy': 0.8555}, 'fdp': {'faithfulness': 1.0000, 'answer_relevancy': 0.8555}, 'cdu': {'faithfulness': 0.0000, 'answer_relevancy': 0.0000}}
Question: Wie sollten wir Fischfang betreiben damit das Meer und die Tiere geschützt werden?
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

  value = np.nanmean(self.scores[cn])


Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': 0.4000, 'answer_relevancy': 0.9195}, 'spd': {'faithfulness': 1.0000, 'answer_relevancy': 0.8790}, 'linke': {'faithfulness': nan, 'answer_relevancy': 0.9181}, 'gruene': {'faithfulness': 1.0000, 'answer_relevancy': 0.9031}, 'fdp': {'faithfulness': 1.0000, 'answer_relevancy': 0.8874}, 'cdu': {'faithfulness': 0.0000, 'answer_relevancy': 0.8979}}
Question: Was wollen die Parteien tun um das Tierwohl in der Viehhaltung zu verbessern? 
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

  value = np.nanmean(self.scores[cn])


Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

  value = np.nanmean(self.scores[cn])


Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': nan, 'answer_relevancy': 0.8925}, 'spd': {'faithfulness': 1.0000, 'answer_relevancy': 0.8664}, 'linke': {'faithfulness': 0.7143, 'answer_relevancy': 0.9094}, 'gruene': {'faithfulness': 1.0000, 'answer_relevancy': 0.8983}, 'fdp': {'faithfulness': nan, 'answer_relevancy': 0.0000}, 'cdu': {'faithfulness': 0.0000, 'answer_relevancy': 0.9550}}
Question: Wie sollte die Zusammenarbeit mit den USA aussehen?
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': 0.8333, 'answer_relevancy': 0.9136}, 'spd': {'faithfulness': 1.0000, 'answer_relevancy': 0.9106}, 'linke': {'faithfulness': 0.2000, 'answer_relevancy': 0.9167}, 'gruene': {'faithfulness': 0.9000, 'answer_relevancy': 0.0000}, 'fdp': {'faithfulness': 0.5000, 'answer_relevancy': 0.9008}, 'cdu': {'faithfulness': 0.5714, 'answer_relevancy': 0.9211}}
Question: Wie können wir Jungendarbeitslosigkeit reduzieren? 
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': 0.0000, 'answer_relevancy': 0.8335}, 'spd': {'faithfulness': 1.0000, 'answer_relevancy': 0.8534}, 'linke': {'faithfulness': 0.7143, 'answer_relevancy': 0.8875}, 'gruene': {'faithfulness': 0.8333, 'answer_relevancy': 0.8949}, 'fdp': {'faithfulness': 0.0000, 'answer_relevancy': 0.9241}, 'cdu': {'faithfulness': 0.0000, 'answer_relevancy': 0.8502}}
Question: Wie sehen die Pläne zum Erreichen der Klimaneutralität aus?
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

  value = np.nanmean(self.scores[cn])


Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': 0.5000, 'answer_relevancy': 0.8808}, 'spd': {'faithfulness': 1.0000, 'answer_relevancy': 0.8786}, 'linke': {'faithfulness': 1.0000, 'answer_relevancy': 0.9008}, 'gruene': {'faithfulness': nan, 'answer_relevancy': 0.9519}, 'fdp': {'faithfulness': 0.6000, 'answer_relevancy': 0.8632}, 'cdu': {'faithfulness': 1.0000, 'answer_relevancy': 0.9089}}
Question: Wie soll das Verhältnis zu China aussehen im Bezug auf Protektionismus?
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': 0.8333, 'answer_relevancy': 0.9124}, 'spd': {'faithfulness': 1.0000, 'answer_relevancy': 0.8903}, 'linke': {'faithfulness': 0.2000, 'answer_relevancy': 0.8848}, 'gruene': {'faithfulness': 1.0000, 'answer_relevancy': 0.0000}, 'fdp': {'faithfulness': 1.0000, 'answer_relevancy': 0.8878}, 'cdu': {'faithfulness': 1.0000, 'answer_relevancy': 0.8933}}
Question: Wie sollen Kameras und Technik dabei helfen Verbrechen zu stoppen ohne dass unsere Privatsphäre gestört wird?
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

  value = np.nanmean(self.scores[cn])


Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': 0.6667, 'answer_relevancy': 0.8088}, 'spd': {'faithfulness': 0.3333, 'answer_relevancy': 0.8020}, 'linke': {'faithfulness': 1.0000, 'answer_relevancy': 0.8191}, 'gruene': {'faithfulness': nan, 'answer_relevancy': 0.7829}, 'fdp': {'faithfulness': 0.2500, 'answer_relevancy': 0.0000}, 'cdu': {'faithfulness': 0.0000, 'answer_relevancy': 0.0000}}
Question: Sollten mehr Länder der EU beitreten?
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

  value = np.nanmean(self.scores[cn])


Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': 0.0000, 'answer_relevancy': 0.8456}, 'spd': {'faithfulness': 1.0000, 'answer_relevancy': 0.8637}, 'linke': {'faithfulness': 0.7500, 'answer_relevancy': 0.8534}, 'gruene': {'faithfulness': nan, 'answer_relevancy': 0.8830}, 'fdp': {'faithfulness': 1.0000, 'answer_relevancy': 0.8589}, 'cdu': {'faithfulness': 0.0000, 'answer_relevancy': 0.8294}}
Question: Wie sollen europäische high-tech Innovationen gefördert werden?
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

  value = np.nanmean(self.scores[cn])


Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': 0.0000, 'answer_relevancy': 0.0000}, 'spd': {'faithfulness': nan, 'answer_relevancy': 0.9440}, 'linke': {'faithfulness': 1.0000, 'answer_relevancy': 0.9489}, 'gruene': {'faithfulness': 1.0000, 'answer_relevancy': 0.9269}, 'fdp': {'faithfulness': 1.0000, 'answer_relevancy': 0.9388}, 'cdu': {'faithfulness': 0.8000, 'answer_relevancy': 0.9690}}
Question: Wie kann Europa weniger dreckige Energie nutzen und mehr saubere Energie?
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': 0.5714, 'answer_relevancy': 0.8214}, 'spd': {'faithfulness': 1.0000, 'answer_relevancy': 0.8305}, 'linke': {'faithfulness': 0.8333, 'answer_relevancy': 0.7910}, 'gruene': {'faithfulness': 0.8750, 'answer_relevancy': 0.7910}, 'fdp': {'faithfulness': 1.0000, 'answer_relevancy': 0.8561}, 'cdu': {'faithfulness': 1.0000, 'answer_relevancy': 0.7910}}
Question: Wie möchten die Parteien unsere Geschichte und Kulturen in Europa bewahren und zeigen?
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: linke


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: gruene


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: fdp


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

  value = np.nanmean(self.scores[cn])


Evaluating party: cdu


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

{'afd': {'faithfulness': 1.0000, 'answer_relevancy': 0.8729}, 'spd': {'faithfulness': 0.2500, 'answer_relevancy': 0.0000}, 'linke': {'faithfulness': 1.0000, 'answer_relevancy': 0.9074}, 'gruene': {'faithfulness': 0.6250, 'answer_relevancy': 0.8766}, 'fdp': {'faithfulness': nan, 'answer_relevancy': 0.0000}, 'cdu': {'faithfulness': 0.0000, 'answer_relevancy': 0.9083}}
Question: Sollen Geflüchtete ausgewiesen werden wenn sie sich strafbar machen?
Evaluating party: afd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

Evaluating party: spd


Evaluating:   0%|          | 0/2 [00:00<?, ?it/s]

24

In [29]:
# Flatten the dictionary
flattened_data = {}
for question, parties in results_dict.items():
    for party, metrics in parties.items():
        for metric, value in metrics.items():
            flattened_data[(question, party, metric)] = value

# Create a multi-index DataFrame
index = pd.MultiIndex.from_tuples(flattened_data.keys())
df = pd.Series(flattened_data, index=index).unstack().unstack()

file_name = f"Experiments/{experiment_name}/metrics_{experiment_run_name}.csv"

df.to_csv(file_name)

In [30]:
# Check if file saved correctly
df = pd.read_csv(file_name, header=[0, 1], index_col=[0, 1])
df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,answer_relevancy,answer_relevancy,answer_relevancy,answer_relevancy,answer_relevancy,faithfulness,faithfulness,faithfulness,faithfulness,faithfulness,faithfulness
Unnamed: 0_level_1,Unnamed: 1_level_1,cdu,fdp,gruene,linke,spd,afd,cdu,fdp,gruene,linke,spd
Soll Atomkraft weiterhin genutzt werden?,0.909923,0.892304,0.0,0.90621,0.916723,0.921107,0.0,0.0,0.0,0.4,1.0,0.75
Sollen Geflüchtete ausgewiesen werden wenn sie sich strafbar machen?,0.882801,0.0,0.0,0.804816,0.834492,0.0,0.8,0.285714,0.75,1.0,0.857143,0.666667
Sollen die Militärausgaben erhöht werden?,0.831757,0.81404,0.0,0.0,0.845158,0.852843,0.75,0.8,0.833333,0.833333,0.25,0.75
Sollen wir in Europa mehr für den Natur- und Artenschutz tun?,0.912383,0.884228,0.878255,0.89596,0.868656,0.911455,0.4,0.0,1.0,0.714286,1.0,1.0
Sollte es ein Selbstbestimmungsrecht für Trans-Personen geben?,0.0,0.0,0.0,0.802226,0.877838,0.0,0.5,,,0.0,1.0,0.8
