In [5]:
import sys
import os

# Put the parent directory on the import path; the `database.*` and
# `models.*` imports further down presumably resolve through it.
sys.path.append(os.path.abspath(os.pardir))

# Relative locations of the persisted Chroma stores and the question files.
DATABASE_DIR_MANIFESTOS = "../../data/manifestos/chroma/openai"
# NOTE(review): same value as DATABASE_DIR_MANIFESTOS — possibly a copy-paste
# slip; confirm whether the debates store lives in a different directory.
DATABASE_DIR_DEBATES = "../../data/manifestos/chroma/openai"
TEST_DATA_DIR = "../../data/questions"

In [11]:
import numpy as np
import pandas as pd
from tqdm.auto import tqdm

from database.vector_database import VectorDatabase
from models.embedding import ManifestoBertaEmbeddings
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_mistralai.chat_models import ChatMistralAI


from datasets import Dataset, DatasetDict, load_from_disk

# Create simple chain

In [7]:
from models.generation import generate_chain_with_balanced_retrieval

# Embedding model handed to the vector database.
# Alternative kept for reference: ManifestoBertaEmbeddings().
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")

# Chat model handed to the chain as `llm`.
# Alternative kept for reference:
#   ChatMistralAI(name="open-mixtral-8x7b", max_tokens=2000, temperature=0)
LARGE_LANGUAGE_MODEL = ChatOpenAI(
    temperature=0,
    max_tokens=2000,
    model_name="gpt-3.5-turbo",
)

# Vector database over the manifestos.
# NOTE(review): data_path starts at "../data" while DATABASE_DIR_MANIFESTOS
# starts at "../../data" — confirm both resolve to the intended locations.
db_manifestos = VectorDatabase(
    source_type="manifestos",
    data_path="../data/manifestos",
    database_directory=DATABASE_DIR_MANIFESTOS,
    embedding_model=embedding_model,
)

# Chain over the manifesto database only. The dataset cell reads both
# response["docs"] and response["answer"]; return_context=True is presumably
# what exposes the retrieved documents — confirm against models.generation.
chain = generate_chain_with_balanced_retrieval(
    [db_manifestos],
    k=5,
    language="Deutsch",
    return_context=True,
    llm=LARGE_LANGUAGE_MODEL,
)

reloaded database


In [8]:
# Build, for every party, a dataset of (question, contexts, answer) rows by
# running each simple question through the chain, then store it on disk.

df_test_simple = pd.read_csv(os.path.join(TEST_DATA_DIR, "simple_questions.csv"))
df_test_complex = pd.read_csv(os.path.join(TEST_DATA_DIR, "complex_questions.csv"))

parties = ["afd", "spd", "linke", "gruene", "fdp", "cdu"]

# Only the simple questions are sent through the chain in this cell.
questions = df_test_simple["EINFACHE FRAGEN"]

# Column-oriented storage: one dict of three parallel lists per party.
dataset_dict = dict(
    (party, {"question": [], "contexts": [], "answer": []}) for party in parties
)

for question in tqdm(questions):
    # One chain call per question; its result is indexed per party below.
    response = chain.invoke(question)

    for party in parties:
        party_docs = response["docs"]["manifestos"][party]
        contexts = [doc.page_content for doc in party_docs]
        answer = response["answer"][party]

        dataset_dict[party]["question"] += [question]
        dataset_dict[party]["contexts"] += [contexts]
        dataset_dict[party]["answer"] += [answer]

# One `Dataset` split per party, keyed by the party name.
dataset = DatasetDict(
    {name: Dataset.from_dict(columns) for name, columns in dataset_dict.items()}
)

dataset.save_to_disk("dataset")

  0%|          | 0/33 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [15]:
# Rebind `dataset` to the copy stored in the "dataset" directory, i.e. the
# same path the generation cell passes to save_to_disk.
dataset = load_from_disk("dataset")

In [16]:
# Preview: restrict the "afd" split to its first three rows.
dataset["afd"].select(range(3))

Dataset({
    features: ['question', 'contexts', 'answer'],
    num_rows: 3
})

In [17]:
from ragas.metrics import (
    faithfulness,
    answer_correctness,
)

from ragas import adapt

# gpt-4 is the model passed to `adapt` for the German adaptation of the
# faithfulness metric (presumably its prompts are translated — see ragas docs).
openai_model_adaption = ChatOpenAI(model_name="gpt-4")
adapt(
    llm=openai_model_adaption,
    language="german",
    metrics=[faithfulness],
)

In [23]:
from ragas import evaluate

# Per-party ragas scores, keyed by party name.
# Reuse an existing `result_dict` so that re-running this cell for another
# party keeps the scores gathered earlier, but create it when none exists yet.
# Previously the initialisation was commented out, so on a fresh kernel the
# loop failed with NameError at the first `result_dict.update(...)`.
result_dict = globals().get("result_dict", {})

# Parties evaluated by this run; edit the list to score other splits.
parties = ["cdu"]

for party in parties:
    print(f"Evaluating party: {party}")
    # Only the first five rows of the party's split are scored.
    ragas_score = evaluate(dataset[party].select(range(5)), metrics=[faithfulness])
    result_dict[party] = ragas_score

Evaluating party: cdu


Exception in thread Exception in threading.excepthook:
Exception ignored in thread started by: <bound method Thread._bootstrap of <Runner(Thread-19, stopped 10754224128)>>
Traceback (most recent call last):
  File "/Users/christianliedl/anaconda3/envs/bundestag/lib/python3.11/threading.py", line 1002, in _bootstrap
    self._bootstrap_inner()
  File "/Users/christianliedl/anaconda3/envs/bundestag/lib/python3.11/threading.py", line 1047, in _bootstrap_inner
    self._invoke_excepthook(self)
  File "/Users/christianliedl/anaconda3/envs/bundestag/lib/python3.11/threading.py", line 1359, in invoke_excepthook
    local_print("Exception in threading.excepthook:",
  File "/Users/christianliedl/anaconda3/envs/bundestag/lib/python3.11/site-packages/ipykernel/iostream.py", line 604, in flush
    self.pub_thread.schedule(self._flush)
  File "/Users/christianliedl/anaconda3/envs/bundestag/lib/python3.11/site-packages/ipykernel/iostream.py", line 267, in schedule
    self._event_pipe.send(b"")
  Fi

KeyboardInterrupt: 

In [24]:
# Show the scores gathered in result_dict so far.
result_dict

{'afd': {'faithfulness': 0.6333},
 'spd': {'faithfulness': 0.7500},
 'linke': {'faithfulness': 1.0000},
 'gruene': {'faithfulness': 0.7500}}