In [1]:
import os

from dotenv import load_dotenv

# Load settings from a local .env file into the process environment; the
# Azure OpenAI deployment names and API version are read from os.environ in
# later cells. Kept as the final expression so the cell shows its return value.
load_dotenv()

True

In [2]:
from langchain.document_loaders import PyPDFLoader

# Load the Spanish Constitution PDF as a list of LangChain documents.
loader = PyPDFLoader("../data/Constitución Española/Constitución española.pdf")
documents = loader.load()

# Copy each document's 'source' metadata value to a 'filename' key
# (presumably expected by the ragas test-set generator used later — confirm).
for document in documents:
    document.metadata.update({'filename': document.metadata['source']})

In [3]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter configuration: chunk size and overlap are measured with `len`,
# and separators are treated as literal strings rather than regexes.
text_splitter = RecursiveCharacterTextSplitter(
    is_separator_regex=False,
    length_function=len,
    chunk_overlap=50,
    chunk_size=450,
)

# NOTE(review): `texts` is not referenced by any later cell visible here;
# the generation step is given `documents` instead — confirm which is intended.
texts = text_splitter.split_documents(documents=documents)

In [4]:
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings
from ragas.testset.evolutions import multi_context, reasoning, simple
from ragas.testset.generator import TestsetGenerator

# Every Azure OpenAI client below takes its deployment name and API version
# from environment variables; a missing variable raises KeyError here.

# Chat model passed to ragas as the generator LLM.
generator_llm = AzureChatOpenAI(
    openai_api_version=os.environ["OPENAI_API_VERSION"],
    azure_deployment=os.environ["AZURE_GPT3TURBO_DEPLOYMENT"],
)

# Chat model passed to ragas as the critic LLM.
critic_llm = AzureChatOpenAI(
    openai_api_version=os.environ["OPENAI_API_VERSION"],
    azure_deployment=os.environ["AZURE_GPT4TURBO_DEPLOYMENT"],
)

# Embedding model passed to ragas alongside the two chat models.
embeddings = AzureOpenAIEmbeddings(
    azure_deployment=os.environ["AZURE_ADA2_DEPLOYMENT"],
    openai_api_version=os.environ["OPENAI_API_VERSION"],
)

# Argument order matters: generator LLM, critic LLM, embeddings.
generator = TestsetGenerator.from_langchain(generator_llm, critic_llm, embeddings)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
# Build a 20-sample test set from the loaded documents: half `simple`
# questions, a quarter `reasoning` and a quarter `multi_context`.
# This calls the configured LLMs; the recorded run took roughly 8 minutes.
testset = generator.generate_with_langchain_docs(
    documents,
    test_size=20,
    distributions={
        simple: 0.5,
        reasoning: 0.25,
        multi_context: 0.25,
    },
)

Generating: 100%|██████████| 20/20 [07:50<00:00, 23.51s/it]       


In [6]:
# Convert the generated test set to a DataFrame for inspection and export.
df = testset.to_pandas()

In [7]:
# Preview the first five generated samples.
df.head(n=5)

Unnamed: 0,question,contexts,ground_truth,evolution_type,metadata,episode_done
0,What is the role of the Presidente del Gobiern...,[Artículo 94. \n1. La prestación del consenti...,The Presidente del Gobierno in the Spanish gov...,simple,[{'source': '../data/Constitución Española/Con...,True
1,What does the principle of solidarity guarante...,"[El Tribunal de Cuentas, sin perjuicio de su p...",The State guarantees the effective realization...,simple,[{'source': '../data/Constitución Española/Con...,True
2,How do the powers of the state promote the mod...,[Artículo 129. \n1. La ley establecerá las fo...,The powers of the state promote the modernizat...,simple,[{'source': '../data/Constitución Española/Con...,True
3,What is the role of the Organización Territori...,"[El Tribunal de Cuentas, sin perjuicio de su p...",The role of the Organización Territorial del E...,simple,[{'source': '../data/Constitución Española/Con...,True
4,What rights do Spanish citizens have regarding...,[Artículo 17. \n1. Toda persona tiene derecho...,Spanish citizens have the right to choose thei...,simple,[{'source': '../data/Constitución Española/Con...,True


In [8]:
# Save the test set next to the source PDF as a JSON array with one object
# per row (orient='records').
# NOTE(review): pandas' default force_ascii=True writes accented characters
# as \uXXXX escapes — confirm that is what downstream readers expect.
df.to_json(
    path_or_buf='../data/Constitución Española/Constitución española_testset.json',
    orient='records',
)
