In [1]:
from langchain_community.document_loaders import DirectoryLoader

import os

# Directory holding the source documents for test-set generation.
# The original author's local folder is kept as the default so existing runs
# behave the same; set LLM_EVAL_DOCUMENT_DIR to run the notebook on another
# machine without editing this cell.
path = os.environ.get(
    "LLM_EVAL_DOCUMENT_DIR",
    "C:/NTUST/Research/llm-evaluate/document/",
)

# Load the files found under `path` into LangChain documents.
loader = DirectoryLoader(path)
docs = loader.load()

In [None]:
import os
# Copy the Azure key into OPENAI_API_KEY (presumably for components that only
# look at that variable; confirm whether it is still required).
# Raises KeyError if AZURE_OPENAI_API_KEY is not set in the environment.
os.environ["OPENAI_API_KEY"] = os.environ["AZURE_OPENAI_API_KEY"]

# Azure OpenAI resource settings looked up by the model and embedding cells.
# Replace each value with the endpoint / deployment names of your own resource.
azure_config = dict(
    base_url="https://llmresource.openai.azure.com/",  # resource endpoint
    model_deployment="gpt-4o-mini",  # chat model deployment name
    model_name="gpt-4o-mini",  # chat model name
    embedding_deployment="text-embedding-3-small",  # embedding deployment name
    embedding_name="text-embedding-3-small",  # embedding model name
)

In [None]:
from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

# Chat model used by the test-set generator: an Azure OpenAI chat deployment
# (endpoint, deployment and model name come from `azure_config`) wrapped in
# the ragas LangChain LLM adapter.
generator_llm = LangchainLLMWrapper(
    AzureChatOpenAI(
        azure_endpoint=azure_config["base_url"],
        azure_deployment=azure_config["model_deployment"],
        model=azure_config["model_name"],
        openai_api_version="2024-08-01-preview",
        validate_base_url=False,
    )
)

# Embedding model handed to the test-set generator: an Azure OpenAI embedding
# deployment (settings from `azure_config`) wrapped in the ragas LangChain
# embeddings adapter. Note it is pinned to a different API version than the
# chat model.
generator_embeddings = LangchainEmbeddingsWrapper(
    AzureOpenAIEmbeddings(
        azure_endpoint=azure_config["base_url"],
        azure_deployment=azure_config["embedding_deployment"],
        model=azure_config["embedding_name"],
        openai_api_version="2023-05-15",
    )
)

In [None]:
from ragas.testset import TestsetGenerator

# Assemble the ragas test-set generator from the wrapped chat model and
# embeddings created in the previous cell.
generator = TestsetGenerator(
    llm=generator_llm,
    embedding_model=generator_embeddings,
)

# Request a test set of size 10 built from the documents loaded earlier.
dataset = generator.generate_with_langchain_docs(
    docs,
    testset_size=10,
)

In [None]:
# Convert the generated test set with to_pandas(); as the last expression in
# the cell, the result is rendered as the cell output.
dataset.to_pandas()