In [2]:
import os
from dotenv import load_dotenv

# The .env file is expected one directory above the current working directory.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
dotenv_path = os.path.join(parent_dir, ".env")
# Left as the trailing expression so the cell displays load_dotenv's return value.
load_dotenv(dotenv_path)

True

In [2]:
from llama_index.core import SimpleDirectoryReader

def load_docs(folder_path: str):
    """Load the contents of ``folder_path`` through ``SimpleDirectoryReader``.

    The reader is created with ``recursive=True`` and loading is run with
    ``show_progress=True``.

    Args:
        folder_path: Directory handed to ``SimpleDirectoryReader``.

    Returns:
        The value returned by ``SimpleDirectoryReader.load_data``.
    """
    return SimpleDirectoryReader(folder_path, recursive=True).load_data(
        show_progress=True
    )

KeyboardInterrupt: 

In [33]:
from ragas.testset.graph import KnowledgeGraph
from ragas.testset.graph import Node, NodeType

documents = load_docs("./data")

# Start from an empty graph and add one DOCUMENT node per loaded document,
# storing the document's text and metadata as node properties.
kg = KnowledgeGraph()
kg.nodes.extend(
    Node(
        type=NodeType.DOCUMENT,
        properties={
            "page_content": doc.text,
            "document_metadata": doc.metadata,
        },
    )
    for doc in documents
)

Loading files:   0%|          | 0/3 [00:00<?, ?file/s]

Loading files: 100%|██████████| 3/3 [00:01<00:00,  1.86file/s]


In [3]:
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding
from ragas.llms import LlamaIndexLLMWrapper
from ragas.embeddings import LlamaIndexEmbeddingsWrapper
from ragas.testset.transforms import default_transforms, apply_transforms

# Model names are read from the environment (the first cell loads a .env file).
CHAT_MODEL = os.getenv("OLLAMA_CHAT_MODEL")
EMBEDDING_MODEL = os.getenv("OLLAMA_EMBEDDING_MODEL")

# os.getenv returns None for an unset variable; stop here with a message that
# names the missing variable(s) rather than passing None on to the Ollama
# clients constructed below.
missing_env_vars = [
    name
    for name, value in (
        ("OLLAMA_CHAT_MODEL", CHAT_MODEL),
        ("OLLAMA_EMBEDDING_MODEL", EMBEDDING_MODEL),
    )
    if not value
]
if missing_env_vars:
    raise EnvironmentError(
        "Missing required environment variable(s): " + ", ".join(missing_env_vars)
    )

# Wrap the llama-index Ollama clients in the ragas adapter classes.
ollama_llm_wrapper = LlamaIndexLLMWrapper(
    Ollama(
        model=CHAT_MODEL
    )
)
ollama_embedding_wrapper = LlamaIndexEmbeddingsWrapper(
    OllamaEmbedding(
        model_name=EMBEDDING_MODEL
    )
)

In [None]:
# Build the default ragas transforms on top of the Ollama-backed wrappers,
# then apply them to the knowledge graph.
trans = default_transforms(
    llm=ollama_llm_wrapper,
    embedding_model=ollama_embedding_wrapper,
)
apply_transforms(kg, trans)

In [40]:
# Save the knowledge graph to disk; a later cell loads it back from this same path.
kg.save("knowledge_graph.json")

In [4]:
from ragas.testset.graph import KnowledgeGraph

# Load the knowledge graph from the JSON file written by kg.save(...).
loaded_kg = KnowledgeGraph.load("knowledge_graph.json")
# Bare trailing expression so the cell displays the loaded graph's repr.
loaded_kg

KnowledgeGraph(nodes: 79, relationships: 27)

In [None]:
from ragas.testset import TestsetGenerator
from ragas.testset.synthesizers import default_query_distribution

# Generator backed by the Ollama wrappers and the knowledge graph loaded from disk.
generator = TestsetGenerator(
    llm=ollama_llm_wrapper,
    embedding_model=ollama_embedding_wrapper,
    knowledge_graph=loaded_kg,
)
query_distribution = default_query_distribution(ollama_llm_wrapper)

# Pass the distribution built above to generate() so it is actually used.
# No blanket try/except here: a failed run should surface its full traceback
# instead of being reduced to a one-line printed message.
testset = generator.generate(
    testset_size=100,
    query_distribution=query_distribution,
)
# Export the generated samples as a spreadsheet.
testset.to_pandas().to_excel("testset.xlsx")


Generating personas:   0%|          | 0/3 [00:00<?, ?it/s]