In [1]:
import os
from dotenv import load_dotenv

# The .env file is expected one directory above the notebook's working
# directory; abspath resolves the relative '..' against the current directory.
parent_dir = os.path.abspath('..')
dotenv_path = os.path.join(parent_dir, '.env')

# Kept as the cell's final bare expression so the notebook shows the value
# returned by load_dotenv.
load_dotenv(dotenv_path)

True

In [2]:
from llama_index.core import SimpleDirectoryReader

def load_docs(folder_path: str):
    """Read the files under ``folder_path`` and return them as LangChain documents.

    Args:
        folder_path: Directory handed to ``SimpleDirectoryReader``; it is read
            with ``recursive=True``.

    Returns:
        A list with one entry per loaded document, each converted with
        ``to_langchain_format()``.
    """
    llama_docs = SimpleDirectoryReader(folder_path, recursive=True).load_data()
    return [llama_doc.to_langchain_format() for llama_doc in llama_docs]

In [7]:
# Load everything under ./data through load_docs.
docs = load_docs("./data")

In [8]:
from ragas.testset.graph import KnowledgeGraph, Node, NodeType

def create_knowledge_graph(docs) -> KnowledgeGraph:
    """Build a new KnowledgeGraph containing one DOCUMENT node per input document.

    Args:
        docs: Iterable of objects exposing ``page_content`` and ``metadata``
            attributes.

    Returns:
        A KnowledgeGraph whose nodes, in input order, store each document's
        text under ``"page_content"`` and its metadata under
        ``"document_metadata"``.
    """
    graph = KnowledgeGraph()
    graph.nodes.extend(
        Node(
            type=NodeType.DOCUMENT,
            properties={
                "page_content": document.page_content,
                "document_metadata": document.metadata,
            },
        )
        for document in docs
    )
    return graph

In [9]:
# Seed the knowledge graph from the loaded documents (one DOCUMENT node each).
kg = create_knowledge_graph(docs)

In [4]:
from langchain_ollama.llms import OllamaLLM
from langchain_ollama import OllamaEmbeddings
from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

# Model names are read from the environment (the first cell loads a .env file).
CHAT_MODEL = os.getenv("OLLAMA_CHAT_MODEL")
EMBEDDING_MODEL = os.getenv("OLLAMA_EMBEDDING_MODEL")

# os.getenv returns None for an unset variable. Stop here and name the missing
# variable instead of passing None (or an empty string) on as a model name.
_missing_env = [
    name
    for name, value in (
        ("OLLAMA_CHAT_MODEL", CHAT_MODEL),
        ("OLLAMA_EMBEDDING_MODEL", EMBEDDING_MODEL),
    )
    if not value
]
if _missing_env:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_env)
    )

# LangChain clients for the chat and embedding models.
ollama_llm = OllamaLLM(model=CHAT_MODEL)
ollama_embedding = OllamaEmbeddings(model=EMBEDDING_MODEL)

# ragas-side wrappers around the LangChain clients.
ollama_llm_wrapper = LangchainLLMWrapper(ollama_llm)
ollama_embedding_wrapper = LangchainEmbeddingsWrapper(ollama_embedding)

In [14]:
from ragas.testset.transforms import apply_transforms, default_transforms

# Build ragas' default transform pipeline for these documents, using the
# wrapped Ollama chat and embedding models.
transformations = default_transforms(
    documents=docs,
    llm=ollama_llm_wrapper,
    embedding_model=ollama_embedding_wrapper,
)

# Run the pipeline against `kg`; the return value is not used.
# The recorded output below shows this step taking on the order of a minute
# or more per item and logging several output-parser failures.
apply_transforms(kg, transformations)

Applying HeadlinesExtractor:  40%|███▉      | 29/73 [28:40<53:34, 73.06s/it]   unable to apply transformation: 'StringIO' object has no attribute 'headlines'
Applying HeadlinesExtractor:  48%|████▊     | 35/73 [46:01<1:23:28, 131.80s/it]Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt headlines_extractor_prompt failed to parse output: The output parser failed to parse the output including retries.
unable to apply transformation: The output parser failed to parse the output including retries.
Applying HeadlinesExtractor:  52%|█████▏    | 38/73 [48:25<46:49, 80.26s/it]   Prompt fix_output_format failed to parse output: The output parser failed to parse the output including retries.
Prompt fix_out

In [None]:
# Write the current knowledge graph to knowledge_graph.json; a later cell
# reads that file back with KnowledgeGraph.load.
kg.save("knowledge_graph.json")

In [11]:
# Load through the class rather than through an existing instance, so this
# cell does not require `kg` to be bound already (for example on a fresh
# kernel where the slow transform cell is skipped).
# NOTE(review): the save cell writes "knowledge_graph.json" but this reads
# "knowledge_graph_3.json" -- confirm this is the intended file.
kg = KnowledgeGraph.load("knowledge_graph_3.json")

In [17]:
# Keep only the nodes whose 'summary_embedding' property is present and truthy.
filtered_nodes = [
    candidate
    for candidate in kg.nodes
    if candidate.properties.get('summary_embedding')
]

In [21]:
# A relationship belongs to the filtered subgraph only when BOTH endpoints
# survived the node filter. Using `or` here would keep edges that point at
# nodes missing from `filtered_nodes`, leaving dangling relationships.
_kept_node_ids = {node.id for node in filtered_nodes}
filtered_relations = [
    relation
    for relation in kg.relationships
    if relation.source.id in _kept_node_ids
    and relation.target.id in _kept_node_ids
]

In [24]:
# Assemble a separate graph from the filtered nodes and relationships.
filtered_kg = KnowledgeGraph(
    nodes=filtered_nodes,
    relationships=filtered_relations,
)

In [25]:
from ragas.testset.transforms import apply_transforms
from ragas.testset.transforms.relationship_builders import CosineSimilarityBuilder

# Relationship builder that reads each node's "summary_embedding" and records
# the result as "summary_similarity", configured with a 0.7 threshold.
summary_similarity_builder = CosineSimilarityBuilder(
    property_name="summary_embedding",
    new_property_name="summary_similarity",
    threshold=0.7,
)
transformations = [summary_similarity_builder]

# Applied to the filtered graph only; the return value is not used.
apply_transforms(filtered_kg, transformations)

                                                                       

In [None]:
from ragas.testset.synthesizers import default_query_distribution

# Build ragas' default query distribution with the wrapped Ollama LLM.
# NOTE(review): `query_distribution` is not used by any cell visible here --
# presumably it is passed to the test-set generator later; confirm.
query_distribution = default_query_distribution(ollama_llm_wrapper)

In [None]:
from ragas.testset import TestsetGenerator

# Reload the graph previously written to knowledge_graph.json.
# NOTE(review): this rebinds `kg` and does not use `filtered_kg` (the graph
# that received the summary-similarity relationships) -- confirm that the
# unfiltered saved graph is the one intended for generation.
kg = KnowledgeGraph.load("knowledge_graph.json")

# from_langchain is given the raw LangChain objects (ollama_llm /
# ollama_embedding), not the ragas wrapper objects created earlier.
generator = TestsetGenerator.from_langchain(
    llm=ollama_llm, 
    embedding_model=ollama_embedding,
    knowledge_graph=kg
)