In [1]:
# Enable IPython's autoreload extension so edits made to imported project
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import os
from getpass import getpass
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.testset import Testset
from ragas.llms import LangchainLLMWrapper
from docu_bot.utils import create_chatopenai_model, create_openai_embeddings
from docu_bot.document_loaders.git_document_loader import GitDocumentLoader
from docu_bot.document_loaders.utils import LoadedRepositoriesAndFiles
from docu_bot.datasets.generate_synthetic_data_ragas import (
    generate_dataset,
    create_generator,
)
from docu_bot.evaluation.evaluate import Evaluator
from docu_bot.retrievals.document_retrival import DocumentRetrieval
from docu_bot.retrievals.empty_retrieval import EmptyRetrieval
from docu_bot.retrievals.context_query_alteration_retrieval import ContextQueryAlterationDocumentRetrieval
from docu_bot.retrievals.query_alteration_retrieval import QueryAlterationDocumentRetrieval
from docu_bot.retrievals.generative_retrieval import GenerativeDocumentRetrieval

from docu_bot.stores.docstore import DocumentStore
from docu_bot.stores.utils import create_vector_store_from_document_loader, LoadedVectorStores

In [3]:
# Model identifiers used while generating the synthetic test set.
model_type = "gpt-4o-mini"
embedding_model_type = "text-embedding-3-small"

# Prompt for the key interactively so it is never stored in the notebook.
api_key = getpass("Enter your OpenAI API key: ")

## Generate Synthetic Dataset


In [8]:
# Build the chat model and embeddings first, then wrap each one in the
# ragas adapter that the test-set generator is given.
generator_chat_model = create_chatopenai_model(model_type=model_type, api_key=api_key)
llm_model = LangchainLLMWrapper(generator_chat_model)

generator_embeddings = create_openai_embeddings(model_type=embedding_model_type, api_key=api_key)
embeddings_model = LangchainEmbeddingsWrapper(generator_embeddings)

generator = create_generator(llm_model, embeddings_model)

# Source corpus: the dCache repository, branch 9.2.
document_loader = GitDocumentLoader(
    repo_path="https://github.com/dCache/dcache.git",
    branch="9.2",
    loaded_repositories_and_files=LoadedRepositoriesAndFiles(),
)

In [9]:
# Load the repository documents, then request 25 synthetic samples from them.
source_documents = document_loader.load()
synthetic_data = generate_dataset(generator, source_documents, dataset_size=25)

Generating Scenarios: 100%|██████████| 1/1 [00:55<00:00, 55.02s/it]   
Generating Samples: 100%|██████████| 25/25 [00:06<00:00,  3.58it/s]


In [10]:
# Persist the generated test set under ../datasets (relative to the current
# working directory) so the evaluation section can reload it from disk
# instead of regenerating it.
dataset_path = os.path.join(os.path.abspath(''), "..", "datasets", "dcache_synthetic_data.jsonl")

# Make sure the target folder exists before writing: on a checkout without
# ../datasets the write would otherwise presumably fail after the expensive
# generation step has already run. The folder path is normalized via abspath
# because os.makedirs is documented to get confused by ".." components.
os.makedirs(os.path.abspath(os.path.dirname(dataset_path)), exist_ok=True)

synthetic_data.to_jsonl(dataset_path)

### Use Synthetic Data to test the model

In [13]:
# Model identifiers for the evaluation run.
# NOTE: `model_type` and `api_key` are rebound here; from this point on they
# refer to the model under test, not the model used to generate the data set.
model_type = "llama3.3:latest"
evaluator_llm_model_type = "gpt-4o-mini"
evaluator_embeddings_model_type = "text-embedding-3-small"

# Two separate credentials are requested, in this order: one for the model
# under test and one for the OpenAI-backed evaluator.
api_key = getpass("Enter your Metacentrum API key: ")
open_ai_api_key = getpass("Enter your OpenAI API key: ")

In [14]:
# Reload the previously saved synthetic test set from ../datasets.
dataset_path = os.path.join(os.path.abspath(''), "..", "datasets", "dcache_synthetic_data.jsonl")
synthetic_data = Testset.from_jsonl(dataset_path)

# Same corpus as used for generation: the dCache repository, branch 9.2.
document_loader = GitDocumentLoader(
    repo_path="https://github.com/dCache/dcache.git",
    branch="9.2",
    loaded_repositories_and_files=LoadedRepositoriesAndFiles(),
)

# Document store and vector store shared by every retriever evaluated below.
docstore = DocumentStore()
cached_vector_store = LoadedVectorStores()
vector_store = create_vector_store_from_document_loader(
    document_loader,
    docstore,
    cached_vector_store,
)

In [15]:
# The evaluator scores answers with its own OpenAI chat model and embeddings,
# independent of the model under test.
judge_llm = create_chatopenai_model(model_type=evaluator_llm_model_type, api_key=open_ai_api_key)
judge_embeddings = create_openai_embeddings(model_type=evaluator_embeddings_model_type, api_key=open_ai_api_key)

evaluator = Evaluator(evaluator_llm=judge_llm, evaluator_embedding_model=judge_embeddings)

In [16]:
# Configuration 1: retrieval through DocumentRetrieval over the vector store.
rag_llm = create_chatopenai_model(model_type=model_type, api_key=api_key)

# NOTE(review): "min_score" / "k" are presumably a score threshold and the
# number of documents to return - confirm against DocumentRetrieval.
vector_retriever = DocumentRetrieval(
    vectorstore=vector_store,
    docstore=docstore,
    search_kwargs={"min_score": 0.5, "k": 5},
)

eval_results = evaluator.evaluate_configuration(
    dataset=synthetic_data,
    rag_llm=rag_llm,
    document_retriever=vector_retriever,
)
eval_results

Evaluating: 100%|██████████| 200/200 [01:47<00:00,  1.86it/s]


{'factual_correctness': 0.2072, 'faithfulness': 0.5882, 'context_recall': 0.1200, 'semantic_similarity': 0.6592, 'non_llm_context_recall': 0.0000, 'llm_context_precision_with_reference': 0.0533, 'non_llm_context_precision_with_reference': 0.0000, 'context_entity_recall': 0.1002}

In [17]:
# Configuration 2: EmptyRetrieval with no search options, evaluated with the
# same data set and model as the other configurations.
rag_llm = create_chatopenai_model(model_type=model_type, api_key=api_key)
empty_retriever = EmptyRetrieval(docstore=docstore, search_kwargs={})

eval_results = evaluator.evaluate_configuration(
    dataset=synthetic_data,
    rag_llm=rag_llm,
    document_retriever=empty_retriever,
)
eval_results

Evaluating: 100%|██████████| 200/200 [01:39<00:00,  2.01it/s]


{'factual_correctness': 0.2284, 'faithfulness': 0.4229, 'context_recall': 0.0400, 'semantic_similarity': 0.6927, 'non_llm_context_recall': 0.0000, 'llm_context_precision_with_reference': 0.0000, 'non_llm_context_precision_with_reference': 0.0000, 'context_entity_recall': 0.0000}

In [18]:
# Configuration 3: GenerativeDocumentRetrieval. The same chat model instance
# is handed to the retriever and used as the answering model.
llm = create_chatopenai_model(model_type=model_type, api_key=api_key)

generative_retriever = GenerativeDocumentRetrieval(
    llm=llm,
    docstore=docstore,
    search_kwargs={"k": 1},
)

eval_results = evaluator.evaluate_configuration(
    dataset=synthetic_data,
    rag_llm=llm,
    document_retriever=generative_retriever,
)
eval_results

Evaluating: 100%|██████████| 200/200 [02:06<00:00,  1.59it/s]


{'factual_correctness': 0.1452, 'faithfulness': 0.3945, 'context_recall': 0.1493, 'semantic_similarity': 0.7394, 'non_llm_context_recall': 0.0000, 'llm_context_precision_with_reference': 0.6000, 'non_llm_context_precision_with_reference': 0.0000, 'context_entity_recall': 0.2521}

In [19]:
# Configuration 4: QueryAlterationDocumentRetrieval. The same chat model
# instance is handed to the retriever and used as the answering model.
llm = create_chatopenai_model(model_type=model_type, api_key=api_key)

# NOTE(review): the key "num_custom_queires" looks like a misspelling of
# "num_custom_queries". It is kept verbatim because the retriever may read
# exactly this spelling - confirm against QueryAlterationDocumentRetrieval.
query_alteration_options = {"min_score": 0.5, "k": 5, "num_custom_queires": 5}

query_alteration_retriever = QueryAlterationDocumentRetrieval(
    llm=llm,
    vectorstore=vector_store,
    docstore=docstore,
    search_kwargs=query_alteration_options,
)

eval_results = evaluator.evaluate_configuration(
    dataset=synthetic_data,
    rag_llm=llm,
    document_retriever=query_alteration_retriever,
)
eval_results

Evaluating: 100%|██████████| 200/200 [01:21<00:00,  2.44it/s]


{'factual_correctness': 0.1468, 'faithfulness': 0.5575, 'context_recall': 0.1827, 'semantic_similarity': 0.6702, 'non_llm_context_recall': 0.0000, 'llm_context_precision_with_reference': 0.1133, 'non_llm_context_precision_with_reference': 0.0000, 'context_entity_recall': 0.0698}

In [None]:
# Configuration 5: ContextQueryAlterationDocumentRetrieval. The same chat
# model instance is handed to the retriever and used as the answering model.
llm = create_chatopenai_model(model_type=model_type, api_key=api_key)

context_query_retriever = ContextQueryAlterationDocumentRetrieval(
    vectorstore=vector_store,
    docstore=docstore,
    llm=llm,
    search_kwargs={"min_score": 0.5, "k": 5},
)

eval_results = evaluator.evaluate_configuration(
    dataset=synthetic_data,
    rag_llm=llm,
    document_retriever=context_query_retriever,
)
eval_results