In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os
import time
import pandas as pd
from getpass import getpass
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.testset import Testset
from ragas.llms import LangchainLLMWrapper
from docu_bot.utils import create_chatopenai_model, create_openai_embeddings
from docu_bot.document_loaders.git_document_loader import GitDocumentLoader
from docu_bot.document_loaders.utils import LoadedRepositoriesAndFiles
from docu_bot.datasets.generate_synthetic_data_ragas import (
    generate_dataset,
    create_generator,
)
from docu_bot.evaluation.evaluate import Evaluator
from docu_bot.retrievals.document_retrival import DocumentRetrieval
from docu_bot.retrievals.empty_retrieval import EmptyRetrieval
from docu_bot.retrievals.context_query_alteration_retrieval import ContextQueryAlterationDocumentRetrieval
from docu_bot.retrievals.query_alteration_retrieval import QueryAlterationDocumentRetrieval
from docu_bot.retrievals.generative_retrieval import GenerativeDocumentRetrieval
from docu_bot.retrievals.rerank_retrieval import RerankDocumentRetrieval

from docu_bot.stores.docstore import DocumentStore
from docu_bot.stores.utils import create_vector_store_from_document_loader, LoadedVectorStores

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
model_type = "gpt-4o-mini"
embedding_model_type="text-embedding-3-small"
api_key = getpass("Enter your OpenAI API key: ")

## Generate Sythetic Datase


In [4]:
llm_model = LangchainLLMWrapper(create_chatopenai_model(model_type=model_type, api_key=api_key))
embeddings_model = LangchainEmbeddingsWrapper(create_openai_embeddings(model_type=embedding_model_type, api_key=api_key))
generator = create_generator(llm_model, embeddings_model)
document_loader = GitDocumentLoader(
    repo_path="https://code.it4i.cz/sccs/docs.it4i.cz.git", branch="master", loaded_repositories_and_files=LoadedRepositoriesAndFiles()
)

In [None]:
synthetic_data_list = []
for i in range(10):
    print(f"Generating synthetic data {i}")
    synthetic_data = generate_dataset(generator, document_loader.load(), dataset_size=100)
    synthetic_data_list.append(synthetic_data.to_pandas())
    print(f"Sleeping for 10 seconds to prevent Token Limit Error" )
    time.sleep(10)
synthetic_data = pd.concat(synthetic_data_list)

In [9]:
synthetic_data

Unnamed: 0,user_input,reference_contexts,reference,synthesizer_name
0,What is IT4Innovations and how do I install it...,[# IT4Inoovations Documentation\n\nThis projec...,IT4Innovations is a project that contains user...,single_hop_specifc_query_synthesizer
1,Can you explain what EOSC is and how it relate...,[# What Is DICE Project?\n\nDICE (Data Infrast...,The EOSC (European Open Science Cloud) project...,single_hop_specifc_query_synthesizer
2,Can you explain the significance of IT4I in th...,[# Migration to e-INFRA CZ\n\n## Introduction\...,IT4Innovations is a crucial part of e-INFRA CZ...,single_hop_specifc_query_synthesizer
3,What is the default shell available on IT4Inno...,[# Environment and Modules\n\n## Shells on Clu...,The default shell available on IT4Innovations ...,single_hop_specifc_query_synthesizer
4,Why is CentOS recommended for new developers w...,[# Documentation\n\nWelcome to the IT4Innovati...,CentOS is recommended for new developers becau...,single_hop_specifc_query_synthesizer
...,...,...,...,...
45,What is IT4Innovashuns and what does it do?,[IT4Innovations national supercomputing center...,IT4Innovations is a national supercomputing ce...,single_hop_specifc_query_synthesizer
46,Wht is DGX-A100?,"[Savings GPU\n....\nSpetko, Vysocky, Jansik, R...",The DGX-A100 is referenced in the context of a...,single_hop_specifc_query_synthesizer
47,How can I contact support at it4i?,[# Satisfaction and Feedback\n\nIT4Innovations...,"For acute, pressing issues and immediate conta...",single_hop_specifc_query_synthesizer
48,Can you explain how to use HyperQueue on Karol...,[# HyperQueue\n\nHyperQueue lets you build a c...,"To use HyperQueue on Karolina, you first need ...",single_hop_specifc_query_synthesizer


In [None]:
synthetic_data.to_feather(
    os.path.join(os.path.abspath(''), "..", "datasets", "it4i_large_synthetic_data.feather")
)

### Use Sythetic Data to test the model

In [3]:
model_type = "aya-expanse:latest"
embedding_model_type = "text-embedding-3-small"
api_key = getpass("Enter your Metacentrum API key: ")
evaluator_llm_model_type = "gpt-4o-mini"
evaluator_embeddings_model_type = "text-embedding-3-small"
open_ai_api_key = getpass("Enter your OpenAI API key: ")

In [4]:
synthetic_data = Testset.from_pandas(pd.read_feather(os.path.join(os.path.abspath(''), "..", "datasets", "it4i_synthetic_data.feather")))
document_loader = GitDocumentLoader(
    repo_path="https://code.it4i.cz/sccs/docs.it4i.cz.git", branch="master", loaded_repositories_and_files=LoadedRepositoriesAndFiles()
)
docstore =DocumentStore()
cached_vector_store = LoadedVectorStores(embedding_model=embedding_model_type, api_key=open_ai_api_key)
vector_store =create_vector_store_from_document_loader(document_loader, docstore, cached_vector_store, embedding_model_type, open_ai_api_key)

In [5]:
evaluator = Evaluator(
    evaluator_llm=create_chatopenai_model(model_type=evaluator_llm_model_type, api_key=open_ai_api_key),
    evaluator_embedding_model=create_openai_embeddings(model_type=evaluator_embeddings_model_type, api_key=open_ai_api_key),
)

In [6]:
eval_results = evaluator.evaluate_configuration(
    dataset=synthetic_data,
    rag_llm=create_chatopenai_model(model_type=model_type, api_key=api_key),
    document_retriever=DocumentRetrieval(
        vectorstore=vector_store, 
        docstore=docstore, 
        search_kwargs={"min_score": 0.3, "k": 5}
    )
)
eval_results

Evaluating: 100%|██████████| 200/200 [02:01<00:00,  1.64it/s]


{'factual_correctness': 0.3976, 'faithfulness': 0.6689, 'context_recall': 0.4547, 'semantic_similarity': 0.7469, 'non_llm_context_recall': 0.2000, 'llm_context_precision_with_reference': 0.4900, 'non_llm_context_precision_with_reference': 0.1800, 'context_entity_recall': 0.1875}

In [7]:
eval_results = evaluator.evaluate_configuration(
    dataset=synthetic_data,
    rag_llm=create_chatopenai_model(model_type=model_type, api_key=api_key),
    document_retriever=EmptyRetrieval(
        docstore=docstore,
        search_kwargs={}
    )
)
eval_results

Evaluating: 100%|██████████| 200/200 [02:01<00:00,  1.64it/s]


{'factual_correctness': 0.1492, 'faithfulness': 0.6988, 'context_recall': 0.0000, 'semantic_similarity': 0.4976, 'non_llm_context_recall': 0.0000, 'llm_context_precision_with_reference': 0.0000, 'non_llm_context_precision_with_reference': 0.0000, 'context_entity_recall': 0.0000}

In [6]:
llm = create_chatopenai_model(model_type=model_type, api_key=api_key)

eval_results = evaluator.evaluate_configuration(
    dataset=synthetic_data,
    rag_llm=llm,
    document_retriever=GenerativeDocumentRetrieval(
        llm=llm,
        docstore=docstore,
        search_kwargs={"k": 1}
    )
)
eval_results

Evaluating: 100%|██████████| 200/200 [01:49<00:00,  1.83it/s]


{'factual_correctness': 0.1956, 'faithfulness': 0.5297, 'context_recall': 0.1700, 'semantic_similarity': 0.7282, 'non_llm_context_recall': 0.0000, 'llm_context_precision_with_reference': 0.4400, 'non_llm_context_precision_with_reference': 0.0000, 'context_entity_recall': 0.2057}

In [7]:
llm = create_chatopenai_model(model_type=model_type, api_key=api_key)

eval_results = evaluator.evaluate_configuration(
    dataset=synthetic_data,
    rag_llm=llm,
    document_retriever=QueryAlterationDocumentRetrieval(
        llm=llm,
        vectorstore=vector_store, 
        docstore=docstore, 
        search_kwargs={"min_score": 0.3, "k": 5, "num_custom_queires": 5}
    )
)
eval_results

Evaluating: 100%|██████████| 200/200 [01:51<00:00,  1.80it/s]


{'factual_correctness': 0.2472, 'faithfulness': 0.6955, 'context_recall': 0.1533, 'semantic_similarity': 0.6022, 'non_llm_context_recall': 0.0800, 'llm_context_precision_with_reference': 0.1900, 'non_llm_context_precision_with_reference': 0.0800, 'context_entity_recall': 0.0298}

In [8]:
llm = create_chatopenai_model(model_type=model_type, api_key=api_key)

eval_results = evaluator.evaluate_configuration(
    dataset=synthetic_data,
    rag_llm=llm,
    document_retriever=ContextQueryAlterationDocumentRetrieval(
        vectorstore=vector_store, 
        docstore=docstore, 
        llm=llm,
        search_kwargs={"min_score": 0.3, "k": 5}
    )
)
eval_results

Evaluating: 100%|██████████| 200/200 [01:42<00:00,  1.96it/s]


{'factual_correctness': 0.2352, 'faithfulness': 0.6492, 'context_recall': 0.3013, 'semantic_similarity': 0.6146, 'non_llm_context_recall': 0.2000, 'llm_context_precision_with_reference': 0.3567, 'non_llm_context_precision_with_reference': 0.1467, 'context_entity_recall': 0.1307}

In [14]:
llm = create_chatopenai_model(model_type=model_type, api_key=api_key)

eval_results = evaluator.evaluate_configuration(
    dataset=synthetic_data,
    rag_llm=llm,
    document_retriever=RerankDocumentRetrieval(
        vectorstore=vector_store, 
        docstore=docstore, 
        llm=llm,
        search_kwargs={"min_score": 0.1, "k": 5}
    )
)
eval_results

content='Yes' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 2, 'prompt_tokens': 235, 'total_tokens': 237, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': None, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': None, 'response_token/s': 32.79, 'prompt_token/s': 117500.0, 'total_duration': 105855569, 'load_duration': 40585935, 'prompt_eval_count': 235, 'prompt_eval_duration': 2000000, 'eval_count': 2, 'eval_duration': 61000000, 'approximate_total': '0h0m0s'}, 'model_name': 'llama3.3:latest', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None} id='run-7d719f88-862f-4e29-a1aa-f53462f41756-0' usage_metadata={'input_tokens': 235, 'output_tokens': 2, 'total_tokens': 237, 'input_token_details': {}, 'output_token_details': {'reasoning': 0}}
Yes


AttributeError: 'NoneType' object has no attribute 'get'