In [1]:
# Enable IPython's autoreload extension. Mode 2 re-imports edited modules
# before each cell runs, so changes to local packages are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [12]:
from getpass import getpass
from ragas.embeddings import LangchainEmbeddingsWrapper
from ragas.testset import Testset
from ragas.llms import LangchainLLMWrapper
from ragas.metrics import (
    FactualCorrectness,
    Faithfulness,
    LLMContextRecall,
    SemanticSimilarity,
    NonLLMContextRecall,
    LLMContextPrecisionWithReference,
    NonLLMContextPrecisionWithReference,
    ContextEntityRecall,
)
from docu_bot.utils import create_chatopenai_model, create_openai_embeddings
from docu_bot.document_loaders.git_document_loader import GitDocumentLoader
from docu_bot.document_loaders.utils import LoadedRepositoriesAndFiles
from docu_bot.datasets.generate_synthetic_data_ragas import (
    generate_dataset,
    create_generator,
)

In [3]:
# Chat model identifier handed to create_chatopenai_model further down.
model_type = "gpt-4o-mini"

# Prompt for the key interactively so it is never stored in the notebook.
api_key = getpass(prompt="Enter your OpenAI API key: ")

## Generate Synthetic Dataset


In [4]:
# Wrap the LangChain chat model so the ragas generator can use it.
llm_model = LangchainLLMWrapper(
    create_chatopenai_model(model_type=model_type, api_key=api_key)
)

# NOTE(review): unlike the chat model, no api_key is passed here; presumably
# the helper picks the key up some other way -- confirm against docu_bot.utils.
embeddings_model = LangchainEmbeddingsWrapper(
    create_openai_embeddings()
)

# Test-set generator built from the wrapped LLM and embeddings.
generator = create_generator(llm_model, embeddings_model)

# Loader pointed at the `main` branch of the serve-model repository, with a
# fresh LoadedRepositoriesAndFiles instance.
document_loader = GitDocumentLoader(
    repo_path="https://github.com/jinymusim/serve-model.git",
    branch="main",
    loaded_repositories_and_files=LoadedRepositoriesAndFiles(),
)

In [5]:
# Generate the synthetic test set from whatever the git loader returns.
# NOTE(review): the recorded run below reports 40 generated samples although
# dataset_size=50 is requested -- confirm how generate_dataset uses the size.
synthetic_data = generate_dataset(
    generator,
    document_loader.load(),
    dataset_size=50,
)

Generating Scenarios: 100%|██████████| 1/1 [00:06<00:00,  6.96s/it] 
Generating Samples: 100%|██████████| 40/40 [00:07<00:00,  5.23it/s]


In [10]:
# Persist the generated test set as JSONL (path is relative to the working
# directory) so it can be reloaded without regenerating.
synthetic_data.to_jsonl("synthetic_data.jsonl")

### Use Synthetic Data to test the model

In [13]:
# Reload the test set from the JSONL file on disk; this rebinds
# `synthetic_data`, so the evaluation can start from the saved file.
synthetic_data = Testset.from_jsonl("synthetic_data.jsonl")