From 1ac0ef0d5672b68bdb8ae3f1f3483016129fd9dd Mon Sep 17 00:00:00 2001 From: "V.Prasanna kumar" Date: Sun, 18 Feb 2024 23:56:27 +0530 Subject: [PATCH 1/2] Added testset generation for bedrock --- docs/howtos/customisations/aws-bedrock.ipynb | 133 +++++++++++++++++++ 1 file changed, 133 insertions(+) diff --git a/docs/howtos/customisations/aws-bedrock.ipynb b/docs/howtos/customisations/aws-bedrock.ipynb index 341e618a5..d417ea36c 100644 --- a/docs/howtos/customisations/aws-bedrock.ipynb +++ b/docs/howtos/customisations/aws-bedrock.ipynb @@ -330,6 +330,139 @@ "df.head()" ] }, + { + "cell_type": "markdown", + "id": "b133aff0", + "metadata": {}, + "source": [ + "# Test Data Generation" + ] + }, + { + "cell_type": "markdown", + "id": "4c7192f2", + "metadata": {}, + "source": [ + "Get the documents you for which you are going to create the test set" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "529266ad", + "metadata": {}, + "outputs": [], + "source": [ + "from langchain_community.document_loaders import UnstructuredURLLoader\n", + "\n", + "urls = [\n", + " \"https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-8-2023\",\n", + " \"https://www.understandingwar.org/backgrounder/russian-offensive-campaign-assessment-february-9-2023\",\n", + "]\n", + "loader = UnstructuredURLLoader(urls=urls)\n", + "documents = loader.load()" + ] + }, + { + "cell_type": "markdown", + "id": "87587749", + "metadata": {}, + "source": [ + "now we have documents created in the form of langchain `Document`\n", + "Next step is to wrap the embedding and llm model into ragas schema." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d5eaed2", + "metadata": {}, + "outputs": [], + "source": [ + "from ragas.llms import BaseRagasLLM, LangchainLLMWrapper\n", + "from ragas.embeddings.base import BaseRagasEmbeddings, LangchainEmbeddingsWrapper\n", + "\n", + "bedrock_model = LangchainLLMWrapper(bedrock_model)\n", + "bedrock_embeddings = LangchainEmbeddingsWrapper(bedrock_embeddings)\n" + ] + }, + { + "cell_type": "markdown", + "id": "d7d17468", + "metadata": {}, + "source": [ + "Next Step is to create chunks from the documents and store the chunks InMemoryDocumentStore" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4e717c13", + "metadata": {}, + "outputs": [], + "source": [ + "from ragas.testset.extractor import KeyphraseExtractor\n", + "from langchain.text_splitter import TokenTextSplitter\n", + "from ragas.testset.docstore import InMemoryDocumentStore\n", + "\n", + "splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=100)\n", + "keyphrase_extractor = KeyphraseExtractor(llm=generator_llm)\n", + "docstore = InMemoryDocumentStore(\n", + "splitter=splitter,\n", + "embeddings=bedrock_embeddings,\n", + "extractor=keyphrase_extractor,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "7773f4b5", + "metadata": {}, + "source": [ + "Customising the the testset generation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "495ff805", + "metadata": {}, + "outputs": [], + "source": [ + "from ragas.testset import TestsetGenerator\n", + "from ragas.testset.evolutions import simple, reasoning, multi_context\n", + "\n", + "test_generator = TestsetGenerator(\n", + " generator_llm=generator_llm,\n", + " critic_llm=critic_llm,\n", + " embeddings=bedrock_embeddings,\n", + " docstore=docstore,\n", + ")\n", + "\n", + "# use generator.generate_with_llamaindex_docs if you use llama-index as document loader\n", + "testset = test_generator.generate_with_langchain_docs(documents=documents,\n", 
+ " test_size=10,distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})" + ] + }, + { + "cell_type": "markdown", + "id": "8a80046b", + "metadata": {}, + "source": [ + "Export the results into pandas¶" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b4633c8", + "metadata": {}, + "outputs": [], + "source": [ + "test_df = testset.to_pandas()\n", + "test_df.head()" + ] + }, { "cell_type": "markdown", "id": "f668fce1", From 4e69a60494422dc289b18db28d16f1d0b518d763 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Sun, 18 Feb 2024 12:55:21 -0800 Subject: [PATCH 2/2] fix initialization --- docs/howtos/customisations/aws-bedrock.ipynb | 51 +++++++++++++------- 1 file changed, 33 insertions(+), 18 deletions(-) diff --git a/docs/howtos/customisations/aws-bedrock.ipynb b/docs/howtos/customisations/aws-bedrock.ipynb index d417ea36c..7d88266b1 100644 --- a/docs/howtos/customisations/aws-bedrock.ipynb +++ b/docs/howtos/customisations/aws-bedrock.ipynb @@ -9,7 +9,10 @@ "\n", "Amazon Bedrock is a fully managed service that makes FMs from leading AI startups and Amazon available via an API, so you can choose from a wide range of FMs to find the model that is best suited for your use case.\n", "\n", - "This tutorial will show you how to use Amazon Bedrock endpoints and LangChain." + "This tutorial will show you how to use Amazon Bedrock with Ragas.\n", + "\n", + "1. [Metrics](#load-sample-dataset)\n", + "2. [Testset generation](#test-data-generation)" ] }, { @@ -22,6 +25,14 @@ ":::" ] }, + { + "cell_type": "markdown", + "id": "f466494a", + "metadata": {}, + "source": [ + "## Metrics" + ] + }, { "cell_type": "markdown", "id": "e54b5e01", @@ -335,7 +346,7 @@ "id": "b133aff0", "metadata": {}, "source": [ - "# Test Data Generation" + "## Test Data Generation" ] }, { @@ -343,7 +354,7 @@ "id": "4c7192f2", "metadata": {}, "source": [ - "Get the documents you for which you are going to create the test set" + "Load the documents using desired dataloader." 
] }, { @@ -379,11 +390,11 @@ "metadata": {}, "outputs": [], "source": [ - "from ragas.llms import BaseRagasLLM, LangchainLLMWrapper\n", - "from ragas.embeddings.base import BaseRagasEmbeddings, LangchainEmbeddingsWrapper\n", + "from ragas.llms import LangchainLLMWrapper\n", + "from ragas.embeddings.base import LangchainEmbeddingsWrapper\n", "\n", "bedrock_model = LangchainLLMWrapper(bedrock_model)\n", - "bedrock_embeddings = LangchainEmbeddingsWrapper(bedrock_embeddings)\n" + "bedrock_embeddings = LangchainEmbeddingsWrapper(bedrock_embeddings)" ] }, { @@ -391,7 +402,7 @@ "id": "d7d17468", "metadata": {}, "source": [ - "Next Step is to create chunks from the documents and store the chunks InMemoryDocumentStore" + "Next step is to create chunks from the documents and store the chunks in the `InMemoryDocumentStore`" ] }, { @@ -406,11 +417,12 @@ "from ragas.testset.extractor import KeyphraseExtractor\n", "from langchain.text_splitter import TokenTextSplitter\n", "from ragas.testset.docstore import InMemoryDocumentStore\n", "\n", "splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=100)\n", - "keyphrase_extractor = KeyphraseExtractor(llm=generator_llm)\n", + "keyphrase_extractor = KeyphraseExtractor(llm=bedrock_model)\n", + "\n", "docstore = InMemoryDocumentStore(\n", - "splitter=splitter,\n", - "embeddings=bedrock_embeddings,\n", - "extractor=keyphrase_extractor,\n", + " splitter=splitter,\n", + " embeddings=bedrock_embeddings,\n", + " extractor=keyphrase_extractor,\n", ")" ] }, { @@ -419,7 +431,7 @@ "id": "7773f4b5", "metadata": {}, "source": [ - "Customising the the testset generation" + "Initializing `TestsetGenerator` with required arguments and generating data" ] }, { @@ -433,15 +445,18 @@ "from ragas.testset import TestsetGenerator\n", "from ragas.testset.evolutions import simple, reasoning, multi_context\n", "\n", "test_generator = TestsetGenerator(\n", - " generator_llm=generator_llm,\n", - " critic_llm=critic_llm,\n", - " embeddings=bedrock_embeddings,\n", - " docstore=docstore,\n", + " generator_llm=bedrock_model,\n", + " critic_llm=bedrock_model,\n", + " embeddings=bedrock_embeddings,\n", + " docstore=docstore,\n", ")\n", 
"\n", + "distributions = {simple: 0.5, reasoning: 0.25, multi_context: 0.25}\n", + "\n", "# use generator.generate_with_llamaindex_docs if you use llama-index as document loader\n", - "testset = test_generator.generate_with_langchain_docs(documents=documents,\n", - " test_size=10,distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})" + "testset = test_generator.generate_with_langchain_docs(\n", + " documents=documents, test_size=10, distributions=distributions\n", + ")" ] }, {