diff --git a/docs/howtos/integrations/llamaindex.ipynb b/docs/howtos/integrations/llamaindex.ipynb index b4ae3034b..eac1bf351 100644 --- a/docs/howtos/integrations/llamaindex.ipynb +++ b/docs/howtos/integrations/llamaindex.ipynb @@ -1,71 +1,301 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "id": "fd8d6ad7", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, { "cell_type": "markdown", "id": "d2451aff", "metadata": {}, "source": [ - "# Evaluating LlamaIndex\n", + "# LlamaIndex\n", "\n", - "[LlamaIndex](https://github.com/jerryjliu/llama_index) is a data framework for LLM applications to ingest, structure, and access private or domain-specific data. Makes it super easy to connect LLMs with your own data. But in order to figure out the best configuration for llamaIndex and your data you need a object measure of the performance. This is where ragas comes in. Ragas will help you evaluate your `QueryEngine` and gives you the confidence to tweak the configuration to get hightest score.\n", + "[LlamaIndex](https://github.com/run-llama/llama_index) is a data framework for LLM applications to ingest, structure, and access private or domain-specific data. It makes it super easy to connect LLMs with your own data. But in order to figure out the best configuration for LlamaIndex and your data, you need an objective measure of performance. This is where Ragas comes in. Ragas will help you evaluate your `QueryEngine` and give you the confidence to tweak the configuration to get the highest score.\n", "\n", "This guide assumes you have familarity with the LlamaIndex framework." ] }, + { + "cell_type": "markdown", + "id": "ea0553ea", + "metadata": {}, + "source": [ + "## Building the Testset\n", + "\n", + "You will need a testset to evaluate your `QueryEngine` against. You can either build one yourself or use the [Testset Generator Module](../../getstarted/testset_generation.md) in Ragas to get started with a small synthetic one.\n", + "\n", + "Let's see how that works with LlamaIndex." ] }, { "cell_type": "code", - "execution_count": 1, - "id": "37a9b094", + "execution_count": 3, + "id": "096e5af0", "metadata": {}, "outputs": [], "source": [ - "# attach to the same event-loop\n", - "import nest_asyncio\n", + "# load the documents\n", + "from llama_index.core import SimpleDirectoryReader\n", "\n", - "nest_asyncio.apply()" + "documents = SimpleDirectoryReader(\"./nyc_wikipedia\").load_data()" ] }, { "cell_type": "markdown", - "id": "abaf6538", + "id": "012d81a1", "metadata": {}, "source": [ - "## Building the `VectorStoreIndex` and `QueryEngine`\n", - "\n", - "To start lets build an `VectorStoreIndex` over the New York Citie's [wikipedia page](https://en.wikipedia.org/wiki/New_York_City) as an example and use ragas to evaluate it."
+ "Now lets init the `TestsetGenerator` object with the corresponding generator and critic llms" ] }, { "cell_type": "code", - "execution_count": 2, - "id": "37c4a1cb", + "execution_count": 4, + "id": "e2107b62", "metadata": {}, "outputs": [], "source": [ - "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n", + "from ragas.testset.generator import TestsetGenerator\n", + "from ragas.testset.evolutions import simple, reasoning, multi_context\n", + "from llama_index.llms.openai import OpenAI\n", + "from llama_index.embeddings.openai import OpenAIEmbedding\n", + "\n", + "# generator with openai models\n", + "generator_llm = OpenAI(model=\"gpt-3.5-turbo-16k\")\n", + "critic_llm = OpenAI(model=\"gpt-4\")\n", + "embeddings = OpenAIEmbedding()\n", "\n", - "import pandas as pd" + "generator = TestsetGenerator.from_llama_index(\n", + " generator_llm=generator_llm,\n", + " critic_llm=critic_llm,\n", + " embeddings=embeddings,\n", + ")" ] }, { "cell_type": "markdown", - "id": "9019540b", + "id": "f8d8d31c", "metadata": {}, "source": [ - "load the data, build the `VectorStoreIndex` and create the `QueryEngine`." + "Now you are all set to generate the dataset" ] }, { "cell_type": "code", - "execution_count": 3, - "id": "85e75230", + "execution_count": 5, + "id": "fe03839d", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "e555d31a1f8f494a9533605c03ec4140", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "embedding nodes: 0%| | 0/54 [00:00\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
\n", + "" + ], + "text/plain": [ + " question \\\n", + "0 What cultural movement began in New York City ... \n", + "1 What is the significance of New York City's tr... \n", + "2 What factors led to the creation of Central Pa... \n", + "3 What was the impact of the Treaty of Breda on ... \n", + "4 What role did New York play in the American Re... \n", + "\n", + " contexts \\\n", + "0 [ Others cite the end of the crack epidemic an... \n", + "1 [ consisting of 51 council members whose distr... \n", + "2 [ next ten years with British troops stationed... \n", + "3 [ British raids. In 1626, the Dutch colonial D... \n", + "4 [ British raids. In 1626, the Dutch colonial D... \n", + "\n", + " ground_truth evolution_type \\\n", + "0 The Harlem Renaissance simple \n", + "1 New York City's transportation system is both ... simple \n", + "2 Public-minded members of the contemporaneous b... reasoning \n", + "3 The Treaty of Breda confirmed the transfer of ... multi_context \n", + "4 New York played a significant role in the Amer... simple \n", + "\n", + " metadata episode_done \n", + "0 [{'file_path': '/home/jjmachan/jjmachan/explod... True \n", + "1 [{'file_path': '/home/jjmachan/jjmachan/explod... True \n", + "2 [{'file_path': '/home/jjmachan/jjmachan/explod... True \n", + "3 [{'file_path': '/home/jjmachan/jjmachan/explod... True \n", + "4 [{'file_path': '/home/jjmachan/jjmachan/explod... True " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = testset.to_pandas()\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "6107ea8b", + "metadata": {}, + "source": [ + "with a test dataset to test our `QueryEngine` lets now build one and evaluate it." + ] + }, + { + "cell_type": "markdown", + "id": "abaf6538", + "metadata": {}, + "source": [ + "## Building the `QueryEngine`\n", + "\n", + "To start lets build an `VectorStoreIndex` over the New York Citie's [wikipedia page](https://en.wikipedia.org/wiki/New_York_City) as an example and use ragas to evaluate it. \n", + "\n", + "Since we already loaded the dataset into `documents` lets use that." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "37c4a1cb", "metadata": {}, "outputs": [], "source": [ - "documents = SimpleDirectoryReader(\"./nyc_wikipedia/\").load_data()\n", - "vector_index = VectorStoreIndex.from_documents(\n", - " documents, service_context=ServiceContext.from_defaults(chunk_size=512)\n", - ")\n", + "# build query engine\n", + "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n", + "from llama_index.core.settings import Settings\n", + "\n", + "vector_index = VectorStoreIndex.from_documents(documents)\n", "\n", "query_engine = vector_index.as_query_engine()" ] @@ -75,12 +305,35 @@ "id": "13d676c0", "metadata": {}, "source": [ - "Lets try an sample question to see if it is working" + "Lets try an sample question from the generated testset to see if it is working" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 9, + "id": "895d95b2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'What cultural movement began in New York City and established the African-American literary canon in the United States?'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# convert it to pandas dataset\n", + "df = testset.to_pandas()\n", + "df[\"question\"][0]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, "id": "a25026c2", "metadata": {}, "outputs": [ @@ -88,13 +341,12 @@ "name": "stdout", "output_type": "stream", "text": [ - "\n", - "New York City was named in honor of the Duke of York, who would become King James II of England. In 1664, King Charles II appointed the Duke as proprietor of the former territory of New Netherland, including the city of New Amsterdam, when England seized it from Dutch control. The city was then renamed New York in his honor.\n" + "The Harlem Renaissance was the cultural movement that began in New York City and established the African-American literary canon in the United States.\n" ] } ], "source": [ - "response_vector = query_engine.query(\"How did New York City get its name?\")\n", + "response_vector = query_engine.query(df[\"question\"][0])\n", "\n", "print(response_vector)" ] @@ -104,7 +356,7 @@ "id": "b678501e", "metadata": {}, "source": [ - "## Evaluating with Ragas\n", + "## Evaluating the `QueryEngine`\n", "\n", "Now that we have a `QueryEngine` for the `VectorStoreIndex` we can use the llama_index integration Ragas has to evaluate it. \n", "\n", @@ -123,32 +375,6 @@ "first lets generate the questions. Ideally you should use that you see in production so that the distribution of question with which we evaluate matches the distribution of questions seen in production. This ensures that the scores reflect the performance seen in production but to start off we'll be using a few example question." 
] }, - { - "cell_type": "code", - "execution_count": 5, - "id": "751dc988", - "metadata": {}, - "outputs": [], - "source": [ - "eval_questions = [\n", - " \"What is the population of New York City as of 2020?\",\n", - " \"Which borough of New York City has the highest population?\",\n", - " \"What is the economic significance of New York City?\",\n", - " \"How did New York City get its name?\",\n", - " \"What is the significance of the Statue of Liberty in New York City?\",\n", - "]\n", - "\n", - "eval_answers = [\n", - " \"8,804,000\", # incorrect answer\n", - " \"Queens\", # incorrect answer\n", - " \"New York City's economic significance is vast, as it serves as the global financial capital, housing Wall Street and major financial institutions. Its diverse economy spans technology, media, healthcare, education, and more, making it resilient to economic fluctuations. NYC is a hub for international business, attracting global companies, and boasts a large, skilled labor force. Its real estate market, tourism, cultural industries, and educational institutions further fuel its economic prowess. The city's transportation network and global influence amplify its impact on the world stage, solidifying its status as a vital economic player and cultural epicenter.\",\n", - " \"New York City got its name when it came under British control in 1664. King Charles II of England granted the lands to his brother, the Duke of York, who named the city New York in his own honor.\",\n", - " \"The Statue of Liberty in New York City holds great significance as a symbol of the United States and its ideals of liberty and peace. It greeted millions of immigrants who arrived in the U.S. by ship in the late 19th and early 20th centuries, representing hope and freedom for those seeking a better life. It has since become an iconic landmark and a global symbol of cultural diversity and freedom.\",\n", - "]\n", - "\n", - "eval_answers = [[a] for a in eval_answers]" - ] - }, { "cell_type": "markdown", "id": "843bddb8", @@ -159,7 +385,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "9875132a", "metadata": {}, "outputs": [], @@ -181,6 +407,66 @@ "]" ] }, + { + "cell_type": "markdown", + "id": "8230a307", + "metadata": {}, + "source": [ + "now lets init the evaluator model" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "f8049166", + "metadata": {}, + "outputs": [], + "source": [ + "from llama_index.llms.openai import OpenAI\n", + "from llama_index.embeddings.openai import OpenAIEmbedding\n", + "\n", + "# using GPT 3.5, use GPT 4 / 4-turbo for better accuracy\n", + "evaluator_llm = OpenAI(model=\"gpt-3.5-turbo\")" + ] + }, + { + "cell_type": "markdown", + "id": "605e5d96", + "metadata": {}, + "source": [ + "the `evaluate()` function expects a dict of \"question\" and \"ground_truth\" for metrics. You can easily convert the `testset` to that format" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "4b2a81ed", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['The Harlem Renaissance',\n", + " \"New York City's transportation system is both complex and extensive, with a comprehensive mass transit system that accounts for one in every three users of mass transit in the United States. The New York City Subway system is the largest rapid transit system in the world, and the city has a high usage of public transport, with a majority of households not owning a car. 
Due to their reliance on mass transit, New Yorkers spend less of their household income on transportation compared to the national average.\",\n", + " 'Public-minded members of the contemporaneous business elite lobbied for the establishment of Central Park',\n", + " 'The Treaty of Breda confirmed the transfer of New Amsterdam to English control and the renaming of the settlement as New York. The Duke of York, who would later become King James II and VII, played a significant role in the naming of New York City.',\n", + " 'New York played a significant role in the American Revolution. The Stamp Act Congress met in New York in October 1765, and the city became a center for the Sons of Liberty organization. Skirmishes and battles took place in and around New York, including the Battle of Long Island and the Battle of Saratoga. The city was occupied by British forces for much of the war, but it was eventually liberated by American troops in 1783.']" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# convert to HF dataset\n", + "ds = testset.to_dataset()\n", + "\n", + "ds_dict = ds.to_dict()\n", + "ds_dict[\"question\"]\n", + "ds_dict[\"ground_truth\"]" + ] + }, { "cell_type": "markdown", "id": "8ae4a2d1", @@ -191,90 +477,65 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 24, "id": "05633cc2", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [faithfulness]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|█████████████████████████████████████████████████████████████| 1/1 [00:51<00:00, 51.40s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [answer_relevancy]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|█████████████████████████████████████████████████████████████| 1/1 [00:09<00:00, 9.64s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [context_precision]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|█████████████████████████████████████████████████████████████| 1/1 [00:41<00:00, 41.21s/it]\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "evaluating with [context_recall]\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "100%|█████████████████████████████████████████████████████████████| 1/1 [00:34<00:00, 34.97s/it]\n" - ] + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "261183e30af84047a6d8c18fdbef1d72", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Running Query Engine: 0%| | 0/5 [00:00\n", " \n", " 0\n", - " What is the population of New York City as of ...\n", - " [Aeromedical Staging Squadron, and a military ...\n", - " \\nThe population of New York City as of 2020 i...\n", - " [8,804,000]\n", - " 1.0\n", - " 0.999999\n", - " 0.320000\n", + " What cultural movement began in New York City ...\n", + " [=== 19th century ===\\n\\nOver the course of th...\n", + " The Harlem Renaissance of literary and cultura...\n", + " The Harlem Renaissance\n", + " 0.5\n", + " 0.907646\n", + " 0.5\n", " 1.0\n", " 0\n", " \n", " \n", " 1\n", - " Which borough of New York City has the highest...\n", - " [co-extensive with New York County, the boroug...\n", - " \\nThe borough of Manhattan has the highest pop...\n", - " [Queens]\n", - " 0.0\n", - " 
0.998524\n", - " 0.038462\n", - " 0.9\n", + " What is the significance of New York City's tr...\n", + " [== Transportation ==\\n\\nNew York City's compr...\n", + " New York City's transportation system is signi...\n", + " New York City's transportation system is both ...\n", + " 1.0\n", + " 0.986921\n", + " 1.0\n", + " 1.0\n", " 0\n", " \n", " \n", " 2\n", - " What is the economic significance of New York ...\n", - " [health care and life sciences, medical techno...\n", - " \\nNew York City is a major global economic cen...\n", - " [New York City's economic significance is vast...\n", + " What factors led to the creation of Central Pa...\n", + " [=== 19th century ===\\n\\nOver the course of th...\n", + " Prominent American literary figures lived in N...\n", + " Public-minded members of the contemporaneous b...\n", + " 1.0\n", + " 0.805014\n", " 1.0\n", - " 0.904272\n", - " 0.423077\n", " 1.0\n", " 0\n", " \n", " \n", " 3\n", - " How did New York City get its name?\n", - " [a US$1 billion research and education center ...\n", - " \\nNew York City was named in honor of the Duke...\n", - " [New York City got its name when it came under...\n", + " What was the impact of the Treaty of Breda on ...\n", + " [=== Dutch rule ===\\n\\nA permanent European pr...\n", + " The Treaty of Breda resulted in the transfer o...\n", + " The Treaty of Breda confirmed the transfer of ...\n", + " 1.0\n", + " 0.860931\n", " 1.0\n", - " 0.929719\n", - " 0.333333\n", " 1.0\n", " 0\n", " \n", " \n", " 4\n", - " What is the significance of the Statue of Libe...\n", - " [(stylized I ❤ NY) is both a logo and a song t...\n", - " \\nThe Statue of Liberty is a symbol of the Uni...\n", - " [The Statue of Liberty in New York City holds ...\n", - " 0.5\n", - " 0.942676\n", - " 0.052632\n", + " What role did New York play in the American Re...\n", + " [=== Province of New York and slavery ===\\n\\nI...\n", + " New York served as a significant location duri...\n", + " New York played a significant role in the Amer...\n", + " 1.0\n", + " 0.935846\n", + " 1.0\n", " 1.0\n", " 0\n", " \n", @@ -404,42 +665,42 @@ ], "text/plain": [ " question \\\n", - "0 What is the population of New York City as of ... \n", - "1 Which borough of New York City has the highest... \n", - "2 What is the economic significance of New York ... \n", - "3 How did New York City get its name? \n", - "4 What is the significance of the Statue of Libe... \n", + "0 What cultural movement began in New York City ... \n", + "1 What is the significance of New York City's tr... \n", + "2 What factors led to the creation of Central Pa... \n", + "3 What was the impact of the Treaty of Breda on ... \n", + "4 What role did New York play in the American Re... \n", "\n", " contexts \\\n", - "0 [Aeromedical Staging Squadron, and a military ... \n", - "1 [co-extensive with New York County, the boroug... \n", - "2 [health care and life sciences, medical techno... \n", - "3 [a US$1 billion research and education center ... \n", - "4 [(stylized I ❤ NY) is both a logo and a song t... \n", + "0 [=== 19th century ===\\n\\nOver the course of th... \n", + "1 [== Transportation ==\\n\\nNew York City's compr... \n", + "2 [=== 19th century ===\\n\\nOver the course of th... \n", + "3 [=== Dutch rule ===\\n\\nA permanent European pr... \n", + "4 [=== Province of New York and slavery ===\\n\\nI... \n", "\n", " answer \\\n", - "0 \\nThe population of New York City as of 2020 i... \n", - "1 \\nThe borough of Manhattan has the highest pop... 
\n", - "2 \\nNew York City is a major global economic cen... \n", - "3 \\nNew York City was named in honor of the Duke... \n", - "4 \\nThe Statue of Liberty is a symbol of the Uni... \n", + "0 The Harlem Renaissance of literary and cultura... \n", + "1 New York City's transportation system is signi... \n", + "2 Prominent American literary figures lived in N... \n", + "3 The Treaty of Breda resulted in the transfer o... \n", + "4 New York served as a significant location duri... \n", "\n", - " ground_truth faithfulness \\\n", - "0 [8,804,000] 1.0 \n", - "1 [Queens] 0.0 \n", - "2 [New York City's economic significance is vast... 1.0 \n", - "3 [New York City got its name when it came under... 1.0 \n", - "4 [The Statue of Liberty in New York City holds ... 0.5 \n", + " ground_truth faithfulness \\\n", + "0 The Harlem Renaissance 0.5 \n", + "1 New York City's transportation system is both ... 1.0 \n", + "2 Public-minded members of the contemporaneous b... 1.0 \n", + "3 The Treaty of Breda confirmed the transfer of ... 1.0 \n", + "4 New York played a significant role in the Amer... 1.0 \n", "\n", " answer_relevancy context_precision context_recall harmfulness \n", - "0 0.999999 0.320000 1.0 0 \n", - "1 0.998524 0.038462 0.9 0 \n", - "2 0.904272 0.423077 1.0 0 \n", - "3 0.929719 0.333333 1.0 0 \n", - "4 0.942676 0.052632 1.0 0 " + "0 0.907646 0.5 1.0 0 \n", + "1 0.986921 1.0 1.0 0 \n", + "2 0.805014 1.0 1.0 0 \n", + "3 0.860931 1.0 1.0 0 \n", + "4 0.935846 1.0 1.0 0 " ] }, - "execution_count": 13, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } diff --git a/src/ragas/embeddings/__init__.py b/src/ragas/embeddings/__init__.py index e78a95b99..cc7c1215d 100644 --- a/src/ragas/embeddings/__init__.py +++ b/src/ragas/embeddings/__init__.py @@ -2,6 +2,7 @@ BaseRagasEmbeddings, HuggingfaceEmbeddings, LangchainEmbeddingsWrapper, + LlamaIndexEmbeddingsWrapper, embedding_factory, ) @@ -9,5 +10,6 @@ "HuggingfaceEmbeddings", "BaseRagasEmbeddings", "LangchainEmbeddingsWrapper", + "LlamaIndexEmbeddingsWrapper", "embedding_factory", ] diff --git a/src/ragas/embeddings/base.py b/src/ragas/embeddings/base.py index dded0fa01..4f5c3a533 100644 --- a/src/ragas/embeddings/base.py +++ b/src/ragas/embeddings/base.py @@ -13,6 +13,9 @@ from ragas.run_config import RunConfig, add_async_retry, add_retry +if t.TYPE_CHECKING: + from llama_index.core.base.embeddings.base import BaseEmbedding + DEFAULT_MODEL_NAME = "BAAI/bge-small-en-v1.5" @@ -153,6 +156,28 @@ def predict(self, texts: List[List[str]]) -> List[List[float]]: return predictions.tolist() +class LlamaIndexEmbeddingsWrapper(BaseRagasEmbeddings): + def __init__( + self, embeddings: BaseEmbedding, run_config: t.Optional[RunConfig] = None + ): + self.embeddings = embeddings + if run_config is None: + run_config = RunConfig() + self.set_run_config(run_config) + + def embed_query(self, text: str) -> t.List[float]: + return self.embeddings.get_query_embedding(text) + + def embed_documents(self, texts: t.List[str]) -> t.List[t.List[float]]: + return self.embeddings.get_text_embedding_batch(texts) + + async def aembed_query(self, text: str) -> t.List[float]: + return await self.embeddings.aget_query_embedding(text) + + async def aembed_documents(self, texts: t.List[str]) -> t.List[t.List[float]]: + return await self.embeddings.aget_text_embedding_batch(texts) + + def embedding_factory( model: str = "text-embedding-ada-002", run_config: t.Optional[RunConfig] = None ) -> BaseRagasEmbeddings: diff --git a/src/ragas/executor.py b/src/ragas/executor.py 
index 70c6e0a00..f5266a267 100644 --- a/src/ragas/executor.py +++ b/src/ragas/executor.py @@ -17,7 +17,6 @@ def runner_exception_hook(args: threading.ExceptHookArgs): - print(args) raise args.exc_type diff --git a/src/ragas/integrations/llama_index.py b/src/ragas/integrations/llama_index.py new file mode 100644 index 000000000..c66a822ea --- /dev/null +++ b/src/ragas/integrations/llama_index.py @@ -0,0 +1,113 @@ +from __future__ import annotations + +import logging +import typing as t +from copy import copy +from uuid import uuid4 + +from datasets import Dataset + +from ragas.embeddings import LlamaIndexEmbeddingsWrapper +from ragas.evaluation import evaluate as ragas_evaluate +from ragas.exceptions import ExceptionInRunner +from ragas.executor import Executor +from ragas.llms import LlamaIndexLLMWrapper +from ragas.validation import EVALMODE_TO_COLUMNS, validate_evaluation_modes + +if t.TYPE_CHECKING: + from llama_index.core.base.embeddings.base import ( + BaseEmbedding as LlamaIndexEmbeddings, + ) + from llama_index.core.base.llms.base import BaseLLM as LlamaindexLLM + + from ragas.evaluation import Result + from ragas.metrics.base import Metric + + +logger = logging.getLogger(__name__) + + +def validate_dataset(dataset: dict, metrics: list[Metric]): + # change EVALMODE_TO_COLUMNS for usecase with no contexts and answer + evalmod_to_columns_llamaindex = copy(EVALMODE_TO_COLUMNS) + for mode in EVALMODE_TO_COLUMNS: + if "answer" in EVALMODE_TO_COLUMNS[mode]: + EVALMODE_TO_COLUMNS[mode].remove("answer") + if "contexts" in EVALMODE_TO_COLUMNS[mode]: + EVALMODE_TO_COLUMNS[mode].remove("contexts") + + hf_dataset = Dataset.from_dict(dataset) + validate_evaluation_modes(hf_dataset, metrics, evalmod_to_columns_llamaindex) + + +def evaluate( + query_engine, + dataset: dict, + metrics: list[Metric], + llm: t.Optional[LlamaindexLLM] = None, + embeddings: t.Optional[LlamaIndexEmbeddings] = None, + raise_exceptions: bool = True, + column_map: t.Optional[t.Dict[str, str]] = None, +) -> Result: + column_map = column_map or {} + + # wrap llms and embeddings + li_llm = None + if llm is not None: + li_llm = LlamaIndexLLMWrapper(llm) + li_embeddings = None + if embeddings is not None: + li_embeddings = LlamaIndexEmbeddingsWrapper(embeddings) + + # validate and transform dataset + if dataset is None: + raise ValueError("Provide dataset!") + + exec = Executor( + desc="Running Query Engine", + keep_progress_bar=True, + raise_exceptions=raise_exceptions, + ) + + # get query + queries = dataset["question"] + for i, q in enumerate(queries): + exec.submit(query_engine.aquery, q, name=f"query-{i}") + + answers: t.List[str] = [] + contexts: t.List[t.List[str]] = [] + try: + results = exec.results() + if results == []: + raise ExceptionInRunner() + except Exception as e: + raise e + else: + for r in results: + answers.append(r.response) + contexts.append([n.node.text for n in r.source_nodes]) + + # create HF dataset + hf_dataset = Dataset.from_dict( + { + "question": queries, + "contexts": contexts, + "answer": answers, + } + ) + if "ground_truth" in dataset: + hf_dataset = hf_dataset.add_column( + name="ground_truth", + column=dataset["ground_truth"], + new_fingerprint=str(uuid4()), + ) + + results = ragas_evaluate( + dataset=hf_dataset, + metrics=metrics, + llm=li_llm, + embeddings=li_embeddings, + raise_exceptions=raise_exceptions, + ) + + return results diff --git a/src/ragas/llms/__init__.py b/src/ragas/llms/__init__.py index dc1e37fb5..27ca2521b 100644 --- a/src/ragas/llms/__init__.py +++ 
b/src/ragas/llms/__init__.py @@ -1,7 +1,13 @@ -from ragas.llms.base import BaseRagasLLM, LangchainLLMWrapper, llm_factory +from ragas.llms.base import ( + BaseRagasLLM, + LangchainLLMWrapper, + LlamaIndexLLMWrapper, + llm_factory, +) __all__ = [ "BaseRagasLLM", "LangchainLLMWrapper", + "LlamaIndexLLMWrapper", "llm_factory", ] diff --git a/src/ragas/llms/base.py b/src/ragas/llms/base.py index 75e8b6615..c62aa2341 100644 --- a/src/ragas/llms/base.py +++ b/src/ragas/llms/base.py @@ -10,7 +10,7 @@ from langchain_community.chat_models.vertexai import ChatVertexAI from langchain_community.llms import VertexAI from langchain_core.language_models import BaseLanguageModel -from langchain_core.outputs import LLMResult +from langchain_core.outputs import Generation, LLMResult from langchain_openai.chat_models import AzureChatOpenAI, ChatOpenAI from langchain_openai.llms import AzureOpenAI, OpenAI from langchain_openai.llms.base import BaseOpenAI @@ -19,6 +19,7 @@ if t.TYPE_CHECKING: from langchain_core.callbacks import Callbacks + from llama_index.core.base.llms.base import BaseLLM from ragas.llms.prompt import PromptValue @@ -203,6 +204,78 @@ def set_run_config(self, run_config: RunConfig): self.run_config.exception_types = RateLimitError +class LlamaIndexLLMWrapper(BaseRagasLLM): + """ + A Adaptor for LlamaIndex LLMs + """ + + def __init__( + self, + llm: BaseLLM, + run_config: t.Optional[RunConfig] = None, + ): + self.llm = llm + + self._signature = "" + if type(self.llm).__name__.lower() == "bedrock": + self._signature = "bedrock" + if run_config is None: + run_config = RunConfig() + self.set_run_config(run_config) + + def check_args( + self, + n: int, + temperature: float, + stop: t.Optional[t.List[str]], + callbacks: Callbacks, + ) -> dict[str, t.Any]: + if n != 1: + logger.warning("n values greater than 1 not support for LlamaIndex LLMs") + if temperature != 1e-8: + logger.info("temperature kwarg passed to LlamaIndex LLM") + if stop is not None: + logger.info("stop kwarg passed to LlamaIndex LLM") + if callbacks is not None: + logger.info( + "callbacks not supported for LlamaIndex LLMs, ignoring callbacks" + ) + if self._signature == "bedrock": + return {"temperature": temperature} + else: + return { + "n": n, + "temperature": temperature, + "stop": stop, + } + + def generate_text( + self, + prompt: PromptValue, + n: int = 1, + temperature: float = 1e-8, + stop: t.Optional[t.List[str]] = None, + callbacks: Callbacks = None, + ) -> LLMResult: + kwargs = self.check_args(n, temperature, stop, callbacks) + li_response = self.llm.complete(prompt.to_string(), **kwargs) + + return LLMResult(generations=[[Generation(text=li_response.text)]]) + + async def agenerate_text( + self, + prompt: PromptValue, + n: int = 1, + temperature: float = 1e-8, + stop: t.Optional[t.List[str]] = None, + callbacks: Callbacks = None, + ) -> LLMResult: + kwargs = self.check_args(n, temperature, stop, callbacks) + li_response = await self.llm.acomplete(prompt.to_string(), **kwargs) + + return LLMResult(generations=[[Generation(text=li_response.text)]]) + + def llm_factory( model: str = "gpt-3.5-turbo", run_config: t.Optional[RunConfig] = None ) -> BaseRagasLLM: diff --git a/src/ragas/llms/prompt.py b/src/ragas/llms/prompt.py index d65619b84..d56616ba9 100644 --- a/src/ragas/llms/prompt.py +++ b/src/ragas/llms/prompt.py @@ -44,12 +44,12 @@ class Prompt(BaseModel): language (str): The language of the prompt (default: "english"). 
""" - name: str + name: str = "" instruction: str output_format_instruction: str = "" examples: t.List[Example] = [] - input_keys: t.List[str] - output_key: str + input_keys: t.List[str] = [""] + output_key: str = "" output_type: t.Literal["json", "str"] = "json" language: str = "english" diff --git a/src/ragas/metrics/_context_relevancy.py b/src/ragas/metrics/_context_relevancy.py index efedc12e8..62e07c84a 100644 --- a/src/ragas/metrics/_context_relevancy.py +++ b/src/ragas/metrics/_context_relevancy.py @@ -60,7 +60,6 @@ def _compute_score(self, response: str, row: t.Dict) -> float: if response.lower() != "insufficient information." else [] ) - # print(len(indices)) if len(context_sents) == 0: return 0 else: diff --git a/src/ragas/metrics/base.py b/src/ragas/metrics/base.py index 936d5ebfc..4e14c3901 100644 --- a/src/ragas/metrics/base.py +++ b/src/ragas/metrics/base.py @@ -61,11 +61,13 @@ def get_required_columns( class Metric(ABC): @property @abstractmethod - def name(self) -> str: ... + def name(self) -> str: + ... @property @abstractmethod - def evaluation_mode(self) -> EvaluationMode: ... + def evaluation_mode(self) -> EvaluationMode: + ... @abstractmethod def init(self, run_config: RunConfig): @@ -127,9 +129,8 @@ async def ascore( return score @abstractmethod - async def _ascore( - self, row: t.Dict, callbacks: Callbacks, is_async: bool - ) -> float: ... + async def _ascore(self, row: t.Dict, callbacks: Callbacks, is_async: bool) -> float: + ... @dataclass diff --git a/src/ragas/testset/generator.py b/src/ragas/testset/generator.py index 68ec43f03..65c97dbd8 100644 --- a/src/ragas/testset/generator.py +++ b/src/ragas/testset/generator.py @@ -7,16 +7,18 @@ import pandas as pd from datasets import Dataset -from langchain_core.embeddings import Embeddings -from langchain_core.language_models import BaseLanguageModel from langchain_openai.chat_models import ChatOpenAI from langchain_openai.embeddings import OpenAIEmbeddings from ragas._analytics import TestsetGenerationEvent, track -from ragas.embeddings.base import BaseRagasEmbeddings, LangchainEmbeddingsWrapper +from ragas.embeddings.base import ( + BaseRagasEmbeddings, + LangchainEmbeddingsWrapper, + LlamaIndexEmbeddingsWrapper, +) from ragas.exceptions import ExceptionInRunner from ragas.executor import Executor -from ragas.llms import BaseRagasLLM, LangchainLLMWrapper +from ragas.llms import BaseRagasLLM, LangchainLLMWrapper, LlamaIndexLLMWrapper from ragas.run_config import RunConfig from ragas.testset.docstore import Document, DocumentStore, InMemoryDocumentStore from ragas.testset.evolutions import ( @@ -34,6 +36,12 @@ if t.TYPE_CHECKING: from langchain_core.documents import Document as LCDocument + from langchain_core.embeddings import Embeddings as LangchainEmbeddings + from langchain_core.language_models import BaseLanguageModel as LangchainLLM + from llama_index.core.base.embeddings.base import ( + BaseEmbedding as LlamaIndexEmbeddings, + ) + from llama_index.core.base.llms.base import BaseLLM as LlamaindexLLM from llama_index.core.schema import Document as LlamaindexDocument logger = logging.getLogger(__name__) @@ -75,9 +83,9 @@ class TestsetGenerator: @classmethod def from_langchain( cls, - generator_llm: BaseLanguageModel, - critic_llm: BaseLanguageModel, - embeddings: Embeddings, + generator_llm: LangchainLLM, + critic_llm: LangchainLLM, + embeddings: LangchainEmbeddings, docstore: t.Optional[DocumentStore] = None, run_config: t.Optional[RunConfig] = None, chunk_size: int = 1024, @@ -104,6 +112,36 @@ def from_langchain( 
docstore=docstore, ) + @classmethod + def from_llama_index( + cls, + generator_llm: LlamaindexLLM, + critic_llm: LlamaindexLLM, + embeddings: LlamaIndexEmbeddings, + docstore: t.Optional[DocumentStore] = None, + run_config: t.Optional[RunConfig] = None, + ) -> "TestsetGenerator": + generator_llm_model = LlamaIndexLLMWrapper(generator_llm) + critic_llm_model = LlamaIndexLLMWrapper(critic_llm) + embeddings_model = LlamaIndexEmbeddingsWrapper(embeddings) + keyphrase_extractor = KeyphraseExtractor(llm=generator_llm_model) + if docstore is None: + from langchain.text_splitter import TokenTextSplitter + + splitter = TokenTextSplitter(chunk_size=1024, chunk_overlap=0) + docstore = InMemoryDocumentStore( + splitter=splitter, + embeddings=embeddings_model, + extractor=keyphrase_extractor, + run_config=run_config, + ) + return cls( + generator_llm=generator_llm_model, + critic_llm=critic_llm_model, + embeddings=embeddings_model, + docstore=docstore, + ) + @classmethod @deprecated("0.1.4", removal="0.2.0", alternative="from_langchain") def with_openai( @@ -242,7 +280,7 @@ def generate( ] total_evolutions = 0 for evolution, probability in distributions.items(): - for i in sample(range(test_size),round(probability * test_size)): + for i in sample(range(test_size), round(probability * test_size)): exec.submit( evolution.evolve, current_nodes[i], diff --git a/src/ragas/validation.py b/src/ragas/validation.py index f77137a1f..5222162d1 100644 --- a/src/ragas/validation.py +++ b/src/ragas/validation.py @@ -70,7 +70,9 @@ def validate_column_dtypes(ds: Dataset): } -def validate_evaluation_modes(ds: Dataset, metrics: list[Metric]): +def validate_evaluation_modes( + ds: Dataset, metrics: list[Metric], evalmode_to_columns: dict = EVALMODE_TO_COLUMNS +): """ validates the dataset and returns the evaluation type @@ -81,7 +83,7 @@ def validate_evaluation_modes(ds: Dataset, metrics: list[Metric]): 4. (g,a) """ for m in metrics: - required_columns = set(EVALMODE_TO_COLUMNS[m.evaluation_mode]) + required_columns = set(evalmode_to_columns[m.evaluation_mode]) available_columns = set(ds.features.keys()) if not required_columns.issubset(available_columns): extra_msg = "" diff --git a/tests/benchmarks/benchmark_testsetgen.py b/tests/benchmarks/benchmark_testsetgen.py index 30b6e0b91..90414ebdb 100644 --- a/tests/benchmarks/benchmark_testsetgen.py +++ b/tests/benchmarks/benchmark_testsetgen.py @@ -1,7 +1,7 @@ import time from langchain_openai import ChatOpenAI, OpenAIEmbeddings -from llama_index import download_loader +from llama_index.core import download_loader from ragas.testset.evolutions import conditional, multi_context, reasoning, simple from ragas.testset.generator import TestsetGenerator diff --git a/tests/e2e/test_amnesty_in_ci.py b/tests/e2e/test_amnesty_in_ci.py index 96e9757ea..42b44fc20 100644 --- a/tests/e2e/test_amnesty_in_ci.py +++ b/tests/e2e/test_amnesty_in_ci.py @@ -4,9 +4,9 @@ from ragas import evaluate from ragas.metrics import ( answer_relevancy, - faithfulness, - context_recall, context_precision, + context_recall, + faithfulness, ) # loading the V2 dataset
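For reference, here is a minimal end-to-end sketch that stitches together the pieces this PR adds (`TestsetGenerator.from_llama_index`, the `LlamaIndexLLMWrapper`/`LlamaIndexEmbeddingsWrapper` classes used under the hood, and `ragas.integrations.llama_index.evaluate`). It assumes `OPENAI_API_KEY` is set and the NYC wikipedia text lives in `./nyc_wikipedia`; the `generate_with_llamaindex_docs(...)` call mirrors the notebook's testset-generation step but is not itself shown in this diff, so treat that name and its arguments as an assumption.

```python
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI

from ragas.integrations.llama_index import evaluate
from ragas.metrics import (
    answer_relevancy,
    context_precision,
    context_recall,
    faithfulness,
)
from ragas.testset.evolutions import multi_context, reasoning, simple
from ragas.testset.generator import TestsetGenerator

# 1. generate a small synthetic testset from the documents
documents = SimpleDirectoryReader("./nyc_wikipedia").load_data()
generator = TestsetGenerator.from_llama_index(
    generator_llm=OpenAI(model="gpt-3.5-turbo-16k"),
    critic_llm=OpenAI(model="gpt-4"),
    embeddings=OpenAIEmbedding(),
)
# NOTE: this call follows the existing Ragas testset API and is not part of this diff
testset = generator.generate_with_llamaindex_docs(
    documents,
    test_size=5,
    distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
)

# 2. build the QueryEngine under test
query_engine = VectorStoreIndex.from_documents(documents).as_query_engine()

# 3. evaluate: the integration queries the engine itself, collects answers and
#    retrieved contexts, then scores them with the chosen metrics
result = evaluate(
    query_engine=query_engine,
    dataset=testset.to_dataset().to_dict(),  # needs "question" (+ optional "ground_truth")
    metrics=[faithfulness, answer_relevancy, context_precision, context_recall],
    llm=OpenAI(model="gpt-3.5-turbo"),
    embeddings=OpenAIEmbedding(),
)
print(result)       # aggregate scores
result.to_pandas()  # per-question scores, as shown in the notebook
```

Passing the raw query engine plus a plain `{"question": [...], "ground_truth": [...]}` dict keeps the integration surface small: the helper runs the queries asynchronously through the `Executor` and then reuses the core `ragas.evaluate` for scoring.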