From bd1f4828e4457fc6a6f262009ad5685ef5f50909 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Sat, 3 Feb 2024 15:18:59 -0800 Subject: [PATCH 1/9] handle non dict output --- src/ragas/metrics/_context_precision.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ragas/metrics/_context_precision.py b/src/ragas/metrics/_context_precision.py index a5b723066..0a8128f4c 100644 --- a/src/ragas/metrics/_context_precision.py +++ b/src/ragas/metrics/_context_precision.py @@ -93,6 +93,9 @@ def _context_precision_prompt(self, row: t.Dict) -> t.List[PromptValue]: def _calculate_average_precision(self, json_responses: t.List[t.Dict]) -> float: score = np.nan + json_responses = [ + item if isinstance(item, dict) else {} for item in json_responses + ] verdict_list = [ int("1" == resp.get("verdict", "0").strip()) if resp.get("verdict") From 628dce33edaa5b8f82228c97c46ce46c7e4608f5 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Sat, 3 Feb 2024 16:37:08 -0800 Subject: [PATCH 2/9] adapt for testset generator --- .../applications/use_prompt_adaptation.md | 73 ++++++++++++++++++- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/docs/howtos/applications/use_prompt_adaptation.md b/docs/howtos/applications/use_prompt_adaptation.md index e087a9fa9..3dceb35f8 100644 --- a/docs/howtos/applications/use_prompt_adaptation.md +++ b/docs/howtos/applications/use_prompt_adaptation.md @@ -1,9 +1,13 @@ # Automatic language adaptation +1. [Metrics](#metrics) +2. [Testset generation](#testset-generation) + +## Metrics This is a tutorial notebook showcasing how to successfully use ragas with data from any given language. This is achieved using Ragas prompt adaptation feature. The tutorial specifically applies ragas metrics to a Hindi RAG evaluation dataset. -## Dataset +### Dataset Here I’m using a dataset containing all the relevant columns in Hindi language. 
```{code-block} python @@ -75,7 +79,7 @@ Extracted statements: The instruction and key objects are kept unchanged intentionally to allow consuming and processing results in ragas. During inspection, if any of the demonstrations seem faulty translated you can always correct it by going to the saved location. -## Evaluate +### Evaluate ```{code-block} python from ragas import evaluate @@ -85,3 +89,68 @@ ragas_score = evaluate(dataset['train'], metrics=[faithfulness,answer_correctnes You will observe much better performance now with Hindi language as prompts are tailored to it. + +## Testset Generation + +This is a tutorial notebook showcasing how to successfully use ragas test data generation feature to generate data samples of any language using list of documents. This is achieved using Ragas prompt adaptation feature. The tutorial specifically applies ragas test set generation to a Hindi to produce a question answer dataset in Hindi. + +### Documents +Here I'm using a corpus of Wikipedia articles written in Hindi. You can download the articles by running: + + +```{code-block} bash +git lfs install +git clone https://huggingface.co/datasets/explodinggradients/hindi-wikipedia +``` + +Now you can load the documents using a document loader, here I am using `DirectoryLoader` + +```{code-block} python +from langchain.document_loaders import DirectoryLoader + +loader = DirectoryLoader("/Users/shahules/Myprojects/ragas/hindi-wikipedia/") +documents = loader.load() + +# add metadata +for document in documents: + document.metadata['file_name'] = document.metadata['source'] + +``` + +### Import and adapt evolutions +Now we can import all the required evolutions and adapt them using `generator.adapt`. This will also adapt all the necessary filters required for the corresponding evolutions. Once adapted, it's better to save and inspect the adapted prompts. 
+ + +```{code-block} python + +from ragas.testset.generator import TestsetGenerator +from ragas.testset.evolutions import simple, reasoning, multi_context,conditional + +# generator with openai models +generator = TestsetGenerator.with_openai() + +# adapt to language +language = "hindi" + +generator.adapt(language, evolutions=[simple, reasoning,conditional,multi_context]) +generator.save(evolutions=[simple, reasoning, multi_context,conditional]) +``` + +### Generate dataset +Once adapted you can use the evolutions and generator just like before to generate data samples for any given distribution. + +```{code-block} python +# determine distribution + +distributions = { + simple:0.4, + reasoning:0.2, + multi_context:0.2, + conditional:0.2 + } + + +# generate testset +testset = generator.generate_with_langchain_docs(documents, 10,distributions,with_debugging_logs=True) +testset.to_pandas() +``` \ No newline at end of file From c7e048ffc4b5f56d7eec3330a16bd896e748c8a2 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Sat, 3 Feb 2024 16:40:40 -0800 Subject: [PATCH 3/9] change path --- docs/howtos/applications/use_prompt_adaptation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/howtos/applications/use_prompt_adaptation.md b/docs/howtos/applications/use_prompt_adaptation.md index 3dceb35f8..821c1d20e 100644 --- a/docs/howtos/applications/use_prompt_adaptation.md +++ b/docs/howtos/applications/use_prompt_adaptation.md @@ -108,7 +108,7 @@ Now you can load the documents using a document loader, here I am using `Directo ```{code-block} python from langchain.document_loaders import DirectoryLoader -loader = DirectoryLoader("/Users/shahules/Myprojects/ragas/hindi-wikipedia/") +loader = DirectoryLoader("./hindi-wikipedia/") documents = loader.load() # add metadata From 37ae84fbef98f2e95abba2da8ada1516fd75d799 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Sat, 3 Feb 2024 16:47:04 -0800 Subject: [PATCH 4/9] fix linking --- 
docs/howtos/applications/use_prompt_adaptation.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/howtos/applications/use_prompt_adaptation.md b/docs/howtos/applications/use_prompt_adaptation.md index 821c1d20e..6217c4a31 100644 --- a/docs/howtos/applications/use_prompt_adaptation.md +++ b/docs/howtos/applications/use_prompt_adaptation.md @@ -1,9 +1,9 @@ # Automatic language adaptation -1. [Metrics](#metrics) -2. [Testset generation](#testset-generation) +1. [Metrics](#language-adaptation-for-metrics) +2. [Testset generation](#language-adaptation-for-testset-generation) -## Metrics +## Language Adaptation for Metrics This is a tutorial notebook showcasing how to successfully use ragas with data from any given language. This is achieved using Ragas prompt adaptation feature. The tutorial specifically applies ragas metrics to a Hindi RAG evaluation dataset. @@ -90,7 +90,7 @@ ragas_score = evaluate(dataset['train'], metrics=[faithfulness,answer_correctnes You will observe much better performance now with Hindi language as prompts are tailored to it. -## Testset Generation +## Language Adaptation for Testset Generation This is a tutorial notebook showcasing how to successfully use ragas test data generation feature to generate data samples of any language using list of documents. This is achieved using Ragas prompt adaptation feature. The tutorial specifically applies ragas test set generation to a Hindi to produce a question answer dataset in Hindi. 
From 602e448abc914ad0a99084fa06380aebf662adf4 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Sat, 3 Feb 2024 17:49:37 -0800 Subject: [PATCH 5/9] change testset generation --- .../howtos/applications/compare_embeddings.md | 20 +++++++++++----- docs/howtos/applications/compare_llms.md | 24 ++++++++++++++----- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/docs/howtos/applications/compare_embeddings.md b/docs/howtos/applications/compare_embeddings.md index 01b7a8d13..eb72f9736 100644 --- a/docs/howtos/applications/compare_embeddings.md +++ b/docs/howtos/applications/compare_embeddings.md @@ -28,18 +28,26 @@ For this tutorial notebook, I am using papers from Semantic Scholar that is rela ```{code-block} python :caption: load documents using llama-hub and create test data from llama_index import download_loader -from ragas.testset import TestsetGenerator +from ragas.testset.generator import TestsetGenerator +from ragas.testset.evolutions import simple, reasoning, multi_context SemanticScholarReader = download_loader("SemanticScholarReader") loader = SemanticScholarReader() query_space = "large language models" documents = loader.load_data(query=query_space, limit=100) -testsetgenerator = TestsetGenerator.from_default() -test_size = 30 -testset = testsetgenerator.generate(documents, test_size=test_size) -test_df = testset.to_pandas() -test_df.head() +# generator with openai models +generator = TestsetGenerator.with_openai() + +distributions = { + simple: 0.5, + multi_context: 0.4, + reasoning: 0.1 +} + +# generate testset +testset = generator.generate_with_llama_index_docs(documents, 100,distributions) +testset.to_pandas() ```

diff --git a/docs/howtos/applications/compare_llms.md b/docs/howtos/applications/compare_llms.md index 95897ee2f..b472a36be 100644 --- a/docs/howtos/applications/compare_llms.md +++ b/docs/howtos/applications/compare_llms.md @@ -33,15 +33,27 @@ Generate a set of 50+ samples using Testset generator for better results import os from llama_index import download_loader, SimpleDirectoryReader from ragas.testset import TestsetGenerator -os.environ['OPENAI_API_KEY'] = 'Your OPEN AI token' +from ragas.testset.generator import TestsetGenerator +from ragas.testset.evolutions import simple, reasoning, multi_context +os.environ['OPENAI_API_KEY'] = 'Your OPEN AI key' + +# load documents reader = SimpleDirectoryReader("./arxiv-papers/",num_files_limit=30) documents = reader.load_data() -testsetgenerator = TestsetGenerator.from_default() -test_size = 30 # Number of samples to generate -testset = testsetgenerator.generate(documents, test_size=test_size) -test_df = testset.to_pandas() -test_df.head() + +# generator with openai models +generator = TestsetGenerator.with_openai() + +distributions = { + simple: 0.5, + multi_context: 0.4, + reasoning: 0.1 +} + +# generate testset +testset = generator.generate_with_llama_index_docs(documents, 100,distributions) +testset.to_pandas() ```

From ccbec3c95053ca7d05c929c91dffe4df14ea9a7e Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Sat, 3 Feb 2024 16:37:08 -0800 Subject: [PATCH 6/9] adapt for testset generator --- .../applications/use_prompt_adaptation.md | 73 ++++++++++++++++++- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/docs/howtos/applications/use_prompt_adaptation.md b/docs/howtos/applications/use_prompt_adaptation.md index e087a9fa9..3dceb35f8 100644 --- a/docs/howtos/applications/use_prompt_adaptation.md +++ b/docs/howtos/applications/use_prompt_adaptation.md @@ -1,9 +1,13 @@ # Automatic language adaptation +1. [Metrics](#metrics) +2. [Testset generation](#testset-generation) + +## Metrics This is a tutorial notebook showcasing how to successfully use ragas with data from any given language. This is achieved using Ragas prompt adaptation feature. The tutorial specifically applies ragas metrics to a Hindi RAG evaluation dataset. -## Dataset +### Dataset Here I’m using a dataset containing all the relevant columns in Hindi language. ```{code-block} python @@ -75,7 +79,7 @@ Extracted statements: The instruction and key objects are kept unchanged intentionally to allow consuming and processing results in ragas. During inspection, if any of the demonstrations seem faulty translated you can always correct it by going to the saved location. -## Evaluate +### Evaluate ```{code-block} python from ragas import evaluate @@ -85,3 +89,68 @@ ragas_score = evaluate(dataset['train'], metrics=[faithfulness,answer_correctnes You will observe much better performance now with Hindi language as prompts are tailored to it. + +## Testset Generation + +This is a tutorial notebook showcasing how to successfully use ragas test data generation feature to generate data samples of any language using list of documents. This is achieved using Ragas prompt adaptation feature. The tutorial specifically applies ragas test set generation to a Hindi to produce a question answer dataset in Hindi. 
+ +### Documents +Here I'm using a corpus of Wikipedia articles written in Hindi. You can download the articles by running: + + +```{code-block} bash +git lfs install +git clone https://huggingface.co/datasets/explodinggradients/hindi-wikipedia +``` + +Now you can load the documents using a document loader, here I am using `DirectoryLoader` + +```{code-block} python +from langchain.document_loaders import DirectoryLoader + +loader = DirectoryLoader("/Users/shahules/Myprojects/ragas/hindi-wikipedia/") +documents = loader.load() + +# add metadata +for document in documents: + document.metadata['file_name'] = document.metadata['source'] + +``` + +### Import and adapt evolutions +Now we can import all the required evolutions and adapt them using `generator.adapt`. This will also adapt all the necessary filters required for the corresponding evolutions. Once adapted, it's better to save and inspect the adapted prompts. + + +```{code-block} python + +from ragas.testset.generator import TestsetGenerator +from ragas.testset.evolutions import simple, reasoning, multi_context,conditional + +# generator with openai models +generator = TestsetGenerator.with_openai() + +# adapt to language +language = "hindi" + +generator.adapt(language, evolutions=[simple, reasoning,conditional,multi_context]) +generator.save(evolutions=[simple, reasoning, multi_context,conditional]) +``` + +### Generate dataset +Once adapted you can use the evolutions and generator just like before to generate data samples for any given distribution. 
+ +```{code-block} python +# determine distribution + +distributions = { + simple:0.4, + reasoning:0.2, + multi_context:0.2, + conditional:0.2 + } + + +# generate testset +testset = generator.generate_with_langchain_docs(documents, 10,distributions,with_debugging_logs=True) +testset.to_pandas() +``` \ No newline at end of file From c8cf883150570353755685a5772cf67abffbc7c7 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Sat, 3 Feb 2024 16:40:40 -0800 Subject: [PATCH 7/9] change path --- docs/howtos/applications/use_prompt_adaptation.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/howtos/applications/use_prompt_adaptation.md b/docs/howtos/applications/use_prompt_adaptation.md index 3dceb35f8..821c1d20e 100644 --- a/docs/howtos/applications/use_prompt_adaptation.md +++ b/docs/howtos/applications/use_prompt_adaptation.md @@ -108,7 +108,7 @@ Now you can load the documents using a document loader, here I am using `Directo ```{code-block} python from langchain.document_loaders import DirectoryLoader -loader = DirectoryLoader("/Users/shahules/Myprojects/ragas/hindi-wikipedia/") +loader = DirectoryLoader("./hindi-wikipedia/") documents = loader.load() # add metadata From 81227af08fbcc2923e549ac5d51dc74f338891c1 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Sat, 3 Feb 2024 16:47:04 -0800 Subject: [PATCH 8/9] fix linking --- docs/howtos/applications/use_prompt_adaptation.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/howtos/applications/use_prompt_adaptation.md b/docs/howtos/applications/use_prompt_adaptation.md index 821c1d20e..6217c4a31 100644 --- a/docs/howtos/applications/use_prompt_adaptation.md +++ b/docs/howtos/applications/use_prompt_adaptation.md @@ -1,9 +1,9 @@ # Automatic language adaptation -1. [Metrics](#metrics) -2. [Testset generation](#testset-generation) +1. [Metrics](#language-adaptation-for-metrics) +2. 
[Testset generation](#language-adaptation-for-testset-generation) -## Metrics +## Language Adaptation for Metrics This is a tutorial notebook showcasing how to successfully use ragas with data from any given language. This is achieved using Ragas prompt adaptation feature. The tutorial specifically applies ragas metrics to a Hindi RAG evaluation dataset. @@ -90,7 +90,7 @@ ragas_score = evaluate(dataset['train'], metrics=[faithfulness,answer_correctnes You will observe much better performance now with Hindi language as prompts are tailored to it. -## Testset Generation +## Language Adaptation for Testset Generation This is a tutorial notebook showcasing how to successfully use ragas test data generation feature to generate data samples of any language using list of documents. This is achieved using Ragas prompt adaptation feature. The tutorial specifically applies ragas test set generation to a Hindi to produce a question answer dataset in Hindi. From 609f923c08c3df247cda01c8d378762a3a1a8ab3 Mon Sep 17 00:00:00 2001 From: Shahules786 Date: Sat, 3 Feb 2024 17:49:37 -0800 Subject: [PATCH 9/9] change testset generation --- .../howtos/applications/compare_embeddings.md | 20 +++++++++++----- docs/howtos/applications/compare_llms.md | 24 ++++++++++++++----- 2 files changed, 32 insertions(+), 12 deletions(-) diff --git a/docs/howtos/applications/compare_embeddings.md b/docs/howtos/applications/compare_embeddings.md index 01b7a8d13..eb72f9736 100644 --- a/docs/howtos/applications/compare_embeddings.md +++ b/docs/howtos/applications/compare_embeddings.md @@ -28,18 +28,26 @@ For this tutorial notebook, I am using papers from Semantic Scholar that is rela ```{code-block} python :caption: load documents using llama-hub and create test data from llama_index import download_loader -from ragas.testset import TestsetGenerator +from ragas.testset.generator import TestsetGenerator +from ragas.testset.evolutions import simple, reasoning, multi_context SemanticScholarReader = download_loader("SemanticScholarReader") loader = 
SemanticScholarReader() query_space = "large language models" documents = loader.load_data(query=query_space, limit=100) -testsetgenerator = TestsetGenerator.from_default() -test_size = 30 -testset = testsetgenerator.generate(documents, test_size=test_size) -test_df = testset.to_pandas() -test_df.head() +# generator with openai models +generator = TestsetGenerator.with_openai() + +distributions = { + simple: 0.5, + multi_context: 0.4, + reasoning: 0.1 +} + +# generate testset +testset = generator.generate_with_llama_index_docs(documents, 100,distributions) +testset.to_pandas() ```

diff --git a/docs/howtos/applications/compare_llms.md b/docs/howtos/applications/compare_llms.md index 95897ee2f..b472a36be 100644 --- a/docs/howtos/applications/compare_llms.md +++ b/docs/howtos/applications/compare_llms.md @@ -33,15 +33,27 @@ Generate a set of 50+ samples using Testset generator for better results import os from llama_index import download_loader, SimpleDirectoryReader from ragas.testset import TestsetGenerator -os.environ['OPENAI_API_KEY'] = 'Your OPEN AI token' +from ragas.testset.generator import TestsetGenerator +from ragas.testset.evolutions import simple, reasoning, multi_context +os.environ['OPENAI_API_KEY'] = 'Your OPEN AI key' + +# load documents reader = SimpleDirectoryReader("./arxiv-papers/",num_files_limit=30) documents = reader.load_data() -testsetgenerator = TestsetGenerator.from_default() -test_size = 30 # Number of samples to generate -testset = testsetgenerator.generate(documents, test_size=test_size) -test_df = testset.to_pandas() -test_df.head() + +# generator with openai models +generator = TestsetGenerator.with_openai() + +distributions = { + simple: 0.5, + multi_context: 0.4, + reasoning: 0.1 +} + +# generate testset +testset = generator.generate_with_llama_index_docs(documents, 100,distributions) +testset.to_pandas() ```