explodinggradients · jjmachan · Aug 22, 2023 · Aug 2, 2023 · Aug 2, 2023 · Aug 2, 2023
diff --git a/docs/integrations/assets/langsmith-dataset.png b/docs/integrations/assets/langsmith-dataset.png
diff --git a/docs/integrations/assets/langsmith-evaluation.png b/docs/integrations/assets/langsmith-evaluation.png
diff --git a/docs/integrations/assets/langsmith-feedback.png b/docs/integrations/assets/langsmith-feedback.png
diff --git a/docs/integrations/assets/langsmith-ragas-chain-trace.png b/docs/integrations/assets/langsmith-ragas-chain-trace.png
diff --git a/docs/integrations/langchain.ipynb b/docs/integrations/langchain.ipynb
diff --git a/docs/integrations/langsmith.ipynb b/docs/integrations/langsmith.ipynb
@@ -1,165 +1,165 @@
 {
-    "cells": [
-        {
-            "cell_type": "markdown",
-            "id": "a0b3171b",
-            "metadata": {},
-            "source": [
-                "# Langsmith Integrations\n",
-                "\n",
-                "[Langsmith](https://docs.smith.langchain.com/) in a platform for building production-grade LLM applications from the langchain team. It helps you with tracing, debugging and evaluting LLM applications.\n",
-                "\n",
-                "The langsmith + ragas integrations offer 2 features\n",
-                "1. View the traces of ragas `evaluator` \n",
-                "2. Use ragas metrics in langchain evaluation - (soon)\n",
-                "\n",
-                "\n",
-                "### Tracing ragas metrics\n",
-                "\n",
-                "since ragas uses langchain under the hood all you have to do is setup langsmith and your traces will be logged.\n",
-                "\n",
-                "to setup langsmith make sure the following env-vars are set (you can read more in the [langsmith docs](https://docs.smith.langchain.com/#quick-start)\n",
-                "\n",
-                "```bash\n",
-                "export LANGCHAIN_TRACING_V2=true\n",
-                "export LANGCHAIN_ENDPOINT=https://api.smith.langchain.com\n",
-                "export LANGCHAIN_API_KEY=<your-api-key>\n",
-                "export LANGCHAIN_PROJECT=<your-project>  # if not specified, defaults to \"default\"\n",
-                "```\n",
-                "\n",
-                "Once langsmith is setup, just run the evaluations as your normally would"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 1,
-            "id": "39375103",
-            "metadata": {},
-            "outputs": [
-                {
-                    "name": "stderr",
-                    "output_type": "stream",
-                    "text": [
-                        "Found cached dataset fiqa (/home/jjmachan/.cache/huggingface/datasets/explodinggradients___fiqa/ragas_eval/1.0.0/3dc7b639f5b4b16509a3299a2ceb78bf5fe98ee6b5fee25e7d5e4d290c88efb8)\n"
-                    ]
-                },
-                {
-                    "data": {
-                        "application/vnd.jupyter.widget-view+json": {
-                            "model_id": "dc5a62b3aebb45d690d9f0dcc783deea",
-                            "version_major": 2,
-                            "version_minor": 0
-                        },
-                        "text/plain": [
-                            "  0%|          | 0/1 [00:00<?, ?it/s]"
-                        ]
-                    },
-                    "metadata": {},
-                    "output_type": "display_data"
-                },
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "evaluating with [context_ relevancy]\n"
-                    ]
-                },
-                {
-                    "name": "stderr",
-                    "output_type": "stream",
-                    "text": [
-                        "100%|████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.90s/it]\n"
-                    ]
-                },
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "evaluating with [faithfulness]\n"
-                    ]
-                },
-                {
-                    "name": "stderr",
-                    "output_type": "stream",
-                    "text": [
-                        "100%|████████████████████████████████████████████████████████████| 1/1 [00:21<00:00, 21.01s/it]\n"
-                    ]
-                },
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "evaluating with [answer_relevancy]\n"
-                    ]
-                },
-                {
-                    "name": "stderr",
-                    "output_type": "stream",
-                    "text": [
-                        "100%|████████████████████████████████████████████████████████████| 1/1 [00:07<00:00,  7.36s/it]\n"
-                    ]
-                },
-                {
-                    "data": {
-                        "text/plain": [
-                            "{'ragas_score': 0.1837, 'context_ relevancy': 0.0707, 'faithfulness': 0.8889, 'answer_relevancy': 0.9403}"
-                        ]
-                    },
-                    "execution_count": 1,
-                    "metadata": {},
-                    "output_type": "execute_result"
-                }
-            ],
-            "source": [
-                "from datasets import load_dataset\n",
-                "from ragas.metrics import context_relevancy, answer_relevancy, faithfulness\n",
-                "from ragas import evaluate\n",
-                "\n",
-                "\n",
-                "fiqa_eval = load_dataset(\"explodinggradients/fiqa\", \"ragas_eval\")\n",
-                "\n",
-                "result = evaluate(\n",
-                "    fiqa_eval[\"baseline\"].select(range(3)), \n",
-                "    metrics=[context_relevancy, faithfulness, answer_relevancy]\n",
-                ")\n",
-                "\n",
-                "result"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "id": "8ce1c649",
-            "metadata": {},
-            "source": [
-                "Voila! Now you can head over to your project and see the traces\n",
-                "\n",
-                "![](../assets/langsmith-tracing-overview.png)\n",
-                "this shows the langsmith tracing dashboard overview\n",
-                "\n",
-                "![](../assets/langsmith-tracing-faithfullness.png)\n",
-                "this shows the traces for the faithfullness metrics. As you can see being able to view the reasons why the metric gives the score is helpful in figuring out how to improving it."
-            ]
-        }
-    ],
-    "metadata": {
-        "kernelspec": {
-            "display_name": "Python 3 (ipykernel)",
-            "language": "python",
-            "name": "python3"
-        },
-        "language_info": {
-            "codemirror_mode": {
-                "name": "ipython",
-                "version": 3
-            },
-            "file_extension": ".py",
-            "mimetype": "text/x-python",
-            "name": "python",
-            "nbconvert_exporter": "python",
-            "pygments_lexer": "ipython3",
-            "version": "3.10.12"
-        }
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "a0b3171b",
+   "metadata": {},
+   "source": [
+    "# Langsmith Integrations\n",
+    "\n",
+    "[Langsmith](https://docs.smith.langchain.com/) is a platform for building production-grade LLM applications from the langchain team. It helps you with tracing, debugging and evaluating LLM applications.\n",
+    "\n",
+    "The langsmith + ragas integrations offer 2 features\n",
+    "1. View the traces of ragas `evaluator` \n",
+    "2. Use ragas metrics in langchain evaluation - (soon)\n",
+    "\n",
+    "\n",
+    "### Tracing ragas metrics\n",
+    "\n",
+    "Since ragas uses langchain under the hood, all you have to do is set up langsmith and your traces will be logged.\n",
+    "\n",
+    "To set up langsmith, make sure the following env-vars are set (you can read more in the [langsmith docs](https://docs.smith.langchain.com/#quick-start))\n",
+    "\n",
+    "```bash\n",
+    "export LANGCHAIN_TRACING_V2=true\n",
+    "export LANGCHAIN_ENDPOINT=https://api.smith.langchain.com\n",
+    "export LANGCHAIN_API_KEY=<your-api-key>\n",
+    "export LANGCHAIN_PROJECT=<your-project>  # if not specified, defaults to \"default\"\n",
+    "```\n",
+    "\n",
+    "Once langsmith is set up, just run the evaluations as you normally would"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "39375103",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Found cached dataset fiqa (/home/jjmachan/.cache/huggingface/datasets/explodinggradients___fiqa/ragas_eval/1.0.0/3dc7b639f5b4b16509a3299a2ceb78bf5fe98ee6b5fee25e7d5e4d290c88efb8)\n"
+     ]
     },
-    "nbformat": 4,
-    "nbformat_minor": 5
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "dc5a62b3aebb45d690d9f0dcc783deea",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/1 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "evaluating with [context_ relevancy]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.90s/it]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "evaluating with [faithfulness]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|████████████████████████████████████████████████████████████| 1/1 [00:21<00:00, 21.01s/it]\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "evaluating with [answer_relevancy]\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "100%|████████████████████████████████████████████████████████████| 1/1 [00:07<00:00,  7.36s/it]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'ragas_score': 0.1837, 'context_ relevancy': 0.0707, 'faithfulness': 0.8889, 'answer_relevancy': 0.9403}"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from datasets import load_dataset\n",
+    "from ragas.metrics import context_relevancy, answer_relevancy, faithfulness\n",
+    "from ragas import evaluate\n",
+    "\n",
+    "\n",
+    "fiqa_eval = load_dataset(\"explodinggradients/fiqa\", \"ragas_eval\")\n",
+    "\n",
+    "result = evaluate(\n",
+    "    fiqa_eval[\"baseline\"].select(range(3)),\n",
+    "    metrics=[context_relevancy, faithfulness, answer_relevancy],\n",
+    ")\n",
+    "\n",
+    "result"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8ce1c649",
+   "metadata": {},
+   "source": [
+    "Voila! Now you can head over to your project and see the traces\n",
+    "\n",
+    "![](../assets/langsmith-tracing-overview.png)\n",
+    "this shows the langsmith tracing dashboard overview\n",
+    "\n",
+    "![](../assets/langsmith-tracing-faithfullness.png)\n",
+    "this shows the traces for the faithfulness metric. As you can see, being able to view the reasons why the metric gives the score is helpful in figuring out how to improve it."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.12"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
 }
diff --git a/src/ragas/async_utils.py b/src/ragas/async_utils.py
@@ -1,6 +1,7 @@
 """Async utils."""
 import asyncio
-from typing import Any, Coroutine, List
+from itertools import zip_longest
+from typing import Any, Coroutine, Iterable, List
 
 
 def run_async_tasks(
@@ -29,6 +30,8 @@ async def _tqdm_gather() -> List[Any]:
         # run the operation w/o tqdm on hitting a fatal
         # may occur in some environments where tqdm.asyncio
         # is not supported
+        except ImportError as e:
+            print(e)
         except Exception:
             pass
 
@@ -37,3 +40,20 @@ async def _gather() -> List[Any]:
 
     outputs: List[Any] = asyncio.run(_gather())
     return outputs
+
+
+def chunks(iterable: Iterable, size: int) -> Iterable:
+    """Group ``iterable`` into consecutive tuples of length ``size``.
+
+    The final tuple is padded with ``None`` when the number of items is not
+    a multiple of ``size``.
+    """
+    # Every positional argument is the *same* iterator object, so each tuple
+    # that zip_longest builds consumes the next ``size`` items in order.
+    shared = iter(iterable)
+    return zip_longest(*(shared for _ in range(size)), fillvalue=None)
+
+
+async def batch_gather(
+    tasks: List[Coroutine], batch_size: int = 10, verbose: bool = False
+) -> List[Any]:
+    """Await ``tasks`` in consecutive batches and return results in task order.
+
+    Each batch of up to ``batch_size`` awaitables is passed to
+    ``asyncio.gather`` and must finish before the next batch is started.
+
+    Args:
+        tasks: Awaitables to run; the list is sliced, so order is preserved.
+        batch_size: Maximum number of awaitables handed to one ``gather`` call.
+        verbose: When true, print a progress line after every batch.
+
+    Returns:
+        The results of all tasks, in the same order as ``tasks``.
+
+    Raises:
+        ValueError: If ``batch_size`` is smaller than 1.
+    """
+    if batch_size < 1:
+        raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
+
+    output: List[Any] = []
+    # Slice the list rather than grouping with a None-padded helper: padding
+    # the final batch would hand ``None`` to asyncio.gather, which raises
+    # TypeError whenever len(tasks) is not a multiple of batch_size.
+    for start in range(0, len(tasks), batch_size):
+        output_chunk = await asyncio.gather(*tasks[start : start + batch_size])
+        output.extend(output_chunk)
+        if verbose:
+            print(f"Completed {len(output)} out of {len(tasks)} tasks")
+    return output
diff --git a/src/ragas/langchain/__init__.py b/src/ragas/langchain/__init__.py