From 3471a1aeeb6b06e504f9c236d17237e1731ae6a4 Mon Sep 17 00:00:00 2001
From: Jithin James <jjmachan@pop-os.localdomain>
Date: Sat, 13 May 2023 16:56:37 +0530
Subject: [PATCH 1/3] docs: quickstart with pre-commit hooks

---
 .pre-commit-config.yaml   |  15 +
 examples/data_prep.py     |  44 ++
 examples/quickstart.ipynb | 860 +++++++++++++++++++++++++++++---------
 pyproject.toml            |   5 +
 4 files changed, 727 insertions(+), 197 deletions(-)
 create mode 100644 .pre-commit-config.yaml
 create mode 100644 examples/data_prep.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 000000000..aa8978dc9
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,15 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v3.2.0
+    hooks:
+    -   id: trailing-whitespace
+    -   id: end-of-file-fixer
+    -   id: check-yaml
+    -   id: check-added-large-files
+-   repo: https://github.com/psf/black
+    rev: 22.10.0
+    hooks:
+    -   id: black
+    -   id: black-jupyter
diff --git a/examples/data_prep.py b/examples/data_prep.py
new file mode 100644
index 000000000..df2763104
--- /dev/null
+++ b/examples/data_prep.py
@@ -0,0 +1,44 @@
+from datasets import concatenate_datasets, load_dataset
+
+
+def format_for_belar(row):
+    row["context"] = row["selftext"]
+    row["prompt"] = row["title"]
+    row["ground_truth"] = row["answers"]["text"]
+    return row
+
+
+d = load_dataset("eli5")
+ds = d["test_eli5"].map(format_for_belar, batched=False)
+ds = ds.select_columns(["context", "prompt", "ground_truth"])
+
+ds = ds.shuffle(seed=42).select(range(500))
+ds.shape, ds.column_names
+
+import concurrent.futures as f
+
+from langchain.llms import OpenAI
+
+llm = OpenAI()
+prompt = """
+{context}
+with the above context explain like I'm five: {prompt}
+"""
+
+
+def get_answers(row):
+    qs, cs = row["prompt"], row["context"]
+
+    generated_answers = []
+    with f.ThreadPoolExecutor(max_workers=10) as executor:
+        results = executor.map(
+            llm, [prompt.format(context=cs[i], prompt=qs[i]) for i in range(len(qs))]
+        )
+        for result in results:
+            generated_answers.append(result)
+
+    row["generated_answers"] = generated_answers
+    return row
+
+
+ds = ds.map(get_answers, batched=True, batch_size=10)
diff --git a/examples/quickstart.ipynb b/examples/quickstart.ipynb
index 6effae29b..f726fcf23 100644
--- a/examples/quickstart.ipynb
+++ b/examples/quickstart.ipynb
@@ -1,11 +1,28 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "aeb5819b",
+   "metadata": {},
+   "source": [
+    "# Quickstart"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": 7,
-   "id": "992c777a",
+   "execution_count": 30,
+   "id": "22c7dd25",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "The autoreload extension is already loaded. To reload it, use:\n",
+      "  %reload_ext autoreload\n"
+     ]
+    }
+   ],
    "source": [
     "%load_ext autoreload\n",
     "%autoreload 2"
@@ -13,108 +30,89 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "id": "5eaf4729",
+   "execution_count": 2,
+   "id": "0b5d4d41",
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Found cached dataset eli5 (/home/jjmachan/.cache/huggingface/datasets/eli5/LFQA_reddit/1.0.0/17574e5502a10f41bbd17beba83e22475b499fa62caa1384a3d093fc856fe6fa)\n"
+      "Found cached dataset parquet (/home/jjmachan/.cache/huggingface/datasets/explodinggradients___parquet/explodinggradients--eli5-test-217d92ce20e19249/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n"
      ]
     },
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "a2231863e61c4ffd8d695c8531a48139",
-       "version_major": 2,
-       "version_minor": 0
-      },
       "text/plain": [
-       "  0%|          | 0/9 [00:00<?, ?it/s]"
+       "Dataset({\n",
+       "    features: ['context', 'prompt', 'ground_truth', 'references', 'generated_text'],\n",
+       "    num_rows: 500\n",
+       "})"
       ]
      },
+     "execution_count": 2,
      "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Loading cached processed dataset at /home/jjmachan/.cache/huggingface/datasets/eli5/LFQA_reddit/1.0.0/17574e5502a10f41bbd17beba83e22475b499fa62caa1384a3d093fc856fe6fa/cache-f3427cd7a8a8674f.arrow\n"
-     ]
+     "output_type": "execute_result"
     }
    ],
    "source": [
     "from datasets import load_dataset, concatenate_datasets\n",
     "\n",
-    "def format_for_belar(row):\n",
-    "    row[\"context\"] = row[\"selftext\"]\n",
-    "    row[\"prompt\"] = row[\"title\"]\n",
-    "    row['ground_truth'] = row[\"answers\"][\"text\"]\n",
-    "    return row\n",
-    "    \n",
-    "d = load_dataset(\"eli5\")\n",
-    "ds = d['test_eli5'].map(format_for_belar, batched=False)\n",
-    "ds = ds.select_columns([\"context\", \"prompt\", \"ground_truth\"])"
+    "ds = load_dataset(\"explodinggradients/eli5-test\", split=\"test_eli5\")\n",
+    "ds"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "id": "a501c296",
+   "execution_count": 24,
+   "id": "0b5abd7d",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "(500, 3)"
-      ]
-     },
-     "execution_count": 5,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "ds = ds.shuffle(seed=42).select(range(500))\n",
-    "ds.shape"
+    "from belar.metrics import (\n",
+    "    Rouge1,\n",
+    "    Evaluation,\n",
+    "    Rouge2,\n",
+    "    RougeL,\n",
+    "    SBERTScore,\n",
+    "    EntailmentScore,\n",
+    "    EditRatio,\n",
+    "    EditDistance,\n",
+    ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
-   "id": "763335eb",
+   "execution_count": 28,
+   "id": "a77c805d",
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "['context', 'prompt', 'ground_truth']"
-      ]
-     },
-     "execution_count": 8,
-     "metadata": {},
-     "output_type": "execute_result"
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.\n",
+      "The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.\n",
+      "The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.\n",
+      "The `xla_device` argument has been deprecated in v4.4.0 of Transformers. It is ignored and you can safely remove it from your `config.json` file.\n"
+     ]
     }
    ],
    "source": [
-    "ds.column_names"
+    "sbert_score = SBERTScore(similarity_metric=\"cosine\")\n",
+    "entail = EntailmentScore(max_length=512)\n",
+    "\n",
+    "e = Evaluation(\n",
+    "    metrics=[Rouge1, Rouge2, RougeL, sbert_score, EditDistance, EditRatio, entail],\n",
+    "    batched=False,\n",
+    "    batch_size=30,\n",
+    ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "6aff2ae9",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 10,
-   "id": "c32c39f5",
+   "execution_count": 29,
+   "id": "e879f51b",
    "metadata": {},
    "outputs": [
     {
@@ -133,210 +131,678 @@
     }
    ],
    "source": [
-    "import concurrent.futures as f\n",
-    "from langchain.llms import OpenAI\n",
-    "\n",
-    "llm = OpenAI()\n",
-    "prompt = \"\"\"\n",
-    "{context}\n",
-    "with the above context explain like I'm five: {prompt}\n",
-    "\"\"\"\n",
-    "\n",
-    "def get_answers(row):\n",
-    "    qs, cs = row[\"prompt\"], row[\"context\"]\n",
-    "    \n",
-    "    generated_answers = []\n",
-    "    with f.ThreadPoolExecutor(max_workers=10) as executor:\n",
-    "        results = executor.map(llm, \n",
-    "            [prompt.format(context=cs[i], prompt=qs[i]) for i in range(len(qs))])\n",
-    "        for result in results:\n",
-    "            generated_answers.append(result)\n",
-    "     \n",
-    "    row[\"generated_answers\"] = generated_answers\n",
-    "    return row\n",
-    "    \n",
-    "ds = ds.map(get_answers, batched=True, batch_size=10)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "c5ddec2d",
-   "metadata": {},
-   "source": [
-    "## Evalutate"
+    "r = e.eval(ds[\"ground_truth\"], ds[\"generated_text\"])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 54,
-   "id": "2d0a7ba7",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from belar.metrics import Rouge1, Evaluation, Rouge2, RougeL"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "001ab431",
+   "execution_count": 20,
+   "id": "f64c1915",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "['context', 'question', 'answers', 'answer_generated']"
+       "{'rouge1_score': 0.27777314683149845, 'rouge2_score': 0.05593454553750915, 'rougeL_score': 0.16365190027294899, 'SBERT_cosine_score': 0.37552570906095206, 'edit_distance_score': 735.114, 'edit_ratio_score': 0.41482407945510713}"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 20,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "ds.column_names"
+    "r"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
-   "id": "ce9448b9",
+   "execution_count": 21,
+   "id": "7c812dfe",
    "metadata": {},
    "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "WARNING:datasets.arrow_dataset:Loading cached processed dataset at /home/jjmachan/.cache/huggingface/datasets/eli5/LFQA_reddit/1.0.0/17574e5502a10f41bbd17beba83e22475b499fa62caa1384a3d093fc856fe6fa/cache-de2b7e402d0a395c.arrow\n"
-     ]
-    },
     {
      "data": {
       "text/plain": [
-       "Dataset({\n",
-       "    features: ['context', 'prompt', 'references', 'ground_truth', 'generated_text'],\n",
-       "    num_rows: 100\n",
-       "})"
+       "0.27777314683149845"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 21,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "def rename(row):\n",
-    "    row[\"prompt\"] = row[\"question\"]\n",
-    "    row[\"references\"] = []\n",
-    "    row[\"ground_truth\"] = row[\"answers\"]\n",
-    "    row[\"generated_text\"] = row[\"answer_generated\"]\n",
-    "    \n",
-    "    return row\n",
-    "\n",
-    "ds = ds.map(rename).select_columns(\n",
-    "    [\"prompt\", \"context\", \"references\", \"ground_truth\", \"generated_text\"]\n",
-    ")\n",
-    "ds"
+    "r[\"rouge1_score\"]"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 47,
-   "id": "49565367",
+   "execution_count": 22,
+   "id": "4c8c51b1",
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "a14aebe3baca44e386442688a86b26ad",
-       "version_major": 2,
-       "version_minor": 0
-      },
       "text/plain": [
-       "Pushing dataset shards to the dataset hub:   0%|          | 0/1 [00:00<?, ?it/s]"
+       "{'rouge1_score': {'mean': 0.27777314683149845,\n",
+       "  '25%': 0.22222222222222224,\n",
+       "  '50%': 0.28116554054054055,\n",
+       "  '75%': 0.33333333333333337,\n",
+       "  'min': 0.03333333333333333,\n",
+       "  'max': 0.49498327759197325,\n",
+       "  'std': 0.07709937733409833},\n",
+       " 'rouge2_score': {'mean': 0.05593454553750915,\n",
+       "  '25%': 0.029795467108899944,\n",
+       "  '50%': 0.05203595980962454,\n",
+       "  '75%': 0.07713675213675214,\n",
+       "  'min': 0.0,\n",
+       "  'max': 0.22499999999999998,\n",
+       "  'std': 0.03659179594928787},\n",
+       " 'rougeL_score': {'mean': 0.16365190027294899,\n",
+       "  '25%': 0.13122438524590163,\n",
+       "  '50%': 0.1639344262295082,\n",
+       "  '75%': 0.19366875300914782,\n",
+       "  'min': 0.03333333333333333,\n",
+       "  'max': 0.3087248322147651,\n",
+       "  'std': 0.04582111082128693},\n",
+       " 'SBERT_cosine_score': {'mean': 0.37552570906095206,\n",
+       "  '25%': 0.2123386301100254,\n",
+       "  '50%': 0.33269713819026947,\n",
+       "  '75%': 0.5326416194438934,\n",
+       "  'min': 0.007017173804342747,\n",
+       "  'max': 0.9106802940368652,\n",
+       "  'std': 0.2075585785391846},\n",
+       " 'edit_distance_score': {'mean': 735.114,\n",
+       "  '25%': 311.5,\n",
+       "  '50%': 476.5,\n",
+       "  '75%': 864.25,\n",
+       "  'min': 106,\n",
+       "  'max': 6370,\n",
+       "  'std': 729.5287718822336},\n",
+       " 'edit_ratio_score': {'mean': 0.41482407945510713,\n",
+       "  '25%': 0.39987631416202846,\n",
+       "  '50%': 0.42918677093154384,\n",
+       "  '75%': 0.4495093721921233,\n",
+       "  'min': 0.10218156228008446,\n",
+       "  'max': 0.5729166666666667,\n",
+       "  'std': 0.05807177049561045}}"
       ]
      },
+     "execution_count": 22,
      "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ed53f7de6f7c4638ad664b9553e14789",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Creating parquet from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "39b71f5f8c0742f3b90da786e910eef3",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Upload 1 LFS files:   0%|          | 0/1 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "ds.push_to_hub(\"explodinggradients/eli5-test\")"
+    "r.describe()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ebf0a29d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t_not_batched = ds_eval[\"rouge1_score\"]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4882982d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "np.array(t_batched) - np.array(t_not_batched)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 81,
-   "id": "1dcbdbd1",
+   "execution_count": 5,
+   "id": "08ef4d51",
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "",
-       "version_major": 2,
-       "version_minor": 0
-      },
       "text/plain": [
-       "Map:   0%|          | 0/100 [00:00<?, ? examples/s]"
+       "['ground_truth', 'generated_text', 'SBERT_cosine_score']"
       ]
      },
+     "execution_count": 5,
      "metadata": {},
-     "output_type": "display_data"
-    },
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "ds_eval.column_names"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "f8a58fa8",
+   "metadata": {},
+   "outputs": [
     {
      "data": {
       "text/plain": [
-       "Dataset({\n",
-       "    features: ['ground_truth', 'generated_text', 'rouge1_score', 'rouge2_score', 'rougeL_score'],\n",
-       "    num_rows: 100\n",
-       "})"
+       "[0.3033774197101593,\n",
+       " 0.016349632292985916,\n",
+       " 0.4478442072868347,\n",
+       " 0.1860141158103943,\n",
+       " 0.03600190579891205,\n",
+       " 0.6023079752922058,\n",
+       " 0.289838969707489,\n",
+       " 0.08502114564180374,\n",
+       " 0.17191164195537567,\n",
+       " 0.3593299984931946,\n",
+       " 0.1715232878923416,\n",
+       " 0.3805505037307739,\n",
+       " 0.5519564151763916,\n",
+       " 0.2677731215953827,\n",
+       " 0.6183438301086426,\n",
+       " 0.10611602663993835,\n",
+       " 0.19605034589767456,\n",
+       " 0.08165217190980911,\n",
+       " 0.29304254055023193,\n",
+       " 0.35943326354026794,\n",
+       " 0.38164564967155457,\n",
+       " 0.03771442547440529,\n",
+       " 0.11554502695798874,\n",
+       " 0.47948333621025085,\n",
+       " 0.23276342451572418,\n",
+       " 0.4236215353012085,\n",
+       " 0.1943129450082779,\n",
+       " 0.1942053735256195,\n",
+       " 0.12668733298778534,\n",
+       " 0.2597537338733673,\n",
+       " 0.33301281929016113,\n",
+       " 0.3094521462917328,\n",
+       " 0.3279588520526886,\n",
+       " 0.32722654938697815,\n",
+       " 0.38284799456596375,\n",
+       " 0.2851578891277313,\n",
+       " 0.23893719911575317,\n",
+       " 0.6166086196899414,\n",
+       " 0.2423057109117508,\n",
+       " 0.7267876267433167,\n",
+       " 0.08813111484050751,\n",
+       " 0.48606470227241516,\n",
+       " 0.6568448543548584,\n",
+       " 0.1358499825000763,\n",
+       " 0.4515664577484131,\n",
+       " 0.23441915214061737,\n",
+       " 0.4741160571575165,\n",
+       " 0.18968994915485382,\n",
+       " 0.382995069026947,\n",
+       " 0.7173715233802795,\n",
+       " 0.7269276976585388,\n",
+       " 0.2834068834781647,\n",
+       " 0.2564486265182495,\n",
+       " 0.9106802940368652,\n",
+       " 0.3905271291732788,\n",
+       " 0.1269465684890747,\n",
+       " 0.09796524047851562,\n",
+       " 0.6954237222671509,\n",
+       " 0.49959367513656616,\n",
+       " 0.3481505811214447,\n",
+       " 0.2524052858352661,\n",
+       " 0.20396579802036285,\n",
+       " 0.4261414706707001,\n",
+       " 0.35149627923965454,\n",
+       " 0.060562025755643845,\n",
+       " 0.29626941680908203,\n",
+       " 0.33264321088790894,\n",
+       " 0.32353609800338745,\n",
+       " 0.0929298847913742,\n",
+       " 0.694779634475708,\n",
+       " 0.42692476511001587,\n",
+       " 0.6740735769271851,\n",
+       " 0.26791706681251526,\n",
+       " 0.30361559987068176,\n",
+       " 0.6142315864562988,\n",
+       " 0.8581538200378418,\n",
+       " 0.1934203803539276,\n",
+       " 0.17560303211212158,\n",
+       " 0.39025163650512695,\n",
+       " 0.2257130742073059,\n",
+       " 0.10104137659072876,\n",
+       " 0.5671371221542358,\n",
+       " 0.2376122921705246,\n",
+       " 0.7245509624481201,\n",
+       " 0.33550819754600525,\n",
+       " 0.16170960664749146,\n",
+       " 0.3289082944393158,\n",
+       " 0.21686506271362305,\n",
+       " 0.5573591589927673,\n",
+       " 0.39316579699516296,\n",
+       " 0.3452097177505493,\n",
+       " 0.7620242238044739,\n",
+       " 0.612403154373169,\n",
+       " 0.20761919021606445,\n",
+       " 0.3436463177204132,\n",
+       " 0.35804855823516846,\n",
+       " 0.5422661304473877,\n",
+       " 0.2482432872056961,\n",
+       " 0.24608035385608673,\n",
+       " 0.43996310234069824,\n",
+       " 0.7638659477233887,\n",
+       " 0.4832608997821808,\n",
+       " 0.3723938763141632,\n",
+       " 0.16313855350017548,\n",
+       " 0.17755097150802612,\n",
+       " 0.7125013470649719,\n",
+       " 0.21019332110881805,\n",
+       " 0.2878414988517761,\n",
+       " 0.7330911755561829,\n",
+       " 0.5391034483909607,\n",
+       " 0.3856879770755768,\n",
+       " 0.21089066565036774,\n",
+       " 0.21917514503002167,\n",
+       " 0.5970359444618225,\n",
+       " 0.10427114367485046,\n",
+       " 0.5017147660255432,\n",
+       " 0.32604700326919556,\n",
+       " 0.26022183895111084,\n",
+       " 0.2217114269733429,\n",
+       " 0.5664410591125488,\n",
+       " 0.6097017526626587,\n",
+       " 0.6790091395378113,\n",
+       " 0.6737412810325623,\n",
+       " 0.3198738396167755,\n",
+       " 0.3233138620853424,\n",
+       " 0.27815982699394226,\n",
+       " 0.5739132165908813,\n",
+       " 0.8073441982269287,\n",
+       " 0.393609881401062,\n",
+       " 0.34070584177970886,\n",
+       " 0.1426166594028473,\n",
+       " 0.3649061918258667,\n",
+       " 0.21035610139369965,\n",
+       " 0.15468955039978027,\n",
+       " 0.15301679074764252,\n",
+       " 0.3864727020263672,\n",
+       " 0.3432256877422333,\n",
+       " 0.27995312213897705,\n",
+       " 0.45306405425071716,\n",
+       " 0.152155339717865,\n",
+       " 0.5590802431106567,\n",
+       " 0.14337098598480225,\n",
+       " 0.5684935450553894,\n",
+       " 0.06331620365381241,\n",
+       " 0.7308592200279236,\n",
+       " 0.3433731496334076,\n",
+       " 0.49904948472976685,\n",
+       " 0.24472254514694214,\n",
+       " 0.17057321965694427,\n",
+       " 0.17359305918216705,\n",
+       " 0.1405472606420517,\n",
+       " 0.21779431402683258,\n",
+       " 0.6882146596908569,\n",
+       " 0.39259153604507446,\n",
+       " 0.5250310301780701,\n",
+       " 0.29845374822616577,\n",
+       " 0.6535312533378601,\n",
+       " 0.3323957920074463,\n",
+       " 0.6179606318473816,\n",
+       " 0.6263958215713501,\n",
+       " 0.25900962948799133,\n",
+       " 0.35419002175331116,\n",
+       " 0.33175551891326904,\n",
+       " 0.1691923886537552,\n",
+       " 0.6974550485610962,\n",
+       " 0.5213074088096619,\n",
+       " 0.032654277980327606,\n",
+       " 0.34367528557777405,\n",
+       " 0.405593603849411,\n",
+       " 0.08452585339546204,\n",
+       " 0.11424578726291656,\n",
+       " 0.6650150418281555,\n",
+       " 0.2742277681827545,\n",
+       " 0.28393787145614624,\n",
+       " 0.29564306139945984,\n",
+       " 0.5309538245201111,\n",
+       " 0.022119097411632538,\n",
+       " 0.5228688716888428,\n",
+       " 0.6862163543701172,\n",
+       " 0.4796127676963806,\n",
+       " 0.331642746925354,\n",
+       " 0.469801127910614,\n",
+       " 0.2787094712257385,\n",
+       " 0.15432526171207428,\n",
+       " 0.13090954720973969,\n",
+       " 0.5296900272369385,\n",
+       " 0.5006809830665588,\n",
+       " 0.31476107239723206,\n",
+       " 0.6327821612358093,\n",
+       " 0.27751827239990234,\n",
+       " 0.08453290164470673,\n",
+       " 0.152990460395813,\n",
+       " 0.2828467786312103,\n",
+       " 0.21192562580108643,\n",
+       " 0.23361067473888397,\n",
+       " 0.1100977212190628,\n",
+       " 0.729167640209198,\n",
+       " 0.25679513812065125,\n",
+       " 0.29639971256256104,\n",
+       " 0.19549258053302765,\n",
+       " 0.01892801746726036,\n",
+       " 0.7945613265037537,\n",
+       " 0.7499642372131348,\n",
+       " 0.15835057199001312,\n",
+       " 0.6000410914421082,\n",
+       " 0.38472887873649597,\n",
+       " 0.27581414580345154,\n",
+       " 0.6135129332542419,\n",
+       " 0.30333641171455383,\n",
+       " 0.6530413627624512,\n",
+       " 0.32561489939689636,\n",
+       " 0.6843974590301514,\n",
+       " 0.7383497953414917,\n",
+       " 0.1791287064552307,\n",
+       " 0.15797390043735504,\n",
+       " 0.1897229701280594,\n",
+       " 0.34278005361557007,\n",
+       " 0.523197591304779,\n",
+       " 0.2993963062763214,\n",
+       " 0.24305762350559235,\n",
+       " 0.2124125361442566,\n",
+       " 0.23200078308582306,\n",
+       " 0.5277230739593506,\n",
+       " 0.3923065960407257,\n",
+       " 0.2338612824678421,\n",
+       " 0.6605720520019531,\n",
+       " 0.4534214735031128,\n",
+       " 0.7204974889755249,\n",
+       " 0.4256589412689209,\n",
+       " 0.1377628594636917,\n",
+       " 0.1862977296113968,\n",
+       " 0.6173402070999146,\n",
+       " 0.2129381150007248,\n",
+       " 0.18199223279953003,\n",
+       " 0.4077472388744354,\n",
+       " 0.5461190938949585,\n",
+       " 0.7703336477279663,\n",
+       " 0.7089384198188782,\n",
+       " 0.12397469580173492,\n",
+       " 0.3445894420146942,\n",
+       " 0.29747506976127625,\n",
+       " 0.12937960028648376,\n",
+       " 0.6808912754058838,\n",
+       " 0.44350528717041016,\n",
+       " 0.0622265450656414,\n",
+       " 0.800916314125061,\n",
+       " 0.196528360247612,\n",
+       " 0.40886160731315613,\n",
+       " 0.5457544326782227,\n",
+       " 0.7547292113304138,\n",
+       " 0.17570790648460388,\n",
+       " 0.33092451095581055,\n",
+       " 0.3909622132778168,\n",
+       " 0.1750270575284958,\n",
+       " 0.21135497093200684,\n",
+       " 0.2844017744064331,\n",
+       " 0.6711058616638184,\n",
+       " 0.7111238241195679,\n",
+       " 0.39750146865844727,\n",
+       " 0.3603275716304779,\n",
+       " 0.20594996213912964,\n",
+       " 0.26992928981781006,\n",
+       " 0.32206788659095764,\n",
+       " 0.5537823438644409,\n",
+       " 0.6196168065071106,\n",
+       " 0.17448124289512634,\n",
+       " 0.8145052194595337,\n",
+       " 0.13209058344364166,\n",
+       " 0.6009707450866699,\n",
+       " 0.1729992777109146,\n",
+       " 0.605941891670227,\n",
+       " 0.16112592816352844,\n",
+       " 0.7443314790725708,\n",
+       " 0.27183473110198975,\n",
+       " 0.6732509732246399,\n",
+       " 0.34409621357917786,\n",
+       " 0.6225290894508362,\n",
+       " 0.7111546397209167,\n",
+       " 0.25248128175735474,\n",
+       " 0.25385937094688416,\n",
+       " 0.4553792476654053,\n",
+       " 0.007017173804342747,\n",
+       " 0.5378240942955017,\n",
+       " 0.6920719146728516,\n",
+       " 0.5118893980979919,\n",
+       " 0.7575575113296509,\n",
+       " 0.053049687296152115,\n",
+       " 0.34726738929748535,\n",
+       " 0.625588595867157,\n",
+       " 0.2684467136859894,\n",
+       " 0.21171455085277557,\n",
+       " 0.16874279081821442,\n",
+       " 0.6806609034538269,\n",
+       " 0.6409006118774414,\n",
+       " 0.43180617690086365,\n",
+       " 0.36487576365470886,\n",
+       " 0.25573742389678955,\n",
+       " 0.8596686124801636,\n",
+       " 0.7924257516860962,\n",
+       " 0.2288934737443924,\n",
+       " 0.37159034609794617,\n",
+       " 0.21388927102088928,\n",
+       " 0.7443233132362366,\n",
+       " 0.1677546203136444,\n",
+       " 0.590474009513855,\n",
+       " 0.2609856426715851,\n",
+       " 0.2530490458011627,\n",
+       " 0.26618924736976624,\n",
+       " 0.25583404302597046,\n",
+       " 0.20902562141418457,\n",
+       " 0.5943877696990967,\n",
+       " 0.07199332863092422,\n",
+       " 0.44120875000953674,\n",
+       " 0.3591962456703186,\n",
+       " 0.6544501781463623,\n",
+       " 0.12697549164295197,\n",
+       " 0.3532907962799072,\n",
+       " 0.4480339288711548,\n",
+       " 0.7042593359947205,\n",
+       " 0.11615218967199326,\n",
+       " 0.6357651948928833,\n",
+       " 0.24792085587978363,\n",
+       " 0.3313771188259125,\n",
+       " 0.5221624970436096,\n",
+       " 0.35108593106269836,\n",
+       " 0.135896697640419,\n",
+       " 0.15817011892795563,\n",
+       " 0.8391244411468506,\n",
+       " 0.2277119904756546,\n",
+       " 0.04543468356132507,\n",
+       " 0.25068429112434387,\n",
+       " 0.1133192926645279,\n",
+       " 0.28534117341041565,\n",
+       " 0.8111948370933533,\n",
+       " 0.3385901153087616,\n",
+       " 0.49840831756591797,\n",
+       " 0.4116763174533844,\n",
+       " 0.16915757954120636,\n",
+       " 0.3262860178947449,\n",
+       " 0.10765945911407471,\n",
+       " 0.1261938512325287,\n",
+       " 0.3500753939151764,\n",
+       " 0.2676033079624176,\n",
+       " 0.6120821833610535,\n",
+       " 0.62961345911026,\n",
+       " 0.27265217900276184,\n",
+       " 0.7611227035522461,\n",
+       " 0.2189398556947708,\n",
+       " 0.271114706993103,\n",
+       " 0.7538965940475464,\n",
+       " 0.1766694337129593,\n",
+       " 0.26010769605636597,\n",
+       " 0.14162400364875793,\n",
+       " 0.15965068340301514,\n",
+       " 0.30319979786872864,\n",
+       " 0.23467262089252472,\n",
+       " 0.7990760207176208,\n",
+       " 0.3484833538532257,\n",
+       " 0.3364700973033905,\n",
+       " 0.36943286657333374,\n",
+       " 0.37875810265541077,\n",
+       " 0.5377050042152405,\n",
+       " 0.2255283147096634,\n",
+       " 0.6214497089385986,\n",
+       " 0.572303295135498,\n",
+       " 0.5672966241836548,\n",
+       " 0.4602000117301941,\n",
+       " 0.6925125122070312,\n",
+       " 0.19061176478862762,\n",
+       " 0.750962495803833,\n",
+       " 0.057794470340013504,\n",
+       " 0.22833339869976044,\n",
+       " 0.12149019539356232,\n",
+       " 0.5187497735023499,\n",
+       " 0.43326133489608765,\n",
+       " 0.7459068298339844,\n",
+       " 0.28757017850875854,\n",
+       " 0.060881346464157104,\n",
+       " 0.19995999336242676,\n",
+       " 0.2332974374294281,\n",
+       " 0.5807837843894958,\n",
+       " 0.4985215663909912,\n",
+       " 0.2317824810743332,\n",
+       " 0.20419657230377197,\n",
+       " 0.2929933965206146,\n",
+       " 0.22726529836654663,\n",
+       " 0.36383742094039917,\n",
+       " 0.26542332768440247,\n",
+       " 0.33275106549263,\n",
+       " 0.1817902773618698,\n",
+       " 0.019586173817515373,\n",
+       " 0.6501842737197876,\n",
+       " 0.5130109786987305,\n",
+       " 0.04855664074420929,\n",
+       " 0.327665239572525,\n",
+       " 0.33484169840812683,\n",
+       " 0.18408897519111633,\n",
+       " 0.8089461326599121,\n",
+       " 0.2609926760196686,\n",
+       " 0.35048383474349976,\n",
+       " 0.3380715847015381,\n",
+       " 0.19198913872241974,\n",
+       " 0.47304245829582214,\n",
+       " 0.13059648871421814,\n",
+       " 0.388828307390213,\n",
+       " 0.6691229939460754,\n",
+       " 0.1510116457939148,\n",
+       " 0.20976220071315765,\n",
+       " 0.4316028952598572,\n",
+       " 0.5592595934867859,\n",
+       " 0.4931623339653015,\n",
+       " 0.40056753158569336,\n",
+       " 0.1390654295682907,\n",
+       " 0.7112942337989807,\n",
+       " 0.30744668841362,\n",
+       " 0.2824617028236389,\n",
+       " 0.29495444893836975,\n",
+       " 0.8129028081893921,\n",
+       " 0.04778153821825981,\n",
+       " 0.3677351772785187,\n",
+       " 0.38807204365730286,\n",
+       " 0.23143930733203888,\n",
+       " 0.3730814754962921,\n",
+       " 0.3903065323829651,\n",
+       " 0.10604582726955414,\n",
+       " 0.375832736492157,\n",
+       " 0.32024890184402466,\n",
+       " 0.3080943822860718,\n",
+       " 0.6008120775222778,\n",
+       " 0.8878772258758545,\n",
+       " 0.4099455773830414,\n",
+       " 0.4919497072696686,\n",
+       " 0.21881842613220215,\n",
+       " 0.7104718089103699,\n",
+       " 0.40945085883140564,\n",
+       " 0.7066667675971985,\n",
+       " 0.3884510099887848,\n",
+       " 0.29029491543769836,\n",
+       " 0.48201748728752136,\n",
+       " 0.645422637462616,\n",
+       " 0.46089968085289,\n",
+       " 0.26423460245132446,\n",
+       " 0.3575299084186554,\n",
+       " 0.12025940418243408,\n",
+       " 0.3637012839317322,\n",
+       " 0.5629667043685913,\n",
+       " 0.21808886528015137,\n",
+       " 0.20087826251983643,\n",
+       " 0.19176578521728516,\n",
+       " 0.521368145942688,\n",
+       " 0.4651867747306824,\n",
+       " 0.2771470844745636,\n",
+       " 0.15467087924480438,\n",
+       " 0.06321043521165848,\n",
+       " 0.727550208568573,\n",
+       " 0.6326872706413269,\n",
+       " 0.2524058222770691,\n",
+       " 0.40928635001182556,\n",
+       " 0.2859940230846405,\n",
+       " 0.24548542499542236,\n",
+       " 0.25654155015945435,\n",
+       " 0.1554943472146988,\n",
+       " 0.2810353636741638,\n",
+       " 0.39291778206825256,\n",
+       " 0.7448244094848633,\n",
+       " 0.36232057213783264,\n",
+       " 0.2249329537153244,\n",
+       " 0.5934489369392395,\n",
+       " 0.36474189162254333,\n",
+       " 0.16170084476470947,\n",
+       " 0.2098686695098877,\n",
+       " 0.3690999746322632,\n",
+       " 0.6965110898017883,\n",
+       " 0.21211691200733185,\n",
+       " 0.6880887150764465,\n",
+       " 0.7315702438354492,\n",
+       " 0.2110704928636551,\n",
+       " 0.8123224973678589,\n",
+       " 0.7990055680274963,\n",
+       " 0.14683164656162262,\n",
+       " 0.25454556941986084,\n",
+       " 0.11940312385559082,\n",
+       " 0.2454526573419571,\n",
+       " 0.5912683010101318,\n",
+       " 0.4947351813316345,\n",
+       " 0.4511561691761017,\n",
+       " 0.13149523735046387,\n",
+       " 0.1972067654132843,\n",
+       " 0.3593907356262207,\n",
+       " 0.5928145051002502,\n",
+       " 0.25529202818870544,\n",
+       " 0.2567807137966156,\n",
+       " 0.20362421870231628,\n",
+       " 0.30127424001693726,\n",
+       " 0.6847882270812988,\n",
+       " 0.6155568957328796,\n",
+       " 0.2527444660663605,\n",
+       " 0.4813864529132843,\n",
+       " 0.3825063407421112,\n",
+       " 0.3193434178829193]"
       ]
      },
-     "execution_count": 81,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "e = Evaluation(metrics=[Rouge1, Rouge2, RougeL])\n",
-    "e.eval(ds.select_columns([\"ground_truth\"]), ds[\"generated_text\"])"
+    "ds_eval[\"SBERT_cosine_score\"]"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "27b0b9a7",
+   "cell_type": "markdown",
+   "id": "3893e1c7",
    "metadata": {},
-   "outputs": [],
    "source": []
   }
  ],
diff --git a/pyproject.toml b/pyproject.toml
index 1396ac80e..ed76e07b4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,6 +6,11 @@ dependencies = [
 ]
 dynamic = ["version", "readme"]
 
+[project.optional-dependencies]
+dev = [
+    "pre-commit",
+]
+
 [tool.setuptools.dynamic]
 readme = {file = ["README.md"], content-type = "text/plain"}
 

From 0f099001037f561da48aea4233a6963c79e17c20 Mon Sep 17 00:00:00 2001
From: Jithin James <jjmachan@pop-os.localdomain>
Date: Sat, 13 May 2023 17:32:14 +0530
Subject: [PATCH 2/3] remove pre-commit hooks

---
 .pre-commit-config.yaml | 15 ---------------
 pyproject.toml          |  1 -
 2 files changed, 16 deletions(-)
 delete mode 100644 .pre-commit-config.yaml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
deleted file mode 100644
index aa8978dc9..000000000
--- a/.pre-commit-config.yaml
+++ /dev/null
@@ -1,15 +0,0 @@
-# See https://pre-commit.com for more information
-# See https://pre-commit.com/hooks.html for more hooks
-repos:
--   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v3.2.0
-    hooks:
-    -   id: trailing-whitespace
-    -   id: end-of-file-fixer
-    -   id: check-yaml
-    -   id: check-added-large-files
--   repo: https://github.com/psf/black
-    rev: 22.10.0
-    hooks:
-    -   id: black
-    -   id: black-jupyter
diff --git a/pyproject.toml b/pyproject.toml
index ed76e07b4..7ae0e0218 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,6 @@ dynamic = ["version", "readme"]
 
 [project.optional-dependencies]
 dev = [
-    "pre-commit",
 ]
 
 [tool.setuptools.dynamic]

From 92b11365f1cce66e2603e5d53efcc61ed931b9fa Mon Sep 17 00:00:00 2001
From: Jithin James <jjmachan@pop-os.localdomain>
Date: Sun, 14 May 2023 10:33:17 +0530
Subject: [PATCH 3/3] fix mistakes

---
 examples/data_prep.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/examples/data_prep.py b/examples/data_prep.py
index df2763104..75305f659 100644
--- a/examples/data_prep.py
+++ b/examples/data_prep.py
@@ -1,4 +1,7 @@
-from datasets import concatenate_datasets, load_dataset
+import concurrent.futures as f
+
+from datasets import DatasetDict, load_dataset
+from langchain.llms import OpenAI
 
 
 def format_for_belar(row):
@@ -9,17 +12,15 @@ def format_for_belar(row):
 
 
 d = load_dataset("eli5")
+assert isinstance(d, DatasetDict)
 ds = d["test_eli5"].map(format_for_belar, batched=False)
 ds = ds.select_columns(["context", "prompt", "ground_truth"])
 
 ds = ds.shuffle(seed=42).select(range(500))
-ds.shape, ds.column_names
+print(ds.shape, ds.column_names)
 
-import concurrent.futures as f
-
-from langchain.llms import OpenAI
 
-llm = OpenAI()
+llm = OpenAI()  # type: ignore
 prompt = """
 {context}
 with the above context explain like I'm five: {prompt}