Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 34 additions & 38 deletions experiments/assesments/metrics_assesments.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,19 @@
},
{
"cell_type": "code",
"execution_count": 62,
"execution_count": 1,
"id": "7bfb2480",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/anaconda3/envs/alerts/lib/python3.8/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"import json\n",
"from datasets import load_dataset\n",
Expand All @@ -55,7 +64,7 @@
"metadata": {},
"outputs": [],
"source": [
"os.chdir(\"/Users/shahules/belar/\")"
"os.chdir('/Users/shahules/belar/src/')"
]
},
{
Expand Down Expand Up @@ -135,7 +144,7 @@
},
{
"cell_type": "code",
"execution_count": 129,
"execution_count": 7,
"id": "f9f4280e",
"metadata": {},
"outputs": [
Expand All @@ -144,7 +153,7 @@
"output_type": "stream",
"text": [
"Found cached dataset parquet (/Users/shahules/.cache/huggingface/datasets/explodinggradients___parquet/explodinggradients--ragas-wikiqa-5b5116e5cb909aca/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)\n",
"100%|█| 1/1 [00:00<00:00, 58.\n"
"100%|████████████████████████████████████████████████████| 1/1 [00:00<00:00, 242.78it/s]\n"
]
}
],
Expand All @@ -162,7 +171,7 @@
},
{
"cell_type": "code",
"execution_count": 153,
"execution_count": 8,
"id": "eca20daf",
"metadata": {},
"outputs": [],
Expand All @@ -184,7 +193,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 9,
"id": "f3e35532",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -216,7 +225,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 10,
"id": "335081e3",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -252,7 +261,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 11,
"id": "b2642e5b",
"metadata": {},
"outputs": [],
Expand All @@ -267,7 +276,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 13,
"id": "26ca4af4",
"metadata": {},
"outputs": [
Expand All @@ -284,7 +293,7 @@
"0"
]
},
"execution_count": 19,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -305,7 +314,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 14,
"id": "ca1c56d6",
"metadata": {},
"outputs": [],
Expand All @@ -327,7 +336,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 15,
"id": "cd7fed9c",
"metadata": {},
"outputs": [],
Expand All @@ -343,7 +352,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 16,
"id": "35113558",
"metadata": {},
"outputs": [],
Expand All @@ -354,7 +363,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 17,
"id": "4e82d0df",
"metadata": {},
"outputs": [
Expand All @@ -368,10 +377,10 @@
{
"data": {
"text/plain": [
"3.514920235612768"
"3.5533440372846865"
]
},
"execution_count": 16,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
Expand Down Expand Up @@ -399,40 +408,27 @@
},
{
"cell_type": "code",
"execution_count": 124,
"execution_count": 13,
"id": "cc263805",
"metadata": {},
"outputs": [],
"source": [
"from experimental.relevance import QGen"
"from ragas.metrics.answer_relevance import QGen"
]
},
{
"cell_type": "code",
"execution_count": 125,
"execution_count": 14,
"id": "38deaf06",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/anaconda3/envs/alerts/lib/python3.8/site-packages/transformers/models/t5/tokenization_t5_fast.py:155: FutureWarning: This tokenizer was incorrectly instantiated with a model max length of 512 which will be corrected in Transformers v5.\n",
"For now, this behavior is kept to avoid breaking backwards compatibility when padding/encoding with `truncation is True`.\n",
"- Be aware that you SHOULD NOT rely on t5-base automatically truncating your input to 512 when padding/encoding.\n",
"- If you want to encode/pad to sequences longer than 512 you can either instantiate this tokenizer with `model_max_length` or pass `max_length` when encoding/padding.\n",
"- To avoid this warning, please instantiate this tokenizer with `model_max_length` set to your preferred value.\n",
" warnings.warn(\n"
]
}
],
"outputs": [],
"source": [
"t5_qgen = QGen(\"t5-base\", \"cpu\")"
]
},
{
"cell_type": "code",
"execution_count": 126,
"execution_count": 15,
"id": "45942810",
"metadata": {},
"outputs": [],
Expand All @@ -457,7 +453,7 @@
},
{
"cell_type": "code",
"execution_count": 127,
"execution_count": 16,
"id": "ab00e4fe",
"metadata": {},
"outputs": [],
Expand Down Expand Up @@ -522,12 +518,12 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 17,
"id": "b6d76ae2",
"metadata": {},
"outputs": [],
"source": [
"## import cross encoder"
"from ragas.metrics.context_relevance import context_relavancy"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion src/ragas/metrics/answer_relevance.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
class QGen:
def __init__(self, model_name: str, device: str) -> None:
config = AutoConfig.from_pretrained(model_name)
self.tokenizer = AutoTokenizer.from_pretrained(model_name)
self.tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=512)
if self.tokenizer.pad_token is None:
self.tokenizer.pad_token = "[PAD]"
architecture = np.intersect1d(
Expand Down