In [3]:
!pip install datasets transformers
!pip install evaluate
!pip install rouge_score
!pip install -U bitsandbytes
!pip install faiss-cpu 
!pip install langchain
!pip install langchain_community

Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading fsspec-2025.3.0-py3-none-any.whl (193 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: fsspec
  Attempting uninstall: fsspec
    Found existing installation: fsspec 2025.3.2
    Uninstalling fsspec-2025.3.2:
      Successfully uninstalled fsspec-2025.3.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cesium 0.12.4 requires numpy<3.0,>=2.0, but you have numpy 1.26.4 which is incompatible.
bigframes 1.42.0 requires rich<14,>=12.4.4, but you have rich 14.0.0 which is incompatible.
torch 2.6.0+cu124 requires nvidia-cublas-cu12==12.4.5.8; platform_system == "Linux" and platform_machine =

In [23]:
from datasets import load_dataset
from transformers import PreTrainedTokenizerBase
from transformers import AutoTokenizer, DPRContextEncoder,DPRContextEncoderTokenizerFast, AutoModelForQuestionAnswering
from transformers import TrainingArguments, Trainer, pipeline
from transformers import RagTokenizer, RagRetriever, RagSequenceForGeneration, pipeline, AutoModelForSeq2SeqLM, BitsAndBytesConfig
from datasets import DatasetDict
import evaluate
import torch
import faiss
from datasets import Dataset
from langchain.schema    import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings   import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms         import HuggingFacePipeline
from langchain.chains       import RetrievalQA
from langchain.prompts import PromptTemplate
import math
import re

## Helper functions

In [10]:
def extract_ans(raw_str: str) -> str:
    m = re.search(r"<ans>(.*?)</ans>", raw_str, flags=re.S)
    return m.group(1).strip() if m else raw_str.strip()

In [11]:
def preprocess(examples, tokenizer: PreTrainedTokenizerBase):
    tokenized = tokenizer(
        examples["question"],
        examples["context"],
        truncation="only_second",
        max_length=256,
        stride=128,
        return_overflowing_tokens=True,
        return_offsets_mapping=True,
        padding="max_length",
    )
    sample_mapping = tokenized.pop("overflow_to_sample_mapping")
    offset_mapping = tokenized.pop("offset_mapping")
    start_positions = []
    end_positions = []
    for i, offsets in enumerate(offset_mapping):
        sample_idx = sample_mapping[i]
        answers = examples["answers"][sample_idx]
        seq_ids = tokenized.sequence_ids(i)
        cls_index = 0
        start_positions.append(cls_index)
        end_positions.append(cls_index)
        if len(answers["answer_start"]) == 0:
            continue
        start_char = answers["answer_start"][0]
        end_char = start_char + len(answers["text"][0])
        context_tokens = [idx for idx, s_id in enumerate(seq_ids) if s_id == 1]
        if not context_tokens:
            continue
        chunk_start_char = offsets[context_tokens[0]][0]
        chunk_end_char = offsets[context_tokens[-1]][1]
        if not (chunk_start_char <= start_char and end_char <= chunk_end_char):
            continue
        token_start_index = cls_index
        token_end_index = cls_index
        for idx in context_tokens:
            off_start, off_end = offsets[idx]
            if off_start <= start_char < off_end:
                token_start_index = idx
            if off_start < end_char <= off_end:
                token_end_index = idx
        start_positions[-1] = token_start_index
        end_positions[-1] = token_end_index
    tokenized["start_positions"] = start_positions
    tokenized["end_positions"] = end_positions
    return tokenized


In [12]:
def print_metrics(ds, custom_pipeline):
    outputs = custom_pipeline(question=ds["question"], context=ds["context"])
    
    preds = [
        {"id": ex["id"], "prediction_text": out["answer"]}
        for ex, out in zip(ds, outputs)
    ]
    refs = [
        {"id": ex["id"], "answers": ex["answers"]}
        for ex in ds
    ]
    
    results = metric.compute(predictions=preds, references=refs)
    print(f"EM: {results['exact_match']:.2f}, F1: {results['f1']:.2f}")

## Dataset loading

In [13]:
raw = load_dataset("squad")
sub = raw["train"].shuffle(seed=42).select(range(20000))
split = sub.train_test_split(test_size=0.2, seed=42)

test_subset = raw["validation"].shuffle(seed=42).select(range(2_000))
data = DatasetDict({
    "train":      split["train"],
    "validation": split["test"],
    "test":       test_subset
})

README.md:   0%|          | 0.00/7.62k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/14.5M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/1.82M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/87599 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/10570 [00:00<?, ? examples/s]

## Experiment 1

In [None]:
model_name = "distilbert-base-uncased-distilled-squad"
tokenizer  = AutoTokenizer.from_pretrained(model_name)
model_distilbert  = AutoModelForQuestionAnswering.from_pretrained(model_name)

In [None]:
tokenized = data.map(
    lambda ex: preprocess(ex, tokenizer),
    batched=True,
    remove_columns=raw["train"].column_names
)

In [None]:
args = TrainingArguments(
  output_dir="distilbert-qa",
  per_device_train_batch_size=128,
  per_device_eval_batch_size=128,
  num_train_epochs=10,
  learning_rate=3e-5,
  weight_decay=0.01,
  logging_steps=200,
  log_level="info",
  report_to=["none"],       
  disable_tqdm=False,      
  fp16=True if torch.cuda.is_available() else False,            
)

In [None]:
trainer_full = Trainer(
  model=model_distilbert,
  args=args,
  train_dataset=tokenized["train"],
  eval_dataset =tokenized["validation"],
  processing_class=tokenizer,
  compute_metrics=compute_metrics
)
trainer_full.train()

In [None]:
metric = evaluate.load("squad")
squad  = evaluate.load("squad")
rouge  = evaluate.load("rouge")
bleu   = evaluate.load("bleu")
qa = pipeline(
    "question-answering",
    model="/kaggle/working/distilbert-qa/checkpoint-1740",
    tokenizer="/kaggle/working/distilbert-qa/checkpoint-1740",
    device=0,
)

ds = data["test"]

print_metrics(ds, qa)

## Experiment 2

In [None]:
model_partial = AutoModelForQuestionAnswering.from_pretrained(model_name)

#### Partial fine tuning

In [None]:
for param in model_partial.distilbert.parameters():
    param.requires_grad = False


In [None]:
trainer_partial = Trainer(
    model=model_partial,
    args=args_partial,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["validation"],
    tokenizer=tokenizer,
)
trainer_partial.train()

In [None]:
qa_partial = pipeline(
    "question-answering",
    model="distilbert-qa-partial/checkpoint-417",  
    tokenizer="distilbert-qa-partial/checkpoint-417",
    device=0 if torch.cuda.is_available() else -1,
)


print_metrics(data["test"], qa_partial)

## Experiment 3

In [14]:
contexts = raw["train"]["context"] + raw["validation"]["context"]
unique   = list(dict.fromkeys(contexts))
docs     = [Document(page_content=c) for c in unique]


#we need smaller pieces to fit the model 
#also it improves semantic search accuracy as smaller chunks allow the faiss index to capture a more specific meaning 
#chunk overlap preserves context
splitter    = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
split_docs  = splitter.split_documents(docs)


#Building an embedding for each chunk
embeddings = HuggingFaceEmbeddings() 
#Where we store the chunk and the corresponding embedding
#Faiss index contains vectors 
db         = FAISS.from_documents(split_docs, embeddings)
#What will help us retrieve the 4 most relevant using the index inialized in the previous line
retriever  = db.as_retriever(search_kwargs={"k": 4})

  embeddings = HuggingFaceEmbeddings()
  embeddings = HuggingFaceEmbeddings()


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.4k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [7]:


MODEL_ID = "google/flan-t5-large"          # swap to any row above

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

model = AutoModelForSeq2SeqLM.from_pretrained(
    MODEL_ID,
    device_map="auto",                  
                     
    # torch_dtype=torch.float16           # fp16 on GPU chunks
)

qa_pipe = pipeline(
    task="text2text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    max_new_tokens=64,
)

llm = HuggingFacePipeline(pipeline=qa_pipe)

model.safetensors:   0%|          | 0.00/3.13G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Device set to use cuda:0
  llm = HuggingFacePipeline(pipeline=qa_pipe)


In [15]:
zero_shot_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        "{context}\n\n"
        "Question: {question}\n"
        "Answer:"
    ),
)

zero_shot_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": zero_shot_prompt},
)

In [16]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

cot_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        "Use the following passages to answer the question.\n"
        "First think step-by-step, then give the final answer on a new line "
        "wrapped in <ans>…</ans>.\n\n"
        "Passages:\n{context}\n\n"
        "Question: {question}\n\n"
        "Step-by-step reasoning:\n"
        "1. "
    ),
)

cot_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",          # or "map_reduce" if you prefer
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": cot_prompt},
)


In [24]:
def print_chain_metrics(chain, data):
    preds, refs = [], []
    rouge_hyp, rouge_ref, bleu_refs = [], [], []

    total = len(data["test"])
    for idx, ex in enumerate(data["test"], 1):
        print(f"[{idx}/{total}] Processing QID={ex['id']}…")

        # ↘ call the chain with a dict and extract the answer text
        raw = chain({"query": ex["question"]})["result"]
        text = extract_ans(raw)                     # <── NEW

        preds.append({"id": ex["id"], "prediction_text": text})
        refs.append({
            "id": ex["id"],
            "answers": {
                "text": ex["answers"]["text"],
                "answer_start": ex["answers"]["answer_start"]
            }
        })
        rouge_hyp.append(text)
        rouge_ref.append(ex["answers"]["text"][0])
        bleu_refs.append(ex["answers"]["text"])

        print(f"[{idx}/{total}] Done.\n")

    print("All examples processed — now computing metrics…\n")

    s_res = squad.compute(predictions=preds, references=refs)
    r_res = rouge.compute(predictions=rouge_hyp, references=rouge_ref)
    b_res = bleu.compute(predictions=rouge_hyp, references=bleu_refs)

    print(f"EM:           {s_res['exact_match']:.2f}")
    print(f"F1:           {s_res['f1']:.2f}")
    print(f"ROUGE-L F1:   {r_res['rougeL'] * 100:.2f}")
    print(f"BLEU:         {b_res['bleu'] * 100:.2f}")


In [None]:
squad  = evaluate.load("squad")
rouge  = evaluate.load("rouge")
bleu   = evaluate.load("bleu")
print_chain_metrics(cot_chain, data)

Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[1/2000] Processing QID=572759665951b619008f8884…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[1/2000] Done.

[2/2000] Processing QID=57296de03f37b3190047839e…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[2/2000] Done.

[3/2000] Processing QID=5726d4a45951b619008f7f6c…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[3/2000] Done.

[4/2000] Processing QID=572843304b864d1900164848…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[4/2000] Done.

[5/2000] Processing QID=56d729180d65d21400198427…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[5/2000] Done.

[6/2000] Processing QID=57274beff1498d1400e8f5e6…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[6/2000] Done.

[7/2000] Processing QID=57274d905951b619008f87e3…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[7/2000] Done.

[8/2000] Processing QID=56bf1ae93aeaaa14008c951b…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[8/2000] Done.

[9/2000] Processing QID=57264d9edd62a815002e8100…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[9/2000] Done.

[10/2000] Processing QID=5725c41eec44d21400f3d50b…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[10/2000] Done.

[11/2000] Processing QID=56e20a3ae3433e140042324d…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[11/2000] Done.

[12/2000] Processing QID=572f59b4a23a5019007fc58a…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[12/2000] Done.

[13/2000] Processing QID=5727678e5951b619008f8973…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[13/2000] Done.

[14/2000] Processing QID=572867543acd2414000df9a1…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[14/2000] Done.

[15/2000] Processing QID=57275f6ef1498d1400e8f709…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[15/2000] Done.

[16/2000] Processing QID=5730b255396df919000962b4…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[16/2000] Done.

[17/2000] Processing QID=57298ef11d0469140077952f…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[17/2000] Done.

[18/2000] Processing QID=56f86966aef2371900626053…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[18/2000] Done.

[19/2000] Processing QID=5728d5793acd2414000dffb7…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[19/2000] Done.

[20/2000] Processing QID=572684365951b619008f7540…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[20/2000] Done.

[21/2000] Processing QID=570d26efb3d812140066d493…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[21/2000] Done.

[22/2000] Processing QID=57378c9b1c456719005744a9…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[22/2000] Done.

[23/2000] Processing QID=57286bb84b864d19001649c8…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[23/2000] Done.

[24/2000] Processing QID=5727f746ff5b5019007d9960…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[24/2000] Done.

[25/2000] Processing QID=5706074552bb8914006897d6…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[25/2000] Done.

[26/2000] Processing QID=5728f50baf94a219006a9e57…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[26/2000] Done.

[27/2000] Processing QID=57309ef18ab72b1400f9c601…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[27/2000] Done.

[28/2000] Processing QID=5727ec062ca10214002d99b9…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[28/2000] Done.

[29/2000] Processing QID=56d99179dc89441400fdb56d…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[29/2000] Done.

[30/2000] Processing QID=5711607f2419e314009555cf…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[30/2000] Done.

[31/2000] Processing QID=57266a15f1498d1400e8defa…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[31/2000] Done.

[32/2000] Processing QID=570953a7efce8f15003a7e02…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[32/2000] Done.

[33/2000] Processing QID=56e1239acd28a01900c67641…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[33/2000] Done.

[34/2000] Processing QID=57302efe04bcaa1900d772f9…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[34/2000] Done.

[35/2000] Processing QID=5726e37ef1498d1400e8eed8…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[35/2000] Done.

[36/2000] Processing QID=5725d662ec44d21400f3d688…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[36/2000] Done.

[37/2000] Processing QID=57266e72f1498d1400e8df8d…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[37/2000] Done.

[38/2000] Processing QID=56e74faf00c9c71400d76f95…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[38/2000] Done.

[39/2000] Processing QID=5729d51d3f37b31900478592…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[39/2000] Done.

[40/2000] Processing QID=5728f2e26aef051400154898…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[40/2000] Done.

[41/2000] Processing QID=5729efab3f37b319004785cf…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[41/2000] Done.

[42/2000] Processing QID=57266783f1498d1400e8de8a…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[42/2000] Done.

[43/2000] Processing QID=56e0c0c7231d4119001ac379…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[43/2000] Done.

[44/2000] Processing QID=56d70d240d65d21400198327…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[44/2000] Done.

[45/2000] Processing QID=5733d3cb4776f419006612ed…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[45/2000] Done.

[46/2000] Processing QID=5705f7c875f01819005e77df…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[46/2000] Done.

[47/2000] Processing QID=572680865951b619008f74e9…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[47/2000] Done.

[48/2000] Processing QID=5725d79e89a1e219009abf90…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[48/2000] Done.

[49/2000] Processing QID=570603c475f01819005e7884…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[49/2000] Done.

[50/2000] Processing QID=57264fe65951b619008f6fa4…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[50/2000] Done.

[51/2000] Processing QID=56d98f0ddc89441400fdb559…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[51/2000] Done.

[52/2000] Processing QID=56de0ed14396321400ee257a…
[52/2000] Done.

[53/2000] Processing QID=572649d8f1498d1400e8db39…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[53/2000] Done.

[54/2000] Processing QID=57264684708984140094c127…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[54/2000] Done.

[55/2000] Processing QID=570953a7efce8f15003a7dff…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[55/2000] Done.

[56/2000] Processing QID=56beb3a03aeaaa14008c9248…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[56/2000] Done.

[57/2000] Processing QID=5726a299dd62a815002e8b9e…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[57/2000] Done.

[58/2000] Processing QID=57097051ed30961900e84134…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[58/2000] Done.

[59/2000] Processing QID=5705e3f252bb89140068966c…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[59/2000] Done.

[60/2000] Processing QID=5725e1c4271a42140099d2d7…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[60/2000] Done.

[61/2000] Processing QID=57332442d058e614000b5723…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[61/2000] Done.

[62/2000] Processing QID=572906e23f37b31900477f8e…


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[62/2000] Done.

[63/2000] Processing QID=5728455bff5b5019007da078…


In [22]:
print_chain_metrics(zero_shot_chain, data)

Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


[1/2000] Processing QID=572759665951b619008f8884…


NameError: name 're' is not defined

In [91]:
# N = 5   # how many examples to inspect

# for i in range(N):
#     ex       = data["test"][i]           # one SQuAD example
#     question = ex["question"]

#     # ❱❱  feed the question as {"query": ...}
#     res = cot_chain({"query": question})   # or {"question": question} if you changed input_key
#     raw = res["result"]                    # chain output (reasoning + answer)
#     final = raw.split("Final Answer:")[-1].strip()

#     print(f"\n=== Example {i+1}/{N} ===")
#     print("QUESTION:", question)
#     print("RAW COT OUTPUT:\n", raw)
#     print("EXTRACTED ANSWER:", final)
#     print("GOLD ANSWERS:", ex["answers"]["text"])

Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)
Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



=== Example 1/5 ===
QUESTION: In what year did Massachusetts first require children to be educated in schools?
RAW COT OUTPUT:
 1852. So the final answer is 1852.
EXTRACTED ANSWER: 1852. So the final answer is 1852.
GOLD ANSWERS: ['1852', '1852', '1852']


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



=== Example 2/5 ===
QUESTION: When were stromules discovered?
RAW COT OUTPUT:
 The first observed in 1962. 2. In 1665. 3. In living plant tissue.
EXTRACTED ANSWER: The first observed in 1962. 2. In 1665. 3. In living plant tissue.
GOLD ANSWERS: ['1962', '1962', '1962']


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



=== Example 3/5 ===
QUESTION: Which artist who had a major influence on the Gothic Revival is represented in the V&A's British galleries?
RAW COT OUTPUT:
 Horace Walpole is a major influence on the Gothic Revival. So, the final answer is Horace Walpole.
EXTRACTED ANSWER: Horace Walpole is a major influence on the Gothic Revival. So, the final answer is Horace Walpole.
GOLD ANSWERS: ['Horace Walpole', 'Horace Walpole', 'Horace Walpole']


Both `max_new_tokens` (=64) and `max_length`(=512) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)



=== Example 4/5 ===
QUESTION: In 1890, who did the university decide to team up with?
RAW COT OUTPUT:
 The American Baptist Education Society was founded in 1890 by the American Baptist Education Society and a donation from oil magnate and philanthropist John D. Rockefeller on land donated by Marshall Field. So, the final answer is the American Baptist Education Society.
EXTRACTED ANSWER: The American Baptist Education Society was founded in 1890 by the American Baptist Education Society and a donation from oil magnate and philanthropist John D. Rockefeller on land donated by Marshall Field. So, the final answer is the American Baptist Education Society.
GOLD ANSWERS: ['several regional colleges and universities', 'Des Moines College, Kalamazoo College, Butler University, and Stetson University', 'Des Moines College, Kalamazoo College, Butler University, and Stetson University', 'Des Moines College, Kalamazoo College, Butler University, and Stetson University']

=== Example 5/5 ===
QU