# Training an Article Title Generation Model with Bart

## Install libraries and download the dataset

Load kaggle.json file.

In [1]:
!pip install datasets==2.21.0 transformers peft torch rouge-score nltk

Collecting datasets==2.21.0
  Downloading datasets-2.21.0-py3-none-any.whl.metadata (21 kB)
Collecting peft
  Downloading peft-0.12.0-py3-none-any.whl.metadata (13 kB)
Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting pyarrow>=15.0.0 (from datasets==2.21.0)
  Downloading pyarrow-17.0.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets==2.21.0)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets==2.21.0)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess (from datasets==2.21.0)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Downloading datasets-2.21.0-py3-none-any.whl (527 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m527.3/527.3 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pef

In [None]:
!kaggle

Traceback (most recent call last):
  File "/usr/local/bin/kaggle", line 5, in <module>
    from kaggle.cli import main
  File "/usr/local/lib/python3.10/dist-packages/kaggle/__init__.py", line 7, in <module>
    api.authenticate()
  File "/usr/local/lib/python3.10/dist-packages/kaggle/api/kaggle_api_extended.py", line 407, in authenticate
    raise IOError('Could not find {}. Make sure it\'s located in'
OSError: Could not find kaggle.json. Make sure it's located in /root/.config/kaggle. Or use the environment method. See setup instructions at https://github.com/Kaggle/kaggle-api/


In [None]:
!cp kaggle.json ~/.kaggle/kaggle.json

cp: cannot stat 'kaggle.json': No such file or directory


In [None]:
!kaggle datasets download -d fabiochiusano/medium-articles

Dataset URL: https://www.kaggle.com/datasets/fabiochiusano/medium-articles
License(s): CC0-1.0
Downloading medium-articles.zip to /content
100% 369M/369M [00:21<00:00, 22.7MB/s]
100% 369M/369M [00:21<00:00, 17.7MB/s]


In [2]:
!pip install accelerate -U



## Load the dataset

In [4]:
import transformers
from datasets import load_dataset, load_metric, Dataset,DatasetDict

In [5]:
medium_datasets = DatasetDict()

In [None]:
medium_datasets

DatasetDict({
    train: Dataset({
        features: ['title', 'text', 'url', 'authors', 'timestamp', 'tags'],
        num_rows: 192368
    })
})

## Dataset train/validation/test split

In [6]:
!git clone https://github.com/anukvma/group18_final_project.git

Cloning into 'group18_final_project'...
remote: Enumerating objects: 239, done.[K
remote: Counting objects: 100% (69/69), done.[K
remote: Compressing objects: 100% (63/63), done.[K
remote: Total 239 (delta 38), reused 13 (delta 6), pack-reused 170 (from 1)[K
Receiving objects: 100% (239/239), 4.29 MiB | 10.62 MiB/s, done.
Resolving deltas: 100% (123/123), done.


In [7]:
import os
import json
import pandas as pd

# Define the folder containing the text files
folder_path = '/content/group18_final_project/aiml_question_answers/AIML_QnA_Content/Group18_AIML_QA.csv'
df = pd.read_csv("/content/group18_final_project/aiml_question_answers/AIML_QnA_Content/Group18_AIML_QA.csv", names=['id','question','answer','unit'],encoding='unicode_escape',header=0)
df1 = pd.read_csv("/content/group18_final_project/aiml_question_answers/sampled_qa_data.csv", names=['id','question','answer','unit'],encoding='unicode_escape',header=0)
df = pd.concat([df, df1])


In [8]:
df.head()

Unnamed: 0,id,question,answer,unit
0,1.0,What is a linear classifier?,A linear classifier is a model that makes pred...,1.0
1,2.0,How does a linear classifier make predictions?,A linear classifier predicts by calculating th...,1.0
2,3.0,What is the objective function in a linear cla...,The objective function often used is the loss ...,1.0
3,4.0,What is gradient descent?,Gradient descent is an optimization algorithm ...,1.0
4,5.0,How does learning rate affect gradient descent?,The learning rate controls the step size in gr...,1.0


In [9]:
df.dropna(axis=0, inplace=True)

In [10]:
df.isna().sum()

Unnamed: 0,0
id,0
question,0
answer,0
unit,0


In [11]:
df = df.sample(frac=1).reset_index(drop=True)

In [12]:
train_dataset: Dataset = Dataset.from_pandas(df[:800])
validation_dataset: Dataset = Dataset.from_pandas(df[800:900])
test_dataset: Dataset = Dataset.from_pandas(df[900:])

In [13]:
train_dataset

Dataset({
    features: ['id', 'question', 'answer', 'unit'],
    num_rows: 800
})

In [14]:
# keep only a subsample of the datasets
medium_datasets["train"] = train_dataset
medium_datasets["validation"] = validation_dataset
medium_datasets["test"] = test_dataset


In [15]:
medium_datasets

DatasetDict({
    train: Dataset({
        features: ['id', 'question', 'answer', 'unit'],
        num_rows: 800
    })
    validation: Dataset({
        features: ['id', 'question', 'answer', 'unit'],
        num_rows: 100
    })
    test: Dataset({
        features: ['id', 'question', 'answer', 'unit'],
        num_rows: 127
    })
})

## Data preprocessing

In [16]:
import nltk
nltk.download('punkt')
import string
from transformers import GPTNeoForCausalLM, GPT2LMHeadModel, GPT2Tokenizer, Trainer, TrainingArguments
from peft import LoraConfig, get_peft_model
from datasets import load_dataset, Dataset

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


In [17]:
# model_name = "EleutherAI/gpt-neo-1.3B"  # or "EleutherAI/gpt-j-6B" for a larger model
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
text = "What is a linear classifier?"
# input_text = f"Question: {data['question']}\nAnswer:"
inputs = f"Question: {text} \n Answer:"
inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
# Perform inference
outputs = model.generate(
    inputs['input_ids'],
    attention_mask=inputs['attention_mask'],
    max_length=256,
    # max_new_tokens=500,
    num_beams=8,
    early_stopping=True,
    repetition_penalty=.9
)
print(outputs)
# Decode the generated token IDs to text
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Generated Text:", generated_text)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


tensor([[ 2061,   318,   257, 14174,  1398,  7483,    30,   198,   198,    32,
         14174,  1398,  7483,   318,   257,  1398,  7483,   326,  2753,   257,
         14174,  1398,  7483,   290,  5860,   257, 14174,  1398,  7483,    13,
           198,   198,    32, 14174,  1398,  7483,   318,   257,  1398,  7483,
           326,  2753,   257, 14174,  1398,  7483,   290,  5860,   257, 14174,
          1398,  7483,    13,   198,   198,    32, 14174,  1398,  7483,   318,
           257,  1398,  7483,   326,  2753,   257, 14174,  1398,  7483,   290,
          5860,   257, 14174,  1398,  7483,    13,   198,   198,    32, 14174,
          1398,  7483,   318,   257,  1398,  7483,   326,  2753,   257, 14174,
          1398,  7483,   290,  5860,   257, 14174,  1398,  7483,    13,   198,
           198,    32, 14174,  1398,  7483,   318,   257,  1398,  7483,   326,
          2753,   257, 14174,  1398,  7483,   290,  5860,   257, 14174,  1398,
          7483,    13,   198,   198,    32, 14174,  

In [None]:
import torch
torch.cuda.empty_cache()

In [None]:
# create list of dictionaries
train_data = []; val_data1 = []
for index, row in df.iterrows():
	train_data += [{'question': row.iloc[0], 'answer' : row.iloc[1]}]
import json
with open('train_file.json', 'w') as file:
    json.dump(train_data, file)

In [None]:
# Tokenize the train dataset
from datasets import load_dataset
dataset = load_dataset('json', data_files = {'train':'train_file.json'})
tokenizer.pad_token = tokenizer.eos_token
def preprocess_function(examples):
    inputs = ['Q: ' + q + ' A:' + a + tokenizer.eos_token for q, a in zip(examples['question'], examples['answer'])]
    model_inputs = tokenizer(inputs, max_length=256, truncation=True, padding="max_length")
    labels = model_inputs['input_ids'].copy()
    model_inputs['labels'] = labels
    return model_inputs

tokenized_dataset = dataset.map(preprocess_function, batched=True)

In [None]:
# Tokenize the train dataset
from datasets import load_dataset
dataset = load_dataset('json', data_files = {'train':'train_file.json'})
tokenizer.pad_token = tokenizer.eos_token
def preprocess_function(examples):
    inputs = ['Q: ' + q + ' A:' + a + tokenizer.eos_token for q, a in zip(examples['question'], examples['answer'])]
    model_inputs = tokenizer(inputs, max_length=256, truncation=True, padding="max_length")
    labels = model_inputs['input_ids'].copy()
    model_inputs['labels'] = labels
    return model_inputs

tokenized_dataset = dataset.map(preprocess_function, batched=True)

In [None]:
prefix = "Answer the AIML question: "

max_input_length = 128
max_target_length = 128
tokenizer.pad_token= tokenizer.eos_token
def clean_text(text):
  sentences = nltk.sent_tokenize(text.strip())
  sentences_cleaned = [s for sent in sentences for s in sent.split("\n")]
  sentences_cleaned_no_titles = [sent for sent in sentences_cleaned
                                 if len(sent) > 0 and
                                 sent[-1] in string.punctuation]
  text_cleaned = "\n".join(sentences_cleaned_no_titles)
  return text_cleaned

def preprocess_data(examples):
  # texts_cleaned = [clean_text(text) for text in examples["question"]]
  # inputs = [prefix + text for text in texts_cleaned]

  inputs = [f"Question: {text} \n Answer:" for text in examples["question"]]
  model_inputs = tokenizer(inputs, padding='max_length',max_length=max_input_length, truncation=True)

  # Setup the tokenizer for targets
  with tokenizer.as_target_tokenizer():
    labels = tokenizer(examples["answer"], padding='max_length',max_length=max_target_length,
                       truncation=True)

  model_inputs["labels"] = labels["input_ids"]
  return model_inputs

def preprocess_function(examples):
    inputs = ['Q: ' + q + ' A:' + a + tokenizer.eos_token for q, a in zip(examples['question'], examples['answer'])]
    # print(examples['question'], examples['answer'])
    # for example in examples:
    #   print(example['question'])
    #   print(example['answer'])
    #   break
    model_inputs = tokenizer(inputs, max_length=256, truncation=True, padding="max_length")
    labels = model_inputs['input_ids'].copy()
    model_inputs['labels'] = labels
    return model_inputs

In [18]:
prefix = "Answer the AIML question: "

max_input_length = 128
max_target_length = 128
tokenizer.pad_token= tokenizer.eos_token
def clean_text(text):
  sentences = nltk.sent_tokenize(text.strip())
  sentences_cleaned = [s for sent in sentences for s in sent.split("\n")]
  sentences_cleaned_no_titles = [sent for sent in sentences_cleaned
                                 if len(sent) > 0 and
                                 sent[-1] in string.punctuation]
  text_cleaned = "\n".join(sentences_cleaned_no_titles)
  return text_cleaned

def preprocess_data(examples):
  texts_cleaned = [clean_text(text) for text in examples["question"]]
  inputs = [f"Question: {text}\nAnswer:" for text in examples["question"]]
  model_inputs = tokenizer(inputs, padding='max_length',max_length=max_input_length, truncation=True)

  # Setup the tokenizer for targets
  with tokenizer.as_target_tokenizer():
    labels = tokenizer(examples["answer"], padding='max_length',max_length=max_target_length,truncation=True)
  labels[labels == tokenizer.pad_token_id] = -100  # Ignore padding in the loss calculation
  model_inputs["labels"] = labels["input_ids"]
  print(model_inputs)
  return model_inputs

def preprocess(data):
    input_text = f"Question: {data['question']}\nAnswer:"
    output_text = f"{data['answer']}"

    # Tokenize input and output with same length and padding
    inputs = tokenizer(input_text, padding='max_length', max_length=128, truncation=True)
    outputs = tokenizer(output_text, padding='max_length', max_length=128, truncation=True)

    # Labels should be the same length as inputs, with padding tokens replaced by -100
    labels = outputs['input_ids']
    labels[labels == tokenizer.pad_token_id] = -100  # Ignore padding in the loss calculation

    inputs['labels'] = labels["input_ids"]

    return inputs

def preprocess_data1(examples):
    inputs = [q + "\n" + a for q, a in zip(examples['question'], examples['answer'])]
    model_inputs = tokenizer(inputs, max_length=max_input_length, truncation=True, padding="max_length")
    labels = model_inputs['input_ids'].copy()
    model_inputs['labels'] = labels
    return model_inputs


In [20]:
tokenized_datasets = medium_datasets.map(preprocess_data1, batched=True)
# tokenized_validation_datasets = validation_dataset.map(preprocess_function, batched=True)
tokenized_datasets

Map:   0%|          | 0/800 [00:00<?, ? examples/s]

Map:   0%|          | 0/100 [00:00<?, ? examples/s]

Map:   0%|          | 0/127 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'question', 'answer', 'unit', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 800
    })
    validation: Dataset({
        features: ['id', 'question', 'answer', 'unit', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 100
    })
    test: Dataset({
        features: ['id', 'question', 'answer', 'unit', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 127
    })
})

In [21]:
lora_config = LoraConfig(
    r=4,  # Rank of the low-rank adaptation matrix
    lora_alpha=16,  # Scaling factor for the low-rank adaptation
    lora_dropout=0.1,  # Dropout for regularization
    bias="none",  # No bias adjustment
    task_type="CAUSAL_LM"  # Task type for GPT-like models
)

# Apply LoRA to the GPT-Neo model
model = get_peft_model(model, lora_config)



## Fine-tune GPT


In [None]:
!rm -r {model_dir}

rm: cannot remove '{model_dir}': No such file or directory


In [22]:
training_args = TrainingArguments(
    output_dir="./gpt3-lora-qa",
    overwrite_output_dir=True,
    evaluation_strategy="steps",
    eval_steps=100,
    logging_strategy="steps",
    logging_steps=100,
    num_train_epochs=5,
    per_device_train_batch_size=2,  # Lower batch size
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,  # Adjust batch size based on GPU memory
    save_steps=500,
    save_total_limit=2,
    fp16=True,  # Use mixed precision training for efficiency
    report_to="none",
    dataloader_pin_memory=True
)



In [54]:
import numpy as np

rouge = load_metric("rouge")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred

    # Convert logits to token IDs by taking argmax along the vocabulary axis
    predictions = np.argmax(predictions, axis=-1)  # Get the index of the highest logit (token ID)

    decoded_preds = []
    decoded_labels = []

    for pred, label in zip(predictions, labels):
        # Decode the token IDs (skip special tokens)
        decoded_preds.append(tokenizer.decode(pred, skip_special_tokens=True))
        decoded_labels.append(tokenizer.decode(label, skip_special_tokens=True))

    # Now compute the ROUGE or other metrics
    rouge_scores = rouge.compute(predictions=decoded_preds, references=decoded_labels, use_stemmer=True)

    # return {k: v for k, v in rouge_scores.items()}
    rouge1 = rouge_scores['rouge1'].mid.fmeasure
    rouge2 = rouge_scores['rouge2'].mid.fmeasure
    rougeL = rouge_scores['rougeL'].mid.fmeasure
    rougeLsum = rouge_scores['rougeLsum'].mid.fmeasure
    print(rouge_scores)
    return {
        "rouge1": rouge1,
        "rouge2": rouge2,
        "rougeL": rougeL,
        "rougeLsum": rougeLsum
    }



In [55]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["validation"],
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


In [56]:
trainer.train()

Step,Training Loss,Validation Loss,Rouge1,Rouge2,Rougel,Rougelsum
100,1.5613,1.344851,0.476642,0.168196,0.388193,0.421086
200,1.5157,1.325351,0.477955,0.170701,0.389714,0.421446
300,1.4949,1.314155,0.482422,0.173687,0.392187,0.425295
400,1.4845,1.308874,0.482999,0.176904,0.394567,0.426415
500,1.4761,1.306868,0.48233,0.177874,0.394122,0.427295


{'rouge1': AggregateScore(low=Score(precision=0.4741760726138594, recall=0.43612366295122323, fmeasure=0.45388645993105375), mid=Score(precision=0.4974305092244986, recall=0.4578021371972356, fmeasure=0.4766417438694638), high=Score(precision=0.5183414620986775, recall=0.4797479712333727, fmeasure=0.4981316217012568)), 'rouge2': AggregateScore(low=Score(precision=0.15534860529529015, recall=0.1432856642383184, fmeasure=0.14911395982489858), mid=Score(precision=0.1751911256445806, recall=0.16189357844994004, fmeasure=0.16819553249649252), high=Score(precision=0.1951102013067694, recall=0.18086848984651613, fmeasure=0.1877623825484693)), 'rougeL': AggregateScore(low=Score(precision=0.3855303977754696, recall=0.35632266188707123, fmeasure=0.3698770223453915), mid=Score(precision=0.4049291677933201, recall=0.3734099442337549, fmeasure=0.3881926095822361), high=Score(precision=0.42377500938464113, recall=0.39243751285252515, fmeasure=0.4070565339713551)), 'rougeLsum': AggregateScore(low=Sco

TrainOutput(global_step=500, training_loss=1.506501190185547, metrics={'train_runtime': 122.983, 'train_samples_per_second': 32.525, 'train_steps_per_second': 4.066, 'total_flos': 261745016832000.0, 'train_loss': 1.506501190185547, 'epoch': 5.0})

In [30]:
import torch

In [32]:
trainer.save_model()

In [28]:
device="cuda"

In [41]:
def ask_question(question):
    inputs = tokenizer.encode('Q: ' + question + ' A:', return_tensors='pt').to(device)
    attention_mask = torch.ones(inputs.shape, device=device)
    outputs = model.generate(inputs, attention_mask = attention_mask, max_new_tokens=100, num_return_sequences=1)
    gen_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    question, answer = gen_text.split(' A:')
    return question, answer

print(ask_question("What is the difference between concatenation vs. summation of two tensors?"))
print(ask_question("What are the other applications of unsupervised learning than clustering?"))
print(ask_question("What is the linear classifier?"))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Q: What is the difference between concatenation vs. summation of two tensors?', ' Concatenation is the process of combining two tensors. The sum of the tensors is the sum of the tensors.')


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


('Q: What are the other applications of unsupervised learning than clustering?', ' Unsupervised learning is a technique that allows you to learn a set of data sets from a set of data sets. It is a way to learn a set of data sets from a set of data sets.')
('Q: What is the linear classifier?', ' Linear classifiers are a way to represent a set of values in a linear way. They are used to represent a set of values in a linear way.')


## Load the model from GDrive

In [33]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the tokenizer and model
model_name = "gpt3-lora-qa"
model_dir = f"/content/{model_name}"

tokenizer1 = AutoTokenizer.from_pretrained(model_dir)
model1 = AutoModelForCausalLM.from_pretrained(model_dir)


In [None]:


max_input_length = 128

In [40]:
text = "What is a linear classifier?"
# input_text = f"Question: {data['question']}\nAnswer:"
inputs = f"Q: {text} A: "
inputs = tokenizer1(inputs, return_tensors="pt", truncation=True, padding=True, max_length=128)
# Perform inference
outputs = model1.generate(
    inputs['input_ids'],
    attention_mask=inputs['attention_mask'],
    max_length=128,
    max_new_tokens=128,
    num_beams=8,
    early_stopping=True,
    repetition_penalty=2.0
)
print(outputs)
# Decode the generated token IDs to text
generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Generated Text:", generated_text)


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Both `max_new_tokens` (=128) and `max_length`(=128) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


tensor([[   48,    25,  1867,   318,   257, 14174,  1398,  7483,    30,   317,
            25,   220,  1849,    32, 14174,  1398,  7483,   318,   281, 11862,
           326,   460,   307,  5625,   284,   597,  2099,   286,  1366,    13,
           220,  1849,  1890,  1672,    11,   611,   345,   765,   284,  1064,
           262,  1271,   286,  1751,   287,   257,  5752,    11,   345,   460,
           779,   257, 14174,  1398,  7483,   588,   428,    25, 50256]])
Generated Text: Q: What is a linear classifier? A:  A linear classifier is an algorithm that can be applied to any type of data.  For example, if you want to find the number of children in a row, you can use a linear classifier like this:


## Upload the model to the Hugging Space Hub

https://huggingface.co/docs/transformers/model_sharing

In [42]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [47]:
model.push_to_hub(repo_id="anukvma/gpt-aiml-question-answer-v2")
tokenizer.push_to_hub(repo_id="anukvma/gpt-aiml-question-answer-v2")

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/593k [00:00<?, ?B/s]

No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/anukvma/gpt-aiml-question-answer-v2/commit/34b71eb5697db6baaf4fec9edae550e8915eb333', commit_message='Upload tokenizer', commit_description='', oid='34b71eb5697db6baaf4fec9edae550e8915eb333', pr_url=None, pr_revision=None, pr_num=None)

## Load the model from the Hugging Face Hub

## Evaluate the model on the test set

In [53]:

# pad texts to the same length
test_df = df[900:]
answers = []
predicts = []
for index, row in test_df.iterrows():
  question = row.iloc[1]
  answer = row.iloc[2]
  question_returned, prdicted_answer = ask_question(question)
  answers.append(answer)
  predicts.append(prdicted_answer)
answer_tokens = tokenizer(answers, padding='max_length', max_length=128, truncation=True)["input_ids"]

predicted_tokens = tokenizer(predicts, padding='max_length', max_length=128, truncation=True)["input_ids"]
predictions_labels = [answer_tokens, predicted_tokens]
compute_metrics(predictions_labels)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end gene

{'rouge1': AggregateScore(low=Score(precision=0.015748031496062992, recall=0.0003770320040640081, fmeasure=0.0007344033918837068), mid=Score(precision=0.047244094488188976, recall=0.0012343672713661215, fmeasure=0.0024033061554667085), high=Score(precision=0.08661417322834646, recall=0.0023131592315816206, fmeasure=0.004500823259705532)), 'rouge2': AggregateScore(low=Score(precision=0.0, recall=0.0, fmeasure=0.0), mid=Score(precision=0.0, recall=0.0, fmeasure=0.0), high=Score(precision=0.0, recall=0.0, fmeasure=0.0)), 'rougeL': AggregateScore(low=Score(precision=0.015748031496062992, recall=0.0003770320040640081, fmeasure=0.0007344033918837068), mid=Score(precision=0.047244094488188976, recall=0.001197457822547224, fmeasure=0.0023323956946964437), high=Score(precision=0.08661417322834646, recall=0.0022352911564375755, fmeasure=0.004346345111901092)), 'rougeLsum': AggregateScore(low=Score(precision=0.015748031496062992, recall=0.0003302424368006631, fmeasure=0.0006460730870179688), mid=

{'rougeL': 0.0023323956946964437, 'rougeLsum': 0.0023298887592146664}