In [1]:
!pip install datasets




In [2]:
import json
import pandas as pd
import torch
from transformers import (
	BertTokenizerFast,
        BertForQuestionAnswering,
        TrainingArguments,
        Trainer,
 )
from sklearn.model_selection import train_test_split
from datasets import Dataset, DatasetDict


In [4]:
data_path = "COVID-QA.json"
with open(data_path, "r") as f:
  data = json.load(f)


questions = []
answers = []
contexts = []

for entry in data['data']:
 for paragraph in entry['paragraphs']:
  context = paragraph['context']
  for qa in paragraph['qas']:
    questions.append(qa['question'])
    answers.append(qa['answers'][0]['text'])
    contexts.append(context)

df = pd.DataFrame({
  'question': questions,
  'answer': answers,
  'context': contexts
})


In [5]:
dataset = Dataset.from_pandas(df)
tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")

def tokenize(batch):
    tokenized_batch = tokenizer(
        batch["question"],
        batch["context"],
        max_length=512,
        padding="max_length",
        truncation=True,
        return_offsets_mapping=True,
        return_token_type_ids=True
    )

    answer_starts = []
    answer_ends = []

    for i, context in enumerate(batch["context"]):
        answer_start = context.find(batch["answer"][i])
        answer_end = answer_start + len(batch["answer"][i])
        answer_starts.append(answer_start)
        answer_ends.append(answer_end)

    tokenized_batch["answer_start"] = answer_starts
    tokenized_batch["answer_end"] = answer_ends

    return tokenized_batch

tokenized_dataset = dataset.map(tokenize, batched=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Map:   0%|          | 0/2019 [00:00<?, ? examples/s]

In [6]:
def prepare_train_features(example):
    start_position = example["input_ids"].index(tokenizer.cls_token_id)
    end_position = example["input_ids"].index(tokenizer.sep_token_id)


    found_start = False
    found_end = False
    for i, (offset_start, offset_end) in enumerate(example["offset_mapping"]):
        if not found_start and offset_start == example["answer_start"]:
            start_position = i
            found_start = True
        if not found_end and offset_end == example["answer_end"]:
            end_position = i
            found_end = True
        if found_start and found_end:
            break


    if not found_start or not found_end:
        start_position = -1
        end_position = -1


    example["start_positions"] = start_position
    example["end_positions"] = end_position
    return example


prepared_dataset = tokenized_dataset.map(prepare_train_features, batched=False)


def filter_invalid_examples(example):
    return example["start_positions"] != -1 and example["end_positions"] != -1


filtered_dataset = prepared_dataset.filter(filter_invalid_examples, batched=False)


Map:   0%|          | 0/2019 [00:00<?, ? examples/s]

Filter:   0%|          | 0/2019 [00:00<?, ? examples/s]

In [7]:
train_indices, eval_indices = train_test_split(list(range(len(filtered_dataset))), test_size=0.1, random_state=42)
train_dataset = filtered_dataset.select(train_indices)
eval_dataset = filtered_dataset.select(eval_indices)


def convert_to_tensors(example):
    example["input_ids"] = torch.tensor(example["input_ids"], dtype=torch.long)
    example["attention_mask"] = torch.tensor(example["attention_mask"], dtype=torch.long)
    return example


train_dataset = train_dataset.map(convert_to_tensors)
eval_dataset = eval_dataset.map(convert_to_tensors)


dataset_dict = DatasetDict({"train": train_dataset, "eval": eval_dataset})


Map:   0%|          | 0/252 [00:00<?, ? examples/s]

Map:   0%|          | 0/29 [00:00<?, ? examples/s]

In [None]:
pip install transformers[torch]

In [None]:
pip install accelerate -U

In [13]:
pip install transformers[torch] -U

Collecting transformers[torch]
  Downloading transformers-4.40.2-py3-none-any.whl (9.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.0/9.0 MB[0m [31m25.1 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate>=0.21.0 (from transformers[torch])
  Downloading accelerate-0.30.0-py3-none-any.whl (302 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.4/302.4 kB[0m [31m26.4 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->transformers[torch])
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch-

In [9]:
pip install wandb

Collecting wandb
  Downloading wandb-0.17.0-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m18.7 MB/s[0m eta [36m0:00:00[0m
Collecting docker-pycreds>=0.4.0 (from wandb)
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting gitpython!=3.1.29,>=1.0.0 (from wandb)
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m12.2 MB/s[0m eta [36m0:00:00[0m
Collecting sentry-sdk>=1.0.0 (from wandb)
  Downloading sentry_sdk-2.1.1-py2.py3-none-any.whl (277 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m277.3/277.3 kB[0m [31m25.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting setproctitle (from wandb)
  Downloading setproctitle-1.3.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86

In [11]:
###############################################################
model = BertForQuestionAnswering.from_pretrained("bert-base-uncased")

from transformers import BertModel, BertConfig

# Load the existing BERT configuration
config = BertConfig.from_pretrained("bert-base-uncased")

# Add additional layers to the existing BERT architecture
config.num_hidden_layers = 6  # Increase the number of transformer layers
config.intermediate_size = 2034  # Increase the size of the intermediate layer in each transformer block
config.num_attention_heads = 6  # You can also adjust the number of attention heads

# Initialize the BERT model with the modified configuration
model = BertForQuestionAnswering(config)

training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=3,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    fp16=False,
    load_best_model_at_end=True,
    report_to="wandb",  # Enable logging to W&B
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_dict["train"],
    eval_dataset=dataset_dict["eval"],
)

trainer.train()


Some weights of BertForQuestionAnswering were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,6.0469,5.926538
2,5.3649,6.134736
3,4.895,7.830318


TrainOutput(global_step=756, training_loss=5.18636529407804, metrics={'train_runtime': 2141.6929, 'train_samples_per_second': 0.353, 'train_steps_per_second': 0.353, 'total_flos': 76542411939840.0, 'train_loss': 5.18636529407804, 'epoch': 3.0})

In [None]:
from transformers import BertForQuestionAnswering, TrainingArguments, Trainer
from transformers import BertConfig
from datasets import DatasetDict
from sklearn.model_selection import train_test_split
import torch
import optuna

# Define a function to train and evaluate the model with given hyperparameters
def objective(trial):
    # Sample hyperparameters
    learning_rate = trial.suggest_float("learning_rate", 1e-5, 5e-5, log=True)
    per_device_train_batch_size = trial.suggest_categorical("per_device_train_batch_size", [8, 16])
    num_train_epochs = trial.suggest_int("num_train_epochs", 2, 4)

    # Initialize the BERT model with the modified configuration
    config = BertConfig.from_pretrained("bert-base-uncased")
    model = BertForQuestionAnswering(config)

    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=num_train_epochs,
        per_device_train_batch_size=per_device_train_batch_size,
        per_device_eval_batch_size=3,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
        evaluation_strategy="epoch",
        save_strategy="epoch",
        save_total_limit=2,
        fp16=False,
        load_best_model_at_end=True,
        report_to="wandb",  # Enable logging to W&B
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset_dict["train"],
        eval_dataset=dataset_dict["eval"],
    )

    # Train the model
    trainer.train()

    # Evaluate the model
    # Here you can compute your evaluation metric (e.g., ROUGE scores)
    # For simplicity, let's assume we have a dummy evaluation metric
    eval_metric = trainer.evaluate(eval_dataset=dataset_dict["eval"])["eval_loss"]

    return eval_metric

# Define the study object for Optuna
study = optuna.create_study(direction="minimize")

# Start the hyperparameter optimization
study.optimize(objective, n_trials=10)  # You can adjust the number of trials

# Get the best hyperparameters
best_params = study.best_params
print("Best Hyperparameters:", best_params)

# Use the best hyperparameters to train the final model
config = BertConfig.from_pretrained("bert-base-uncased")
best_model = BertForQuestionAnswering(config)

best_training_args = TrainingArguments(
    output_dir='./best_results',
    num_train_epochs=best_params["num_train_epochs"],
    per_device_train_batch_size=best_params["per_device_train_batch_size"],
    per_device_eval_batch_size=3,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./best_logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    fp16=False,
    load_best_model_at_end=True,
    report_to="wandb",  # Enable logging to W&B
)

best_trainer = Trainer(
    model=best_model,
    args=best_training_args,
    train_dataset=dataset_dict["train"],
    eval_dataset=dataset_dict["eval"],
)

# Train the final model with the best hyperparameters
best_trainer.train()


[I 2024-05-08 19:18:20,645] A new study created in memory with name: no-name-a487e2f7-dcb6-4175-bfe2-2ef1553215b0
[34m[1mwandb[0m: Currently logged in as: [33mmahatp77[0m ([33mnlpmaha[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [19]:
pip install optuna


Collecting optuna
  Downloading optuna-3.6.1-py3-none-any.whl (380 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m380.1/380.1 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting alembic>=1.5.0 (from optuna)
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.4/233.4 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting colorlog (from optuna)
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting Mako (from alembic>=1.5.0->optuna)
  Downloading Mako-1.3.3-py3-none-any.whl (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.8/78.8 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: Mako, colorlog, alembic, optuna
Successfully installed Mako-1.3.3 alembic-1.13.1 colorlog-6.8.2 optuna-3.6.1


In [12]:
model.save_pretrained("trained_model12")
model = BertForQuestionAnswering.from_pretrained("trained_model12")

In [None]:
def get_answer(question, context):
    inputs = tokenizer.encode_plus(question, context, return_tensors="pt")
    start_logits, end_logits = model(**inputs).values()


    start_index_and_logits = torch.argmax(start_logits, dim=1).item(), start_logits[0].max().item()
    end_index_and_logits = torch.argmax(end_logits, dim=1).item(), end_logits[0].max().item()


    if end_index_and_logits[0] >= start_index_and_logits[0]:
        start_index, end_index = start_index_and_logits[0], end_index_and_logits[0]
    else:
        if start_index_and_logits[1] > end_index_and_logits[1]:
            start_index, end_index = start_index_and_logits[0], start_index_and_logits[0]
        else:
            start_index, end_index = end_index_and_logits[0], end_index_and_logits[0]


    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][start_index:end_index+1]))
    return answer


In [None]:
question1 = "What is the period of the novel coronavirus?"
context1 = "The novel coronavirus, also known as COVID-19, has an incubation period ranging from 1 to 14 days, with the majority of cases showing symptoms around 5 days after exposure."
answer1 = get_answer(question1, context1)
print("Answer 1:", answer1)


Answer 1: 1 to 14 days


In [None]:
question2 = "What are the common symptoms of COVID-19?"
context2 = "COVID-19 symptoms can vary widely and may include fever, cough, shortness of breath, fatigue, body aches, and loss of taste or smell. Some people may also experience gastrointestinal symptoms like nausea, vomiting, and diarrhea."
answer2 = get_answer(question2, context2)
print("Answer 2:", answer2)


Answer 2: fever, cough, shortness of breath, fatigue, body aches, and loss of taste or smell. some people may also experience gastrointestinal symptoms like nausea, vomiting, and diarrhea.


In [None]:
question3 = "does Covid-19 cause taste loss?"
context3 = "COVID-19 symptoms can vary widely and may include fever, cough, shortness of breath, fatigue, body aches, and loss of taste or smell. Some people may also experience gastrointestinal symptoms like nausea, vomiting, and diarrhea."
answer3 = get_answer(question3, context3)
print("Answer 3:", answer3)


Answer 3: gastrointestinal symptoms like nausea, vomiting, and diarrhea. [SEP]


### **RROOUGGE STARTS  HERE**


In [16]:
pip install rouge-score


Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=72553947ffe25045ffe1664c41ea9112fb60b15e8e155ff3296087d6aae2d71d
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [13]:
def get_answer(question, context):
    inputs = tokenizer.encode_plus(question, context, return_tensors="pt", max_length=512, truncation=True)
    input_ids = inputs["input_ids"]
    token_type_ids = inputs["token_type_ids"]
    attention_mask = inputs["attention_mask"]

    start_logits, end_logits = model(input_ids=input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask).values()

    start_index = torch.argmax(start_logits, dim=1).item()
    end_index = torch.argmax(end_logits, dim=1).item()

    answer = tokenizer.decode(input_ids[0][start_index:end_index+1])

    return answer

predictions = []

# Generate predictions for the evaluation dataset
for example in eval_dataset:
    question = example["question"]
    context = example["context"]
    prediction = get_answer(question, context)
    predictions.append(prediction)
print(predictions)


['', '', '', '', '', 'hcov ‐ 229e ( 28 % ), hcov ‐ nl63 ( 22 % ), and hcov ‐ hku1 ( 16 % ). we did not observe species ‐ specific differences in the clinical characteristics of hcov infection, with the exception of hcov ‐ hku1, for which the severity of gastrointestinal symptoms trended higher on the fourth day of illness. text : clinical manifestations of human coronavirus ( hcov ) infection range from a mild, self - limiting illness of the upper respiratory tract to an acute respiratory distress syndrome with a high mortality rate. highly virulent species of hcov were responsible for outbreaks of severe acute respiratory syndrome ( sars ) and middle east respiratory syndrome ( mers ) ; case - fatality rates ranged from 14 % to 45 %. [ 1 ] [ 2 ] [ 3 ] by contrast, other hcov species ( hcov - hku1, hcov - oc43, hcov - nl63, and hcov - 229e )', 'therefore, it is necessary to develop an effective oral vaccine against pedv infection. currently, bacillus subtilis as recombinant vaccine car

In [14]:
reference_answers = []

# Extract reference answers from the evaluation dataset
for example in eval_dataset:
    reference_answer = example["answer"]
    reference_answers.append(reference_answer)

print(reference_answers)


['mutational pressure and translational selection', '53%', 'the prevalence of different respiratory viral infections in causing exacerbations in chronic airway inflammatory diseases.', 'zoonotic diseases', 'the antiproliferative effect of a copper (II) complex on HT-29 colon cancer cells', 'ranged from 14% to 45%', 'mucosal', 'With the threat of an emerging global pandemic, the peculiar problems associated with the more immediate and seasonal epidemics warrant the development of an effective vaccine.', 'fecal–oral contact', 'over 250 000', 'to evaluate the effectiveness of zinc supplementation on diarrhea and average daily weight gain (ADG) in pre-weaned dairy calves', 'Lassa and Lujo viruses', 'across the Arabian Peninsula and in parts of Africa', 'infectious bronchitis virus (IBV)', '≤14 years old', 'due to displacement, crowded housing, malnutrition, inadequate water, sanitation, and hygiene (WASH) tools, and stigmatization', 'nonenveloped, single-stranded, positive-sense RNA', "des

In [17]:
from rouge_score import rouge_scorer

# Initialize ROUGE scorer
scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'], use_stemmer=True)

# Calculate ROUGE scores for each prediction-reference pair
rouge_scores = []
for prediction, reference in zip(predictions, reference_answers):
    scores = scorer.score(prediction, reference)
    rouge_scores.append(scores)

# Compute average ROUGE scores
average_rouge1_f1 = sum(score['rouge1'].fmeasure for score in rouge_scores) / len(rouge_scores)
average_rougeL_f1 = sum(score['rougeL'].fmeasure for score in rouge_scores) / len(rouge_scores)

print("Average ROUGE-1 F1 Score:", average_rouge1_f1)
print("Average ROUGE-L F1 Score:", average_rougeL_f1)


Average ROUGE-1 F1 Score: 0.052358044959519505
Average ROUGE-L F1 Score: 0.05009688045980216


In [None]:
pip install bert_score

Collecting bert_score
  Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: bert_score
Successfully installed bert_score-0.3.13
