In [2]:
pip install transformers



In [3]:
!pip install transformers rouge-score nltk

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24933 sha256=a132478f6a96f2059bf109e9e13405c9deb0abfa49c47ad1f4eda9838ba18d0a
  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [4]:
!pip install transformers rouge-score nltk bert-score

Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m1.8 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.0.0->bert-score)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.0.0->bert-score)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.0.0->bert-score)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.0.0->bert-score)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.0.0->bert-score)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.

In [13]:
import json
from transformers import T5ForConditionalGeneration, T5Tokenizer, BertForQuestionAnswering, BertTokenizer
from google.colab import files
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
import bert_score
import torch

In [14]:
# Upload the file
# `files` is the google.colab helper imported above; the call's return value is
# kept in `uploaded`, whose keys are read in the next cell to find the file name.
uploaded = files.upload()

Saving NuclearDataset.json to NuclearDataset (1).json


In [15]:
# Use the first uploaded file as the dataset. The name is whatever was uploaded;
# Colab may rename it on a clash (e.g. "NuclearDataset (1).json"), so it is read
# from `uploaded` rather than hard-coded.
if not uploaded:
    raise ValueError("No file was uploaded; re-run the upload cell and select the dataset JSON.")
input_file = next(iter(uploaded))

In [16]:
# Load the dataset from the uploaded JSON file.
# The encoding is given explicitly so parsing does not depend on the platform's
# default text encoding (JSON interchange files are UTF-8).
with open(input_file, "r", encoding="utf-8") as f:
    data = json.load(f)

In [17]:
# Load the pretrained checkpoints once: a T5 model/tokenizer pair used to propose
# questions and a BERT model/tokenizer pair used to extract answers.
# NOTE(review): the stock "t5-large" checkpoint is presumably not fine-tuned for the
# "generate question:" prompt used later — confirm it yields usable questions.
question_generator_model, question_generator_tokenizer = (
    loader.from_pretrained("t5-large")
    for loader in (T5ForConditionalGeneration, T5Tokenizer)
)
qa_model, qa_tokenizer = (
    loader.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
    for loader in (BertForQuestionAnswering, BertTokenizer)
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [18]:
# ROUGE-L scorer with stemming enabled; reused for every generated question
scorer = rouge_scorer.RougeScorer(rouge_types=['rougeL'], use_stemmer=True)

In [19]:
# Accumulator for one result record per generated question
output_data = list()

In [20]:
# Function to generate a question
def generate_question(context, temperature=1.0, top_k=50):
    """Generate one question about ``context`` with the T5 question generator.

    Sampling is enabled explicitly: without ``do_sample=True`` the ``generate``
    call runs plain beam search, which ignores ``temperature`` and ``top_k``
    and returns the same question for every setting.

    Args:
        context: Passage the question should be about.
        temperature: Sampling temperature passed to ``generate``.
        top_k: Top-k sampling cutoff passed to ``generate``.

    Returns:
        The decoded question string, always ending with "?".
    """
    input_ids = question_generator_tokenizer.encode(f"generate question: {context}", return_tensors="pt")
    outputs = question_generator_model.generate(
        input_ids,
        max_length=50,
        num_beams=5,
        do_sample=True,  # required for temperature / top_k to have any effect
        temperature=temperature,
        top_k=top_k,
        num_return_sequences=1,
    )
    # Strip first so trailing whitespace cannot hide an existing "?" and cause "? ?"
    generated_question = question_generator_tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
    if not generated_question.endswith('?'):
        generated_question += "?"
    return generated_question

# Generate questions and answers, then score each pair.
QUESTIONS_PER_ENTRY = 2

# Start from an empty list so re-running this cell does not duplicate rows.
output_data = []

# Fix the RNG so any sampling done during question generation is repeatable.
torch.manual_seed(42)

# Build the BERTScore model once; calling bert_score.score() inside the loop
# reloads the underlying model on every single call.
bert_scorer = bert_score.BERTScorer(lang='en')

# The QA model cannot index positions beyond its position-embedding table.
qa_max_length = qa_model.config.max_position_embeddings

for entry in data["NuclearDataset"]:
    text = entry["Text"]
    context = entry["Context"]

    questions = []
    answers = []

    # Generate several questions, loosening the sampling settings each time
    for i in range(QUESTIONS_PER_ENTRY):
        temperature = 1.0 + i * 0.5  # Increase temperature for later questions
        top_k = 50 + i * 20  # Increase top_k for later questions
        generated_question = generate_question(context, temperature, top_k)
        questions.append(generated_question)

        # Extract an answer span from the context.
        # truncation="only_second" shortens only the context, never the question.
        inputs = qa_tokenizer(
            generated_question,
            context,
            return_tensors="pt",
            truncation="only_second",
            max_length=qa_max_length,
        )
        input_ids = inputs["input_ids"]
        with torch.no_grad():  # inference only: skip building the autograd graph
            # Pass every tokenizer output, including token_type_ids, which tell
            # BERT where the question ends and the context begins.
            outputs = qa_model(**inputs)
        answer_start_index = int(torch.argmax(outputs.start_logits))
        answer_end_index = int(torch.argmax(outputs.end_logits)) + 1
        # If the predicted end precedes the start, the slice is empty and the
        # answer is the empty string.
        answer_ids = input_ids[0][answer_start_index:answer_end_index].tolist()
        generated_answer = qa_tokenizer.convert_tokens_to_string(qa_tokenizer.convert_ids_to_tokens(answer_ids))

        answers.append(generated_answer)

    for question, answer in zip(questions, answers):
        # BLEU of the generated question against the context
        bleu = sentence_bleu([context.split()], question.split())

        # ROUGE-L of the generated question against the context
        rouge = scorer.score(context, question)

        # BERTScore of the extracted answer against the context
        _, _, F1 = bert_scorer.score([answer], [context])

        output_data.append({
            "Text": text,
            "Context": context,
            "Question": question,
            "Answer": answer,
            "BLEU_Score": bleu,
            "ROUGE_Score": rouge['rougeL'].fmeasure,
            "BERTScore": F1.mean().item()
        })



tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['ro

In [21]:
# Write the collected records to disk as indented JSON
output_file = "output.json"
with open(output_file, "w") as out_fh:
    out_fh.write(json.dumps(output_data, indent=4))

In [22]:
# Report the path the results were written to
print(f"Output saved to {output_file}")

# Download the output file
# (`files` is the google.colab helper imported above; presumably this triggers a
# browser download of `output_file` — only meaningful inside Colab)
files.download(output_file)

Output saved to output.json


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>