In [None]:
!pip install transformers
!pip install sentencepiece

# **T5_BASE F1_Score**

In [None]:
import torch
import transformers
from transformers import T5Tokenizer, T5ForConditionalGeneration
from sklearn.metrics import f1_score, precision_score, recall_score

# Run on CUDA when PyTorch reports it as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the "t5-base" checkpoint, move the model onto `device`, then load the
# matching tokenizer.
model = T5ForConditionalGeneration.from_pretrained("t5-base")
model = model.to(device)
tokenizer = T5Tokenizer.from_pretrained("t5-base")

# define the function to generate answers
def generate_answer(question, context, **generate_kwargs):
    """Generate an answer to ``question`` given ``context`` with the loaded model.

    The prompt ``"question: <question> context: <context>"`` is encoded with
    the module-level ``tokenizer``, moved to ``device`` and passed to
    ``model.generate``.

    Args:
        question: Question text.
        context: Context text appended to the prompt.
        **generate_kwargs: Optional keyword arguments forwarded unchanged to
            ``model.generate`` (for example ``max_new_tokens``). When none are
            given the call is ``model.generate(input_ids)``.

    Returns:
        The first generated sequence decoded to a string with the tokenizer's
        special tokens removed.
    """
    input_str = "question: " + question + " context: " + context
    input_ids = tokenizer.encode(input_str, return_tensors="pt").to(device)
    outputs = model.generate(input_ids, **generate_kwargs)
    # Let the tokenizer drop its own special tokens. Scanning the decoded text
    # for '<' ... '>' by hand would also delete any genuine answer text that
    # happens to sit between angle brackets.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# define the function to calculate F1 score
def calculate_f1_score(true_labels, predicted_labels):
    """Return the weighted-average F1 of the predictions.

    Thin wrapper around ``f1_score`` called with ``average='weighted'``.

    Args:
        true_labels: Reference labels, one per sample.
        predicted_labels: Predicted labels, aligned with ``true_labels``.

    Returns:
        Whatever ``f1_score`` returns for the two label sequences.
    """
    weighted_f1 = f1_score(
        y_true=true_labels,
        y_pred=predicted_labels,
        average='weighted',
    )
    return weighted_f1

# example sentences (joined with spaces, they form the context given to the model)
sentences = [
    "Olive and Kemp’s are from same hypothetical_ancestor_ 1.",
    "Hawksbill and hypothetical_ancestor_1 are from same hypothetical_ancestor_ 2.",
    "Flatback and Green are from same hypothetical_ancestor_ 3 .",
    "Leatherback and hypothetical_ancestor_3 are from same hypothetical_ancestor_ 4.",
    "Loggerhead and hypothetical_ancestor_4 are from same hypothetical_ancestor_ 5.",
    "hypothetical_ancestor_2 and hypothetical_ancestor_5 are from same hypothetical_ancestor_ 6."
]

# (question, reference answer) pairs.
# Each reference answer is spelled exactly as that entity appears in the
# sentences above, and follows from the relations those sentences state.
questions = [
    # parent questions
    ("Who is the parent of Olive?","hypothetical_ancestor_ 1"),
    ("Who is the parent of Kemp's?","hypothetical_ancestor_ 1"),
    ("Who is the parent of Olive and Kemp's?","hypothetical_ancestor_ 1"),
    ("Who are the parents of Hawksbill?","hypothetical_ancestor_ 2"),
    ("Who are the parents of Leatherback?","hypothetical_ancestor_ 4"),
    ("Who are the parents of Loggerhead ?","hypothetical_ancestor_ 5"),
    ("Who is the parent of Flatback and Green?","hypothetical_ancestor_ 3"),
    ("Who is the parent of Flatback?","hypothetical_ancestor_ 3"),
    ("Who is the parent of Green?","hypothetical_ancestor_ 3"),

    # children questions
    ("Who are the children of hypothetical_ancestor_1?","Olive and Kemp’s"),
    ("Who are the children of hypothetical_ancestor_3?","Flatback and Green"),
    ("Who are the children of hypothetical_ancestor_2?","Hawksbill and hypothetical_ancestor_1"),
    ("Who are the children of hypothetical_ancestor_4?","Leatherback and hypothetical_ancestor_3"),
    ("Who are the children of hypothetical_ancestor_5?","Loggerhead and hypothetical_ancestor_4"),

    # common-ancestor questions
    ("What is the common ancestor of Olive and Kemp’s?","hypothetical_ancestor_ 1"),
    ("What is the common ancestor of Olive and Hawksbill?","hypothetical_ancestor_ 2"),
    ("What is the common ancestor of Olive,  Kemp’s and Hawksbill?","hypothetical_ancestor_ 2"),
    ("What is the common ancestor of Flatback and Green?","hypothetical_ancestor_ 3"),
    # Leatherback joins Flatback/Green (ancestor 3) at ancestor 4, and
    # Loggerhead joins that group at ancestor 5.
    ("What is the common ancestor of Leatherback, Flatback and Green?","hypothetical_ancestor_ 4"),
    ("What is the common ancestor of Loggerhead, Leatherback, Flatback and Green?","hypothetical_ancestor_ 5")
]

# generate answers and collect the reference / predicted labels for scoring
context = ' '.join(sentences)  # the same for every question, so build it once
true_labels = []
predicted_labels = []
for query, expected in questions:
    answer = generate_answer(query, context)
    print(f"Q: {query}")
    print(f"A: {answer}")
    # Strip both sides the same way so stray surrounding whitespace on either
    # the reference or the prediction cannot cause a mismatch.
    true_labels.append(expected.strip())
    predicted_labels.append(answer.strip())


# Score the predictions. The answer strings are passed to scikit-learn as
# class labels, so these are classification-style metrics over whole answers.
y_true = true_labels
y_pred = predicted_labels

# An answer string that occurs on only one side makes precision/recall/F1
# ill-defined for that label. zero_division=0 states the score to use in that
# case explicitly instead of relying on the warn-and-use-0 default.
f1 = f1_score(y_true, y_pred, average="weighted", zero_division=0)
precision = precision_score(y_true, y_pred, average="weighted", zero_division=0)
recall = recall_score(y_true, y_pred, average="weighted", zero_division=0)

# Exact match is the number of questions whose prediction equals the reference.
n = len(y_true)
exact_match = sum(truth == pred for truth, pred in zip(y_true, y_pred))

# Print the output and metrics
print("Evaluation Metrics:")
print("F1-score:", f1)
print("Precision:", precision)
print("Recall:", recall)
print("Exact Match:", exact_match)

# **T5_BioQA F1_Score**

In [None]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Load the tokenizer and model
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer
# Run on CUDA when PyTorch reports it as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the "ozcangundes/T5-base-for-BioQA" checkpoint, move the model onto
# `device`, then load the matching tokenizer.
model = T5ForConditionalGeneration.from_pretrained("ozcangundes/T5-base-for-BioQA")
model = model.to(device)
tokenizer = T5Tokenizer.from_pretrained("ozcangundes/T5-base-for-BioQA")

# define the function to generate answers
def generate_answer(question, context, **generate_kwargs):
    """Generate an answer to ``question`` given ``context`` with the loaded model.

    The prompt ``"question: <question> context: <context>"`` is encoded with
    the module-level ``tokenizer``, moved to ``device`` and passed to
    ``model.generate``.

    Args:
        question: Question text.
        context: Context text appended to the prompt.
        **generate_kwargs: Optional keyword arguments forwarded unchanged to
            ``model.generate`` (for example ``max_new_tokens``). When none are
            given the call is ``model.generate(input_ids)``.

    Returns:
        The first generated sequence decoded to a string with the tokenizer's
        special tokens removed.
    """
    input_str = "question: " + question + " context: " + context
    input_ids = tokenizer.encode(input_str, return_tensors="pt").to(device)
    outputs = model.generate(input_ids, **generate_kwargs)
    # Let the tokenizer drop its own special tokens. Scanning the decoded text
    # for '<' ... '>' by hand would also delete any genuine answer text that
    # happens to sit between angle brackets.
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# define the function to calculate F1 score
def calculate_f1_score(true_labels, predicted_labels):
    """Return the weighted-average F1 of the predictions.

    Thin wrapper around ``f1_score`` called with ``average='weighted'``.

    Args:
        true_labels: Reference labels, one per sample.
        predicted_labels: Predicted labels, aligned with ``true_labels``.

    Returns:
        Whatever ``f1_score`` returns for the two label sequences.
    """
    weighted_f1 = f1_score(
        y_true=true_labels,
        y_pred=predicted_labels,
        average='weighted',
    )
    return weighted_f1

# example sentences (joined with spaces, they form the context given to the model)
sentences = [
    "Olive and Kemp’s are from same hypothetical_ancestor_ 1.",
    "Hawksbill and hypothetical_ancestor_1 are from same hypothetical_ancestor_ 2.",
    "Flatback and Green are from same hypothetical_ancestor_ 3 .",
    "Leatherback and hypothetical_ancestor_3 are from same hypothetical_ancestor_ 4.",
    "Loggerhead and hypothetical_ancestor_4 are from same hypothetical_ancestor_ 5.",
    "hypothetical_ancestor_2 and hypothetical_ancestor_5 are from same hypothetical_ancestor_ 6."
]

# (question, reference answer) pairs.
# Each reference answer is spelled exactly as that entity appears in the
# sentences above, and follows from the relations those sentences state. The
# references name only the entity itself (no surrounding words such as "same").
questions = [
    # parent questions
    ("Who is the parent of Olive?","hypothetical_ancestor_ 1"),
    ("Who is the parent of Kemp's?","hypothetical_ancestor_ 1"),
    ("Who is the parent of Olive and Kemp's?","hypothetical_ancestor_ 1"),
    ("Who are the parents of Hawksbill?","hypothetical_ancestor_ 2"),
    ("Who are the parents of Leatherback?","hypothetical_ancestor_ 4"),
    ("Who are the parents of Loggerhead ?","hypothetical_ancestor_ 5"),
    ("Who is the parent of Flatback and Green?","hypothetical_ancestor_ 3"),
    ("Who is the parent of Flatback?","hypothetical_ancestor_ 3"),
    ("Who is the parent of Green?","hypothetical_ancestor_ 3"),

    # children questions (direct children, as stated in the sentences)
    ("Who are the children of hypothetical_ancestor_1?","Olive and Kemp’s"),
    ("Who are the children of hypothetical_ancestor_3?","Flatback and Green"),
    ("Who are the children of hypothetical_ancestor_2?","Hawksbill and hypothetical_ancestor_1"),
    ("Who are the children of hypothetical_ancestor_4?","Leatherback and hypothetical_ancestor_3"),
    ("Who are the children of hypothetical_ancestor_5?","Loggerhead and hypothetical_ancestor_4"),

    # common-ancestor questions
    ("What is the common ancestor of Olive and Kemp’s?","hypothetical_ancestor_ 1"),
    ("What is the common ancestor of Olive and Hawksbill?","hypothetical_ancestor_ 2"),
    ("What is the common ancestor of Olive,  Kemp’s and Hawksbill?","hypothetical_ancestor_ 2"),
    ("What is the common ancestor of Flatback and Green?","hypothetical_ancestor_ 3"),
    ("What is the common ancestor of Leatherback, Flatback and Green?","hypothetical_ancestor_ 4"),
    ("What is the common ancestor of Loggerhead, Leatherback, Flatback and Green?","hypothetical_ancestor_ 5")
]

# generate answers and collect the reference / predicted labels for scoring
context = ' '.join(sentences)  # the same for every question, so build it once
true_labels = []
predicted_labels = []
for query, expected in questions:
    answer = generate_answer(query, context)
    print(f"Q: {query}")
    print(f"A: {answer}")
    # Strip both sides the same way so stray surrounding whitespace on either
    # the reference or the prediction cannot cause a mismatch.
    true_labels.append(expected.strip())
    predicted_labels.append(answer.strip())


# Score the predictions. The answer strings are passed to scikit-learn as
# class labels, so these are classification-style metrics over whole answers.
y_true = true_labels
y_pred = predicted_labels

# An answer string that occurs on only one side makes precision/recall/F1
# ill-defined for that label. zero_division=0 states the score to use in that
# case explicitly instead of relying on the warn-and-use-0 default.
f1 = f1_score(y_true, y_pred, average="weighted", zero_division=0)
precision = precision_score(y_true, y_pred, average="weighted", zero_division=0)
recall = recall_score(y_true, y_pred, average="weighted", zero_division=0)

# Exact match is the number of questions whose prediction equals the reference.
n = len(y_true)
exact_match = sum(truth == pred for truth, pred in zip(y_true, y_pred))

# Print the output and metrics
print("Evaluation Metrics:")
print("F1-score:", f1)
print("Precision:", precision)
print("Recall:", recall)
print("Exact Match:", exact_match)

# **ALBERT F1_Score**

In [None]:
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForQuestionAnswering

# Run on CUDA when PyTorch reports it as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the ALBERT question-answering checkpoint, move it onto `device`, then
# load the tokenizer published under the same name.
model_name = "mfeb/albert-xxlarge-v2-squad2"
model = AutoModelForQuestionAnswering.from_pretrained(model_name)
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# define the function to generate answers
def generate_answer(question, context):
    """Answer ``question`` from ``context`` with a question-answering pipeline.

    The pipeline is built from the module-level ``model``, ``tokenizer`` and
    ``device`` on first use and cached on this function, so repeated calls do
    not construct a new pipeline object for every question.

    Args:
        question: Question text.
        context: Context text passed to the pipeline alongside the question.

    Returns:
        The ``'answer'`` entry of the pipeline result.
    """
    qa_pipeline = getattr(generate_answer, "_qa_pipeline", None)
    # Rebuild when nothing is cached yet, or when the global model/tokenizer
    # has been rebound since the cached pipeline was created.
    if (
        qa_pipeline is None
        or qa_pipeline.model is not model
        or qa_pipeline.tokenizer is not tokenizer
    ):
        qa_pipeline = pipeline('question-answering', model=model, tokenizer=tokenizer, device=device)
        generate_answer._qa_pipeline = qa_pipeline
    result = qa_pipeline(question=question, context=context)
    return result['answer']

# define the function to calculate F1 score
def calculate_f1_score(true_labels, predicted_labels):
    """Return the weighted-average F1 of the predictions.

    Thin wrapper around ``f1_score`` called with ``average='weighted'``.

    Args:
        true_labels: Reference labels, one per sample.
        predicted_labels: Predicted labels, aligned with ``true_labels``.

    Returns:
        Whatever ``f1_score`` returns for the two label sequences.
    """
    weighted_f1 = f1_score(
        y_true=true_labels,
        y_pred=predicted_labels,
        average='weighted',
    )
    return weighted_f1

# example sentences (joined with spaces, they form the context given to the model)
sentences = [
    "Olive and Kemp’s are from same hypothetical_ancestor_ 1.",
    "Hawksbill and hypothetical_ancestor_1 are from same hypothetical_ancestor_ 2.",
    "Flatback and Green are from same hypothetical_ancestor_ 3 .",
    "Leatherback and hypothetical_ancestor_3 are from same hypothetical_ancestor_ 4.",
    "Loggerhead and hypothetical_ancestor_4 are from same hypothetical_ancestor_ 5.",
    "hypothetical_ancestor_2 and hypothetical_ancestor_5 are from same hypothetical_ancestor_ 6."
]


# (question, reference answer) pairs.
# Each reference answer is spelled exactly as that entity appears in the
# sentences above, and follows from the relations those sentences state.
# Every reference is the complete entity name, including its number.
questions = [
    # parent questions
    ("Who is the parent of Olive?","hypothetical_ancestor_ 1"),
    ("Who is the parent of Kemp's?","hypothetical_ancestor_ 1"),
    ("Who is the parent of Olive and Kemp's?","hypothetical_ancestor_ 1"),
    ("Who are the parents of Hawksbill?","hypothetical_ancestor_ 2"),
    ("Who are the parents of Leatherback?","hypothetical_ancestor_ 4"),
    ("Who are the parents of Loggerhead ?","hypothetical_ancestor_ 5"),
    ("Who is the parent of Flatback and Green?","hypothetical_ancestor_ 3"),
    ("Who is the parent of Flatback?","hypothetical_ancestor_ 3"),
    ("Who is the parent of Green?","hypothetical_ancestor_ 3"),

    # children questions
    ("Who are the children of hypothetical_ancestor_1?","Olive and Kemp’s"),
    ("Who are the children of hypothetical_ancestor_3?","Flatback and Green"),
    ("Who are the children of hypothetical_ancestor_2?","Hawksbill and hypothetical_ancestor_1"),
    ("Who are the children of hypothetical_ancestor_4?","Leatherback and hypothetical_ancestor_3"),
    ("Who are the children of hypothetical_ancestor_5?","Loggerhead and hypothetical_ancestor_4"),

    # common-ancestor questions
    ("What is the common ancestor of Olive and Kemp’s?","hypothetical_ancestor_ 1"),
    ("What is the common ancestor of Olive and Hawksbill?","hypothetical_ancestor_ 2"),
    ("What is the common ancestor of Olive,  Kemp’s and Hawksbill?","hypothetical_ancestor_ 2"),
    ("What is the common ancestor of Flatback and Green?","hypothetical_ancestor_ 3"),
    ("What is the common ancestor of Leatherback, Flatback and Green?","hypothetical_ancestor_ 4"),
    ("What is the common ancestor of Loggerhead, Leatherback, Flatback and Green?","hypothetical_ancestor_ 5")
]

# generate answers and collect the reference / predicted labels for scoring
context = ' '.join(sentences)  # the same for every question, so build it once
true_labels = []
predicted_labels = []
for query, expected in questions:
    answer = generate_answer(query, context)
    print(f"Q: {query}")
    print(f"A: {answer}")
    # Strip both sides the same way so stray surrounding whitespace on either
    # the reference or the prediction cannot cause a mismatch.
    true_labels.append(expected.strip())
    predicted_labels.append(answer.strip())

# Score the predictions. The answer strings are passed to scikit-learn as
# class labels, so these are classification-style metrics over whole answers.
y_true = true_labels
y_pred = predicted_labels

# An answer string that occurs on only one side makes precision/recall/F1
# ill-defined for that label. zero_division=0 states the score to use in that
# case explicitly instead of relying on the warn-and-use-0 default.
f1 = f1_score(y_true, y_pred, average="weighted", zero_division=0)
precision = precision_score(y_true, y_pred, average="weighted", zero_division=0)
recall = recall_score(y_true, y_pred, average="weighted", zero_division=0)

# Exact match is the number of questions whose prediction equals the reference.
n = len(y_true)
exact_match = sum(truth == pred for truth, pred in zip(y_true, y_pred))

# Print the output and metrics
print("Evaluation Metrics:")
print("F1-score:", f1)
print("Precision:", precision)
print("Recall:", recall)
print("Exact Match:", exact_match)
