In [7]:
import pandas as pd
from transformers import pipeline, AutoTokenizer

# Load the RoBERTa-base SQuAD2 model and tokenizer
roberta_model = "deepset/roberta-base-squad2"
roberta_tokenizer = AutoTokenizer.from_pretrained(roberta_model)
roberta_pipeline = pipeline(
    "question-answering",
    model=roberta_model,
    tokenizer=roberta_tokenizer,
)

# Load the DataFrame with the text comments
df = pd.read_excel("evaluations_overall_comments.xlsx")

# Define the question to ask
what_was_bad = "what was bad?"


def _bad_answers_roberta(comment):
    """Return up to two RoBERTa answers to `what_was_bad` for one comment.

    Args:
        comment: One cell of the "Answer:" column; used as the QA context.

    Returns:
        A list of answer strings (possibly empty). Missing (NaN/None) or
        blank comments yield an empty list instead of raising inside the
        pipeline.
    """
    # Empty spreadsheet cells are read as NaN; the pipeline rejects
    # non-text and empty contexts, which would abort the whole .apply().
    if pd.isna(comment):
        return []
    text = str(comment)
    if not text.strip():
        return []

    result = roberta_pipeline(question=what_was_bad, context=text, top_k=2)
    # The pipeline returns a bare dict (not a list) when only a single
    # answer is available, even with top_k > 1. Normalise to a list so the
    # comprehension below does not iterate over the dict's keys.
    if isinstance(result, dict):
        result = [result]
    return [ans["answer"] for ans in result]


# Extract multiple answers for each comment in the DataFrame
df["bad_roberta"] = df["Answer:"].apply(_bad_answers_roberta)



In [8]:
# Load the BERT-large-uncased-whole-word-masking-finetuned-squad model and tokenizer
bert_model = "bert-large-uncased-whole-word-masking-finetuned-squad"
bert_tokenizer = AutoTokenizer.from_pretrained(bert_model)
bert_pipeline = pipeline(
    "question-answering",
    model=bert_model,
    tokenizer=bert_tokenizer,
)


def _bad_answer_bert(comment):
    """Return BERT's single best answer to `what_was_bad` for one comment.

    Args:
        comment: One cell of the "Answer:" column; used as the QA context.

    Returns:
        The answer string, or "" when the comment is missing (NaN/None)
        or blank, so one empty cell does not abort the whole column.
    """
    # Empty spreadsheet cells are read as NaN; the pipeline rejects
    # non-text and empty contexts.
    if pd.isna(comment):
        return ""
    text = str(comment)
    if not text.strip():
        return ""
    return bert_pipeline(question=what_was_bad, context=text)["answer"]


# Extract answers for each comment in the DataFrame
df["bad_bert"] = df["Answer:"].apply(_bad_answer_bert)



In [9]:
# Define the question to ask
how_to_improve = "how to improve?"


def _improve_answers_roberta(comment):
    """Return up to two RoBERTa answers to `how_to_improve` for one comment.

    Args:
        comment: One cell of the "Answer:" column; used as the QA context.

    Returns:
        A list of answer strings (possibly empty). Missing (NaN/None) or
        blank comments yield an empty list instead of raising inside the
        pipeline.
    """
    # Empty spreadsheet cells are read as NaN; the pipeline rejects
    # non-text and empty contexts, which would abort the whole .apply().
    if pd.isna(comment):
        return []
    text = str(comment)
    if not text.strip():
        return []

    result = roberta_pipeline(question=how_to_improve, context=text, top_k=2)
    # The pipeline returns a bare dict (not a list) when only a single
    # answer is available, even with top_k > 1. Normalise to a list so the
    # comprehension below does not iterate over the dict's keys.
    if isinstance(result, dict):
        result = [result]
    return [ans["answer"] for ans in result]


# Extract multiple answers for each comment in the DataFrame
df["improve_roberta"] = df["Answer:"].apply(_improve_answers_roberta)

In [10]:
def _improve_answer_bert(comment):
    """Return BERT's single best answer to `how_to_improve` for one comment.

    Args:
        comment: One cell of the "Answer:" column; used as the QA context.

    Returns:
        The answer string, or "" when the comment is missing (NaN/None)
        or blank, so one empty cell does not abort the whole column.
    """
    # Empty spreadsheet cells are read as NaN; the pipeline rejects
    # non-text and empty contexts.
    if pd.isna(comment):
        return ""
    text = str(comment)
    if not text.strip():
        return ""
    return bert_pipeline(question=how_to_improve, context=text)["answer"]


# Extract answers for each comment in the DataFrame
df["improve_bert"] = df["Answer:"].apply(_improve_answer_bert)

In [11]:
# Show the DataFrame's column labels; as the cell's last bare expression,
# the notebook renders the resulting Index as the cell output.
df.columns

Index(['Target:', 'Evaluator:', 'Rotation Dates:', 'Service:', 'Answer:',
       'bad_roberta', 'bad_bert', 'improve_roberta', 'improve_bert'],
      dtype='object')

In [12]:
# Persist the comments together with the extracted answer columns.
# The writer is opened explicitly so the workbook is saved and closed
# when the `with` block exits.
with pd.ExcelWriter('question_answering.xlsx') as workbook:
    df.to_excel(workbook)