In [5]:
# Install the latest release of Haystack in your own environment
#! pip install farm-haystack

# Install the latest master of Haystack
!pip install --upgrade pip
!pip install git+https://github.com/deepset-ai/haystack.git#egg=farm-haystack[colab]

# Install  pygraphviz
!apt install libgraphviz-dev
!pip install pygraphviz

# In Colab / No Docker environments: Start Elasticsearch from source
! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.9.2-linux-x86_64.tar.gz -q
! tar -xzf elasticsearch-7.9.2-linux-x86_64.tar.gz
! chown -R daemon:daemon elasticsearch-7.9.2

import os
from subprocess import Popen, PIPE, STDOUT

es_server = Popen(
    ["elasticsearch-7.9.2/bin/elasticsearch"], stdout=PIPE, stderr=STDOUT, preexec_fn=lambda: os.setuid(1)  # as daemon
)
# wait until ES has started
! sleep 30

[0mCollecting farm-haystack[colab]
  Cloning https://github.com/deepset-ai/haystack.git to /tmp/pip-install-duzehp_y/farm-haystack_d96aad1173484a5cb9e6bdd7712f5092
  Running command git clone --filter=blob:none --quiet https://github.com/deepset-ai/haystack.git /tmp/pip-install-duzehp_y/farm-haystack_d96aad1173484a5cb9e6bdd7712f5092
  Resolved https://github.com/deepset-ai/haystack.git to commit 85571cdd15f1c9592cf28121187ffef7d4827f83
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Reading package lists... Done
Building dependency tree       
Reading state information... Done
libgraphviz-dev is already the newest version (2.40.1-2).
0 upgraded, 0 newly installed, 0 to remove and 39 not upgraded.
[0m

In [6]:
from haystack.utils import print_answers, fetch_archive_from_http, convert_files_to_dicts, clean_wiki_text, launch_es
from haystack.pipelines import Pipeline, RootNode
from haystack.document_stores import ElasticsearchDocumentStore
from haystack.nodes import (
    ElasticsearchRetriever,
    DensePassageRetriever,
    FARMReader,
    TransformersQueryClassifier,
    SklearnQueryClassifier,
)

# Download and prepare data - 517 Wikipedia articles for Game of Thrones
doc_dir = "data/article_txt_got"
s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt.zip"
fetch_archive_from_http(url=s3_url, output_dir=doc_dir)

# convert files to dicts containing documents that can be indexed to our datastore
got_dicts = convert_files_to_dicts(dir_path=doc_dir, clean_func=clean_wiki_text, split_paragraphs=True)

# Initialize DocumentStore and index documents
launch_es()
document_store = ElasticsearchDocumentStore()
document_store.delete_documents()
document_store.write_documents(got_dicts)

# Initialize Sparse retriever
es_retriever = ElasticsearchRetriever(document_store=document_store)

# Initialize dense retriever
dpr_retriever = DensePassageRetriever(document_store)
document_store.update_embeddings(dpr_retriever, update_existing_embeddings=False)

reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2")

INFO - haystack.utils.import_utils -  Found data stored in `data/article_txt_got`. Delete this first if you really want to fetch new data.
INFO - haystack.utils.preprocessing -  Converting data/article_txt_got/10_Beyond_the_Wall__Game_of_Thrones_.txt
INFO - haystack.utils.preprocessing -  Converting data/article_txt_got/73_A_Man_Without_Honor.txt
INFO - haystack.utils.preprocessing -  Converting data/article_txt_got/346_Ygritte.txt
INFO - haystack.utils.preprocessing -  Converting data/article_txt_got/511_After_the_Thrones.txt
INFO - haystack.utils.preprocessing -  Converting data/article_txt_got/359_Kill_the_Boy.txt
INFO - haystack.utils.preprocessing -  Converting data/article_txt_got/85_Game_of_Thrones__Seven_Kingdoms.txt
INFO - haystack.utils.preprocessing -  Converting data/article_txt_got/358_A_Game_of_Thrones__Genesis.txt
INFO - haystack.utils.preprocessing -  Converting data/article_txt_got/331_Bran_Stark.txt
INFO - haystack.utils.preprocessing -  Converting data/article_txt_go

Updating embeddings:   0%|          | 0/2357 [00:00<?, ? Docs/s]

Create embeddings:   0%|          | 0/2368 [00:00<?, ? Docs/s]

INFO - haystack.modeling.utils -  Using devices: CPU
INFO - haystack.modeling.utils -  Number of GPUs: 0
INFO - haystack.modeling.model.language_model -  LOADING MODEL
INFO - haystack.modeling.model.language_model -  Could not find deepset/roberta-base-squad2 locally.
INFO - haystack.modeling.model.language_model -  Looking on Transformers Model Hub (in local cache and online)...
INFO - haystack.modeling.model.language_model -  Loaded deepset/roberta-base-squad2
INFO - haystack.modeling.logger -  ML Logging is turned off. No parameters, metrics or artifacts will be logged to MLFlow.
INFO - haystack.modeling.utils -  Using devices: CPU
INFO - haystack.modeling.utils -  Number of GPUs: 0
INFO - haystack.modeling.infer -  Got ya 3 parallel workers to do inference ...
INFO - haystack.modeling.infer -   0     0     0  
INFO - haystack.modeling.infer -  /w\   /w\   /w\ 
INFO - haystack.modeling.infer -  /'\   / \   /'\ 


## Question vs Statement Classifier


In [7]:
# Here we build the pipeline
transformer_question_classifier = Pipeline()
transformer_question_classifier.add_node(component=dpr_retriever, name="DPRRetriever", inputs=["Query"])
transformer_question_classifier.add_node(
    component=TransformersQueryClassifier(model_name_or_path="shahrukhx01/question-vs-statement-classifier"),
    name="QueryClassifier",
    inputs=["DPRRetriever"],
)
transformer_question_classifier.add_node(component=reader, name="QAReader", inputs=["QueryClassifier.output_1"])
transformer_question_classifier.draw("question_classifier.png")

# Run only the QA reader on the question query
res_1 = transformer_question_classifier.run(query="Who is the father of Arya Stark?")
print("DPR Results" + "\n" + "=" * 15)
print_answers(res_1, details="minimum")


INFO - haystack.modeling.utils -  Using devices: CPU
INFO - haystack.modeling.utils -  Number of GPUs: 0
  start_indices = flat_sorted_indices // max_seq_len
Inferencing Samples: 100%|██████████| 1/1 [00:01<00:00,  1.16s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:02<00:00,  2.16s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.41 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.35 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.21 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.42 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.28 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:05<00:00,  5.46s/ Batches]
Inferencing Samples: 100%|██████████| 1/1 [00:00<00:00,  1.01 Batches/s]
Inferencing Samples: 100%|██████████| 1/1 [00:02<00:00,  2.20s/ Batches]

DPR Results

Query: Who is the father of Arya Stark?
Answers:
[   {   'answer': 'Eddard and Catelyn Stark',
        'context': 'Background ===\n'
                   'Arya is the third child and younger daughter of Eddard and '
                   'Catelyn Stark and is nine years old at the beginning of '
                   'the book series.  Sh'},
    {   'answer': 'Rhaegar',
        'context': ', Aemon Targaryen, Jorah Mormont, Meera Reed, Jon '
                   'Connington and Gilly.\n'
                   'Rhaegar married the Dornish princess Elia Martell of '
                   'Sunspear, and fathered wi'},
    {   'answer': 'Eddard Stark',
        'context': 'e from House Tully in the Riverlands region prior to her '
                   'marriage to Eddard Stark. She has her hair dyed dark brown '
                   'later on while in the Vale, disgui'},
    {   'answer': 'Eddard Stark and Catelyn Stark',
        'context': 'ces==\n'
                   'Sansa Stark is the second ch




In [8]:
# Here we create the question vs statement query classifier
from haystack.pipelines import TransformersQueryClassifier

queries = [
    "Lord Eddard was the father of Arya Stark.",
    "Jon Snow was filmed in United Kingdom.",
    "who is the father of arya stark?",
    "Which country was jon snow filmed in?",
    "Did Pope Francis Just Pave the Way for Women Priests?"
]

question_classifier = TransformersQueryClassifier(model_name_or_path="shahrukhx01/question-vs-statement-classifier")

for query in queries:
    result = question_classifier.run(query=query)
    if result[1] == "output_1":
        category = "question"
    else:
        category = "statement"

    print(f"Query: {query}, raw_output: {result}, class: {category}")

INFO - haystack.modeling.utils -  Using devices: CPU
INFO - haystack.modeling.utils -  Number of GPUs: 0


Query: Lord Eddard was the father of Arya Stark., raw_output: ({}, 'output_2'), class: statement
Query: Jon Snow was filmed in United Kingdom., raw_output: ({}, 'output_2'), class: statement
Query: who is the father of arya stark?, raw_output: ({}, 'output_1'), class: question
Query: Which country was jon snow filmed in?, raw_output: ({}, 'output_1'), class: question
Query: Did Pope Francis Just Pave the Way for Women Priests?, raw_output: ({}, 'output_1'), class: question


apply to our dataframe


In [9]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
%cd /content/drive/MyDrive/Clean CS224N folder

/content/drive/MyDrive/Clean CS224N folder


In [11]:
import pandas as pd
# df = pd.read_csv('SCBAll.csv')
# #Check what preprocessing (Beicheng uses pruned data set)
# df = df.dropna() #remove nones
# df['summary'] = df['summary'].str.replace('#StopClickbait', '')
# df

In [12]:
# df['classifier']=''
# for ind in range(786):
#     try:
#       query = df['title'][ind]
#       result = question_classifier.run(query=query)
#       if result[1] == "output_1":
#           category = "question"
#       else:
#           category = "statement"
#       df['classifier'][ind] = category
#     except:
#       pass


In [13]:
df

NameError: ignored

In [None]:
# df.to_excel("output_class.xlsx")

In [None]:
# df['classifier'].value_counts()

In [None]:
# print("statements= "+str(590/(590+141)))
# print("question= "+str(141/(590+141)))

See if extr or abs performs better for which task!

In [None]:
# the longformer model i saved as output_ext excel file
import pandas as pd
longformer_df = pd.read_csv('whole_dataset_valhalla-longformer.csv')

In [None]:
ind=7
print("title: " +str(longformer_df["title"][ind]))
print("answer: " +str(longformer_df["summary"][ind]))
print("ext answer: " +str(longformer_df["ext answer val_Sq1"][ind]))

In [None]:
longformer_df['classifier']=''
for ind in range(len(longformer_df)):
    try:
      query = longformer_df['title'][ind]
      result = question_classifier.run(query=query)
      if result[1] == "output_1":
          category = "question"
      else:
          category = "statement"
      longformer_df['classifier'][ind] = category
    except:
      pass


In [None]:
longformer_df

In [None]:
longformer_df = longformer_df.dropna()
longformer_df_S = longformer_df[longformer_df['classifier'] == "statement"]
longformer_df_Q = longformer_df[longformer_df['classifier'] == "question"]

In [None]:
lenS= len(longformer_df_S)
lenQ = len(longformer_df_Q)

print("S questions: "+ str(lenS/(lenS+lenQ)))
print("Q questions: "+ str(lenQ/(lenS+lenQ)))


BERTscore for longformer_df_S and longformer_df_Q

In [None]:
!pip install bert-score
!pip install torch

In [None]:
from bert_score import score as bscore
import numpy as np
import torch

In [None]:
from bert_score import score
import numpy as np
import torch

In [None]:
tmp = longformer_df
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["ext answer val_Sq1"].tolist()], lang='en')
print("Longformer Statements")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

tmp = longformer_df_S
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["ext answer val_Sq1"].tolist()], lang='en')
print("Longformer Statements")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

tmp = longformer_df_Q
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["ext answer val_Sq1"].tolist()], lang='en')
print("Longformer Questions")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

Rouge scores Longformer Q and S

In [None]:
!pip install rouge/requirements.txt
!pip install rouge-score

In [None]:
import numpy as np
from rouge_score import rouge_scorer


In [None]:
import numpy as np
from rouge_score import rouge_scorer

tmp = longformer_df
print(len(longformer_df))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['ext answer val_Sq1'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))


tmp = longformer_df_S
print(len(longformer_df_S))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['ext answer val_Sq1'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))

tmp = longformer_df_Q
print(len(longformer_df_Q))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['ext answer val_Sq1'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))

In [None]:
longformer_df.to_csv('./longformer_df_all.csv')
longformer_df_S.to_csv('./longformer_df_S.csv')
longformer_df_Q.to_csv('./longformer_df_Q.csv')

# do it for T5 epoch 20

In [None]:
import pandas as pd
t5_df = pd.read_csv('T5e20whole_dataset.csv')

In [None]:
query

In [None]:
t5_df['classifier']=''
for ind in range(len(t5_df)):
    try:
      query = t5_df['title'][ind]
      result = question_classifier.run(query=query)
      if result[1] == "output_1":
          category = "question"
      else:
          category = "statement"
      t5_df['classifier'][ind] = category
    except:
      pass


In [None]:
t5_df

In [None]:
t5_df = t5_df.dropna()
t5_df_S = t5_df[t5_df['classifier'] == "statement"]
t5_df_Q = t5_df[t5_df['classifier'] == "question"]

In [None]:
lenS= len(t5_df_S)
lenQ = len(t5_df_Q)
print(lenS)
print(lenQ)
print("S questions: "+ str(lenS/(lenS+lenQ)))
print("Q questions: "+ str(lenQ/(lenS+lenQ)))


In [None]:

tmp = t5_df
print(len(t5_df))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['abs answer e20'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))


tmp = t5_df_S
print(len(t5_df_S))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['abs answer e20'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))

tmp = t5_df_Q
print(len(t5_df_Q))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['abs answer e20'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))

In [None]:
from bert_score import score
import numpy as np
import torch

tmp = t5_df
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["abs answer e20"].tolist()], lang='en')
print("Longformer Statements")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

tmp = t5_df_S
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["abs answer e20"].tolist()], lang='en')
print("Longformer Statements")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

tmp = t5_df_Q
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["abs answer e20"].tolist()], lang='en')
print("Longformer Questions")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

In [None]:
t5_df.to_csv('./t5_df_all.csv')
t5_df_S.to_csv('./t5_df_S.csv')
t5_df_Q.to_csv('./t5_df_Q.csv')

# reddit fine-tuned ext model

In [None]:
import pandas as pd
fsquad_df = pd.read_csv('validation_squad_finetuned_fb_all.csv')
SCB_all_v2 = pd.read_csv('SCB_all_v2.csv')
SCB_all_v2 = SCB_all_v2.dropna()
SCB_all_v2 = SCB_all_v2.reset_index(drop=True)
SCB_all_v2=SCB_all_v2[['summary','title','article']]
SCB_all_v2

In [None]:
fsquad_df = pd.concat([fsquad_df['Predicted'],SCB_all_v2['title'],SCB_all_v2['summary']],axis=1)
fsquad_df

In [None]:
result = question_classifier.run(query=query)


In [None]:
fsquad_df['classifier']=''
for ind in range(len(fsquad_df)):
    try:
      query = fsquad_df['title'][ind]
      result = question_classifier.run(query=query)
      if result[1] == "output_1":
          category = "question"
      else:
          category = "statement"
      fsquad_df['classifier'][ind] = category
    except:
      pass


In [None]:
fsquad_df

In [None]:
fsquad_df = fsquad_df.dropna()
fsquad_df_S = fsquad_df[fsquad_df['classifier'] == "statement"]
fsquad_df_Q = fsquad_df[fsquad_df['classifier'] == "question"]

In [None]:
lenS= len(fsquad_df_S)
lenQ = len(fsquad_df_Q)
print(lenS)
print(lenQ)
print("S questions: "+ str(lenS/(lenS+lenQ)))
print("Q questions: "+ str(lenQ/(lenS+lenQ)))


In [None]:

tmp = fsquad_df
print(len(fsquad_df))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['Predicted'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))


tmp = fsquad_df_S
print(len(fsquad_df_S))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['Predicted'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))

tmp = fsquad_df_Q
print(len(fsquad_df_Q))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['Predicted'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))

In [None]:
from bert_score import score
import numpy as np
import torch

tmp = fsquad_df
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["Predicted"].tolist()], lang='en')
print("Longformer Statements")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

tmp = fsquad_df_S
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["Predicted"].tolist()], lang='en')
print("Longformer Statements")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

tmp = fsquad_df_Q
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["Predicted"].tolist()], lang='en')
print("Longformer Questions")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

In [None]:
fsquad_df_all.to_csv('./fsquad_df_all.csv')
fsquad_df_S.to_csv('./fsquad_df_S.csv')
fsquad_df_Q.to_csv('./fsquad_df_Q.csv')

# fine tuned news dataset 

In [None]:
import pandas as pd
fnews_df = pd.read_csv('validation_newsqa_finetuned_fb_all.csv')
SCB_all_v2 = pd.read_csv('SCB_all_v2.csv')
SCB_all_v2 = SCB_all_v2.dropna()
SCB_all_v2 = SCB_all_v2.reset_index(drop=True)
SCB_all_v2=SCB_all_v2[['summary','title','article']]
SCB_all_v2

In [None]:
fnews_df = pd.concat([fnews_df['Predicted'],SCB_all_v2['title'],SCB_all_v2['summary']],axis=1)
fnews_df

In [None]:
result = question_classifier.run(query=query)


In [None]:
fnews_df['classifier']=''
for ind in range(len(fnews_df)):
    try:
      query = fnews_df['title'][ind]
      result = question_classifier.run(query=query)
      if result[1] == "output_1":
          category = "question"
      else:
          category = "statement"
      fnews_df['classifier'][ind] = category
    except:
      pass


In [None]:
fnews_df

In [None]:
fnews_df = fnews_df.dropna()
fnews_df_S = fnews_df[fnews_df['classifier'] == "statement"]
fnews_df_Q = fnews_df[fnews_df['classifier'] == "question"]

In [None]:
lenS= len(fnews_df_S)
lenQ = len(fnews_df_Q)
print(lenS)
print(lenQ)
print("S questions: "+ str(lenS/(lenS+lenQ)))
print("Q questions: "+ str(lenQ/(lenS+lenQ)))


In [None]:

tmp = fnews_df
print(len(fnews_df))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['Predicted'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))


tmp = fnews_df_S
print(len(fsquad_df_S))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['Predicted'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))

tmp = fnews_df_Q
print(len(fsquad_df_Q))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['Predicted'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))

In [None]:
from bert_score import score
import numpy as np
import torch

tmp = fsquad_df
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["Predicted"].tolist()], lang='en')
print("Longformer Statements")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

tmp = fsquad_df_S
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["Predicted"].tolist()], lang='en')
print("Longformer Statements")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

tmp = fsquad_df_Q
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["Predicted"].tolist()], lang='en')
print("Longformer Questions")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

In [None]:
fsquad_df_all.to_csv('./fsquad_df_all.csv')
fsquad_df_S.to_csv('./fsquad_df_S.csv')
fsquad_df_Q.to_csv('./fsquad_df_Q.csv')

## Let's try again but for everything

In [None]:
import pandas as pd
df_everything = pd.read_csv('df_test_everything.csv')
df_everything

In [None]:
query

In [None]:
df_everything['classifier']=''
for ind in range(len(df_everything)):
    try:
      query = df_everything['title'][ind]
      result = question_classifier.run(query=query)
      if result[1] == "output_1":
          category = "question"
      else:
          category = "statement"
      df_everything['classifier'][ind] = category
    except:
      pass


In [None]:
df_everything

In [None]:
df_everything = df_everything.dropna()
df_everything_S = df_everything[df_everything['classifier'] == "statement"]
df_everything_Q = df_everything[df_everything['classifier'] == "question"]

In [None]:
lenS= len(df_everything_S)
lenQ = len(df_everything_Q)
print(lenS)
print(lenQ)
print("S questions: "+ str(lenS/(lenS+lenQ)))
print("Q questions: "+ str(lenQ/(lenS+lenQ)))


# T5

In [None]:
#T5

tmp = df_everything
print(len(df_everything))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['fT5'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))


# tmp = df_everything_S
# print(len(df_everything_S))
# # a list of the hypothesis documents
# hyp = [str(i) for i in tmp['summary'].tolist()]
# # a list of the references documents
# ref = [str(i) for i in tmp['fT5'].tolist()]
# for ind in ['1','2','L']:
#   print("Rouge"+ind)
#   scorer = rouge_scorer.RougeScorer(['rouge'+ind])
#   results = {'precision': [], 'recall': [], 'fmeasure': []}
#   for (h, r) in zip(hyp, ref):
#       score = scorer.score(h, r)
#       precision, recall, fmeasure = score['rouge'+ind]
#       results['precision'].append(precision)
#       results['recall'].append(recall)
#       results['fmeasure'].append(fmeasure)
#   print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
#   print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
#   print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))

# tmp = df_everything_Q
# print(len(df_everything_Q))
# # a list of the hypothesis documents
# hyp = [str(i) for i in tmp['summary'].tolist()]
# # a list of the references documents
# ref = [str(i) for i in tmp['fT5'].tolist()]
# for ind in ['1','2','L']:
#   print("Rouge"+ind)
#   scorer = rouge_scorer.RougeScorer(['rouge'+ind])
#   results = {'precision': [], 'recall': [], 'fmeasure': []}
#   for (h, r) in zip(hyp, ref):
#       score = scorer.score(h, r)
#       precision, recall, fmeasure = score['rouge'+ind]
#       results['precision'].append(precision)
#       results['recall'].append(recall)
#       results['fmeasure'].append(fmeasure)
#   print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
#   print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
#   print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))

In [None]:
from bert_score import score
import numpy as np
import torch

tmp = df_everything
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["fT5"].tolist()], lang='en')
print("Longformer Statements")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

tmp = df_everything_S
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["fT5"].tolist()], lang='en')
print("Longformer Statements")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

tmp = df_everything_Q
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["fT5"].tolist()], lang='en')
print("Longformer Questions")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

# Squad

In [None]:
#fR+S

tmp = df_everything
print(len(df_everything))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['fN'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))


tmp = df_everything_S
print(len(df_everything_S))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['fN'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))

tmp = df_everything_Q
print(len(df_everything_Q))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['fN'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))

In [None]:
from bert_score import score
import numpy as np
import torch

tmp = df_everything
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["fN"].tolist()], lang='en')
print("Longformer Statements")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

tmp = df_everything_S
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["fN"].tolist()], lang='en')
print("Longformer Statements")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

tmp = df_everything_Q
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["fN"].tolist()], lang='en')
print("Longformer Questions")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

# News

In [None]:
#fR+N

tmp = df_everything
print(len(df_everything))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['fS'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))


tmp = df_everything_S
print(len(df_everything_S))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['fS'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))

tmp = df_everything_Q
print(len(df_everything_Q))
# a list of the hypothesis documents
hyp = [str(i) for i in tmp['summary'].tolist()]
# a list of the references documents
ref = [str(i) for i in tmp['fS'].tolist()]
for ind in ['1','2','L']:
  print("Rouge"+ind)
  scorer = rouge_scorer.RougeScorer(['rouge'+ind])
  results = {'precision': [], 'recall': [], 'fmeasure': []}
  for (h, r) in zip(hyp, ref):
      score = scorer.score(h, r)
      precision, recall, fmeasure = score['rouge'+ind]
      results['precision'].append(precision)
      results['recall'].append(recall)
      results['fmeasure'].append(fmeasure)
  print("results['precision']"+ str(np.around(np.mean(results['precision'])*100,2)))
  print("results['recall']"+ str(np.around(np.mean(results['recall'])*100,2)))
  print("results['fmeasure']"+ str(np.around(np.mean(results['fmeasure'])*100,2)))

In [None]:
from bert_score import score
import numpy as np
import torch

tmp = df_everything
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["fS"].tolist()], lang='en')
print("Longformer Statements")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

tmp = df_everything_S
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["fS"].tolist()], lang='en')
print("Longformer Statements")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

tmp = df_everything_Q
Pb, Rb, Fb = score([str(i) for i in tmp['summary'].tolist()], [str(i) for i in tmp["fS"].tolist()], lang='en')
print("Longformer Questions")
print("Precision: "+str(torch.mean(Pb)))
print("Recall: "+str(torch.mean(Rb[~torch.isnan(Rb)])))
print("Fbert: "+str(torch.mean(Fb)))

# Same thing but for reddit test data set

# Comparison

In [14]:
from bert_score import score as bscore
import numpy as np
import torch
import numpy as np
from rouge_score import rouge_scorer

from transformers import logging
logging.set_verbosity_warning()

def print_rouge(h,r):
  #for ind in ['1','2','L']:
  for ind in ['L']:
    scorer = rouge_scorer.RougeScorer(['rouge'+ind])
    score = scorer.score(h, r)
    precision, recall, fmeasure = score['rouge'+ind]
    print("ROUGE-"+str(ind)+", P:"+str(precision)+", R:"+str(recall)+", F:"+str(fmeasure))
    return 

def print_bertscore(h,r):
  Pb, Rb, Fb = bscore([h], [r], lang='en', verbose=False)
  print("BERTscore "+str(i)+", P:"+str(Pb)+", R:"+str(Rb)+", F:"+str(Fb))
  return


for i in range(len(df_everything)):
  ref = df_everything["summary"][i]
  print("index: "+ str(i))
  print("Title: "+ df_everything["title"][i].replace("\n",""))
  print("Fb answer: "+ ref.replace("\n",""))
  out = df_everything["fS"][i]
  print("fS: "+ out.replace("\n",""))
  print_rouge(out,ref)
  print_bertscore(out,ref)
  out = df_everything["fN"][i]
  print("fN: "+ out.replace("\n",""))
  print_rouge(out,ref)
  print_bertscore(out,ref)
  out = df_everything["fT5"][i]
  print("fT5: "+ out.replace("\n",""))
  print_rouge(out,ref)
  print_bertscore(out,ref)
  print("\n")



NameError: ignored