# 6 Disagreement Pairs

## 0 Import Libraries

In [1]:
import pandas as pd
import spacy
from spacy.matcher import Matcher
from spacy.pipeline import EntityRuler
from spacy import displacy
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
# Import English Library
nlp = spacy.load("en_core_web_lg", disable=["ner"])

## 1 Load Dataframe

In [3]:
# Load sentences dataframe from 'HCQ_sentences.json': sentences_df
sentences_df = pd.read_json("../data/HCQ_sentences.json")

In [4]:
sentences_df.head(3)

Unnamed: 0,sentence_id,title,sentence
0,pub.1126880632-0,COVID-19 and what pediatric rheumatologists sh...,"On March 11th, 2020 the World Health Organizat..."
1,pub.1126880632-1,COVID-19 and what pediatric rheumatologists sh...,"The infection, transmitted by 2019 novel coron..."
2,pub.1126880632-2,COVID-19 and what pediatric rheumatologists sh...,"Italy was early and severely involved, with a ..."


## 2 Verb Filtered Sentences

In [5]:
# Make list of sentences (Doc-object) 
# from column 'sentence' of dataframe 'sentences_df': doc_list
doc_list = list(nlp.pipe(sentences_df["sentence"].to_list()))

In [6]:
# Make matcher: 'verb_matcher_2'
verb_matcher_2 = Matcher(nlp.vocab, validate=True)

In [7]:
# Make search pattern for 'verb_matcher_2': support_verbs_pattern
support_verbs_pattern = [{"POS": "VERB", "DEP": "ROOT", "LEMMA": {"IN": ['reveal', 'show', 'suggest', 'support']}}]

In [8]:
# Make additional search pattern for 'verb_matcher_2': confirm_pattern
confirm_pattern = [{"POS": "VERB", "DEP": "xcomp", "LEMMA": "confirm"}]

In [9]:
# Add 'support_verbs_pattern' and 'confirm_pattern' to 'verb_matcher_2'
verb_matcher_2.add("VERB_ID", None, support_verbs_pattern, confirm_pattern)

In [10]:
# Filter Doc-objects (sentences) in 'doc_list' and add the selected Docs to a list: verb_filtered_sentences_2
verb_filtered_sentences_2 = [doc for doc in doc_list if len(verb_matcher_2(doc)) > 0] 

In [11]:
# Print each enumerated verb filtered sentence
for sentence_number, sentence in enumerate(verb_filtered_sentences_2):
    print(f"({sentence_number}) {sentence}")

(0) We were unable to confirm a benefit of hydroxychloroquine or chloroquine, when used alone or with a macrolide, on in-hospital outcomes for COVID-19.
(1) This work was supported by the Emergent Projects of National Science and Technology (2020YFC0844500), National Natural Science Foundation of China (81970020, 81770025), National Key Research and Development Program of China (2016YFC0901104), Shanghai Municipal Key Clinical Specialty (shslczdzk02202, shslczdzk01103), National Innovative Research Team of High-level Local Universities in Shanghai, Shanghai Key Discipline for Respiratory Diseases (2017ZZ02014), National Major Scientific and Technological Special Project for Significant New Drugs Development (2017ZX09304007), Key Projects in the National Science and Technology Pillar Program during the Thirteenth Five-year Plan Period (2018ZX09206005-004, 2017ZX10202202-005-004, 2017ZX10203201-008).
(2) This re-analysis reveals severe limitations in the methodology of this study, includ

## 3 Noun Filtered Sentences

In [12]:
# Make a matcher 'noun_matcher_2'
noun_matcher_2 = Matcher(nlp.vocab, validate=True)

In [13]:
# EVIDENCE-noun patterns for 'noun_matcher_2'
analysis_pattern = [{"POS": "NOUN", "DEP": "nsubj", "LEMMA": "analysis"}]
evidence_pattern = [{"POS": "NOUN", "DEP": "nsubj", "LEMMA": "evidence"}]
finding_pattern = [{"POS": "NOUN", "DEP": "nsubj", "LEMMA": "finding"}]

result_pattern = [{"POS": "NOUN", "DEP": "nsubj", "LEMMA": "result"}]
survey_pattern = [{"POS": "NOUN", "DEP": "nsubj", "LEMMA": "survey"}]

In [14]:
# Additional search pattern for 'noun_matcher_2': 'trial_pattern'
trial_pattern = [{"POS": "NOUN", "DEP": "pobj", "LEMMA": "trial"}]

In [15]:
# Additional search pattern for 'noun_matcher_2': 'we_pattern'
# Matches the pronoun "we" (any casing) used as nominal subject.
# The token text is matched via LOWER instead of the generic pronoun lemma "-PRON-":
# that lemma is shared by all pronouns, so it would also accept "it", "they", ... as subject.
we_pattern = [{"POS": "PRON", "DEP": "nsubj", "LOWER": "we"}]

In [16]:
# Add search patterns to 'noun_matcher_2'
noun_matcher_2.add("NOUN_ID", None, 
                   analysis_pattern, 
                   evidence_pattern, 
                   finding_pattern, 
                   result_pattern, 
                   survey_pattern, 
                   trial_pattern, 
                   we_pattern)

In [17]:
# Filter Doc-objects (sentences) in 'verb_filtered_sentences_2' and add the selected Docs to a list: 
# noun_filtered_sentences_2
noun_filtered_sentences_2 = [doc.text for doc in verb_filtered_sentences_2 if len(noun_matcher_2(doc)) > 0] 

In [18]:
# Print each enumerated noun filtered sentence
for sentence_number, sentence in enumerate(noun_filtered_sentences_2):
    print(f"({sentence_number}) {sentence}")

(0) We were unable to confirm a benefit of hydroxychloroquine or chloroquine, when used alone or with a macrolide, on in-hospital outcomes for COVID-19.
(1) This re-analysis reveals severe limitations in the methodology of this study, including ambiguous inclusion/exclusion of participant data and inconsistent analysis techniques, and yielded nonsignificant differences between control and treatment groups across any treatment days.
(2) This systematic review and meta-analysis showed no clinical benefits regarding HCQ treatment with/without azithromycin for COVID-19 patients.
(3) These results do not support the use of HCQ in patients hospitalised for documented SARS-CoV-2-positive hypoxic pneumonia.
(4) Interpretation Preliminary findings suggest that the higher CQ dosage (10-day regimen) should not be recommended for COVID-19 treatment because of its potential safety hazards.
(5) Preliminary evidence suggests potential benefit with chloroquine or hydroxychloroquine.
(6) The findings s

In [19]:
# Show the number of items in 'doc_list'/'verb_filtered_sentences_2'/'noun_filtered_sentences_2' using 'len()'
print(f"Number of sentences in 'doc_list': {len(doc_list)}")
print(f"Number of sentences in 'verb_filtered_sentences_2': {len(verb_filtered_sentences_2)}")
print(f"Number of sentences in 'noun_filtered_sentences_2': {len(noun_filtered_sentences_2)}")

Number of sentences in 'doc_list': 216
Number of sentences in 'verb_filtered_sentences_2': 11
Number of sentences in 'noun_filtered_sentences_2': 9


## 4 Label Entities

For the following see:

[https://spacy.io/usage/rule-based-matching#entityruler](https://spacy.io/usage/rule-based-matching#entityruler)

**EntityRuler**

In [20]:
# Initialize spacy's EntityRuler: ruler
ruler = EntityRuler(nlp, validate=True)

In [21]:
# Add 'ruler' to the pipeline of 'nlp'
# Re-running this cell must not fail: if an "entity_ruler" component is already
# registered in the pipeline, it is swapped for the current 'ruler' instead of
# trying to add a second component with the same name.
if "entity_ruler" in nlp.pipe_names:
    nlp.replace_pipe("entity_ruler", ruler)
else:
    nlp.add_pipe(ruler)

**SUPPORT-verbs**

In [22]:
# Pattern for Entity "SUPP" (SUPPORT-verb): verb_patterns
verb_patterns = [{"label": "SUPP", "pattern": [{"POS": "VERB", "DEP": "ROOT", "LEMMA": {"IN": ['reveal', 'show', 'suggest', 'support']}}]}, 
                 {"label": "SUPP", "pattern": [{"POS": "VERB", "DEP": "xcomp", "LEMMA": "confirm"}]}]

**EVIDENCE-nouns**

In [23]:
# Pattern for Entity "EVID" (EVIDENCE-noun) : evidence_patterns
evidence_patterns = [{'label': 'EVID', 'pattern': 'This re-analysis'},
                     {'label': 'EVID', 'pattern': 'This systematic review and meta-analysis'},
                     {'label': 'EVID', 'pattern': 'These results'},
                     {'label': 'EVID', 'pattern': 'Interpretation Preliminary findings'},
                     {'label': 'EVID', 'pattern': 'Preliminary evidence'},
                     {'label': 'EVID', 'pattern': 'The findings'},
                     {'label': 'EVID', 'pattern': 'our survey'},
                     {"label": "EVID", "pattern": "multicenter clinical trials"}]

In [24]:
# Pattern for Entity "SCI" (scientists): we_label_pattern
we_label_pattern = [{"label": "SCI", "pattern": "We"}]

**Negations**

In [25]:
# Pattern for Entity "NEG" (negation): negation_patterns
negation_patterns = [{"label": "NEG", "pattern": [{"LEMMA": {"IN": ["not", "no", "unable"]}}]}]

**Apply "ruler" to Sentences**

In [26]:
# Add patterns to 'ruler'
ruler.add_patterns(verb_patterns)
ruler.add_patterns(evidence_patterns)
ruler.add_patterns(we_label_pattern)
ruler.add_patterns(negation_patterns)

  self.phrase_matcher.add(label, patterns)
  self.phrase_matcher.add(label, patterns)


In [27]:
# Convert strings in 'noun_filtered_sentences_2' into Doc-objects with labeled named entities
# make a list of these Docs: disagreement_sentences_5
disagreement_sentences_5 = list(nlp.pipe(noun_filtered_sentences_2))

## 5 Separate Sentences with Regard to Negation

In [28]:
# Make 'negation_matcher'
negation_matcher = Matcher(nlp.vocab, validate=True)

In [29]:
# Negation Pattern
negation_pattern = [{"LEMMA": {"IN": ["not", "no", "unable"]}}]

In [30]:
# Add 'negation_pattern' to 'negation_matcher'
negation_matcher.add("NEGATION_ID", None, negation_pattern)

In [31]:
# List of affirmative sentences: sents
sents = []

In [32]:
# List of negated sentences: negated_sents
negated_sents = []

In [33]:
# Define a function that separates Docs from a list according to whether a negation occurs in a Doc or not:
# negation_filter()
def negation_filter(sent_list):
    """Distribute the Docs of ``sent_list`` over two module-level lists.

    A Doc for which ``negation_matcher`` yields at least one match is appended
    to ``negated_sents``; every other Doc is appended to ``sents``.
    The function returns nothing; its result is the growth of those two lists,
    so calling it twice on the same input stores every Doc twice.
    """
    for candidate in sent_list:
        has_negation = len(negation_matcher(candidate)) > 0
        bucket = negated_sents if has_negation else sents
        bucket.append(candidate)

In [34]:
# Apply 'negation_filter()' to 'disagreement_sentences_5'. Docs of 'disagreement_sentences_5' will be either stored in 
# 'sents' or 'negated_sents'
negation_filter(disagreement_sentences_5)

**sents**

In [35]:
# Show with running number the items of enumerated list 'sents'
# Highlight labeled entities ('SUPP': SUPPORT-verb; 'EVID': EVIDENCE-noun/'SCI': group of scientists; 'NEG': negation)
for sentence_number, sentence in enumerate(sents):
    print(f"({sentence_number})")
    displacy.render(sentence, style="ent", jupyter=True)
    print("----------------------------------------------------------------------\n")

(0)


----------------------------------------------------------------------

(1)


----------------------------------------------------------------------

(2)


----------------------------------------------------------------------

(3)


----------------------------------------------------------------------

(4)


----------------------------------------------------------------------



**negated_sents**

In [36]:
# Show with running number the items of enumerated list 'negated_sents'
# Highlight labeled entities ('SUPP': SUPPORT-verb; 'EVID': EVIDENCE-noun/'SCI': group of scientists; 'NEG': negation)
for sentence_number, sentence in enumerate(negated_sents):
    print(f"({sentence_number})")
    displacy.render(sentence, style="ent", jupyter=True)
    print("----------------------------------------------------------------------\n")

(0)


----------------------------------------------------------------------

(1)


----------------------------------------------------------------------

(2)


----------------------------------------------------------------------

(3)


----------------------------------------------------------------------



In [37]:
# Number of sentences in 'sents'
len(sents)

5

In [38]:
# Number of sentences in 'negated_sents'
len(negated_sents)

4

## 6 Disagreement Pairs

### 6.1 Preparing Evaluation

#### 6.1.1 Make Sentence Pairs for Labeling

In [39]:
# Define a function that makes a dataframe - preparation for labeling sentence pairs: label_sentence_pairs()
# Purpose: pairs of sentences are to be labeled with regard to whether they show disagreement or not
def label_sentence_pairs(list_of_affirmative_sentences, list_of_negated_sentences):
    """Build the table of sentence pairs that is to be labeled by hand.

    Every item of ``list_of_affirmative_sentences`` is combined with every item
    of ``list_of_negated_sentences`` (affirmative sentences vary slowest). Only
    the ``.text`` attribute of each item is used.

    Returns a DataFrame with the columns "label", "affirmative sentence" and
    "negated sentence"; "label" is pre-filled with the default value 0.
    """
    column_names = ["label", "affirmative sentence", "negated sentence"]
    default_label = 0

    rows = [
        [default_label, affirmative.text, negated.text]
        for affirmative in list_of_affirmative_sentences
        for negated in list_of_negated_sentences
    ]

    return pd.DataFrame(rows, columns=column_names)

In [40]:
# Make dataframe by passing 'sents' and 'negated_sents' to function 'label_sentence_pairs()': sentence_pairs_df
# Each sentence of 'sents' is combined with every sentence of 'negated_sents'
sentence_pairs_df = label_sentence_pairs(sents, negated_sents)

In [41]:
# Show dataframe 'sentence_pairs_df'
sentence_pairs_df

Unnamed: 0,label,affirmative sentence,negated sentence
0,0,This re-analysis reveals severe limitations in...,We were unable to confirm a benefit of hydroxy...
1,0,This re-analysis reveals severe limitations in...,This systematic review and meta-analysis showe...
2,0,This re-analysis reveals severe limitations in...,These results do not support the use of HCQ in...
3,0,This re-analysis reveals severe limitations in...,Interpretation Preliminary findings suggest th...
4,0,Preliminary evidence suggests potential benefi...,We were unable to confirm a benefit of hydroxy...
5,0,Preliminary evidence suggests potential benefi...,This systematic review and meta-analysis showe...
6,0,Preliminary evidence suggests potential benefi...,These results do not support the use of HCQ in...
7,0,Preliminary evidence suggests potential benefi...,Interpretation Preliminary findings suggest th...
8,0,The findings support the hypothesis that these...,We were unable to confirm a benefit of hydroxy...
9,0,The findings support the hypothesis that these...,This systematic review and meta-analysis showe...


In [42]:
# Export 'sentence_pairs_df' as an Excel spread sheet
sentence_pairs_df.to_excel("../labeling/sentence_pairs_to_label.xlsx", index=False)

In [43]:
# Import table with labeled sentence pairs as a dataframe: df_labeled_pairs
df_labeled_pairs = pd.read_excel("../labeling/sentence_pairs_labeled.xlsx")

In [44]:
# Rearrange columns of 'df_labeled_pairs'
df_labeled_pairs = df_labeled_pairs[["affirmative sentence", "negated sentence", "label"]]

In [45]:
# Show rows 6 till 11 of 'df_labeled_pairs'
df_labeled_pairs.iloc[6:12]

Unnamed: 0,affirmative sentence,negated sentence,label
6,Preliminary evidence suggests potential benefi...,These results do not support the use of HCQ in...,1
7,Preliminary evidence suggests potential benefi...,Interpretation Preliminary findings suggest th...,0
8,The findings support the hypothesis that these...,We were unable to confirm a benefit of hydroxy...,1
9,The findings support the hypothesis that these...,This systematic review and meta-analysis showe...,1
10,The findings support the hypothesis that these...,These results do not support the use of HCQ in...,1
11,The findings support the hypothesis that these...,Interpretation Preliminary findings suggest th...,0


#### 6.1.2 Span Similarity

In [46]:
# Make and configure Matcher: span_matcher
# 'span_matcher' is used in the following function 'span_similarity()'
# Purpose: spot the token after which a Doc is to be cut into two parts. The second part is the Span of interest
span_matcher = Matcher(nlp.vocab, validate=True)

span_pattern_1 = [{"POS": "VERB", "DEP": "ROOT", "LEMMA": {"IN": ['reveal', 'show', 'suggest', 'support']}}]
span_pattern_2 = [{"POS": "VERB", "DEP": "xcomp", "LEMMA": "confirm"}]

span_matcher.add("SPAN_ID", None, span_pattern_1, span_pattern_2)

In [47]:
# Define function that computes the Span Similarity for each sentence pair: span_similarity()
# Takes a dataframe (df) as argument; returns a dict that contains the similarity values
def span_similarity(df):
    """Compute the Span Similarity for every sentence pair (row) of ``df``.

    Each sentence is processed with ``nlp`` and cut behind the token matched by
    ``span_matcher``; the remainder of the sentence is the Span of interest.
    For affirmative sentences the Span starts directly behind the matched
    token, for negated sentences one further token is skipped
    (NOTE(review): presumably to drop a negation word that directly follows
    the verb, as in "showed no ..." - confirm).

    Exactly one value is produced per row, in row order. If a sentence yields
    several matches, the earliest one is used; if one of the two sentences of
    a row yields no match at all, the value for that row is NaN. This keeps the
    result aligned with the rows of ``df`` in every case.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns "affirmative sentence" and "negated sentence".

    Returns
    -------
    dict
        ``{"span_similarity": [value_row_0, value_row_1, ...]}``
    """
    def span_after_match(doc, skip):
        # Part of 'doc' behind its earliest 'span_matcher' match, or None if nothing matched
        matches = span_matcher(doc)
        if not matches:
            return None
        _, _, end = min(matches, key=lambda match: match[1])
        return doc[end + skip:]

    # Doc lists
    aff_list = list(nlp.pipe(df["affirmative sentence"]))
    neg_list = list(nlp.pipe(df["negated sentence"]))

    # Similarity values list (one entry per row)
    span_sim = []

    for doc, doc_neg in zip(aff_list, neg_list):
        aff_span = span_after_match(doc, 0)
        neg_span = span_after_match(doc_neg, 1)

        if aff_span is None or neg_span is None:
            # No cut point found: similarity is undefined for this pair
            span_sim.append(float("nan"))
        else:
            span_sim.append(aff_span.similarity(neg_span))

    return {"span_similarity": span_sim}

In [48]:
# Create dictionary: 'span_simil'
span_simil = span_similarity(df_labeled_pairs)

In [49]:
# Add new column 'span similarity' to dataframe 'df_labeled_pairs'
# The similarity values are positional (one per row, in row order), so the plain list
# is attached instead of an index-aligned DataFrame; a length mismatch raises an error
# rather than silently producing NaN. 'assign' returns a new frame, so re-running is safe.
df_labeled_pairs = df_labeled_pairs.assign(**{"span similarity": span_simil["span_similarity"]})

In [50]:
# Show rows 6 till 11 of 'df_labeled_pairs'
df_labeled_pairs.iloc[6:12]

Unnamed: 0,affirmative sentence,negated sentence,label,span similarity
6,Preliminary evidence suggests potential benefi...,These results do not support the use of HCQ in...,1,0.69368
7,Preliminary evidence suggests potential benefi...,Interpretation Preliminary findings suggest th...,0,0.828888
8,The findings support the hypothesis that these...,We were unable to confirm a benefit of hydroxy...,1,0.870841
9,The findings support the hypothesis that these...,This systematic review and meta-analysis showe...,1,0.836503
10,The findings support the hypothesis that these...,These results do not support the use of HCQ in...,1,0.761835
11,The findings support the hypothesis that these...,Interpretation Preliminary findings suggest th...,0,0.889206


In [51]:
# Which span similarity values do disagreement pairs (labeled as "1") have?
# Select rows where pairs are labeled as "1" and sort rows with regard to span similarity in ascending order
df_sorted_1 = df_labeled_pairs.loc[df_labeled_pairs["label"] == 1].sort_values("span similarity")

In [52]:
# Show 'df_sorted_1'
df_sorted_1

Unnamed: 0,affirmative sentence,negated sentence,label,span similarity
6,Preliminary evidence suggests potential benefi...,These results do not support the use of HCQ in...,1,0.69368
10,The findings support the hypothesis that these...,These results do not support the use of HCQ in...,1,0.761835
18,"Chloroquine phosphate, an old drug for treatme...",These results do not support the use of HCQ in...,1,0.81842
5,Preliminary evidence suggests potential benefi...,This systematic review and meta-analysis showe...,1,0.830467
9,The findings support the hypothesis that these...,This systematic review and meta-analysis showe...,1,0.836503
16,"Chloroquine phosphate, an old drug for treatme...",We were unable to confirm a benefit of hydroxy...,1,0.840745
19,"Chloroquine phosphate, an old drug for treatme...",Interpretation Preliminary findings suggest th...,1,0.853145
17,"Chloroquine phosphate, an old drug for treatme...",This systematic review and meta-analysis showe...,1,0.858759
8,The findings support the hypothesis that these...,We were unable to confirm a benefit of hydroxy...,1,0.870841
13,"Despite its small sample size, our survey show...",This systematic review and meta-analysis showe...,1,0.872061


In [53]:
# Show average span similarity of disagreement sentences (sentences labeled as "1")
df_sorted_1["span similarity"].mean()

0.8332407027482986

In [54]:
# Which span similarity values do non disagreement pairs (labeled as "0") have?
# Select rows where pairs are labeled as "0" and sort rows with regard to span similarity in ascending order
df_sorted_0 = df_labeled_pairs.loc[df_labeled_pairs["label"] == 0].sort_values("span similarity")

In [55]:
# Show 'df_sorted_0'
df_sorted_0

Unnamed: 0,affirmative sentence,negated sentence,label,span similarity
2,This re-analysis reveals severe limitations in...,These results do not support the use of HCQ in...,0,0.726318
14,"Despite its small sample size, our survey show...",These results do not support the use of HCQ in...,0,0.780333
1,This re-analysis reveals severe limitations in...,This systematic review and meta-analysis showe...,0,0.828638
7,Preliminary evidence suggests potential benefi...,Interpretation Preliminary findings suggest th...,0,0.828888
3,This re-analysis reveals severe limitations in...,Interpretation Preliminary findings suggest th...,0,0.861157
0,This re-analysis reveals severe limitations in...,We were unable to confirm a benefit of hydroxy...,0,0.863716
15,"Despite its small sample size, our survey show...",Interpretation Preliminary findings suggest th...,0,0.885565
11,The findings support the hypothesis that these...,Interpretation Preliminary findings suggest th...,0,0.889206


In [56]:
# Show average span similarity of non disagreement sentences (sentences labeled as "0")
df_sorted_0["span similarity"].mean()

0.8329775184392929

**Example: how values in column "prediction" are generated**

In [57]:
# Add column 'prediction' to 'df_labeled_pairs'
# Prediction is based on span similarity value, here as an example "0.84"
df_labeled_pairs = df_labeled_pairs.assign(prediction=lambda df: df["span similarity"] >= 0.84)

In [58]:
# Rearrange columns
df_labeled_pairs = df_labeled_pairs[["affirmative sentence", "negated sentence", "span similarity", "label", "prediction"]]

In [59]:
# Show first five rows of 'df_labeled_pairs'
df_labeled_pairs.head()

Unnamed: 0,affirmative sentence,negated sentence,span similarity,label,prediction
0,This re-analysis reveals severe limitations in...,We were unable to confirm a benefit of hydroxy...,0.863716,0,True
1,This re-analysis reveals severe limitations in...,This systematic review and meta-analysis showe...,0.828638,0,False
2,This re-analysis reveals severe limitations in...,These results do not support the use of HCQ in...,0.726318,0,False
3,This re-analysis reveals severe limitations in...,Interpretation Preliminary findings suggest th...,0.861157,0,True
4,Preliminary evidence suggests potential benefi...,We were unable to confirm a benefit of hydroxy...,0.882067,1,True


In [60]:
# In column 'prediction': convert "True" into "1" and "False" into "0"
df_labeled_pairs = df_labeled_pairs.assign(prediction=lambda df: df["prediction"].astype(np.int8))

In [61]:
# Show first five rows of 'df_labeled_pairs'
df_labeled_pairs.head()

Unnamed: 0,affirmative sentence,negated sentence,span similarity,label,prediction
0,This re-analysis reveals severe limitations in...,We were unable to confirm a benefit of hydroxy...,0.863716,0,1
1,This re-analysis reveals severe limitations in...,This systematic review and meta-analysis showe...,0.828638,0,0
2,This re-analysis reveals severe limitations in...,These results do not support the use of HCQ in...,0.726318,0,0
3,This re-analysis reveals severe limitations in...,Interpretation Preliminary findings suggest th...,0.861157,0,1
4,Preliminary evidence suggests potential benefi...,We were unable to confirm a benefit of hydroxy...,0.882067,1,1


### 6.2 Evaluation

In [62]:
# Smallest span similarity value
df_labeled_pairs["span similarity"].min()

0.693679928779602

In [63]:
# Largest span similarity value
df_labeled_pairs["span similarity"].max()

0.8892055749893188

In [64]:
# Average span similarity of sentence pairs
df_labeled_pairs["span similarity"].mean()

0.8331354290246964

In [65]:
# Average span similarity of disagreement sentences (sentences labeled as "1")
df_sorted_1["span similarity"].mean()

0.8332407027482986

In [66]:
# Average span similarity of non disagreement sentences (sentences labeled as "0")
df_sorted_0["span similarity"].mean()

0.8329775184392929

#### 6.2.1 Confusion Matrix

In [67]:
# Make a confusion matrix for each span similarity threshold 0.69, 0.70, ..., 0.89
# (interval [0.69, 0.9), step 0.01)
# Every threshold is derived from an integer number of hundredths instead of repeatedly
# adding 0.01 to a float: no rounding error accumulates, the printed thresholds are exact
# two-decimal values and the number of iterations is fixed at 21.
for hundredths in range(69, 90):
    similarity_value = hundredths / 100

    # Prediction is 1 where the span similarity reaches the threshold, otherwise 0
    df_labeled_pairs = df_labeled_pairs.assign(
        prediction=(df_labeled_pairs["span similarity"] >= similarity_value).astype(np.int8)
    )

    print(f"Similarity value: {similarity_value}")
    print(confusion_matrix(df_labeled_pairs["label"], df_labeled_pairs["prediction"]))
    print("------------------------------------------------------------\n")

Similarity value: 0.69
[[ 0  8]
 [ 0 12]]
------------------------------------------------------------

Similarity value: 0.7
[[ 0  8]
 [ 1 11]]
------------------------------------------------------------

Similarity value: 0.71
[[ 0  8]
 [ 1 11]]
------------------------------------------------------------

Similarity value: 0.72
[[ 0  8]
 [ 1 11]]
------------------------------------------------------------

Similarity value: 0.73
[[ 1  7]
 [ 1 11]]
------------------------------------------------------------

Similarity value: 0.74
[[ 1  7]
 [ 1 11]]
------------------------------------------------------------

Similarity value: 0.75
[[ 1  7]
 [ 1 11]]
------------------------------------------------------------

Similarity value: 0.76
[[ 1  7]
 [ 1 11]]
------------------------------------------------------------

Similarity value: 0.77
[[ 1  7]
 [ 2 10]]
------------------------------------------------------------

Similarity value: 0.78
[[ 1  7]
 [ 2 10]]
-----------------------

In [68]:
# Confusion Matrix when threshold is average span similarity of disagreement pairs (pairs labeled as "1")
df_labeled_pairs = df_labeled_pairs.assign(prediction=lambda df: df["span similarity"] >= df_sorted_1["span similarity"].mean())
df_labeled_pairs = df_labeled_pairs.assign(prediction=lambda df: df["prediction"].astype(np.int8))

print(f"Similarity value: {df_sorted_1['span similarity'].mean()}")
print(confusion_matrix(df_labeled_pairs["label"], df_labeled_pairs["prediction"]))

Similarity value: 0.8332407027482986
[[4 4]
 [4 8]]


In [69]:
# Confusion Matrix when threshold is average span similarity of all sentence pairs
df_labeled_pairs = df_labeled_pairs.assign(prediction=lambda df: df["span similarity"] >= df_labeled_pairs["span similarity"].mean())
df_labeled_pairs = df_labeled_pairs.assign(prediction=lambda df: df["prediction"].astype(np.int8))

print(f"Similarity value: {df_labeled_pairs['span similarity'].mean()}")
print(confusion_matrix(df_labeled_pairs["label"], df_labeled_pairs["prediction"]))

Similarity value: 0.8331354290246964
[[4 4]
 [4 8]]


In [70]:
# Confusion Matrix when threshold is average span similarity of non disagreement pairs (pairs labeled as "0")
df_labeled_pairs = df_labeled_pairs.assign(prediction=lambda df: df["span similarity"] >= df_sorted_0["span similarity"].mean())
df_labeled_pairs = df_labeled_pairs.assign(prediction=lambda df: df["prediction"].astype(np.int8))

print(f"Similarity value: {df_sorted_0['span similarity'].mean()}")
print(confusion_matrix(df_labeled_pairs["label"], df_labeled_pairs["prediction"]))

Similarity value: 0.8329775184392929
[[4 4]
 [4 8]]


In [71]:
# Values from confusion matrices, divided by category
# The values are computed from 'df_labeled_pairs' instead of being copied by hand from
# printed matrices, so they cannot get out of sync with the data.

# Thresholds: 0.69, 0.70, ..., 0.89, followed by the average span similarity of the
# disagreement pairs ("1"), of all pairs, and of the non disagreement pairs ("0")
sim_value = [hundredths / 100 for hundredths in range(69, 90)]
sim_value += [
    float(df_sorted_1["span similarity"].mean()),
    float(df_labeled_pairs["span similarity"].mean()),
    float(df_sorted_0["span similarity"].mean()),
]

correct_0 = []   # label 0, predicted 0
false_pos = []   # label 0, predicted 1
false_neg = []   # label 1, predicted 0
correct_1 = []   # label 1, predicted 1

for threshold in sim_value:
    predicted = (df_labeled_pairs["span similarity"] >= threshold).astype(np.int8)

    # 'labels=[0, 1]' fixes the matrix layout to 2x2 even if one class is never predicted;
    # 'ravel()' flattens it row by row: [0/0, 0/1, 1/0, 1/1]
    n_00, n_01, n_10, n_11 = confusion_matrix(df_labeled_pairs["label"], predicted, labels=[0, 1]).ravel()

    correct_0.append(int(n_00))
    false_pos.append(int(n_01))
    false_neg.append(int(n_10))
    correct_1.append(int(n_11))

In [72]:
# Dictionary categorizing values from confusion matrices: confusionMatrix_dict_1
confusionMatrix_dict_1 = {"similarity value": sim_value, "0/0": correct_0, "0/1": false_pos, "1/0": false_neg, "1/1":correct_1}

In [73]:
# Make dataframe from 'confusionMatrix_dict_1': confusionMatrix_df_1
confusionMatrix_df_1 = pd.DataFrame(confusionMatrix_dict_1)

In [74]:
# Column "correct": Sum of values from "0/0" and "1/1" 
confusionMatrix_df_1["correct"] = confusionMatrix_df_1["0/0"] + confusionMatrix_df_1["1/1"]

In [75]:
# Column "mistakes": Sum of values from "0/1" (false positives) and "1/0" (false negatives)
confusionMatrix_df_1["mistakes"] = confusionMatrix_df_1["0/1"] + confusionMatrix_df_1["1/0"]

In [76]:
# Dataframe 'confusionMatrix_df_1'
confusionMatrix_df_1

Unnamed: 0,similarity value,0/0,0/1,1/0,1/1,correct,mistakes
0,0.69,0,8,0,12,12,8
1,0.7,0,8,1,11,11,9
2,0.71,0,8,1,11,11,9
3,0.72,0,8,1,11,11,9
4,0.73,1,7,1,11,12,8
5,0.74,1,7,1,11,12,8
6,0.75,1,7,1,11,12,8
7,0.76,1,7,1,11,12,8
8,0.77,1,7,2,10,11,9
9,0.78,1,7,2,10,11,9


In [77]:
# Sort rows by the values in column "correct" in descending order (high to low)
confusionMatrix_df_1 = confusionMatrix_df_1.sort_values("correct", ascending=False)

In [78]:
# First five rows of ordered 'confusionMatrix_df_1'
confusionMatrix_df_1.head()

Unnamed: 0,similarity value,0/0,0/1,1/0,1/1,correct,mistakes
14,0.83,4,4,3,9,13,7
0,0.69,0,8,0,12,12,8
7,0.76,1,7,1,11,12,8
22,0.833135,4,4,4,8,12,8
21,0.833241,4,4,4,8,12,8


#### 6.2.2 Classification Report

In [79]:
# Make a classification report for each span similarity threshold 0.69, 0.70, ..., 0.89
# (interval [0.69, 0.9), step 0.01)
# Every threshold is derived from an integer number of hundredths instead of repeatedly
# adding 0.01 to a float: no rounding error accumulates, the printed thresholds are exact
# two-decimal values and the number of iterations is fixed at 21.
for hundredths in range(69, 90):
    similarity_value = hundredths / 100

    # Prediction is 1 where the span similarity reaches the threshold, otherwise 0
    df_labeled_pairs = df_labeled_pairs.assign(
        prediction=(df_labeled_pairs["span similarity"] >= similarity_value).astype(np.int8)
    )

    print(f"Similarity value: {similarity_value}\n")
    # 'zero_division=0' reports 0.00 for a class that is never predicted (same number as
    # the default) without emitting an undefined-metric warning for each such threshold
    print(classification_report(df_labeled_pairs["label"], df_labeled_pairs["prediction"], zero_division=0))
    print("------------------------------------------------------------\n")

Similarity value: 0.69

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.60      1.00      0.75        12

    accuracy                           0.60        20
   macro avg       0.30      0.50      0.37        20
weighted avg       0.36      0.60      0.45        20

------------------------------------------------------------

Similarity value: 0.7

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.58      0.92      0.71        12

    accuracy                           0.55        20
   macro avg       0.29      0.46      0.35        20
weighted avg       0.35      0.55      0.43        20

------------------------------------------------------------

Similarity value: 0.71

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.58      0.92      0.7

  _warn_prf(average, modifier, msg_start, len(result))



              precision    recall  f1-score   support

           0       0.33      0.12      0.18         8
           1       0.59      0.83      0.69        12

    accuracy                           0.55        20
   macro avg       0.46      0.48      0.44        20
weighted avg       0.49      0.55      0.49        20

------------------------------------------------------------

Similarity value: 0.78

              precision    recall  f1-score   support

           0       0.33      0.12      0.18         8
           1       0.59      0.83      0.69        12

    accuracy                           0.55        20
   macro avg       0.46      0.48      0.44        20
weighted avg       0.49      0.55      0.49        20

------------------------------------------------------------

Similarity value: 0.79

              precision    recall  f1-score   support

           0       0.50      0.25      0.33         8
           1       0.62      0.83      0.71        12

    accur

In [80]:
# Classification report when threshold is average span similarity of disagreement pairs (pairs labeled as "1")
df_labeled_pairs = df_labeled_pairs.assign(prediction=lambda df: df["span similarity"] >= df_sorted_1["span similarity"].mean())
df_labeled_pairs = df_labeled_pairs.assign(prediction=lambda df: df["prediction"].astype(np.int8))

print(f"Similarity value: {df_sorted_1['span similarity'].mean()}\n")
print(classification_report(df_labeled_pairs["label"], df_labeled_pairs["prediction"]))

Similarity value: 0.8332407027482986

              precision    recall  f1-score   support

           0       0.50      0.50      0.50         8
           1       0.67      0.67      0.67        12

    accuracy                           0.60        20
   macro avg       0.58      0.58      0.58        20
weighted avg       0.60      0.60      0.60        20



In [81]:
# Classification report when threshold is average span similarity of all sentence pairs
# A pair is predicted as "1" when its span similarity reaches that average;
# the boolean comparison is stored as int8 in the column "prediction"
df_labeled_pairs = df_labeled_pairs.assign(
    prediction=(
        df_labeled_pairs["span similarity"] >= df_labeled_pairs["span similarity"].mean()
    ).astype(np.int8)
)

print(f"Similarity value: {df_labeled_pairs['span similarity'].mean()}\n")
print(classification_report(df_labeled_pairs["label"], df_labeled_pairs["prediction"]))

Similarity value: 0.8331354290246964

              precision    recall  f1-score   support

           0       0.50      0.50      0.50         8
           1       0.67      0.67      0.67        12

    accuracy                           0.60        20
   macro avg       0.58      0.58      0.58        20
weighted avg       0.60      0.60      0.60        20



In [82]:
# Classification report when threshold is average span similarity of non disagreement pairs (pairs labeled as "0")
# A pair is predicted as "1" when its span similarity reaches that average;
# the boolean comparison is stored as int8 in the column "prediction"
df_labeled_pairs = df_labeled_pairs.assign(
    prediction=(
        df_labeled_pairs["span similarity"] >= df_sorted_0["span similarity"].mean()
    ).astype(np.int8)
)

print(f"Similarity value: {df_sorted_0['span similarity'].mean()}\n")
print(classification_report(df_labeled_pairs["label"], df_labeled_pairs["prediction"]))

Similarity value: 0.8329775184392929

              precision    recall  f1-score   support

           0       0.50      0.50      0.50         8
           1       0.67      0.67      0.67        12

    accuracy                           0.60        20
   macro avg       0.58      0.58      0.58        20
weighted avg       0.60      0.60      0.60        20



In [83]:
# Sort values of classification reports by category; store values in lists
sim_val = [0.69, 0.7, 0.71, 0.72, 0.73, 0.74, 0.75, 0.76, 0.77, 0.78, 0.79, 0.8, 0.81, 0.82, 0.83, 0.84, 0.85, 0.86, 0.87, 0.88, 0.89, 0.8332407027482986, 0.8331354290246964, 0.8329775184392929]

prec0 = [0.00, 0.00, 0.00, 0.00, 0.50, 0.50, 0.50, 0.50, 0.33, 0.33, 0.50, 0.50, 0.50, 0.40, 0.57, 0.44, 0.40, 0.33, 0.43, 0.38, 0.40, 0.50, 0.50, 0.50]

rec0 = [0.00, 0.00, 0.00, 0.00, 0.12, 0.12, 0.12, 0.12, 0.12, 0.12, 0.25, 0.25, 0.25, 0.25, 0.50, 0.50, 0.50, 0.50, 0.75, 0.75, 1.00, 0.50, 0.50, 0.50]

prec1 = [0.60, 0.58, 0.58, 0.58, 0.61, 0.61, 0.61, 0.61, 0.59, 0.59, 0.62, 0.62, 0.62, 0.60, 0.69, 0.64, 0.60, 0.50, 0.67, 0.50, 0.00, 0.67, 0.67, 0.67]

rec1 = [1.00, 0.92, 0.92, 0.92, 0.92, 0.92, 0.92, 0.92, 0.83, 0.83, 0.83, 0.83, 0.83, 0.75, 0.75, 0.58, 0.50, 0.33, 0.33, 0.17, 0.00, 0.67, 0.67, 0.67]

acc = [0.60, 0.55, 0.55, 0.55, 0.60, 0.60, 0.60, 0.60, 0.55, 0.55, 0.60, 0.60, 0.60, 0.55, 0.65, 0.55, 0.50, 0.40, 0.50, 0.40, 0.40, 0.60, 0.60, 0.60]

In [84]:
# Make dictionary and assign one list of values to one category name: classificationReport_dict_1
# Category names and value lists are paired up position by position
classificationReport_dict_1 = dict(
    zip(
        ["similarity value", "precision 0", "recall 0", "precision 1", "recall 1", "accuracy"],
        [sim_val, prec0, rec0, prec1, rec1, acc],
    )
)

In [85]:
# Make dataframe from dictionary 'classificationReport_dict_1': classificationReport_df_1
# Each dictionary key becomes one column of the dataframe
classificationReport_df_1 = pd.DataFrame(data=classificationReport_dict_1)

In [86]:
# Column "total": Values are computed by adding values of the columns "precision 0", "recall 0", "precision 1", "recall 1" and "accuracy"
# The columns are added one after another, from left to right
classificationReport_df_1["total"] = (
    classificationReport_df_1["precision 0"]
    .add(classificationReport_df_1["recall 0"])
    .add(classificationReport_df_1["precision 1"])
    .add(classificationReport_df_1["recall 1"])
    .add(classificationReport_df_1["accuracy"])
)

In [87]:
# Order rows of 'classificationReport_df_1' by values in column "total"
# values are ordered from high to low (descending order);
# the last line displays the reordered dataframe
classificationReport_df_1 = classificationReport_df_1.sort_values(by="total", ascending=False)
classificationReport_df_1

Unnamed: 0,similarity value,precision 0,recall 0,precision 1,recall 1,accuracy,total
14,0.83,0.57,0.5,0.69,0.75,0.65,3.16
23,0.832978,0.5,0.5,0.67,0.67,0.6,2.94
22,0.833135,0.5,0.5,0.67,0.67,0.6,2.94
21,0.833241,0.5,0.5,0.67,0.67,0.6,2.94
10,0.79,0.5,0.25,0.62,0.83,0.6,2.8
11,0.8,0.5,0.25,0.62,0.83,0.6,2.8
12,0.81,0.5,0.25,0.62,0.83,0.6,2.8
7,0.76,0.5,0.12,0.61,0.92,0.6,2.75
5,0.74,0.5,0.12,0.61,0.92,0.6,2.75
4,0.73,0.5,0.12,0.61,0.92,0.6,2.75


### 6.3 Display Disagreement Pairs

In [88]:
def _last_match_end(doc):
    """Return the end token index of the last 'span_matcher' match in 'doc', or None without a match."""
    matches = span_matcher(doc)
    # Each match is a (match_id, start, end) tuple; only the last one is used
    return matches[-1][2] if matches else None


# Define a function that creates pairs of disagreeing sentences and stores each pair in a list: disagreement_pairs()
def disagreement_pairs(list_of_affirmative_sentences, list_of_negated_sentences, threshold=0.83):
    """Pair affirmative and negated sentences whose trailing spans are similar.

    Every Doc-object is sliced after its last 'span_matcher' match:
    affirmative sentences from the end of the match (doc[end:]), negated
    sentences one token later (doc_neg[end + 1:]). A pair (doc, doc_neg) is
    kept when span1.similarity(span2) >= threshold.

    Sentences without any 'span_matcher' match are skipped. Previously such a
    sentence either raised a NameError or was silently compared with the span
    of the sentence processed before it.

    Parameters
    ----------
    list_of_affirmative_sentences : iterable of Doc-objects
    list_of_negated_sentences : iterable of Doc-objects
    threshold : float, default 0.83
        Minimum span similarity for a pair to count as disagreement pair.

    Returns
    -------
    list of (doc, doc_neg) tuples, ordered by affirmative sentence first and
    by negated sentence second.
    """
    # Slice every negated sentence only once, not once per affirmative sentence
    negated_spans = []
    for doc_neg in list_of_negated_sentences:
        end = _last_match_end(doc_neg)
        if end is None:
            continue
        # One extra token is dropped here compared to the affirmative sentences
        # NOTE(review): presumably this skips the negation token - confirm
        negated_spans.append((doc_neg, doc_neg[end + 1:]))

    # List of pairs of disagreeing sentences: pairs_of_disagreeing_sentences
    # This list shall be returned by the function
    pairs_of_disagreeing_sentences = []

    # Iterate over all Doc-objects in 'list_of_affirmative_sentences'
    for doc in list_of_affirmative_sentences:
        end = _last_match_end(doc)
        if end is None:
            continue
        # Slice Doc-object into Span-object with the help of 'span_matcher': span1
        span1 = doc[end:]

        # If Span-object 1 and Span-object 2 have a certain degree of similarity
        # then make a pair of the corresponding sentences and add the pair to
        # the list 'pairs_of_disagreeing_sentences'
        for doc_neg, span2 in negated_spans:
            if span1.similarity(span2) >= threshold:
                pairs_of_disagreeing_sentences.append((doc, doc_neg))

    # Return the list of pairs whose sentences show disagreement
    return pairs_of_disagreeing_sentences

In [89]:
# Make a list of pairs of disagreeing sentences from 'sents' and 'negated_sents': disagreementPairs
# 'sents' supplies the affirmative sentences, 'negated_sents' the negated ones
disagreementPairs = disagreement_pairs(
    list_of_affirmative_sentences=sents,
    list_of_negated_sentences=negated_sents,
)

In [90]:
# Show pairs of disagreeing sentences with highlighted entities
# Per pair: a separator line, then the affirmative sentence under "(PRO)"
# and the negated sentence under "(CON)"
for doc, doc_neg in disagreementPairs:
    print("==========\n")
    for heading, sentence in (("(PRO)\n", doc), ("\n(CON)\n", doc_neg)):
        print(heading)
        displacy.render(sentence, style="ent", jupyter=True)
    print("\n")


(PRO)




(CON)






(PRO)




(CON)






(PRO)




(CON)






(PRO)




(CON)






(PRO)




(CON)






(PRO)




(CON)






(PRO)




(CON)






(PRO)




(CON)






(PRO)




(CON)






(PRO)




(CON)






(PRO)




(CON)






(PRO)




(CON)






(PRO)




(CON)





