In [1]:
from stanfordcorenlp import StanfordCoreNLP
from pyopenie import OpenIE5
import textacy
import json
import pprint
import pandas as pd

In [2]:
# Stanford CoreNLP client, pointed at a local CoreNLP 4.2.0 installation directory.
# NOTE(review): this is an absolute, machine-specific path — the notebook will not
# run on another machine without editing it.
stanford = StanfordCoreNLP(r'C:\Users\aandi\Documents\Uni\Final Year\FYP Code\stanford-corenlp-4.2.0')
# Properties handed to stanford.annotate(): run the OpenIE annotator on English
# text and return the result as JSON.
stanford_props={'annotators': 'openie', 'pipelineLanguage': 'en', 'outputFormat': 'json'}
# OpenIE 5 client; presumably expects a server already listening on
# localhost:8000 — TODO confirm how that server is started.
iit = OpenIE5('http://localhost:8000')

# Five short test sentences used to compare the extractors.
simple_sentences = [
    "The Simpsons predicted the epidemic.",
    "Bill Gates predicted and simulated the COVID-19 pandemic.",
    "Coronavirus was spread via bananas",
    "Drinking alcohol can help fight coronavirus.",
    "Steam therapy cures the coronavirus."
]

# Five longer test sentences (reported speech, appositives, subordinate clauses).
complex_sentences = [
    "A publication claims that Bill and Melinda Gates didn’t vaccine their kids.",
    "Bill Gates said that the Catholic religion may have to be suspended forever because of the COVID-19 pandemic.",
    "The Centers for Disease and Prevention has reduced the number of COVID-19 cases recorded in the U.S.",
    "After announcing social isolation measures, governor of São Paulo, João Doria, appears at a party in the interior of the state.",
    "Trupti Desai, a well known social activist from India, was arrested for illegally buying liquor during the COVID-19 lockdown."
]

In [10]:
def openie_extractor(sentence):
    """Extract subject/relation/object triples from ``sentence`` with Stanford OpenIE.

    Parameters
    ----------
    sentence
        Text passed to the module-level ``stanford`` client together with
        ``stanford_props``.

    Returns
    -------
    list
        A one-element list wrapping the list of triples found in ``sentence``.
        Each triple is a dict with the keys ``"subject"``, ``"relation"`` and
        ``"object"``.  When the reply cannot be decoded as JSON, the decoder's
        message is printed and ``[[]]`` is returned, so the sentence still
        contributes exactly one (empty) entry once callers flatten the result.
    """
    try:
        annotation = json.loads(stanford.annotate(sentence, properties=stanford_props))["sentences"]
    except json.decoder.JSONDecodeError as e:
        print(e.msg)
        # Keep the same shape as the success path. A bare [] disappears when the
        # caller flattens the results, leaving fewer entries than sentences and
        # misaligning every later row.
        return [[]]
    spos = [
        {
            "subject": openie['subject'],
            "relation": openie['relation'],
            "object": openie['object'],
        }
        for sent in annotation
        for openie in sent['openie']
    ]
    return [spos]

# Run Stanford OpenIE over both sentence sets, unwrapping the list that
# openie_extractor returns for each sentence.
openie_simple_spos = [
    triples
    for text in simple_sentences
    for triples in openie_extractor(text)
]
openie_complex_spos = [
    triples
    for text in complex_sentences
    for triples in openie_extractor(text)
]

In [11]:
# Pretty-print the Stanford OpenIE results: simple sentences first, then complex ones.
pprint.pprint(openie_simple_spos)
pprint.pprint(openie_complex_spos)

[[{'object': 'epidemic', 'relation': 'predicted', 'subject': 'Simpsons'}],
 [{'object': 'COVID 19 pandemic',
   'relation': 'predicted',
   'subject': 'Bill Gates'}],
 [{'object': 'spread', 'relation': 'was', 'subject': 'Coronavirus'},
  {'object': 'bananas',
   'relation': 'was spread via',
   'subject': 'Coronavirus'}],
 [],
 [{'object': 'coronavirus', 'relation': 'cures', 'subject': 'Steam therapy'}]]
[[],
 [],
 [{'object': 'number recorded',
   'relation': 'has reduced',
   'subject': 'Centers'},
  {'object': 'number', 'relation': 'has reduced', 'subject': 'Centers'},
  {'object': 'number of COVID 19 cases recorded',
   'relation': 'has reduced',
   'subject': 'Centers'},
  {'object': 'number of COVID 19 cases',
   'relation': 'has reduced',
   'subject': 'Centers'},
  {'object': 'number of COVID 19 cases recorded in U.S.',
   'relation': 'has reduced',
   'subject': 'Centers'},
  {'object': 'number recorded in U.S.',
   'relation': 'has reduced',
   'subject': 'Centers'},
  {'obje

In [12]:
def textacy_extractor(sentence):
    """Extract subject/verb/object triples from ``sentence`` using textacy.

    The sentence is turned into a spaCy document (``lang="en"``) and passed to
    ``textacy.extract.subject_verb_object_triples``.  Every triple it yields
    becomes a dict with the keys ``"subject"``, ``"relation"`` and ``"object"``
    holding the triple's first, second and third element unchanged.

    Returns a one-element list wrapping the list of triple dicts.
    """
    parsed = textacy.make_spacy_doc(sentence, lang="en")
    triples = [
        {"subject": triple[0], "relation": triple[1], "object": triple[2]}
        for triple in textacy.extract.subject_verb_object_triples(parsed)
    ]
    return [triples]

# Run the textacy extractor over both sentence sets, unwrapping the list that
# textacy_extractor returns for each sentence.
textacy_simple_spos = [
    triples
    for text in simple_sentences
    for triples in textacy_extractor(text)
]
textacy_complex_spos = [
    triples
    for text in complex_sentences
    for triples in textacy_extractor(text)
]


In [13]:
# Pretty-print the textacy results: simple sentences first, then complex ones.
pprint.pprint(textacy_simple_spos)
pprint.pprint(textacy_complex_spos)

[[{'object': epidemic, 'relation': predicted, 'subject': Simpsons}],
 [],
 [],
 [{'object': fight, 'relation': can help, 'subject': Drinking alcohol}],
 [{'object': coronavirus, 'relation': cures, 'subject': Steam therapy}]]
[[{'object': kids, 'relation': didn’t vaccine, 'subject': Gates}],
 [],
 [{'object': number, 'relation': has reduced, 'subject': Centers}],
 [],
 []]


In [17]:
def iit_extractor(sentence):
    """Extract triples from ``sentence`` via the module-level ``iit`` client.

    Each item returned by ``iit.extract`` is mapped to a dict whose
    ``"subject"`` is the text of ``arg1``, whose ``"relation"`` is the text of
    ``rel`` and whose ``"object"`` is the list of texts of all ``arg2s``
    (possibly empty).

    Returns a one-element list wrapping the list of triple dicts.
    """
    def to_spo(result):
        # Every field of interest lives under the item's 'extraction' key.
        parts = result['extraction']
        return {
            "subject": parts['arg1']['text'],
            "relation": parts['rel']['text'],
            "object": [arg['text'] for arg in parts['arg2s']],
        }

    return [[to_spo(result) for result in iit.extract(sentence)]]

In [18]:
# Run the OpenIE 5 extractor over both sentence sets, unwrapping the list that
# iit_extractor returns for each sentence.
iit_simple_spos = [
    triples
    for text in simple_sentences
    for triples in iit_extractor(text)
]
iit_complex_spos = [
    triples
    for text in complex_sentences
    for triples in iit_extractor(text)
]

In [19]:
# Pretty-print the OpenIE 5 results: simple sentences first, then complex ones.
pprint.pprint(iit_simple_spos)
pprint.pprint(iit_complex_spos)

[[{'object': ['the epidemic'],
   'relation': 'predicted',
   'subject': 'The Simpsons'}],
 [{'object': ['19 pandemic'], 'relation': 'simulated', 'subject': 'Bill'},
  {'object': ['the COVID - 19 pandemic'],
   'relation': 'simulated',
   'subject': 'Bill'},
  {'object': [], 'relation': 'predicted', 'subject': 'Bill Gates'}],
 [{'object': [], 'relation': 'was spread', 'subject': 'Coronavirus'}],
 [{'object': ['fight coronavirus'],
   'relation': 'can help',
   'subject': 'Drinking alcohol'}],
 [{'object': ['the coronavirus'],
   'relation': 'cures',
   'subject': 'Steam therapy'}]]
[[{'object': ['their kids'],
   'relation': "did n't vaccine",
   'subject': 'Melinda Gates'},
  {'object': ["that Melinda Gates did n't vaccine their kids"],
   'relation': 'claims',
   'subject': 'A publication'},
  {'object': ['their kids'],
   'relation': "did n't vaccine",
   'subject': 'Bill Gates'},
  {'object': ["that Bill Gates did n't vaccine their kids"],
   'relation': 'claims',
   'subject': 'A 

In [22]:
my_spos = [
    [
        {"subject": "The Simpsons", "relation": "predicted", "object": "the epidemic"}
    ],
    [
        {"subject": "Bill Gates", "relation": "predicted", "object": "the COVID-19 pandemic"},
        {"subject": "Bill Gates", "relation": "simulated", "object": "the COVID-19 pandemic"}
    ],
    [
        {"subject": "Coronavirus", "relation": "was", "object": "spread"},
        {"subject": "Coronavirus", "relation": "was spread via", "object": "bananas"}
    ],
    [
        {"subject": "Drinking alcohol", "relation": "can help", "object": "fight Coronavirus"},
        {"subject": "Drinking alcohol", "relation": "can help fight", "object": "Coronavirus"},
    ],
    [
        {"subject": "Steam therapy", "relation": "cures", "object": "Coronavirus"}
    ],
    [
        {"subject": "publication", "relation": "claims", "object": "Bill and Melinda Gates didn’t vaccine their kids"},
        {"subject": "Bill and Melinda Gates", "relation": "didn't vaccine", "object": "their kids"},
        {"subject": "Bill", "relation": "didn't vaccine", "object": "their kids"},
        {"subject": "Melinda Gates", "relation": "didn't vaccine", "object": "their kids"},
    ],
    [
        {"subject": "Bill Gates", "relation": "said", "object": "the Catholic religion may have to be suspended forever"},
        {"subject": "Catholic religion", "relation": "may have to be", "object": "suspended forever"}
    ],
    [
        {"subject": "Centers for Disease and Prevention", "relation": "has reduced", "object": "the number of COVID-19 cases recorded in the U.S"},
    ],
    [
        {"subject": "governor of São Paolo", "relation": "appears at", "object": "a party in the interior of the state"},
        {"subject": "João Doria", "relation": "appears at", "object": "a party in the interior of the state"},
        {"subject": "João Doria", "relation": "governor", "object": "São Paolo"},
        {"subject": "João Doria", "relation": "is", "object": "governor of São Paolo"}
    ],
    [
        {"subject": "Trupti Desai", "relation": "social activist from", "object": "India"},
        {"subject": "Trupti Desai", "relation": "is", "object": "social activist from India"},
        {"subject": "Trupti Desai", "relation": "was", "object": "arrested for illegally buying liquor"},
        {"subject": "Trupti Desai", "relation": "arrested", "object": "for illegally buying liquor"},
    ],
]

In [23]:
# One row per sentence (simple first, then complex); one column per extractor,
# plus the hand-written reference triples under 'my own'.
df = pd.DataFrame({'text': simple_sentences + complex_sentences}).assign(
    openie=openie_simple_spos + openie_complex_spos,
    textacy=textacy_simple_spos + textacy_complex_spos,
    iit=iit_simple_spos + iit_complex_spos,
    **{'my own': my_spos}
)

In [21]:
# Show every cell in full; the triple lists are long and would otherwise be
# truncated. ``None`` is the supported "no limit" value: the old ``-1``
# sentinel is deprecated and triggers a warning on each run.
pd.set_option('display.max_colwidth', None)
display(df)

  pd.set_option('display.max_colwidth', -1)


Unnamed: 0,text,openie,textacy,iit,my own
0,The Simpsons predicted the epidemic.,"[{'subject': 'Simpsons', 'relation': 'predicted', 'object': 'epidemic'}]","[{'subject': (Simpsons), 'relation': (predicted), 'object': (epidemic)}]","[{'subject': 'The Simpsons', 'relation': 'predicted', 'object': ['the epidemic']}]","[{'subject': 'The Simpsons', 'relation': 'predicted', 'object': 'the epidemic'}]"
1,Bill Gates predicted and simulated the COVID-19 pandemic.,"[{'subject': 'Bill Gates', 'relation': 'predicted', 'object': 'COVID 19 pandemic'}]",[],"[{'subject': 'Bill', 'relation': 'simulated', 'object': ['19 pandemic']}, {'subject': 'Bill', 'relation': 'simulated', 'object': ['the COVID - 19 pandemic']}, {'subject': 'Bill Gates', 'relation': 'predicted', 'object': []}]","[{'subject': 'Bill Gates', 'relation': 'predicted', 'object': 'the COVID-19 pandemic'}, {'subject': 'Bill Gates', 'relation': 'simulated', 'object': 'the COVID-19 pandemic'}]"
2,Coronavirus was spread via bananas,"[{'subject': 'Coronavirus', 'relation': 'was', 'object': 'spread'}, {'subject': 'Coronavirus', 'relation': 'was spread via', 'object': 'bananas'}]",[],"[{'subject': 'Coronavirus', 'relation': 'was spread', 'object': []}]","[{'subject': 'Coronavirus', 'relation': 'was', 'object': 'spread'}, {'subject': 'Coronavirus', 'relation': 'was spread via', 'object': 'bananas'}]"
3,Drinking alcohol can help fight coronavirus.,[],"[{'subject': (Drinking, alcohol), 'relation': (can, help), 'object': (fight)}]","[{'subject': 'Drinking alcohol', 'relation': 'can help', 'object': ['fight coronavirus']}]","[{'subject': 'Drinking alcohol', 'relation': 'can help', 'object': 'fight Coronavirus'}, {'subject': 'Drinking alcohol', 'relation': 'can help fight', 'object': 'Coronavirus'}]"
4,Steam therapy cures the coronavirus.,"[{'subject': 'Steam therapy', 'relation': 'cures', 'object': 'coronavirus'}]","[{'subject': (Steam, therapy), 'relation': (cures), 'object': (coronavirus)}]","[{'subject': 'Steam therapy', 'relation': 'cures', 'object': ['the coronavirus']}]","[{'subject': 'Steam therapy', 'relation': 'cures', 'object': 'fight Coronavirus'}]"
5,A publication claims that Bill and Melinda Gates didn’t vaccine their kids.,[],"[{'subject': (Gates), 'relation': (did, n’t, vaccine), 'object': (kids)}]","[{'subject': 'Melinda Gates', 'relation': 'did n't vaccine', 'object': ['their kids']}, {'subject': 'A publication', 'relation': 'claims', 'object': ['that Melinda Gates did n't vaccine their kids']}, {'subject': 'Bill Gates', 'relation': 'did n't vaccine', 'object': ['their kids']}, {'subject': 'A publication', 'relation': 'claims', 'object': ['that Bill Gates did n't vaccine their kids']}]","[{'subject': 'publication', 'relation': 'claims', 'object': 'Bill and Melinda Gates didn’t vaccine their kids'}, {'subject': 'Bill and Melinda Gates', 'relation': 'didn't vaccine', 'object': 'their kids'}, {'subject': 'Bill', 'relation': 'didn't vaccine', 'object': 'their kids'}, {'subject': 'Melinda Gates', 'relation': 'didn't vaccine', 'object': 'their kids'}]"
6,Bill Gates said that the Catholic religion may have to be suspended forever because of the COVID-19 pandemic.,[],[],"[{'subject': 'Bill Gates', 'relation': 'said', 'object': ['that the Catholic religion may have to be suspended forever because of the COVID-19 pandemic']}, {'subject': 'the Catholic religion', 'relation': 'to be suspended', 'object': ['forever', 'because of the COVID-19 pandemic']}]","[{'subject': 'Bill Gates', 'relation': 'said', 'object': 'the Catholic religion may have to be suspended forever'}, {'subject': 'Catholic religion', 'relation': 'may have to be', 'object': 'suspended forever'}]"
7,The Centers for Disease and Prevention has reduced the number of COVID-19 cases recorded in the U.S.,"[{'subject': 'Centers', 'relation': 'has reduced', 'object': 'number recorded'}, {'subject': 'Centers', 'relation': 'has reduced', 'object': 'number'}, {'subject': 'Centers', 'relation': 'has reduced', 'object': 'number of COVID 19 cases recorded'}, {'subject': 'Centers', 'relation': 'has reduced', 'object': 'number of COVID 19 cases'}, {'subject': 'Centers', 'relation': 'has reduced', 'object': 'number of COVID 19 cases recorded in U.S.'}, {'subject': 'Centers', 'relation': 'has reduced', 'object': 'number recorded in U.S.'}, {'subject': 'number', 'relation': 'Centers for', 'object': 'Disease'}]","[{'subject': (Centers), 'relation': (has, reduced), 'object': (number)}]","[{'subject': 'The Centers for Prevention', 'relation': 'has reduced', 'object': ['the number of COVID - 19 cases']}, {'subject': 'COVID - 19 cases', 'relation': 'recorded', 'object': ['in the U.S.']}, {'subject': 'The Centers for Disease', 'relation': 'has reduced', 'object': ['the number of COVID - 19 cases']}]","[{'subject': 'Centers for Disease and Prevention', 'relation': 'has reduced', 'object': 'the number of COVID-19 cases recorded in the U.S'}]"
8,"After announcing social isolation measures, governor of São Paulo, João Doria, appears at a party in the interior of the state.","[{'subject': 'governor', 'relation': 'appears at', 'object': 'party in interior'}, {'subject': 'governor', 'relation': 'appears at', 'object': 'party in interior of state'}, {'subject': 'governor', 'relation': 'appears at', 'object': 'party'}, {'subject': 'governor', 'relation': 'appears After', 'object': 'announcing isolation measures'}, {'subject': 'governor', 'relation': 'appears After', 'object': 'announcing social isolation measures'}, {'subject': 'party', 'relation': 'is in', 'object': 'interior of state'}, {'subject': 'governor', 'relation': 'appears After', 'object': 'social isolation measures'}, {'subject': 'governor', 'relation': 'appears After', 'object': 'isolation measures'}]",[],"[{'subject': 'governor of São Paulo', 'relation': 'appears', 'object': ['at a party in the interior of the state', 'After announcing social isolation measures']}]","[{'subject': 'governor of São Paolo', 'relation': 'appears at', 'object': 'a party in the interior of the state'}, {'subject': 'João Doria', 'relation': 'appears at', 'object': 'a party in the interior of the state'}, {'subject': 'João Doria', 'relation': 'governor', 'object': 'São Paolo'}, {'subject': 'João Doria', 'relation': 'is', 'object': 'governor of São Paolo'}]"
9,"Trupti Desai, a well known social activist from India, was arrested for illegally buying liquor during the COVID-19 lockdown.","[{'subject': 'Trupti Desai', 'relation': 'activist from', 'object': 'India'}, {'subject': 'Trupti Desai', 'relation': 'was', 'object': 'arrested'}, {'subject': 'Trupti Desai', 'relation': 'was', 'object': 'arrested for buying liquor during COVID 19 lockdown'}, {'subject': 'Trupti Desai', 'relation': 'was arrested for', 'object': 'liquor'}, {'subject': 'Trupti Desai', 'relation': 'was arrested for', 'object': 'illegally buying liquor'}, {'subject': 'Trupti Desai', 'relation': 'was arrested for', 'object': 'buying liquor'}, {'subject': 'Trupti Desai', 'relation': 'was arrested during', 'object': 'COVID 19 lockdown'}, {'subject': 'Trupti Desai', 'relation': 'was', 'object': 'arrested for liquor during COVID 19 lockdown'}, {'subject': 'Trupti Desai', 'relation': 'was', 'object': 'arrested for illegally buying liquor during COVID 19 lockdown'}]",[],"[{'subject': 'Trupti Desai', 'relation': 'for illegally buying', 'object': ['liquor', 'during the COVID-19 lockdown']}, {'subject': 'Trupti Desai', 'relation': 'was arrested', 'object': ['for illegally buying liquor during the COVID-19 lockdown']}]","[{'subject': 'Trupti Desai', 'relation': 'social activist from', 'object': 'India'}, {'subject': 'Trupti Desai', 'relation': 'is', 'object': 'social activist from India'}, {'subject': 'Trupti Desai', 'relation': 'was', 'object': 'arrested for illegally buying liquor'}, {'subject': 'Trupti Desai', 'relation': 'arrested', 'object': 'for illegally buying liquor'}]"
