# Triple Extraction using Stanford's OpenIE

In [None]:
from openie import StanfordOpenIE # first get torch from pytorch.org/get-started/locally
import graphviz

import os
import tempfile
from subprocess import Popen
from sys import stderr
# Progress marker: printed once every import in this cell has succeeded.
print('imports done')

In [None]:
# Short S/P/O labels used in place of the verbose triple field names.
new_keys = {'subject': 'S', 'relation': 'P', 'object': 'O'}


def change_triple_keys(triple):
    """Return a new dict with each key of *triple* replaced via ``new_keys``.

    Values are carried over unchanged and the original key order is kept.
    Raises ``KeyError`` if *triple* has a key that ``new_keys`` does not map.
    """
    return {new_keys[field]: text for field, text in triple.items()}

In [None]:
def _dot_quote(value) -> str:
    """Lower-case *value* and escape it for use inside a double-quoted DOT string."""
    # Backslashes are doubled first so that a trailing backslash cannot swallow
    # the closing quote; embedded double quotes are then escaped.
    return str(value).lower().replace('\\', '\\\\').replace('"', '\\"')


def generate_text_graph(triples: list = None, png_filename: str = './out/graph.png'):
    """Render triples as a directed graph and write it to a PNG via Graphviz ``dot``.

    Args:
        triples: Iterable of mappings, each with ``'subject'``, ``'relation'``
            and ``'object'`` entries. Every triple becomes one edge
            ``subject -> object`` labelled with the relation (all lower-cased).
            ``None`` is treated as "no triples" and yields an empty graph.
        png_filename: Path of the PNG to create. Its parent directory is
            created when missing.

    Raises:
        AssertionError: If ``dot`` cannot be started or exits with a non-zero
            status. The type is kept from the original ``assert`` so existing
            callers keep working, but it is raised explicitly so the check is
            not stripped under ``python -O``.
        KeyError: If a triple lacks one of the three expected keys.
    """
    lines = ['digraph {']
    for er in triples or ():
        lines.append('"{}" -> "{}" [ label="{}" ];'.format(
            _dot_quote(er['subject']),
            _dot_quote(er['object']),
            _dot_quote(er['relation']),
        ))
    lines.append('}')

    output_dir = os.path.dirname(png_filename)
    if output_dir:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_dir, exist_ok=True)

    # A unique temp file (instead of a fixed 'graph.dot') keeps concurrent runs
    # from overwriting each other's input; UTF-8 is what Graphviz reads by default.
    with tempfile.NamedTemporaryFile(
            'w', suffix='.dot', delete=False, encoding='utf-8') as output_file:
        output_file.write('\n'.join(lines) + '\n')
        out_dot = output_file.name

    try:
        try:
            # Argument list without a shell: paths containing spaces or shell
            # metacharacters are passed to dot verbatim.
            dot_process = Popen(
                ['dot', '-Tpng', out_dot, '-o', png_filename], stdout=stderr)
        except OSError as err:
            raise AssertionError(
                'ERROR: Could not start dot; is Graphviz installed and on PATH?'
            ) from err
        dot_process.wait()
    finally:
        os.remove(out_dot)

    if dot_process.returncode:
        raise AssertionError('ERROR: Call to dot exited with a non-zero code status.')
    

In [None]:
# Three short statements about the same subject, used to eyeball which
# triples the annotator produces for each of them.
sample_sentences = [
    "The corona vaccine contains dangerous chemicals",
    "The corona vaccine contains only safe ingredients and is harmless",
    "The corona vaccine contains a compound called Formaldehyde",
]

with StanfordOpenIE() as client:
    for sentence in sample_sentences:
        # Header line for the sentence, followed by one line per triple.
        print(f'\n__ {sentence}.')
        for triple in client.annotate(sentence):
            print('|-', change_triple_keys(triple))

In [None]:
# Annotate a whole article file and keep the resulting triples in
# `extracted_triples` for the cells that follow.
txt_path = os.path.join('sources', 'article_corona_vaccine.txt')
extracted_triples = []
with StanfordOpenIE() as client, open(txt_path, 'r', encoding='utf-8') as r:
    # Collapse the article onto a single line before annotating it.
    text = r.read().replace('\n', ' ').replace('\r', '')
    extracted_triples = client.annotate(text, simple_format=True)
    print('\n', text[:500], '\n')  # to check the formatting
    print(f'Found {len(extracted_triples)} triples in the corpus.')

In [None]:
# File name intended for the rendered triple graph. The rendering call below
# is currently disabled (commented out), so this cell only sets the name.
graph_image = 'graph.png'
#generate_text_graph(extracted_triples, graph_image)

In [None]:
# Opens a StanfordOpenIE client and prepares a sample sentence and an output
# file name. The graph-generation call is commented out, so the client is
# opened and closed without being used.
with StanfordOpenIE() as client:
    # NOTE: reuses the name `text`, which the article cell also assigns.
    text = "Python is a programming language designed for readability"
    graph_image = 'graph.png'
    #client.generate_graphviz_graph(text, graph_image)

In [None]:
# Smoke test for the graphviz Python package: build a digraph with a single
# edge and call view() on it.
dot = graphviz.Digraph(comment = 'testgraph')
dot.edge('A','B')  # one edge from node 'A' to node 'B'
# NOTE(review): view() presumably renders the graph and opens it in a viewer,
# which would need the Graphviz binaries installed - confirm.
dot.view()

### TODO: Install Graphviz properly