In [188]:
import mammoth
from bs4 import BeautifulSoup
from pprint import pprint
import pydot
from IPython.display import Image, display
from os import listdir
from os.path import isfile, join
from deepmerge import always_merger
import stanfordnlp

In [189]:
stanfordnlp.download('en') 

Using the default treebank "en_ewt" for language "en".
Would you like to download the models for: en_ewt now? (Y/n)


 Y



Default download directory: /Users/jackyho/stanfordnlp_resources
Hit enter to continue or type an alternate directory.


 



Downloading models for: en_ewt
Download location: /Users/jackyho/stanfordnlp_resources/en_ewt_models.zip


100%|██████████| 235M/235M [01:11<00:00, 3.28MB/s] 



Download complete.  Models saved to: /Users/jackyho/stanfordnlp_resources/en_ewt_models.zip
Extracting models file for: en_ewt
Cleaning up...Done.


In [69]:
def nested_list_to_dict(ul):
    level = {}
    lis = ul.findChildren('li' , recursive=False)
    
    for li in lis:
        child_ul = li.find('ul')
        if child_ul == None:
            level[li.next_element] = {}
        else:
            level[li.next_element] = nested_list_to_dict(child_ul)
            
    return level

def file_to_html(file_path):
    f = open(file_path, 'rb')
    document = mammoth.convert_to_html(f)
    return BeautifulSoup(document.value.encode('utf8'))

def doc_to_dict(file_path):
    html = file_to_html(file_path)
    start = html.find('h1', string='call taker')
    event = {}
    current_sit = None

    for element in start.next_elements:
        tag = element.name
        level = {}

        if tag == 'h1':
            break
        elif tag == 'h2':
            current_situation = element.text
            event[current_situation] = {}
        elif tag == 'p':
            next_next_element = element.next_element.next_element
            if next_next_element.name == 'ul':
                event[current_situation][element.text] = nested_list_to_dict(next_next_element.find('ul'))
            else:
                event[current_situation][element.text] = {}

    return event

In [70]:
sop = doc_to_dict("./data/A-ANIMAL.docx")

In [71]:
pprint(sop)

{'Animal bites – Just occurred or with a time delay': {'Advise BCEHS as required': {},
                                                       'Refer to Animal Control': {}},
 'Animal left in a vehicle': {'If animal is in imminent distress': {'Transfer caller to Fire': {}},
                              'In all other instances': {'Refer to Animal Control': {}}},
 'Deceased animals': {' Large animal': {'On highways': {'Contact the Department of Highways': {}},
                                        'On municipal property': {'Refer to Public Works Yard': {}}},
                      'Small animal': {'Refer to Animal Control': {}}},
 'Dogs at large': {'If running at large and attacking or viciously pursuing a person or domestic animal or livestock': {'Create a call': {}},
                   'In all other cases ': {'Refer to Animal Control': {}}},
 'Dogs barking': {'Refer to Animal Control': {}},
 'Livestock at large': {'Create a call': {'Can the animal be contained?': {},
                 

In [72]:
def draw(graph, parent_name, child_name):
    edge = pydot.Edge(parent_name, child_name)
    graph.add_edge(edge)

def visit(graph, node, parent=None):
    for k,v in node.items():
        if parent:
            draw(graph, parent, k)
        visit(graph, v, k)

def dict_to_graph(sop_dict):
    graph = pydot.Dot(graph_type='graph', rankdir='LR')
    visit(graph, sop_dict)
    
    return graph

In [73]:
graph = dict_to_graph(sop)

In [74]:
graph.write_png('example1_graph.png')

In [75]:
def docs_to_dict(dir_path):
    files = [f for f in listdir(dir_path) if isfile(join(dir_path, f))]
    dir_dict = {}
    
    for file_path in files:
        file_dict = doc_to_dict(dir_path + '/' + file_path)
        dir_dict = always_merger.merge(dir_dict, file_dict)

    return dir_dict

In [76]:
big_dict = docs_to_dict('./data')

In [77]:
graph = dict_to_graph(big_dict)

In [78]:
graph.write_png('example2_graph.png')

In [190]:
nlp = stanfordnlp.Pipeline()

Use device: cpu
---
Loading: tokenize
With settings: 
{'model_path': '/Users/jackyho/stanfordnlp_resources/en_ewt_models/en_ewt_tokenizer.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
---
Loading: pos
With settings: 
{'model_path': '/Users/jackyho/stanfordnlp_resources/en_ewt_models/en_ewt_tagger.pt', 'pretrain_path': '/Users/jackyho/stanfordnlp_resources/en_ewt_models/en_ewt.pretrain.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
---
Loading: lemma
With settings: 
{'model_path': '/Users/jackyho/stanfordnlp_resources/en_ewt_models/en_ewt_lemmatizer.pt', 'lang': 'en', 'shorthand': 'en_ewt', 'mode': 'predict'}
Building an attentional Seq2Seq model...
Using a Bi-LSTM encoder
Using soft attention for LSTM.
Finetune all embeddings.
[Running seq2seq lemmatizer with edit classifier]
---
Loading: depparse
With settings: 
{'model_path': '/Users/jackyho/stanfordnlp_resources/en_ewt_models/en_ewt_parser.pt', 'pretrain_path': '/Users/jackyho/stanfordnlp_resources/en_ew

In [191]:
sop['Deceased animals']

{'Small animal': {'Refer to Animal Control': {}},
 ' Large animal': {'On municipal property': {'Refer to Public Works Yard': {}},
  'On highways': {'Contact the Department of Highways': {}}}}

In [192]:
def flatten_sop(sop_dict):
    new_sop_dict = {}
    
    for key, value in sop_dict.items(): 
        new_sop_dict[key] = merge_dict_conditions(value)
    
    return new_sop_dict

In [223]:
def merge_dict_conditions(sop_dict, previous_levels = []):
    new_sop_dict = {}
    
    for key, value in sop_dict.items():
        doc = nlp(str(key))
        
        print(key)
        print(doc.sentences[0].words[0].upos)

        if doc.sentences[0].words[0].upos == 'VERB':
            if len(previous_levels) == 0:
                new_sop_dict[key] = value
            else:
                for level in previous_levels:
                    new_sop_dict[level] = {}
                    new_sop_dict[level][key] = value
        else:
            flattened_levels = []
            if len(previous_levels) == 0:
                flattened_levels.append(key)
            else:
                flattened_levels += list(map(lambda x: x + ' - ' + key, previous_levels))
            
            new_dict = merge_dict_conditions(value, flattened_levels)
            new_sop_dict = always_merger.merge(new_sop_dict, new_dict)   
            
    return new_sop_dict

In [224]:
sop['Animal left in a vehicle']

{'If animal is in imminent distress': {'Transfer caller to Fire': {}},
 'In all other instances': {'Refer to Animal Control': {}}}

In [225]:
merge_dict_conditions(sop['Animal left in a vehicle'])

If animal is in imminent distress
SCONJ
Transfer caller to Fire
NOUN
In all other instances
ADP
Refer to Animal Control
VERB




{'In all other instances': {'Refer to Animal Control': {}}}

In [221]:
flat_sop

{'In all other instances': {'Refer to Animal Control': {}}}

In [207]:
dict_to_graph(flat_sop).write_png('example1b_graph.png')

In [210]:
pprint(sop)

{'Animal bites – Just occurred or with a time delay': {'Advise BCEHS as required': {},
                                                       'Refer to Animal Control': {}},
 'Animal left in a vehicle': {'If animal is in imminent distress': {'Transfer caller to Fire': {}},
                              'In all other instances': {'Refer to Animal Control': {}}},
 'Deceased animals': {' Large animal': {'On highways': {'Contact the Department of Highways': {}},
                                        'On municipal property': {'Refer to Public Works Yard': {}}},
                      'Small animal': {'Refer to Animal Control': {}}},
 'Dogs at large': {'If running at large and attacking or viciously pursuing a person or domestic animal or livestock': {'Create a call': {}},
                   'In all other cases ': {'Refer to Animal Control': {}}},
 'Dogs barking': {'Refer to Animal Control': {}},
 'Livestock at large': {'Create a call': {'Can the animal be contained?': {},
                 

In [211]:
pprint(flat_sop)

{'Animal bites – Just occurred or with a time delay': {'Advise BCEHS as required': {},
                                                       'Refer to Animal Control': {}},
 'Animal left in a vehicle': {'In all other instances': {'Refer to Animal Control': {}}},
 'Deceased animals': {' Large animal - On highways': {'Contact the Department of Highways': {}},
                      ' Large animal - On municipal property': {'Refer to Public Works Yard': {}},
                      'Small animal': {'Refer to Animal Control': {}}},
 'Dogs at large': {'If running at large and attacking or viciously pursuing a person or domestic animal or livestock': {'Create a call': {}},
                   'In all other cases ': {'Refer to Animal Control': {}}},
 'Dogs barking': {'Refer to Animal Control': {}},
 'Livestock at large': {'Create a call': {'Can the animal be contained?': {},
                                          'Description': {},
                                          'How many animals a