In [1]:
def read_file(file_name):
    with open(file_name, 'r') as file:
        return file.read()

In [2]:
import spacy

nlp = spacy.load('en_core_web_lg')

# Process `text` with Spacy NLP Parser
text = read_file('/Users/anirudhprabhu/PycharmProjects/novelWriter/Skeleton/docs/books_txt/Fantasy/465179.txt')

processed_text = nlp(text)
print(processed_text)

﻿
The Gamers:
Dorkness Rising
(The Novel)
Diana Brown
Adapted from the screenplay by
Matt Vancil
Published at Smashwords
The Gamers: Dorkness Rising is the creation of Dead Gentlemen Productions, and is distributed by Zombie Orpheus Entertainment under a Creative Commons license (Attribution-NonCommercial-NoDerivs 3.0 Unported). The Gamers: Dorkness Rising (The Novel) inherits and is distributed under these terms.
Nodwick appears in The Gamers: Dorkness Rising courtesy of Aaron Williams and Do Gooder Press 
Contents
Chapter 1: Dungeon Crawling
Chapter 2: The Gamers
Chapter 3: Alone In The Dark
Chapter 4: Aarrrbuck’s
Chapter 5: A Matter Of Characters
Chapter 6: The Adventure Begins
Chapter 7: Meditating At The Temple Of The Moon
Chapter 8: Mudhollow Inne
Chapter 9: The Road To Westhaven
Chapter 10: Evolving Naturally
Chapter 11: The ‘Real’ World?
Chapter 12: Another Friday Night
Chapter 13: A Scruffy-Looking Peasant
Chapter 14: Westhaven
Chapter 15: Mort Kemnon’s Secret Cave HQ 
Chapter

In [3]:
from __future__ import unicode_literals


class Parse(object):
    def __init__(self, nlp, text, collapse_punctuation, collapse_phrases):
        self.doc = nlp(text)
        if collapse_punctuation:
            spans = []
            for word in self.doc[:-1]:
                if word.is_punct:
                    continue
                if not word.nbor(1).is_punct:
                    continue
                start = word.i
                end = word.i + 1
                while end < len(self.doc) and self.doc[end].is_punct:
                    end += 1
                span = self.doc[start : end]
                spans.append(
                    (span.start_char, span.end_char,
                     {'tag': word.tag_, 'lemma': word.lemma_, 'ent_type': word.ent_type_})
                )
            for start, end, attrs in spans:
                self.doc.merge(start, end, **attrs)

        if collapse_phrases:
            for np in list(self.doc.noun_chunks):
                np.merge(tag=np.root.tag_, lemma=np.root.lemma_, ent_type=np.root.ent_type_)

    def to_json(self):
        words = [{'text': w.text, 'tag': w.tag_} for w in self.doc]
        arcs = []
        for word in self.doc:
            if word.i < word.head.i:
                arcs.append(
                    {
                        'start': word.i,
                        'end': word.head.i,
                        'label': word.dep_,
                        'dir': 'left'
                    })
            elif word.i > word.head.i:
                arcs.append(
                    {
                        'start': word.head.i,
                        'end': word.i,
                        'label': word.dep_,
                        'dir': 'right'
                    })
        return {'words': words, 'arcs': arcs}


class Entities(object):
    def __init__(self, nlp, text):
        self.doc = nlp(text)
     
    def to_json(self):
        return [{'start': ent.start_char, 'end': ent.end_char, 'type': ent.label_}
                for ent in self.doc.ents]
    
    




In [15]:
from subject_object_extraction import findSVOs

# can still work even without punctuation
parse = nlp("he and his brother shot me and my sister")
print(findSVOs(processed_text))

[('dungeon', 'stalked', 'hallway'), ('men', 'stalked', 'hallway'), ('henchman', 'shifted', 'chest'), ('men', 'thought', '–'), ('henchmen', 'are', 'men'), ('discipline', 'brought', 'mind'), ('he', 'weave', 'way'), ('he', 'examined', 'walls'), ('he', 'examined', 'floor'), ('corridors', 'marked', 'entrance'), ('rennard', 'found', 'end'), ('nothing', 'been', '–'), ('–', 'be', 'trap'), ('–', 'be', 'trap'), ('rennard', 'seized', 'hand'), ('turk', 'turned', 'face'), ('what', 'been', 'disaster'), ('priest', 'do', 'work'), ('hand', 'wrapped', 'size'), ('he', 'examined', 'runes'), ('he', 'invoke', 'hands'), ('he', 'invoke', 'light'), ('it', 'was', '–'), ('he', 'picked', 'it'), ('they', 'were', 'creatures'), ('creatures', 'devour', 'unwary'), ('fighter', 'engaged', 'aggressive'), ('rogue', 'engaged', 'aggressive'), ('him', 'wrap', 'himself'), ('fastidian', 'joined', 'will'), ('he', 'cast', 'glance'), ('thing', 'was', 'someone'), ('he', 'turned', 'attention'), ('it', 'be', 'end'), ('examination', 

In [5]:
# very complex sample. Only some are correct. Some are missed.
docs = nlp("Far out in the uncharted backwaters of the unfashionable end of the Western Spiral arm of the Galaxy lies a small unregarded yellow sun. "
                "Orbiting this at a distance of roughly ninety-two million miles is an utterly insignificant little blue green planet whose ape-descended "
                "life forms are so amazingly primitive that they still think digital watches are a pretty neat idea. "
                "This planet has – or rather had – a problem, which was this: most of the people living on it were unhappy for pretty much of the time. "
                "Many solutions were suggested for this problem, but most of these were largely concerned with the movements of small green pieces of paper, "
                "which is odd because on the whole it wasn’t the small green pieces of paper that were unhappy. And so the problem remained; lots of the "
                "people were mean, and most of them were miserable, even the ones with digital watches.")
#print(findSVOs(parse))

In [11]:
import spacy

nlp = spacy.load('en_core_web_lg')
doc = nlp('Bloomberg announced today that Gordian Capital, a Singapore-based institutional fund management platform, will implement the Bloomberg Entity Exchange solution to help its clients pursue new fund opportunities faster.')

for ent in list(doc.ents):
    print(ent)
    

today
Gordian Capital
Singapore
Bloomberg Entity Exchange


In [10]:
from spacy.lang.en import English
parser = English()

example = u"Donald Trump is the worst president of USA, but Hillary is better than him"
parsedEx = parser(example)
# shown as: original token, dependency tag, head word, left dependents, right dependents
for token in parsedEx:
    print(token.orth_, token.dep_, token.head.orth_, [t.orth_ for t in token.lefts], [t.orth_ for t in token.rights])


Donald  Donald [] []
Trump  Trump [] []
is  is [] []
the  the [] []
worst  worst [] []
president  president [] []
of  of [] []
USA  USA [] []
,  , [] []
but  but [] []
Hillary  Hillary [] []
is  is [] []
better  better [] []
than  than [] []
him  him [] []


In [12]:
import spacy
from textpipeliner import PipelineEngine, Context
from textpipeliner.pipes import *

nlp = spacy.load('en_core_web_lg')
doc = nlp(text)



In [13]:
pipes_structure = [SequencePipe([FindTokensPipe("VERB/nsubj/*"),
                                 NamedEntityFilterPipe(),
                                 NamedEntityExtractorPipe()]),
                   FindTokensPipe("VERB"),
                   AnyPipe([SequencePipe([FindTokensPipe("VBD/dobj/NNP"),
                                          AggregatePipe([NamedEntityFilterPipe("GPE"), 
                                                NamedEntityFilterPipe("PERSON")]),
                                          NamedEntityExtractorPipe()]),
                            SequencePipe([FindTokensPipe("VBD/**/*/pobj/NNP"),
                                          AggregatePipe([NamedEntityFilterPipe("LOC"), 
                                                NamedEntityFilterPipe("PERSON")]),
                                          NamedEntityExtractorPipe()])])]

engine = PipelineEngine(pipes_structure, Context(doc), [0,1,2])
engine.process()


[([Cass], [leveled], [Gary]),
 ([Lodge], [rested], [Cass]),
 ([Cass], [turned], [Gary]),
 ([Lodge], [was], [Cass]),
 ([Joanna], [handed], [Cass]),
 ([Gary], [told], [Lodge]),
 ([Lodge], [looked], [Leo]),
 ([Lodge], [talked], [Cass]),
 ([Daphne], [tried], [Flynn]),
 ([Cass], [said], [Joanna]),
 ([Joanna], [turned], [Gary]),
 ([Joanna], [said], [Gary]),
 ([Lodge], [reminded], [Leo]),
 ([Agrippa], [followed], [Hadoken]),
 ([Cass], [climbed], [Leo]),
 ([, Lodge], [dropped], [Mitch]),
 ([Gary], [leaned], [Leo]),
 ([Leo], [picked], [Gary]),
 ([Gary], [declared], [Sean, Connery]),
 ([Daphne], [killed], [Drazuul]),
 ([Daphne], [glared], [Drazuul]),
 ([Joanna], [looked], [Leo]),
 ([Joanna], [looked], [Leo]),
 ([Gary], [whipped], [Cass]),
 ([Daphne], [asked], [Nodwick]),
 ([, Leo], [turned], [Gary]),
 ([Turk], [hit], [Daphne]),
 ([Lodge], [picked], [Leo]),
 ([Gary], [spotted], [Leo]),
 ([Joanna], [looked], [Cass]),
 ([Cass], [turned], [Gary]),
 ([Daphne], [beckoned], [Flynn]),
 ([Leo], [said], [

In [72]:
pipes_structure = [SequencePipe([FindTokensPipe("VERB/nsubj/NNP"),
                                 NamedEntityFilterPipe(),
                                 NamedEntityExtractorPipe()]),
                       AggregatePipe([FindTokensPipe("VERB"),
                                      FindTokensPipe("VERB/xcomp/VERB/aux/*"),
                                      FindTokensPipe("VERB/xcomp/VERB")]),
                       AnyPipe([FindTokensPipe("VERB/[acomp,amod]/ADJ"),
                                AggregatePipe([FindTokensPipe("VERB/[dobj,attr]/NOUN/det/DET"),
                                               FindTokensPipe("VERB/[dobj,attr]/NOUN/[acomp,amod]/ADJ")])])
                      ]

engine = PipelineEngine(pipes_structure, Context(doc), [0,1,2])
engine.process()

[([Turk], [turned], [a]),
 ([Turk], [were], [wary]),
 ([Cass], [leveled], [an, accusatory]),
 ([Leo], [left], [the]),
 ([Cass], [was], [willing]),
 ([Lodge], [picked], [the]),
 ([Lodge], [considered], [the]),
 ([Lodge], [took], [a]),
 ([Mark], [was], [the, only]),
 ([Mark], [joining], [the]),
 ([Cass], [had], [important]),
 ([Lodge], [was], [able]),
 ([Cass], [noted], [odd]),
 ([Leo], [owned], [own]),
 ([Joann], [entered], [the, back]),
 ([Joanna], [was], [sure]),
 ([Gary], [mimicked], [the]),
 ([Joanna], [resisted], [the]),
 ([Joanna], [handed], [the]),
 ([Joanna], [took], [the]),
 ([Cass], [was], [right]),
 ([Lodge], [stifled], [a]),
 ([Leo], [rolled], [a]),
 ([Cass], [shook], [the]),
 ([Cass], [indicated], [the]),
 ([Joanna], [was], [about]),
 ([Mort, Kemnon], [discovered], [an, evil]),
 ([Daphne], [studied], [the]),
 ([Gary], [raised], [both]),
 ([Gary], [looked], [mystified]),
 ([Daphne], [was], [certain]),
 ([Daphne], [took], [careful]),
 ([Leo], [moaned], [a]),
 ([Osric], [had],

In [56]:
pipes_structure_comp = [SequencePipe([FindTokensPipe("VERB/conj/VERB/nsubj/NNP"),
                                 NamedEntityFilterPipe(),
                                 NamedEntityExtractorPipe()]),
                   AggregatePipe([FindTokensPipe("VERB/conj/VERB"),
                                  FindTokensPipe("VERB/conj/VERB/xcomp/VERB/aux/*"),
                                  FindTokensPipe("VERB/conj/VERB/xcomp/VERB")]),
                   AnyPipe([FindTokensPipe("VERB/conj/VERB/[acomp,amod]/ADJ"),
                            AggregatePipe([FindTokensPipe("VERB/conj/VERB/[dobj,attr]/NOUN/det/DET"),
                                           FindTokensPipe("VERB/conj/VERB/[dobj,attr]/NOUN/[acomp,amod]/ADJ")])])
                  ]

engine2 = PipelineEngine(pipes_structure_comp, Context(doc), [0,1,2])



In [57]:
engine2.process()

[([Daphne], [had], [no]), ([Jack], [sent, was], [clever])]

In [80]:
pipes_structure = [SequencePipe([FindTokensPipe("VERB/nsubj/NNP"),
                                 NamedEntityFilterPipe(),
                                 NamedEntityExtractorPipe()]),
                       AggregatePipe([FindTokensPipe("VERB"),
                                      FindTokensPipe("VERB/xcomp/VERB/aux/*"),
                                      FindTokensPipe("VERB/xcomp/VERB")]),
                       AnyPipe([FindTokensPipe("VERB/[acomp,amod]/ADJ"),
                                AggregatePipe([FindTokensPipe("VERB/[dobj,attr]/NOUN/det/DET"),
                                               FindTokensPipe("VERB/[dobj,attr]/NOUN/[acomp,amod]/ADJ")])])
                      ]

engine = PipelineEngine(pipes_structure, Context(doc), [0,1,2])
engine.process()


[([Turk], [turned], [a]),
 ([Turk], [were], [wary]),
 ([Cass], [leveled], [an, accusatory]),
 ([Leo], [left], [the]),
 ([Cass], [was], [willing]),
 ([Lodge], [picked], [the]),
 ([Lodge], [considered], [the]),
 ([Lodge], [took], [a]),
 ([Mark], [was], [the, only]),
 ([Mark], [joining], [the]),
 ([Cass], [had], [important]),
 ([Lodge], [was], [able]),
 ([Cass], [noted], [odd]),
 ([Leo], [owned], [own]),
 ([Joann], [entered], [the, back]),
 ([Joanna], [was], [sure]),
 ([Gary], [mimicked], [the]),
 ([Joanna], [resisted], [the]),
 ([Joanna], [handed], [the]),
 ([Joanna], [took], [the]),
 ([Cass], [was], [right]),
 ([Lodge], [stifled], [a]),
 ([Leo], [rolled], [a]),
 ([Cass], [shook], [the]),
 ([Cass], [indicated], [the]),
 ([Joanna], [was], [about]),
 ([Mort, Kemnon], [discovered], [an, evil]),
 ([Daphne], [studied], [the]),
 ([Gary], [raised], [both]),
 ([Gary], [looked], [mystified]),
 ([Daphne], [was], [certain]),
 ([Daphne], [took], [careful]),
 ([Leo], [moaned], [a]),
 ([Osric], [had],