In [3]:
import spacy
from spacy.matcher import PhraseMatcher

nlp = spacy.load("en_core_web_md")
matcher = PhraseMatcher(nlp.vocab)
terms = ["Barack Obama", "Angela Merkel", "Washington, D.C."]
# Only run nlp.make_doc to speed things up
patterns = [nlp.make_doc(text) for text in terms]
matcher.add("TerminologyList", patterns)

doc = nlp("German Chancellor Angela Merkel and US President Barack Obama "
          "converse in the Oval Office inside the White House in Washington, D.C.")
matches = matcher(doc)
for match_id, start, end in matches:
    span = doc[start:end]
    print(span.text)

Angela Merkel
Barack Obama
Washington, D.C.


In [4]:
doc = nlp("Autonomous cars shift insurance liability toward manufacturers")
for chunk in doc.noun_chunks:
    print(chunk.text, chunk.root.text, chunk.root.dep_,
            chunk.root.head.text)

Autonomous cars cars nsubj shift
insurance liability liability dobj shift
manufacturers manufacturers pobj toward


In [5]:
doc = nlp("Autonomous cars shift insurance liability toward manufacturers")
for token in doc:
    print(token.text, token.dep_, token.head.text, token.head.pos_,
            [child for child in token.children])

Autonomous amod cars NOUN []
cars nsubj shift VERB [Autonomous]
shift ROOT shift VERB [cars, liability]
insurance compound liability NOUN []
liability dobj shift VERB [insurance, toward]
toward prep liability NOUN [manufacturers]
manufacturers pobj toward ADP []


In [8]:
# Finding a verb with a subject from below — good
from spacy.symbols import nsubj, VERB
verbs = set()
for possible_subject in doc:
    if possible_subject.dep == nsubj and possible_subject.head.pos == VERB:
        verbs.add(possible_subject.head)
print(verbs)

{shift}


In [9]:
doc = nlp("Credit and mortgage account holders must submit their requests")

root = [token for token in doc if token.head == token][0]
subject = list(root.lefts)[0]
for descendant in subject.subtree:
    assert subject is descendant or subject.is_ancestor(descendant)
    print(descendant.text, descendant.dep_, descendant.n_lefts,
            descendant.n_rights,
            [ancestor.text for ancestor in descendant.ancestors])

Credit nmod 0 2 ['account', 'holders', 'submit']
and cc 0 0 ['Credit', 'account', 'holders', 'submit']
mortgage conj 0 0 ['Credit', 'account', 'holders', 'submit']
account compound 1 0 ['holders', 'submit']
holders nsubj 1 0 ['submit']


In [112]:
doc = nlp("tomorrow now later Apple is looking at buying U.K. startup for $1 billion")

for token in doc:
    print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_,
            token.shape_, token.is_alpha, token.is_stop)

tomorrow tomorrow NOUN NN npadvmod xxxx True False
now now ADV RB advmod xxx True True
later later ADV RBR advmod xxxx True False
Apple Apple PROPN NNP nsubj Xxxxx True False
is be AUX VBZ aux xx True True
looking look VERB VBG ROOT xxxx True False
at at ADP IN prep xx True True
buying buy VERB VBG pcomp xxxx True False
U.K. U.K. PROPN NNP compound X.X. False False
startup startup NOUN NN dobj xxxx True False
for for ADP IN prep xxx True True
$ $ SYM $ quantmod $ False False
1 1 NUM CD compound d False False
billion billion NUM CD pobj xxxx True False


## Forturn Cookie Genre from Frankenstein - Week 6 Homework

Now go to it! It's read to be pick.
How about another forturn?
Lucky number: 45, 39, 46, 14,10, 5

Issues:

1. Can't make Tracery to include the lucky number
2. Lucky number in list. Is there an easy way to display as string seperated with ","
3. Can't find proper VERB. Has to use VBG



In [4]:
docf = nlp(open("./pg84.txt",encoding='utf8').read())


In [116]:
sentences = list(docf.sents)

In [117]:
type(sentences);

In [5]:
numbers=[int(item.text) for item in list(docf) if item.is_digit]

In [6]:
sorted(numbers)

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 3,
 3,
 3,
 3,
 3,
 3,
 3,
 4,
 4,
 4,
 4,
 4,
 4,
 4,
 5,
 5,
 5,
 6,
 6,
 7,
 7,
 8,
 8,
 9,
 9,
 10,
 10,
 11,
 11,
 12,
 12,
 13,
 13,
 14,
 14,
 15,
 15,
 16,
 16,
 17,
 17,
 17,
 18,
 18,
 19,
 19,
 20,
 20,
 20,
 21,
 21,
 22,
 22,
 23,
 23,
 24,
 24,
 30,
 50,
 60,
 64,
 84,
 90,
 90,
 596,
 801,
 809,
 1500,
 1887,
 1993,
 2001,
 2022,
 84116,
 6221541]

In [7]:
some = []
for item in numbers:
    if item not in some:
        some.append(item)

In [122]:
some

[1,
 1993,
 84,
 2,
 2022,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 60,
 30,
 90,
 2001,
 64,
 6221541,
 809,
 1500,
 84116,
 801,
 596,
 1887,
 50]

In [9]:
lucky = []
for item in numbers:
    if item not in lucky and item <=60:
        lucky.append(item)

In [124]:
import random
print("Luck Numbers: " + str(sorted(random.sample(lucky,6))))

Luck Numbers: [2, 4, 15, 17, 21, 60]


In [10]:
time = [item.text for item in list(docf) if item.pos_=='ADV']

In [140]:
time

[anywhere,
 almost,
 whatsoever,
 away,
 Most,
 recently,
 here,
 already,
 far,
 north,
 more,
 ever,
 There,
 ever,
 just,
 There,
 undoubtedly,
 only,
 ever,
 never,
 never,
 so,
 only,
 so,
 much,
 passionately,
 also,
 also,
 well,
 heavily,
 just,
 even,
 now,
 voluntarily,
 often,
 harder,
 greatest,
 Twice,
 actually,
 so,
 now,
 often,
 only,
 sometimes,
 most,
 quickly,
 far,
 more,
 already,
 actually,
 there,
 easily,
 as,
 perhaps,
 again,
 soon,
 never,
 again,
 again,
 slowly,
 here,
 already,
 certainly,
 never,
 yet,
 now,
 most,
 there,
 bitterly,
 yet,
 as,
 well,
 too,
 too,
 still,
 only,
 most,
 Now,
 more,
 more,
 more,
 greatly,
 enough,
 enough,
 Well,
 certainly,
 even,
 here,
 Yet,
 even,
 madly,
 rather,
 more,
 characteristically,
 first,
 easily,
 well,
 very,
 so,
 never,
 equally,
 peculiarly,
 first,
 rather,
 briefly,
 ago,
 once,
 never,
 instantly,
 already,
 together,
 then,
 decidedly,
 so,
 then,
 wholly,
 as,
 more,
 otherwise,
 never,
 as,
 only

In [11]:
noun = [item.text for item in list(docf) if item.pos_=='PROPN']

In [130]:
noun

[Project,
 Gutenberg,
 eBook,
 Frankenstein,
 Modern,
 Prometheus,
 United,
 States,
 Project,
 Gutenberg,
 License,
 United,
 States,
 eBook,
 Frankenstein,
 Modern,
 Prometheus,
 Mary,
 Wollstonecraft,
 Shelley,
 Release,
 October,
 December,
 English,
 Judith,
 Boss,
 Christy,
 Phillips,
 Lynn,
 Hanninen,
 David,
 Meltzer,
 Al,
 Haines,
 Menno,
 de,
 Leeuw,
 GUTENBERG,
 EBOOK,
 FRANKENSTEIN,
 PROMETHEUS,
 Frankenstein,
 Modern,
 Prometheus,
 Mary,
 Wollstonecraft,
 Godwin,
 Shelley,
 CONTENTS,
 Letter,
 Letter,
 Letter,
 Letter,
 Letter,
 Mrs.,
 Saville,
 England,
 _,
 St.,
 Petersburgh,
 Dec.,
 11th,
 London,
 Petersburgh,
 Margaret,
 heaven,
 North,
 Pacific,
 Ocean,
 Uncle,
 Thomas,
 heaven,
 Homer,
 Shakespeare,
 North,
 Sea,
 Greenland,
 Margaret,
 Russia,
 St.,
 Petersburgh,
 Archangel,
 June,
 Farewell,
 Margaret,
 Heaven,
 R.,
 Walton,
 Letter,
 Mrs.,
 Saville,
 England,
 Archangel,
 March,
 Margaret,
 Uncle,
 Thomas,
 Archangel,
 Englishman,
 Turk,
 Ancient,
 Mariner,
 Afri

In [12]:
verb = [item.text for item in list(docf) if item.tag_=='VBG']

In [135]:
verb

[using,
 increasing,
 advancing,
 skirting,
 diffusing,
 preceding,
 sailing,
 surpassing,
 supposing,
 discovering,
 ascertaining,
 arriving,
 learning,
 dying,
 inuring,
 failing,
 travelling,
 walking,
 remaining,
 freezing,
 paying,
 collecting,
 glowing,
 becoming,
 keeping,
 finding,
 being,
 having,
 throwing,
 confessing,
 being,
 thinking,
 according,
 trembling,
 preparing,
 going,
 having,
 floating,
 indicating,
 advancing,
 renovating,
 tracing,
 being,
 swelling,
 recording,
 leaving,
 hoping,
 being,
 fearing,
 talking,
 being,
 persuading,
 perceiving,
 hearing,
 hearing,
 rubbing,
 forcing,
 drawing,
 concerning,
 decaying,
 conciliating,
 being,
 concerning,
 feeling,
 watching,
 burning,
 Having,
 being,
 quelling,
 concerning,
 finding,
 respecting,
 elevating,
 concerning,
 failing,
 subduing,
 sting,
 pursuing,
 exposing,
 perceiving,
 marrying,
 relating,
 Having,
 endeavouring,
 persuading,
 rankling,
 decreasing,
 attending,
 leaving,
 weeping,
 protecting,
 di

In [18]:
import tracery
from tracery.modifiers import base_english

In [13]:
print(random.choice(time) +" "+ random.choice(verb) + " "+random.choice(noun))

Even including Elizabeth


In [35]:
numberchain = str(sorted(random.sample(lucky,6)));
rules ={
    "origin":"#interj.capitalize# \n #luck# #chain#",
    "interj": (random.choice(time) +" "+ random.choice(verb) + " "+random.choice(noun)),
    "luck": "Luck Numbers: ",
    "chain": str(sorted(random.sample(lucky,6)))
}
grammer = tracery.Grammar(rules)
grammer.add_modifiers(base_english)
print(grammer.flatten("#origin#"))

Sometimes rambling Gutenberg 
 Luck Numbers:  


In [34]:
numberchain

'[7, 13, 15, 16, 30, 50]'

In [38]:
import random
for s in range(10):
    print(random.choice(time).capitalize() +" "+ random.choice(verb) + " "+random.choice(noun)+"!")
    print("Luck Numbers: " + str(sorted(random.sample(lucky,6))))
    print("\n")

Furiously being Gutenberg!
Luck Numbers: [14, 15, 17, 21, 22, 30]


Before approaching Safie!
Luck Numbers: [3, 10, 14, 21, 22, 24]


More climbing Kirwin!
Luck Numbers: [2, 5, 9, 13, 16, 21]


Rather having England!
Luck Numbers: [1, 3, 7, 10, 19, 20]


Here making England!
Luck Numbers: [1, 3, 4, 5, 7, 15]


Alike having Cumberland!
Luck Numbers: [2, 4, 10, 19, 20, 60]


Never being Elizabeth!
Luck Numbers: [2, 3, 8, 11, 18, 23]


Also bidding States!
Luck Numbers: [5, 10, 11, 14, 30, 60]


Therefore reading Project!
Luck Numbers: [3, 6, 8, 9, 18, 22]


Only going Literary!
Luck Numbers: [2, 7, 10, 11, 20, 21]


