In [1]:
import spacy #conda install spacy
             #python -m spacy download en_core_web_sm

nlp = spacy.load("en_core_web_sm")

text = r"Janet Yellen said the Federal Reserve Board wouldn't raise the federal funds rate by 1/4 percent this period."

doc = nlp(text)

## LANGUAGE!

https://spacy.io/usage/models#languages

NOUN: A word referring to a person, place or thing
   
PROPER NOUN: The name referring to a noun

VERB: A word describing an action or state

ADJECTIVE: A word that modifies or describes a noun

DETERMINER: Words that modify nouns to reference the context (e.g. articles like the, a, an; quantifiers)


## SENTENCES

In [None]:
sents = list(doc.sents)
print(sents[0].string)

In [None]:
list(doc.sents)

## TOKENS

In [None]:
print(list(doc))

token = doc[8]
print(token.lemma_) #lemma

In [None]:
print(list(token.ancestors)) #ancestors

In [None]:
token = doc[9]
print(token)

In [None]:
print(list(token.ancestors))

In [None]:
print(list(token.children))

In [None]:
token = doc[2]
print(token)

In [None]:
list(token.children)

## STOP WORDS

In [None]:
token.is_stop

In [None]:
stopwords = spacy.lang.en.stop_words.STOP_WORDS
print(stopwords)

In [None]:
print([(t, t.is_stop) for t in doc])

In [None]:
[t for t in doc if t.is_stop == False]

## ENTITIES

In [None]:
entities = [(entity.label_, entity.text) for entity in doc.ents]
print(entities)

## DEPENDENCY PARSING

In [3]:
#https://stackoverflow.com/questions/40288323/what-do-spacys-part-of-speech-and-dependency-tags-mean
for t in doc:
    print(t, t.pos_)

Janet PROPN
Yellen PROPN
said VERB
the DET
Federal PROPN
Reserve PROPN
Board PROPN
would VERB
n't ADV
raise VERB
the DET
federal ADJ
funds NOUN
rate NOUN
by ADP
1/4 NUM
percent NOUN
this DET
period NOUN
. PUNCT


In [41]:
spacy.displacy.render(doc, style='dep', options={'distance' : 140}, jupyter=True)

## What direction will the rate go?

In [2]:
doc

Janet Yellen said the Federal Reserve Board wouldn't raise the federal funds rate by 1/4 percent this period.

In [39]:
token = doc[9]

In [40]:
list(token.ancestors)

[said]

In [7]:
[t for t in doc if t.pos_ == 'NOUN']

[funds, rate, percent, period]

In [17]:
rate_mentions = [t for t in doc if t.text == 'rate']
rate_mentions

[rate]

In [18]:
rate_ancestors = [list(r.ancestors) for r in rate_mentions]
rate_ancestors

[[raise, said]]

In [24]:
rate_up = ['raise', 'increase', 'up']
rate_down = ['lower', 'decrease', 'down']
rate_unchanged = ['unchanged', 'same']

up_counter = 0
down_counter = 0
flat_counter = 0

In [25]:
for ra in rate_ancestors:
    for ancestor in ra:
        if ancestor.text in rate_up:
            up_counter += 1
        elif ancestor.text in rate_down:
            down_counter += 1
        elif ancestor.text in rate_unchanged:
            flat_counter += 1

In [26]:
print('Up:', up_counter, '\nUnchanged:', flat_counter, '\nDown:', down_counter)

Up: 1 
Unchanged: 0 
Down: 0


## What direction will the rate go? v2!

In [44]:
list(doc.noun_chunks)

[Janet Yellen, the Federal Reserve Board, the federal funds rate, 1/4 percent]

In [45]:
nchunks = [nc for nc in doc.noun_chunks if 'rate' in nc.string]
nchunks

[the federal funds rate]

In [46]:
kinds_of_rates = {'unemployment': [nc for nc in doc.noun_chunks if 'rate' in nc.string and 'unemployment' in nc.string],
                  'fed funds':    [nc for nc in doc.noun_chunks if 'rate' in nc.string and 'federal' in nc.string],
                  'discount':     [nc for nc in doc.noun_chunks if 'rate' in nc.string and 'discount' in nc.string]}

In [47]:
kinds_of_rates

{'unemployment': [], 'fed funds': [the federal funds rate], 'discount': []}

In [48]:
sp = kinds_of_rates['fed funds'][0]

In [49]:
sp.end

14

In [50]:
print(sp.start, sp.end)

10 14


In [52]:
[(d, i) for d, i in enumerate(doc)]

[(0, Janet),
 (1, Yellen),
 (2, said),
 (3, the),
 (4, Federal),
 (5, Reserve),
 (6, Board),
 (7, would),
 (8, n't),
 (9, raise),
 (10, the),
 (11, federal),
 (12, funds),
 (13, rate),
 (14, by),
 (15, 1/4),
 (16, percent),
 (17, this),
 (18, period),
 (19, .)]

In [51]:
doc[sp.start:sp.end]

the federal funds rate

In [53]:
def span_to_index(sp, doc):
    start = sp.start
    end = sp.end
    return doc[start:end]

In [54]:
kinds_of_rates = {'unemployment': [span_to_index(nc, doc) for nc in doc.noun_chunks if 'rate' in nc.string and 'unemployment' in nc.string],
                  'fed funds':    [span_to_index(nc, doc) for nc in doc.noun_chunks if 'rate' in nc.string and 'federal' in nc.string],
                  'discount':     [span_to_index(nc, doc) for nc in doc.noun_chunks if 'rate' in nc.string and 'discount' in nc.string]}

In [55]:
kinds_of_rates

{'unemployment': [], 'fed funds': [the federal funds rate], 'discount': []}

In [56]:
#[item for sublist in l for item in sublist]
#https://stackoverflow.com/questions/952914/how-to-make-a-flat-list-out-of-list-of-lists
rate_mentions = [t for nc in kinds_of_rates['fed funds'] for t in nc if t.text == 'rate']
rate_mentions

[rate]

In [57]:
rate_ancestors = [list(r.ancestors) for r in rate_mentions]
rate_ancestors

[[raise, said]]

In [58]:
rate_up = ['raise', 'increase', 'up']
rate_down = ['lower', 'decrease', 'down']
rate_unchanged = ['unchanged', 'same']

up_counter = 0
down_counter = 0
flat_counter = 0

In [59]:
for ra in rate_ancestors:
    for ancestor in ra:
        if ancestor.text in rate_up:
            up_counter += 1
        elif ancestor.text in rate_down:
            down_counter += 1
        elif ancestor.text in rate_unchanged:
            flat_counter += 1

In [60]:
print('Up:', up_counter, '\nUnchanged:', flat_counter, '\nDown:', down_counter)

Up: 1 
Unchanged: 0 
Down: 0
