In [17]:
import spacy

# Prerequisite: the 'en_core_web_sm' model package has to be installed before
# spacy.load() below can load it. Install it once, either with the spaCy
# command-line interface or by uncommenting the next line.
# NOTE(review): the saved output of this cell still shows a download-success
# message, so the line below was presumably run once and then commented out.
# spacy.cli.download('en_core_web_sm')

# load pre-trained model pipeline
nlp = spacy.load('en_core_web_sm')

# short sample sentence to run through the pipeline
text = 'spaCy is pretty cool.'

# apply the pipeline; `doc` holds the processed result
doc = nlp(text)

✔ Download and installation successful
You can now load the model via spacy.load('en_core_web_sm')


In [33]:
from spacy import displacy
import spacy

# NOTE(review): the load / text / parse steps below repeat the first cell
# verbatim — presumably so this cell can run on its own; confirm the
# duplication is intentional.
# load pre-trained model pipeline
nlp = spacy.load('en_core_web_sm')

# text document
text = 'spaCy is pretty cool.'

# apply pipeline
doc = nlp(text)

# draw the dependency parse of `doc` inline in the notebook (jupyter=True)
displacy.render(doc, style='dep', jupyter=True)

In [34]:
from pathlib import Path
from cairosvg import svg2png
from spacy import displacy

# Render the dependency tree to SVG markup. With jupyter=False the markup is
# returned so it can be saved instead of being displayed inline.
# NOTE(review): `doc` is not defined in this cell; it must already exist in
# the kernel from a previously executed cell.
svg = displacy.render(doc, style='dep', jupyter=False)

# save svg
fname = 'parsed_dependency_tree_simple'
output_path = Path('{}.svg'.format(fname))
# write_text opens, writes and closes the file in a single call; the earlier
# open(...).write(...) chain left the file handle unclosed
output_path.write_text(svg, encoding="utf-8")

# save png (converted from the same SVG markup)
svg2png(bytestring=svg, write_to='{}.png'.format(fname))

In [14]:
from spacy import displacy
import spacy

# load pre-trained model pipeline
nlp = spacy.load('en_core_web_sm')

# sample sentence chosen because it mentions several entities
# (a year, organisations, a percentage)
text = """In 2015, independent researchers from Emory University and Yahoo! Labs showed that spaCy offered the fastest syntactic parser in the world and that its accuracy was within 1% of the best available."""

# apply pipeline
doc = nlp(text)

# render the text inline using displaCy's entity style ('ent') rather than
# the dependency style used in the earlier cells
displacy.render(doc, style='ent', jupyter=True)

In [47]:
import pandas as pd
import spacy
from spacy import displacy

# load pre-trained model pipeline
nlp = spacy.load('en_core_web_sm')

# example sentence for the grammar rules: a negated clause ("does not eat")
# joined by "but" to a non-negated one ("loves")
text = """He does not eat meat, but he loves Beyond Burgers."""

# apply pipeline; `doc` is the input to collect_token_meta below
doc = nlp(text)

# collect token meta into df
def collect_token_meta(
    doc: spacy.tokens.Doc
) -> pd.DataFrame:
    """Tabulate the annotations of every token in ``doc``.

    Args:
        doc: iterable of tokens, as produced by applying the pipeline to a
            text.

    Returns:
        A dataframe with one row per token, in document order. The columns
        ``text``, ``lemma``, ``ent``, ``pos``, ``tag`` and ``dep`` hold the
        token's string attributes; ``ancestors``, ``children``, ``subtree``,
        ``lefts`` and ``rights`` each hold a list with the texts of the
        related tokens.
    """

    def _texts(tokens):
        # turn an iterable of tokens into a plain list of their texts
        return [tok.text for tok in tokens]

    rows = []
    for token in doc:
        rows.append(dict(
            text=token.text,
            lemma=token.lemma_,
            ent=token.ent_type_,
            pos=token.pos_,
            tag=token.tag_,
            dep=token.dep_,
            ancestors=_texts(token.ancestors),
            children=_texts(token.children),
            subtree=_texts(token.subtree),
            lefts=_texts(token.lefts),
            rights=_texts(token.rights),
        ))

    # build the frame once from the collected records
    return pd.DataFrame(rows)

# show the per-token table for the sentence parsed above as rich cell output
display(collect_token_meta(doc))

Unnamed: 0,text,lemma,ent,pos,tag,dep,ancestors,children,subtree,lefts,rights
0,He,-PRON-,,PRON,PRP,nsubj,[eat],[],[He],[],[]
1,does,do,,AUX,VBZ,aux,[eat],[],[does],[],[]
2,not,not,,PART,RB,neg,[eat],[],[not],[],[]
3,eat,eat,,VERB,VB,ROOT,[],"[He, does, not, meat, ,, but, loves]","[He, does, not, eat, meat, ,, but, he, loves, ...","[He, does, not]","[meat, ,, but, loves]"
4,meat,meat,,NOUN,NN,dobj,[eat],[],[meat],[],[]
5,",",",",,PUNCT,",",punct,[eat],[],"[,]",[],[]
6,but,but,,CCONJ,CC,cc,[eat],[],[but],[],[]
7,he,-PRON-,,PRON,PRP,nsubj,"[loves, eat]",[],[he],[],[]
8,loves,love,,VERB,VBZ,conj,[eat],"[he, Beyond, .]","[he, loves, Beyond, Burgers, .]",[he],"[Beyond, .]"
9,Beyond,beyond,PERSON,ADP,IN,prep,"[loves, eat]",[Burgers],"[Beyond, Burgers]",[],[Burgers]


In [48]:
# draw the dependency parse inline.
# NOTE(review): `doc` is whatever the kernel last bound — presumably the
# "He does not eat meat, ..." sentence from the previous cell; confirm.
displacy.render(doc, style='dep', jupyter=True)

In [52]:
# `t` is the typing alias used in the annotations of get_parent_verb below
import typing as t
import pandas as pd
import spacy
from spacy import displacy

# load pre-trained model pipeline
nlp = spacy.load('en_core_web_sm')

# same example sentence as in the token-table cell: one negated clause and
# one non-negated clause
text = """He does not eat meat, but he loves Beyond Burgers."""

# apply pipeline
doc = nlp(text)

def get_parent_verb(
    token: spacy.tokens.Token
) -> t.Optional[t.Tuple[spacy.tokens.Token, int]]:
    """Find the first governing verb of ``token`` and that verb's polarity.

    Walks ``token.ancestors`` in the order they are yielded and stops at the
    first ancestor that is either tagged as a verb (``pos_ == 'VERB'``) or is
    the root of the parse (``dep_ == 'ROOT'``).

    Args:
        token: token whose ancestors are searched.

    Returns:
        ``(verb, polarity)``, where ``polarity`` is -1 when the verb has a
        direct child with dependency label ``'neg'`` and 1 otherwise.
        ``None`` when no ancestor qualifies.
    """
    for ancestor in token.ancestors:
        # 'ROOT' is a dependency label, not a part-of-speech tag, so it has to
        # be matched against dep_; testing it against pos_ could never match
        if ancestor.pos_ == 'VERB' or ancestor.dep_ == 'ROOT':
            # a negation marker can sit on either side of its head, so look at
            # all direct children instead of only the left-hand ones
            negated = any(child.dep_ == 'neg' for child in ancestor.children)
            # first qualifying ancestor wins
            return (ancestor, -1 if negated else 1)
    return None


# hard code the "Beyond" token: index 9 matches row 9 of the token table
# shown earlier, so the index is tied to this exact sentence
token = doc[9]

# get parent verb and polarity; as the last expression of the cell, the
# returned value becomes the cell output
get_parent_verb(token)

(loves, 1)

In [None]:
# NOTE(review): this cell has no execution count and repeats the setup of the
# previous cell line for line — presumably a scratch cell for the next
# experiment; confirm whether it is still needed.
import typing as t
import pandas as pd
import spacy
from spacy import displacy

# load pre-trained model pipeline
nlp = spacy.load('en_core_web_sm')

# sentence for grammar rules
text = """He does not eat meat, but he loves Beyond Burgers."""

# apply pipeline
doc = nlp(text)

