In [1]:
# pip install -U spacy
# python -m spacy download en_core_web_sm

import spacy
from spacy.symbols import nsubj, VERB
from spacy import displacy


# Load the small English pipeline (tokenizer, tagger, parser, NER).
# NOTE(review): the "sm" package is not expected to ship static word vectors —
# confirm against the model card before relying on similarity scores.
nlp = spacy.load("en_core_web_sm")
    

In [2]:
# Process whole documents
#Some mentors in the course didn't conduct the online training and just give materials to read and answer questions.
#The subjects were not easy and we face problems via time. Every mentor gave different assignments and it was overwhelming.

# Sample feedback text to analyse. Adjacent string literals are concatenated by
# Python with no separator, so the first fragment must end with a space;
# otherwise the text reads "questions.The subjects". The `u` prefix is dropped
# because it is a no-op on Python 3.
text = (
    "Some mentors in the course didn't conduct the online training and just give materials to read and answer questions. "
    "The subjects were not easy and we face problems via time. Every mentor gave different assignments and it was overwhelming."
)

# Run the pipeline once; the later cells all read their annotations from `doc`.
doc = nlp(text)


In [3]:
# Tokenization: show the verbatim text of every token in the document.
token_texts = [tok.text for tok in doc]
print(token_texts)


['Some', 'mentors', 'in', 'the', 'course', 'did', "n't", 'conduct', 'the', 'online', 'training', 'and', 'just', 'give', 'materials', 'to', 'read', 'and', 'answer', 'questions', '.', 'The', 'subjects', 'were', 'not', 'easy', 'and', 'we', 'face', 'problems', 'via', 'time', '.', 'Every', 'mentor', 'gave', 'different', 'assignments', 'and', 'it', 'was', 'overwhelming']


In [4]:
# Per-token annotations, one token per line:
# text, coarse POS, fine-grained tag, dependency label, shape, is_alpha, is_stop.
for tok in doc:
    annotations = (
        tok.text,
        tok.pos_,
        tok.tag_,
        tok.dep_,
        tok.shape_,
        tok.is_alpha,
        tok.is_stop,
    )
    print(*annotations)


Some DET DT det Xxxx True True
mentors NOUN NNS nsubj xxxx True False
in ADP IN prep xx True True
the DET DT det xxx True True
course NOUN NN pobj xxxx True False
did AUX VBD aux xxx True True
n't PART RB neg x'x False True
conduct VERB VB ROOT xxxx True False
the DET DT det xxx True True
online ADJ JJ amod xxxx True False
training NOUN NN dobj xxxx True False
and CCONJ CC cc xxx True True
just ADV RB advmod xxxx True True
give VERB VB conj xxxx True True
materials NOUN NNS dobj xxxx True False
to PART TO aux xx True True
read VERB VB advcl xxxx True False
and CCONJ CC cc xxx True True
answer VERB VB conj xxxx True False
questions NOUN NNS dobj xxxx True False
. PUNCT . punct . False False
The DET DT det Xxx True True
subjects NOUN NNS nsubj xxxx True False
were AUX VBD ROOT xxxx True True
not PART RB neg xxx True True
easy ADJ JJ acomp xxxx True False
and CCONJ CC cc xxx True True
we PRON PRP nsubj xx True True
face VERB VBP conj xxxx True False
problems NOUN NNS dobj xxxx True False


text	unicode	Verbatim text content.

pos_	unicode	Coarse-grained part-of-speech from the Universal POS tag set.

tag_	unicode	Fine-grained part-of-speech.

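dep_	unicode	Syntactic dependency relation.

shape_	unicode	Transform of the token's text that shows its orthographic shape (e.g. "Xxxx", "x'x").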

is_alpha	bool	Does the token consist of alphabetic characters?

is_stop	bool	Is the token part of a “stop list”?

Universal POS tags:

    ADJ: adjective
    ADP: adposition
    ADV: adverb
    AUX: auxiliary verb
    CCONJ: coordinating conjunction
    DET: determiner
    INTJ: interjection
    NOUN: noun
    NUM: numeral
    PART: particle
    PRON: pronoun
    PROPN: proper noun
    PUNCT: punctuation
    SCONJ: subordinating conjunction
    SYM: symbol
    VERB: verb
    X: other


In [5]:
# Named entities recognised in the document: span text followed by its label.
for ent in doc.ents:
    label = ent.label_
    print(ent.text, label)

In [6]:
# Noun chunks: for each one print the chunk text, its root token, the root's
# dependency label, and the text of the root's head.
for np_span in doc.noun_chunks:
    head_word = np_span.root
    print(np_span.text, head_word.text, head_word.dep_, head_word.head.text)

Some mentors mentors nsubj conduct
the course course pobj in
the online training training dobj conduct
materials materials dobj give
questions questions dobj answer
The subjects subjects nsubj were
we we nsubj face
problems problems dobj face
time time pobj via
Every mentor mentor nsubj gave
different assignments assignments dobj gave
it it nsubj was


In [7]:
# Walk the dependency parse: for each token show its dependency label, its
# head (text and coarse POS), and its immediate syntactic children.
for tok in doc:
    governor = tok.head
    dependents = list(tok.children)
    print(tok.text, tok.dep_, governor.text, governor.pos_, dependents)

Some det mentors NOUN []
mentors nsubj conduct VERB [Some, in]
in prep mentors NOUN [course]
the det course NOUN []
course pobj in ADP [the]
did aux conduct VERB []
n't neg conduct VERB []
conduct ROOT conduct VERB [mentors, did, n't, training, and, give, .]
the det training NOUN []
online amod training NOUN []
training dobj conduct VERB [the, online]
and cc conduct VERB []
just advmod give VERB []
give conj conduct VERB [just, materials, read]
materials dobj give VERB []
to aux read VERB []
read advcl give VERB [to, and, answer]
and cc read VERB []
answer conj read VERB [questions]
questions dobj answer VERB []
. punct conduct VERB []
The det subjects NOUN []
subjects nsubj were AUX [The]
were ROOT were AUX [subjects, not, easy, and, face]
not neg were AUX []
easy acomp were AUX []
and cc were AUX []
we nsubj face VERB []
face conj were AUX [we, problems, via, .]
problems dobj face VERB []
via prep face VERB [time]
time pobj via ADP []
. punct face VERB []
Every det mentor NOUN []
men

In [8]:
# Collect the verbs that govern a nominal subject by scanning tokens from below:
# a token whose dependency is nsubj and whose head has POS VERB contributes that
# head. The comparison uses the integer symbol IDs (`.dep` / `.pos`) rather than
# the string forms.
verbs = {
    tok.head
    for tok in doc
    if tok.dep == nsubj and tok.head.pos == VERB
}
print(verbs)

{gave, face, conduct}


In [9]:
# A root token is its own head; take the first one found in the document.
roots = [tok for tok in doc if tok.head == tok]
root = roots[0]

# First syntactic child to the left of the root (for this text: "mentors").
left_children = list(root.lefts)
subject = left_children[0]

# Every token in the subject's subtree is either the subject itself or one of
# its descendants. Print each with its dependency label, left/right child
# counts, and the chain of ancestors.
for node in subject.subtree:
    assert subject is node or subject.is_ancestor(node)
    ancestor_texts = [anc.text for anc in node.ancestors]
    print(node.text, node.dep_, node.n_lefts, node.n_rights, ancestor_texts)

Some det 0 0 ['mentors', 'conduct']
mentors nsubj 1 1 ['conduct']
in prep 0 1 ['mentors', 'conduct']
the det 0 0 ['course', 'in', 'mentors', 'conduct']
course pobj 1 0 ['in', 'mentors', 'conduct']


In [10]:
# Entity annotations as (text, start_char, end_char, label) tuples, where the
# offsets are character positions in the original text.
ents = []
for span in doc.ents:
    ents.append((span.text, span.start_char, span.end_char, span.label_))
print(ents)


[]


In [11]:
# Render the document with displaCy's entity visualizer (style="ent").
# NOTE(review): the entity list printed earlier is empty for this text, so
# there is presumably nothing to highlight here — confirm the rendered output.
displacy.render(doc, style="ent")



In [12]:
# Render the document with displaCy's dependency visualizer (style="dep").
displacy.render(doc, style="dep")

In [13]:
# Sentence segmentation: print each sentence span found in `doc` on its own line.
for sentence in doc.sents:
    sentence_text = sentence.text
    print(sentence_text)

Some mentors in the course didn't conduct the online training and just give materials to read and answer questions.
The subjects were not easy and we face problems via time.
Every mentor gave different assignments and it was overwhelming
