In [1]:
## Data in - common-data/Example-TDBank-PersonalAcctAgree.txt
# Data fragments in multiple languages. Every fragment keeps its trailing
# space so the pieces can be concatenated directly into one text.

# English
text1 = (
    "This is an important document. It contains the contract governing "
    "your deposit relationship with the Bank and required legal "
    "disclosures. Please have it translated. "
)

# Spanish
text2 = (
    "Este es un documento importante. Contiene el contrato que "
    "rige su relación de depósitos con el Banco y declaraciones "
    "de información exigidas por ley. Por favor, mande a hacer la "
    "traducción de este documento. "
)

# French
text3 = (
    "Ce document est important. Il contient le contrat régissant vos "
    "rapports avec la Banque en votre qualité de déposant ainsi que "
    "les informations exigées par la loi. Veuillez le faire traduire. "
)

# Portuguese (the double space after "lei." is part of the fragment)
text4 = (
    "Este documento é importante. Contém o contrato que governa "
    "a sua relação para depósitos com o banco e as declarações "
    "requeridas por lei.  Por favor mande traduzir. "
)

In [2]:
# Import spaCy and load the 'en_core_web_sm' pipeline.
# This one pipeline object (`nlp`) is what the cells below apply to every
# fragment, including the non-English ones.
import spacy
nlp = spacy.load('en_core_web_sm')

In [3]:
# Run the pipeline once over all four fragments joined into a single text
doc = nlp("".join((text1, text2, text3, text4)))

In [4]:
# Show every token next to its lemma, as one list of (text, lemma) pairs
print([(tok.text, tok.lemma_) for tok in doc])

[('This', 'this'), ('is', 'be'), ('an', 'an'), ('important', 'important'), ('document', 'document'), ('.', '.'), ('It', '-PRON-'), ('contains', 'contain'), ('the', 'the'), ('contract', 'contract'), ('governing', 'govern'), ('your', '-PRON-'), ('deposit', 'deposit'), ('relationship', 'relationship'), ('with', 'with'), ('the', 'the'), ('Bank', 'Bank'), ('and', 'and'), ('required', 'require'), ('legal', 'legal'), ('disclosures', 'disclosure'), ('.', '.'), ('Please', 'please'), ('have', 'have'), ('it', '-PRON-'), ('translated', 'translate'), ('.', '.'), ('Este', 'Este'), ('es', 'es'), ('un', 'un'), ('documento', 'documento'), ('importante', 'importante'), ('.', '.'), ('Contiene', 'Contiene'), ('el', 'el'), ('contrato', 'contrato'), ('que', 'que'), ('rige', 'rige'), ('su', 'su'), ('relación', 'relación'), ('de', 'de'), ('depósitos', 'depósitos'), ('con', 'con'), ('el', 'el'), ('Banco', 'Banco'), ('y', 'y'), ('declaraciones', 'declaracione'), ('de', 'de'), ('información', 'información'), ('e

In [5]:
# Parts of speech: one line per token with its text, fine-grained tag,
# coarse part of speech, and spaCy's description of the fine-grained tag
for token in doc:
    print(f"{token} {token.tag_} {token.pos_} {spacy.explain(token.tag_)}")

This DT DET determiner
is VBZ AUX verb, 3rd person singular present
an DT DET determiner
important JJ ADJ adjective
document NN NOUN noun, singular or mass
. . PUNCT punctuation mark, sentence closer
It PRP PRON pronoun, personal
contains VBZ VERB verb, 3rd person singular present
the DT DET determiner
contract NN NOUN noun, singular or mass
governing VBG VERB verb, gerund or present participle
your PRP$ DET pronoun, possessive
deposit NN NOUN noun, singular or mass
relationship NN NOUN noun, singular or mass
with IN ADP conjunction, subordinating or preposition
the DT DET determiner
Bank NNP PROPN noun, proper singular
and CC CCONJ conjunction, coordinating
required VBD VERB verb, past tense
legal JJ ADJ adjective
disclosures NNS NOUN noun, plural
. . PUNCT punctuation mark, sentence closer
Please UH INTJ interjection
have VB AUX verb, base form
it PRP PRON pronoun, personal
translated VBN VERB verb, past participle
. . PUNCT punctuation mark, sentence closer
Este NNP PROPN noun, prop

In [6]:
# Noun chunks: print the text of each noun phrase spaCy finds in the document
for chunk in doc.noun_chunks:
    print(chunk.text)

an important document
It
the contract
your deposit relationship
the Bank
legal disclosures
it
Este
un documento importante
el Banco
y
por ley
Por
mande
la traducción de este documento
Ce document
est
contient le contrat régissant vos rapports
la Banque
votre
par
la loi
Veuillez le faire traduire
Este documento é importante
Contém o contrato que governa
a sua relação
para depósitos com
o banco
e
declarações requeridas por lei
Por
mande traduzir


In [7]:
# Do import
import textacy

In [8]:
# Build the document textacy works on from the same four fragments,
# joined into one text and processed with the same model name as above
new_doc = textacy.make_spacy_doc(
    "".join((text1, text2, text3, text4)),
    lang='en_core_web_sm',
)

In [9]:
# Extract verb phrases with a part-of-speech pattern:
# an optional verb, any number of adverbs, then one or more verbs.
pattern = r'(<VERB>?<ADV>*<VERB>+)'
# Materialise the matches into a list. pos_regex_matches yields its spans
# lazily, so a bare generator would be exhausted by the first loop over it
# and re-running the printing cell on its own would silently show nothing.
# NOTE(review): the stray `action="once",` in the recorded output below looks
# like the tail of a deprecation warning for this function; newer textacy
# releases replace it, so confirm against the installed version before upgrading.
verb_phrases = list(textacy.extract.pos_regex_matches(new_doc, pattern))

In [10]:
# Print the text of each match produced by the verb-phrase pattern above.
# NOTE(review): if verb_phrases is a one-shot iterator, re-running only this
# cell prints nothing; re-run the extraction cell first. TODO confirm.
for chunk in verb_phrases:
    print(chunk.text)

contains
governing
required
translated
por
favor
contrat
exigées
Este
favor


  action="once",


In [11]:
# Dependency graphs in different languages
# Show dependency graph
from spacy import displacy


In [12]:
# Parse the English fragment on its own and draw its dependency graph.
# Note: this rebinds `doc`, which until now held the parse of all four
# fragments together, so earlier cells re-run after this one see only text1.
doc = nlp(text1)
displacy.render(doc, style='dep')

In [13]:
# Same for the Spanish fragment; `doc` is rebound again.
# In the visible cells `nlp` is still the pipeline loaded from
# 'en_core_web_sm', so this graph comes from that model, not a Spanish one.
doc = nlp(text2)
displacy.render(doc, style='dep')