In [2]:
import spacy 
from spacy import displacy

# Load the en_core_web_lg pipeline once; the cells below call this `nlp` object.
nlp = spacy.load("en_core_web_lg")

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [3]:
# Parse a single example sentence and draw its dependency tree inline.
doc = nlp("Apple is going to build a U.K. factory for $6 million.")

displacy.render(
    doc,
    style="dep",
    jupyter=True,
    options={"distance": 100},
)

In [4]:
# One row per token: its text, its dependency label, and spaCy's explanation of that label.
for token in doc:
    dep_description = spacy.explain(token.dep_)
    print(f"{token.text:10} {token.dep_:10} {dep_description}")

Apple      nsubj      nominal subject
is         aux        auxiliary
going      ROOT       root
to         aux        auxiliary
build      xcomp      open clausal complement
a          det        determiner
U.K.       compound   compound
factory    dobj       direct object
for        prep       prepositional modifier
$          quantmod   modifier of quantifier
6          compound   compound
million    pobj       object of preposition
.          punct      punctuation


In [5]:
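# Alternative to inline rendering: displacy.serve() starts a web server for the
# visualization. Left disabled here because the notebook renders inline instead.
# doc = nlp(u'This is a sentence.')
# displacy.serve(doc, style='dep')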

In [6]:
# Multi-sentence example text.
text = """In ancient Rome, some neighbors live in three adjacent houses. In the center is the house of Senex, who lives there with wife Domina, son Hero, and several slaves, including head slave Hysterium and the musical's main character Pseudolus. A slave belonging to Hero, Pseudolus wishes to buy, win, or steal his freedom. One of the neighboring houses is owned by Marcus Lycus, who is a buyer and seller of beautiful women; the other belongs to the ancient Erronius, who is abroad searching for his long-lost children (stolen in infancy by pirates). One day, Senex and Domina go on a trip and leave Pseudolus in charge of Hero. Hero confides in Pseudolus that he is in love with the lovely Philia, one of the courtesans in the House of Lycus (albeit still a virgin)."""

doc = nlp(text)
# doc.sents is a one-shot generator: materialise it so `sents` can still be
# iterated after rendering and so displacy receives a concrete list of spans.
sents = list(doc.sents)

# One dependency diagram per sentence.
displacy.render(sents, style='dep', jupyter=True, options={'distance': 50})

In [7]:
# Parse two sentences separately and render both in a single call as a full page.
doc1, doc2 = [
    nlp(sentence)
    for sentence in ("This is a sentence.", "This is another sentence.")
]
displacy.render(
    [doc1, doc2],
    style="dep",
    page=True,
)

In [8]:
# Dependency diagram for a sentence containing hyphenated modifiers.
doc = nlp("Rats are various medium-sized, long-tailed rodents.")
displacy.render(
    doc,
    style="dep",
    options=dict(distance=100),
)

In [9]:
from IPython.display import HTML, display

# jupyter=False makes render() hand back the markup as a string. Without it,
# render() auto-detects the notebook, displays the figure itself and returns
# None, so `html` would be empty and HTML(html) would have nothing to show.
html = displacy.render(nlp("I'm become death the destroyer of worlds."), jupyter=False)

display(HTML(html))

<IPython.core.display.HTML object>

In [10]:
# Save each dependency visualization to its own .svg file in the working directory.
from pathlib import Path

sentences = ["I'd like to know more about marching squares", "Perhaps I'm the villian"]
for sent in sentences:
    doc = nlp(sent)
    # jupyter=False returns the markup as a string instead of displaying it.
    svg = displacy.render(doc, style="dep", jupyter=False)
    # File name: the sentence's non-punctuation tokens joined with dashes.
    file_name = '-'.join([w.text for w in doc if not w.is_punct]) + ".svg"
    output_path = Path(file_name)
    # write_text opens, writes and closes the file, so no handle is left open.
    output_path.write_text(svg, encoding="utf-8")

In [17]:
# Same dependency view, restyled through the renderer's options dictionary.
doc = nlp("I'm become death the destroyer of worlds.")
displacy.render(
    doc,
    style="dep",
    options={
        "compact": True,
        "bg": "#09a3d5",
        "distance": 100,
        "color": "white",
        "font": "Source Sans Pro",
    },
)

In [20]:
# Switch from the dependency view to the named-entity view.
doc = nlp("The government is to increase GDP by 13% over the coming 5 years.")
displacy.render(
    doc,
    style="ent",
    jupyter=True,
)

In [49]:
# Two-sentence sample used by the entity-rendering cells that follow.
text = (
    "Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million. "
    "By contrast, my kids sold a lot of lemonade."
)

In [50]:
doc2 = nlp(text)

# Handle the text sentence by sentence: each sentence is parsed again on its
# own, then either drawn with its entities or, if it has none, printed as text.
for sentence in doc2.sents:
    sentence_doc = nlp(sentence.text)
    if not sentence_doc.ents:
        print(sentence_doc.text)
        continue
    displacy.render(sentence_doc, style="ent", jupyter=True)

By contrast, my kids sold a lot of lemonade.


In [53]:
# Entity view of the whole text in one call, for comparison with per-sentence rendering.
displacy.render(
    nlp(text),
    style="ent",
    jupyter=True,
)

In [56]:
import wikipedia as wiki

# Number of leading characters to echo; the complete article stays in `text`.
PREVIEW_CHARS = 1000

# Fetch the article (network access required) and keep its plain-text content.
page = wiki.page("Egypt")
text = page.content

# Show only a preview so the cell output is not flooded with the whole article.
print(text[:PREVIEW_CHARS])

Egypt (Arabic: مِصر, romanized: Miṣr, Egyptian Arabic pronunciation: [mæsˤr]), officially the Arab Republic of Egypt, is a transcontinental country spanning the northeast corner of Africa and southwest corner of Asia via a land bridge formed by the Sinai Peninsula. It is bordered by the Mediterranean Sea to the north, the Gaza Strip of Palestine and Israel to the northeast, the Red Sea to the east, Sudan to the south, and Libya to the west. The Gulf of Aqaba in the northeast separates Egypt from Jordan and Saudi Arabia. Cairo is the capital and largest city of Egypt, while Alexandria, the second-largest city, is an important industrial and tourist hub at the Mediterranean coast. At approximately 100 million inhabitants, Egypt is the 14th-most populated country in the world.
Egypt has one of the longest histories of any country, tracing its heritage along the Nile Delta back to the 6th–4th millennia BCE. Considered a cradle of civilisation, Ancient Egypt saw some of the earliest develop

In [109]:
# Run the pipeline over the current `text` (the Wikipedia article content when
# the cells are executed top to bottom); later cells slice this `doc`.
doc = nlp(text)

In [113]:
# Render entities for the leading slice doc[:300] only, not the whole document.
displacy.render(
    doc[:300],
    style="ent",
    jupyter=True,
)

In [71]:
# List every label of the pipeline's "ner" component next to spaCy's description of it.
ner_labels = nlp.get_pipe("ner").labels
for label in ner_labels:
    description = spacy.explain(label)
    print(f"{label:12} {description}")

CARDINAL     Numerals that do not fall under another type
DATE         Absolute or relative dates or periods
EVENT        Named hurricanes, battles, wars, sports events, etc.
FAC          Buildings, airports, highways, bridges, etc.
GPE          Countries, cities, states
LANGUAGE     Any named language
LAW          Named documents made into laws.
LOC          Non-GPE locations, mountain ranges, bodies of water
MONEY        Monetary values, including unit
NORP         Nationalities or religious or political groups
ORDINAL      "first", "second", etc.
ORG          Companies, agencies, institutions, etc.
PERCENT      Percentage, including "%"
PERSON       People, including fictional
PRODUCT      Objects, vehicles, foods, etc. (not services)
QUANTITY     Measurements, as of weight or distance
TIME         Times smaller than a day
WORK_OF_ART  Titles of books, songs, etc.


In [83]:
# Restrict highlighting to the entity labels listed under "ents".
options = dict(ents=["NORP"])

displacy.render(
    doc[:300],
    style="ent",
    jupyter=True,
    options=options,
)

In [106]:
# NOTE(review): the text has no space after "million." - confirm whether that is intentional.
doc = nlp(
    "In China over the last quarter Apple sold nearly 20 thousand iPods "
    "for a profit of $6 million.By contrast, my kids sold a lot of lemonade."
)

In [107]:
# Custom CSS backgrounds per entity label; only the labels under "ents" are drawn.
colors = {
    "ORG": "linear-gradient(90deg, #aa9cfc, #fc9ce7)",
    "GPE": "radial-gradient(yellow, green)",
}

options = {
    "ents": ["ORG", "GPE"],
    "colors": colors,
}

displacy.render(
    doc,
    style="ent",
    jupyter=True,
    options=options,
)

In [114]:
# Attach a title to the document through its user_data before rendering.
doc = nlp("Elon Musk has done it again.")
doc.user_data["title"] = "Elon Musk"
displacy.render(
    doc,
    style="ent",
    jupyter=True,
)

In [115]:
# Entity view of the sentence as the loaded pipeline labels it.
doc = nlp("But Google is starting from behind.")
displacy.render(
    doc,
    style="ent",
    jupyter=True,
)

In [121]:
# Manual mode: supply the text and entity character offsets directly instead of a parsed Doc.
google_ent = {"start": 4, "end": 10, "label": "ORG"}  # characters 4-10 spell "Google"
ex = [
    {
        "text": "But Google is starting from behind.",
        "ents": [google_ent],
        "title": None,
    }
]
displacy.render(ex, style="ent", manual=True)