## Lightning Tour

This tutorial notebook accompanies the [Lightning Tour](https://spacy.io/docs/usage/lightning-tour) page.

The following example code snippets give you an overview of spaCy's functionality and usage.

### Load resources and process text

In [1]:
# Make every string literal unicode so we get no unicode-based errors.
# A __future__ import has to be the first statement, so it goes before the rest.
from __future__ import unicode_literals

import itertools

import spacy

# English: load the pipeline, then run it over a short two-sentence text.
en_nlp = spacy.load('en')
en_doc = en_nlp(u'Hello, world. Here are two sentences.')

# German: same two steps with the German pipeline.
de_nlp = spacy.load('de')
de_doc = de_nlp(u'ich bin ein Berliner.')

### Multi-threaded generator

In [2]:
texts = [u'One document.', u'...', u'Lots of documents']
# .pipe streams input, and produces streaming output.
# Build the (lazy) input stream by cycling through `texts`; itertools works on
# both Python 2 and 3 (xrange does not exist on Python 3) and does not need
# the length of `texts` to be spelled out.
iter_texts = itertools.islice(itertools.cycle(texts), 100000000)
for i, doc in enumerate(en_nlp.pipe(iter_texts, batch_size=50, n_threads=4)):
    assert doc.is_parsed
    # Only a small prefix of the stream is consumed; stop after doc index 100.
    if i == 100:
        break

### Get tokens and sentences

In [3]:
# Grab the first token by index and the first sentence from the sentence iterator.
token = en_doc[0]
sentence = next(en_doc.sents)

# The first element of the sentence is the very same token object ...
assert sentence[0] is token
# ... and the sentence covers exactly the first sentence of the text.
assert 'Hello, world.' == sentence.text

### Use integer IDs for any string

In [4]:
# The StringStore maps in both directions: string -> integer ID -> string.
hello_id = en_nlp.vocab.strings['Hello']
hello_str = en_nlp.vocab.strings[hello_id]

# The concrete integer depends on the loaded model data (this notebook
# originally asserted 3125), so only the round trip is checked here rather
# than a hard-coded value.
assert token.orth  == hello_id
assert token.orth_ == hello_str == 'Hello'

### Get and set string views and flags

In [5]:
# NOTE: this cell overwrites shape_ on the lexemes of the shared vocab, so the
# first assertion below only holds the first time the cell is run.
assert token.shape_ == u"Xxxxx"

# Replace every lexeme's shape with a one-letter class; the flags are checked
# in the order alpha, digit, punct, with 'M' as the fallback.
for lexeme in en_nlp.vocab:
    lexeme.shape_ = (
        'W' if lexeme.is_alpha else
        'D' if lexeme.is_digit else
        'P' if lexeme.is_punct else
        'M'
    )

# The token reflects the change made on the vocab entry.
assert token.shape_ == 'W'

### Export to numpy arrays

In [6]:
from spacy.attrs import ORTH, LIKE_URL, IS_OOV

# Export the requested attributes of every token into one array.
attr_ids = [ORTH, LIKE_URL, IS_OOV]
doc_array = en_doc.to_array(attr_ids)

# One row per token, one column per requested attribute.
assert (len(en_doc), len(attr_ids)) == doc_array.shape

# Column 0 holds ORTH: spot-check the first two tokens.
for row in (0, 1):
    assert doc_array[row, 0] == en_doc[row].orth

# Column 1 holds LIKE_URL: check the first token, then the whole column.
assert doc_array[0, 1] == en_doc[0].like_url
assert [t.like_url for t in en_doc] == list(doc_array[:, 1])

### Word vectors

In [7]:
doc = en_nlp(u"Apples and oranges are similar. Boots and hippos aren't.")

# Pick the four tokens to compare by their position in the doc.
apples, oranges, boots, hippos = doc[0], doc[2], doc[6], doc[8]

# The first pair is expected to score higher than the second pair.
fruit_similarity = apples.similarity(oranges)
boots_hippos_similarity = boots.similarity(hippos)
assert fruit_similarity > boots_hippos_similarity

### Part-of-speech tags

In [8]:
from spacy.parts_of_speech import ADV

def is_adverb(token):
    """Return True when the token's part-of-speech ID (``token.pos``) equals ADV."""
    # Compare against the ADV constant imported from spacy.parts_of_speech
    # instead of reaching through the `spacy` package attribute.
    return token.pos == ADV

# Tag IDs are data-specific, so no constants are provided for them; look each
# one up by name in the StringStore.
NNS = en_nlp.vocab.strings['NNS']
NNPS = en_nlp.vocab.strings['NNPS']
def is_plural_noun(token):
    """Return True when the token's tag ID (``token.tag``) is NNS or NNPS."""
    return token.tag in (NNS, NNPS)

def print_coarse_pos(token):
    """Print the token's ``pos_`` attribute (the coarse part-of-speech label)."""
    coarse_label = token.pos_
    print(coarse_label)

def print_fine_pos(token):
    """Print the token's ``tag_`` attribute (the fine part-of-speech label)."""
    fine_label = token.tag_
    print(fine_label)