To install spaCy, run the following command. The leading `!` runs it from a notebook cell; omit it if you type the command directly in a terminal.

`!sudo -H python3 -m pip install -U spacy`

You will also need to install one of spaCy's pre-trained models. To do so, run the following command (again, omit the leading `!` if you are typing it directly in a terminal).


`!sudo python3 -m spacy download en_core_web_sm`


If you have issues with the code, let me know.

In [None]:
import spacy

import pandas as pd


In [None]:
# Build the English spaCy pipeline from the pre-trained model installed above.
# Note: the first load in a session takes a little while, because spaCy has
# to load up its modules.
nlp = spacy.load("en_core_web_sm")

In [None]:
# Run the pipeline on a single sentence; `s` holds the parsed result that the
# next cell iterates over.
s = nlp("This is my first sentence.")

##### Print tokens

In [None]:
# Iterating over the parsed sentence gives its tokens, printed one per line.
for token in s:
    print(token)

In [None]:
# Parse a sentence ending in an exclamation mark and print each token on its
# own line.
s = nlp("This is a sentence with an exclamation mark!")
for token in s:
    print(token)

In [None]:
# Parse a sentence containing a comma and a question mark and print each
# token on its own line.
s = nlp("If you have a comma, what will you do?")
for token in s:
    print(token)

In [None]:
# Parse a sentence containing a contraction ("won't") and print its tokens,
# one per line. `doc` is also used by the cells that follow.
doc = nlp("And won't you split the apostrophe too?")
# Iterate over `doc` (the sentence parsed just above), not over `s`, which
# still holds the sentence from the previous cell.
for t in doc:
    print(t)

**Print sentences**


In [None]:
# Print at most the first 5 sentences of `doc` (one sentence per line).
# enumerate() is only used to supply the counter printed before each sentence.
for i, sent in enumerate(doc.sents):
    # Indices 0-4 are the first five sentences; stop before printing a sixth.
    if i >= 5:
        break
    print(i, "==>", sent)

In [None]:
# Inspect the attributes of the first three tokens of `doc`.
# Iterating over the doc gives one token object at a time, each with many
# properties. By spaCy convention, a property whose name ends in an underscore
# returns the string representation, while the same name without the
# underscore returns an integer ID.
# NOTE(review): whether `prob` and `cluster` hold meaningful values depends on
# the installed spaCy version and model data -- confirm before relying on them.
for index, token in enumerate(doc):
    # Attributes that come as an (integer, string) pair.
    paired_views = (
        ("original:", token.orth, token.orth_),
        ("lowercased:", token.lower, token.lower_),
        ("lemma:", token.lemma, token.lemma_),
        ("shape:", token.shape, token.shape_),
        ("prefix:", token.prefix, token.prefix_),
        ("suffix:", token.suffix, token.suffix_),
    )
    for label, as_int, as_text in paired_views:
        print(label, as_int, as_text)

    # Attributes printed in a single form only.
    print("part of speech:", token.pos_)
    print("log probability:", token.prob)
    print("Brown cluster id:", token.cluster)
    print("----------------------------------------")

    # Stop once the tokens at positions 0, 1 and 2 have been printed.
    if index > 1:
        break

In [None]:
# Collect the distinct lemmas of the entities spaCy found in `doc`, then
# display the resulting set.
entities = {ent.lemma_ for ent in doc.ents}
entities

In [None]:
# Lemmas of the noun chunks in `doc`, in document order, leaving out any lemma
# that is already present in `entities`; then display the list.
chunks = [
    lemma
    for lemma in (chunk.lemma_ for chunk in doc.noun_chunks)
    if lemma not in entities
]
chunks

In [None]:
# Load the tab-separated training file. It is read without a header row, so
# the two columns are named explicitly.
training = pd.read_csv(
    "data/training.txt",
    sep="\t",
    header=None,
    names=["text", "sentiment"],
)

In [None]:
# Display the first rows of the training DataFrame as a quick sanity check.
training.head()

In [None]:
# Print the text and the sentiment label of the first row only.
# Columns are read by label: integer keys such as r[0] on a label-indexed row
# rely on a positional fallback that recent pandas versions deprecate.
for i, r in training.iterrows():
    print(r["text"], r["sentiment"])
    break  # only the first row is shown

In [None]:

# Parse the text of the first training row into sentences. For each sentence,
# print its adjectives and adverbs, each prefixed by the sentence index.
for i, r in training.iterrows():
    # Read the column by label; r[0] relies on a positional fallback that
    # recent pandas versions deprecate.
    doc = nlp(r["text"])
    for j, s in enumerate(doc.sents):
        for t in s:
            if t.pos_ in ("ADJ", "ADV"):
                print(j, t)
        print()  # blank line between sentences
    break  # only the first row is processed