### Installation and some basic examples
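Below are a few simple examples of spaCy's core capabilities: tokenization, part-of-speech tagging, named entity recognition, dependency parsing, visualization with displaCy, and a small custom pipeline component. The aim of this section is to give an overview of what is possible; more advanced examples will follow.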

In [None]:
import sys

# Run pip through `sys.executable` so the package is installed for the same
# Python interpreter that is executing this notebook.
!{sys.executable} -m pip install spacy
# Fetch the `en_core_web_sm` English pipeline, which the cells below load
# with `spacy.load("en_core_web_sm")`.
!{sys.executable} -m spacy download en_core_web_sm


Defaulting to user installation because normal site-packages is not writeable
Collecting spacy
  Downloading spacy-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (29.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m29.2/29.2 MB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting murmurhash<1.1.0,>=0.28.0
  Downloading murmurhash-1.0.12-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (124 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m124.3/124.3 KB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting preshed<3.1.0,>=3.0.2
  Downloading preshed-3.0.9-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (156 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m156.9/156.9 KB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
Collecting cymem<2.1.0,>=2.0.2
  Downloading cymem-2.0.11-cp310-cp310-many

In [None]:
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

# Each entry pairs a heading with the lines to print beneath it.
sections = [
    # Tokenization: iterating over the Doc yields its tokens in order.
    ("Tokens:", [token.text for token in doc]),
    # Part-of-Speech Tagging: `pos_` is the tag as a readable string.
    ("\nPart-of-Speech Tags:", [f"{token.text}: {token.pos_}" for token in doc]),
    # Named Entity Recognition: `doc.ents` holds the recognised entity spans.
    ("\nNamed Entities:", [f"{ent.text}: {ent.label_}" for ent in doc.ents]),
]

for heading, lines in sections:
    print(heading)
    for line in lines:
        print(line)


Tokens:
Apple
is
looking
at
buying
U.K.
startup
for
$
1
billion

Part-of-Speech Tags:
Apple: PROPN
is: AUX
looking: VERB
at: ADP
buying: VERB
U.K.: PROPN
startup: VERB
for: ADP
$: SYM
1: NUM
billion: NUM

Named Entities:
Apple: ORG
U.K.: GPE
$1 billion: MONEY


In [None]:
from spacy import displacy

# Adjacent string literals are concatenated, so the text passed to `nlp` is
# a single sentence.
doc = nlp(
    "Apple's CEO, Tim Cook, announced the new iPhone 12 at the October event "
    "in Cupertino, aiming to boost sales in the upcoming holiday season."
)

# One line per token: token text -> dependency label -> text of its head token.
dependency_lines = [
    f"{token.text} -> {token.dep_} -> {token.head.text}" for token in doc
]
print("\n".join(["Dependency Parsing:", *dependency_lines]))

# One line per entity span: entity text followed by its label in parentheses.
entity_lines = [f"{ent.text} ({ent.label_})" for ent in doc.ents]
print("\n".join(["\nNamed Entities:", *entity_lines]))


Dependency Parsing:
Apple -> poss -> CEO
's -> case -> Apple
CEO -> nsubj -> announced
, -> punct -> CEO
Tim -> compound -> Cook
Cook -> appos -> CEO
, -> punct -> CEO
announced -> ROOT -> announced
the -> det -> iPhone
new -> amod -> iPhone
iPhone -> dobj -> announced
12 -> nummod -> iPhone
at -> prep -> announced
the -> det -> event
October -> compound -> event
event -> pobj -> at
in -> prep -> event
Cupertino -> pobj -> in
, -> punct -> announced
aiming -> advcl -> announced
to -> aux -> boost
boost -> xcomp -> aiming
sales -> dobj -> boost
in -> prep -> boost
the -> det -> season
upcoming -> amod -> season
holiday -> compound -> season
season -> pobj -> in
. -> punct -> announced

Named Entities:
Apple (ORG)
Tim Cook (PERSON)
iPhone (ORG)
12 (CARDINAL)
October (DATE)
Cupertino (GPE)
the upcoming holiday season (DATE)


In [None]:
# Dependency visualisation: compact layout, white text on a black background.
dep_options = {
    "compact": True,
    "bg": "#000000",
    "color": "white",
    "font": "Source Sans Pro",
}
displacy.render(doc, style="dep", jupyter=True, options=dep_options)

# Entity visualisation: custom gradient colour for ORG entities.
ent_options = {
    "colors": {"ORG": "linear-gradient(90deg, #aa9cfc, #fc9ce7)"},
}
displacy.render(doc, style="ent", jupyter=True, options=ent_options)


In [None]:
from spacy.tokens import Doc
from spacy.tokens import Doc
from spacy.language import Language

# Word-list sentiment scorer: +1 per positive word, -1 per negative word
def simple_sentiment(doc: Doc) -> Doc:
    """Score ``doc`` against two small word lists and store the total on it.

    Every token whose lowercased text is a listed positive word adds 1, every
    token whose lowercased text is a listed negative word subtracts 1, and all
    other tokens contribute nothing. The sum is written to
    ``doc._.sentiment_score``.

    Args:
        doc: The document to score; it is iterated token by token.

    Returns:
        The same ``doc`` object, after ``doc._.sentiment_score`` has been set.
    """
    word_polarity = {
        'good': 1, 'great': 1, 'excellent': 1, 'amazing': 1, 'happy': 1,
        'bad': -1, 'terrible': -1, 'poor': -1, 'sad': -1, 'horrible': -1,
    }

    # Words absent from the table count as 0, so an empty doc scores 0.
    doc._.sentiment_score = sum(
        word_polarity.get(token.text.lower(), 0) for token in doc
    )
    return doc

@Language.component("simple_sentiment")
def simple_sentiment_component(doc: Doc) -> Doc:
    """Pipeline component registered under the name ``"simple_sentiment"``.

    Passes ``doc`` to ``simple_sentiment`` and returns whatever it returns.
    """
    return simple_sentiment(doc)

# Register the custom `sentiment_score` attribute (default 0) unless it is
# already registered, e.g. because this cell has been run before.
if not Doc.has_extension("sentiment_score"):
    Doc.set_extension("sentiment_score", default=0)

# Load a fresh pipeline and append the sentiment component as its last step.
nlp = spacy.load("en_core_web_sm")
nlp.add_pipe("simple_sentiment", last=True)

example_sentences = (
    # One positive and one negative word cancel each other out -> score 0.
    "I had a great day, but the weather was terrible.",
    # Two negative words -> score -2.
    "I had a terrible day, but the weather was terrible.",
    # "great" adds 1; "dissatisfied" is in neither word list, so it is ignored
    # and the score stays at 1 even though the sentence reads as negative.
    "I had a great day, however I felt dissatisfied with life.",
)

for sentence in example_sentences:
    doc = nlp(sentence)
    print(f"Sentiment Score: {doc._.sentiment_score}")

Sentiment Score: 0
Sentiment Score: -2
Sentiment Score: 1
