In [3]:
import spacy
# Sample sentence that the cells below analyse.
text = "Apple is looking at buying a startup in the UK for $1 billion."

# Load the `en_core_web_sm` pipeline and run it over the sentence; the
# resulting `doc` object is what every later cell iterates over.
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)

# Printing the processed document echoes the original sentence.
print(doc)

Apple is looking at buying a startup in the UK for $1 billion.


In [7]:
# Iterating over `doc` yields one token at a time; show each token on its
# own line under a short heading (punctuation and "$" come out as separate
# tokens).
print('tokens:')
for token in doc:
    # print() would call str() implicitly; spell it out for clarity.
    print(str(token))

tokens:
Apple
is
looking
at
buying
a
startup
in
the
UK
for
$
1
billion
.


In [8]:
# One row per token: the token text left-aligned in 15 columns, then its
# `pos_` label padded to 10 columns, then its `tag_` label in parentheses
# (e.g. "Apple" -> PROPN (NNP)).
for token in doc:
    row = f"{token.text:<15} -> {token.pos_:10} ({token.tag_})"
    print(row)

Apple           -> PROPN      (NNP)
is              -> AUX        (VBZ)
looking         -> VERB       (VBG)
at              -> ADP        (IN)
buying          -> VERB       (VBG)
a               -> DET        (DT)
startup         -> NOUN       (NN)
in              -> ADP        (IN)
the             -> DET        (DT)
UK              -> PROPN      (NNP)
for             -> ADP        (IN)
$               -> SYM        ($)
1               -> NUM        (CD)
billion         -> NUM        (CD)
.               -> PUNCT      (.)


In [9]:
# Heading (the leading "\n" leaves a blank line above it).
print("\n=== Named Entities ===")

# `doc.ents` holds the entity spans found in the sentence; list each span's
# text (left-aligned in 25 columns) next to its label, e.g. "Apple" -> ORG.
entities = doc.ents
for ent in entities:
    entity_row = f"{ent.text:<25} -> {ent.label_}"
    print(entity_row)


=== Named Entities ===
Apple                     -> ORG
UK                        -> GPE
$1 billion                -> MONEY


In [10]:
# Heading (the leading "\n" leaves a blank line above it).
print("\n=== Lemmatization (Base Forms) ===")

# Pair every token with its `lemma_`, i.e. the base form reported by the
# pipeline (e.g. "is" -> "be", "looking" -> "look"). The token text is
# left-aligned in 15 columns so the arrows line up.
for token in doc:
    lemma_row = f"{token.text:<15} -> {token.lemma_}"
    print(lemma_row)


=== Lemmatization (Base Forms) ===
Apple           -> Apple
is              -> be
looking         -> look
at              -> at
buying          -> buy
a               -> a
startup         -> startup
in              -> in
the             -> the
UK              -> UK
for             -> for
$               -> $
1               -> 1
billion         -> billion
.               -> .


In [12]:
# Heading (the leading "\n" leaves a blank line above it).
print("\n=== Stopword Removal ===")

# Keep the text of every token that is not flagged as a stopword AND is
# purely alphabetic. The `is_alpha` test is what also drops "$", "1" and
# the final "." -- so this removes more than just stopwords.
# A list comprehension replaces the manual "empty list + append" loop.
filtered_tokens = [
    token.text
    for token in doc
    if not token.is_stop and token.is_alpha
]
print(filtered_tokens)


=== Stopword Removal ===
['Apple', 'looking', 'buying', 'startup', 'UK', 'billion']
