According to the spaCy documentation, we can run built-in pipelines for tokenization.

In [1]:
import spacy

In [3]:
# Install the small English model with the interpreter that runs this kernel.
# A shell call to `python` is looked up on PATH and may be a different
# interpreter/environment than the kernel, in which case the later
# spacy.load("en_core_web_sm") would not find the model.
spacy.cli.download("en_core_web_sm")

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
     ---------------------------------------- 0.0/12.8 MB ? eta -:--:--
     ------------------------------- ------- 10.5/12.8 MB 59.4 MB/s eta 0:00:01
     --------------------------------------  12.6/12.8 MB 32.9 MB/s eta 0:00:01
     ---------------------------------------- 12.8/12.8 MB 32.1 MB/s  0:00:00
Installing collected packages: en-core-web-sm
Successfully installed en-core-web-sm-3.8.0
✔ Download and installation successful
You can now load the package via spacy.load('en_core_web_sm')



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
# Name of the pretrained English pipeline to load (see the spaCy documentation).
model_name = "en_core_web_sm"
# Sample sentence used in the cells below.
sample_text = "This is Mr.Dev's Notebook"

nlp = spacy.load(model_name)
doc = nlp(sample_text)  # run the sentence through the loaded pipeline

In [9]:
# Display the loaded pipeline as a list of (name, component object) pairs.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x2aa7b194050>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x2aa7b1941d0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x2aa7ae220a0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x2aa7bfa4c10>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x2aa7d1b5690>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x2aa7ae21a10>)]

In [None]:
nlp.pipe_names  # just the names of the components in the pipeline

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [None]:
# Print one line per token: the token text, its part-of-speech tag (pos_)
# and its lemma (lemma_), separated by " | ".
for token in doc:
    print(token, token.pos_, token.lemma_, sep=" | ")

This | PRON | this
is | AUX | be
Mr. | PROPN | Mr.
Dev | PROPN | Dev
's | PART | 's
Notebook | NOUN | notebook


In [None]:
# Named-entity recognition (NER) demo

doc = nlp("Tesla is going to acquire twitter for $45 billion")

# One line per detected entity: its text, its label, and spaCy's
# human-readable explanation of that label.
for ent in doc.ents:
    print(ent.text, ent.label_, spacy.explain(ent.label_), sep=" | ")

Tesla | ORG | Companies, agencies, institutions, etc.
$45 billion | MONEY | Monetary values, including unit


In [14]:
# Components such as NER or POS tagging are not present in a blank pipeline,
# but individual components can be copied from a prebuilt pipeline into a
# blank one, as done below.
# NOTE: this cell rebinds `nlp`; any cell that calls `nlp(...)` after this one
# has run will use the NER-only pipeline, not the full one loaded earlier.

source_nlp = spacy.load("en_core_web_sm")

nlp = spacy.blank("en")

nlp.add_pipe("ner", source=source_nlp) # copy the NER component from the prebuilt pipeline

nlp.pipe_names

['ner']