## spaCy Pipelines

In [None]:
import spacy

# Sample input: a handful of short sentences standing in for a long document.
text = "This is sentence one. This is sentence two. This is sentence three. This continues for thousands of sentences."

# Start from an empty English pipeline.
nlp = spacy.blank("en")

# Register the sentencizer so that sentence boundaries (doc.sents) get assigned.
nlp.add_pipe("sentencizer")

doc = nlp(text)
sentences = list(doc.sents)
sentences
# Expected:
# [This is sentence one., This is sentence two., This is sentence three., This continues for thousands of sentences.]


# Print the pipeline overview table; the call also returns the analysis dict.
nlp.analyze_pipes(pretty=True)

"""
============================= Pipeline Overview =============================

#   Component     Assigns               Requires   Scores    Retokenizes
-   -----------   -------------------   --------   -------   -----------
0   sentencizer   token.is_sent_start              sents_f   False      
                  doc.sents                        sents_p              
                                                   sents_r              

✔ No problems found.
"""

"""
{
'summary': {
    'sentencizer': {
        'assigns': ['token.is_sent_start', 'doc.sents'],
        'requires': [],
        'scores': ['sents_f', 'sents_p', 'sents_r'],
        'retokenizes': False
        }
    },
    'problems': {
        'sentencizer': []
    },
    'attrs': {
        'token.is_sent_start': {
            'assigns': ['sentencizer'], 
            'requires': []
        },
        'doc.sents': {
            'assigns': ['sentencizer'], 'requires': []
        }
    }
}
"""

============================= Pipeline Overview =============================

#   Component     Assigns               Requires   Scores    Retokenizes
-   -----------   -------------------   --------   -------   -----------
0   sentencizer   token.is_sent_start              sents_f   False      
                  doc.sents                        sents_p              
                                                   sents_r              

✔ No problems found.


{'summary': {'sentencizer': {'assigns': ['token.is_sent_start', 'doc.sents'],
   'requires': [],
   'scores': ['sents_f', 'sents_p', 'sents_r'],
   'retokenizes': False}},
 'problems': {'sentencizer': []},
 'attrs': {'token.is_sent_start': {'assigns': ['sentencizer'], 'requires': []},
  'doc.sents': {'assigns': ['sentencizer'], 'requires': []}}}

In [None]:
# NER focused pipeline example
import spacy

# Blank English pipeline: sentence splitting first, then the NER component.
nlp = spacy.blank("en")
nlp.add_pipe("sentencizer")
nlp.add_pipe("ner")

# Inspect what each component assigns, requires and scores.
analysis = nlp.analyze_pipes(pretty=True)
analysis

"""
============================= Pipeline Overview =============================

#   Component     Assigns               Requires   Scores          Retokenizes
-   -----------   -------------------   --------   -------------   -----------
0   sentencizer   token.is_sent_start              sents_f         False      
                  doc.sents                        sents_p                    
                                                   sents_r                    
                                                                              
1   ner           doc.ents                         ents_f          False      
                  token.ent_iob                    ents_p                     
                  token.ent_type                   ents_r                     
                                                   ents_per_type              

✔ No problems found.

"""



============================= Pipeline Overview =============================

#   Component     Assigns               Requires   Scores          Retokenizes
-   -----------   -------------------   --------   -------------   -----------
0   sentencizer   token.is_sent_start              sents_f         False      
                  doc.sents                        sents_p                    
                                                   sents_r                    
                                                                              
1   ner           doc.ents                         ents_f          False      
                  token.ent_iob                    ents_p                     
                  token.ent_type                   ents_r                     
                                                   ents_per_type              

✔ No problems found.


{'summary': {'sentencizer': {'assigns': ['token.is_sent_start', 'doc.sents'],
   'requires': [],
   'scores': ['sents_f', 'sents_p', 'sents_r'],
   'retokenizes': False},
  'ner': {'assigns': ['doc.ents', 'token.ent_iob', 'token.ent_type'],
   'requires': [],
   'scores': ['ents_f', 'ents_p', 'ents_r', 'ents_per_type'],
   'retokenizes': False}},
 'problems': {'sentencizer': [], 'ner': []},
 'attrs': {'token.is_sent_start': {'assigns': ['sentencizer'], 'requires': []},
  'token.ent_type': {'assigns': ['ner'], 'requires': []},
  'doc.ents': {'assigns': ['ner'], 'requires': []},
  'token.ent_iob': {'assigns': ['ner'], 'requires': []},
  'doc.sents': {'assigns': ['sentencizer'], 'requires': []}}}

In [None]:
import spacy

# Example: create an NER-focused pipeline (blank English model + "ner" only).
nlp_ner = spacy.blank("en")
nlp_ner.add_pipe("ner")

# Example: a custom pipeline with multiple components.
# The sentencizer is added first, then "ner".
nlp_custom = spacy.blank("en")
nlp_custom.add_pipe("sentencizer")
nlp_custom.add_pipe("ner")

# Analyze the NER-only pipeline built above. With pretty=True the overview
# table is printed; the analysis is also returned and kept in `analysis`.
print("\nPipeline Analysis:")
analysis = nlp_ner.analyze_pipes(pretty=True)

# Analyze the multi-component pipeline built above.
print("\nCustom Pipeline Analysis:")
custom_analysis = nlp_custom.analyze_pipes(pretty=True)

print("Multi-component custom pipeline:", nlp_custom.pipe_names)

# Process text and read the sentence boundaries.
# `text` is the sample string defined in the first cell of this notebook.
# The "ner" component here is untrained, so it is disabled for this call:
# only the sentencizer output is needed, and running an uninitialized
# trainable component is expected to fail (spaCy asks for `initialize()`).
# Drop `disable=` once the pipeline has been trained/initialized and
# entities are wanted as well.
doc_multi = nlp_custom(text, disable=["ner"])
print("Sentences:", [sent.text.strip() for sent in doc_multi.sents])
