In [1]:
import spacy
import pandas as pd
from spacy import displacy

In [4]:
!python -m spacy download en_core_web_md


Collecting en-core-web-md==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_md-3.8.0/en_core_web_md-3.8.0-py3-none-any.whl (33.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m33.5/33.5 MB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: en-core-web-md
Successfully installed en-core-web-md-3.8.0
[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_md')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [5]:
nlp_sm = spacy.load("en_core_web_sm")
nlp_md = spacy.load("en_core_web_md")

In [6]:
# Sample news article text
text = """
Apple Inc. announced its new iPhone 15 in California. CEO Tim Cook said the launch will boost sales in Europe and Asia.
The event was attended by tech journalists from CNN, BBC, and Reuters.
"""

In [7]:
# Function to extract named entities
def extract_entities(nlp_model, text):
    doc = nlp_model(text)
    entities = [(ent.text, ent.label_) for ent in doc.ents]
    return pd.DataFrame(entities, columns=["Entity", "Label"])

In [8]:
# Apply both models
df_sm = extract_entities(nlp_sm, text)
df_md = extract_entities(nlp_md, text)

In [9]:
# Display comparison
print("🔍 Results from en_core_web_sm:")
print(df_sm)

🔍 Results from en_core_web_sm:
       Entity     Label
0  Apple Inc.       ORG
1          15  CARDINAL
2  California       GPE
3    Tim Cook    PERSON
4      Europe       LOC
5        Asia       LOC
6         CNN       ORG
7         BBC       ORG
8     Reuters       ORG


In [10]:
print("\n🔍 Results from en_core_web_md:")
print(df_md)


🔍 Results from en_core_web_md:
       Entity   Label
0  Apple Inc.     ORG
1   iPhone 15     ORG
2  California     GPE
3    Tim Cook  PERSON
4      Europe     LOC
5        Asia     LOC
6         CNN     ORG
7         BBC     ORG
8     Reuters     ORG


In [11]:
# Bonus: Visualize entities using displacy
print("\n🖼️ Visualizing entities from en_core_web_md:")
displacy.render(nlp_md(text), style="ent", jupyter=False)


🖼️ Visualizing entities from en_core_web_md:


'<div class="entities" style="line-height: 2.5; direction: ltr"><br>\n<mark class="entity" style="background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    Apple Inc.\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">ORG</span>\n</mark>\n announced its new \n<mark class="entity" style="background: #7aecec; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    iPhone 15\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">ORG</span>\n</mark>\n in \n<mark class="entity" style="background: #feca74; padding: 0.45em 0.6em; margin: 0 0.25em; line-height: 1; border-radius: 0.35em;">\n    California\n    <span style="font-size: 0.8em; font-weight: bold; line-height: 1; border-radius: 0.35em; vertical-align: middle; margin-left: 0.5rem">GPE<