In [8]:
# Sample sentence used as input for the tokenization step later in the notebook.
sentence = (
    "The Eiffel Tower was built from 1887 to 1889 by Gustave Eiffel, "
    "whose company specialized in building metal frameworks and structures."
)

In [12]:
# NLTK setup: register an optional project-local data directory, fetch the
# tokenizer / POS-tagger / NE-chunker resources, then tokenize `sentence`.
import os
import nltk

# Optional: use a local nltk_data directory if it exists (keeps data inside the repo).
# Candidates are listed in priority order:
#   1. `nltk_data` next to the current working directory, so the notebook is not
#      tied to one machine (assumes the kernel runs from the notebook's folder —
#      TODO confirm for your launcher).
#   2. The original author's absolute path, kept as a backward-compatible fallback.
_local_nltk_dir = r"e:\VG Codes\DeepLearning\KrishNaik-DeepLearning\NLP\nltk_data"
_nltk_dir_candidates = (
    os.path.join(os.getcwd(), "nltk_data"),
    _local_nltk_dir,
)
# Iterate in reverse so that the highest-priority existing directory ends up
# at index 0 of the search path; directories already registered are left alone.
for _candidate in reversed(_nltk_dir_candidates):
    if os.path.isdir(_candidate) and _candidate not in nltk.data.path:
        nltk.data.path.insert(0, _candidate)

# Resources are requested in both their classic and newer names, because newer
# NLTK splits some resources into *_tab / *_eng variants.
_NLTK_RESOURCES = (
    # Tokenizer
    'punkt',
    'punkt_tab',
    # POS tagger (language-specific)
    'averaged_perceptron_tagger',
    'averaged_perceptron_tagger_eng',
    # Named Entity Chunker models (tabulated models required by newer nltk)
    'maxent_ne_chunker',
    'maxent_ne_chunker_tab',
    # Supporting word list for NE chunker
    'words',
)
for _resource in _NLTK_RESOURCES:
    nltk.download(_resource, quiet=True)

# Split the sample sentence into word/punctuation tokens for the tagging cells.
words = nltk.word_tokenize(sentence)

In [10]:
# Part-of-speech tagging: pair every token in `words` with its POS tag,
# then show the heading and the tagged list on separate lines.
tag_elements = nltk.pos_tag(words)
print("POS Tags:", tag_elements, sep="\n")

POS Tags:
[('The', 'DT'), ('Eiffel', 'NNP'), ('Tower', 'NNP'), ('was', 'VBD'), ('built', 'VBN'), ('from', 'IN'), ('1887', 'CD'), ('to', 'TO'), ('1889', 'CD'), ('by', 'IN'), ('Gustave', 'NNP'), ('Eiffel', 'NNP'), (',', ','), ('whose', 'WP$'), ('company', 'NN'), ('specialized', 'VBD'), ('in', 'IN'), ('building', 'NN'), ('metal', 'NN'), ('frameworks', 'NNS'), ('and', 'CC'), ('structures', 'NNS'), ('.', '.')]


In [14]:
# Named Entity Recognition
# Chunk the POS-tagged tokens into a tree; labelled subtrees below the root
# are the recognized entities.
chunk_tree = nltk.ne_chunk(tag_elements)

print("Named Entities (linearized tree):")
print(chunk_tree)  # Avoids SVG rendering; prints plain text

print("\nNamed Entities (ASCII tree):")
try:
    chunk_tree.pretty_print()  # ASCII art tree in terminal/output
except Exception as exc:
    # Best-effort fallback: say why the ASCII rendering was skipped instead of
    # discarding the error, then show the bracketed representation.
    print(f"(pretty_print failed: {exc!r}; showing bracketed form instead)")
    chunk_tree.pprint()

# Extract entities into (label, text) pairs.
# subtrees() yields only Tree nodes (the root included), so label() is always
# available; the root "S" sentence node and any empty label are skipped.
entities = [
    (subtree.label(), " ".join(tok for tok, _pos in subtree.leaves()))
    for subtree in chunk_tree.subtrees()
    if subtree.label() and subtree.label() != "S"
]

print("\nExtracted entities:")
for label, text in entities:
    print(f"{label}: {text}")

Named Entities (linearized tree):
(S
  The/DT
  (ORGANIZATION Eiffel/NNP Tower/NNP)
  was/VBD
  built/VBN
  from/IN
  1887/CD
  to/TO
  1889/CD
  by/IN
  (PERSON Gustave/NNP Eiffel/NNP)
  ,/,
  whose/WP$
  company/NN
  specialized/VBD
  in/IN
  building/NN
  metal/NN
  frameworks/NNS
  and/CC
  structures/NNS
  ./.)

Named Entities (ASCII tree):
                                                                                             S                                                                                                                                           
   __________________________________________________________________________________________|____________________________________________________________________________________________________________________________                
  |       |        |        |       |      |      |      |    |      |         |             |          |        |         |           |          |          |         |             OR