# In [None]:
# Import necessary libraries
import spacy

# Activity banner: the title line followed by a one-sentence summary.
print(
    "Named Entity Recognition (NER) Activity",
    "This exercise demonstrates the identification of key entities like persons, organizations, and dates in a sample text dataset.",
    sep="\n",
)

# Step 1: Define the text the rest of the script analyses.
print("\nStep 1: Creating a sample dataset...")

# The literal deliberately starts and ends with a newline; later steps pass it
# to the model unchanged.
sample_dataset = """
The Secretary of State, John Doe, announced a new policy during a press conference held in Washington, D.C.,
on January 15th, 2023. The Environmental Protection Agency (EPA) is set to implement this policy immediately.
In another event, President Jane Smith visited New York City on February 3rd, 2023, to discuss infrastructure funding.
"""

# Echo the dataset under its heading (heading and text on separate lines).
print("\nSample Dataset Created:", sample_dataset, sep="\n")

# Step 2: Load the spaCy pipeline used for entity recognition.
print("\nStep 2: Loading the spaCy language model...")
model_name = "en_core_web_sm"  # named once so the load call and the log line agree
nlp = spacy.load(model_name)
print(f"Model Loaded: {model_name}")

# Step 3: Run the loaded pipeline over the sample text; the resulting `doc`
# is what the following steps read entities from.
print("\nStep 3: Processing the sample dataset...")
doc = nlp(sample_dataset)

# Step 4: Print every detected entity as a two-column table (text | label).
print("\nStep 4: Extracting named entities...")
# One template drives both the header and the rows so the columns line up:
# entity text left-aligned in 30 characters, label left-aligned in 15.
row_template = "{:<30} | {:<15}"
print(row_template.format("Entity", "Type"))
print("=" * 50)
for entity in doc.ents:
    print(row_template.format(entity.text, entity.label_))

# Step 5: Bucket entity texts by label in a single pass over doc.ents.
print("\nStep 5: Organizing entities by type...")
# Only these four labels are collected; entities with any other label are
# skipped. Document order is preserved within each bucket.
entities_by_label = {"PERSON": [], "ORG": [], "DATE": [], "GPE": []}
for entity in doc.ents:
    bucket = entities_by_label.get(entity.label_)
    if bucket is not None:
        bucket.append(entity.text)

persons = entities_by_label["PERSON"]
organizations = entities_by_label["ORG"]
dates = entities_by_label["DATE"]
locations = entities_by_label["GPE"]  # "locations" here means GPE-labelled entities

# Show each category on its own line.
print("\nPersons Detected:", persons)
print("Organizations Detected:", organizations)
print("Dates Detected:", dates)
print("Locations Detected:", locations)

# Step 6: Print the discussion prompts that close the activity.
print("\nReflection Questions:")
reflection_prompts = (
    "1. What other types of entities could be useful to detect in this dataset?",
    "2. Were there any entities missed by the NER model?",
    "3. How might this tool be useful in journalism or government analysis?",
)
# One prompt per line, in the order listed above.
print(*reflection_prompts, sep="\n")

# Optional: Save the categorized entities to a plain-text report in the
# current working directory. An existing "ner_results.txt" is overwritten.
print("\nOptional: Saving results to a file...")
# Pin the encoding explicitly: without it, open() falls back to the platform's
# locale encoding, so entity text containing non-ASCII characters could raise
# UnicodeEncodeError or be written in a legacy code page on some systems.
with open("ner_results.txt", "w", encoding="utf-8") as file:
    # Header followed by one line per category; each list is written using
    # its Python list representation.
    file.write("Named Entity Recognition (NER) Results\n")
    file.write("="*40 + "\n")
    file.write(f"Persons: {persons}\n")
    file.write(f"Organizations: {organizations}\n")
    file.write(f"Dates: {dates}\n")
    file.write(f"Locations: {locations}\n")
print("Results saved to 'ner_results.txt'.")
