In [2]:
# Step 1: Install necessary libraries
# spaCy supplies the NER pipeline and python-docx reads .docx files; the second
# command downloads the small English spaCy model that is loaded further down.
# NOTE: the model download may ask for a runtime restart before the model can
# be loaded.
!pip install spacy python-docx
!python -m spacy download en_core_web_sm

# Step 2: Import the required libraries
import spacy
import docx
from google.colab import files

# Load the small English model for NER.
# Loaded once at module level so that every extract_names() call reuses the
# same pipeline object.
nlp = spacy.load("en_core_web_sm")

# Step 3: Function to read text from a Word (.docx) document
def read_word_document(file_path):
    """Return the text of a Word (.docx) document as a single string.

    Args:
        file_path: Location of the .docx file; handed straight to
            ``docx.Document``.

    Returns:
        The text of every entry in the document's ``paragraphs``, joined
        with newline characters.
    """
    document = docx.Document(file_path)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)

# Step 4: Function to extract names of people from text
def extract_names(text):
    """Return the text of every entity the spaCy pipeline labels ``PERSON``.

    Args:
        text: The string to run through the module-level ``nlp`` pipeline.

    Returns:
        A list of entity strings in the order the pipeline reports them.
        Repeated mentions are kept as separate entries.
    """
    entities = nlp(text).ents
    return [entity.text for entity in entities if entity.label_ == "PERSON"]

# Step 5: Upload the Word document
import io  # local to this step: wraps the uploaded bytes in a file-like object

print("Please upload your Word document (.docx file)")
uploaded = files.upload()  # dict mapping file name -> raw file bytes

# Step 6: Extract names from the uploaded document
if not uploaded:
    # The dialog was cancelled or no file was chosen; report it instead of
    # letting next() fail with a bare StopIteration.
    print("No file was uploaded; nothing to process.")
else:
    # Only the first uploaded file is processed.
    file_name, file_bytes = next(iter(uploaded.items()))

    # Parse the in-memory bytes rather than re-opening file_name from disk.
    # Colab may save an upload under a renamed path (e.g. "name (1).docx")
    # when a file with that name already exists, so the dict key is not
    # guaranteed to be the path of the file that was just uploaded.
    text = read_word_document(io.BytesIO(file_bytes))

    # Step 7: Extract and print names of people from the document
    names = extract_names(text)
    print("Names found in the document:", names)


Collecting en-core-web-sm==3.7.1
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 12.8/12.8 MB 70.0 MB/s eta 0:00:00
✔ Download and installation successful
You can now load the package via spacy.load('en_core_web_sm')
⚠ Restart to reload dependencies
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.
Please upload your Word document (.docx file)


Saving SNLP word.docx to SNLP word (1).docx
Names found in the document: ['Elon Musk', 'Jeff Bezos', 'Musk', 'Bill Gates', 'Michelle Obama', 'Oprah Winfrey', 'Serena Williams']
