In [1]:
import spacy

# Load the "en_core_web_sm" pipeline once; every `nlp(...)` call in the cells
# below runs text through this object.
nlp = spacy.load("en_core_web_sm")

In [2]:
# Sample sentence that carries its own double quotes; the apostrophe is
# backslash-escaped because the Python literal is single-quoted.
my_string = '"We\'re moving to L.A.!"'
print(my_string)

"We're moving to L.A.!"


In [3]:
# Run the loaded pipeline on the sample string; the result is iterated
# token by token in the next cell.
doc = nlp(my_string)

In [4]:
# Emit every token's text followed by " | ", all on a single line
# (no trailing newline, exactly like printing each token with end=" | ").
print("".join(f"{tok.text} | " for tok in doc), end="")


" | We | 're | moving | to | L.A. | ! | " | 

In [5]:
# Sentence mixing a contraction, a hyphenated word, an e-mail address and a URL,
# used to inspect how each of them is split into tokens.
text2 = "We're here to help! Send snail-mail, email support@oursite.com or visit us at http://www.oursite.com!"
docs = nlp(text2)

# One token per line.
print(*(tok.text for tok in docs), sep="\n")

We
're
here
to
help
!
Send
snail
-
mail
,
email
support@oursite.com
or
visit
us
at
http://www.oursite.com
!


In [6]:
# Sentence with a unit suffix ("5km") and a currency amount ("$10.30").
doc3 = nlp(u"A 5km NYC cab ride costs $10.30")

# One token per line.
token_texts = [tok.text for tok in doc3]
print("\n".join(token_texts))

A
5
km
NYC
cab
ride
costs
$
10.30


In [7]:
# Sentence containing abbreviations that end in a period ("St.", "U.S.").
doc4 = nlp(u"Let's visit St. Louis in the U.S. next year.")

# Print the token objects themselves, one per line (print() stringifies each).
print(*doc4, sep="\n")

Let
's
visit
St.
Louis
in
the
U.S.
next
year
.


In [10]:
# Show the class of the object returned by `nlp(...)`.
print(type(doc4))

<class 'spacy.tokens.doc.Doc'>


In [11]:
# Length of the vocab object attached to `doc`; as the cell's last expression
# it is displayed as the cell output.
# NOTE(review): presumably the count of entries currently stored in the vocab — confirm.
len(doc.vocab)

794

In [12]:
# Sample sentence used by the slicing and item-assignment cells that follow.
# NOTE(review): "recieve" is misspelled in the sample text; left unchanged here.
doc5 = nlp(u"It is better to give than to recieve.")

In [16]:
# Slice the Doc by token position: tokens at indices 2, 3 and 4.
print(doc5[2:5])

better to give


In [18]:
# Demonstrate that a Doc does not allow replacing a token by index.
# The TypeError is the point of this cell, so it is caught and displayed
# rather than left uncaught, which would abort a "Restart & Run All".
try:
    doc5[4] = "Hasnain"
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: 'spacy.tokens.doc.Doc' object does not support item assignment

In [19]:
# Sentence with an organisation, a place name and a monetary amount; reused by
# the named-entity cell below.
doc6 = nlp(u"Apple to build a Hong Kong factory for $6 million")

# All token texts on one line, each followed by " | " (no trailing newline).
print(*(tok.text for tok in doc6), sep=" | ", end=" | ")

Apple | to | build | a | Hong | Kong | factory | for | $ | 6 | million | 

In [20]:
# For each recognised entity print: its text, its label, and the
# human-readable explanation of that label.
for entity in doc6.ents:
    label = entity.label_
    print(f"{entity.text} - {label} - {spacy.explain(label)}")

Apple - ORG - Companies, agencies, institutions, etc.
Hong Kong - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [21]:
# Sentence used to list the noun chunks found by the pipeline.
doc7 = nlp(u"Autonomous cars shift insurance liability towards manufacturers")

# One noun chunk per line.
for phrase in (span.text for span in doc7.noun_chunks):
    print(phrase)

Autonomous cars
insurance liability
manufacturers


In [None]:
from spacy import displacy

# Sentence whose named entities are visualised below.
doc = nlp(u"Apple is going to build a Hong Kong factory for $6 million")

# `displacy.serve` starts its own web server and blocks the kernel until it is
# interrupted, so the cell never completes inside a notebook.
# `displacy.render(..., jupyter=True)` draws the same entity view inline.
displacy.render(doc, style="ent", jupyter=True)

In [29]:
from pydantic import BaseModel, Field
from typing import List

# Structured result expected from the PII-extraction chain: one string per
# entity type plus the input paragraph with those entities masked.
# The class is handed to `with_structured_output`, so the field descriptions
# are presumably forwarded to the model as part of the schema — keep them
# accurate and correctly spelled.
class EntityRecognitionResponse(BaseModel):
    # Phone number found in the input text.
    phone_number: str = Field(description="Phone number mentioned in the text")
    # Credit card number found in the input text.
    credit_card_number: str = Field(description="Credit card number mentioned in the text")
    # Person name(s) found in the input text.
    name: str = Field(description="Names mentioned in the text")
    # Postal address found in the input text.
    address: str = Field(description="Address mentioned in the text")
    # Copy of the input paragraph with the detected entities replaced by masks.
    masked_paragraph: str = Field(description="Exactly same paragraph as text but with masked entities")

In [33]:
from langchain_anthropic import ChatAnthropic

# Chat model client used for entity extraction.
# NOTE(review): no API key is passed here; presumably it is read from the
# environment — confirm before running on a fresh kernel.
llm = ChatAnthropic(model="claude-3-5-sonnet-latest")

# Bind the response schema so that results come back as
# EntityRecognitionResponse objects (their fields are read in a later cell).
llm_with_structured_output = llm.with_structured_output(EntityRecognitionResponse)

In [34]:
from langchain_core.prompts import PromptTemplate

# Prompt text with a single `{text}` placeholder that receives the passage
# to analyse when the chain is invoked.
template = """
You are an expert Named Entity Recognition expert with experience in extracting key PII informations.
Here is the provided text for PII/Entity Recognition

Text:
{text}
"""

# Turn the raw template string into a prompt object that can be piped into a model.
entity_prompt = PromptTemplate.from_template(template)

In [35]:
# Compose the prompt and the structured-output model into one chain:
# the formatted prompt is passed on to the model.
entity_chain = entity_prompt | llm_with_structured_output

In [36]:
# Sample passage containing a name, a postal address, card details, a phone
# number, an e-mail address and a tracking number.
text = """
John Doe recently placed an order with XYZ Electronics. His shipping address is 123 Main Street, Apt 4B, Springfield, IL 62704, USA. For billing, he used his Visa card with the number *4111 1111 1111 1111* (expiry *05/25*, CVV *123*).

You can contact him at his phone number *+1 (555) 123-4567* or via email at john.doe@example.com. His order will be delivered by FedEx with tracking number *1234 5678 9012*."""

# Fill the prompt's `{text}` placeholder with the passage and run the chain.
response = entity_chain.invoke({"text": text})

In [40]:
# Report each extracted field and the masked paragraph, one item per line.
print(
    f"Name Identified: {response.name}",
    f"Address Identified: {response.address}",
    f"Credit Card Number Identified: {response.credit_card_number}",
    f"Phone Number Identified: {response.phone_number}",
    f"Masked Response: {response.masked_paragraph}",
    sep="\n",
)

Name Identified: John Doe
Address Identified: 123 Main Street, Apt 4B, Springfield, IL 62704, USA
Credit Card Number Identified: 4111 1111 1111 1111
Phone Number Identified: +1 (555) 123-4567
Masked Response: [NAME] recently placed an order with XYZ Electronics. His shipping address is [ADDRESS]. For billing, he used his Visa card with the number [CREDIT_CARD] (expiry *05/25*, CVV *123*).

You can contact him at his phone number [PHONE] or via email at [EMAIL]. His order will be delivered by FedEx with tracking number *1234 5678 9012*.
