In [1]:
import spacy
from spacy import displacy
from faker import Faker
import pandas as pd

nlp = spacy.load('en_core_web_trf')
fake = Faker()

def replace_names_with_fake_names(text):
    doc = nlp(text)
    new_text = ''
    for token in doc:
        if token.ent_type_ == 'PERSON':
            new_text += ' ' + fake.name()
        else:
            new_text += ' ' + token.text
    return new_text.strip()

In [2]:
data = [
    {'sentence': 'John and Sarah went to the store.', 'contains_name': True},
    {'sentence': 'The cat jumped over the fence.', 'contains_name': False},
    {'sentence': 'Mary is a good student.', 'contains_name': True},
    {'sentence': 'I like to eat pizza.', 'contains_name': False},
    {'sentence': 'David and Emily are getting married.', 'contains_name': True},
    {'sentence': 'The sun is shining today.', 'contains_name': False},
    {'sentence': 'Jessica works at a bank.', 'contains_name': True},
    {'sentence': 'The dog chased the ball.', 'contains_name': False},
    {'sentence': 'Andrew is a talented musician.', 'contains_name': True},
    {'sentence': 'I need to buy some milk.', 'contains_name': False}
]


In [5]:
df = pd.DataFrame(data)
df["sentences_replaced"] = df["sentence"].apply(lambda x: replace_names_with_fake_names(x))
df

Unnamed: 0,sentence,contains_name,sentences_replaced
0,John and Sarah went to the store.,True,Angela Diaz and Karen Price went to the store .
1,The cat jumped over the fence.,False,The cat jumped over the fence .
2,Mary is a good student.,True,Eric Robertson is a good student .
3,I like to eat pizza.,False,I like to eat pizza .
4,David and Emily are getting married.,True,Katelyn Peters and Jason Gray are getting marr...
5,The sun is shining today.,False,The sun is shining today .
6,Jessica works at a bank.,True,Roberta Yoder works at a bank .
7,The dog chased the ball.,False,The dog chased the ball .
8,Andrew is a talented musician.,True,Renee Melendez is a talented musician .
9,I need to buy some milk.,False,I need to buy some milk .


In [6]:
for i in data:
    doc_temp = nlp(i["sentence"])
    displacy.render(doc_temp, style='ent', jupyter=True)



In [7]:
doc_temp = nlp("Nikki went to the market to buy bread for $100")
displacy.render(doc_temp, style='ent', jupyter=True)

In [8]:
entities_dict = { 
    "PERSON": "Tom Hanks starred in Forrest Gump.", 
    "NORP": "The festival attracts people from all over the world.", 
    "FAC": "We visited the Eiffel Tower in Paris.", 
    "ORG": "Google is a multinational technology company.", 
    "GPE": "I was born and raised in Tokyo.", 
    "LOC": "The Grand Canyon is a natural wonder.", 
    "PRODUCT": "I love my new iPhone.", 
    "EVENT": "We're going to the concert tonight.", 
    "WORK_OF_ART": "The Mona Lisa is a famous painting by Leonardo da Vinci.", 
    "LAW": "He's studying to become a lawyer.", 
    "LANGUAGE": "Mandarin is the most widely spoken language in the world.", 
    "DATE": "The meeting is scheduled for next Tuesday.", 
    "TIME": "I'll meet you at 3:30 PM.", 
    "PERCENT": "The sales increased by 10%.", 
    "MONEY": "The house cost $500,000.", 
    "QUANTITY": "The recipe calls for two cups of sugar.", 
    "ORDINAL": "She finished in third place.", 
    "CARDINAL": "I have three dogs and two cats." 
}

entities_list = [[i,j] for i,j in entities_dict.items()]

In [10]:
df_entities = pd.DataFrame(entities_list, columns=["entity", "sentence"])
df_entities

Unnamed: 0,entity,sentence
0,PERSON,Tom Hanks starred in Forrest Gump.
1,NORP,The festival attracts people from all over the...
2,FAC,We visited the Eiffel Tower in Paris.
3,ORG,Google is a multinational technology company.
4,GPE,I was born and raised in Tokyo.
5,LOC,The Grand Canyon is a natural wonder.
6,PRODUCT,I love my new iPhone.
7,EVENT,We're going to the concert tonight.
8,WORK_OF_ART,The Mona Lisa is a famous painting by Leonardo...
9,LAW,He's studying to become a lawyer.


In [11]:
for i in entities_list:
    doc_temp = nlp(i[1])
    displacy.render(doc_temp, style='ent', jupyter=True)

