# Named Entity Recognition - Quotes Deidentification Example

## 0. Mount Google Drive

In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive.
# force_remount=True is passed so the mount is requested again even when this
# cell is re-run in the same session.
# NOTE(review): none of the visible cells read from /content/drive — confirm
# the mount is still needed.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


## 1. Environment Preparation

In [3]:
# Install and import packages.
# Uncomment the two lines below on a runtime where spaCy or the
# en_core_web_sm model is not installed yet.
#! pip install spacy
#! python -m spacy download en_core_web_sm

# Imports: spaCy, its displacy visualizer, and the standard-library regex module
import spacy
from spacy import displacy
from spacy import tokenizer  # NOTE(review): not referenced in the visible cells — confirm it is still needed
import re

# Load the English pipeline (tokenizer, tagger, parser and NER) once;
# the resulting `nlp` object is reused by the later cells.
nlp = spacy.load('en_core_web_sm')

## 2. Data Exploration

In [4]:
# Sample corpus: three quotes about ChatGPT. Each quote is wrapped in curly
# quotation marks and followed by a "~"-prefixed attribution containing the
# speaker's name.
corpus = [
    "“ChatGPT is scary good. We are not far from dangerously strong AI.” ~Elon Musk",
    "“ChatGPT is incredibly limited, but good enough at some things to create a misleading impression of greatness.  it’s a mistake to be relying on it for anything important right now. it’s a preview of progress; we have lots of work to do on robustness and truthfulness.” ~Sam Altman, CEO OpenAI.  Twitter 12/10/22.",
    "“There’s a certain feeling that happens when a new technology adjusts your thinking about computing. Google did it. Firefox did it. AWS did it. iPhone did it. OpenAI is doing it with ChatGPT.” ~Aaron Levie"
]


## 3. Data Preparation and Anonymization

In [16]:
# Parse each quote with spaCy, report what NER found, and strip PERSON names.

def _remove_person_names(text, names):
  """Return ``text`` with every standalone occurrence of each name removed.

  Args:
    text: The string to anonymize.
    names: Iterable of name strings to delete (duplicates are fine).

  Returns:
    A new string. Each match is replaced by the empty string, so the
    surrounding whitespace and punctuation are left exactly as they were.
  """
  # Longest names first, so a short name ("Sam") cannot eat part of a longer
  # one ("Sam Altman") before the longer one has been matched.
  for name in sorted(dict.fromkeys(names), key=len, reverse=True):
    if not name:
      continue
    # Lookarounds instead of a plain str.replace: the name only matches when
    # it is not glued to other word characters, so removing "Al" leaves
    # "Altman" and "Also" intact.
    text = re.sub(r'(?<!\w)' + re.escape(name) + r'(?!\w)', '', text)
  return text


anonymized_corpus = []

for doc in corpus:
  # Show the raw quote
  print(f'doc: {doc}')
  # Remove special characters (this also drops the curly quotes and the "~"
  # that precedes the attribution)
  cleaned_doc = re.sub('[!@#~$“”]', '', doc)
  print(f'cleaned_doc: {cleaned_doc}')
  # Run the spaCy pipeline once per quote and reuse the parsed result for
  # every report below instead of re-parsing the same text each time.
  parsed_cleaned_doc = nlp(cleaned_doc)
  print(f'cleaned_doc.sents: {list(parsed_cleaned_doc.sents)}')
  print(f'cleaned_doc.ents: {list(parsed_cleaned_doc.ents)}')
  print()
  displacy.render(parsed_cleaned_doc, style='ent', jupyter=True)
  print()
  all_ents = [(e.text, e.label_) for e in parsed_cleaned_doc.ents]
  print(f"all_ents: {all_ents}")
  # Only entities labelled PERSON are treated as identifying information
  named_ents = [ent.text for ent in parsed_cleaned_doc.ents if ent.label_ == 'PERSON']
  # Build the anonymized text as a new value; cleaned_doc itself is left
  # unchanged so it keeps meaning "cleaned but not anonymized".
  anonymized_corpus.append(_remove_person_names(cleaned_doc, named_ents))
  print(f"named_ents: {named_ents}")

print("\nAnonymized Corpus:")
for doc_anonymized in anonymized_corpus:
  print(doc_anonymized)

doc: “ChatGPT is scary good. We are not far from dangerously strong AI.” ~Elon Musk
cleaned_doc: ChatGPT is scary good. We are not far from dangerously strong AI. Elon Musk
cleaned_doc.sents: [ChatGPT is scary good., We are not far from dangerously strong AI., Elon Musk]
cleaned_doc.ents: [ChatGPT, AI, Elon Musk]




all_ents: [('ChatGPT', 'ORG'), ('AI', 'WORK_OF_ART'), ('Elon Musk', 'PERSON')]
named_ents: ['Elon Musk']
doc: “ChatGPT is incredibly limited, but good enough at some things to create a misleading impression of greatness.  it’s a mistake to be relying on it for anything important right now. it’s a preview of progress; we have lots of work to do on robustness and truthfulness.” ~Sam Altman, CEO OpenAI.  Twitter 12/10/22.
cleaned_doc: ChatGPT is incredibly limited, but good enough at some things to create a misleading impression of greatness.  it’s a mistake to be relying on it for anything important right now. it’s a preview of progress; we have lots of work to do on robustness and truthfulness. Sam Altman, CEO OpenAI.  Twitter 12/10/22.
cleaned_doc.sents: [ChatGPT is incredibly limited, but good enough at some things to create a misleading impression of greatness.  , it’s a mistake to be relying on it for anything important right now., it’s a preview of progress; we have lots of work t


all_ents: [('ChatGPT', 'ORG'), ('Sam Altman', 'PERSON'), ('OpenAI', 'GPE')]
named_ents: ['Sam Altman']
doc: “There’s a certain feeling that happens when a new technology adjusts your thinking about computing. Google did it. Firefox did it. AWS did it. iPhone did it. OpenAI is doing it with ChatGPT.” ~Aaron Levie
cleaned_doc: There’s a certain feeling that happens when a new technology adjusts your thinking about computing. Google did it. Firefox did it. AWS did it. iPhone did it. OpenAI is doing it with ChatGPT. Aaron Levie
cleaned_doc.sents: [There’s a certain feeling that happens when a new technology adjusts your thinking about computing., Google did it., Firefox did it., AWS did it., iPhone did it., OpenAI is doing it with ChatGPT., Aaron Levie]
cleaned_doc.ents: [Google, OpenAI, Aaron Levie]




all_ents: [('Google', 'ORG'), ('OpenAI', 'GPE'), ('Aaron Levie', 'PERSON')]
named_ents: ['Aaron Levie']

Anonymized Corpus:
ChatGPT is scary good. We are not far from dangerously strong AI. 
ChatGPT is incredibly limited, but good enough at some things to create a misleading impression of greatness.  it’s a mistake to be relying on it for anything important right now. it’s a preview of progress; we have lots of work to do on robustness and truthfulness. , CEO OpenAI.  Twitter 12/10/22.
There’s a certain feeling that happens when a new technology adjusts your thinking about computing. Google did it. Firefox did it. AWS did it. iPhone did it. OpenAI is doing it with ChatGPT. 
