<a href="https://colab.research.google.com/github/drpietech/sandbox/blob/main/glinerspacy_sample.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

The code below was provided by Marie Stephen Leo, a data scientist at Sephora.

In [None]:
# pip install presidio_analyzer presidio_anonymizer gliner-spacy
# python -m spacy download en_core_web_sm
import spacy
from gliner_spacy.pipeline import GlinerSpacy
from presidio_analyzer import AnalyzerEngine
from presidio_analyzer.nlp_engine import SpacyNlpEngine
from presidio_anonymizer import AnonymizerEngine
from presidio_anonymizer.entities import OperatorConfig

# Entity types requested from the GLiNER component; the same names are passed
# to the analyzer and used as operator keys when anonymizing.
labels = ["PERSON", "PHONE_NUMBER", "EMAIL_ADDRESS", "LOCATION"]

# Initialize the Anonymizer
anonymizer = AnonymizerEngine()

# Initialize the Analyzer
## Load the spaCy pipeline. The model name must match the package installed
## with `python -m spacy download en_core_web_sm`.
nlp = spacy.load("en_core_web_sm")
## Append the GLiNER component; `config` must be a dict mapping option names
## to values (model checkpoint and the labels to predict).
nlp.add_pipe(
    "gliner_spacy",
    config={"gliner_model": "urchade/gliner_base", "labels": labels},
)

## Create an NLP engine with the Spacy Pipeline
class LoadedSpacyNlpEngine(SpacyNlpEngine):
  """NLP engine that serves an already-loaded spaCy pipeline for English."""

  def __init__(self, loaded_spacy_model):
    # Run the parent initializer with its defaults, then overwrite `nlp`
    # so the supplied pipeline is the one registered under "en".
    super().__init__()
    self.nlp = dict(en=loaded_spacy_model)
# Wrap the configured pipeline in the custom engine.
loaded_nlp_engine = LoadedSpacyNlpEngine(nlp)

# Build the Analyzer on top of that engine.
analyzer = AnalyzerEngine(nlp_engine=loaded_nlp_engine)

def gliner_anonymize(text:str) -> str:
  """Return ``text`` with detected PII replaced by bracketed placeholders.

  The module-level ``analyzer`` is queried for the entity types listed in
  ``labels`` (language ``"en"``); the module-level ``anonymizer`` then
  swaps each reported span for a fixed placeholder chosen by entity type.

  Args:
    text: Input text that may contain personal data.

  Returns:
    The anonymized text.
  """
  # Predict entities using GLiNER
  analyzer_results = analyzer.analyze(
      text=text,
      entities=labels,
      language="en",
  )
  # Redact the identified PII data. Placeholders are wrapped in matching
  # square brackets so they stand out from the surrounding text.
  operators = {
      "PERSON": OperatorConfig("replace", {"new_value": "[REDACTED NAME]"}),
      "PHONE_NUMBER": OperatorConfig("replace", {"new_value": "[REDACTED PHONE NUMBER]"}),
      "EMAIL_ADDRESS": OperatorConfig("replace", {"new_value": "[REDACTED EMAIL]"}),
      "LOCATION": OperatorConfig("replace", {"new_value": "[REDACTED PLACE]"}),
  }
  pii_sanitized_text = anonymizer.anonymize(
      text=text,
      analyzer_results=analyzer_results,
      operators=operators,
  )
  return pii_sanitized_text.text

# Smoke test: anonymize a short sentence and print the result.
sample_text = "hello. my name is varun and i'm from chandigarh"
print(gliner_anonymize(sample_text))
