# Named Entity Recognition Demo
## Justin A. Gould
## March 2021

# Required Packages

In [63]:
import spacy
import os
import re

# Set up NER Model from Prodigy Training

In [64]:
# Location of the spaCy pipeline produced by the Prodigy training run.
# The defaults reproduce the original hard-coded Windows path; set the
# NER_MODEL_DIR / NER_MODEL_NAME environment variables to point the notebook
# at a different machine or checkpoint without editing this cell.
base = os.environ.get("NER_MODEL_DIR", "C:\\Users\\sf781\\Desktop\\Cummins Annotation Data")
model = os.environ.get("NER_MODEL_NAME", "tmp_model_14")
model_path = os.path.join(base, model)

# Load the pipeline from disk; `nlp` is used by every cell below.
nlp = spacy.load(model_path)

In [65]:
# Display the joined model directory to confirm which checkpoint is being loaded.
model_path

'C:\\Users\\sf781\\Desktop\\Cummins Annotation Data\\tmp_model_14'

In [66]:
# Display the loaded pipeline object as a quick check that spacy.load succeeded.
nlp

<spacy.lang.en.English at 0x23f82f2ad60>

# Run Model

In [77]:
# Sample service note used to exercise the model. It is a single free-text
# string that mixes a repair narrative, a "Part Number:" field, a
# "Tracking Number:" field and an upper-case fault description.
sample = "Go to QSOL Warranty Click on Request Core Return Process**replaced faulty aftrtmnt diff press sensor EPR Part Number:430716600 Tracking Number:609213880257. CHECK ENGINE LIGHT - YELLOW AFTERTREATMENT DPF DIFFERENTIAL PRESSURE SENSOR-SHORTED (ELECTRICAL ONLY) 16139MPATRICK"

In [78]:
# Echo the raw input text (print shows it without the quotes of the string repr).
print(sample)

Go to QSOL Warranty Click on Request Core Return Process**replaced faulty aftrtmnt diff press sensor EPR Part Number:430716600 Tracking Number:609213880257. CHECK ENGINE LIGHT - YELLOW AFTERTREATMENT DPF DIFFERENTIAL PRESSURE SENSOR-SHORTED (ELECTRICAL ONLY) 16139MPATRICK


In [79]:
# Run the loaded pipeline on the sample text; `doc` is reused by the cells below.
doc = nlp(sample)

In [80]:
# Displaying the processed document shows the input text back.
doc

Go to QSOL Warranty Click on Request Core Return Process**replaced faulty aftrtmnt diff press sensor EPR Part Number:430716600 Tracking Number:609213880257. CHECK ENGINE LIGHT - YELLOW AFTERTREATMENT DPF DIFFERENTIAL PRESSURE SENSOR-SHORTED (ELECTRICAL ONLY) 16139MPATRICK

In [81]:
# Entity spans the model found in the sample text.
doc.ents

(press sensor, Number:430716600, PRESSURE SENSOR)

In [82]:
# List every extracted entity as "<label> <text>", one per line.
for ent in doc.ents:
    print(ent.label_, ent.text)

PART_NAME press sensor
PART_NUM Number:430716600
PART_NAME PRESSURE SENSOR


In [54]:
# Scratch version of the entity post-processing (the same steps are wrapped
# in ner() further down): group the entity text of `doc` by entity label.
part_num_to_int = False  # True -> store part numbers as int instead of str

data = {}
for ent in doc.ents:
    # Remove any literal occurrence of the label name from the entity text
    text = ent.text.replace(ent.label_, "")

    # For PART_NUM entities keep only the digits (all digit runs concatenated)
    if ent.label_ == "PART_NUM":
        text = "".join(re.findall(r'\d+', text))
        # Only convert when digits were found: int("") raises ValueError
        if part_num_to_int and text:
            text = int(text)
        print(text)  # debug: show the cleaned part number

    # Append to the label's list, creating the list on first sight of the label
    data.setdefault(ent.label_, []).append(text)

430716600


In [55]:
data

{'PART_NAME': ['press sensor', 'PRESSURE SENSOR'], 'PART_NUM': ['430716600']}

In [60]:
def ner(text, part_num_to_int=True, nlp=nlp):
    """
    Description:
        Wrapper to run the Purdue Digital Crossroads NER model on Cummins service text.
        Runs the model on `text` and groups the extracted entity strings by entity label.
    
    Input:
     - text (str):             Input service text, as decoded (utf-8) text
     - part_num_to_int (bool): If a part number is extracted, should it remain a str or convert to int?
                               Converting to int drops any leading zeros of the part number.
     - nlp (SpaCy model):      Loaded SpaCy model - NER from Prodigy-trained checkpoint.
                               The default is the module-level `nlp` as it was bound when this
                               function was defined; re-run the definition after reloading a model.
    
    Output:
        {
            entity_name: [list, of, extracted, entities],
            entity_name: [list, of, extracted, entities],
            ...
        }
        Entities appear in document order. Labels with no entities are absent.
    
    Notes:
     - Any literal occurrence of the label name inside the entity text is removed.
     - For PART_NUM entities only the digits are kept (all digit runs concatenated).
       If the entity contains no digits at all, the empty string "" is stored,
       regardless of `part_num_to_int` (previously this case raised ValueError
       from int("") when `part_num_to_int` was True).
    """
    
    #Run Model
    doc = nlp(text)
    
    #Parse------
    data = {}
    for ent in doc.ents:
        label = ent.label_

        #Strip the label name itself if it shows up inside the entity text
        ent_text = ent.text.replace(label, "")

        #For PART_NUM, extract only the numbers
        if label == "PART_NUM":
            ent_text = "".join(re.findall(r'\d+', ent_text))
            #Only convert when digits were found: int("") raises ValueError
            if part_num_to_int and ent_text:
                ent_text = int(ent_text)

        #Append to the label's list, creating it the first time the label is seen
        data.setdefault(label, []).append(ent_text)
    
    return data

In [61]:
# Second, longer service note (this rebinds `sample` from the earlier cell).
# The trailing backslashes are line continuations inside the string literal,
# so the text is one long line apart from the newline right after the opening
# quotes and the one before the closing quotes.
sample = """
SRT010G900 overlap with 0305900SRT0807E00 overlap with 0305900SRT0706Z00 \
overlap with 0305900SRT0807E00 overlap with 010G900SRT0706Z00 overlap with \
010G900steam cleaned engine added dye and ran truck at high idle found gear \
cover leaking removed hood and bumper drained coolant recovered Freon removed \
coolant reservoir, ps reservoir, both radiator support, upper and lower rad hoses, \
radiator, ac compressor and bracket, alternator, fan, fan shroud, fan hub, removed \
and resealed gear cover reinstalled all removed parts refilled coolant and Freon ran \
truck at high idle no leaks repair completeOIL LEAK EXTERNALUPPER GEAR COVER GASKETLEAKS \
OIL1045962 THURSDAY 31OCT2019 05:00:47 AM
"""

In [62]:
# Run the wrapper with its defaults (part_num_to_int=True and the default nlp model).
ner(sample)

{'PART_NAME': ['gear cover',
  'reservoir',
  'reservoir',
  'radiator',
  'fan shroud',
  'fan hub',
  'gear cover',
  'GEAR COVER'],
 'DATE': ['THURSDAY 31OCT2019'],
 'TIMESTAMP': ['05:00:47 AM']}