# NLP Pipeline
## Justin A. Gould
## April 2021

# Required Packages

In [3]:
%%time
import sys
import spacy
import pandas as pd
import numpy as np 
import os
import re

#System Path
sys.path.append("../")

#NER Model
from src import ner, polarity

CPU times: user 3.54 s, sys: 1.23 s, total: 4.77 s
Wall time: 6.45 s


# Sample Text

In [2]:
# Raw sample text passed to the NER and dependency-parsing cells below.
# The trailing backslashes inside the literal are line continuations, so the
# physical lines are joined into one logical line (only the newlines right
# after the opening quotes and before the closing quotes are kept).
sample = """
SRT010G900 overlap with 0305900SRT0807E00 overlap with 0305900SRT0706Z00 \
overlap with 0305900SRT0807E00 overlap with 010G900SRT0706Z00 overlap with \
010G900steam cleaned engine added dye and ran truck at high idle found gear \
cover leaking removed hood and bumper drained coolant recovered Freon removed \
coolant reservoir, ps reservoir, both radiator support, upper and lower rad hoses, \
radiator, ac compressor and bracket, alternator, fan, fan shroud, fan hub, removed \
and resealed gear cover reinstalled all removed parts refilled coolant and Freon ran \
truck at high idle no leaks repair completeOIL LEAK EXTERNALUPPER GEAR COVER GASKETLEAKS \
EPR Part Number:430716600 OIL1045962 THURSDAY 31OCT2019 05:00:47 AM
"""

# Original

## Run NER Model

In [3]:
# Arguments for the NER step: the raw text plus the "part_num_to_int" flag.
# NOTE(review): the flag presumably makes extracted part numbers come back as
# int rather than str -- confirm against src.ner.
params = {
    "text"            : sample,
    "part_num_to_int" : True
}

# Run the project NER model (src.ner) on the sample text.
entities = ner.ner(params)

In [4]:
# Display the value returned by ner.ner.
entities

{'PART_NAME': ['radiator',
  'gear cover',
  'GEAR COVER',
  'reservoir',
  'fan hub',
  'fan shroud'],
 'PART_NUM': [430716600],
 'DATE': ['THURSDAY 31OCT2019'],
 'TIMESTAMP': ['05:00:47 AM']}

## Dependency Parsing
_TODO: replace the placeholder spaCy model (`en_core_web_sm`) used below with the DP team's model._

In [5]:
#Load Model
#spaCy pipeline 'en_core_web_sm'; dp() below reads this module-level name
dp_model = spacy.load('en_core_web_sm')

In [6]:
#Run Model
def dp(text):
    """Group selected dependency-parse tokens under the text of their head.

    Runs the module-level spaCy pipeline ``dp_model`` over ``text``. For every
    token whose dependency label is ``amod``, ``ccomp``, ``acl`` or ``nsubj``,
    the token's text is recorded under the text of its head token.

    Args:
        text: Raw text to parse.

    Returns:
        dict mapping head-token text to a list of the distinct dependent-token
        texts. Heads and dependents both appear in order of first occurrence
        in ``text``, so repeated runs on the same input give the same output.
    """
    #Dependency labels whose (head, token) pairs are collected
    dep_labels = ("amod", "ccomp", "acl", "nsubj")

    doc = dp_model(text)

    data_dp = {}
    for token in doc:
        if token.dep_ not in dep_labels:
            continue

        head = str(token.head)
        dependent = str(token.text)

        #Inner dict is used as an insertion-ordered set: it removes duplicates
        #while keeping first-seen order. list(set(...)) would return the words
        #in an arbitrary order that can differ between interpreter runs.
        data_dp.setdefault(head, {})[dependent] = None

    #Convert each ordered key collection to a plain list
    return {head: list(dependents) for head, dependents in data_dp.items()}

In [7]:
# Dependency-parse the sample text, then display the head -> dependents dict.
dp_dict = dp(sample)
dp_dict

{'overlap': ['SRT010G900'],
 'engine': ['cleaned'],
 'idle': ['high'],
 'found': ['idle', 'leaking'],
 'hood': ['removed'],
 'coolant': ['recovered', 'removed', 'drained'],
 'support': ['upper'],
 'cover': ['removed'],
 'reinstalled': ['cover'],
 'parts': ['removed'],
 'ran': ['Freon'],
 'repair': ['idle'],
 'EPR': ['GASKETLEAKS']}

## Map DP to NER

In [8]:
def map_dp_and_ner(dp_dict, entities):
    """Attach dependency-parse words to the NER part names they refer to.

    A DP key and a part name are considered a match when either string is a
    substring of the other (case-sensitive), e.g. DP key ``"cover"`` matches
    part name ``"gear cover"``.

    Args:
        dp_dict: dict mapping a head word to a list of dependent words.
        entities: dict of NER results; part names are read from its
            ``"PART_NAME"`` list. If that key is absent, nothing is mapped.

    Returns:
        dict mapping each matched part name to a new list of the words from
        every matching ``dp_dict`` entry, without duplicates, in the order
        they were encountered. The lists in ``dp_dict`` are never shared with
        or modified through the result.
    """
    data = {}
    part_names = entities.get("PART_NAME", [])

    #Loop through DP Output
    for dp_part, dp_words in dp_dict.items():
        #Loop through NER Parts Extracted
        for part in part_names:
            #An empty string is a substring of everything; never match on it
            if not part or not dp_part:
                continue

            if dp_part in part or part in dp_part:
                #Merge rather than overwrite so that several DP keys matching
                #the same part (e.g. "gear" and "cover") all contribute
                merged = data.setdefault(part, [])
                for word in dp_words:
                    if word not in merged:
                        merged.append(word)

    return data

In [10]:
# Keep only the DP entries that match an NER part name, then display them.
dp_mapped = map_dp_and_ner(dp_dict, entities)
dp_mapped

{'gear cover': ['removed']}

## Polarity Analysis of Verbs

In [None]:
# Pass the part -> words mapping to the project polarity module as "input_dict".
# Note: this rebinds the module-level name `params` used earlier for the NER call.
params = {
    "input_dict" : dp_mapped
}

polarity_ = polarity.predict_polarity(params)

In [2]:
# Hard-coded value assigned to polarity_.
# NOTE(review): presumably the expected result of polarity.predict_polarity
# for dp_mapped, pasted in because that cell has no execution count -- confirm.
polarity_= {'non_positive_parts': ['gear cover'],
 'polarity': {'gear cover': {'removed': 'Non-positive'}}}

## Named Entity Linking