# NLP Pipeline
## Justin A. Gould
## April 2021

# Required Packages

In [1]:
%%time
import sys
import requests
import json

#System Path
sys.path.append("../")

#Local pipeline modules (polarity, norma, utils); NER itself is called over HTTP below
from src import polarity, norma, utils

CPU times: user 6.97 s, sys: 3.64 s, total: 10.6 s
Wall time: 30.7 s


# Sample Text

In [2]:
#Free-text record used as the input of every step below.
#The trailing backslashes inside the triple-quoted string are line continuations,
#so the text is one long line wrapped in a leading and a trailing newline.
sample = """
SRT010G900 overlap with 0305900SRT0807E00 overlap with 0305900SRT0706Z00 \
overlap with 0305900SRT0807E00 overlap with 010G900SRT0706Z00 overlap with \
010G900steam cleaned engine added dye and ran truck at high idle found gear \
cover leaking removed hood and bumper drained coolant recovered Freon removed \
coolant reservoir, ps reservoir, both radiator support, upper and lower rad hoses, \
radiator, ac compressor and bracket, alternator, fan, fan shroud, fan hub, removed \
and resealed gear cover reinstalled all removed parts refilled coolant and Freon ran \
truck at high idle no leaks repair completeOIL LEAK EXTERNALUPPER GEAR COVER GASKETLEAKS \
EPR Part Number:430716600 OIL1045962 THURSDAY 31OCT2019 05:00:47 AM
"""

# Original

## Run NER Model

In [13]:
#Ask the local NER service for the entities found in the sample text.
#"part_num_to_int" is forwarded to the service as-is (presumably it makes the
#service return part numbers as integers -- TODO confirm against the service).
payload = {
    "text"            : sample,
    "part_num_to_int" : True
}

url = "http://127.0.0.1:5000/ner"
r = requests.post(
    url,
    data=json.dumps(payload),
    headers={"content-type":"application/json; charset=utf-8"},
    timeout=120,  #generous bound so a wedged service cannot hang the notebook forever
)
r.raise_for_status()  #stop here on a 4xx/5xx instead of failing later on a missing "output" key
resp = json.loads(r.content.decode("utf-8"))

In [14]:
#Keep the "output" field of the NER response and display it
entities = resp["output"]
entities

{'DATE': ['THURSDAY 31OCT2019'],
 'PART_NAME': ['GEAR COVER',
  'gear cover',
  'radiator',
  'reservoir',
  'fan hub',
  'fan shroud'],
 'PART_NUM': [430716600],
 'TIMESTAMP': ['05:00:47 AM']}

## Dependency Parsing
_REPLACE WITH DP TEAM MODEL_

In [15]:
#Ask the local dependency-parsing service to parse the sample text.
#"deps" is forwarded to the service as-is (presumably the dependency labels
#to keep in the result -- TODO confirm against the service).
payload = {
    "text" : sample,
    "deps" : ["amod", "ccomp", "acl", "nsubj"]
}

url = "http://127.0.0.1:5000/dp"
r = requests.post(
    url,
    data=json.dumps(payload),
    headers={"content-type":"application/json; charset=utf-8"},
    timeout=120,  #generous bound so a wedged service cannot hang the notebook forever
)
r.raise_for_status()  #stop here on a 4xx/5xx instead of failing later on a missing "output" key
resp = json.loads(r.content.decode("utf-8"))

In [16]:
#Keep the "output" field of the dependency-parsing response and display it
dp_dict = resp["output"]
dp_dict

{'EPR': ['GASKETLEAKS'],
 'coolant': ['drained', 'removed', 'recovered'],
 'cover': ['removed'],
 'engine': ['cleaned'],
 'found': ['leaking', 'idle'],
 'hood': ['removed'],
 'idle': ['high'],
 'overlap': ['SRT010G900'],
 'parts': ['removed'],
 'ran': ['Freon'],
 'reinstalled': ['cover'],
 'repair': ['idle'],
 'support': ['upper']}

## Map DP to NER

In [17]:
#Send the dependency-parsing result together with the NER entities to the
#local mapping service.
payload = {
    "dp_dict"  : dp_dict,
    "entities" : entities
}

url = "http://127.0.0.1:5000/map_dp_ner"
r = requests.post(
    url,
    data=json.dumps(payload),
    headers={"content-type":"application/json; charset=utf-8"},
    timeout=120,  #generous bound so a wedged service cannot hang the notebook forever
)
r.raise_for_status()  #stop here on a 4xx/5xx instead of failing later on a missing "output" key
resp = json.loads(r.content.decode("utf-8"))

In [18]:
#Keep the "output" field of the mapping response and display it
dp_mapped = resp["output"]
dp_mapped

{'gear cover': ['removed']}

## Polarity Analysis of Verbs

In [19]:
#Run the project's polarity predictor on the mapped dependencies;
#the input is handed over under the "input_dict" key.
params = {
    "input_dict" : dp_mapped
}

polarity_ = polarity.predict_polarity(params)

In [20]:
#Display the polarity result
polarity_

{'non_positive_parts': ['gear cover'],
 'polarity': {'gear cover': {'removed': 'Non-positive'}}}

In [22]:
#Candidate parts for this approach: the entries listed under "non_positive_parts"
causal_part_candidates_IDC = polarity_["non_positive_parts"]
causal_part_candidates_IDC

['gear cover']

## Named Entity Linking

In [17]:
#TODO: insert the Named Entity Linking step here

# Norma's Approach
1. NER
2. Polarity
3. DP
4. NEL

### Named Entity Recognition

In [3]:
#Ask the local NER service for the entities found in the sample text.
#"part_num_to_int" is forwarded to the service as-is (presumably it makes the
#service return part numbers as integers -- TODO confirm against the service).
payload = {
    "text"            : sample,
    "part_num_to_int" : True
}

url = "http://127.0.0.1:5000/ner"
r = requests.post(
    url,
    data=json.dumps(payload),
    headers={"content-type":"application/json; charset=utf-8"},
    timeout=120,  #generous bound so a wedged service cannot hang the notebook forever
)
r.raise_for_status()  #stop here on a 4xx/5xx instead of failing later on a missing "output" key
resp = json.loads(r.content.decode("utf-8"))

In [4]:
#Keep the "output" field of the NER response and display it
entities = resp["output"]
entities

{'DATE': ['THURSDAY 31OCT2019'],
 'PART_NAME': ['GEAR COVER',
  'gear cover',
  'radiator',
  'reservoir',
  'fan hub',
  'fan shroud'],
 'PART_NUM': [430716600],
 'TIMESTAMP': ['05:00:47 AM']}

### Polarity Analysis

In [5]:
#Run the project's norma polarity analysis directly on the raw sample text;
#the text is handed over under the "text" key.
params = {
    "text" : sample
}

polarities = norma.norma_polarity(params)

In [6]:
#Print the whole polarity result as plain text (one long line)
print(polarities)

{'negative_words': ['overlap', 'removed', 'gear', 'cover', 'coolant', '\n', '0305900srt0807e00', 'ran', 'truck', 'high', 'idle', 'freon', 'reservoir', 'radiator', 'srt010g900', '0305900srt0706z00', '010g900srt0706z00', '010g900steam', 'cleaned', 'engine', 'added', 'dye', 'leaking', 'hood', 'bumper', 'drained', 'recovered', 'ps', 'support', 'upper', 'lower', 'rad', 'hoses', 'ac', 'compressor', 'bracket', 'alternator', 'shroud', 'hub', 'resealed', 'reinstalled', 'all', 'refilled', 'leaks', 'repair', 'completeoil', 'leak', 'externalupper', 'gasketleaks', 'epr', 'number:430716600', 'oil1045962', 'thursday', '31oct2019', '05:00:47', 'am'], 'polarities': {',': 'pos', 'and': 'pos', 'overlap': 'neg', 'with': 'pos', 'removed': 'neg', 'gear': 'neg', 'cover': 'neg', 'coolant': 'neg', 'fan': 'pos', '\n': 'neg', '0305900srt0807e00': 'neg', 'ran': 'neg', 'truck': 'neg', 'at': 'pos', 'high': 'neg', 'idle': 'neg', 'freon': 'neg', 'reservoir': 'neg', 'radiator': 'neg', 'srt010g900': 'neg', '0305900srt0

### Dependency Parsing

In [7]:
#Ask the local dependency-parsing service to parse the sample text.
#"deps" is forwarded to the service as-is (presumably the dependency labels
#to keep in the result -- TODO confirm against the service).
payload = {
    "text" : sample,
    "deps" : ["amod", "ccomp", "acl", "nsubj"]
}

url = "http://127.0.0.1:5000/dp"
r = requests.post(
    url,
    data=json.dumps(payload),
    headers={"content-type":"application/json; charset=utf-8"},
    timeout=120,  #generous bound so a wedged service cannot hang the notebook forever
)
r.raise_for_status()  #stop here on a 4xx/5xx instead of failing later on a missing "output" key
resp = json.loads(r.content.decode("utf-8"))

In [8]:
#Keep the "output" field of the dependency-parsing response and display it
dp_dict = resp["output"]
dp_dict

{'EPR': ['GASKETLEAKS'],
 'coolant': ['drained', 'removed', 'recovered'],
 'cover': ['removed'],
 'engine': ['cleaned'],
 'found': ['leaking', 'idle'],
 'hood': ['removed'],
 'idle': ['high'],
 'overlap': ['SRT010G900'],
 'parts': ['removed'],
 'ran': ['Freon'],
 'reinstalled': ['cover'],
 'repair': ['idle'],
 'support': ['upper']}

### Map Polarity Analysis and Dependency Parsing/NER

In [9]:
#Step 1: NER and DP
#Send the dependency-parsing result together with the NER entities to the
#local mapping service.
payload = {
    "dp_dict"  : dp_dict,
    "entities" : entities
}

url = "http://127.0.0.1:5000/map_dp_ner"
r = requests.post(
    url,
    data=json.dumps(payload),
    headers={"content-type":"application/json; charset=utf-8"},
    timeout=120,  #generous bound so a wedged service cannot hang the notebook forever
)
r.raise_for_status()  #stop here on a 4xx/5xx instead of failing later on a missing "output" key
resp = json.loads(r.content.decode("utf-8"))

In [10]:
#Keep the "output" field of the mapping response and display it
dp_mapped = resp["output"]
dp_mapped

{'gear cover': ['removed']}

In [21]:
#Step 2: DP Mapped and Polarity
#Pass the NER-mapped dependencies and the polarity result to the project helper
#to obtain this approach's candidate parts, then display them
causal_part_candidates_NORMA = utils.dp_and_pa_norma_map(dp_mapped, polarities)
causal_part_candidates_NORMA

['gear cover']

### Named Entity Linking

In [14]:
#TODO: insert the Named Entity Linking step here