# NLP Pipeline
## Justin A. Gould
## April 2021

### Managing Memory

In [1]:
%load_ext memory_profiler

# Required Packages

In [2]:
%%time
import sys
import requests
import json

#System Path
sys.path.append("../")

#NER Model
from src import approach_a, approach_b, utils

CPU times: user 5.1 s, sys: 2.03 s, total: 7.13 s
Wall time: 10.6 s


# Sample Text

In [3]:
sample = """
SRT010G900 overlap with 0305900SRT0807E00 overlap with 0305900SRT0706Z00 \
overlap with 0305900SRT0807E00 overlap with 010G900SRT0706Z00 overlap with \
010G900steam cleaned engine added dye and ran truck at high idle found gear \
cover leaking removed hood and bumper drained coolant recovered Freon removed \
coolant reservoir, ps reservoir, both radiator support, upper and lower rad hoses, \
radiator, ac compressor and bracket, alternator, fan, fan shroud, fan hub, removed \
and resealed gear cover reinstalled all removed parts refilled coolant and Freon ran \
truck at high idle no leaks repair completeOIL LEAK EXTERNALUPPER GEAR COVER GASKETLEAKS \
EPR Part Number:430716600 OIL1045962 THURSDAY 31OCT2019 05:00:47 AM
"""

# Approach A
1. NER
2. DP
3. PA
4. NEL

## Run NER Model

In [4]:
payload = {
    "text"            : sample,
    "part_num_to_int" : True
}

url = "http://127.0.0.1:5000/ner"
r = requests.post(url, data=json.dumps(payload), headers={"content-type":"application/json; charset=utf-8"})
resp = json.loads(r.content.decode("utf-8"))

In [5]:
entities = resp["output"]
entities

{'DATE': ['THURSDAY 31OCT2019'],
 'PART_NAME': ['reservoir',
  'GEAR COVER',
  'radiator',
  'gear cover',
  'fan shroud',
  'fan hub'],
 'PART_NUM': [430716600],
 'TIMESTAMP': ['05:00:47 AM']}

## Dependency Parsing
_REPLACE WITH DP TEAM MODEL_

In [6]:
payload = {
    "text" : sample,
    "deps" : ["amod", "ccomp", "acl", "nsubj"]
}

model_to_use = "dep_students"
#model_to_use = "dp"
url = f"http://127.0.0.1:5000/{model_to_use}"
r = requests.post(url, data=json.dumps(payload), headers={"content-type":"application/json; charset=utf-8"})
resp = json.loads(r.content.decode("utf-8"))

In [7]:
dp_dict = resp["output"]
dp_dict

{'EPR': ['GASKETLEAKS', 'Part'],
 'Freon': ['recovered', 'ran'],
 'SRT010G900': ['overlap'],
 'coolant': ['drained', 'ran', 'refilled'],
 'cover': ['removed', 'reinstalled'],
 'dye': ['added'],
 'engine': ['cleaned'],
 'hood': ['removed', 'leaking'],
 'idle': ['high', 'found', 'high', 'repair'],
 'parts': ['removed', 'reinstalled'],
 'reservoir': ['removed'],
 'support': ['upper'],
 'truck': ['ran', 'ran']}

## Map DP to NER

In [8]:
payload = {
    "dp_dict"  : dp_dict,
    "entities" : entities
}

url = "http://127.0.0.1:5000/map_dp_ner"
r = requests.post(url, data=json.dumps(payload), headers={"content-type":"application/json; charset=utf-8"})
resp = json.loads(r.content.decode("utf-8"))

In [9]:
dp_mapped = resp["output"]
dp_mapped

{'gear cover': ['removed', 'reinstalled'], 'reservoir': ['removed']}

## Polarity Analysis of Verbs

In [10]:
%%memit
params = {
    "input_dict" : dp_mapped
}

approach_a_polarity = approach_a.predict_polarity(params)

peak memory: 804.80 MiB, increment: 2.07 MiB


In [11]:
approach_a_polarity

{'non_positive_parts': ['gear cover', 'reservoir'],
 'polarity': {'gear cover': {'removed': 'Non-positive',
   'reinstalled': 'Non-positive'},
  'reservoir': {'removed': 'Non-positive'}}}

In [12]:
causal_part_candidates_approach_a = approach_a_polarity["non_positive_parts"]
causal_part_candidates_approach_a

['gear cover', 'reservoir']

## Named Entity Linking

In [13]:
#INSERT

# Approach B
1. NER
2. Polarity
3. DP
4. NEL

### Text Sample must be Lowercase for PA

In [14]:
sample = sample.lower()

### Named Entity Recognition

In [15]:
payload = {
    "text"            : sample,
    "part_num_to_int" : True
}

url = "http://127.0.0.1:5000/ner"
r = requests.post(url, data=json.dumps(payload), headers={"content-type":"application/json; charset=utf-8"})
resp = json.loads(r.content.decode("utf-8"))

In [16]:
entities = resp["output"]
entities

{'DATE': ['thursday 31oct2019'],
 'PART_NAME': ['reservoir', 'radiator', 'gear cover', 'fan shroud', 'fan hub'],
 'TIMESTAMP': ['05:00:47 am']}

### Polarity Analysis

In [17]:
%%memit
params = {
    "text" : sample
}

polarity_approach_b = approach_b.approach_b_polarity(params)

peak memory: 804.99 MiB, increment: 0.14 MiB


In [18]:
print(polarity_approach_b)

{'negative_words': ['overlap', 'removed', 'gear', 'cover', 'coolant', '\n', '0305900srt0807e00', 'ran', 'truck', 'high', 'idle', 'freon', 'reservoir', 'radiator', 'srt010g900', '0305900srt0706z00', '010g900srt0706z00', '010g900steam', 'cleaned', 'engine', 'added', 'dye', 'leaking', 'hood', 'bumper', 'drained', 'recovered', 'ps', 'support', 'upper', 'lower', 'rad', 'hoses', 'ac', 'compressor', 'bracket', 'alternator', 'shroud', 'hub', 'resealed', 'reinstalled', 'all', 'refilled', 'leaks', 'repair', 'completeoil', 'leak', 'externalupper', 'gasketleaks', 'epr', 'number:430716600', 'oil1045962', 'thursday', '31oct2019', '05:00:47', 'am'], 'polarities': {',': 'pos', 'and': 'pos', 'overlap': 'neg', 'with': 'pos', 'removed': 'neg', 'gear': 'neg', 'cover': 'neg', 'coolant': 'neg', 'fan': 'pos', '\n': 'neg', '0305900srt0807e00': 'neg', 'ran': 'neg', 'truck': 'neg', 'at': 'pos', 'high': 'neg', 'idle': 'neg', 'freon': 'neg', 'reservoir': 'neg', 'radiator': 'neg', 'srt010g900': 'neg', '0305900srt0

### Dependency Parsing

In [19]:
payload = {
    "text" : sample,
    "deps" : ["amod", "ccomp", "acl", "nsubj"]
}

model_to_use = "dep_students"
#model_to_use = "dp"
url = f"http://127.0.0.1:5000/{model_to_use}"
r = requests.post(url, data=json.dumps(payload), headers={"content-type":"application/json; charset=utf-8"})
resp = json.loads(r.content.decode("utf-8"))

In [20]:
dp_dict = resp["output"]
dp_dict

{'am': ['cover', 'oil1045962'],
 'coolant': ['drained', 'ran', 'refilled'],
 'dye': ['added'],
 'engine': ['cleaned'],
 'freon': ['ran'],
 'hood': ['removed', 'leaking'],
 'idle': ['high', 'found', 'high'],
 'overlap': ['srt010g900'],
 'parts': ['removed', 'reinstalled'],
 'reservoir': ['removed', 'reinstalled'],
 'support': ['upper'],
 'truck': ['ran', 'ran']}

### Map Polarity Analysis and Dependency Parsing/NER

In [21]:
#Step 1: NER and DP
payload = {
    "dp_dict"  : dp_dict,
    "entities" : entities
}

url = "http://127.0.0.1:5000/map_dp_ner"
r = requests.post(url, data=json.dumps(payload), headers={"content-type":"application/json; charset=utf-8"})
resp = json.loads(r.content.decode("utf-8"))

In [22]:
dp_mapped = resp["output"]
dp_mapped

{'reservoir': ['removed', 'reinstalled']}

In [24]:
#Step 2: DP Mapped and Polarity
causal_part_candidates_approach_b = utils.dp_and_pa_approach_b_map(dp_mapped, polarity_approach_b)
causal_part_candidates_approach_b

['reservoir', 'reservoir']

### Named Entity Linking

In [None]:
#INSERT