# Demo for Rule Based Natural Language Processing

### Set up the path, so that the NLP modules can be found

In [7]:
import os
import sys
# Resolve the directory two levels above the current working directory; this is
# presumably where the `nlp` and `utils` packages imported below live.
nlpPath = os.path.abspath(os.path.join(os.getcwd(), '..', '..'))
# Guard the append so that re-running this cell does not keep adding duplicate
# entries to sys.path.
if nlpPath not in sys.path:
    sys.path.append(nlpPath)

### Load the spaCy model

In [26]:
import spacy
# Load the large English pipeline. The exclude list is empty, so no pipeline
# component is excluded at load time.
nlp = spacy.load(
    "en_core_web_lg",
    exclude=[],
)

### Load other modules

In [27]:
import pandas as pd

### Import NLP modules 

In [28]:
from nlp.RuleBasedMatcher import RuleBasedMatcher
from nlp import config
from nlp.nlp_utils import generatePatternList
from utils.nlpUtils.OPLparser import OPLentityParser

### Read and process entities

In [29]:
# Collect entity names from the optional OPM model and the optional entity CSV,
# de-duplicate them, and turn them into lemma-based matcher patterns.
ents = []
# Parse the OPM model, if one is configured.
# some modifications, bearings --> pump bearings
if 'opm_file' in config.nlpConfig['files']:
    opmFile = config.nlpConfig['files']['opm_file']
    formList, functionList = OPLentityParser(opmFile)
    # Only the form entities are turned into matcher patterns in this cell.
    ents.extend(formList)
# Append the user-provided entities, if an entity file is configured.
if 'entity_file' in config.nlpConfig['files']:
    entityFile = config.nlpConfig['files']['entity_file']
    # Flatten every cell of the CSV into one list, skipping the NaN placeholders
    # pandas inserts for empty cells (e.g. columns of unequal length). The causal
    # keyword cell drops missing values too, via dropna().
    entityList = [ent for ent in pd.read_csv(entityFile).values.ravel().tolist() if pd.notna(ent)]
    ents.extend(entityList)
# Remove duplicates before building the patterns.
ents = set(ents)
# Convert all collected entities (OPM forms and CSV entries) into matcher patterns.
label = "pump_component"
entId = "SSC"

patternsOPM = generatePatternList(ents, label=label, id=entId, nlp=nlp, attr="LEMMA")

### Read and process causal keywords 

In [31]:
# Build lemma-based matcher patterns for the causal keywords listed in the
# configured CSV file; every column of the file contributes its own keywords.
causalLabel = "causal_keywords"
causalID = "causal"
patternsCausal = []
causalFilename = config.nlpConfig['files']['cause_effect_keywords_file']
# skipinitialspace=True ignores the blanks that follow each delimiter.
ds = pd.read_csv(causalFilename, skipinitialspace=True)
for col in ds.columns:
    # Unique, non-missing keywords of this column. The variable is deliberately
    # not called `vars`, which would shadow the builtin for all later cells.
    causalKeywords = set(ds[col].dropna())
    patternsCausal.extend(generatePatternList(causalKeywords, label=causalLabel, id=causalID, nlp=nlp, attr="LEMMA"))

### Create the rule-based matcher with the entity patterns and the causal keyword patterns

In [33]:
# Names under which the two pattern sets are registered with the matcher.
name = 'ssc_entity_ruler'
causalName = 'causal_keywords_entity_ruler'

# The matcher is told which IDs mark SSC entities and which mark causal keywords.
matcher = RuleBasedMatcher(
    nlp,
    entLabel=entId,
    causalKeywordLabel=causalID,
)

# Register the SSC entity patterns first, then the causal keyword patterns.
matcher.addEntityPattern(name, patternsOPM)
matcher.addEntityPattern(causalName, patternsCausal)

### Read the input text file (alternatively, users can provide a raw string)

In [34]:
# Look up the input text path in the NLP configuration and read the whole file
# into a single string.
textFile = config.nlpConfig['files']['text_file']
with open(textFile) as ft:  # default mode is 'r' (read-only text)
    doc = ft.read()

### Process raw string data using matcher

In [35]:
# Run the rule-based matcher on the raw text read above. The dependency
# diagnostics and the list of matched entities shown below are this cell's output.
matcher(doc)

Entity "pump" dep_ is "advcl" is not among valid list "[nsubj, nsubjpass, pobj, dobj, compound]"
Entity "pump" dep_ is "ccomp" is not among valid list "[nsubj, nsubjpass, pobj, dobj, compound]"
Entity "pump" dep_ is "advcl" is not among valid list "[nsubj, nsubjpass, pobj, dobj, compound]"


[('pump', 'pump_component', 'SSC'), ('pump', 'pump_component', 'SSC'), ('pump', 'pump_component', 'SSC'), ('pump', 'pump_component', 'SSC'), ('Pump', 'pump_component', 'SSC'), ('pump shaft', 'pump_component', 'SSC'), ('Pump', 'pump_component', 'SSC'), ('pump bearings', 'pump_component', 'SSC'), ('pump shaft', 'pump_component', 'SSC'), ('pump bearings', 'pump_component', 'SSC'), ('pump shaft', 'pump_component', 'SSC'), ('Power supply', 'pump_component', 'SSC'), ('Pump', 'pump_component', 'SSC'), ('power supply', 'pump_component', 'SSC'), ('Pump', 'pump_component', 'SSC'), ('impeller', 'pump_component', 'SSC'), ('Pump', 'pump_component', 'SSC'), ('impeller', 'pump_component', 'SSC'), ('pump', 'pump_component', 'SSC'), ('pump', 'pump_component', 'SSC'), ('pump shaft', 'pump_component', 'SSC'), ('pump', 'pump_component', 'SSC'), ('pump', 'pump_component', 'SSC'), ('pump shaft', 'pump_component', 'SSC'), ('motor', 'pump_component', 'SSC'), ('Pump', 'pump_component', 'SSC'), ('pump shaft', '

### Access the extracted causal relations

In [23]:
# Display the causal relations held by the matcher (presumably populated by the
# matcher(doc) call above).
# NOTE(review): `_extractedCausals` is a private attribute (leading underscore);
# prefer a public accessor if RuleBasedMatcher provides one. TODO confirm.
matcher._extractedCausals

[[pump bearings,
  Rupture,
  caused,
  pump shaft,
  degradation,
  Rupture of pump bearings caused pump shaft degradation.,
  False],
 [pump bearings,
  Rupture,
  caused,
  pump shaft,
  degradation,
  Rupture of pump bearings caused pump shaft degradation and consequent flow reduction.,
  False],
 [power supply,
  failure,
  due to,
  Pump,
  test,
  Pump test failed due to power supply failure.,
  False],
 [Pump,
  inspection,
  revealed,
  impeller,
  degradation,
  Pump inspection revealed excessive impeller degradation.,
  False],
 [Pump,
  inspection,
  revealed,
  impeller,
  degradation,
  Pump inspection revealed excessive impeller degradation likely due to cavitation.,
  True],
 [pump shaft,
  Several cracks,
  caused,
  pump,
  failure,
  Several cracks on pump shaft were observed; they could have caused pump failure within few days.,
  True],
 [pump shaft,
  vibration,
  causing,
  motor,
  vibrate,
  The pump shaft vibration appears to be causing the motor to vibrate as