# Demo for Rule-Based Natural Language Processing

### 1. Set up the path, so that the NLP modules can be found

In [1]:
import os
import sys

# Locate the framework sources: the 'src' directory one level above the
# current working directory (<cwd>/../src), resolved to an absolute path.
cwd = os.getcwd()
frameworkDir = os.path.abspath(os.path.join(cwd, os.pardir, 'src'))
# Guard the append so that re-running this cell does not pile up duplicate
# entries on sys.path.
if frameworkDir not in sys.path:
    sys.path.append(frameworkDir)

### 2. Load the spaCy model

In [2]:
import spacy
# Load the "en_core_web_lg" spaCy pipeline; the empty `exclude` list means
# no pipeline component is left out at load time.
nlp = spacy.load(
    "en_core_web_lg",
    exclude=[],
)

  from .autonotebook import tqdm as notebook_tqdm
  _C._set_default_tensor_type(t)


### 3. Load other modules

In [3]:
import pandas as pd

### 4. Import NLP modules 

In [4]:
from dackar.workflows.RuleBasedMatcher import RuleBasedMatcher
from dackar import config
from dackar.utils.nlp.nlp_utils import generatePatternList
from dackar.utils.opm.OPLparser import OPMobject

### 5. Set up logging  

In [5]:
import logging
# Configure root logging: each record shows timestamp, logger name
# (padded to 20 columns), level (padded to 8 columns) and the message.
# DEBUG level is used so that debug records are emitted as well.
logging.basicConfig(
    level=logging.DEBUG,
    datefmt='%d-%b-%y %H:%M:%S',
    format='%(asctime)s %(name)-20s %(levelname)-8s %(message)s',
)

### 6. Read and process entities

In [6]:
# Build the entity patterns from the entity CSV named in the dackar config.
entityFile = config.nlpConfig['files']['entity_file']
# Flatten every cell of the CSV into one list, skipping missing cells (NaN)
# so that only real entries are turned into patterns; this mirrors the
# dropna() applied to the causal keyword file.
entityList = [ent for ent in pd.read_csv(entityFile).values.ravel().tolist() if pd.notna(ent)]
# De-duplicate the entries. The set is built directly instead of rebinding
# a list name to a set, so `ents` has a single type throughout the cell.
ents = set(entityList)
label = "pump_component"
entId = "SSC"
# NOTE(review): generatePatternList comes from dackar.utils.nlp.nlp_utils;
# presumably it returns entity-ruler patterns matched on the LEMMA
# attribute -- confirm against that module.
patternsOPM = generatePatternList(ents, label=label, id=entId, nlp=nlp, attr="LEMMA")

### 7. Read and process causal keywords 

In [7]:
# Build the causal-keyword patterns from the cause/effect keyword CSV named
# in the dackar config. Every column of the file is processed separately and
# the resulting patterns are accumulated in `patternsCausal`.
causalLabel = "causal_keywords"
causalID = "causal"
patternsCausal = []
causalFilename = config.nlpConfig['files']['cause_effect_keywords_file']
# skipinitialspace=True strips the blanks that follow each delimiter.
ds = pd.read_csv(causalFilename, skipinitialspace=True)
for col in ds.columns:
    # Unique, non-missing keywords of this column. Deliberately not named
    # `vars`: that would shadow the builtin vars() for every later cell
    # executed in the same kernel.
    causalKeywords = set(ds[col].dropna())
    patternsCausal.extend(generatePatternList(causalKeywords, label=causalLabel, id=causalID, nlp=nlp, attr="LEMMA"))

### 8. Create the rule-based matcher with the entity patterns and the causal keyword patterns

In [8]:
# Names under which the two pattern sets are registered on the matcher.
name = 'ssc_entity_ruler'
causalName = 'causal_keywords_entity_ruler'

# Create the matcher, telling it which IDs identify entities and causal keywords.
matcher = RuleBasedMatcher(nlp, entLabel=entId, causalKeywordLabel=causalID)

# Register the entity patterns first, then the causal keyword patterns.
for rulerName, rulerPatterns in ((name, patternsOPM), (causalName, patternsCausal)):
    matcher.addEntityPattern(rulerName, rulerPatterns)

24-Oct-23 14:11:47 dackar.workflows.RuleBasedMatcher INFO     Create instance of RuleBasedMatcher
24-Oct-23 14:11:50 dackar.utils.nlp.nlp_utils INFO     Model: core_web_lg, Language: en
24-Oct-23 14:11:50 dackar.utils.nlp.nlp_utils INFO     Available pipelines:pysbdSentenceBoundaries, tok2vec, tagger, parser, attribute_ruler, lemmatizer, mergePhrase, normEntities, initCoref, aliasResolver, coreferee, anaphorCoref, anaphorEntCoref


### 9. Read the input text file (alternatively, provide a raw string directly)

In [9]:
# Read the whole input text file named in the dackar config into one string.
textFile = config.nlpConfig['files']['text_file']
# The encoding is given explicitly: without it open() decodes with the
# platform's locale encoding, so the same file could be read differently
# (or fail to decode) on another machine.
# NOTE(review): assumes the text file is UTF-8 encoded -- confirm.
with open(textFile, 'r', encoding='utf-8') as ft:
    doc = ft.read()

### 10. Process raw string data using matcher

In [10]:
# Run the matcher on the raw text string read from the input file.
# Per the log output recorded for this cell, the run logs coreference
# information and the health status extracted for each entity.
matcher(doc)

24-Oct-23 14:11:51 dackar.workflows.RuleBasedMatcher DEBUG    Print Coreference Info:
24-Oct-23 14:11:51 dackar.workflows.RuleBasedMatcher INFO     Start to extract health status
24-Oct-23 14:11:51 dackar.workflows.RuleBasedMatcher DEBUG    control room health status: an acrid odor
24-Oct-23 14:11:51 dackar.workflows.RuleBasedMatcher DEBUG    steam dump control relay health status: failed
24-Oct-23 14:11:51 dackar.workflows.RuleBasedMatcher DEBUG    atmospheric steam dump valves health status: inoperable
24-Oct-23 14:11:51 dackar.workflows.RuleBasedMatcher DEBUG    relay health status: replaced
24-Oct-23 14:11:51 dackar.workflows.RuleBasedMatcher DEBUG    ASDVs health status: service
24-Oct-23 14:11:51 dackar.workflows.RuleBasedMatcher DEBUG    control room health status: an acrid odor
24-Oct-23 14:11:51 dackar.workflows.RuleBasedMatcher DEBUG    steam dump control relay health status: failed
24-Oct-23 14:11:51 dackar.workflows.RuleBasedMatcher DEBUG    atmospheric steam dump valves he

0: relay(38), relay(53); 1: ASDVs(203), ASDVs(225), ASDVs(248), ASDVs(295), ASDVs(312), ASDVs(382); 2: relay(428), relay(446), relay(465), relay(469), relay(483); 3: The duty cycle(443), it(453); 4: ASDVs(488), ASDVs(502), ASDVs(549); 5: relay(558), relay(603), relay(622); 6: coil(593), it(610); 7: relay(709), relay(721), relay(738); 8: The preventive maintenance optimization code(714), it(725)


### 11. Access processed information from matcher

In [11]:
# Display the causal relations held by the matcher (presumably populated by
# the matcher(doc) call above -- confirm).
# NOTE(review): `_extractedCausals` is underscore-prefixed, i.e. private by
# convention; prefer a public accessor if RuleBasedMatcher offers one.
matcher._extractedCausals

[[Investigation,
  None,
  revealed,
  steam dump control relay,
  'failed',
  Investigation revealed that the steam dump control relay had failed, rendering all four atmospheric steam dump valves inoperable.,
  False],
 [Investigation,
  None,
  rendering,
  atmospheric steam dump valves,
  'inoperable',
  Investigation revealed that the steam dump control relay had failed, rendering all four atmospheric steam dump valves inoperable.,
  False],
 [steam dump control relay,
  'failed',
  rendering,
  atmospheric steam dump valves,
  'inoperable',
  Investigation revealed that the steam dump control relay had failed, rendering all four atmospheric steam dump valves inoperable.,
  False],
 [Investigation,
  None,
  revealed,
  steam dump control relay,
  'failed',
  Investigation revealed that the steam dump control relay had failed, rendering all four atmospheric steam dump valves inoperable, and causing an entry into a 24-hour shutdown action statement limiting condition for operation 3