# Demo for Rule-Based Natural Language Processing

### 1. Set up the path, so that the NLP modules can be found

In [1]:
import os
import sys

# Resolve the ``src`` directory that sits next to the current working directory
# (i.e. <parent of cwd>/src) and make it importable.
cwd = os.getcwd()
frameworkDir = os.path.abspath(os.path.join(cwd, os.pardir, 'src'))
# Guard the append so that re-running this cell does not pile up duplicate
# entries on sys.path.
if frameworkDir not in sys.path:
    sys.path.append(frameworkDir)

### 2. Load the spaCy language model

In [2]:
import spacy
# Load the spaCy pipeline by package name; the empty ``exclude`` list means no
# pipeline component is left out at load time.
spacyModel = "en_core_web_lg"
nlp = spacy.load(spacyModel, exclude=[])

### 3. Load other modules

In [3]:
import pandas as pd

### 4. Import NLP modules 

In [4]:
from dackar.workflows.RuleBasedMatcher import RuleBasedMatcher
from dackar import config
from dackar.utils.nlp.nlp_utils import generatePatternList
from dackar.utils.opm.OPLparser import OPMobject

  from .autonotebook import tqdm as notebook_tqdm
Warming up PyWSD (takes ~10 secs)... took 4.646389961242676 secs.


### 5. Set up logging  

In [5]:
import logging
# Root-logger configuration: timestamp, logger name padded to 20 characters,
# level padded to 8 characters, then the message. DEBUG level shows everything.
logFormat = '%(asctime)s %(name)-20s %(levelname)-8s %(message)s'
dateFormat = '%d-%b-%y %H:%M:%S'
logging.basicConfig(level=logging.DEBUG, format=logFormat, datefmt=dateFormat)

### 6. Read and process entities

In [6]:
# Label and ID attached to every entity pattern generated below.
label = "pump_component"
entId = "SSC"

# Flatten every cell of the entity CSV into a single list, then deduplicate.
# NOTE(review): unlike the causal-keyword cell, missing (NaN) cells are not
# dropped here -- confirm the entity file has no ragged columns.
entityFile = config.nlpConfig['files']['entity_file']
entityList = pd.read_csv(entityFile).values.ravel().tolist()
ents = set(entityList)

# Build the patterns from the unique entities, matching on the LEMMA attribute.
patternsOPM = generatePatternList(ents, label=label, id=entId, nlp=nlp, attr="LEMMA")

### 7. Read and process causal keywords 

In [7]:
# Label and ID attached to every causal-keyword pattern generated below.
causalLabel = "causal_keywords"
causalID = "causal"
patternsCausal = []
causalFilename = config.nlpConfig['files']['cause_effect_keywords_file']
# skipinitialspace=True ignores the blank that follows each delimiter.
ds = pd.read_csv(causalFilename, skipinitialspace=True)
for col in ds.columns:
    # Drop missing cells and deduplicate the keywords of this column.
    # The set is named ``causalKeywords`` (not ``vars``) so that the builtin
    # ``vars()`` is not shadowed in the notebook's global namespace.
    causalKeywords = set(ds[col].dropna())
    patternsCausal.extend(generatePatternList(causalKeywords, label=causalLabel, id=causalID, nlp=nlp, attr="LEMMA"))

### 8. Create Rule-based matcher with entity list and causal entity list

In [8]:
# Names under which the two pattern sets are registered with the matcher.
name = 'ssc_entity_ruler'
causalName = 'causal_keywords_entity_ruler'

# Build the matcher on the loaded pipeline, telling it which IDs mark
# entities and which mark causal keywords.
matcher = RuleBasedMatcher(nlp, entID=entId, causalKeywordID=causalID)

# Register the entity patterns first, then the causal-keyword patterns.
for rulerName, rulerPatterns in ((name, patternsOPM), (causalName, patternsCausal)):
    matcher.addEntityPattern(rulerName, rulerPatterns)

### 9. Read the input text file (alternatively, provide a raw string directly)

In [9]:
# Path of the demo text comes from the package configuration.
textFile = config.nlpConfig['files']['text_file']
# Pass the encoding explicitly so decoding does not depend on the platform's
# locale default. NOTE(review): assumes the text file is UTF-8 -- confirm.
with open(textFile, 'r', encoding='utf-8') as ft:
    # ``doc`` holds the raw text as a plain string.
    doc = ft.read()

### 10. Process raw string data using matcher

In [10]:
# Invoke the matcher on the raw text read in the previous step. The triples in
# the cell output below are produced by this call.
# NOTE(review): presumably (subject, relation, object) -- confirm in RuleBasedMatcher.
matcher(doc)

(Investigation, revealed, valves) (This event, prevented, structures) (Investigation, revealed, valves) (ASDVs, prevent, valves) (room, controlled, system) (fuse, resulted, power) (failure, The cause, failure) (that, the start, the event) (that, contributed, the event) (failure, The cause, failure) (This, resulted, power) (This action, prevent, this failure)


### 11. Access processed information from matcher

In [11]:
# Display the causal records stored on the matcher after the call above.
# Each record in the output has seven fields, including the source sentence
# and a trailing boolean.
# NOTE(review): ``_extractedCausals`` is underscore-prefixed (private by
# convention); prefer a public accessor if RuleBasedMatcher provides one.
matcher._extractedCausals

[[Investigation,
  None,
  revealed,
  steam dump control relay,
  None,
  Investigation revealed that the steam dump control relay had failed, rendering all four atmospheric steam dump valves inoperable.,
  False],
 [Investigation,
  None,
  rendering,
  atmospheric steam dump valves,
  None,
  Investigation revealed that the steam dump control relay had failed, rendering all four atmospheric steam dump valves inoperable.,
  False],
 [steam dump control relay,
  None,
  rendering,
  atmospheric steam dump valves,
  None,
  Investigation revealed that the steam dump control relay had failed, rendering all four atmospheric steam dump valves inoperable.,
  False],
 [Investigation,
  None,
  revealed,
  steam dump control relay,
  None,
  Investigation revealed that the steam dump control relay had failed, rendering all four atmospheric steam dump valves inoperable, and causing an entry into a 24-hour shutdown action statement limiting condition for operation 3.7.4.,
  False],
 [Investiga