Show how to use a pretrained assertion status model

In [1]:
import sys
# Add the directory two levels above the working directory to the module
# search path (presumably so a local sparknlp checkout can be imported —
# TODO confirm against the repository layout).
sys.path.append('../../')

from pyspark.sql import SparkSession
from pyspark.ml import PipelineModel

from pathlib import Path

# urlretrieve is exposed by `urllib` on Python 2 and by `urllib.request`
# on Python 3, so pick the import based on the interpreter's major version.
if sys.version_info[0] < 3:
    from urllib import urlretrieve
else:
    from urllib.request import urlretrieve

In [2]:
# Build (or reuse) the Spark session used by every cell below. It runs
# locally on all cores, and the Spark NLP artifact is requested through
# spark.jars.packages.
spark = (
    SparkSession.builder
    .appName("assertion-status")
    .master("local[*]")
    .config("spark.driver.memory", "6G")
    .config("spark.driver.maxResultSize", "2G")
    .config("spark.jars.packages", "JohnSnowLabs:spark-nlp:1.6.2")
    .getOrCreate()
)

In [3]:
from sparknlp.annotator import *
from sparknlp.common import *
from sparknlp.base import *
from sparknlp.pretrained import ResourceDownloader

Create some data for testing purposes

In [4]:
from pyspark.sql import Row

# Row factory declaring the three columns of the test frame.
R = Row('sentence', 'start', 'end')

# One-row DataFrame: the example sentence, with 0 and 1 as its start/end values.
test_data = spark.createDataFrame(
    [
        R('Peter is a good person, and he works at IBM', 0, 1),
    ]
)

Create some pipelines

In [5]:
import time

# Stage 1: read the "sentence" column and emit it as the "document" column.
documentAssembler = (
    DocumentAssembler()
    .setInputCol("sentence")
    .setOutputCol("document")
)

# Stage 2: split each document into tokens.
tokenizer = (
    Tokenizer()
    .setInputCols(["document"])
    .setOutputCol("token")
)

# Stage 3: pretrained NER model (default pretrained() arguments), fed with
# the document and token columns.
ner_dl = (
    NerDLModel().pretrained()
    .setInputCols(["document", "token"])
    .setOutputCol("ner_dl")
)

# Stage 4: flatten the NER annotations into a plain column, keeping the
# metadata keys alongside each result.
finisher = (
    Finisher()
    .setInputCols(["ner_dl"])
    .setIncludeKeys(True)
)

# All stages are already-fitted transformers, so they are assembled directly
# into a PipelineModel without a fit() step.
pipeline_fast_dl = PipelineModel(
    stages=[documentAssembler, tokenizer, ner_dl, finisher]
)

Now let's use these pipelines and see the results

In [6]:
# Run the pipeline over the sample frame and print the finisher's output
# column without truncating the cell contents.
(
    pipeline_fast_dl
    .transform(test_data)
    .select("finished_ner_dl")
    .show(truncate=False)
)

+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|finished_ner_dl                                                                                                                                                                                                                     |
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|word->Peter#result->I-PER@word->is#result->O@word->a#result->O@word->good#result->O@word->person#result->O@word->,#result->O@word->and#result->O@word->he#result->O@word->works#result->O@word->at#result->O@word->IBM#result->I-ORG|
+---------------------------------------------------------------------------