In [None]:
import os

import sparknlp_jsl # licensed version of Spark NLP
import sparknlp

# The JSL secret is a credential, so keep it out of the notebook source.
# It is read from the JSL_SECRET environment variable (a name chosen for this
# notebook); when the variable is unset the original "xxxx" placeholder is
# used. If you don't have a secret key, please get in touch with JSL.
jsl_secret = os.environ.get("JSL_SECRET", "xxxx")

# Start the Spark session through the licensed package.
spark = sparknlp_jsl.start(jsl_secret)

print("Spark NLP version: ", sparknlp.version())
print("Apache Spark version: ", spark.version)

This notebook accompanies the article https://www.johnsnowlabs.com/explain-clinical-document-spark-nlp-pretrained-pipeline/

It loads the pretrained clinical pipeline (`explain_clinical_doc_dl`), which contains the following annotators:

- Tokenizer
- Sentence Detector
- Clinical Word Embeddings (GloVe trained on the PubMed dataset)
- Clinical NER-DL (trained with a state-of-the-art algorithm on the i2b2 dataset)
- AssertionDL model (trained with a state-of-the-art algorithm on the i2b2 dataset)

In [None]:
from pyspark.ml import PipelineModel

# Load the saved pipeline from a path relative to the working directory.
pretrained_model = PipelineModel.load(
    "clinical/models/explain_clinical_doc_dl"
)


## With LightPipeline

In [None]:
from sparknlp.base import LightPipeline

# Wrap the loaded pipeline model in a LightPipeline; later cells call
# annotate() / fullAnnotate() on it with a plain Python string.
ner_lightModel = LightPipeline(pretrained_model)

In [None]:
# Sample multi-sentence clinical note passed to the LightPipeline
# annotate() / fullAnnotate() calls in the following cells.
clinical_text = """
Patient with severe fever and sore throat. 
He shows no stomach pain and he maintained on an epidural and PCA for pain control.
He also became short of breath with climbing a flight of stairs.
After CT, lung tumour located at the right lower lobe. Father with Alzheimer.
"""

In [None]:
# Annotate the sample note with the light pipeline.
result = ner_lightModel.annotate(clinical_text)
# Show which keys the result exposes (later cells read 'token', 'ner',
# 'ner_chunk' and 'assertion').
result.keys()

In [None]:
# Pair each token with the NER tag at the same position.
[(token, tag) for token, tag in zip(result['token'], result['ner'])]

In [None]:
# Annotate again so this cell can be run on its own, then pair each NER chunk
# with the assertion value at the same position.
result = ner_lightModel.annotate(clinical_text)
[(chunk, status) for chunk, status in zip(result['ner_chunk'], result['assertion'])]

In [None]:
%%time 

result = ner_lightModel.fullAnnotate(clinical_text)

entity_tuples = [(n.result, n.metadata['entity'], m.result, n.begin, n.end) for n,m in zip(result[0]['ner_chunk'],result[0]['assertion'])]

In [None]:
# Display the (phrase, entity, assertion, start, end) tuples built from the
# fullAnnotate() output.
entity_tuples

In [None]:
import pandas as pd

# Render the entity tuples as a table, one row per NER chunk.
pd.DataFrame(
    entity_tuples,
    columns=["phrase", "entity", "assertion", "start", "end"],
)

## With Spark DataFrames

In [None]:
# Build a Spark DataFrame with one short clinical sentence per row; toDF("text")
# names the single string column "text".
# NOTE(review): presumably "text" is the input column the pretrained pipeline
# expects - confirm against the pipeline definition.
data = spark.createDataFrame([
  ["Patient with severe fever and sore throat"],  # fixed typo: "feber" -> "fever"
  ["Patient shows no stomach pain"],
  ["She was maintained on an epidural and PCA for pain control."],
  ["He also became short of breath with climbing a flight of stairs."],
  ["Lung tumour located at the right lower lobe"],
  ["Father with Alzheimer."]
]).toDF("text")

data.show(truncate=False)

In [None]:
# Run the full pipeline over the DataFrame and preview every output column
# (show() truncates long cell values by default).
(
    pretrained_model
    .transform(data)
    .show()
)

In [None]:
# Show only the token strings and their NER tags, untruncated.
(
    pretrained_model
    .transform(data)
    .select("token.result", "ner.result")
    .show(truncate=False)
)

In [None]:
# Show only the NER chunk strings and their assertion values, untruncated.
(
    pretrained_model
    .transform(data)
    .select("ner_chunk.result", "assertion.result")
    .show(truncate=False)
)
