In [11]:
# Import stanza
import stanza
#stanza.install_corenlp(dir=corenlp_dir)

# Setting StanfordCoreNLP
import os
import stanfordnlp
import pandas as pd
from tqdm import tqdm
corenlp_dir = './corenlp'
os.environ["CORENLP_HOME"] = corenlp_dir
from stanfordnlp.server import CoreNLPClient

# Setting AllenNLP
from allennlp.predictors.predictor import Predictor
from allennlp_models.coref.predictors.coref import CorefPredictor
from allennlp_models.coref.dataset_readers.conll import ConllCorefReader
print('\nInitialising AllenNLP Library models')
predictor = Predictor.from_path("https://storage.googleapis.com/allennlp-public-models/coref-spanbert-large-2020.02.27.tar.gz")
print('\nDone initialising AllenNLP Library models!\n')

# Initialising CoreNLPClient
print('\nInitialising CoreNLPClient')
nlp = CoreNLPClient(annotators=['ner'], memory='4G', endpoint='http://localhost:9001', output_format='json')
print('\nDone initialising CoreNLPClient!\n')

# Input text
text = "Huawei Technologies Co., Ltd.is a Chinese multinational technology company headquartered in Shenzhen, Guangdong. It designs, develops, and sells telecommunications equipment and consumer electronics (mainly smartphones). On 29 June 2019, U.S. President Donald Trump reached an agreement to resume trade talks with China and announced that he would ease the aforementioned sanctions on Huawei. Huawei cut 600 jobs at its Santa Clara research center in June, and in December 2019 founder Ren Zhengfei said it was moving the center to Canada because the restrictions would block them from interacting with US employees. n July 2020, Huawei surpassed Samsung and Apple to become the leading smartphone mobile brand in the world for the first time primarily due to a drop in Samsung's global sales in the second quarter of 2020 owing to the impact of the COVID-19 pandemic."
# Resolve text (Replace pronouns into references)
print('\nResolving Text')
allennlp_resolved = predictor.coref_resolved(text)
print('\nText is resolved!\n')

# Annotate texts with Named Entity Recognitions
print('\nAnnotating Resolved Text')
annotated = nlp.annotate(allennlp_resolved)
print('\nResolved Text is annotated!\n')

# Extract the annotated texts
print('\nExtracting Annotated Text')
annotated_sentences = annotated['sentences']
rows = []
for s in tqdm(range(len(annotated_sentences))):
    for r in range(s):
        try:
            entity = annotated_sentences[s]['entitymentions'][r]['text']
            start_index = annotated_sentences[s]['entitymentions'][r]['characterOffsetBegin']
            end_index = annotated_sentences[s]['entitymentions'][r]['characterOffsetEnd']
            subjectSpan = list([start_index, end_index])
            ner = annotated_sentences[s]['entitymentions'][r]['ner']
            nerconfidence = annotated_sentences[s]['entitymentions'][r]['nerConfidences'].values()
            #regexner = annotated_sentences[s]['tokens'][r]['ner']
            row = list([entity, subjectSpan, ner, nerconfidence])
            rows.append(row)
        except:
            pass
print('\nDone Extraction!\n')    

# View the output in a dataframe
entity_df = pd.DataFrame(rows, columns = ['Entity', 'SubjectSpan', 'NER', 'Confidence'])
entity_df


Initialising AllenNLP Library models





Done initialising AllenNLP Library models!


Initialising CoreNLPClient

Done initialising CoreNLPClient!


Resolving Text

Text is resolved!


Annotating Resolved Text
Starting server with command: java -Xmx4G -cp ./corenlp/* edu.stanford.nlp.pipeline.StanfordCoreNLPServer -port 9001 -timeout 60000 -threads 5 -maxCharLength 100000 -quiet True -serverProperties corenlp_server-1e11127ab30a47be.props -preload ner

Resolved Text is annotated!


Extracting Annotated Text


100%|██████████| 5/5 [00:00<00:00, 8601.94it/s]



Done Extraction!



Unnamed: 0,Entity,SubjectSpan,NER,Confidence
0,Huawei Technologies Co.,"[113, 136]",ORGANIZATION,(0.99964038055831)
1,29 June 2019,"[253, 265]",DATE,(-1)
2,U.S.,"[267, 271]",COUNTRY,(0.99440728982643)
3,Huawei Technologies Co.,"[472, 495]",ORGANIZATION,(0.9996719946319)
4,600,"[508, 511]",NUMBER,(-1)
5,Huawei Technologies Co.,"[520, 543]",ORGANIZATION,(0.99973423870165)
6,July 2020,"[803, 812]",DATE,(0.95612746477842)
7,Huawei Technologies Co.,"[814, 837]",ORGANIZATION,(0.99953820352637)
8,Samsung,"[856, 863]",ORGANIZATION,(0.98811276661443)
9,Apple,"[868, 873]",ORGANIZATION,(0.83521500573362)
