In [2]:
import flair
from flair.data import Sentence

In [3]:
# Example sentence that the NER taggers in the following cells annotate.
sentence = Sentence("An agent in Arizona quit last year out of frustration.")

In [4]:
# Baseline: no tagger has run on the sentence yet, so the 'ner' view has no entities.
print(sentence.to_dict(tag_type='ner'))

{'text': 'An agent in Arizona quit last year out of frustration.', 'labels': [], 'entities': []}


In [5]:
from flair.models import SequenceTagger

In [7]:
# Load the pretrained 'ner' tagger (fetched into the local flair cache when it is not already there).
tagger = SequenceTagger.load('ner')
# predict() annotates `sentence` itself; the result is read back from `sentence` below.
tagger.predict(sentence)
# Print the text with each predicted tag shown inline after its token.
print(sentence.to_tagged_string())

2019-09-15 19:16:39,379 https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/models-v0.4/NER-conll03-english/en-ner-conll03-v0.4.pt not found in cache, downloading to /var/folders/p8/4jgjw2316zg447bmtn1phdrw0000gn/T/tmpdd0cycib


100%|██████████| 432197603/432197603 [00:58<00:00, 7442923.42B/s]

2019-09-15 19:17:38,128 copying /var/folders/p8/4jgjw2316zg447bmtn1phdrw0000gn/T/tmpdd0cycib to cache at /Users/Matthias/.flair/models/en-ner-conll03-v0.4.pt





2019-09-15 19:17:41,617 removing temp file /var/folders/p8/4jgjw2316zg447bmtn1phdrw0000gn/T/tmpdd0cycib
2019-09-15 19:17:41,678 loading file /Users/Matthias/.flair/models/en-ner-conll03-v0.4.pt
An agent in Arizona <S-LOC> quit last year out of frustration.


In [8]:
# After prediction the 'ner' view lists each tagged span with its character offsets, type and confidence.
print(sentence.to_dict(tag_type='ner'))

{'text': 'An agent in Arizona quit last year out of frustration.', 'labels': [], 'entities': [{'text': 'Arizona', 'start_pos': 12, 'end_pos': 19, 'type': 'LOC', 'confidence': 0.9999498128890991}]}


In [9]:
# Use a better model. Pretrained taggers to choose from:
#   ner                - named entity recognition (4 classes, slow)
#   ner-ontonotes-fast - named entity recognition with 18 classes, fast

In [11]:
# Load the 18-class 'ner-ontonotes-fast' tagger into its own variable.
# It is deliberately NOT stored as an attribute of `tagger`: taggers are torch
# modules, and assigning one module as an attribute of another registers it as
# a submodule (it would then be included in tagger's state_dict / save / .to()).
tagger_fast = SequenceTagger.load('ner-ontonotes-fast')
# NOTE: this runs on the same `sentence` as the first tagger, so the 'ner'
# tags written earlier are replaced by this model's predictions.
tagger_fast.predict(sentence)
# Print the text with each predicted tag shown inline after its token.
print(sentence.to_tagged_string())

2019-09-15 19:32:08,898 https://s3.eu-central-1.amazonaws.com/alan-nlp/resources/models-v0.4/release-ner-ontonotes-fast-0/en-ner-ontonotes-fast-v0.4.pt not found in cache, downloading to /var/folders/p8/4jgjw2316zg447bmtn1phdrw0000gn/T/tmpqu2saah0



  0%|          | 0/1331379415 [00:00<?, ?B/s][A
  0%|          | 17408/1331379415 [00:00<2:37:16, 141082.53B/s][A
  0%|          | 52224/1331379415 [00:00<2:15:45, 163445.68B/s][A
  0%|          | 121856/1331379415 [00:00<1:47:44, 205928.96B/s][A
  0%|          | 261120/1331379415 [00:00<1:21:48, 271196.18B/s][A
  0%|          | 522240/1331379415 [00:00<1:01:30, 360578.14B/s][A
  0%|          | 783360/1331379415 [00:00<46:37, 475617.11B/s]  [A
  0%|          | 1583104/1331379415 [00:00<33:40, 658280.29B/s][A
  0%|          | 2576384/1331379415 [00:01<24:28, 905009.85B/s][A
  0%|          | 3416064/1331379415 [00:01<17:54, 1235752.11B/s][A
  0%|          | 3951616/1331379415 [00:01<13:46, 1605736.32B/s][A
  0%|          | 4694016/1331379415 [00:01<10:35, 2087375.26B/s][A
  0%|          | 5368832/1331379415 [00:01<08:23, 2632911.32B/s][A
  0%|          | 6053888/1331379415 [00:01<06:51, 3219045.99B/s][A
  1%|          | 6726656/1331379415 [00:01<05:47, 3809758.99B/s][A
  

2019-09-15 19:35:27,490 copying /var/folders/p8/4jgjw2316zg447bmtn1phdrw0000gn/T/tmpqu2saah0 to cache at /Users/Matthias/.flair/models/en-ner-ontonotes-fast-v0.4.pt
2019-09-15 19:35:32,013 removing temp file /var/folders/p8/4jgjw2316zg447bmtn1phdrw0000gn/T/tmpqu2saah0
2019-09-15 19:35:32,148 loading file /Users/Matthias/.flair/models/en-ner-ontonotes-fast-v0.4.pt
An agent in Arizona <S-GPE> quit last <B-DATE> year <E-DATE> out of frustration.


In [13]:
# The tagger stores its predictions under the 'ner' tag type; 'ner-ontonotes-fast'
# is only the name of the model, and querying by it yields an empty entity list.
print(sentence.to_dict(tag_type='ner'))

{'text': 'An agent in Arizona quit last year out of frustration.', 'labels': [], 'entities': [{'text': 'Arizona', 'start_pos': 12, 'end_pos': 19, 'type': 'GPE', 'confidence': 0.9928970336914062}, {'text': 'last year', 'start_pos': 25, 'end_pos': 34, 'type': 'DATE', 'confidence': 0.8451942503452301}]}


In [14]:
# Query by tag type ('ner'), not by model name: the predictions are stored under
# 'ner', and asking for 'ner-ontonotes-fast' yields an empty entity list.
print(sentence.to_dict(tag_type='ner'))

{'text': 'An agent in Arizona quit last year out of frustration.', 'labels': [], 'entities': [{'text': 'Arizona', 'start_pos': 12, 'end_pos': 19, 'type': 'GPE', 'confidence': 0.9928970336914062}, {'text': 'last year', 'start_pos': 25, 'end_pos': 34, 'type': 'DATE', 'confidence': 0.8451942503452301}]}


In [15]:
# Re-check the inline-tagged text; it still carries the tags from the most recent predict() call.
print(sentence.to_tagged_string())

An agent in Arizona <S-GPE> quit last <B-DATE> year <E-DATE> out of frustration.


In [16]:
# The 'ner' view now reports GPE/DATE entities: the 'ner-ontonotes-fast' tagger stored its
# predictions under the 'ner' tag type, replacing the earlier LOC label for "Arizona".
print(sentence.to_dict(tag_type='ner'))

{'text': 'An agent in Arizona quit last year out of frustration.', 'labels': [], 'entities': [{'text': 'Arizona', 'start_pos': 12, 'end_pos': 19, 'type': 'GPE', 'confidence': 0.9928970336914062}, {'text': 'last year', 'start_pos': 25, 'end_pos': 34, 'type': 'DATE', 'confidence': 0.8451942503452301}]}


In [17]:
# Events can potentially be identified within a dataset of relations between entities.
# These relations would be extracted by parsing algorithms and loaded into a database.
# Some entries in this database might be nonsense or of little use, but others will
# say things like 'drones struck Saudi oil fields'.

# Naive approach: look for triples of the form
#   person/place/entity - active verb/predicate - person/place/entity

In [21]:
import nltk
from nltk import parse

In [28]:
# Inspect the nltk.parse module object to confirm the import resolved (its repr shows the install path).
nltk.parse

<module 'nltk.parse' from '/Users/Matthias/anaconda3/lib/python3.6/site-packages/nltk/parse/__init__.py'>