# Introduction

This notebook experiments with different technologies for the task of **Named Entity Recognition** (NER).

# spaCy

In [7]:
# Import third-party libraries
import spacy

In [None]:
# Download the model
!python -m spacy download en_core_web_smß

In [8]:
# Instantiate the `en_core_web_sm` pipeline and keep it in `model`
model = spacy.load(name='en_core_web_sm')

In [9]:
# Sample passage used as NER input.
# The text is kept as separate segments (each ending in a single space where
# a word boundary falls) and glued together without any extra separator.
input_text = "".join((
    "Given the recent downturn in stocks especially in tech which is likely to persist as yields keep going up, ",
    "I thought it would be prudent to share the risks of investing in ARK ETFs, written up very nicely by ",
    "[The Bear Cave](https://thebearcave.substack.com/p/special-edition-will-ark-invest-blow). The risks comes ",
    "primarily from ARK's illiquid and very large holdings in small cap companies. ARK is forced to sell its ",
    "holdings whenever its liquid ETF gets hit with outflows as is especially the case in market downturns. ",
    "This could force very painful liquidations at unfavorable prices and the ensuing crash goes into a ",
    "positive feedback loop leading into a death spiral enticing even more outflows and predatory shorts.",
))

In [10]:
# Run the loaded pipeline over the sample passage; the result is kept in `document`
document = model(text=input_text)

In [11]:
# Visualise the processed document with displaCy, using the "ent" (entities) style
spacy.displacy.render(document, style='ent')

In [13]:
# Look up the human-readable description of the 'ORG' entity label
spacy.explain(term='ORG')

'Companies, agencies, institutions, etc.'

In [24]:
# Walk the recognised entities once, collecting labels and texts side by side
entity_types = []
entities = []
org_entities = []
for ent in document.ents:
    entity_types.append(ent.label_)
    entities.append(ent.text)
    # Entities labelled 'ORG' are additionally tracked in their own list
    if ent.label_ == 'ORG':
        org_entities.append(ent.text)

# Report the three lists
print(f'Entity Types: {entity_types}')
print(f'Entities: {entities}')
print(f'ORG Entities: {org_entities}')

Entity Types: ['ORG', 'MONEY', 'DATE']
Entities: ['Apple', '143 dollars', 'this January']
ORG Entities: ['Apple']
