In [32]:
import spacy
from spacy.matcher import Matcher
from spacy.tokens import Span

In [33]:
# Load the `en_core_web_sm` spaCy pipeline once; later cells reuse `nlp` both to
# process text and as the source of the shared vocab.
nlp = spacy.load('en_core_web_sm')

In [34]:
# Two sample sentences that write monetary amounts in several different ways
# ("$9.4 million", "2.7$ million", "twelve billion dollars", "$1.2b").
texts = [
    'the company has gained a value of $9.4 million as compared to the costs of 2.7$ million last year.',
    'The company had a great year and gained twelve billion dollars with a loss of $1.2b.',
]

In [35]:
# Process all texts in one batch with the dependency parser switched off; the
# entity loop that follows only reads `doc.ents`.
# `disable` takes a list of pipeline component names, so the name must be
# spelled exactly ('parser') and wrapped in a list.
# `nlp.pipe` yields docs lazily; materialise them so cells that iterate `docs`
# can be re-run without finding an exhausted generator.
docs = list(nlp.pipe(texts, disable=['parser']))

In [36]:
# Print every named entity found in each processed text, followed by its label
# (e.g. MONEY, DATE).
for doc in docs:
    for ent in doc.ents:
        print(ent.text, ent.label_)

$9.4 million MONEY
2.7$ million MONEY
last year DATE
a great year DATE
twelve billion dollars MONEY
1.2b MONEY


## Hashtag and Emoji Detection

In [40]:
# Positive emoji
pos_emoji = [
    "😀", "😃", "😂",
    "🤣", "😊", "😍",
]

# Negative emoji
neg_emoji = [
    "😞", "😠", "😩",
    "😢", "😭", "😒",
]

In [41]:
# Turn each emoji into its own single-token Matcher pattern: one dict per
# token, matching on the token's exact text via ORTH.
pos = list(map(lambda symbol: [{'ORTH': symbol}], pos_emoji))
neg = list(map(lambda symbol: [{'ORTH': symbol}], neg_emoji))

In [42]:
# Display the generated positive-emoji patterns to check their shape:
# a list of patterns, each a list holding one token dict.
pos

[[{'ORTH': '😀'}],
 [{'ORTH': '😃'}],
 [{'ORTH': '😂'}],
 [{'ORTH': '🤣'}],
 [{'ORTH': '😊'}],
 [{'ORTH': '😍'}]]

In [38]:
# Build a rule-based Matcher on the pipeline's vocab; the 'happy', 'sad' and
# 'HASHTAG' rules are registered on this instance in the cells below.
matcher = Matcher(nlp.vocab)

In [58]:
def label_sentiment(matcher, doc, i, matches):
    """Shift ``doc.sentiment`` according to which rule produced a match.

    The ``(matcher, doc, i, matches)`` signature lets this function be passed
    as the ``on_match`` callback of ``matcher.add``.

    Args:
        matcher: Not used by this callback.
        doc: Object whose ``sentiment`` attribute is updated in place and
            whose ``vocab.strings`` maps a match id back to its rule name.
        i: Index into ``matches`` of the match being handled.
        matches: Sequence of ``(match_id, start, end)`` triples.

    Returns:
        None. A 'happy' match adds 0.1 to ``doc.sentiment``, a 'sad' match
        subtracts 0.1, and any other rule name leaves it unchanged.
    """
    match_id, _start, _end = matches[i]
    # Resolve the rule name once instead of once per comparison.
    rule_name = doc.vocab.strings[match_id]
    adjustment = {'happy': 0.1, 'sad': -0.1}.get(rule_name)
    if adjustment is not None:
        doc.sentiment += adjustment

In [59]:
# Register the positive-emoji patterns under the 'happy' key, with
# label_sentiment as the on_match callback.
matcher.add('happy', pos, on_match=label_sentiment)

In [60]:
# Register the negative-emoji patterns under the 'sad' key, sharing the same
# on_match callback as the 'happy' rule.
matcher.add('sad', neg, on_match=label_sentiment)

In [65]:
# Hashtag rule: a literal '#' token followed by one token flagged IS_ASCII.
# No on_match callback is attached to this rule.
matcher.add('HASHTAG', [[{'ORTH': '#'}, {'IS_ASCII': True}]])

In [66]:
# Sample text containing two positive emoji and one hashtag to exercise the rules.
doc = nlp('Hello guys 😀😂 #kgptalkie')

In [67]:
# Apply every registered rule to the doc; the result is iterated below as
# (match_id, start, end) triples.
matches = matcher(doc)

In [70]:
# Print each match as "<rule name> <matched text>".
for match_id, start, end in matches:
    string_id = nlp.vocab.strings[match_id]  # look the id up to recover the rule name given to matcher.add
    span = doc[start:end]  # slice of the doc covered by this match
    print(string_id, span)

happy 😀
happy 😂
HASHTAG #kgptalkie
