In [1]:
#!nvidia-smi

In [None]:
!pip install -U spacy
!pip install -U spacy-lookups-data
!python -m spacy download en_core_web_sm
!python -m spacy download en_core_web_md

In [2]:
import spacy
import nltk

In [3]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [5]:
#spacy.prefer_gpu()
nlp = spacy.load("en_core_web_sm")

In [6]:
text = 'Apple is looking for buying a U.K. startup for $1 billion'

In [7]:
doc = nlp(text)

## POS TAG

In [8]:
for token in doc:
    print(f'{token.text:{10}} --> {token.pos_:{10}} --> {token.tag_}')

Apple      --> PROPN      --> NNP
is         --> AUX        --> VBZ
looking    --> VERB       --> VBG
for        --> ADP        --> IN
buying     --> VERB       --> VBG
a          --> DET        --> DT
U.K.       --> PROPN      --> NNP
startup    --> NOUN       --> NN
for        --> ADP        --> IN
$          --> SYM        --> $
1          --> NUM        --> CD
billion    --> NUM        --> CD


In [9]:
from nltk import pos_tag
from nltk.tokenize import word_tokenize

nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /Users/serdar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /Users/serdar/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

In [10]:
word_tokens = word_tokenize(text)
pos = pos_tag(word_tokens)
for token in pos:
    print(f'{token[0]:{15}} --> {token[1]}')

Apple           --> NNP
is              --> VBZ
looking         --> VBG
for             --> IN
buying          --> VBG
a               --> DT
U.K.            --> NNP
startup         --> NN
for             --> IN
$               --> $
1               --> CD
billion         --> CD


In [11]:
for token in doc:
    print(f"""Token -> {token.text:{8}} Lemma -> {token.lemma_:{8}} POS -> {token.pos_:{8}} TAG -> {token.tag_:{8}} Shape -> {token.shape_:{8}} Is_Alpha -> {token.is_alpha:{2}}""")

Token -> Apple    Lemma -> Apple    POS -> PROPN    TAG -> NNP      Shape -> Xxxxx    Is_Alpha ->  1
Token -> is       Lemma -> be       POS -> AUX      TAG -> VBZ      Shape -> xx       Is_Alpha ->  1
Token -> looking  Lemma -> look     POS -> VERB     TAG -> VBG      Shape -> xxxx     Is_Alpha ->  1
Token -> for      Lemma -> for      POS -> ADP      TAG -> IN       Shape -> xxx      Is_Alpha ->  1
Token -> buying   Lemma -> buy      POS -> VERB     TAG -> VBG      Shape -> xxxx     Is_Alpha ->  1
Token -> a        Lemma -> a        POS -> DET      TAG -> DT       Shape -> x        Is_Alpha ->  1
Token -> U.K.     Lemma -> U.K.     POS -> PROPN    TAG -> NNP      Shape -> X.X.     Is_Alpha ->  0
Token -> startup  Lemma -> startup  POS -> NOUN     TAG -> NN       Shape -> xxxx     Is_Alpha ->  1
Token -> for      Lemma -> for      POS -> ADP      TAG -> IN       Shape -> xxx      Is_Alpha ->  1
Token -> $        Lemma -> $        POS -> SYM      TAG -> $        Shape -> $        Is_Al

### Word Frequency

In [36]:
from collections import Counter
complete_text = (
"Gus Proto is a Python developer currently"
" working for a London-based Fintech company. He is"
" interested in learning Natural Language Processing."
" There is a developer conference happening on 21 July"
' 2019 in London. It is titled "Applications of Natural'
' Language Processing". There is a helpline number'
" available at +44-1234567891. Gus is helping organize it."
" He keeps organizing local Python meetups and several"
" internal talks at his workplace. Gus is also presenting"
' a talk. The talk will introduce the reader about "Use'
' cases of Natural Language Processing in Fintech".'
" Apart from his work, he is very passionate about music."
" Gus is learning to play the Piano. He has enrolled"
" himself in the weekend batch of Great Piano Academy."
" Great Piano Academy is situated in Mayfair or the City"
" of London and has world-class piano instructors."
)
complete_doc = nlp(complete_text)

words = [ token.text 
          for token in complete_doc
          if not token.is_stop and not token.is_punct
        ]

print(Counter(words).most_common(5))

[('Gus', 4), ('London', 3), ('Natural', 3), ('Language', 3), ('Processing', 3)]


In [39]:
about_text = (
...     "Gus Proto is a Python developer currently"
...     " working for a London-based Fintech"
...     " company."
... )
about_doc = nlp(about_text)
for token in about_doc:
...     print(
...         f"""
... TOKEN: {str(token)}
... =====
... TAG: {str(token.tag_):10} POS: {token.pos_}
... EXPLANATION: {spacy.explain(token.tag_)}"""
...     )


TOKEN: Gus
=====
TAG: NNP        POS: PROPN
EXPLANATION: noun, proper singular

TOKEN: Proto
=====
TAG: NNP        POS: PROPN
EXPLANATION: noun, proper singular

TOKEN: is
=====
TAG: VBZ        POS: AUX
EXPLANATION: verb, 3rd person singular present

TOKEN: a
=====
TAG: DT         POS: DET
EXPLANATION: determiner

TOKEN: Python
=====
TAG: NNP        POS: PROPN
EXPLANATION: noun, proper singular

TOKEN: developer
=====
TAG: NN         POS: NOUN
EXPLANATION: noun, singular or mass

TOKEN: currently
=====
TAG: RB         POS: ADV
EXPLANATION: adverb

TOKEN: working
=====
TAG: VBG        POS: VERB
EXPLANATION: verb, gerund or present participle

TOKEN: for
=====
TAG: IN         POS: ADP
EXPLANATION: conjunction, subordinating or preposition

TOKEN: a
=====
TAG: DT         POS: DET
EXPLANATION: determiner

TOKEN: London
=====
TAG: NNP        POS: PROPN
EXPLANATION: noun, proper singular

TOKEN: -
=====
TAG: HYPH       POS: PUNCT
EXPLANATION: punctuation mark, hyphen

TOKEN: based
=====
TAG

# NER

In [12]:
for ent in doc.ents:
    print(f'{ent.text:{15}} --> {ent.label_}')

Apple           --> ORG
U.K.            --> GPE
$1 billion      --> MONEY


In [12]:
nltk.download('maxent_ne_chunker')
nltk.download('words')
from nltk import ne_chunk

[nltk_data] Downloading package maxent_ne_chunker to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping chunkers/maxent_ne_chunker.zip.


True

[nltk_data] Downloading package words to /root/nltk_data...
[nltk_data]   Unzipping corpora/words.zip.


True

In [13]:
print(text, '\n')
for chunk in nltk.ne_chunk(pos):
      if hasattr(chunk, 'label'):
        print(chunk.label(), ' '.join(c[0] for c in chunk))

Apple is looking for buying a U.K. startup for $1 billion 

GPE Apple


In [13]:
text2 = 'Apple and banana are healty fruits to consume.'
doc2 = nlp(text2)

In [14]:
for token in doc2:
    print(f'{token.text:{15}} --> {token.pos_}')

Apple           --> NOUN
and             --> CCONJ
banana          --> NOUN
are             --> AUX
healty          --> ADJ
fruits          --> NOUN
to              --> PART
consume         --> VERB
.               --> PUNCT


In [15]:
for ent in doc2.ents:
    print(f'{ent.text:{15}} --> {ent.label_} --> {ent.start_char}, {ent.end_char}')

Apple           --> ORG --> 0, 5


In [16]:
text3 = 'apple and banana are healty fruits to consume.'
doc3 = nlp(text3)

for ent in doc3.ents:
    print(f'{ent.text:{15}} --> {ent.label_}') # Can't find any entity

In [18]:
text3 = 'Did you apply for Shell DS position?'
doc3 = nlp(text3)

for ent in doc3.ents:
    print(f'{ent.text:{15}} --> {ent.label_} --> {ent.start_char}, {ent.end_char}')

Shell           --> ORG --> 18, 23


In [None]:
#!python -m spacy download en_core_web_lg

In [20]:
#lp = spacy.load("en_core_web_lg")
text3 = 'Which one is better to work at Facebook or at Yahoo?'
doc3 = nlp(text3)

for ent in doc3.ents:
    print(f'{ent.text:{15}} --> {ent.label_}')

one             --> CARDINAL
Facebook        --> ORG
Yahoo           --> ORG


# Visualization

https://spacy.io/api/annotation#named-entities

In [None]:
# https://spacy.io/api/annotation#named-entities

In [22]:
from spacy import displacy

In [23]:
doc = nlp('Apple is looking for buying a UK startup for $1 billion tomorrow')
displacy.render(doc, style = 'ent', jupyter=True, options={'distance': 90})

In [24]:
spacy.explain('GPE')

'Countries, cities, states'

In [25]:
spacy.explain('DATE')

'Absolute or relative dates or periods'

In [None]:
about_interest_text = (
...     "He is interested in learning Natural Language Processing."
... )
about_interest_doc = nlp(about_interest_text)
displacy.serve(about_interest_doc, style="dep", port= 5555)


Using the 'dep' visualizer
Serving on http://0.0.0.0:5555 ...



127.0.0.1 - - [19/Jan/2023 18:15:08] "GET / HTTP/1.1" 200 6751
127.0.0.1 - - [19/Jan/2023 18:15:08] "GET /favicon.ico HTTP/1.1" 200 6751


# Sentence Segmentation

In [26]:
text = 'Apple is looking for buying a U.K. startup. Government has given permission.'
doc = nlp(text)

In [27]:
for sent in doc.sents:
    print(sent)

Apple is looking for buying a U.K. startup.
Government has given permission.


In [31]:
about_text = (
    " Gus Proto is a Python developer currently"
    " working for a London-based Fintech"
    " company. He is interested in learning"
    " Natural Language Processing." )

about_doc = nlp(about_text)
sentences = list(about_doc.sents)
for sentence in sentences:
    print(f"{sentence}...")

Gus Proto is a Python developer currently working for a London-based Fintech company....
He is interested in learning Natural Language Processing....


# Regular Expression

In [49]:
import re

In [50]:
txt = "The rain in Spain"
x = re.search("\s", txt)

print("The first white-space character is located in position:", x.start())

The first white-space character is located in position: 3


In [None]:
text = 'my phone number is 123. ohh its wrong one. correct one is 1234567890. call me'

In [None]:
re.findall(r'\d+', text)

['123', '1234567890']

In [None]:
re.findall(r'\w+', text)

['my',
 'phone',
 'number',
 'is',
 '123',
 'ohh',
 'its',
 'wrong',
 'one',
 'correct',
 'one',
 'is',
 '1234567890',
 'call',
 'me']

In [None]:
# Wildcard'

In [None]:
re.findall(r'c...', text)

['corr', 'ct o', 'call']

In [None]:
re.findall(r'c.l', text)

['cal']

In [None]:
re.findall(r'2.+', text)

['23. ohh its wrong one. correct one is 1234567890. call me']

In [None]:
text
print()
re.findall(r'[^\d]+', text)

'my phone number is 123. ohh its wrong one. correct one is 1234567890. call me'




['my phone number is ', '. ohh its wrong one. correct one is ', '. call me']

In [None]:
re.findall(r'[^\D]+', text)

# Processing Pipeline in Spacy

In [28]:
texts = ['net income was $9.4 million compared to the prior year of 2.7$ million',
        'revenue exceeds twelve billion dollars with a loss of $1b']

In [29]:
# %%timeit
docs = nlp.pipe(texts, disable = ['tagger', 'parser'])

for doc in docs:
    for ent in doc.ents:
        print(ent.text, ent.label_)
    print()

$9.4 million MONEY
the prior year DATE
2.7$ million MONEY

twelve billion dollars MONEY
1b MONEY





In [53]:
%%timeit
docs = nlp.pipe(texts)

for doc in docs:
    for ent in doc.ents:
        print(ent.text, ent.label_)
    print()

$9.4 million MONEY
the prior year DATE
2.7$ million MONEY

twelve billion dollars MONEY
1b MONEY

$9.4 million MONEY
the prior year DATE
2.7$ million MONEY

twelve billion dollars MONEY
1b MONEY

$9.4 million MONEY
the prior year DATE
2.7$ million MONEY

twelve billion dollars MONEY
1b MONEY

$9.4 million MONEY
the prior year DATE
2.7$ million MONEY

twelve billion dollars MONEY
1b MONEY

$9.4 million MONEY
the prior year DATE
2.7$ million MONEY

twelve billion dollars MONEY
1b MONEY

$9.4 million MONEY
the prior year DATE
2.7$ million MONEY

twelve billion dollars MONEY
1b MONEY

$9.4 million MONEY
the prior year DATE
2.7$ million MONEY

twelve billion dollars MONEY
1b MONEY

$9.4 million MONEY
the prior year DATE
2.7$ million MONEY

twelve billion dollars MONEY
1b MONEY

$9.4 million MONEY
the prior year DATE
2.7$ million MONEY

twelve billion dollars MONEY
1b MONEY

$9.4 million MONEY
the prior year DATE
2.7$ million MONEY

twelve billion dollars MONEY
1b MONEY

$9.4 million MONEY
t

# Matcher

In [None]:
# Matcher-1

In [22]:
from spacy.matcher import Matcher

In [23]:
nlp = spacy.load('en_core_web_sm')

In [24]:
matcher = Matcher(nlp.vocab)

In [None]:
# Matcher-1 

In [25]:
# Add pattern
pattern = [[{"TEXT": "iPhone"}, {"TEXT": "X"}]]

matcher.add("IPHONE_PATTERN", pattern)

# Process the text

doc = nlp("Upcoming iPhone X release date leaked")

matches = matcher(doc)

In [26]:
for match_id, start, end in matches: # match_id --> hash value
    matched_span = doc[start:end]
    print(matched_span.text)

iPhone X


In [None]:
# Matcher-2

In [28]:
pattern =[[
           {'IS_DIGIT':True},
           {'LOWER': 'fifa'},
           {'LOWER': 'world'},
           {'LOWER': 'cup'},
           {'IS_PUNCT': True}]]

matcher.add('WC PATTERN', pattern)
doc = nlp('2018 FifA World Cup: France Won')
matches = matcher(doc)

for match_id, start, end in matches: # match_id --> hash value
    matched_span = doc[start:end]
    print(matched_span.text)

2018 FifA World Cup:


In [29]:
# Matcher-3
pattern =[[
           {"LEMMA":'love', 'POS': 'VERB'},
           {"POS": 'NOUN'}]]

matcher.add('LOVE', pattern)
doc = nlp('I loved dogs but now I love cats more')
matches = matcher(doc)

for match_id, start, end in matches: # match_id --> hash value
    matched_span = doc[start:end]
    print(matched_span.text)

loved dogs
love cats


In [62]:
# Matcher-4

pattern =[[
           {"LEMMA":'buy'},
           {'POS': "DET", "OP": "?"}, # optional match 0 or 1 time
           {"POS": 'NOUN'}]]

matcher.add('BUY', pattern)
doc = nlp("I bought a smartphone. Now I'm buying apps.")
matches = matcher(doc)

for match_id, start, end in matches: # match_id --> hash value
    matched_span = doc[start:end]
    print(matched_span.text)

bought a smartphone
buying apps


In [63]:
# Matcher-5

# Add the pattern to the matcher
pattern = [[{"LOWER": "march"}, {"IS_DIGIT": True}, {"IS_PUNCT": True}, {"IS_DIGIT": True}]]
matcher.add("DATE_PATTERN", pattern)
# Process some text
doc = nlp("SpaceX's Starlink 17 mission lifts off on a Falcon 9 rocket from Launch Complex 39A at NASA's Kennedy Space Center in Florida, on March 4, 2021")
# Call the matcher on the doc
matches = matcher(doc)
for match_id, start, end in matches:
 print(doc[start:end])

March 4, 2021


# Similarity

In [None]:
# For similarity we need to use either en_core_web_md or en_core_web_lg

In [64]:
#!python -m spacy download en_core_web_md

In [65]:
# nlp = spacy.load('en_core_web_md')

In [66]:
# Compare 2 docs
doc1 = nlp('I like fast food')
doc2 = nlp('I like pizza')

print(doc1.similarity(doc2))

  """


0.7637875080108643


In [67]:
# Comapre tokens

doc = nlp('I like pizza and pasta')
token1 = doc[2]
token2 = doc[4]

print(token1.similarity(token2))

  import sys


0.17194029688835144


In [31]:
doc = nlp("apple orange")
token1 = doc[0]
token2 = doc[1]

print(token1.similarity(token2))

  """


0.28573909401893616


In [32]:
ohe
vectorizing 3232
word embebding

NameError: ignored

In [68]:
len(doc1.vector), doc1.vector

(96, array([ 0.24498098,  0.43741864,  0.38137007, -0.34508908, -0.1005477 ,
        -0.4678353 ,  0.2666813 ,  0.04684297,  0.990312  ,  0.06843117,
        -0.49650788, -0.5087466 ,  0.06964219, -0.04939258, -0.28219414,
         0.03791597,  0.4888343 , -0.7695538 , -0.82771087, -0.44390124,
        -0.07535602, -0.2071268 ,  0.4735515 ,  0.4895326 ,  0.52134985,
         0.560056  ,  0.3186956 , -0.8008715 ,  0.20402387,  0.08067308,
        -0.41263103,  0.09250343,  0.39594236, -0.15042749,  0.07881028,
        -0.55189675, -0.99339783,  0.27072576,  0.09515302, -0.17678548,
         0.06582572,  0.10700436,  0.62078273, -0.39924288, -0.39958262,
        -0.08453593,  0.37088692,  0.2604139 ,  0.0976055 ,  0.27724546,
        -0.32712182, -0.06978658, -0.19396116,  0.1850253 , -0.59723914,
         0.29401463,  0.9980913 ,  0.03294671,  0.4976545 ,  0.6912219 ,
        -0.19535166, -0.3983915 ,  0.30121094, -0.48357677, -0.59131813,
        -0.8905617 , -0.11542082, -0.2747028 , 

# SPELL CHECKER

In [69]:
!pip install textblob
!pip install autocorrect
!pip install pyspellchecker
!pip install pattern

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting autocorrect
  Downloading autocorrect-2.6.1.tar.gz (622 kB)
[K     |████████████████████████████████| 622 kB 14.9 MB/s 
[?25hBuilding wheels for collected packages: autocorrect
  Building wheel for autocorrect (setup.py) ... [?25l[?25hdone
  Created wheel for autocorrect: filename=autocorrect-2.6.1-py3-none-any.whl size=622382 sha256=8d5f9742a55b27026ccdb7d25acd415299ffa9748b517d979cc32eeef7241256
  Stored in directory: /root/.cache/pip/wheels/54/d4/37/8244101ad50b0f7d9bffd93ce58ed7991ee1753b290923934b
Successfully built autocorrect
Installing collected packages: autocorrect
Successfully installed autocorrect-2.6.1
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyspellchecker
  Downloading pyspellchecker-0.7.0-py3-

In [70]:
from spellchecker import SpellChecker

In [71]:
spell = SpellChecker()

In [72]:
docs = ['calandar', 'lighteinig', 'misspel', 'booq', 'undrstand', 'receive', 'adress']

In [73]:
# Correction
for word in docs:
    print(f'{word:{10}} --> {spell.correction(word):{20}}')

calandar   --> calendar            
lighteinig --> lightning           
misspel    --> misspell            
booq       --> book                
undrstand  --> understand          
receive    --> receive             
adress     --> dress               


In [74]:
# Candidates
for word in docs:
    print(f'{word:{10}} --> {spell.candidates(word)}')

calandar   --> {'calendar'}
lighteinig --> {'lightening', 'lighting', 'lightning'}
misspel    --> {'misspell'}
booq       --> {'boot', 'boom', 'boo', 'book', 'boos', 'boob', 'boor', 'boon', 'boop'}
undrstand  --> {'understand', 'undestand'}
receive    --> {'receive'}
adress     --> {'address', 'dress'}


In [75]:
for word in docs:
    print(f'{word:{10}} --> {spell.candidates(word)}: frequency {spell.word_usage_frequency(word):{10}}')

calandar   --> {'calendar'}: frequency        0.0
lighteinig --> {'lightening', 'lighting', 'lightning'}: frequency        0.0
misspel    --> {'misspell'}: frequency        0.0
booq       --> {'boot', 'boom', 'boo', 'book', 'boos', 'boob', 'boor', 'boon', 'boop'}: frequency        0.0
undrstand  --> {'understand', 'undestand'}: frequency        0.0
receive    --> {'receive'}: frequency 3.159581511326326e-05
adress     --> {'address', 'dress'}: frequency        0.0


In [76]:
# TEXTBLOB

from textblob import TextBlob, Word

In [77]:
ex = TextBlob('He was veri happy in hisn neww locotion.')

In [78]:
for word in ex.words:
    print(word, ":", word.correct())

He : He
was : was
veri : very
happy : happy
in : in
hisn : his
neww : new
locotion : location


In [84]:
nltk.download('omw-1.4')
from pattern.en import suggest

[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [85]:
suggest('locotion')

[('location', 1.0)]

In [86]:
sorted(suggest('Aple'))

[('Able', 0.2022132796780684),
 ('Ale', 0.001006036217303823),
 ('Ample', 0.009054325955734407),
 ('Ape', 0.004024144869215292),
 ('Apple', 0.011066398390342052),
 ('Bile', 0.006036217303822937),
 ('Dale', 0.002012072434607646),
 ('Dole', 0.001006036217303823),
 ('File', 0.022132796780684104),
 ('Gale', 0.006036217303822937),
 ('Hale', 0.001006036217303823),
 ('Hole', 0.018108651911468814),
 ('Idle', 0.019114688128772636),
 ('Isle', 0.0030181086519114686),
 ('Le', 0.05533199195171026),
 ('Male', 0.04024144869215292),
 ('Mele', 0.001006036217303823),
 ('Mile', 0.03219315895372234),
 ('Mlle', 0.001006036217303823),
 ('Mole', 0.015090543259557344),
 ('Mule', 0.002012072434607646),
 ('Nile', 0.0030181086519114686),
 ('Ole', 0.001006036217303823),
 ('Pale', 0.1659959758551308),
 ('Pe', 0.002012072434607646),
 ('Pie', 0.005030181086519115),
 ('Pile', 0.009054325955734407),
 ('Plea', 0.02012072434607646),
 ('Ply', 0.001006036217303823),
 ('Pole', 0.022132796780684104),
 ('Pre', 0.015090543259

# SENTIMENT ANALYSIS

In [87]:
testimonial = TextBlob("The food was great!")
print(testimonial.sentiment)
# Polarity is a float that lies between [-1,1], -1 indicates negative sentiment and +1 indicates positive sentiments. 
# Subjectivity is also a float which lies in the range of [0,1]. Subjective sentences generally refer to personal opinion, emotion, or judgment. 

Sentiment(polarity=1.0, subjectivity=0.75)


In [90]:
!pip install vaderSentiment

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[K     |████████████████████████████████| 125 kB 8.8 MB/s 
Installing collected packages: vaderSentiment
Successfully installed vaderSentiment-3.3.2


In [91]:
# Vader is optimized for social media data and can yield good results when used with data from twitter, facebook, etc.


from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


analyzer = SentimentIntensityAnalyzer()
sentence = "The food was great!" 
vs = analyzer.polarity_scores(sentence)
print("{:-<65} {}".format(sentence, str(vs)))

The food was great!---------------------------------------------- {'neg': 0.0, 'neu': 0.406, 'pos': 0.594, 'compound': 0.6588}


In [93]:
!pip install flair

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting flair
  Downloading flair-0.11.3-py3-none-any.whl (401 kB)
[K     |████████████████████████████████| 401 kB 14.0 MB/s 
Collecting transformers>=4.0.0
  Downloading transformers-4.22.2-py3-none-any.whl (4.9 MB)
[K     |████████████████████████████████| 4.9 MB 58.9 MB/s 
[?25hCollecting segtok>=1.5.7
  Downloading segtok-1.5.11-py3-none-any.whl (24 kB)
Collecting huggingface-hub
  Downloading huggingface_hub-0.10.0-py3-none-any.whl (163 kB)
[K     |████████████████████████████████| 163 kB 68.4 MB/s 
[?25hCollecting deprecated>=1.2.4
  Downloading Deprecated-1.2.13-py2.py3-none-any.whl (9.6 kB)
Collecting sqlitedict>=1.6.0
  Downloading sqlitedict-2.0.0.tar.gz (46 kB)
[K     |████████████████████████████████| 46 kB 4.4 MB/s 
[?25hCollecting pptree
  Downloading pptree-3.1.tar.gz (3.0 kB)
Collecting janome
  Downloading Janome-0.4.2-py2.py3-none-any.whl (19.7 MB)
[K     |██

In [94]:
from flair.models import TextClassifier
from flair.data import Sentence

classifier = TextClassifier.load('en-sentiment')
sentence = Sentence('The food was great!')
classifier.predict(sentence)

# print sentence with predicted labels
print('Sentence above is: ', sentence.labels)

2022-09-29 22:14:12,569 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to /tmp/tmpb92ia6a9


100%|██████████| 265512723/265512723 [00:08<00:00, 30708964.76B/s]

2022-09-29 22:14:21,295 copying /tmp/tmpb92ia6a9 to cache at /root/.flair/models/sentiment-en-mix-distillbert_4.pt





2022-09-29 22:14:22,065 removing temp file /tmp/tmpb92ia6a9
2022-09-29 22:14:22,197 loading file /root/.flair/models/sentiment-en-mix-distillbert_4.pt


Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/483 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/466k [00:00<?, ?B/s]

Sentence above is:  ['Sentence: "The food was great !"'/'POSITIVE' (0.9961)]
