- https://stackabuse.com/python-for-nlp-introduction-to-the-pattern-library/

In [41]:
# !pip install pattern

or

In [42]:
# !conda install -c asmeurer pattern

# Pattern Library Functions for NLP

## Tokenizing, POS Tagging, and Chunking

In [43]:
from pattern.en import parse
from pattern.en import pprint

pprint(parse('I drove my car to the hospital yesterday', relations=True, lemmata=True))

          WORD   TAG    CHUNK   ROLE   ID     PNP    LEMMA       
                                                                 
             I   PRP    NP      SBJ    1      -      i           
         drove   VBD    VP      -      1      -      drive       
            my   PRP$   NP      OBJ    1      -      my          
           car   NN     NP ^    OBJ    1      -      car         
            to   TO     -       -      -      -      to          
           the   DT     NP      -      -      -      the         
      hospital   NN     NP ^    -      -      -      hospital    
     yesterday   NN     NP ^    -      -      -      yesterday   


In [44]:
from pattern.en import parse
from pattern.en import pprint

print(parse('I drove my car to the hospital yesterday', relations=True, lemmata=True).split())

[[['I', 'PRP', 'B-NP', 'O', 'NP-SBJ-1', 'i'], ['drove', 'VBD', 'B-VP', 'O', 'VP-1', 'drive'], ['my', 'PRP$', 'B-NP', 'O', 'NP-OBJ-1', 'my'], ['car', 'NN', 'I-NP', 'O', 'NP-OBJ-1', 'car'], ['to', 'TO', 'O', 'O', 'O', 'to'], ['the', 'DT', 'B-NP', 'O', 'O', 'the'], ['hospital', 'NN', 'I-NP', 'O', 'O', 'hospital'], ['yesterday', 'NN', 'I-NP', 'O', 'O', 'yesterday']]]


## Pluralizing and Singularizing the Tokens

In [45]:
from pattern.en import pluralize, singularize

print(pluralize('leaf'))
print(singularize('theives'))
print(singularize('dogs'))

leaves
theife
dog


## Converting Adjective to Comparative and Superlative Degrees

In [46]:
from pattern.en import comparative, superlative

print(comparative('good'))
print(superlative('good'))

better
best


## Finding N-Grams

In [47]:
from pattern.en import ngrams

print(ngrams("He goes to hospital", n=2))

[('He', 'goes'), ('goes', 'to'), ('to', 'hospital')]


In [48]:
from pattern.en import sentiment

print(sentiment("This is an excellent movie to watch. I really love it"))

(0.75, 0.8)


## Checking if a Statement is a Fact

In [49]:
from pattern.en import parse, Sentence
from pattern.en import modality

text = "Paris is the capital of France"
sent = parse(text, lemmata=True)
sent = Sentence(sent)

print(modality(sent))

1.0


In [50]:
text = "I think we can complete this task"
sent = parse(text, lemmata=True)
sent = Sentence(sent)

print(modality(sent))

0.25


## Spelling Corrections

In [51]:
from pattern.en import suggest

print(suggest("Whitle"))

[('While', 0.6459209419680404), ('White', 0.2968881412952061), ('Title', 0.03280067283431455), ('Whistle', 0.023549201009251473), ('Chile', 0.0008410428931875525)]


In [52]:
from pattern.en import suggest
print(suggest("Fracture"))

[('Fracture', 1.0)]


## Working with Numbers

In [53]:
from pattern.en import number, numerals

print(number("one hundred and twenty two"))
print(numerals(256.390, round=2))

122
two hundred and fifty-six point thirty-nine


In [54]:
from pattern.en import quantify

print(quantify(['apple', 'apple', 'apple', 'banana', 'banana', 'banana', 'mango', 'mango']))

several bananas, several apples and a pair of mangoes


In [55]:
from pattern.en import quantify

print(quantify({'strawberry': 200, 'peach': 15}))
print(quantify('orange', amount=1200))

hundreds of strawberries and a number of peaches
thousands of oranges


# Pattern Library Functions for Data Mining

## Accessing Web Pages

In [56]:
from pattern.web import download

page_html = download('https://en.wikipedia.org/wiki/Artificial_intelligence', unicode=True)

In [57]:
from pattern.web import URL, extension

page_url = URL('https://upload.wikimedia.org/wikipedia/commons/f/f1/RougeOr_football.jpg')
file = open('football' + extension(page_url.page), 'wb')
file.write(page_url.download())
file.close()

## Finding URLs within Text

In [58]:
from pattern.web import find_urls

print(find_urls('To search anything, go to www.google.com and also try to look https://stackabuse.com/python-for-nlp-introduction-to-the-pattern-library/', unique=True))

['https://stackabuse.com/python-for-nlp-introduction-to-the-pattern-library/', 'www.google.com']


## Making Asynchronous Requests for Webpages

In [59]:
from pattern.web import asynchronous, time, Google

asyn_req = asynchronous(Google().search, 'artificial intelligence', timeout=4)
while not asyn_req.done:
    time.sleep(0.1)
    print('searching...')

print(asyn_req.value)

print(find_urls(asyn_req.value, unique=True))

searching...
[Result({'url': 'https://en.wikipedia.org/wiki/Artificial_intelligence', 'title': 'Artificial intelligence - Wikipedia', 'text': '<b>Artificial intelligence</b> (<b>AI</b>), sometimes called machine intelligence, is intelligence <br>\ndemonstrated by machines, unlike the natural intelligence displayed by humans<br>\n&nbsp;...'}), Result({'url': 'https://www.investopedia.com/terms/a/artificial-intelligence-ai.asp', 'title': 'Artificial Intelligence (AI) Definition', 'text': '... <b>Artificial intelligence</b> refers to the simulation of human intelligence in machines <br>\nthat are programmed to think and act like humans.', 'date': 'Mar 13, 2020'}), Result({'url': 'https://www.sas.com/en_us/insights/analytics/what-is-artificial-intelligence.html', 'title': 'Artificial Intelligence – What it is and why it matters | SAS', 'text': '<b>Artificial intelligence</b> (<b>AI</b>) makes it possible for machines to learn from experience, <br>\nadjust to new inputs and perform human-li

In [60]:
from pattern.web import Google

google = Google(license=None)
for search_result in google.search('artificial intelligence'):
    print(search_result.url)
    print(search_result.text)

https://en.wikipedia.org/wiki/Artificial_intelligence
<b>Artificial intelligence</b> (<b>AI</b>), sometimes called machine intelligence, is intelligence <br>
demonstrated by machines, unlike the natural intelligence displayed by humans<br>
&nbsp;...
https://www.investopedia.com/terms/a/artificial-intelligence-ai.asp
... <b>Artificial intelligence</b> refers to the simulation of human intelligence in machines <br>
that are programmed to think and act like humans.
https://www.sas.com/en_us/insights/analytics/what-is-artificial-intelligence.html
<b>Artificial intelligence</b> (<b>AI</b>) makes it possible for machines to learn from experience, <br>
adjust to new inputs and perform human-like tasks. Most <b>AI</b> examples that you&nbsp;...
https://futureoflife.org/background/benefits-risks-of-artificial-intelligence/
From SIRI to self-driving cars, <b>artificial intelligence</b> (<b>AI</b>) is progressing rapidly. <br>
While science fiction often portrays <b>AI</b> as robots with human-li

In [61]:
from pattern.web import Twitter

twitter = Twitter()
index = None
for j in range(3):
    for tweet in twitter.search('artificial intelligence', start=index, count=3):
        print(tweet.text)
        index = tweet.id

RT @IrisID: FBI to add iris recognition technology to its portfolio of identification technology starting in October.
https://t.co/FPR3egzYFG via @FederalNewsNet #FBI #irisrecognition #identification https://t.co/eq77VFSvtv
RT @umarsaif: فرعون ...

Artificial Intelligence reconstruction of what Pharaoh Ramses II may have looked like (1,303 - 1,213 BC) https://t.co/zbbNrEgayX
@WSJ Let’s also add Nvidia- founded by Taiwan born visionary and leading the artificial intelligence revolution. 
I am an immigrant too..I know I would have picked a different country too (or stayed home) if I viewed US to be run by a racist.
RT @WIREDBusiness: Combining the two chipmakers would unite leaders in two big tech trends—artificial intelligence and mobile computing. https://t.co/9dJ3FVMRUV


## Converting HTML Data to Plain Text

In [62]:
from pattern.web import URL, plaintext

html_content = URL('https://stackabuse.com/python-for-nlp-introduction-to-the-textblob-library/').download()
cleaned_page = plaintext(html_content.decode('utf-8'))
print(cleaned_page)

Python for NLP: Introduction to the TextBlob Library

Toggle navigation Stack Abuse

* JavaScript
* Python
* Java
* Jobs

Python for NLP: Introduction to the TextBlob Library

By

Usman Malik

•0 Comments

Introduction

This is the seventh article in my series of articles on Python for NLP. In my previous article, I explained how to perform topic modeling using Latent Dirichlet Allocation and Non-Negative Matrix factorization. We used the Scikit-Learn library to perform topic modeling.

In this article, we will explore TextBlob, which is another extremely powerful NLP library for Python. TextBlob is built upon NLTK and provides an easy to use interface to the NLTK library. We will see how TextBlob can be used to perform a variety of NLP tasks ranging from parts-of-speech tagging to sentiment analysis, and language translation to text classification.

The detailed download instructions for the library can be found at the official link. I would suggest that you install the TextBlob libra

## Parsing PDF Documments

In [64]:
from pattern.web import URL, PDF
import locale
    
locale.getlocale()

('en_US', 'UTF-8')

In [65]:
pdf_doc = URL('http://demo.clab.cs.cmu.edu/NLP/syllabus_f18.pdf').download()
pdf_doc

b'%PDF-1.5\n%\n37 0 obj\n<<\n/Length 2177      \n/Filter /FlateDecode\n>>\nstream\nx\xda\x9dXKs\xdc\xb8\x11W)\xcb\xa9P$V.~]\xd9\x94&C6\x07\x0c\x19\x04\x03\x1eO\xd3\x8d\x069LiU{\x10\r_]}\x0cM,L\x00\x06l\x1f\x05f_m\xcd\xa8xE\x01>8\x17%;z\xd0\xba]\x1c\xcd\xb9m\xdd\xa8\x1dN6a\x17I("n?\nC:m{\x07zn}\xcd\xbcw-/,i\x17oah-,\x11\x16=a\x1ff\x17F~\x11t{zql\x04,V\x1f6oW(\x188[=qg\x19XG\x05#=\x1f9x^\x169]1\x0e6a\x17A\x11\x15w\x0c\x06y(h/6j,T9&Y\ngy\th9x\x00cI\xc2\xbb%;8=H\x02gGX\x14>(\x14\xdd\x9bRe7\x1a,\x03?i=h\rI?+:\x1f=VWg%\x04Z\x1420\t\xe3\x8e\xa8::\x1c q%\x0e{[\x00r!FK\x11\x02\x13D\x12p\x19|\x1e/k.4=\x0e0\xd6\x86E1\x028D\x1e7M\xc7\xb70\n+ot\x197lvA7e%N6d\x01\\A^IY\n}k}_sHXp+( \x0fm\xd9\xa3]I^b\\\x14\x19K\x1f\x19W\x054$\rvQ\x02f6~\x0b\x02$}a\x1f_>Yq\x02\x10z\x14,\x1e\x1d\x0c\xc8\xbb\x1b\x0ct\x11z\x13\x013{Zs\x01`{\x02B\x12=i:\x10^\x141\x1at\x0b]W\x7f\xce\xa3\xe8\x8e\x8aOX\xce\x9f\x10\x16%7\r\x00\x07h\x15/\x1e=\n]qY-[ju\x7f5\x0b\xd0\x9dokv\\4_b\xed\x87\xbf\xd1\x91q\xca\xa2?>mr\xcb\xadD\x1fi\x15?\

In [68]:
PDF(pdf_doc.decode('UTF-8'))

PDFError: embedded null byte

## Clearing the Cache

In [69]:
from pattern.web import cache

cache.clear()