# Pipeline
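This example demonstrates how to create an NLP pipeline. News headlines from an RSS feed are tokenized, part-of-speech tagged, and chunked into named entities, with each stage handing its results to the next through a queue.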

In [1]:
import nltk
import threading
import queue
import feedparser
import uuid

In [2]:
# Shared pipeline state: worker threads started so far, and one hand-off
# queue per stage boundary (tokens -> POS tags, POS tags -> NE chunks).
threads = list()
queues = [queue.Queue() for _ in range(2)]

In [25]:
def extract_words(url='https://www.straitstimes.com/news/singapore/rss.xml'):
    """Tokenize RSS headlines and put them on the first pipeline queue.

    Parses the feed at ``url`` with ``feedparser`` and, for every entry
    whose title does not contain the substring ``'ex'``, tokenizes the
    title with ``nltk.word_tokenize`` and puts a dict
    ``{'uuid': <fresh uuid4>, 'input': <token list>}`` on ``queues[0]``.
    Each queued headline is echoed to stdout prefixed with ``>>``.

    Args:
        url: Address of the RSS feed to read. Defaults to the feed this
            notebook was written against, so zero-argument callers keep
            working unchanged.
    """
    feed = feedparser.parse(url)
    for entry in feed['entries']:
        text = entry['title']
        # NOTE(review): titles containing 'ex' (case-sensitive) are skipped;
        # the reason is not visible in this notebook -- confirm the intent.
        if 'ex' in text:
            continue
        words = nltk.word_tokenize(text)
        # The uuid lets the output of later stages be matched to this headline.
        data = {'uuid': uuid.uuid4(), 'input': words}
        queues[0].put(data, True)
        print('>> {}: {}'.format(data['uuid'], text))

In [11]:
def extract_pos():
    """Drain ``queues[0]``, POS-tag each token list, and forward the result.

    Each item taken from ``queues[0]`` is a dict with ``'uuid'`` and
    ``'input'`` (a token list). The tokens are tagged with
    ``nltk.pos_tag`` and ``{'uuid': ..., 'input': <tagged tokens>}`` is
    put on ``queues[1]``. The function returns as soon as ``queues[0]``
    has no item available.
    """
    while True:
        # get_nowait() replaces the empty()-then-get() pair: a non-empty
        # check does not guarantee that a following blocking get() returns.
        try:
            data = queues[0].get_nowait()
        except queue.Empty:
            break
        try:
            postags = nltk.pos_tag(data['input'])
            queues[1].put({'uuid': data['uuid'],
                           'input': postags}, True)
        finally:
            # Mark the item done only after it has been forwarded, and even
            # if tagging raised, so queues[0].join() cannot block forever.
            queues[0].task_done()

In [26]:
def extract_ne():
    """Drain ``queues[1]`` and print the named-entity chunks of each item.

    Each item taken from ``queues[1]`` is a dict with ``'uuid'`` and
    ``'input'`` (POS-tagged tokens). The tokens are chunked with
    ``nltk.ne_chunk(..., binary=False)`` and every child of the result
    that exposes a ``label`` attribute is printed on one line prefixed
    with ``<< <uuid>:``. The function returns as soon as ``queues[1]``
    has no item available.
    """
    while True:
        # get_nowait() replaces the empty()-then-get() pair: a non-empty
        # check does not guarantee that a following blocking get() returns.
        try:
            data = queues[1].get_nowait()
        except queue.Empty:
            break
        try:
            chunks = nltk.ne_chunk(data['input'], binary=False)
            print('<< {}:'.format(data['uuid']), end='')
            for node in chunks:
                # Only labelled children are reported; children without a
                # label() (presumably plain (word, tag) tokens -- see the
                # NLTK docs) are skipped explicitly rather than through a
                # catch-all except that would also hide real errors.
                if hasattr(node, 'label'):
                    print(node, end=', ')
            print()
        finally:
            # Mark the item done only once it has been fully processed, and
            # even on failure, so queues[1].join() cannot block forever.
            queues[1].task_done()

In [27]:
def run_program():
    """Run the three pipeline stages, each in its own worker thread.

    The stages are ``extract_words`` -> ``extract_pos`` -> ``extract_ne``.
    Every started thread is appended to the module-level ``threads`` list.
    After all stages have run, both queues are joined to confirm that
    every queued item was marked done.

    Raises:
        Exception: the first exception raised inside a stage is re-raised
            here, so a failing stage surfaces in the calling cell instead
            of being lost inside its thread.
    """
    failures = []

    def run_stage(stage):
        # Capture the stage's exception so it can be re-raised by the caller.
        try:
            stage()
        except Exception as exc:
            failures.append(exc)

    for stage in (extract_words, extract_pos, extract_ne):
        # Pass the callable itself; writing target=stage() would run the stage
        # in the calling thread and hand Thread a target of None.
        worker = threading.Thread(target=run_stage, args=(stage,),
                                  name=stage.__name__)
        worker.start()
        threads.append(worker)
        # The consumer stages return as soon as their input queue is empty,
        # so a stage must finish before the next one starts; otherwise a
        # consumer could exit before its producer has queued anything.
        worker.join()
        if failures:
            raise failures[0]

    queues[0].join()
    queues[1].join()

In [28]:
run_program()

>> c02108ae-434d-453c-9968-2a88669a0347: Man allegedly cheated 80 people of nearly $100k in haj tour scam
>> 28d7ed28-62d3-4de0-a089-dd37bb87123d: Travellers to Singapore to wear electronic tracking device while serving Covid-19 stay-home notice outside of facilities
>> 8836ce50-1024-4a63-8520-547882d7c30e: Morning Briefing: Top stories from The Straits Times on Aug 3
>> 7e297ecf-cc82-4c4d-8e93-0e908161b94c: NDP 2020: State flag to be flown across Singapore via two routes
>> 7aa88383-0434-4964-ac12-3986506cbae3: UK court gives nod for contactless card patent application to proceed
>> eff5c8f3-49dd-4a21-a244-23d0ea84ed13: Coronavirus: It's a struggle for survival for many small firms
>> 816cc541-2a7a-421e-92f9-7eb6943f6c60: NDP 2020: Red Lions take on new challenge with jump into the heartland
>> 946873e1-800c-4b22-98bc-bb228c5c9fd7: Panel's call to protect works generated by computers
>> 8e5a2251-58ab-41b0-9165-7a6173c94290: PR linked to imported case is sole new community infection
>>