In [73]:
# set up libraries
import requests
import json
import codecs
from collections import Counter
# external libraries
# check if additional installation needed
import plotly.graph_objects as go
import ipywidgets as widgets
from ipywidgets import interact, interact_manual, Layout


In [74]:
# setup global
# Endpoint and request headers of the NLP web service (the same values are
# repeated as the defaults of getNLP).
url = "http://nlp.ailab.lv/api/nlp"
headers = {'content-type': 'application/json'}
# Placeholder for the document to analyse; the upload callback rebinds it to
# the decoded file contents.
# NOTE(review): starts out as a list but holds a str after an upload -
# consider initialising it to "" so the type stays consistent.
text = []
# Most recent NLP response; rebound by getResults().
results = {}

In [75]:
def getNLP(url = "http://nlp.ailab.lv/api/nlp",
           data = {"data": {"text": "change me"},
                   "steps": ["tokenizer", "morpho", "parser", "ner"],
                   "model": "default",
                   "config": None},
           headers = {'content-type': 'application/json'},
           timeout = (10, 600)):
    """POST ``data`` as JSON to the NLP service and return the decoded reply.

    Args:
        url: Endpoint to post to.
        data: JSON-serialisable request payload (see getData for the shape
            used in this notebook).
        headers: HTTP headers sent with the request.
        timeout: ``requests`` timeout in seconds, either a single number or a
            ``(connect, read)`` tuple. The default fails fast when the host
            is unreachable but leaves generous time for the service to answer.
            Pass ``None`` to wait indefinitely (the previous behaviour).

    Returns:
        The Python object decoded from the JSON response body, or ``None``
        (after printing the status code) when the response status is not 200.

    Raises:
        requests.exceptions.RequestException: on connection errors or when
            the timeout expires.
    """
    # The dict defaults above are shared between calls; that is safe only
    # because this function never mutates them.
    # Without a timeout a stalled or unreachable service would block the
    # kernel forever.
    response = requests.post(url, json.dumps(data), headers=headers,
                             timeout=timeout)
    if response.status_code != 200:
        print(f"Bad response code: {response.status_code}")
        return None
    return response.json()

In [76]:
def getData(text, offset=0, limit=None):
    """Build the request payload for a slice of ``text``.

    Args:
        text: Sliceable input (the notebook passes the uploaded document).
        offset: Index of the first element to include.
        limit: Maximum number of elements to take starting at ``offset``.
            ``None`` means "everything up to the end"; ``0`` yields an empty
            slice (it used to be treated like ``None`` because of a
            truthiness test).

    Returns:
        dict with the keys ``"data"`` (holding the sliced text), ``"steps"``,
        ``"model"`` and ``"config"``.
    """
    # Compare against None explicitly so that limit=0 is honoured.
    end = None if limit is None else offset + limit
    return {
        "data": {"text": text[offset:end]},
        "steps": ["tokenizer", "morpho", "parser", "ner"],
        "model": "default",
        "config": None,
    }

In [77]:
def filterForms(r=None, upos='NOUN', form='lemma'):
    """Collect one field from every token that has the requested UPOS tag.

    Args:
        r: NLP response to scan; it is read as
            ``r['data']['sentences'][i]['tokens'][j]``. When omitted, the
            module-level ``results`` is used as it is at call time.
        upos: Value a token's ``'upos'`` entry must equal to be kept.
        form: Token key whose value is collected (e.g. ``'lemma'`` or
            ``'form'``).

    Returns:
        list of the selected token values in document order; an empty list
        when there are no results to scan.
    """
    if r is None:
        # Resolve the global here rather than in the signature: a default of
        # ``r=results`` is evaluated once at definition time and keeps
        # pointing at the initial empty dict after getResults() rebinds the
        # global.
        r = results
    if not r:
        # Nothing fetched yet, or the last request failed and returned None.
        return []
    return [
        token[form]
        for sentence in r['data']['sentences']
        for token in sentence['tokens']
        if token['upos'] == upos
    ]

In [78]:
# Quick check of the payload builder using the current global text.
getData(text, limit=50)

{'data': {'text': []},
 'steps': ['tokenizer', 'morpho', 'parser', 'ner'],
 'model': 'default',
 'config': None}

In [51]:
# File-upload widget; see the ipywidgets widget list:
# https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20List.html#File-Upload
uploader = widgets.FileUpload(
    description = 'Upload .txt file to analyze',
    layout=Layout(width='220px'),
    accept='.txt',  # Accepted file extension e.g. '.txt', '.pdf', 'image/*', 'image/*,.pdf'
    multiple=False  # True to accept multiple files upload else False
)
# Output area shown under the button; the upload callback prints into it.
output = widgets.Output()
display(uploader, output)

def on_value_change(change):
    """Decode a newly uploaded file into the global ``text``.

    Observer callback for ``uploader.value``. The new value is read as a
    mapping with a single key (the file name) whose entry provides the raw
    bytes under ``'content'`` and the byte size under
    ``['metadata']['size']``. A short summary is printed into ``output``.

    Args:
        change: Change notification dict; only ``change['new']`` is used.
    """
    global text  # the decoded document is shared with the other cells
    with output:
        value = change['new']
        if not value:
            # An empty value (e.g. the widget was reset) carries no file;
            # the single-item unpacking below would raise ValueError.
            return
        [uploaded_file] = value
        text = codecs.decode(value[uploaded_file]['content'], encoding="utf-8")
        tlen = len(text)
        bsize = value[uploaded_file]['metadata']['size']
        print(f"You uploaded {uploaded_file}  raw size: {bsize} text length: {tlen}")

# Run the callback every time the widget's ``value`` trait changes.
uploader.observe(on_value_change, names='value')

FileUpload(value={}, accept='.txt', description='Upload .txt file to analyze', layout=Layout(width='220px'))

Output()

In [79]:
# Size of the current document (number of characters once a file has been
# uploaded and decoded into a str).
len(text)

258693

In [80]:
def getResults(text, offset, limit):
    """Send a slice of ``text`` to the NLP service and keep the reply.

    The payload is built with getData(text, offset, limit), posted through
    getNLP, and the return value is stored in the module-level ``results``.
    The type of the stored object is printed as a quick status report.
    """
    global results
    payload = getData(text, offset, limit)
    results = getNLP(data=payload)
    print(f"Got results of type {type(results)}")

In [81]:
@interact_manual
def manResults(offset=(0,100,1), limit=(10000,50000, 500)):
    """Interactive front end for getResults on the global ``text``.

    ``offset`` and ``limit`` are rendered as integer sliders from their
    ``(min, max, step)`` tuples; the request is issued only when the
    widget's button is pressed.
    """
    getResults(text, offset=offset, limit=limit)

interactive(children=(IntSlider(value=50, description='offset'), IntSlider(value=30000, description='limit', m…

In [83]:
@interact_manual
def showForms(start=(0,20,40), end=20, form=['lemma','form'], upos=['NOUN','VERB','ADJ','ADV']):
    """Plot a bar chart of the most frequent words in the current results.

    Args:
        start: Index of the first entry of the frequency ranking to show.
        end: Index one past the last entry of the ranking to show.
        form: Token field to count (``'lemma'`` or ``'form'``).
        upos: UPOS tag the counted tokens must have.

    The words are taken from the module-level ``results``, counted, ranked by
    frequency and the ``[start:end]`` slice of the ranking is drawn.
    """
    # NOTE(review): the (min, max, step) tuple for ``start`` has a step (40)
    # larger than its range (0..20), so the slider cannot leave 0 - the
    # intended bounds should be confirmed.
    if not results:
        # Nothing fetched yet, or the last request failed and returned None.
        print("No NLP results available - run manResults first")
        return
    # Pass the current results explicitly instead of relying on filterForms'
    # default argument, which may still refer to the initial empty dict.
    words = filterForms(r=results, form=form, upos=upos)
    # Rank once and reuse the slice for both axes.
    ranked = Counter(words).most_common()[start:end]
    x = [word for word, _ in ranked]
    y = [count for _, count in ranked]
    fig = go.Figure(
        data=[go.Bar(x=x, y=y)],
        # Name what is actually plotted rather than always saying "lemma".
        layout_title_text=f"Popular {form} ({upos})"
    )
    fig.show()

interactive(children=(IntSlider(value=0, description='start', max=20, step=40), IntSlider(value=20, descriptio…