In [None]:
# !pip3 install ipywidgets
# !jupyter nbextension enable --py widgetsnbextension
# !jupyter lab clean
# !jupyter labextension install @jupyter-widgets/jupyterlab-manager

In [1]:
from __future__ import print_function
from ipywidgets import interact, Box, HBox, VBox
import ipywidgets as widgets
from IPython.display import display

import re
from dataclasses import dataclass
from typing import List

## Debounce

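`debounce(wait)` is a decorator that postpones a function call until `wait` seconds have passed since its most recent invocation, e.g. `@debounce(0.25)`.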

References:
- https://ipywidgets.readthedocs.io/en/latest/examples/Widget%20Events.html?highlight=throttle#Debouncing

In [2]:
import asyncio

class Timer:
    """One-shot timer built on asyncio.

    Constructing a Timer immediately schedules a task that sleeps for
    `timeout` seconds and then calls `callback` with no arguments.
    Call `cancel()` before the delay elapses to cancel the scheduled task.
    """

    def __init__(self, timeout, callback):
        self._timeout, self._callback = timeout, callback
        # The task is scheduled right away; there is no separate start().
        self._task = asyncio.ensure_future(self._job())

    def cancel(self):
        """Cancel the scheduled task."""
        self._task.cancel()

    async def _job(self):
        """Sleep for the configured delay, then invoke the callback."""
        delay = self._timeout
        await asyncio.sleep(delay)
        self._callback()

def debounce(wait):
    """ Decorator that will postpone a function's
        execution until after `wait` seconds
        have elapsed since the last time it was invoked.

        Each call to the decorated function cancels the previously
        scheduled `Timer` (if any) and starts a new one, so only the most
        recent call's arguments are ever passed to the wrapped function.
        The decorated function itself returns None immediately; the wrapped
        function's return value is discarded. """
    import functools  # local import keeps this block self-contained

    def decorator(fn):
        timer = None

        # functools.wraps keeps fn's __name__, __doc__ and (through
        # __wrapped__) its signature visible on the debounced wrapper.
        @functools.wraps(fn)
        def debounced(*args, **kwargs):
            nonlocal timer

            def call_it():
                fn(*args, **kwargs)

            # Restart the countdown: drop the pending call, schedule this one.
            if timer is not None:
                timer.cancel()
            timer = Timer(wait, call_it)
        return debounced
    return decorator

In [3]:
@dataclass
class Annotation:
    """A single keyword occurrence found inside a sentence."""
    text: str   # the matched substring, i.e. sentence[start:end]
    start: int  # offset of the first matched character (from re.Match.span())
    end: int    # offset one past the last matched character (exclusive)
        
@dataclass
class Sentence:
    """A sentence together with the keyword annotations found in it."""
    text: str                      # the sentence string that was searched
    annotations: List[Annotation]  # matches in the order they were found

In [4]:
def f(x):
    """Identity function: return `x` unchanged. Used as the `interact` callback below."""
    return x

# Build the interact UI inside an Output widget's capture context.
# NOTE(review): `output` is never displayed in this cell - looks like a
# leftover ipywidgets smoke test; confirm whether it is still needed.
output = widgets.Output()
with output:
    interact(f, x=10)

In [18]:
# Install a pip package in the current Jupyter kernel
import sys
!{sys.executable} -m pip install BeautifulSoup4



In [19]:
# Page to download; it is fetched with requests.get(url) further down.
url = 'https://www.fullstackacademy.com/blog/nine-best-programming-languages-to-learn'

In [6]:
import nltk
import requests
from collections import Counter
from bs4 import BeautifulSoup

# Fetch the page. Without a timeout requests waits indefinitely, so a stalled
# server would hang the kernel; 30 seconds is generous for a single HTML page.
page = requests.get(url, timeout=30)
# Fail fast before spending time on charset detection for an error page.
if page.status_code != 200:
    raise Exception("unable to fetch page")
# Deal with weird characters: decode with the encoding detected from the body
# rather than the one guessed from the HTTP headers.
page.encoding = page.apparent_encoding

In [7]:
# Parse the downloaded HTML with the 'html.parser' backend and pull out its
# text via get_text(); the trailing expression shows the text length.
soup = BeautifulSoup(page.text, 'html.parser')
text = soup.get_text()
len(text)

13568

In [45]:
# Count lower-cased word tokens, skipping English stopwords and
# single-character tokens, then show the ten most frequent ones.
stopwords = set(nltk.corpus.stopwords.words('english'))
tokens = Counter(
    word
    for sent in nltk.sent_tokenize(text.lower())
    for word in nltk.word_tokenize(sent)
    if word not in stopwords and len(word) > 1
)
tokens.most_common(10)

[('language', 28),
 ('programming', 25),
 ('development', 19),
 ('languages', 13),
 ('fullstack', 13),
 ('web', 12),
 ('javascript', 11),
 ('learn', 10),
 ('rust', 10),
 ('python', 9)]

In [46]:
def clear_newlines_and_whitespaces(s):
    """Collapse every run of whitespace in `s` into one space and trim both ends."""
    # str.split() with no separator splits on whitespace runs and drops the
    # empty leading/trailing pieces, so joining with ' ' normalises the string.
    words = s.split()
    return ' '.join(words)

In [48]:
# Split the page text into sentences and normalise the whitespace of each one.
sentences = [clear_newlines_and_whitespaces(raw) for raw in nltk.sent_tokenize(text)]
len(sentences), sentences[0]

(89,
 'The 9 Best Programming Languages to Learn in 2020 | Fullstack Academy Programs New York Full-time Coding Bootcamp Part-time Flex Coding Bootcamp Grace Hopper Program Web Development Fellowship (WDF) Cybersecurity Bootcamp Chicago Full-time Coding Bootcamp Part-time Flex Coding Bootcamp Grace Hopper Program Online Online Coding Bootcamp Tuition & Dates Summer of Code Campus New York Chicago Online Admissions Prep Intro to Coding Bootcamp Prep Course Admissions Prep Workshop More Events Why Fullstack Student Stories Reviews Hiring Outcomes Job Search Help Curriculum Financing Admissions FAQs About Careers Blog Press Partners For Universities For Employers Apply Closing Soon!')

In [49]:
# Seed the keyword list with the ten most frequent tokens. A plain list is used
# (instead of a dict_keys view) so the value has the same type as the list
# that update_keywords() assigns later.
keywords = [word for word, _count in tokens.most_common(10)]

In [50]:
# NOTE(review): `tag` is not referenced by any other visible cell - presumably
# the label intended for the annotations; confirm.
tag = 'SKILL'
# One slot per sentence; a slot stays None until preprocess() fills it in.
result: List[Sentence] = [None] * len(sentences)
# Index of the sentence currently shown; -1 means "not started yet".
idx = -1

def to_next():
    """Advance the global `idx` by one, unless it is already at the last sentence."""
    global idx
    last = len(sentences) - 1
    if idx < last:
        idx += 1

def to_prev():
    """Move the global `idx` back by one; a value of 0 or less is left untouched."""
    global idx
    if idx > 0:
        idx -= 1
    
def reset():
    """Discard all stored results and return to the "not started" position."""
    global idx, result
    # One empty (None) slot per sentence, then rewind the cursor.
    result = [None for _ in range(len(sentences))]
    idx = -1

def reject():
    """Placeholder that currently does nothing.

    NOTE(review): no visible cell calls this; the intended behaviour is not
    shown in this notebook - confirm before implementing or removing.
    """
    pass

def preprocess(sentence):
    """Find every keyword occurrence in `sentence` and store it in `result[idx]`.

    Reads the module-level `keywords` and `idx`, and writes a `Sentence`
    (the text plus a list of `Annotation`s) into `result[idx]`.

    Matching is case-insensitive and whole-word: searching for "go" does not
    match "going". Keywords are matched literally, so entries such as "c++"
    are safe. With no (non-empty) keywords the sentence gets an empty
    annotation list.
    """
    # Longest keywords first, so "c++" is preferred over "c" at the same position.
    terms = sorted((kw for kw in keywords if kw), key=len, reverse=True)
    annotations: List[Annotation] = []

    if terms:
        # The alternation is wrapped in a group so the word-boundary checks
        # apply to EVERY keyword, not just the first and last alternative.
        # Lookarounds are used instead of \b so keywords that start or end
        # with a non-word character (e.g. "c++", "c#") can still match.
        pattern = r'(?<!\w)(?:{})(?!\w)'.format('|'.join(map(re.escape, terms)))
        for m in re.finditer(pattern, sentence, flags=re.IGNORECASE|re.MULTILINE|re.UNICODE):
            s, e = m.span()
            annotations.append(Annotation(sentence[s:e], s, e))

    display(idx)
    result[idx] = Sentence(sentence, annotations)
    
def update_keywords(new_kw):
    """Replace the global `keywords` with the entries of a comma-separated string.

    Each entry is stripped of surrounding whitespace; empty entries and
    duplicates are dropped. The order in which keywords were typed is kept,
    so the resulting list is the same on every run.
    """
    global keywords
    stripped = (part.strip() for part in new_kw.split(','))
    # dict.fromkeys de-duplicates while preserving first-seen order
    # (a set would scramble the order between kernel sessions).
    keywords = list(dict.fromkeys(part for part in stripped if part))

In [51]:
# Navigation buttons.
button_prev = widgets.Button(description="Prev")
button_next = widgets.Button(description="Next")
button_reset = widgets.Button(description='Reset')

# Only buttons created above go into the row. The row used to include a
# `button_add` that no cell defines, which raised NameError on a fresh kernel.
hbox = HBox([button_prev, button_next, button_reset])

# Click handlers. `e` is the argument ipywidgets passes to the callback; it is
# not used. `render` is looked up when a handler fires, so it only has to
# exist by the time the user clicks.
def handle_prev(e):
    """Step back one sentence and redraw."""
    to_prev()
    render()

def handle_next(e):
    """Step forward one sentence and redraw."""
    to_next()
    render()

def handle_reset(e):
    """Clear all results, rewind to the start and redraw."""
    reset()
    render()

button_prev.on_click(handle_prev)
button_next.on_click(handle_next)
button_reset.on_click(handle_reset)

# `html` shows the current sentence; `input_keyword` edits the keyword list.
html = widgets.HTML(value='', placeholder='Enter html', description='')
input_keyword = widgets.Text(value=','.join(keywords),
                             description='Keywords')

# Debounced so the view is re-rendered 0.25 s after the user stops typing
# rather than on every keystroke.
@debounce(0.25)
def update_keyword(new_kw):
    """Apply the edited keyword string and redraw the current sentence."""
    update_keywords(new_kw)
    render()

interact(update_keyword, new_kw=input_keyword)

display(hbox,
        html)

def render():
    """Redraw the `html` widget for the sentence at the global `idx`.

    - `idx < 0`: shows "Press next to start".
    - `idx` past the end: shows "Completed".
    - otherwise: runs `preprocess` on the sentence, then shows a
      "Total/Current" header followed by the sentence with every annotated
      span highlighted. On the last sentence a "Completed" line is appended,
      so the final sentence is shown and annotated instead of being skipped.

    Highlighting is built from the annotations' start/end offsets, so repeated
    keywords are wrapped exactly once and text that merely contains a keyword
    is left alone. The sentence text comes from a scraped web page, so it is
    HTML-escaped before being placed in the widget.
    """
    # Imported by name only: the module-level name `html` is the HTML widget.
    from html import escape

    total = len(sentences)
    if idx < 0:
        html.value = "Press next to start"
        return
    if idx >= total:
        html.value = 'Completed'
        return

    sentence = sentences[idx]
    display(sentence, idx)
    preprocess(sentence)

    pieces = []
    cursor = 0
    for annotation in sorted(result[idx].annotations, key=lambda a: a.start):
        if annotation.start < cursor:
            # Overlaps a span that is already highlighted; skip it.
            continue
        pieces.append(escape(sentence[cursor:annotation.start], quote=False))
        marked = escape(sentence[annotation.start:annotation.end], quote=False)
        pieces.append(f'<b style="background: #FFFBCC">{marked}</b>')
        cursor = annotation.end
    pieces.append(escape(sentence[cursor:], quote=False))
    highlighted = ''.join(pieces)

    # Build the markup once so the widget is updated a single time.
    body = f"Total: {total}, Current: {idx+1}"
    body += '<br/>'
    body += f'{idx+1}. {highlighted}'
    body += '<br/>'
    if idx == total - 1:
        body += 'Completed'
    html.value = body

interactive(children=(Text(value='language,programming,development,languages,fullstack,web,javascript,learn,ru…

HBox(children=(Button(description='Prev', style=ButtonStyle()), Button(description='Next', style=ButtonStyle()…

HTML(value='', placeholder='Enter html')

'The 9 Best Programming Languages to Learn in 2020 | Fullstack Academy       \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nPrograms\n\n\n\n\nNew York\n\n\n\nFull-time Coding Bootcamp\n\n\n\n\nPart-time Flex Coding Bootcamp\n\n\n\n\nGrace Hopper Program\n\n\n\n\nWeb Development Fellowship (WDF)\n\n\n\n\nCybersecurity Bootcamp\n\n\n\n\n\nChicago\n\n\n\nFull-time Coding Bootcamp\n\n\n\n\nPart-time Flex Coding Bootcamp\n\n\n\n\nGrace Hopper Program\n\n\n\n\n\nOnline\n\n\n\nOnline Coding Bootcamp\n\n\n\n\n\n\nTuition & Dates\n\n\n\n\nSummer of Code\n\n\n\n\n\n\n\nCampus\n\n\n\n\n\nNew York\n\n\n\n\nChicago\n\n\n\n\nOnline\n\n\n\n\n\n\n\nAdmissions Prep\n\n\n\n\n\nIntro to Coding\n\n\n\n\nBootcamp Prep Course\n\n\n\n\nAdmissions Prep Workshop\n\n\n\n\nMore Events\n\n\n\n\n\n\n\nWhy Fullstack\n\n\n\n\n\nStudent Stories\n\n\n\n\nReviews\n\n\n\n\nHiring Outcomes\n\n\n\n\nJob Search Help\n\n\n\n\nCurriculum\n\n\n\n\nFinancing\n\n\n\n\nAdmissions\n\n\n\n\nFAQs\n\n\n\n\n\n\n\nAbout

'The 9 Best Programming Languages to Learn in 2020 | Fullstack Academy Programs New York Full-time Coding Bootcamp Part-time Flex Coding Bootcamp Grace Hopper Program Web Development Fellowship (WDF) Cybersecurity Bootcamp Chicago Full-time Coding Bootcamp Part-time Flex Coding Bootcamp Grace Hopper Program Online Online Coding Bootcamp Tuition & Dates Summer of Code Campus New York Chicago Online Admissions Prep Intro to Coding Bootcamp Prep Course Admissions Prep Workshop More Events Why Fullstack Student Stories Reviews Hiring Outcomes Job Search Help Curriculum Financing Admissions FAQs About Careers Blog Press Partners For Universities For Employers Apply Closing Soon!'