In [None]:
import os
import json
import ipywidgets as widgets
from ipywidgets import Button, HBox, VBox, ToggleButtons, ToggleButton, Layout

import numpy as np
import pandas as pd
import qgrid

import requests
from urllib.parse import urljoin
from jinja2 import Environment, FileSystemLoader
from IPython.display import display

In [None]:
# Root URL of the REST service; every endpoint path in this notebook is
# joined onto it.
base_url = "http://127.0.0.1:8000"

In [None]:
# One shared HTTP session, handed to every request helper in this notebook.
session = requests.Session()

In [None]:
# Jinja2 environment whose templates are read from the local ./templates
# directory.
# NOTE(review): autoescaping is left at the Environment default here —
# confirm the templates escape any service-provided text themselves.
template_loader = FileSystemLoader('./templates')
template_env = Environment(loader=template_loader)


In [None]:
# Load the three HTML templates used by the widgets, in the same order as
# the names on the left-hand side.
all_topics_template, topic_template, document_template = (
    template_env.get_template(template_name)
    for template_name in ('all_topics.html', 'show_topic.html', 'show_document.html')
)

In [None]:
def get_number_of_topics(session):
    """Return the number of topics reported by the service.

    Sends a GET request to ``/topics/number`` (relative to ``base_url``) and
    reads the ``num_topics`` field of the JSON response.

    Args:
        session: Session object used to send the request.

    Returns:
        The value stored under ``'num_topics'`` in the response body.
    """
    endpoint = urljoin(base_url, '/topics/number')
    response_body = session.get(endpoint).json()
    return response_body['num_topics']

In [None]:
# Ask the service how many topics exist; this bounds the topic selectors
# and is the number of topics requested from the service.
num_topics = get_number_of_topics(session)

In [None]:
def get_topics(session, num_topics):
    """Fetch topics from the service.

    Sends a GET request to ``/topics/get-topics`` (relative to ``base_url``)
    with ``num_topics`` as a query parameter.

    Args:
        session: Session object used to send the request.
        num_topics: Value sent as the ``num_topics`` query parameter.

    Returns:
        The decoded JSON body of the response.
    """
    endpoint = urljoin(base_url, '/topics/get-topics')
    response = session.get(endpoint, params={'num_topics': num_topics})
    return response.json()

In [None]:
# Fetch every topic once, then index the word lists by topic number so the
# widgets can look a topic up directly.
topics = get_topics(session, num_topics)
topics_dict = dict(
    (topic['topic_num'], topic['topic_words'])
    for topic in topics
)

In [None]:
# (topic number, "word1, word2, ...") pairs holding at most the first ten
# words of each topic, in the order the service returned the topics.
top_words_in_topic = [
    (topic['topic_num'], ', '.join(topic['topic_words'][:10]))
    for topic in topics
]

In [None]:
# HTML for the all-topics overview, rendered from the (topic number, top
# words) pairs.
rendered_all_topics = all_topics_template.render(topics=top_words_in_topic)

In [None]:
def search_contains_word(session, word):
    """Search the service for documents matching a single keyword.

    Posts to ``/documents/search-by-keyword`` (relative to ``base_url``)
    with ``word`` as the only keyword, no negative keywords and a limit of
    five documents.

    Args:
        session: Session object used to send the request.
        word: The keyword to search for.

    Returns:
        The decoded JSON body of the response.
    """
    payload = {'keywords': [word], 'keywords_neg': [], 'num_docs': 5}
    # `json=` serialises the payload and also sets the
    # `Content-Type: application/json` header; a pre-serialised string
    # passed through `data=` is sent without any content type.
    r = session.post(urljoin(base_url, '/documents/search-by-keyword'), json=payload)
    return r.json()

In [None]:
# Topic number that both topic selectors start on.
initial_topic_num = 0

In [None]:
# Numeric entry box for picking a topic, limited to 0 .. num_topics - 1.
topic_select_textbox = widgets.BoundedIntText(
    value=initial_topic_num,
    min=0,
    max=num_topics - 1,
    step=1,
    disabled=False,
    description='',
)

In [None]:
# Horizontal slider for picking a topic, limited to 0 .. num_topics - 1.
# Its own numeric readout is switched off.
topic_select_slider = widgets.IntSlider(
    description='Topic:',
    orientation='horizontal',
    value=initial_topic_num,
    min=0,
    max=num_topics - 1,
    step=1,
    readout=False,
    readout_format='d',
    disabled=False,
)

In [None]:
# HTML widget showing the top words of every topic, wrapped in a
# fixed-height box that scrolls vertically when the content overflows.
topic_words_widget = widgets.HTML(value=all_topics_template.render(topics=top_words_in_topic))
topic_words_formatted = HBox(
    [topic_words_widget],
    layout=Layout(height='250px', overflow_y='auto'),
)

In [None]:
# Tie the slider's value and the text box's value together so both
# selectors show the same topic number.
topic_num_link = widgets.link((topic_select_slider, 'value'), (topic_select_textbox, 'value'))

In [None]:
def on_show_all_buttons_check(change):
    """Show or hide the all-topics overview when the checkbox changes.

    Args:
        change: Change notification from the checkbox; ``change.new`` is
            the checkbox's new value.
    """
    if not change.new:
        # Unchecked: empty the HTML and hide its container.
        topic_words_widget.value = ''
        topic_words_formatted.layout.display = 'none'
        return

    # Checked: re-render the overview and give the container a fresh layout,
    # which also replaces the layout that was hidden with display='none'.
    topic_words_widget.value = all_topics_template.render(topics=top_words_in_topic)
    topic_words_formatted.layout = Layout(height='250px', overflow_y='auto')

In [None]:
# Checkbox (initially ticked) that toggles the all-topics overview.
show_all_topics_check = widgets.Checkbox(
    value=True,
    description='Show top words for all topics',
    indent=False,
)
show_all_topics_check.observe(on_show_all_buttons_check, names='value')

In [None]:
# Initially empty HTML area that receives the documents found for a
# clicked topic word.
related_docs_widget = widgets.HTML(value='')

In [None]:
def on_button_group_click(change):
    """Update the related-documents area when the selected word changes.

    Args:
        change: Change notification for the button group's ``value``; the
            ``'owner'``, ``'new'`` and ``'old'`` entries are read.
    """
    button_group = change['owner']

    # No words to choose from: nothing to show.
    if not button_group.options:
        related_docs_widget.value = ''
        return

    selected_word = change['new']
    # The first option became selected while the previous value is not one
    # of the current options. Presumably this is the automatic selection
    # made when the option list is swapped rather than a user click — TODO
    # confirm. No search is run in that case.
    if selected_word == button_group.options[0] and change['old'] not in button_group.options:
        related_docs_widget.value = ''
        return

    # Otherwise look up documents for the selected word and render them.
    related_docs_widget.value = document_template.render(
        documents=search_contains_word(session, selected_word),
        character_limit=300,
        keywords=[selected_word],
    )

In [None]:
# Button group holding the words of the selected topic. It starts with no
# options and reports value changes to on_button_group_click.
word_search_buttons = ToggleButtons(
    options=[],
    disabled=False,
)
word_search_buttons.observe(on_button_group_click, names='value')

In [None]:
# Pre-populate the word buttons for the starting topic, but only when the
# service actually returned that topic. Comparing against num_topics does
# not guarantee the key exists, and a missing key would raise KeyError.
if initial_topic_num in topics_dict:
    word_search_buttons.options = topics_dict[initial_topic_num]

In [None]:
def handle_slider_change(change):
    """Replace the word buttons with the words of the newly selected topic.

    Args:
        change: Change notification for the slider's ``value``;
            ``change.new`` is used as the key into ``topics_dict``.
    """
    topic_num = change.new
    # The options are emptied before the new list is assigned. The original
    # author flags this as a hack; presumably it resets the group's
    # selection so its value observer sees a clean change — TODO confirm.
    word_search_buttons.options = [] # This is hacky
    word_search_buttons.options = topics_dict[topic_num]

# Run the handler whenever the slider's value changes.
topic_select_slider.observe(handle_slider_change, names='value')

In [None]:
# Static headings and help text for the topic explorer.
overview = widgets.HTML('<h1>Topic explorer</h1>')
# The help text previously read "to see search for documents"; the stray
# "see" is removed so the sentence is grammatical.
individual_topic_overview = widgets.HTML(
"""<h3>Investigate a specific topic</h3>
Move the slider (or use the textbox) to see all of the words for a specific topic. You can click on a word
to search for documents containing that word.
""")

In [None]:
# Slider and text box sit side by side; everything else is stacked
# vertically in reading order.
top_box = HBox([topic_select_slider, topic_select_textbox])
topic_explorer = VBox([
    overview,
    show_all_topics_check,
    topic_words_formatted,
    individual_topic_overview,
    top_box,
    word_search_buttons,
    related_docs_widget,
])

In [None]:
# Display the assembled topic explorer as this cell's output.
topic_explorer

In [None]:
def find_related_words(session, keywords):
    """Expand keywords with similar words suggested by the service.

    For every keyword a request is posted to ``/words/find-similar``
    (relative to ``base_url``) asking for up to three similar words. A
    lookup that does not answer with HTTP 200 is skipped, so that keyword
    simply contributes no extra words.

    Args:
        session: Session object used to send the requests.
        keywords: Iterable of keyword strings.

    Returns:
        A list of unique words: the original keywords first, in the order
        given, followed by the similar words in the order they were
        received.
    """
    # Number of related words requested per keyword.
    word_limit = 3

    original_words = list(keywords)
    candidates = list(original_words)
    for word in original_words:
        payload = {'keywords': [word], 'keywords_neg': [], 'num_words': word_limit}
        # `json=` also sets the `Content-Type: application/json` header,
        # which `data=json.dumps(...)` does not.
        r = session.post(urljoin(base_url, '/words/find-similar'), json=payload)
        if r.status_code == 200:
            candidates.extend(entry['word'] for entry in r.json())

    # dict.fromkeys removes duplicates while keeping first-seen order, so
    # the result is deterministic; going through a set made the order vary
    # between runs.
    return list(dict.fromkeys(candidates))

In [None]:
def search_docs_semantic_keywords(session, keywords, num_docs=10):
    """Search the service for documents matching a list of keywords.

    Posts to ``/documents/search-by-keyword`` (relative to ``base_url``)
    with the given keywords and no negative keywords.

    Args:
        session: Session object used to send the request.
        keywords: List of keyword strings to search for.
        num_docs: Value sent as ``num_docs`` in the request (default 10).

    Returns:
        The decoded JSON body of the response.
    """
    payload = {'keywords': keywords, 'keywords_neg': [], 'num_docs': num_docs}
    # `json=` serialises the payload and also sets the
    # `Content-Type: application/json` header; a pre-serialised string
    # passed through `data=` is sent without any content type.
    r = session.post(urljoin(base_url, '/documents/search-by-keyword'), json=payload)
    return r.json()

In [None]:
# Shared widget style: descriptions use their initial (natural) width.
style = dict(description_width='initial')

# Static heading and help text for the keyword search section.
search_box_overview = widgets.HTML(
"""<h3>Search for documents containing specific keywords</h3>
Enter keywords (separated by semicolons) to search for documents containing those words. By default
it will also search for documents containing related words.
""")
search_options_header = widgets.HTML('<p><u>Search options</u></p>')

In [None]:
# Checkbox (initially ticked) deciding whether a search is widened with
# related words.
use_semantic_search_check = widgets.Checkbox(
    value=True,
    description='Include related words in search',
    indent=False,
)

In [None]:
# Initially empty HTML area that receives the rendered search results.
search_results_widget = widgets.HTML(value='')

In [None]:
def run_search(query):
    """Search for a semicolon-separated keyword query and show the results.

    The query is split on ``;`` and each piece is stripped of surrounding
    whitespace. When the "include related words" checkbox is ticked the
    keywords are first expanded with ``find_related_words``. The matching
    documents are rendered with the document template into
    ``search_results_widget``. A query with no usable keyword clears the
    results instead.

    Args:
        query: Raw text from the search box; may be empty or ``None``.
    """
    # Drop empty fragments so input such as "foo;", ";;" or "   " never
    # sends an empty keyword to the service.
    fragments = (piece.strip() for piece in (query or '').split(';'))
    words = [w for w in fragments if w]

    if not words:
        search_results_widget.value = ''
        return

    if use_semantic_search_check.value:
        words_to_search = find_related_words(session, words)
    else:
        words_to_search = words

    search_results = search_docs_semantic_keywords(session, words_to_search, num_docs=search_doc_limit.value)
    rendered_documents = document_template.render(documents=search_results, character_limit=doc_char_limit.value,
                                                  keywords=words_to_search)
    search_results_widget.value = rendered_documents

In [None]:
def on_search_box_change(change):
    """Re-run the search with the search box's new text.

    Args:
        change: Change notification for the search box's ``value``; the
            ``'new'`` entry is used as the query.
    """
    new_query = change['new']
    run_search(new_query)

In [None]:
# Free-text query box. With continuous_update off, a change is reported to
# on_search_box_change rather than on every keystroke.
search_box = widgets.Text(
    description='Search for keywords separated by semicolons:',
    placeholder='words ; to ; search ; for',
    value='',
    continuous_update=False,
    disabled=False,
    layout=Layout(width='75%'),
    style=style,
)
search_box.observe(on_search_box_change, names='value')

In [None]:
# How many documents a search asks the service for (0 to 1000, starts at 10).
search_doc_limit = widgets.BoundedIntText(
    description='Number of documents to return:',
    value=10,
    min=0,
    max=1000,
    step=1,
    style=style,
    disabled=False,
)

In [None]:
# Character limit handed to the document template when rendering results
# (0 to 10000, starts at 300).
doc_char_limit = widgets.BoundedIntText(
    description='Show first N characters of document:',
    value=300,
    min=0,
    max=10000,
    step=1,
    style=style,
    disabled=False,
)

In [None]:
def on_search_button_click(b):
    """Run a search for whatever text is currently in the search box.

    Args:
        b: Argument supplied by the button's click callback; not used.
    """
    current_query = search_box.value
    run_search(current_query)

In [None]:
# Button that runs the search explicitly with the current search-box text.
search_button = widgets.Button(
    description='Search',
    tooltip='Search for documents containing keywords',
    icon='check',  # FontAwesome name without the `fa-` prefix
    button_style='',  # one of 'success', 'info', 'warning', 'danger' or ''
    disabled=False,
)
search_button.on_click(on_search_button_click)

In [None]:
# Query box and button share one row; the option widgets share another row
# underneath their header.
search_tools = HBox([search_box, search_button])
search_options = VBox([
    search_options_header,
    HBox([search_doc_limit, doc_char_limit, use_semantic_search_check]),
])

In [None]:
# Display the complete keyword-search UI as this cell's output.
widgets.VBox([
    search_box_overview,
    search_tools,
    search_options,
    search_results_widget,
])

In [None]:
# def search_keywords_semantic(session, keywords, num_topics=5):
#     payload = {'keywords': keywords, 'keywords_neg': [], 'num_topics': num_topics}
#     r = session.post(urljoin(base_url, '/topics/search'), data=json.dumps(payload))
#     return r.json()

In [None]:
# buttons = ToggleButtons(options=topics_dict[0])
# # buttons = ToggleButtons(options=[])
# buttons

In [None]:
# def on_button_group_click(change):
#     if change['new']:
#         print(change)
# #         word = change['new']
# #         related_docs = search_contains_word(session, word)
# #         rendered_documents = document_template.render(documents=related_docs, character_limit=300)
# #         related_docs_widget.value = rendered_documents
        
        
# buttons.observe(on_button_group_click, 'value')

In [None]:
# toggle = ToggleButton(description='tommy')
# VBox([toggle, related_docs_widget])

In [None]:
# def on_word_button_click(change):
#     if change['new']:
#         word = change['owner'].description
#         related_docs = search_contains_word(session, word)
#         rendered_documents = document_template.render(documents=related_docs, character_limit=300)
#         related_docs_widget.value = rendered_documents
        
        
# toggle.observe(on_word_button_click, 'value')

In [None]:
# related_docs = search_contains_word(session, 'tommy')
# related_docs

In [None]:
# rendered_documents = document_template.render(documents=related_docs, character_limit=300)

In [None]:
# related_docs_widget = widgets.HTML(
#         value = ''
#     )

# related_docs_widget

In [None]:
# word = 'handguns'
# payload = {'keywords': [word], 'keywords_neg': [], 'num_docs': 5}
# r = session.post(urljoin(base_url, '/documents/search-by-keyword'), data=json.dumps(payload))
# r.status_code

In [None]:
# r.json()

In [None]:
# payload = {'topic_num': 5, 'num_docs': 10}
# results = session.get(urljoin(base_url, '/documents/search-by-topic'), params=payload)

In [None]:
# results.json()

In [None]:
# payload = {'num_topics':77}
# results = session.get(urljoin(base_url, '/topics/get-topics'), params=payload)
# results.json()