In [1]:
from ipywidgets import Layout

from main import main
import pandas as pd

SUBJECT = "france"  # topic of the data to fetch
NUMBER = 100  # how many items to fetch from each source
BUILD_CORPUS = True  # True: build the corpus; False: load it from a file

# `main` lives in the project module of the same name (not shown here); per the
# flags above it presumably builds or loads the corpus -- confirm in main.py.
# The returned object is what the search cell queries (basic_search,
# advanced_search, bm25_search, get_distinct_sources_list).
google = main(SUBJECT, NUMBER, BUILD_CORPUS)

In [2]:
from ipywidgets import HTML
import ipywidgets as widgets
from IPython.display import display

# Heading displayed above the query controls.
label = widgets.Label(value="Search for a term in the corpus")

# Free-text query field, pre-filled with a sample term.
search_box = widgets.Text(
    description='Search:',
    placeholder='Type something',
    value='health',
    disabled=False,
)

# Number of rows to show; the click handler treats 0 as "show every result".
slider = widgets.IntSlider(
    description='Search results:',
    min=0, max=20, step=1,
    value=10,
    readout=True, readout_format='d',
    orientation='horizontal',
    continuous_update=False,
    disabled=False,
    style={'description_width': 'initial'},
)

# Triggers the search; the handler is attached further down in this cell.
button = widgets.Button(description="Search")

# Selects the search method: 1 -> basic, 2 -> advanced, 3 -> BM25
# (see on_button_clicked for the dispatch).
search_strength = widgets.IntSlider(
    description='Search strength:',
    min=1, max=3, step=1,
    value=1,
    readout=True, readout_format='d',
    orientation='horizontal',
    continuous_update=False,
    disabled=False,
    style={'description_width': 'initial'},
)

# Heading for the per-source filter checkboxes.
enabled_sources_label = widgets.Label(value="Enabled sources")

# One checkbox per entry returned by google.get_distinct_sources_list(),
# all ticked initially; the checkbox description carries the source name.
source_checkbox_list = [
    widgets.Checkbox(description=source_name, value=True, layout=Layout(width='150px'))
    for source_name in google.get_distinct_sources_list()
]

# The two sliders below feed google.bm25_search, which is only used at search
# strength 3. They start disabled because the strength slider defaults to 1;
# update_sliders toggles them when the strength changes.

k_label = widgets.Label(value="keyword importance (only for strength 3)")

k = widgets.FloatSlider(
    description='k:',
    min=0, max=2, step=0.1,
    value=1.5,
    readout=True, readout_format='.1f',
    orientation='horizontal',
    continuous_update=False,
    disabled=True,
    style={'description_width': 'initial'},
)

b_label = widgets.Label(value="prefer shorter documents (only for strength 3)")

b = widgets.FloatSlider(
    description='b:',
    min=0, max=1, step=0.05,
    value=0.65,
    readout=True, readout_format='.2f',
    orientation='horizontal',
    continuous_update=False,
    disabled=True,
    style={'description_width': 'initial'},
)

# When ticked, the click handler shows only the "Body" and "Score" columns.
simple_output = widgets.Checkbox(
    description='Simple Output',
    value=False,
    disabled=False,
)

# Area that receives everything the click handler displays.
output = widgets.Output()


def update_sliders(*args):
    """Enable the k and b sliders only while search strength is 3 or higher.

    Accepts and ignores any positional arguments so it can be registered
    directly as a widget observer callback.
    """
    tuning_locked = search_strength.value < 3
    k.disabled = tuning_locked
    b.disabled = tuning_locked


# Re-run update_sliders whenever the strength slider's 'value' trait changes.
search_strength.observe(update_sliders, 'value')


def on_button_clicked(btn):
    """Run the selected search and render its results inside ``output``.

    Reads the current widget state (query text, ticked sources, search
    strength, k/b values, result count, simple-output flag), calls the
    matching search method on ``google`` and displays the resulting frame.

    Args:
        btn: The button instance passed by ``Button.on_click``; unused.

    Notes:
        * Strength 1 -> ``basic_search``, 2 -> ``advanced_search``,
          anything else -> ``bm25_search`` (which also receives k and b).
        * A result-count slider value of 0 means "show every row".
        * When the search returns an empty frame, only the
          "No results found" message is shown; the empty frame itself is
          no longer rendered underneath it.
    """
    with output:
        # Drop whatever the previous search rendered.
        output.clear_output()

        enabled_sources_list = [src.description for src in source_checkbox_list if src.value]
        query = search_box.value

        if search_strength.value == 1:
            search_results = google.basic_search(query, enabled_sources_list)
        elif search_strength.value == 2:
            search_results = google.advanced_search(query, enabled_sources_list)
        else:
            search_results = google.bm25_search(query, k.value, b.value, enabled_sources_list)

        if search_results.empty:
            display("No results found")
            # Stop here: rendering the empty frame adds noise, and selecting
            # "Body"/"Score" from a column-less frame would raise KeyError.
            return

        shown_results = search_results
        if slider.value != 0:
            shown_results = shown_results.head(slider.value)
        if simple_output.value:
            shown_results = shown_results[["Body", "Score"]]

        # Lift the column-width cap so long text cells are not truncated.
        with pd.option_context('display.max_colwidth', None):
            display(shown_results)


# Hook the search handler up to the button.
button.on_click(on_button_clicked)

# Render all controls stacked vertically, with the results area at the bottom.
display(
    widgets.VBox([
        label,
        search_box,
        enabled_sources_label,
        widgets.VBox(source_checkbox_list),
        slider,
        search_strength,
        k_label,
        k,
        b_label,
        b,
        simple_output,
        button,
        output,
    ])
)

VBox(children=(Label(value='Search for a term in the corpus'), Text(value='health', description='Search:', pla…