## Text Analysis - Topic Modeling
### <span style='color: green'>SETUP </span> Prepare Notebook and Load Model <span style='float: right; color: red'>MANDATORY</span>

In [12]:
from penelope.notebook.topic_modelling import TopicModelContainer
from penelope.topic_modelling import InferredTopicsData
from penelope.notebook.topic_modelling import topic_trends_overview_gui as ntm

# Folder passed to `InferredTopicsData.load` and to the container's `train_corpus_folder`/`folder` in this cell.
# NOTE(review): absolute, user-specific path — it will not resolve on another machine; consider deriving it
# from a configurable data folder instead.
PERSISTED_INFERRED_MODEL_SOURCE_FOLDER: str = '/home/roger/source/inidun/courier-lab/content/data/tm/tm-050-v1.0-lowercase/'


def current_state() -> TopicModelContainer:
    """Load the persisted inferred topics and place them in a new container.

    The inferred topics are read from ``PERSISTED_INFERRED_MODEL_SOURCE_FOLDER``;
    the same folder is also handed to the container as both its
    ``train_corpus_folder`` and its ``folder``.

    Returns:
        Whatever ``TopicModelContainer().update(...)`` returns (annotated as a
        ``TopicModelContainer``).
    """
    source_folder: str = PERSISTED_INFERRED_MODEL_SOURCE_FOLDER
    # Load first, so a failing load happens before any container is created.
    loaded_topics = InferredTopicsData.load(folder=source_folder)
    return TopicModelContainer().update(
        inferred_topics=loaded_topics,
        train_corpus_folder=source_folder,
        folder=source_folder,
    )

# NOTE(review): in this cell `ntm` is the `topic_trends_overview_gui` submodule and `current_state` is the
# function defined above. A later cell rebinds both names (`ntm` to the `penelope.notebook.topic_modelling`
# package, `current_state` to `TopicModelContainer.singleton`), so the notebook's behavior depends on the
# order in which cells are executed. The recorded execution counts (12 here, 1 for the next cell) are also
# out of order — verify with Restart & Run All.
state = current_state()

# Build the trends-overview GUI on the loaded state, render its layout, then invoke its update handler.
gui: ntm.TopicTrendsOverviewGUI = ntm.TopicTrendsOverviewGUI(state=state).setup()
display(gui.layout())
gui.update_handler()



VBox(children=(HBox(children=(VBox(children=(HTML(value='<b>Years</b> 1947-1957'), IntRangeSlider(value=(1947,…

In [1]:

from typing import Callable

import __paths__  # pylint: disable=unused-import
import bokeh.plotting
import penelope.notebook.topic_modelling as ntm
from IPython.display import display
from penelope.utility import pandas_utils

from notebooks.source.courier import overload_state_on_loaded_handler

# Send Bokeh output to the notebook, suppressing the load banner.
bokeh.plotting.output_notebook(hide_banner=True)
# presumably applies the project's preferred pandas display options — TODO confirm in penelope.utility
pandas_utils.set_default_options()

# Override the folders exposed by the project's `__paths__` module.
# NOTE(review): hard-coded absolute path — it must exist on the machine running the notebook.
__paths__.data_folder = "/data/inidun"
__paths__.resources_folder = f"{__paths__.data_folder}/resources"

# Folder handed to the GUIs below that take a `data_folder` argument.
corpus_folder: str = __paths__.data_folder

# Every cell below obtains its container by calling `current_state()`.
# NOTE(review): this rebinds `current_state`, which an earlier cell defines as a function.
current_state: Callable[[], ntm.TopicModelContainer] = ntm.TopicModelContainer.singleton
# presumably `overload_state_on_loaded_handler` is invoked when a model is loaded into the container — TODO confirm
current_state().register(None, callback=overload_state_on_loaded_handler)

### <span style='color: green'>PREPARE</span> Load Topic Model <span style='float: right; color: red'>MANDATORY</span>

In [None]:
# Build the model-loading GUI over `corpus_folder`, bound to the current state, then render its layout.
load_gui: ntm.LoadGUI = ntm.LoadGUI(data_folder=corpus_folder, state=current_state()).setup()
display(load_gui.layout())

### <span style='color: green;'>BROWSE</span> Find topics by token<span style='color: red; float: right'>TRY IT</span>

Displays the topics in which a given token is among the top-ranked (most dominant) words.

In [None]:
# Construct the find-topic-documents GUI on the current state, run its setup, and render its layout.
# NOTE(review): `year_span=(1990, 1992)` is hard-coded — confirm it lies within the years of the loaded corpus.
fd_ui = ntm.WithPivotKeysText.FindTopicDocumentsGUI(
    current_state(), vertical=True, year_span=(1990, 1992), width='160px'
).setup()
display(fd_ui.layout())

### <span style='color: green;'>BROWSE</span> Browse Topic Documents<span style='color: red; float: right'>TRY IT</span>

Displays documents in which a topic occurs above a given threshold.

In [None]:
# Construct the browse-topic-documents GUI on the current state, run its setup, and render its layout.
# NOTE(review): `year_span=(1990, 1995)` is hard-coded — confirm it lies within the years of the loaded corpus.
td_ui = ntm.WithPivotKeysText.BrowseTopicDocumentsGUI(
    current_state(), vertical=True, year_span=(1990, 1995), width='400px'
).setup()
display(td_ui.layout())

### <span style='color: green;'>VISUALIZE</span> Display Topic's Word Distribution as a Wordcloud<span style='color: red; float: right'> TRY IT</span>

In [None]:
# Show the topic wordcloud GUI for the container returned by `current_state()`.
ntm.display_topic_wordcloud_gui(current_state())

### <span style='color: green;'>VISUALIZE</span> Topic-Word Distribution<span style='color: red; float: right'>TRY IT</span>


In [None]:
# Show the topic-word distribution GUI for the container returned by `current_state()`.
ntm.display_topic_word_distribution_gui(current_state())

### <span style='color: green;'>VISUALIZE</span> Topic Trends over Time<span style='color: red; float: right'>RUN</span>

In [None]:
# Show the topic trends GUI for the container returned by `current_state()`.
ntm.display_topic_trends_gui(current_state())

### <span style='color: green;'>VISUALIZE</span> Topic Trends Overview<span style='color: red; float: right'>TRY IT</span>

- The topic shares are displayed as a scattered heatmap plot, using a color gradient based on each topic's weight in the document.
- [Stanford’s Termite software](http://vis.stanford.edu/papers/termite) uses a similar visualization.

In [None]:
# Show the topic trends overview GUI for the container returned by `current_state()`.
ntm.display_topic_trends_overview_gui(current_state())

### <span style='color: green;'>VISUALIZE</span> Topic-Topic Network<span style='color: red; float: right'>TRY IT</span>

Computes a weighted graph of topics co-occurring in the same document. Two topics are defined as co-occurring in a document if both have a weight above the given threshold. The edge weights are the number of co-occurrences (binary yes or no per document). Node size reflects topic proportions over the entire corpus, computed in accordance with LDAvis topic proportions.

In [None]:
# Show the topic-topic network GUI for the container returned by `current_state()`.
ntm.display_topic_topic_network_gui(current_state())

### <span style='color: green;'>VISUALIZE</span> Document Topic Network<span style='color: red; float: right'>TRY IT</span>


In [None]:
# Construct the default topic-document network GUI on the current state, run its setup, and render its layout.
# The variable is annotated as `ntm.TopicDocumentNetworkGui` — presumably a base class of
# `DefaultTopicDocumentNetworkGui`; TODO confirm.
# NOTE(review): `pivot_key_specs=None` is passed explicitly; its effect is not visible from this notebook.
dtdn_ui: ntm.TopicDocumentNetworkGui = ntm.DefaultTopicDocumentNetworkGui(
    state=current_state(), pivot_key_specs=None
).setup()
display(dtdn_ui.layout())

### <span style='color: green;'>VISUALIZE</span> Pivot-Topic Network<span style='color: red; float: right'>TRY IT</span>


In [None]:
# Construct the pivot-topic network GUI on the current state, run its setup, and render its layout.
# NOTE(review): `pivot_key_specs=None` is passed explicitly; its effect is not visible from this notebook.
ptn_ui: ntm.PivotTopicNetworkGUI = ntm.PivotTopicNetworkGUI(pivot_key_specs=None, state=current_state()).setup()
display(ptn_ui.layout())

### <span style='color: green;'>VISUALIZE</span> Focus-Topic Document Network<span style='color: red; float: right'>TRY IT</span>


In [None]:
# Construct the focus-topic document network GUI on the current state, run its setup, and render its layout.
# The variable is annotated as `ntm.TopicDocumentNetworkGui` — presumably a base class of
# `FocusTopicDocumentNetworkGui`; TODO confirm.
# NOTE(review): `pivot_key_specs=None` is passed explicitly; its effect is not visible from this notebook.
ftdn_ui: ntm.TopicDocumentNetworkGui = ntm.FocusTopicDocumentNetworkGui(
    state=current_state(), pivot_key_specs=None
).setup()
display(ftdn_ui.layout())

### <span style='color: green;'>VISUALIZE</span> Topic-Token Network<span style='color: red; float: right'>TRY IT</span>

In [None]:
# Style overrides forwarded to the network GUI; sets the edges' 'curve-style' to 'haystack'
# (presumably a Cytoscape-style stylesheet entry — TODO confirm against the GUI implementation).
custom_styles = {'edges': {'curve-style': 'haystack'}}
# Unlike the cells above, this GUI is created from `corpus_folder` and is not passed `current_state()`.
w = ntm.create_topics_token_network_gui(data_folder=corpus_folder, custom_styles=custom_styles)
display(w.layout())