## Text Analysis - Topic Modelling
### <span style='color: green'>SETUP </span> Prepare and Setup Notebook <span style='float: right; color: red'>MANDATORY</span>

In [None]:
import sys, os

root_folder = os.path.abspath(os.path.join(globals()['_dh'][-1], "../../.."))

sys.path = [ root_folder ] + sys.path

#from beakerx import *
#from beakerx.object import beakerx

from IPython.display import display
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import westac.notebooks.political_in_newspapers.corpus_data as corpus_data
import bokeh.plotting

%matplotlib inline

bokeh.plotting.output_notebook()

corpus_folder = os.path.join(root_folder, "data/textblock_politisk")

### <span style='color: green'>PREPARE</span> Load Topic Model <span style='float: right; color: red'>MANDATORY</span>

In [None]:
import westac.notebooks.political_in_newspapers.load_topic_model_gui as load_gui
import text_analytic_tools.text_analysis.topic_model_container as topic_model_container
import importlib

# Reload the GUI module so that edits to it are picked up without restarting
# the kernel. The result is bound to `_` so the returned module object is not
# an expression statement (which would otherwise be echoed as cell output).
_ = importlib.reload(load_gui)


def current_state():
    """Return the object produced by `TopicModelContainer.singleton()`."""
    return topic_model_container.TopicModelContainer.singleton()


load_gui.display_gui(corpus_folder, current_state())
# Commented-out alternative call, kept as in the original:
#load_gui.load_model(corpus_folder, current_state(), 'test.4days')


### <span style='color: green;'>VISUALIZE</span> Display Topic's Word Distribution as a Wordcloud<span style='color: red; float: right'>TRY IT</span>

In [None]:
import westac.notebooks.political_in_newspapers.topic_wordcloud_gui as wordcloud_gui

# Display the wordcloud GUI for the current topic-model state.
# Any exception is caught and only its message is printed (no traceback).
try:
    wordcloud_gui.display_gui(current_state())
except Exception as ex:
    print(ex)

### <span style='color: green;'>VISUALIZE</span> Topic-Word Distribution<span style='color: red; float: right'>TRY IT</span>


In [None]:
import westac.notebooks.political_in_newspapers.topic_word_distribution_gui as topic_word_distribution_gui

# Display the topic-word distribution GUI for the current topic-model state.
# Any exception is caught and only its message is printed (no traceback).
try:
    topic_word_distribution_gui.display_gui(current_state())
    #topic_word_distribution_gui.display_topic_tokens(current_state(), topic_id=0, n_words=100, output_format='Chart')
except Exception as ex:
    print(ex)

### <span style='color: green;'>VISUALIZE</span> Topic Trends over Time<span style='color: red; float: right'>RUN</span>

In [None]:
import westac.notebooks.political_in_newspapers.topic_trends_gui as trends_gui
import importlib
# Reload the GUI module so code changes take effect without a kernel restart;
# the result is assigned to `_` rather than left as a bare expression.
_ = importlib.reload(trends_gui)

# Display the topic trends GUI for the current topic-model state.
# Any exception is caught and only its message is printed (no traceback).
try:
    trends_gui.display_gui(current_state())
    # trends_gui.display_topic_trend(current_state().compiled_data.document_topic_weights, topic_id=0, year=None, year_aggregate='mean', output_format='Table')
except Exception as ex:
    print(ex)

### <span style='color: green;'>VISUALIZE</span> Publication Topic Network<span style='color: red; float: right'>TRY IT</span>
The green nodes are documents, and the blue nodes are topics. The edges (lines) indicate the strength of a topic in the connected document. The width of an edge is proportional to the strength of the connection. Note that only edges with a strength above a certain threshold are displayed.

In [None]:
import westac.notebooks.political_in_newspapers.publication_topic_network_gui as publication_topic_network_gui
import importlib
# Reload the GUI module so code changes take effect without a kernel restart;
# the result is assigned to `_` rather than left as a bare expression.
_ = importlib.reload(publication_topic_network_gui)

# Display the publication-topic network GUI for the current topic-model state.
# Any exception is caught and only its message is printed (no traceback).
try:
    publication_topic_network_gui.display_gui(current_state())
except Exception as ex:
    print(ex)

### <span style='color: green;'>VISUALIZE</span> Topic Trends Overview<span style='color: red; float: right'>TRY IT</span>

- The topic shares are displayed as a scattered heatmap plot, using a color gradient based on each topic's weight in the document.
- [Stanford’s Termite software](http://vis.stanford.edu/papers/termite) uses a similar visualization.

In [None]:
import westac.notebooks.political_in_newspapers.topic_trends_overview_gui as overview_gui
import importlib
# Reload the GUI module so code changes take effect without a kernel restart;
# the result is assigned to `_` rather than left as a bare expression.
_ = importlib.reload(overview_gui)

# Display the topic trends overview GUI for the current topic-model state.
# NOTE(review): only ValueError is caught here, whereas the other visualization
# cells in this notebook catch Exception; any other error surfaces as a full
# traceback. Confirm this difference is intended.
try:
    overview_gui.display_gui(current_state())
except ValueError as ex:
    print(ex)

### <span style='color: green;'>VISUALIZE</span> WORK IN PROGRESS!!! Topic Cooccurrence<span style='color: red; float: right'>TRY IT</span>

Computes a weighted graph of topics co-occurring in the same document. Topics are defined as co-occurring if they both exist in the same document, both having weights above the threshold. Weights are the number of co-occurrences (binary yes or no). Node sizes reflect topic proportions over the entire corpus (normalized document) length, and are computed in the same way as node sizes in LDAvis.

In [None]:
import westac.notebooks.political_in_newspapers.topic_co_occurrence_gui as co_occurrence_gui
import westac.common.utility as utility
import text_analytic_tools.common.network.plot_utility as plot_utility
import importlib
# Reload the GUI module and the two utility modules so code changes take effect
# without a kernel restart; `utility` and `plot_utility` are not otherwise used
# in this cell.
_ = importlib.reload(co_occurrence_gui)
_ = importlib.reload(plot_utility)
_ = importlib.reload(utility)

# Display the topic co-occurrence GUI for the current topic-model state.
# NOTE(review): `documents` is not defined in any cell visible in this notebook.
# Unless it is defined elsewhere, evaluating the call raises NameError, which the
# handler below prints as a message. Confirm the intended second argument.
try:
    co_occurrence_gui.display_gui(current_state(), documents)
except Exception as ex:
    print(ex)