## Text Analysis - Topic Modelling
### <span style='color: green'>SETUP </span> Prepare and Setup Notebook <span style='float: right; color: red'>MANDATORY</span>

In [None]:
import os
import sys
import importlib

# Locate the project root so that the project's packages can be imported.
project_name = 'welfare_state_analytics'
if os.environ.get('JUPYTER_IMAGE_SPEC', '') == 'westac_lab':
    # Fixed checkout location used when JUPYTER_IMAGE_SPEC equals 'westac_lab'.
    root_folder = '/home/jovyan/work/welfare_state_analytics'
else:
    # Keep the part of the working directory that precedes the project name and
    # re-append the name. If the name does not occur in the working directory,
    # this yields <cwd>/<project_name>.
    root_folder = os.path.join(os.getcwd().split(project_name)[0], project_name)

corpus_folder = '/data/westac/sou_kb_labb'

# Append only when missing. Rebuilding sys.path through set() would put the
# entries in arbitrary order, and that order decides which module wins when a
# name exists in more than one location.
if root_folder not in sys.path:
    sys.path.append(root_folder)

from IPython.display import display
from IPython.core.interactiveshell import InteractiveShell

# Have IPython show the value of every top-level expression in a cell,
# not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# Project module; expected to resolve through the project root added to sys.path above.
import notebooks.political_in_newspapers.corpus_data as corpus_data
import bokeh.plotting

from notebooks.common import setup_pandas

# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

# Direct Bokeh output to the notebook.
bokeh.plotting.output_notebook()
# Project helper from notebooks.common; presumably applies pandas display
# settings -- its body is not visible here, confirm before relying on it.
setup_pandas()

### <span style='color: green'>PREPARE</span> Load Topic Model <span style='float: right; color: red'>MANDATORY</span>

In [None]:
import notebooks.common.load_topic_model_gui as load_gui
import text_analytic_tools.text_analysis.topic_model_container as topic_model_container

# Re-import the GUI module from disk. Assigning to `_` keeps the returned
# module object out of the cell output.
_ = importlib.reload(load_gui)


def current_state():
    """Return the TopicModelContainer singleton used as the notebook's shared state."""
    return topic_model_container.TopicModelContainer.singleton()


load_gui.display_gui(corpus_folder, current_state())
# Direct load call, kept disabled:
#load_gui.load_model(corpus_folder, current_state(), 'test.4days')

In [None]:

# Show the first rows of the current state's document-topic weights
# (uses .head(); presumably a pandas DataFrame -- confirm against the container).
current_state().compiled_data.document_topic_weights.head()


### <span style='color: green;'>VISUALIZE</span> Display Topic's Word Distribution as a Wordcloud<span style='color: red; float: right'> TRY IT</span>

In [None]:
import notebooks.political_in_newspapers.notebook_gui.topic_wordcloud_gui as wordcloud_gui

# Show the word cloud GUI for the shared model state. Any error is caught and
# only its message is printed.
try:
    wordcloud_gui.display_gui(current_state())
except Exception as error:
    print(error)

### <span style='color: green;'>VISUALIZE</span> Topic-Word Distribution<span style='color: red; float: right'>TRY IT</span>


In [None]:
import notebooks.political_in_newspapers.notebook_gui.topic_word_distribution_gui as topic_word_distribution_gui

# Open the topic-word distribution GUI on the shared model state. Any error is
# caught and only its message is printed.
try:
    topic_word_distribution_gui.display_gui(current_state())
    # Direct call, kept disabled:
    #topic_word_distribution_gui.display_topic_tokens(current_state(), topic_id=0, n_words=100, output_format='Chart')
except Exception as error:
    print(error)

### <span style='color: green;'>VISUALIZE</span> Topic Trends over Time<span style='color: red; float: right'>RUN</span>

In [None]:
import notebooks.political_in_newspapers.notebook_gui.topic_trends_gui as trends_gui
import text_analytic_tools.text_analysis.topic_weight_over_time as topic_weight_over_time

# Refresh both modules from disk, in this order: topic_weight_over_time first,
# then the GUI module. Assigning to `_` keeps the module objects out of the output.
_ = importlib.reload(topic_weight_over_time)
_ = importlib.reload(trends_gui)

# Open the topic trends GUI. Any error is caught and only its message is printed.
try:
    trends_gui.display_gui(current_state())
    # Direct call, kept disabled:
    # trends_gui.display_topic_trend(current_state().compiled_data.document_topic_weights, topic_id=0, year=None, year_aggregate='mean', output_format='Table')
except Exception as error:
    print(error)

### <span style='color: green;'>VISUALIZE</span> Topic Trends Overview<span style='color: red; float: right'>TRY IT</span>

- The topic shares are displayed as a scattered heatmap plot, using a color gradient based on each topic's weight in the document.
- [Stanford’s Termite software](http://vis.stanford.edu/papers/termite) uses a similar visualization.

In [None]:
import notebooks.political_in_newspapers.notebook_gui.topic_trends_overview_gui as overview_gui

# Refresh the GUI module from disk; `_` keeps the module object out of the output.
_ = importlib.reload(overview_gui)

# Open the trends overview GUI. Only ValueError is caught and printed here;
# every other exception propagates.
try:
    overview_gui.display_gui(current_state())
except ValueError as error:
    print(error)

### <span style='color: green;'>BROWSE</span> Browse Topic Documents<span style='color: red; float: right'>TRY IT</span>

Computes a weighted graph of topics co-occurring in the same document. Topics are defined as co-occurring if they both exist in the same document and both have weights above the threshold. Edge weights are the number of co-occurrences (binary yes or no). Node size reflects topic proportions over the entire corpus (normalized document length), and is computed in the same way as node sizes are computed in LDAvis.

In [None]:
import notebooks.political_in_newspapers.notebook_gui.topic_document_texts_gui as texts_gui

# Reload corpus_data before the GUI module: a module reloaded earlier keeps any
# `from ... import` bindings to the old objects of modules reloaded after it.
# (Presumably texts_gui uses corpus_data -- the order is harmless if it does not.)
_ = importlib.reload(corpus_data)
_ = importlib.reload(texts_gui)

# No try/except here: the previous handler only re-raised, so errors propagate
# exactly as before.
texts_gui.display_gui(current_state())

### <span style='color: green;'>VISUALIZE</span> Topic-Topic Network<span style='color: red; float: right'>TRY IT</span>

Computes a weighted graph of topics co-occurring in the same document. Topics are defined as co-occurring in a document if they both have a weight above a given threshold. The edge weights are the number of co-occurrences (binary yes or no). Node size reflects topic proportions over the entire corpus, computed in accordance with LDAvis topic proportions.

In [None]:
import notebooks.political_in_newspapers.notebook_gui.topic_topic_network_gui as topic_topic_gui
import westac.common.utility as utility
import text_analytic_tools.common.network.plot_utility as plot_utility
import text_analytic_tools.common.network.utility as plot_utils

# Reload the helper modules first and the GUI module last: a module reloaded
# earlier keeps any `from ... import` bindings to the old objects of modules
# reloaded after it. (Presumably the GUI module uses these helpers -- the order
# is harmless if it does not.)
_ = importlib.reload(plot_utility)
_ = importlib.reload(plot_utils)
_ = importlib.reload(utility)
_ = importlib.reload(topic_topic_gui)

# Open the topic-topic network GUI. Any error is caught and only its message is printed.
try:
    topic_topic_gui.display_gui(current_state())
except Exception as ex:
    print(ex)