In [1]:
import pandas as pd
import numpy as np
from nltk.tokenize import word_tokenize

import plotly.express as px
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.graph_objs as go

In [2]:
# Read the speech transcripts from the Excel workbook; the first
# spreadsheet column is used as the DataFrame index.
data = pd.read_excel(io="data/transcripts.xlsx", index_col=0)

In [14]:
# Preview the first rows whose country code is "ZWE"
data[data["country"].eq("ZWE")].head()

Unnamed: 0,country,year,transcript
1393,ZWE,1980,﻿On behalf of the President of the Republic of...
1538,ZWE,1981,38 I am very grateful for the opportunity to a...
1685,ZWE,1982,It is my privilege to deliver the statement of...
1834,ZWE,1983,﻿100.\tMy first and most pleasant duty is to c...
1984,ZWE,1984,"﻿First, Mr. President, I wish to congratulate ..."


In [3]:
# Distinct years that occur in the data set
year = np.array(data["year"].unique())
# Dropdown choices: every distinct country code followed by an 'ALL' entry
drop_values = np.append(data["country"].unique(), 'ALL')

In [4]:
# Create the Dash application and declare its page layout
app = JupyterDash(__name__)

app.layout = html.Div(
    children=[
        # Header section: titles, short description and the two user controls
        html.Div(
            children=[
                html.H1("Modern Data Analytics Project"),
                html.H2("Politicians and Climate Change"),
                html.Div("This Dashboard displays and summarizes some UN-speeches"),
                html.Br(),
                # Country selector, populated from `drop_values`
                dcc.Dropdown(
                    id='my_dropdown',
                    options=[dict(label=code, value=code) for code in drop_values],
                    value='ALB',
                    placeholder='Select a country',
                    style={'width': '40%'},
                ),
                html.Br(),
                # Free-text search term; debounce is off, so the value is
                # reported on every change of the input
                html.Div(
                    children=[
                        dcc.Input(
                            id='term_select',
                            type='text',
                            debounce=False,
                            value='climate',
                            required=True,
                        ),
                    ]
                ),
            ]
        ),
        # Text line filled in by the callback
        html.Div(id='text_out', children=[]),
        # Figure filled in by the callback
        html.Div(
            children=[
                dcc.Graph(id='the_graph', figure={}),
            ]
        ),
    ]
)

@app.callback(
    Output(component_id='text_out', component_property='children'),
    Output(component_id='the_graph', component_property='figure'),
    Input(component_id='my_dropdown', component_property='value'),
    Input(component_id='term_select', component_property='value')
)
def update_graph(my_dropdown, term_select):
    """Refresh the caption and the scatter plot for the current selection.

    Counts, for every transcript of the selected country, how many tokens of
    the lower-cased transcript equal the lower-cased search term, and plots
    these counts against the year of the corresponding row.

    Parameters
    ----------
    my_dropdown : str or None
        Value of the country dropdown. ``'ALL'`` selects every row of
        ``data``; any other value selects the rows whose ``country`` column
        equals it (no rows match when the value is ``None``).
    term_select : str or None
        Value of the search-term input. ``None`` is treated as an empty
        term, which matches no token.

    Returns
    -------
    tuple
        ``(caption, figure)`` for the ``text_out`` children and the
        ``the_graph`` figure, in the order the outputs are declared.
    """
    if my_dropdown == 'ALL':
        data_filtered = data
    else:
        data_filtered = data.loc[data['country'] == my_dropdown]

    # Guard against a missing input value instead of failing on None.lower()
    term = (term_select or '').lower()

    # One count per filtered row. Iterating over the values (rather than
    # looking rows up by index label) is safe with duplicate labels, and
    # non-string cells (e.g. missing transcripts) simply count as zero.
    counts = [
        word_tokenize(text.lower()).count(term) if isinstance(text, str) else 0
        for text in data_filtered["transcript"]
    ]

    # Use the years of the filtered rows so x and y always have the same
    # length and every count is paired with the year of its own speech.
    scatter = px.scatter(
        x=data_filtered["year"].tolist(),
        y=counts,
        labels={"x": "Year", "y": "Occurrences"},
    )

    container = 'Number of speeches containing the term "{}" given by {}'.format(term_select, my_dropdown)

    return container, scatter
    
    

# Start the Dash server in 'jupyterlab' display mode
app.run_server(mode="jupyterlab")