In [23]:
import pandas as pd
import numpy as np
from nltk.tokenize import word_tokenize
from iso3166 import countries

import plotly.express as px
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.graph_objs as go

In [66]:
# Load Data
# Read the speech transcripts; the first spreadsheet column is used as the index.
data = pd.read_excel(
    "data/transcripts.xlsx",
    index_col=0,
)

In [67]:
def country_names(x):
    """Translate a country code from the dataset into a readable country name.

    Args:
        x: Country code as stored in the ``country`` column. The special
            value ``"EU_"`` is handled here before the ISO 3166 lookup and
            maps to ``"European Union"``.

    Returns:
        The country name reported by ``iso3166.countries`` for ``x``,
        ``"European Union"`` for ``"EU_"``, or the sentinel string
        ``'error'`` when the code cannot be looked up.
    """
    if x == "EU_":
        return "European Union"
    try:
        return countries.get(x).name
    except (KeyError, AttributeError, TypeError):
        # KeyError: code unknown to iso3166.
        # AttributeError / TypeError: value is not a usable key (e.g. NaN).
        # Anything else is a genuine bug and is allowed to propagate instead
        # of being silently turned into 'error'.
        return 'error'

In [68]:
# Resolve every country code to its readable name, element by element.
data['country_names'] = data['country'].map(country_names)

In [69]:
# Let pandas render up to 100 rows before truncating the display.
pd.options.display.max_rows = 100
# Show only the rows whose resolved name is the European Union.
data[data['country_names'].eq("European Union")]

Unnamed: 0,country,year,transcript,country_names
6595,EU_,2011,Europe presents to you a message of \ncooperat...,European Union
6789,EU_,2012,﻿The advance of democracy has taken place in\n...,European Union
6983,EU_,2013,A year ago \nwhen we met in the General Assemb...,European Union
7176,EU_,2014,The world today is much more dangerous than \n...,European Union
7370,EU_,2015,I am here today to reassure the General Assemb...,European Union
7562,EU_,2016,"Last year, I stood here to assure the General ...",European Union
7758,EU_,2017,The European Union (EU) stands for freedom and...,European Union
7954,EU_,2018,The European Union is fighting intensely to pr...,European Union


In [3]:
# Distinct years present in the dataset.
year = np.array(pd.unique(data["year"]))
# Dropdown choices: every distinct country code followed by the catch-all 'ALL'.
drop_values = np.append(pd.unique(data["country"]), 'ALL')

In [4]:
# Build App
app = JupyterDash(__name__)

# Page layout: a header/controls section, a caption, and the scatter plot.
app.layout = html.Div(children=[

    # Titles, country selector and search-term box
    html.Div(children=[
        html.H1("Modern Data Analytics Project"),
        html.H2("Politicians and Climate Change"),
        html.Div("This Dashboard displays and summarizes some UN-speeches"),

        html.Br(),

        # One option per entry of drop_values; label and value are identical
        dcc.Dropdown(
            id='my_dropdown',
            options=[dict(label=code, value=code) for code in drop_values],
            value='ALB',
            placeholder='Select a country',
            style={'width': '40%'},
        ),

        html.Br(),

        # Free-text term whose occurrences are counted in the callback
        html.Div(children=[
            dcc.Input(
                id='term_select',
                type='text',
                debounce=False,
                value='climate',
                required=True,
            ),
        ]),
    ]),

    # Caption filled in by the callback
    html.Div(id='text_out', children=[]),

    # Figure filled in by the callback
    html.Div(children=[
        dcc.Graph(id='the_graph', figure={}),
    ]),
])

@app.callback(
    # Both outputs are passed the same way (bare, not list-wrapped) so the
    # callback signature is unambiguous.
    Output(component_id='text_out', component_property='children'),
    Output(component_id='the_graph', component_property='figure'),
    Input(component_id='my_dropdown', component_property='value'),
    Input(component_id='term_select', component_property='value'),
)
def update_graph(my_dropdown, term_select):
    """Recompute the caption and the scatter plot for the current selection.

    Args:
        my_dropdown: Value of the country dropdown: a code from the
            ``country`` column, or ``'ALL'`` to use every row of ``data``.
        term_select: Text typed into the term box. Matching is done on the
            lower-cased term against lower-cased tokens. ``None`` is treated
            as an empty term.

    Returns:
        A ``(caption, figure)`` tuple: the caption string for ``text_out``
        and a Plotly scatter figure with one point per selected speech
        (x = the speech's year, y = number of tokens equal to the term).
    """
    if my_dropdown == 'ALL':
        data_filtered = data
    else:
        data_filtered = data.loc[data['country'] == my_dropdown]

    # The text box may report no value at all; count an empty term instead of
    # failing on None.lower().
    term = (term_select or '').lower()

    # One count per selected speech: how many tokens equal the term.
    counts = [
        word_tokenize(transcript.lower()).count(term)
        for transcript in data_filtered['transcript']
    ]

    # x must come from the same rows as the counts. Using every unique year
    # of the whole dataset misaligns (or breaks) the plot whenever the
    # selection does not have exactly one speech per year.
    scatter = px.scatter(
        x=data_filtered['year'].to_numpy(),
        y=counts,
        labels={'x': 'year', 'y': 'occurrences'},
    )

    # NOTE(review): the plotted values are per-speech occurrence counts, not a
    # number of speeches; the caption wording is kept unchanged here.
    container = 'Number of speeches containing the term "{}" given by {}'.format(term_select, my_dropdown)

    return container, scatter
    
    

# Start the Dash server for the app defined above.
# NOTE(review): mode='jupyterlab' is forwarded to JupyterDash; it presumably
# shows the app in a JupyterLab tab rather than inline in the cell output —
# confirm against the jupyter-dash documentation.
app.run_server(mode='jupyterlab')