# Visualize Query Results

Jenna Jordan

23 January 2020 - 4 February 2020

In [1]:
import pandas as pd
import numpy as np
import ipywidgets as widgets
import plotly.express as px
from IPython.display import display
from IPython.display import clear_output

In [2]:
# First query-results export: parse publication_date as datetimes and keep
# publisher as a categorical column.
df1 = pd.read_csv(
    '../Data/queries_4Feb.csv',
    parse_dates=['publication_date'],
    dtype={'publisher': 'category'},
)

In [3]:
# Second query-results export, read with the same date parsing and
# categorical publisher column as the first.
df2 = pd.read_csv(
    '../Data/queries2_4Feb.csv',
    parse_dates=['publication_date'],
    dtype={'publisher': 'category'},
)

In [4]:
# Third query-results export (6 Feb file), read with the same date parsing
# and categorical publisher column as the other two.
df3 = pd.read_csv(
    '../Data/queries1_6Feb.csv',
    parse_dates=['publication_date'],
    dtype={'publisher': 'category'},
)

In [5]:
# Join the three exports on their shared keys. merge() defaults to an inner
# join, so only (publication_date, publisher) pairs present in all three
# files survive.
df = df1.merge(df2, on=['publication_date', 'publisher'])
df = df.merge(df3, on=['publication_date', 'publisher'])

In [6]:
# Collapse to one row per publisher per month ('M' bins by month end),
# summing every remaining column within each bin.
df = (
    df.set_index('publication_date')
      .groupby('publisher')
      .resample('M')
      .sum()
)

In [7]:
# Reshape wide -> long: the column labels become a third index level
# ('query') and the stacked values become the 'count' column.
df_count = (
    df.stack()
      .rename('count')
      .rename_axis(['publisher', 'publication_date', 'query'])
      .reset_index()
)

In [8]:
# Divide every query column by BLN_total row-by-row, so each value becomes a
# proportion of that row's total; the denominator column itself is left out
# of the result.
df_prop = df.drop(columns=['BLN_total']).div(df['BLN_total'], axis=0)

# Reshape wide -> long, mirroring the layout of the count table: one row per
# (publisher, publication_date, query) with a 'proportion' column.
df_prop = (
    df_prop.stack()
           .rename('proportion')
           .rename_axis(['publisher', 'publication_date', 'query'])
           .reset_index()
)

In [9]:
# Put counts and proportions side by side in one long table. The outer join
# keeps rows that appear in only one of the two tables.
df_viz = df_count.merge(
    df_prop,
    how='outer',
    on=['publisher', 'publication_date', 'query'],
)

## Queries

#### BLN_total

```
(content:*) 
AND source_name:BulkLexisNexis
```

#### climate_change

```
(content:"climate change") 
AND source_name:BulkLexisNexis
```


#### global_warming

```
(content:"global warming") 
AND source_name:BulkLexisNexis
```

#### pollinator_population

```
(content:
    (insect* OR pollinator* OR bee* OR honeybee* OR butterfl* OR moth*) 
    AND (population OR *diversity OR biomass OR ecolog* OR ecosystem* OR entomolog*)
) 
AND source_name:BulkLexisNexis
```

#### pollinator_crisis

```
(content:
    (insect* OR pollinator* OR bee* OR honeybee* OR butterfl* OR moth*) 
    AND (population OR *diversity OR biomass OR ecolog* OR ecosystem* OR entomolog*) 
    AND (crisis OR "colony collapse" OR apocalypse OR extinct* OR declin* OR drop OR decreas* OR disappear*)
) 
AND source_name:BulkLexisNexis
```

#### insect_apocalypse

```
(content:"insect apocalypse") 
AND source_name:BulkLexisNexis
```

#### insect_armageddon

```
(content:"insect armageddon") 
AND source_name:BulkLexisNexis
```

#### colony_collapse

```
(content:"colony collapse") 
AND source_name:BulkLexisNexis
```

#### pollinator_population_refined
```
(content: 
    (insect* OR pollinator* OR bee OR bees OR honeybee OR honeybees OR butterfly OR butterflies OR moth OR moths) 
    AND (population OR "biological diversity" OR biodiversity OR biomass OR ecolog* OR ecosystem* OR entomolog*) 
    AND (study OR professor OR experiment OR research OR analysis)
    ) 
AND source_name:BulkLexisNexis
```

#### pollinator_decline_refined
```
(content: 
    (insect* OR pollinator* OR bee OR bees OR honeybee OR honeybees OR butterfly OR butterflies OR moth OR moths) 
    AND (population OR "biological diversity" OR biodiversity OR biomass OR ecolog* OR ecosystem* OR entomolog*) 
    AND (study OR professor OR experiment OR research OR analysis)
    AND (
        (crisis OR "colony collapse" OR apocalypse OR extinct* OR drop OR decreas* OR disappear*)
        OR ("insect decline"~5 OR "population decline"~5)
        )
    ) 
AND source_name:BulkLexisNexis
```

#### cc_and_gw

```
(content:"climate change" OR "global warming") 
AND source_name:BulkLexisNexis
```

In [9]:
# (label, value) pairs for the publisher dropdown: the label is shown to the
# user, the value is what gets compared against the 'publisher' column.
publisher_options = [
    ('New York Times', 'NYT'),
    ('Washington Post', 'WP'),
    ('Summary of World Broadcasts', 'SWB'),
    ('Associated Press', 'AP'),
    ('Agence France Presse', 'AFP'),
    ('Xinhua General News Service', 'XGNS'),
    ('United Press International', 'UPI'),
    ('Deutsche Presse-Agentur', 'DPA'),
    ('Inter Press Service', 'IPS'),
]
# Distinct query names found in the data, in order of first appearance.
query_options = df_viz['query'].unique().tolist()

In [10]:
# Toggle for the y-axis measure; the selected value ('count' or 'proportion')
# is used as the name of the df_viz column to plot.
choose_CountOrProp = widgets.ToggleButtons(
    description='Article: ',
    options=[
        ('Count', 'count'),
        ('Proportion', 'proportion'),
    ],
    layout=widgets.Layout(width='25%'),
)

In [11]:
# Toggle for what the lines in a chart compare. The value is the column the
# data is filtered on: comparing queries means fixing one publisher, and
# comparing publishers means fixing one query.
choose_comparison = widgets.ToggleButtons(
    description='Compare: ',
    options=[
        ('Queries', 'publisher'),
        ('Publishers', 'query'),
    ],
    layout=widgets.Layout(width='25%'),
)

In [12]:
# Dropdown selecting the single publisher (or query) to plot; it starts out
# populated with the publisher list.
choose_filter = widgets.Dropdown(
    options=publisher_options,
    description='Choose: ',
    layout=widgets.Layout(width='25%'),
)

In [13]:
# Button that appends a new chart for the current selections (its click
# handler is attached in a later cell).
plot_button = widgets.Button(description="Plot")

In [14]:
# Button that removes all charts drawn so far (its click handler is attached
# in a later cell).
clear_button = widgets.Button(description="Clear plots")

In [15]:
# Control bar layout: the two action buttons are stacked vertically and
# placed to the right of the three selection widgets in a single row.
buttons = widgets.VBox(
    children=[plot_button, clear_button],
    layout=widgets.Layout(width='25%'),
)
graph_controls = widgets.HBox(
    children=[choose_CountOrProp, choose_comparison, choose_filter, buttons],
)

# Keep the "Choose" dropdown in sync with the "Compare" toggle.
def change_graph_controls(change):
    """Replace the dropdown's options to match the new comparison type.

    `change` is the change dictionary passed by the widget observer; only
    its 'new' entry is read. A value other than 'publisher' or 'query'
    leaves the dropdown untouched.
    """
    options_for = {
        'publisher': publisher_options,
        'query': query_options,
    }
    selected = options_for.get(change['new'])
    if selected is not None:
        choose_filter.options = selected

choose_comparison.observe(change_graph_controls, names=['value'])

# function to create the plot
def create_plot(query_column = choose_comparison.value, query_value = choose_filter.value, y_axis = choose_CountOrProp.value):
    """Build a line chart of one measure over time for a filtered slice of df_viz.

    Parameters
    ----------
    query_column : {'publisher', 'query'}
        Column of df_viz to filter on. The other of the two columns is used
        to colour the lines.
    query_value : str
        Value that `query_column` must equal for a row to be plotted.
    y_axis : str
        Name of the df_viz column drawn on the y-axis (the widgets supply
        'count' or 'proportion').

    Returns
    -------
    The plotly figure, with a range slider enabled on the x-axis.

    Raises
    ------
    ValueError
        If `query_column` is neither 'publisher' nor 'query'.

    Note: the default argument values are read from the widgets once, when
    this function is defined, and do not follow later widget changes;
    callers should pass the current widget values explicitly.
    """
    # The column that is not being filtered on distinguishes the lines.
    color_by = {'publisher': 'query', 'query': 'publisher'}
    if query_column not in color_by:
        raise ValueError(
            "query_column must be 'publisher' or 'query', got " + repr(query_column)
        )

    # Filter with a boolean mask rather than a string-built query expression,
    # so values containing quotes or other special characters cannot break
    # (or alter) the filter.
    subset = df_viz[df_viz[query_column] == query_value]

    title_text = f"{query_column}: {query_value}"
    fig = px.line(subset, x="publication_date", y=y_axis, color=color_by[query_column], title=title_text)
    fig.update_layout(xaxis_rangeslider_visible=True)

    return fig

# Output area that holds the control bar and every chart added later.
out = widgets.Output()
# Anything displayed inside the `with out:` block is captured by the widget
# instead of going to the cell's own output.
with out:
    display(graph_controls)

# when the "Plot" button is clicked, append a new chart below what is already shown
def add_plot(b):
    """Click handler: draw a chart for the current widget selections.

    `b` is the clicked button (unused). The figure is displayed inside the
    shared output area, so it appears after any earlier charts.
    """
    with out:
        display(
            create_plot(
                query_column=choose_comparison.value,
                query_value=choose_filter.value,
                y_axis=choose_CountOrProp.value,
            )
        )

plot_button.on_click(add_plot)

# when the "Clear plots" button is clicked, remove all plots and show the controls again
def clear_plots(b):
    """Click handler: empty the output area, then re-display the control bar.

    `b` is the clicked button (unused).
    """
    # Clear before entering the capture context rather than inside it.
    # NOTE(review): Output.clear_output() appears to open its own
    # `with out:` context internally, and some ipywidgets releases seem to
    # stop capturing once a nested context exits, which would lose the
    # re-displayed controls - confirm against the installed version.
    out.clear_output()
    with out:
        display(graph_controls)

clear_button.on_click(clear_plots)

In [16]:
# Render the output area (control bar plus any charts) as this cell's result.
display(out)

Output()