In [3]:
from elasticsearch import Elasticsearch

In [8]:
import plotly as p
import plotly.plotly as py
import pandas as pd
# Enable plotly's offline notebook rendering before any iplot() call below.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook - confirm against the plotly.offline docs.
p.offline.init_notebook_mode(connected=True)

import collections

In [5]:
# Shared Elasticsearch client passed to every query helper below; it targets
# a node on localhost:9200 (the port is given as a string here).
es = Elasticsearch([{'host': 'localhost', 'port': '9200'}])

In [280]:
# Colour scale for the choropleth traces: [position in 0..1, colour] pairs,
# running from a light to a dark purple.
SCL = [
    [0.0, 'rgb(242,240,247)'],
    [0.2, 'rgb(218,218,235)'],
    [0.4, 'rgb(188,189,220)'],
    [0.6, 'rgb(158,154,200)'],
    [0.8, 'rgb(117,107,177)'],
    [1.0, 'rgb(84,39,143)'],
]

# Two-letter postal code -> full state name for the 50 US states.
# "MA" is Massachusetts (it was previously mislabelled as "Maine").
STATES = {
    "AK": "Alaska", "AL": "Alabama", "AZ": "Arizona", "AR": "Arkansas",
    "CA": "California", "CO": "Colorado", "CT": "Connecticut",
    "DE": "Delaware", "FL": "Florida", "GA": "Georgia", "HI": "Hawaii",
    "ID": "Idaho", "IL": "Illinois", "IN": "Indiana", "IA": "Iowa",
    "KS": "Kansas", "KY": "Kentucky", "LA": "Louisiana", "ME": "Maine",
    "MD": "Maryland", "MA": "Massachusetts", "MI": "Michigan",
    "MN": "Minnesota", "MS": "Mississippi", "MO": "Missouri",
    "MT": "Montana", "NE": "Nebraska", "NV": "Nevada",
    "NH": "New Hampshire", "NJ": "New Jersey", "NM": "New Mexico",
    "NY": "New York", "NC": "North Carolina", "ND": "North Dakota",
    "OH": "Ohio", "OK": "Oklahoma", "OR": "Oregon", "PA": "Pennsylvania",
    "RI": "Rhode Island", "SC": "South Carolina", "SD": "South Dakota",
    "TN": "Tennessee", "TX": "Texas", "UT": "Utah", "VT": "Vermont",
    "VA": "Virginia", "WA": "Washington", "WV": "West Virginia",
    "WI": "Wisconsin", "WY": "Wyoming",
}

# Full state name -> two-letter postal code. Derived from STATES so the two
# tables cannot drift apart; the hand-written version mapped "Maine" to "MA"
# and had no entry for Massachusetts.
STATE_CODES = {name: code for code, name in STATES.items()}

In [172]:
def get_layout():
    """Return the plotly layout dict for the single-year density map.

    The layout carries the figure title and a ``geo`` section restricted to
    the USA with an Albers USA projection and white lakes.
    """
    geo_settings = {
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',
    }
    return {
        'title': 'USA Arson Density (per 100,000 people)',
        'geo': geo_settings,
    }

In [173]:
def query_index(es, index_name, year, max_size=51):
    """Return the raw hits of ``index_name`` whose ``year`` field matches.

    Args:
        es: client object exposing ``search(index=..., q=..., size=...)``.
        index_name: name of the index to search.
        year: year to filter on; may be a string or an int.
        max_size: maximum number of hits requested from the server.

    Returns:
        The list found at ``response["hits"]["hits"]``.
    """
    # Format rather than concatenate so an integer year does not raise TypeError.
    query = "year:{}".format(year)
    response = es.search(index=index_name, q=query, size=max_size)
    return response["hits"]["hits"]

In [174]:
def build_density_series(hits):
    """Build a two-column frame of state code and population density.

    Args:
        hits: iterable of search hits; each hit's ``_source`` must provide
            ``state`` (a key of ``STATE_CODES``) and ``pop_density``.

    Returns:
        DataFrame with columns ``state`` (postal code) and ``pop_density``.
        If several hits share a state, the last one wins.

    Raises:
        KeyError: if a hit's state is not present in ``STATE_CODES``.
    """
    density_by_code = {}
    for row in hits:
        source = row["_source"]
        density_by_code[STATE_CODES[source["state"]]] = source["pop_density"]
    # Materialise the items: on Python 3 ``dict.items()`` is a view, which
    # older pandas releases refuse in the DataFrame constructor.
    return pd.DataFrame(list(density_by_code.items()),
                        columns=["state", "pop_density"])

In [175]:
def build_text_series(es, indices, year):
    """Build the per-state hover text by concatenating values from ``indices``.

    For every index name in ``indices`` the hits for ``year`` are fetched via
    ``query_index`` and ``"<index>: <value><br>"`` is appended to the text of
    the hit's state, where ``<value>`` is the hit's field named after the index.

    Args:
        es: Elasticsearch client forwarded to ``query_index``.
        indices: iterable of index names; each is also used as the field name
            read from the hit's ``_source``.
        year: year forwarded to ``query_index``.

    Returns:
        DataFrame with columns ``state`` and ``text``.
    """
    # NOTE(review): rows are keyed by the raw ``source["state"]`` value, while
    # build_density_series keys by STATE_CODES[...]; confirm both yield the
    # same key, otherwise merging the two frames on "state" will not line up.
    text_by_state = collections.defaultdict(str)
    for index in indices:
        for row in query_index(es, index, year):
            source = row["_source"]
            text_by_state[source["state"]] += index + ": " + str(source[index]) + "<br>"
    # list(...) keeps the constructor happy on Python 3 with older pandas,
    # where a dict items view is rejected.
    return pd.DataFrame(list(text_by_state.items()), columns=["state", "text"])

In [176]:
def build_data_per_year(es, indices, years=None):
    """Return one merged density/text DataFrame per year.

    Args:
        es: Elasticsearch client used for all queries.
        indices: index names forwarded to ``build_text_series``.
        years: iterable of years to fetch; defaults to "2009" through "2014".

    Returns:
        List of DataFrames in the order of ``years``. Each is the outer merge
        on ``state`` of the density frame (from the ``arson_density`` index)
        and the text frame for that year.
    """
    if years is None:
        years = ["2009", "2010", "2011", "2012", "2013", "2014"]

    densities_per_year = []
    for year in years:
        hits = query_index(es, "arson_density", year)
        density = build_density_series(hits)
        text = build_text_series(es, indices, year)
        # Outer merge keeps states that appear in only one of the two frames.
        densities_per_year.append(density.merge(text, on="state", how="outer"))
    return densities_per_year

In [177]:
# Despite the name, density_df is a list of DataFrames (one per year) as
# returned by build_data_per_year; hover text is built from the "method" and
# "ownership" indices.
density_df = build_data_per_year(es, ["method", "ownership"])

In [229]:
def get_density_data(df, scale=SCL):
    """Return a one-element list holding the choropleth trace for ``df``.

    Args:
        df: frame with ``state``, ``pop_density`` and ``text`` columns.
        scale: colour scale for the trace; defaults to ``SCL``.

    Returns:
        ``[trace]`` where ``trace`` is a dict describing a choropleth using
        the ``USA-states`` location mode, with ``pop_density`` cast to float.
    """
    trace = {
        'type': 'choropleth',
        'colorscale': scale,
        'autocolorscale': False,
        'locations': df["state"],
        'z': df["pop_density"].astype(float),
        'locationmode': 'USA-states',
        'text': df["text"],
        'colorbar': {'title': "# arson / 100k people"},
    }
    return [trace]

In [230]:
def density_plotter(df):
    """Render the choropleth for the first frame in ``df`` inside the notebook.

    Args:
        df: indexable collection of per-year frames; only ``df[0]`` is drawn.
    """
    first_year = df[0]
    figure = {
        'data': get_density_data(first_year),
        'layout': get_layout(),
    }
    p.offline.iplot(figure, filename='d3-cloropleth-map')

In [196]:
#density_plotter(density_df)

In [281]:
def get_density_data_per_year(dfs, scale=SCL):
    """Return one choropleth trace dict per frame in ``dfs``.

    Args:
        dfs: iterable of frames, each with ``state``, ``pop_density`` and
            ``text`` columns.
        scale: colour scale applied to every trace; defaults to ``SCL``.

    Returns:
        Flat list of trace dicts, in the same order as ``dfs``.
    """
    # NOTE(review): unlike get_density_data, ``z`` is passed through without
    # a float cast here - confirm whether that difference is intentional.
    return [
        {
            'type': 'choropleth',
            'colorscale': scale,
            'autocolorscale': False,
            'locations': yearly["state"],
            'z': yearly["pop_density"],
            'locationmode': 'USA-states',
            'text': yearly["text"],
            # White, 2px-wide outline around each state.
            'marker': {'line': {'color': 'rgb(255,255,255)', 'width': 2}},
            'colorbar': {'title': "Arson Density in the USA"},
            'name': "",
        }
        for yearly in dfs
    ]

In [273]:
def set_steps(dfs, years):
    """Build one slider step per year that shows only that year's trace.

    Args:
        dfs: not used by this function; kept for interface compatibility.
        years: sequence of labels, one per trace, in trace order.

    Returns:
        List of step dicts. Each uses the ``restyle`` method to set
        ``visible`` to a boolean list that is True only at its own position.
    """
    total = len(years)
    steps = []
    for position, label in enumerate(years):
        # Fresh list per step: True at this step's index, False elsewhere.
        visibility = [slot == position for slot in range(total)]
        steps.append({
            'method': "restyle",
            'args': ["visible", visibility],
            'label': label,
        })
    return steps

In [288]:
def set_sliders(steps, active=10):
    """Wrap ``steps`` in a single-slider definition for a plotly layout.

    Args:
        steps: list of slider step dicts (e.g. from ``set_steps``).
        active: preferred index of the initially active step. It is clamped
            to the valid range ``0 .. len(steps) - 1``; the previous fixed
            value of 10 was out of range for shorter step lists.

    Returns:
        A one-element list containing the slider dict.
    """
    last_index = max(len(steps) - 1, 0)
    active_index = max(0, min(active, last_index))
    return [dict(active=active_index,
                 currentvalue={"prefix": "Year: "},
                 pad={"t": 50},
                 steps=steps)]


In [283]:
def set_layout(sliders):
    """Return the plotly layout dict for the slider-driven density map.

    Args:
        sliders: slider definitions stored unchanged under ``sliders``.

    Returns:
        Layout dict with a title, a USA-scoped ``geo`` section (Albers USA
        projection, white lakes) and the given sliders.
    """
    geo_settings = {
        'scope': 'usa',
        'projection': {'type': 'albers usa'},
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',
    }
    return {
        'title': 'USA Arson Density',
        'geo': geo_settings,
        'sliders': sliders,
    }

In [284]:
def density_plotter_with_slider(df, years=None):
    """Render all yearly choropleths in one figure with a year slider.

    The previous body passed only ``df[0]`` (a single frame) to
    ``get_density_data_per_year``, which expects a collection of frames, and
    used the slider-less ``get_layout``; this version wires traces, steps and
    sliders together.

    Args:
        df: sequence of per-year frames (as produced by
            ``build_data_per_year``), one trace is drawn per frame.
        years: slider labels, one per frame and in the same order; defaults
            to "2009" through "2014".

    Raises:
        ValueError: if the number of labels differs from the number of frames.
    """
    if years is None:
        years = ["2009", "2010", "2011", "2012", "2013", "2014"]
    if len(years) != len(df):
        raise ValueError(
            "expected one year label per frame, got {} labels for {} frames"
            .format(len(years), len(df)))

    data = get_density_data_per_year(df)
    sliders = set_sliders(set_steps(df, years))
    layout = set_layout(sliders)
    fig = dict(data=data, layout=layout)
    p.offline.iplot(fig, filename='d3-cloropleth-map')

In [285]:
# Assemble the pieces of the slider figure: one step and one trace per year
# (labels must be in the same order as the frames in density_df), then a
# layout that carries the slider.
steps = set_steps(density_df, ["2009", "2010", "2011", "2012", "2013", "2014"])
sliders = set_sliders(steps)
data = get_density_data_per_year(density_df)
layout=set_layout(sliders)

In [286]:
# Figure spec combining the per-year traces with the slider layout.
fig = dict(data=data, layout=layout)

In [287]:
# Render the slider figure inline in the notebook.
p.offline.iplot(fig, filename='d3-cloropleth-map')