In [1]:
from elasticsearch import Elasticsearch

In [2]:
import plotly as p
import plotly.plotly as py
import pandas as pd
# Initialise plotly's offline notebook mode before any p.offline.iplot call below.
p.offline.init_notebook_mode(connected=True)

import collections

In [3]:
# Client for the Elasticsearch node on localhost:9200; handed to the query helpers below.
es = Elasticsearch(hosts=[dict(host='localhost', port='9200')])

In [22]:
# Years to query, kept as strings because they are spliced into the
# Elasticsearch query string. Materialised as a list so the value can be
# iterated more than once and passed to len(); a bare map() is a one-shot
# iterator on Python 3.
YEARS_AVAILABLE = list(map(str, range(2009, 2015)))

# Colour scale for the choropleth: [position in 0..1, colour] stops.
SCL = [
    [0.0, 'rgb(242,240,247)'],
    [0.2, 'rgb(218,218,235)'],
    [0.4, 'rgb(188,189,220)'],
    [0.6, 'rgb(158,154,200)'],
    [0.8, 'rgb(117,107,177)'],
    [1.0, 'rgb(84,39,143)'],
]

# Full state name -> two-letter postal code.
# Fix: Alaska is "AK" and Alabama is "AL"; the two codes used to be swapped,
# which put each state's value on the other state's polygon.
STATE_CODES = {"Mississippi": "MS", "Oklahoma": "OK", "Delaware": "DE", "Minnesota": "MN", "Illinois": "IL", "Arkansas": "AR", "New Mexico": "NM", "Indiana": "IN", "Maryland": "MD", "Louisiana": "LA", "Idaho": "ID", "Wyoming": "WY", "Tennessee": "TN", "Arizona": "AZ", "Iowa": "IA", "Michigan": "MI", "Kansas": "KS", "Utah": "UT", "Virginia": "VA", "Oregon": "OR", "Connecticut": "CT", "Montana": "MT", "California": "CA", "Massachusetts": "MA", "West Virginia": "WV", "South Carolina": "SC", "New Hampshire": "NH", "Wisconsin": "WI", "Vermont": "VT", "Georgia": "GA", "North Dakota": "ND", "Pennsylvania": "PA", "Florida": "FL", "Alaska": "AK", "Kentucky": "KY", "Hawaii": "HI", "Nebraska": "NE", "Missouri": "MO", "Ohio": "OH", "Alabama": "AL", "Rhode Island": "RI", "South Dakota": "SD", "Colorado": "CO", "New Jersey": "NJ", "Washington": "WA", "North Carolina": "NC", "New York": "NY", "Texas": "TX", "Nevada": "NV", "Maine": "ME"}

# Reverse lookup (postal code -> full name), derived from STATE_CODES so the
# two tables can never disagree.
STATES = {code: name for name, code in STATE_CODES.items()}

In [172]:
def get_layout():
    """Return the plotly layout dict used by ``density_plotter``.

    The layout holds the figure title and a ``geo`` section limited to the
    USA scope with the "albers usa" projection and lakes drawn in white.
    """
    geo_settings = {
        "scope": "usa",
        "projection": {"type": "albers usa"},
        "showlakes": True,
        "lakecolor": "rgb(255, 255, 255)",
    }
    return {
        "title": "USA Arson Density (per 100,000 people)",
        "geo": geo_settings,
    }

In [5]:
def query_index(es, index_name, year, max_size=51):
    """Return the raw hits of ``index_name`` whose ``year`` field matches.

    Args:
        es: client object exposing ``search(index=..., q=..., size=...)``.
        index_name: name of the index to search.
        year: year to match; a string or an int.
        max_size: maximum number of hits requested from the search.

    Returns:
        The list found under ``["hits"]["hits"]`` in the search response.
    """
    # format() rather than "+" so an integer year does not raise TypeError.
    response = es.search(index=index_name, q="year:{}".format(year), size=max_size)
    return response["hits"]["hits"]

In [23]:
def build_density_series(hits):
    """Turn ``arson_density`` hits into a two-column DataFrame.

    Args:
        hits: iterable of search hits; each hit's ``_source`` must provide
            ``state`` (a key of ``STATE_CODES``) and ``pop_density``.

    Returns:
        DataFrame with columns ``state`` (two-letter code) and
        ``pop_density``. If a state occurs more than once, the last hit wins.
        An empty ``hits`` yields an empty frame with the same two columns.

    Raises:
        KeyError: if a hit's state is not present in ``STATE_CODES``.
    """
    density_by_code = {}
    for hit in hits:
        source = hit["_source"]
        density_by_code[STATE_CODES[source["state"]]] = source["pop_density"]
    # Materialise the items: a dict view is not a list on Python 3 and is
    # rejected by older pandas DataFrame constructors.
    return pd.DataFrame(list(density_by_code.items()), columns=["state", "pop_density"])

In [24]:
def build_text_series(es, indices, year):
    """Build the per-state hover text for one year.

    For every index in ``indices`` the hits for ``year`` are fetched with
    ``query_index`` and a line ``"<index>: <value><br>"`` is appended to the
    text of the hit's state, where ``<value>`` is the hit's field named after
    the index.

    State values that are keys of ``STATE_CODES`` (full names) are converted
    to their two-letter code, because the density frame this text is merged
    with on ``state`` is keyed by code; any other value is kept unchanged.

    Args:
        es: Elasticsearch client passed through to ``query_index``.
        indices: iterable of index names.
        year: year passed through to ``query_index``.

    Returns:
        DataFrame with columns ``state`` and ``text``.
    """
    text_by_state = collections.defaultdict(str)
    for index in indices:
        for hit in query_index(es, index, year):
            source = hit["_source"]
            state = STATE_CODES.get(source["state"], source["state"])
            text_by_state[state] += index + ": " + str(source[index]) + "<br>"
    # list(): a dict view is not a list on Python 3 and is rejected by older
    # pandas DataFrame constructors.
    return pd.DataFrame(list(text_by_state.items()), columns=["state", "text"])

In [25]:
def build_data_per_year(es, indices):
    """Return one merged DataFrame per year from 2009 to 2014.

    For each year the ``arson_density`` hits are converted with
    ``build_density_series`` and outer-merged on ``state`` with the hover
    text produced by ``build_text_series`` for ``indices``.

    Returns:
        List of DataFrames, in year order.
    """
    frames = []
    for year in ("2009", "2010", "2011", "2012", "2013", "2014"):
        density = build_density_series(query_index(es, "arson_density", year))
        hover_text = build_text_series(es, indices, year)
        frames.append(density.merge(hover_text, on="state", how="outer"))
    return frames

In [26]:
# One merged DataFrame per year (density plus "method"/"ownership" hover text).
# Despite the name, this is a list of DataFrames rather than a single frame.
density_df = build_data_per_year(es, ["method", "ownership"])

In [229]:
def get_density_data(df, scale=SCL):
    """Wrap one frame as a single-element list holding a choropleth trace.

    Args:
        df: frame providing the ``state``, ``pop_density`` and ``text``
            columns.
        scale: colour scale stored under ``colorscale``.

    Returns:
        A one-item list containing the trace dict; ``z`` is the
        ``pop_density`` column cast to float.
    """
    trace = {
        "type": "choropleth",
        "colorscale": scale,
        "autocolorscale": False,
        "locations": df["state"],
        "z": df["pop_density"].astype(float),
        "locationmode": "USA-states",
        "text": df["text"],
        "colorbar": {"title": "# arson / 100k people"},
    }
    return [trace]

In [230]:
def density_plotter(df):
    """Plot the first frame of ``df`` as a choropleth via ``p.offline.iplot``.

    Only ``df[0]`` is drawn; the remaining entries are ignored.
    """
    first_frame = df[0]
    figure = {"data": get_density_data(first_frame), "layout": get_layout()}
    p.offline.iplot(figure, filename='d3-cloropleth-map')

In [196]:
#density_plotter(density_df)

In [79]:
def set_data(dfs, scale=None):
    """Build one choropleth trace dict per frame in ``dfs``.

    Args:
        dfs: iterable of frames, each providing the ``state``,
            ``pop_density`` and ``text`` columns.
        scale: colour scale stored under ``colorscale``. ``None`` (the
            default) selects the notebook-level ``SCL`` constant.

    Returns:
        List of trace dicts, one per frame, in input order.
    """
    if scale is None:
        # Resolved at call time. The former default, COLOR_SCALE, is only
        # defined in a much later cell, so this definition raised NameError
        # when the notebook was run top to bottom on a fresh kernel. SCL
        # holds the same colour stops and is defined in the constants cell.
        scale = SCL
    traces = []
    for df in dfs:
        traces.append({
            "type": "choropleth",
            "colorscale": scale,
            "autocolorscale": False,
            "locations": df["state"],
            "z": df["pop_density"],
            "locationmode": "USA-states",
            "text": df["text"],
            "marker": {
                "line": {
                    "color": "rgb(255,255,255)",
                    "width": 2
                }
            },
            "colorbar": {
                "title": "arson per 100k people"
            },
            "name": ""
        })
    return traces


In [28]:
def set_steps(dfs, years=None):
    """Build the slider steps, one per year.

    Each step is a ``restyle`` call on ``visible`` whose flag list is True
    only at the step's own position, and whose label is the year.

    Args:
        dfs: unused; kept so existing callers keep working.
        years: iterable of year labels. ``None`` (the default) selects the
            notebook-level ``YEARS_AVAILABLE`` at call time.

    Returns:
        List of step dicts, in the order of ``years``.
    """
    if years is None:
        years = YEARS_AVAILABLE
    # Materialise so len() works even when an iterator such as map() is given.
    years = list(years)
    steps = []
    for idx, year in enumerate(years):
        visibility = [False] * len(years)
        visibility[idx] = True
        steps.append(dict(method="restyle",
                          args=["visible", visibility],
                          label=year))
    return steps

In [29]:
def set_sliders(steps, active=10):
    """Wrap ``steps`` in a single-slider list for the layout.

    Args:
        steps: list of slider step dicts.
        active: index of the step selected initially. It is clamped into
            ``0 .. len(steps) - 1``; the previous hard-coded value of 10
            pointed past the end of the six yearly steps this notebook builds.

    Returns:
        A one-item list holding the slider dict.
    """
    last_index = max(len(steps) - 1, 0)
    return [dict(active=min(max(active, 0), last_index),
                 currentvalue={"prefix": "Year: "},
                 pad={"t": 50},
                 steps=steps)]


In [30]:
def set_layout(sliders):
    """Return the layout dict for the slider map.

    Same ``geo`` settings as the single-map layout (USA scope, "albers usa"
    projection, white lakes), with ``sliders`` attached under ``sliders``.
    """
    geo_settings = {
        "scope": "usa",
        "projection": {"type": "albers usa"},
        "showlakes": True,
        "lakecolor": "rgb(255, 255, 255)",
    }
    return {
        "title": "USA Arson Density",
        "geo": geo_settings,
        "sliders": sliders,
    }

In [31]:
def density_plotter_with_slider(df):
    """Plot every yearly frame in ``df`` as a choropleth with a year slider.

    The earlier version called ``get_density_data_per_year``, which is not
    defined anywhere in this notebook, handed it only the first frame, and
    used the slider-less layout. The figure is now assembled from the
    helpers that do exist: ``set_data``, ``set_steps``, ``set_sliders`` and
    ``set_layout``.

    Args:
        df: list of per-year frames, as returned by ``build_data_per_year``.
    """
    data = set_data(df)
    sliders = set_sliders(set_steps(df))
    layout = set_layout(sliders)
    fig = dict(data=data, layout=layout)
    p.offline.iplot(fig, filename='d3-cloropleth-map')

In [32]:
# Assemble the pieces of the slider figure: one step per year, the slider
# wrapping those steps, one choropleth trace per yearly frame, and the layout.
steps = set_steps(density_df, ["2009", "2010", "2011", "2012", "2013", "2014"])
sliders = set_sliders(steps)
# set_data is the per-year trace builder defined above; the previously called
# get_density_data_per_year does not exist in this notebook (NameError).
data = set_data(density_df)
layout = set_layout(sliders)

In [33]:
# Figure spec handed to plotly: traces plus layout.
fig = {"data": data, "layout": layout}

In [287]:
# Draw the figure assembled in the cells above in the notebook output.
p.offline.iplot(fig, filename='d3-cloropleth-map')

In [34]:
def plot_density(df):
    """Plot the yearly frames in ``df`` as a choropleth with a year slider.

    Builds the steps, the slider, the traces and the layout with the
    ``set_*`` helpers, then hands the figure to ``p.offline.iplot``.
    """
    sliders = set_sliders(set_steps(df))
    figure = {"data": set_data(df), "layout": set_layout(sliders)}
    p.offline.iplot(figure, filename="d3-cloropleth-map")


In [35]:
# Draw the slider map for the frames built earlier (density_df is a list of per-year frames).
plot_density(density_df)

In [84]:
import collections

import pandas as pd
import plotly as p
import plotly.plotly as py

import elasticsearch
# Full state name -> two-letter postal code.
# Fix: Alaska is "AK" and Alabama is "AL"; the two codes used to be swapped,
# which put each state's value on the other state's polygon.
STATE_CODES = {"Mississippi": "MS", "Oklahoma": "OK", "Delaware": "DE", "Minnesota": "MN", "Illinois": "IL", "Arkansas": "AR", "New Mexico": "NM", "Indiana": "IN", "Maryland": "MD", "Louisiana": "LA", "Idaho": "ID", "Wyoming": "WY", "Tennessee": "TN", "Arizona": "AZ", "Iowa": "IA", "Michigan": "MI", "Kansas": "KS", "Utah": "UT", "Virginia": "VA", "Oregon": "OR", "Connecticut": "CT", "Montana": "MT", "California": "CA", "Massachusetts": "MA", "West Virginia": "WV", "South Carolina": "SC", "New Hampshire": "NH", "Wisconsin": "WI", "Vermont": "VT", "Georgia": "GA", "North Dakota": "ND", "Pennsylvania": "PA", "Florida": "FL", "Alaska": "AK", "Kentucky": "KY", "Hawaii": "HI", "Nebraska": "NE", "Missouri": "MO", "Ohio": "OH", "Alabama": "AL", "Rhode Island": "RI", "South Dakota": "SD", "Colorado": "CO", "New Jersey": "NJ", "Washington": "WA", "North Carolina": "NC", "New York": "NY", "Texas": "TX", "Nevada": "NV", "Maine": "ME"}
# Reverse lookup (postal code -> full name), derived from STATE_CODES so the
# two tables can never disagree.
STATES = {code: name for name, code in STATE_CODES.items()}

# Initialise plotly's offline notebook mode before any p.offline.iplot call.
p.offline.init_notebook_mode(connected=True)
# Client for the Elasticsearch node on localhost:9200; main() reads this global.
es = elasticsearch.Elasticsearch(hosts=[dict(host='localhost', port='9200')])

# Colour scale for the choropleth: [position in 0..1, colour] stops.
COLOR_SCALE = [
    [0.0, 'rgb(242,240,247)'],
    [0.2, 'rgb(218,218,235)'],
    [0.4, 'rgb(188,189,220)'],
    [0.6, 'rgb(158,154,200)'],
    [0.8, 'rgb(117,107,177)'],
    [1.0, 'rgb(84,39,143)'],
]
# Years to query, as strings. Materialised as a list: the value is iterated
# by build_data_per_year AND used (with len()) as the default of set_steps,
# which a one-shot Python 3 map() iterator cannot support.
YEARS_AVAILABLE = list(map(str, range(2009, 2015)))

def query_index(es, index_name, year, max_size=51):
    """Search ``index_name`` for documents of the given year.

    Args:
        es: client object exposing ``search(index=..., q=..., size=...)``.
        index_name: name of the index to search.
        year: year to match; a string or an int.
        max_size: maximum number of hits requested from the search.

    Returns:
        The list found under ``["hits"]["hits"]`` in the search response.
    """
    # format() rather than "+" so an integer year does not raise TypeError.
    query = "year:{}".format(year)
    res = es.search(index=index_name, q=query, size=max_size)
    return res["hits"]["hits"]


def build_density_df(hits):
    """Turn ``arson_density`` hits into a two-column DataFrame.

    Args:
        hits: iterable of search hits; each hit's ``_source`` must provide
            ``state`` (a key of ``STATE_CODES``) and ``pop_density``.

    Returns:
        DataFrame with columns ``state`` (two-letter code) and
        ``pop_density``. If a state occurs more than once, the last hit wins.

    Raises:
        KeyError: if a hit's state is not present in ``STATE_CODES``.
    """
    # Values are only ever assigned, so a plain dict is enough here.
    density_by_code = {}
    for hit in hits:
        source = hit["_source"]
        density_by_code[STATE_CODES[source["state"]]] = source["pop_density"]
    # list(): a dict view is not a list on Python 3 and is rejected by older
    # pandas DataFrame constructors.
    return pd.DataFrame(list(density_by_code.items()), columns=["state", "pop_density"])


def build_text_df(es, indices, year):
    """Build the per-state hover text for one year.

    For every index in ``indices`` the hits for ``year`` are fetched with
    ``query_index`` and a line ``"<index>: <value><br>"`` is appended to the
    text of the hit's state, where ``<value>`` is the hit's field named after
    the index.

    State values that are keys of ``STATE_CODES`` (full names) are converted
    to their two-letter code, because the density frame this text is merged
    with on ``state`` is keyed by code; any other value is kept unchanged.

    Args:
        es: Elasticsearch client passed through to ``query_index``.
        indices: iterable of index names.
        year: year passed through to ``query_index``.

    Returns:
        DataFrame with columns ``state`` and ``text``.
    """
    desc = collections.defaultdict(str)
    for index in indices:
        for row in query_index(es, index, year):
            source = row["_source"]
            state = STATE_CODES.get(source["state"], source["state"])
            desc[state] += index + ": " + str(source[index]) + "<br>"
    # list(): a dict view is not a list on Python 3 and is rejected by older
    # pandas DataFrame constructors.
    return pd.DataFrame(list(desc.items()), columns=["state", "text"])


def build_data_per_year(es, indices):
    """Return one merged DataFrame for each year in ``YEARS_AVAILABLE``.

    For each year the ``arson_density`` hits are converted with
    ``build_density_df`` and outer-merged on ``state`` with the hover text
    produced by ``build_text_df`` for ``indices``.

    Returns:
        List of DataFrames, in the order of ``YEARS_AVAILABLE``.
    """
    merged_frames = []
    for year in YEARS_AVAILABLE:
        density = build_density_df(query_index(es, "arson_density", year))
        hover_text = build_text_df(es, indices, year)
        merged_frames.append(density.merge(hover_text, on="state", how="outer"))
    return merged_frames

######### PLOTTING STUFF #########
def set_data(dfs, scale=COLOR_SCALE):
    """Build one choropleth trace dict per frame in ``dfs``.

    Args:
        dfs: iterable of frames, each providing the ``state``,
            ``pop_density`` and ``text`` columns.
        scale: colour scale stored under ``colorscale``.

    Returns:
        List of trace dicts, one per frame, in input order.
    """
    def _trace(df):
        # Single trace for one frame; white, 2-wide outlines between states.
        outline = dict(color="rgb(255,255,255)", width=2)
        return dict(
            type="choropleth",
            colorscale=scale,
            autocolorscale=False,
            locations=df["state"],
            z=df["pop_density"],
            locationmode="USA-states",
            text=df["text"],
            marker=dict(line=outline),
            colorbar=dict(title="arson per 100k people"),
            name="",
        )

    return [_trace(df) for df in dfs]


def set_steps(dfs, years=None):
    """Build the slider steps, one per year.

    Each step is a ``restyle`` call on ``visible`` whose flag list is True
    only at the step's own position, and whose label is the year.

    Args:
        dfs: unused; kept so existing callers keep working.
        years: iterable of year labels. ``None`` (the default) selects the
            module-level ``YEARS_AVAILABLE`` at call time.

    Returns:
        List of step dicts, in the order of ``years``.
    """
    if years is None:
        years = YEARS_AVAILABLE
    # Materialise so len() works even when an iterator such as map() is given.
    years = list(years)
    steps = []
    for idx, year in enumerate(years):
        visible = [False] * len(years)
        visible[idx] = True
        steps.append({
            "method": "restyle",
            "args": ["visible", visible],
            "label": year
        })
    return steps


def set_sliders(steps, active=10):
    """Wrap ``steps`` in a single-slider list for the layout.

    Args:
        steps: list of slider step dicts.
        active: index of the step selected initially. It is clamped into
            ``0 .. len(steps) - 1``; the previous hard-coded value of 10
            pointed past the end of the six yearly steps built by default.

    Returns:
        A one-item list holding the slider dict.
    """
    last_index = max(len(steps) - 1, 0)
    return [{
        "active": min(max(active, 0), last_index),
        "currentvalue": {"prefix": "Year: "},
        "pad": {"t": 50},
        "steps": steps
    }]

def set_layout(sliders):
    """Return the layout dict for the slider map.

    The ``geo`` section uses the USA scope, the "albers usa" projection and
    white lakes; ``sliders`` is attached under the ``sliders`` key.
    """
    geo = dict(
        scope="usa",
        projection=dict(type="albers usa"),
        showlakes=True,
        lakecolor="rgb(255, 255, 255)",
    )
    return dict(title="USA Arson Density", geo=geo, sliders=sliders)

def plot_density(df):
    """Plot the yearly frames in ``df`` as a choropleth with a year slider.

    Args:
        df: list of per-year frames, as returned by ``build_data_per_year``.
    """
    # Build the steps once; previously set_steps(df) was called twice and the
    # first result was never used.
    steps = set_steps(df)
    sliders = set_sliders(steps)
    data = set_data(df)
    layout = set_layout(sliders)

    fig = dict(data=data, layout=layout)
    p.offline.iplot(fig, filename="d3-cloropleth-map")


def main():
    """Fetch the yearly frames through the global ``es`` client and plot them.

    The hover text is built from the "method" and "ownership" indices.
    """
    yearly_frames = build_data_per_year(es, ["method", "ownership"])
    plot_density(yearly_frames)


In [85]:
# Run the pipeline: query Elasticsearch for every year and draw the slider map.
main()