In [1]:
import plotly.plotly as py
import plotly.figure_factory as ff

import numpy as np
import pandas as pd

import bokeh
from bokeh.io import show
from bokeh.models import LogColorMapper, CustomJS, Toggle, ColumnDataSource, CDSView, GroupFilter
from bokeh.palettes import Viridis6 as palette
from bokeh.plotting import figure
from bokeh.layouts import column, row, widgetbox 

from bokeh.models.widgets import Slider
from bokeh.layouts import Column

from bokeh.sampledata.us_counties import data as counties

In [2]:
df = pd.read_csv("clean.csv")


Columns (19,23,24,26,33,36,37,38,39,40,41,42,43) have mixed types. Specify dtype option on import or set low_memory=False.



In [3]:
def county_name_format(name):
    final_name = ""
    try:
        if type(name) != 'float':
            words = name.split(" ")
            if words[-1] == 'COUNTY':
                words.pop()
            for i in range(0, len(words)):
                final_name += words[i].capitalize() + " "
            final_name = final_name.rstrip()
        else:
            final_name = np.nan
    except:
        pass
    return(final_name)

In [4]:
counties_ca = {
    code: county for code, county in counties.items() if (county["state"] == "ca")
}

In [11]:
county_xs = [county["lons"] for county in counties_ca.values()]
county_ys = [county["lats"] for county in counties_ca.values()]
county_names = [county['name'] for county in counties_ca.values()]

In [12]:
def replace_nan(county_info):
    for l in county_info:
        for idx, item in enumerate(l):
            if np.isnan(item):
                l[idx] = np.nanmean(l)
    return(county_info)

In [13]:
county_xs = replace_nan(county_xs)
county_ys = replace_nan(county_ys)

In [14]:
df['recipient_location_county_name'] = df['recipient_location_county_name'].apply(lambda name: county_name_format(name))

In [15]:
sum_obligation = df.groupby(['fiscal_year','recipient_location_county_name'])['total_obligation'].median().reset_index()

In [16]:
sum_obligation = sum_obligation[sum_obligation['recipient_location_county_name']!= '']

In [17]:
sum_obligation['total_obligation'] = sum_obligation['total_obligation'].apply(lambda x: x/1000000)

In [18]:
years = sum_obligation['fiscal_year'].unique()

In [19]:
for year in years:
    sub = sum_obligation[sum_obligation['fiscal_year']==year]
    temp = pd.DataFrame(columns=['fiscal_year', 'recipient_location_county_name', 'total_obligation'])
    for county in county_names:
        if county not in sub['recipient_location_county_name'].unique():
            temp = temp.append({'fiscal_year': year, 'recipient_location_county_name': county, 'total_obligation': 0}, ignore_index=True)
    for county in sub['recipient_location_county_name'].unique():
        if county not in county_names:
            sum_obligation = sum_obligation[sum_obligation['recipient_location_county_name']!= county]
    sum_obligation = sum_obligation.append(temp)
    

In [20]:
sum_obligation = sum_obligation.sort_values(by=['fiscal_year', 'recipient_location_county_name']).reset_index()

In [22]:
sum_obligation

Unnamed: 0,index,fiscal_year,recipient_location_county_name,total_obligation
0,1,2001,Alameda,0.253401
1,0,2001,Alpine,0
2,2,2001,Amador,0.207502
3,3,2001,Butte,0.023696
4,4,2001,Calaveras,0.794194
5,5,2001,Colusa,0.340466
6,6,2001,Contra Costa,0.192
7,1,2001,Del Norte,0
8,7,2001,El Dorado,0.079715
9,8,2001,Fresno,0.170854


In [None]:
# data=dict(
#     x=county_xs,
#     y=county_ys,
#     name=county_names,
#     rate=sample['total_obligation'],
# )

In [24]:
years = sum_obligation['fiscal_year'].unique()
data_list = []
for year in years:
    data = dict(
        x=county_xs,
        y=county_ys,
        name=county_names,
        year=str(year),
        rate=sum_obligation[sum_obligation['fiscal_year']==year]['total_obligation']
    )
    data_list.append(data)
plot_df = pd.DataFrame(data_list[0])
for i in range(1, len(data_list)):
    try:
        temp = pd.DataFrame(data_list[i])
        plot_df = plot_df.append(temp)
    except:
        print(i)

In [25]:
plot_df

Unnamed: 0,x,y,name,year,rate
0,"[-122.26254, -122.25876, -122.25586, -122.2530...","[37.90194, 37.89909, 37.8965, 37.8941, 37.8870...",Alameda,2001,0.253401
1,"[-119.83828, -119.832, -119.82106, -119.81054,...","[38.88801, 38.88374, 38.87596, 38.86882, 38.85...",Alpine,2001,0
2,"[-120.70401, -120.64529, -120.59707, -120.5583...","[38.53596, 38.50946, 38.50418, 38.51042, 38.51...",Amador,2001,0.207502
3,"[-121.23727, -121.23866, -121.3052, -121.33296...","[39.52657, 39.52658, 39.51325, 39.46875, 39.45...",Butte,2001,0.023696
4,"[-120.0819, -120.08198, -120.12248, -120.16929...","[38.40885, 38.40873, 38.39886, 38.37713, 38.35...",Calaveras,2001,0.794194
5,"[-121.92768, -121.94528, -121.94404, -121.9402...","[39.27397, 39.2531, 39.23476, 39.22162, 39.206...",Colusa,2001,0.340466
6,"[-121.91161, -121.89071, -121.87505, -121.8651...","[38.04503, 38.04653, 38.05055, 38.05991, 38.06...",Contra Costa,2001,0.192
7,"[-123.99403, -124.00118, -124.00875, -124.0167...","[41.46529, 41.46514, 41.46504, 41.46496, 41.46...",Del Norte,2001,0
8,"[-120.70401, -120.70405, -120.7478, -120.80083...","[38.53596, 38.53608, 38.55313, 38.55728, 38.55...",El Dorado,2001,0.079715
9,"[-120.65159, -120.63665, -120.6274, -120.59122...","[36.9528, 36.95272, 36.9528, 36.95264, 36.9728...",Fresno,2001,0.170854


In [26]:
data = ColumnDataSource(plot_df)
view = CDSView(source = data, filters = [GroupFilter(column_name = "year", group = "2001")])

In [27]:
callback = CustomJS(args = {"source": data, "view": view}, code = """
    console.log("Hey dummy")
    view.filters[0].group = cb_obj.value.toString();
    source.change.emit();
""")

slider = Slider(start=2001, end=2019, value=2001, title="Year")

slider.js_on_change("value", callback)

In [28]:
# # add slider with callback to update data source
# slider = Slider(start=2001, end=2019, value=2001, step=1, title="Year")

In [31]:
TOOLS = "pan,wheel_zoom,reset,hover,save"

p = figure(
    title="CA spending", tools=TOOLS,
    x_axis_location=None, y_axis_location=None,
    tooltips=[
        ("Name", "@name"), ("Spending)", "@rate"), ("(Long, Lat)", "($x, $y)")
    ])
p.grid.grid_line_color = None
p.hover.point_policy = "follow_mouse"

p.patches('x', 'y', source=data, view = view,
          fill_color={'field': 'rate', 'transform': LogColorMapper(palette=palette)},
          fill_alpha=0.7, line_color="black", line_width=0.5)

layout = Column(slider, p)
show(layout)

In [30]:
exp = plot_df.copy()

In [None]:
len(exp['rate'])

In [None]:
import random
exp['rate'] = random.sample(range(20, 4000), 1102)

In [None]:
data = ColumnDataSource(exp)
view = CDSView(source = data, filters = [GroupFilter(column_name = "year", group = "2001")])

In [None]:
np.nanmean([1,2,np.nan])