In [1]:
import pandas as pd

from bokeh.plotting import figure
from bokeh.io import output_notebook,show

# need these for hover tools and most other bokeh chart functionality
from bokeh.models import HoverTool, ColumnDataSource,NumeralTickFormatter

In [2]:
# Load the 2017 contributions workbook into a DataFrame
contrib = pd.read_excel(io='data/2017_Contributions.xlsx')

In [3]:
# Show the dtype pandas assigned to each column of the contributions frame
contrib.dtypes

OFFICECD              object
RECIPID               object
RECIPNAME             object
REFNO                 object
DATE          datetime64[ns]
REFUNDDATE    datetime64[ns]
NAME                  object
C_CODE                object
APARTMENT            float64
BOROUGHCD             object
CITY                  object
STATE                 object
ZIP                   object
OCCUPATION            object
EMPNAME               object
EMPSTRNO              object
EMPSTRNAME            object
EMPCITY               object
EMPSTATE              object
AMNT                 float64
MATCHAMNT              int64
PREVAMNT             float64
PAY_METHOD             int64
INTERMNO             float64
INTERMNAME            object
INTSTRNO             float64
INTSTRNM             float64
INTAPTNO             float64
INTCITY               object
INTST                 object
INTZIP               float64
INTEMPNAME            object
INTEMPSTNO            object
INTEMPSTNM            object
INTEMPCITY    

In [4]:
# Total contribution amount (AMNT) for each contributor type (C_CODE)
conType = contrib.groupby(['C_CODE'])['AMNT'].sum()

# The Series index holds the contributor types, its values hold the totals
x_val, y_val = conType.index.values, conType.values

# Wrap both arrays in a ColumnDataSource so glyphs and tooltips can
# refer to them by column name
conTypeCDS = ColumnDataSource(data={'c_type': x_val, 'tot_amt': y_val})

# List the column names the data source exposes
conTypeCDS.column_names

['c_type', 'tot_amt']

In [5]:
# Create hovertool object.
# Each tooltip is a (label, value) pair. Bokeh itself renders ": " after the
# label, so the label must not end with a colon (it would show up as "::").
# '@tot_amt' reads the 'tot_amt' column of the hovered glyph's data source and
# '{$0,0 a}' is the numeral format applied to that value.
myHover = HoverTool(tooltips=[
                        ("Amnt Raised", "@tot_amt{$0,0 a}")
])

In [6]:
# Create a figure object.
# The categorical x-range is read back from conTypeCDS instead of the scratch
# variable x_val: x_val is rebound to dates by the later line-chart cell, so
# re-running this cell afterwards would build the range from the wrong values.
# Taking the factors from the same source the bars use also guarantees that
# the range and the 'c_type' column always agree.
mybar = figure(x_range=list(conTypeCDS.data['c_type']),
               x_axis_label="Types of Contributors",
               y_axis_label="Funds raised in $",
               width=600, height=400,
               title="Summarizing contributions by types of contributors",
               tools='xpan,box_zoom,tap')


# Adding vertical bars to the figure object; the selection_* / nonselection_*
# properties restyle the bars once one of them is picked with the tap tool
mybar.vbar(x='c_type', top='tot_amt', source=conTypeCDS,
           width=.5, color='red',
           selection_color='blue',
           nonselection_color='green',
           nonselection_alpha=.5)

# Change the numerical formatting of the y-axis tick marks
mybar.yaxis.formatter = NumeralTickFormatter(format='$0,0 a')

# Add the hover tool created in the previous cell
mybar.add_tools(myHover)

# Direct Bokeh output to the notebook and show the chart
output_notebook()
show(mybar)

# Create a simple line chart

In [7]:
# Total contribution amount (AMNT) per DATE value
contribTime = contrib.groupby(['DATE'])['AMNT'].sum()

# Dates (index) and totals (values) as parallel arrays for plotting
x_val, y_val = contribTime.index.values, contribTime.values

In [8]:
# Line chart of total contributions per date, drawn on a datetime x-axis
myLine = figure(x_axis_type='datetime',
                width=600, height=400,
                x_axis_label='Contributions over time',
                y_axis_label='Funds raised in $')

# Read the points straight from contribTime rather than from x_val / y_val:
# those scratch names are also assigned by the contributor-type cell, so
# re-running cells out of order would otherwise plot the wrong data here.
myLine.line(contribTime.index.values, contribTime.values, line_color='blue')

# Direct Bokeh output to the notebook and show the chart
output_notebook()
show(myLine)

# Creating linked charts for synchronous panning and brushing

In [9]:
# List the column labels of the contributions frame
contrib.columns

Index(['OFFICECD', 'RECIPID', 'RECIPNAME', 'REFNO', 'DATE', 'REFUNDDATE',
       'NAME', 'C_CODE', 'APARTMENT', 'BOROUGHCD', 'CITY', 'STATE', 'ZIP',
       'OCCUPATION', 'EMPNAME', 'EMPSTRNO', 'EMPSTRNAME', 'EMPCITY',
       'EMPSTATE', 'AMNT', 'MATCHAMNT', 'PREVAMNT', 'PAY_METHOD', 'INTERMNO',
       'INTERMNAME', 'INTSTRNO', 'INTSTRNM', 'INTAPTNO', 'INTCITY', 'INTST',
       'INTZIP', 'INTEMPNAME', 'INTEMPSTNO', 'INTEMPSTNM', 'INTEMPCITY',
       'INTEMPST', 'INTOCCUPA', 'PURPOSECD', 'EXEMPTCD', 'ADJTYPECD',
       'INT_C_CODE'],
      dtype='object')

In [10]:
# Load the 2017 NYC election results workbook into a DataFrame
electionRes = pd.read_excel(io='data/2017_NYC_ElectionResults.xlsx')

In [11]:
# List the column labels of the election results frame
electionRes.columns

Index(['AD', 'ED', 'County', 'Office/Position Title', 'District Key',
       'VoteFor', 'Unit Name', 'Tally', 'RecipId'],
      dtype='object')

In [12]:
# Aggregate contributions at the recipient level: sum, count and mean of AMNT
# for every (RECIPID, RECIPNAME, OFFICECD) combination, with the group keys
# turned back into ordinary columns
candFunds = (
    contrib
    .groupby(['RECIPID', 'RECIPNAME', 'OFFICECD'])['AMNT']
    .agg(['sum', 'count', 'mean'])
    .reset_index()
)

In [13]:
# Give the aggregate columns descriptive names
aggregate_labels = {
    'sum': 'TotalAmt',
    'count': 'TotNumContributions',
    'mean': 'AvgAmtContrib',
}
candFunds = candFunds.rename(columns=aggregate_labels)
candFunds.columns

Index(['RECIPID', 'RECIPNAME', 'OFFICECD', 'TotalAmt', 'TotNumContributions',
       'AvgAmtContrib'],
      dtype='object')

In [14]:
# Sum the Tally column for every (RecipId, Office/Position Title, Unit Name,
# District Key) combination and turn the group keys back into columns
candVotes = (
    electionRes
    .groupby(['RecipId', 'Office/Position Title', 'Unit Name', 'District Key'])['Tally']
    .sum()
    .reset_index()
)

candVotes.columns

Index(['RecipId', 'Office/Position Title', 'Unit Name', 'District Key',
       'Tally'],
      dtype='object')

In [15]:
# Inner-join fundraising and vote totals on the recipient id; only recipients
# present in both frames are kept
results = candFunds.merge(
    candVotes,
    how='inner',
    left_on='RECIPID',
    right_on='RecipId',
)
results.columns

Index(['RECIPID', 'RECIPNAME', 'OFFICECD', 'TotalAmt', 'TotNumContributions',
       'AvgAmtContrib', 'RecipId', 'Office/Position Title', 'Unit Name',
       'District Key', 'Tally'],
      dtype='object')

In [16]:
# Build the ColumnDataSource directly from the merged DataFrame; every
# DataFrame column (plus its index) becomes a column of the data source
resultsCDS = ColumnDataSource(data=results)
resultsCDS.column_names

['index',
 'RECIPID',
 'RECIPNAME',
 'OFFICECD',
 'TotalAmt',
 'TotNumContributions',
 'AvgAmtContrib',
 'RecipId',
 'Office/Position Title',
 'Unit Name',
 'District Key',
 'Tally']

In [17]:
# Selection and navigation tools shared by all three linked scatter plots
myTools = 'box_select,lasso_select,box_zoom,reset,pan'


def _linked_scatter(title, x_col, y_range=None):
    """Build one 400x300 scatter of `x_col` against 'Tally' from resultsCDS.

    Parameters
    ----------
    title : str
        Figure title.
    x_col : str
        Name of the resultsCDS column plotted on the x-axis.
    y_range : bokeh Range, optional
        Range object to share with another figure (linked panning). When
        omitted, the figure gets its own default y-range.

    Returns
    -------
    The configured Bokeh figure.
    """
    # Only forward y_range when one is supplied so the figure default applies otherwise
    range_kwargs = {} if y_range is None else {'y_range': y_range}
    fig = figure(title=title, width=400, height=300, tools=myTools,
                 **range_kwargs)
    # Every figure draws from the same ColumnDataSource, which is what makes
    # a selection in one chart highlight the same rows in the others
    fig.circle(x=x_col, y='Tally', size=3, color='blue', source=resultsCDS)
    return fig


myScat1 = _linked_scatter("Amount Raised and Votes", 'TotalAmt')

# Sharing myScat1's y_range links vertical panning/zooming across the charts
myScat2 = _linked_scatter("Number of Contributions and Votes",
                          'TotNumContributions',
                          y_range=myScat1.y_range)

# The third chart used to be an exact copy of myScat2; it now plots the
# remaining aggregate (average contribution amount) against votes
myScat3 = _linked_scatter("Average Contribution and Votes",
                          'AvgAmtContrib',
                          y_range=myScat1.y_range)



In [18]:
# Direct Bokeh output to the notebook and display myScat3 on its own
output_notebook()
show(myScat3)

In [None]:
from bokeh.layouts import row, gridplot

output_notebook()
# gridplot expects ONE nested list (a list of rows). Passing the rows as
# separate positional arguments hands the second row to a different parameter
# and fails. None marks the empty cell in the second row.
show(gridplot([[myScat1, myScat3],
               [myScat2, None]]))