In [1]:
import os

import pandas as pd
import sqlalchemy as sa
import numpy as np

import bokeh.io as bio
from bokeh.plotting import figure as bfig
from bokeh.models import WMTSTileSource, ColumnDataSource, LinearColorMapper, HoverTool
from bokeh.tile_providers import CARTODBPOSITRON

### Generate Data

In [40]:
###### make dataframe from Regents Database

# Connection URL for the database. Set the DOEDATA_DB_URL environment variable
# to supply your own URL/credentials without editing the notebook.
# NOTE(review): the fallback below is the URL that was previously hard-coded
# here (user and password included); it is kept only so existing runs keep
# working. It should be removed from source, and the password rotated, once
# every user sets the environment variable.
db_url = os.environ.get(
    'DOEDATA_DB_URL',
    'postgresql+psycopg2://pguser:pguser@b4h4r.online:5433/doedata',
)
engine = sa.create_engine(db_url)
meta = sa.MetaData()

# One row per (school, year, test, demographic variable): school details from
# `schools`, results from `regents` (joined on dbn), and the demographic label
# from `variables` (joined on regents.school_demo_id = variables.var_id).
regents_query = "SELECT schools.dbn, schools.school_name, schools.latitude, schools.longitude, \
                    regents.school_year, regents.test_name, variables.var_name, regents.test_num, \
                    regents.mean_score, regents.num_lt_65, regents.num_gt_65, regents.num_gt_80 \
                FROM schools \
                    INNER JOIN Regents on regents.dbn = schools.dbn \
                    INNER JOIN variables ON regents.school_demo_id = variables.var_id;"

regents_df = pd.read_sql_query(regents_query, engine)

# Preview the first rows as the cell output.
regents_df.head()

Unnamed: 0,dbn,school_name,latitude,longitude,school_year,test_name,var_name,test_num,mean_score,num_lt_65,num_gt_65,num_gt_80
0,01m034,p.s. 034 franklin d. roosevelt,40.726473,-73.975181,2015,living environment,all students,16,77.9,1,15,7
1,01m034,p.s. 034 franklin d. roosevelt,40.726473,-73.975181,2016,living environment,all students,9,74.0,1,8,2
2,01m140,p.s. 140 nathan straus,40.71925,-73.983056,2015,living environment,all students,9,67.4,3,6,0
3,01m140,p.s. 140 nathan straus,40.71925,-73.983056,2016,living environment,all students,15,72.6,2,13,5
4,01m140,p.s. 140 nathan straus,40.71925,-73.983056,2017,living environment,all students,9,64.4,5,4,1


In [39]:
# Scratch cell -- everything below is commented out, so nothing here executes.
# It holds two earlier attempts at casting latitude/longitude to float
# (an index loop, then an apply), followed by a check that prints the type of
# one latitude value and the first ten latitudes.
# NOTE(review): the output shown under this cell comes from an earlier run of
# the print lines; consider deleting the cell once the casting logic is settled.

# for i in regents_df['latitude'].index:
#     regents_df['latitude'][i] = float(regents_df['latitude'][i])
#     regents_df['longitude'][i] = float(regents_df['longitude'][i])

# regents_df['latitude'] = regents_df['latitude'].apply(lambda x: float(x) if type(x)==str else x)

# print(type(regents_df['latitude'][10]))
# print(regents_df['latitude'][:10])

<class 'numpy.float64'>
0    40.726473
1    40.726473
2    40.719250
3    40.719250
4    40.719250
5    40.711549
6    40.711549
7    40.711549
8    40.711549
9    40.719598
Name: latitude, dtype: float64


In [42]:
# Radius (in metres) of the sphere used by the spherical Mercator projection
# that the map tiles and the "mercator" axes below expect.
k = 6378137

# Keep the raw degree values in their own columns the first time this cell
# runs and always project from those. This makes the cell safe to re-run (no
# double projection) and -- unlike a `type(x) == str` check -- still projects
# when the database driver already returned the coordinates as floats.
if 'latitude_deg' not in regents_df.columns:
    regents_df['latitude_deg'] = regents_df['latitude'].astype(float)
    regents_df['longitude_deg'] = regents_df['longitude'].astype(float)

# Degrees -> spherical Mercator metres (vectorised over the whole column).
regents_df['latitude'] = np.log(np.tan((90 + regents_df['latitude_deg']) * np.pi / 360.0)) * k
regents_df['longitude'] = regents_df['longitude_deg'] * (k * np.pi / 180)

# Cast the score and count columns to numeric dtypes.
regents_df['mean_score'] = regents_df['mean_score'].astype(float)

int_cols = ['school_year','num_lt_65','test_num','num_gt_65','num_gt_80']
regents_df[int_cols] = regents_df[int_cols].astype(int)

# Share of tests in each score band, as a percentage of tests taken.
# Rows with zero tests taken have no meaningful percentage: masking the
# denominator yields NaN for them instead of +/-inf.
tests_taken = regents_df['test_num'].where(regents_df['test_num'] > 0)
regents_df['pct_lt_65'] = regents_df['num_lt_65'] / tests_taken * 100
regents_df['pct_gt_65'] = regents_df['num_gt_65'] / tests_taken * 100
regents_df['pct_gt_80'] = regents_df['num_gt_80'] / tests_taken * 100

### SET MAPPING PARAMETERS

In [43]:
###### Search parameters: which statistic / subgroup / exam / year to map
test_stat = 'pct_gt_65'             # column of regents_df used to colour the points
demo_var = 'black'                  # value matched against var_name
test_name = 'common core algebra'   # value matched against test_name
school_year = 2016                  # value matched against school_year

###### Map parameters
# Margin added on every side of the data's bounding box (same units as the
# latitude/longitude columns).
padding = 10

# Bounding box of every school in regents_df, widened by `padding`.
south = regents_df['latitude'].min() - padding
north = regents_df['latitude'].max() + padding
west = regents_df['longitude'].min() - padding
east = regents_df['longitude'].max() + padding

# Value range covered by the colour scale.
spectrum_low = 0
spectrum_high = 100

In [46]:
###### Narrow the dataframe down to the rows matching the search parameters

# Rows for the chosen year, demographic variable and exam.
matches = (
    (regents_df['school_year'] == school_year)
    & (regents_df['var_name'] == demo_var)
    & (regents_df['test_name'] == test_name)
)

# Keep only what the map needs: identifiers, coordinates and the chosen statistic.
source_df = regents_df.loc[matches, ['dbn','school_name','longitude','latitude',test_stat]]

# source_df.head()

# Wrap the frame so bokeh glyphs can refer to its columns by name.
data_source = ColumnDataSource(source_df)

In [47]:
###### Draw the schools on a tiled map, coloured by the selected statistic

# Figure spanning the padded bounding box, with mercator axes and a tile layer.
p = bfig(
    tools='pan, wheel_zoom',
    x_range=(west, east),
    y_range=(south, north),
    x_axis_type="mercator",
    y_axis_type="mercator",
)
p.add_tile(CARTODBPOSITRON)

# Linear colour scale from spectrum_low to spectrum_high.
color_map = LinearColorMapper(palette='Magma256', low=spectrum_low, high=spectrum_high)

# One circle per row of data_source; fill colour follows the test_stat column.
point_fill = dict(field=test_stat, transform=color_map)
p.circle(x='longitude', y='latitude', source=data_source, fill_color=point_fill, size=10)

# Hover tooltip showing the school's DBN, its name and the mapped statistic.
tooltips = [
    ('DBN', '@dbn'),
    ('Name', '@school_name'),
    ('Score', '@' + test_stat),
]
p.add_tools(HoverTool(tooltips=tooltips))

bio.show(p)