In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import json
import matplotlib
from bokeh.plotting import figure, output_file, show, hplot
from bokeh.models import Rect, HoverTool, ColumnDataSource
from collections import OrderedDict
import zipfile
import codecs

%matplotlib inline

In [32]:
df = pd.read_csv('Data/Community_Workers_By_Postcode.csv',
                 skiprows=3)
reference = pd.read_csv('Data/School_Completions_By_Postcode.csv',
                        skiprows=3)

df = df[df.columns.values[1:-1]]
df.rename(columns = {'Occupation (OCCP)':'Postcode'}, inplace=True)
df = df[~(pd.isnull(df['Diversional Therapists']))].reset_index()
del(df['index'])
comcols = df.columns.values[1:-1]

reference = reference[pd.notnull(reference['Total'])]
reference = reference[[
        'Highest Year of School Completed (HSCP)','Total','Year 12 or equivalent',
        'Year 11 or equivalent','Year 10 or equivalent','Year 9 or equivalent',
        'Year 8 or below','Did not go to school'
    ]]
reference.columns = ['Postcode','Total Population','Year 12','Year 11','Year 10','Year 9','< Year 9','No School']

df = pd.merge(df,reference,how="left")
df['postcode'] = df['Postcode'].str.split(',').apply(lambda x: x[0])

In [33]:
suburbs = pd.read_csv('Data/postcodes_suburbs.csv')

suburbs = suburbs[['postcode','suburb','state']]
suburbs['postcode'] = suburbs['postcode'].astype(str).str.zfill(4)

suburbs = suburbs[pd.notnull(suburbs['suburb'])]
allsubs = suburbs.groupby('postcode')['suburb'].apply(lambda x: "%s" % ', '.join(x)).reset_index()
allsubs = pd.merge(allsubs,suburbs[['postcode','state']].drop_duplicates(),how="left").drop_duplicates()
#allsubs = allsubs.groupby('postcode')['state'].apply(lambda x: "%s" % ', '.join(x)).reset_index()

df = pd.merge(df,allsubs[['postcode','suburb']],how="left")

df['suburb'] = df['suburb'].apply(lambda x: ','.join(str(x).split(',')[:3]))
df.loc[df['postcode']=='2000','new_sub'] = 'SYDNEY CBD'
df.loc[df['postcode']=='3000','new_sub'] = 'MELBOURNE CBD'
df.loc[df['postcode']=='4000','new_sub'] = 'BRISBANE CBD'
df.loc[df['postcode']=='5000','new_sub'] = 'ADELAIDE CBD'
df.loc[df['postcode']=='6000','new_sub'] = 'PERTH CBD'

In [37]:
def get_state(x):
    try:
        return x[1][1:]
    except:
        return "Multiple States"

def get_city(pc):
    try:
        x = int(pc)
    except:
        return 'Not a valid postcode'
    if x >=2000 and x <= 2234:
        return 'Syd'
    elif x >= 2600 and x <= 2609:
        return 'Can'
    elif x >= 3000 and x <= 3207:
        return 'Mel'
    elif x >= 4000 and x <= 4207:
        return 'Bri'
    elif x in [4300,4301,4302,4303,4304,4305,4500,4501,4502,4503,
                  4504,4505,4506,4507,4508,4509,4510,4511,4512,4513,
                  4514,4515,4516,4517,4518,4519]:
        return 'Bri'
    elif x >= 5000 and x <= 5199:
        return 'Ade'
    elif x >= 6000 and x <= 6199:
        return 'Per'
    elif x >= 7000 and x <= 7099:
        return 'Hob'
    else:
        return 'Non-5Cap'
    
df['state'] = df['Postcode'].str.split(',').apply(lambda x: get_state(x))
df['city'] = df['postcode'].apply(lambda x: get_city(x))

care_workers = [
    'Enrolled and Mothercraft Nurses','Indigenous Health Workers','Welfare Support Workers',
    'Carers and Aides nfd','Child Carers','Education Aides','Personal Carers and Assistants nfd',
    'Aged and Disabled Carers','Nursing Support and Personal Care Workers','Special Care Workers',
]

df['care_workers'] = df[care_workers].sum(axis=1)

df['carers_per_1000'] = df['care_workers']/df['Total Population']*1000

domestic_carers = [
    'Carers and Aides nfd','Child Carers','Education Aides','Personal Carers and Assistants nfd',
    'Aged and Disabled Carers','Nursing Support and Personal Care Workers','Special Care Workers',
    'Community and Personal Service Workers nfd'
]

df['domestic_carers'] = df[domestic_carers].sum(axis=1)
df['domestic_carers_per_1000'] = df['domestic_carers'] / df['Total Population'] * 1000

till_Y10 = ['Year 10','Year 9','< Year 9','No School']

df['Till_Y10'] = df[till_Y10].sum(axis=1)
df['Till_Y10_Prop'] = df['Till_Y10'] / df['Total Population']

In [7]:
bytereader = codecs.getreader("utf-8")

with zipfile.ZipFile('Data/postcodes_shapes.json.zip') as izip:
    with izip.open('postcodes_shapes.json','r') as infile:
        postcode_locs = json.load(bytereader(infile))
    
colour_df = pd.DataFrame()
colour_df['postcode'] = postcode_locs.keys()

def set_hue(cdf,selected_var, cmaptype = matplotlib.cm.GnBu):
    cdf = pd.merge(cdf, df[['postcode',selected_var]],how="left")
    colours = cmaptype(cdf[
            selected_var
        ]/cdf[
            selected_var
        ].max())
    for pos,val in enumerate(['R','G','B','Int']):
        cdf[val] = colours[:,pos]
    cdf['rgb'] = list(zip(cdf['R'],cdf['G'],cdf['B'],cdf['Int']))
    cdf['hex'] = cdf['rgb'].apply(lambda x: matplotlib.colors.rgb2hex(x))
    return cdf

In [24]:
postcode_locs = {outerKey: values for outerKey, innerDict in postcode_locs.items() for innerKey, values in innerDict.items()}

pcdf = pd.DataFrame(postcode_locs).T.reset_index()
pcdf.columns = ['postcode','lat_list','lng_list']

In [25]:
ugh = pcdf[['lat_list','lng_list','postcode']]

ugh['lat_length'] = ugh['lat_list'].apply(lambda x: len(x))
ugh['lng_length'] = ugh['lng_list'].apply(lambda x: len(x))

bug = pcdf[['postcode','lng_list','lat_list']]

def ret_from_list(x,pos):
    try:
        return x[pos]
    except:
        return 'None'

for i in range(100):
    bug['lng_list_'+str(i)] = bug['lng_list'].apply(lambda x: ret_from_list(x,i))
    bug['lat_list_'+str(i)] = bug['lat_list'].apply(lambda x: ret_from_list(x,i))

lat_cols = [col for col in bug.columns.values if 'lat_list_' in col]
lng_cols = [col for col in bug.columns.values if 'lng_list_' in col]

alat = bug[lat_cols+['postcode']]
alng = bug[lng_cols+['postcode']]

alat = alat.set_index('postcode').stack().reset_index()
alng = alng.set_index('postcode').stack().reset_index()

alat.columns = ['postcode','level','lat_list']
alng.columns = ['postcode','level','lng_list']

alat = alat[alat['lat_list'] != 'None']
alng = alng[alng['lng_list'] != 'None']

all_dims = pd.merge(alat[['postcode','lat_list']],alng[['postcode','lng_list']], how="outer")

all_dims.columns = ['postcode','lat_list','lng_list']

del(alng)
del(alat)
del(bug)
del(ugh)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-

In [26]:
def plot_map_from_df(output_file_name, variable, scope, geo_filter=None, size=1000, cmap_select = matplotlib.cm.GnBu):
    '''
Creates an html file with a choropleth map of postcodes in a selected region

Variables: 
output_file_name - the location of the file to be saved
variable - numeric variable against which to shade the postcodes
scope - determines how large an area is covered by the generated map
geo_filter - if a scope is given, this specifies the area
size = how big in pixels the resulting map should be
    '''
    if scope.lower() == 'state':
        curtailed = df[df['state'].str.lower() == geo_filter.lower()]
        if geo_filter.lower() not in ['nsw','qld','vic','wa','tas','sa','nt','act']:
            print('Please select a valid state, or change the scope')
            return None
    elif scope.lower() == 'city':
        curtailed = df[df['city'].str.lower() == geo_filter.lower()[:3]]
        if geo_filter.lower()[:3] not in ['syd','mel','bri','ade','can','per','hob']:
            print('Please select a valid capital city, or change the scope')
            return None
    elif scope.lower() == 'national':
        curtailed = df.loc[pd.notnull(df['postcode'].apply(lambda x: float(x)))]
    else:
        print('Please select a valid scope: city, state or national')
    colors = set_hue(curtailed[['postcode']], variable, cmap_select)
    colors = pd.merge(colors, all_dims, how="left")
    colors = pd.merge(colors, df[['postcode','state','suburb']])
    data_source = ColumnDataSource (
    data = dict(
            lat_list=colors['lat_list'],
            lng_list=colors['lng_list'],
            color=colors['hex'],
            name=colors['postcode'],
            data=colors[variable],
            suburbs=colors['suburb'],
            state=colors['state']
    )
    )
    output_file(output_file_name)
    vaariable = '$'+variable
    TOOLS="pan,wheel_zoom,box_zoom,reset,hover,save"
    if geo_filter == 'None':
        title_str = 'Australia'
    else:
        title_str = geo_filter
    p = figure(title=variable.capitalize()+" in "+title_str.capitalize()+" by Postcode", plot_width = size, plot_height=size, tools=TOOLS)
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None
    p.patches('lat_list',
              'lng_list',
              fill_color = 'color', fill_alpha=0.7,
              line_color = 'white', line_width = 0.5,
             source = data_source)
    hover = p.select(dict(type=HoverTool))
    hover.point_policy = "follow_mouse"
    hover.tooltips = OrderedDict([
            ("Name", "@name"),
            (variable, '@data'),
            ("Suburbs",'@suburbs'),
            ("State",'@state'),
    ])
    colors2 = colors[[variable,'hex']].drop_duplicates()
    legend=figure(width=100,height=1000)
    legend.toolbar_location=None
    legend.rect(x=0.5,y=colors2[variable],color=colors2['hex'],height=10,width=10)
    legend.xaxis.visible = None
    legend.xgrid.grid_line_color = None
    legend.ygrid.grid_line_color = None
    layout = hplot(p,legend)
    show(layout)

In [43]:
plot_map_from_df('sydney_health.html','domestic_carers_per_1000','state',geo_filter='NSW', cmap_select = matplotlib.cm.BuPu)

In [183]:
def plot_map(output_file_name, variable, scope, geo_filter=None, size=1000, cmap_select = matplotlib.cm.GnBu):
    '''
Creates an html file with a choropleth map of postcodes in a selected region

Variables: 
output_file_name - the location of the file to be saved
variable - numeric variable against which to shade the postcodes
scope - determines how large an area is covered by the generated map
geo_filter - if a scope is given, this specifies the area
size = how big in pixels the resulting map should be
    '''
    if scope.lower() == 'state':
        curtailed = df[df['state'].str.lower() == geo_filter.lower()]
        if geo_filter.lower() not in ['nsw','qld','vic','wa','tas','sa','nt','act']:
            print('Please select a valid state, or change the scope')
            return None
    elif scope.lower() == 'city':
        curtailed = df[df['city'].str.lower() == geo_filter.lower()[:3]]
        if geo_filter.lower()[:3] not in ['syd','mel','bri','ade','can','per','hob']:
            print('Please select a valid capital city, or change the scope')
            return None
    elif scope.lower() == 'national':
        curtailed = df.loc[pd.notnull(df['postcode'].apply(lambda x: float(x)))]
    else:
        print('Please select a valid scope: city, state or national')
    colors = set_hue(curtailed[['postcode']], variable, cmap_select)
    output_file(output_file_name)
    hover = HoverTool(tooltips=[
            ('Postcode','$postcode'),
            (variable,'$'+variable)
        ])
    p = figure(plot_width = size, plot_height=size, tools=[hover])
    p.xgrid.grid_line_color = None
    p.ygrid.grid_line_color = None
    for pc in [a for a in postcode_locs.keys() if a in list(curtailed['postcode'])]:
        p.patches(postcode_locs[pc][pc]['lat_list'],
                  postcode_locs[pc][pc]['lng_list'],
                  color = colors.loc[colors['postcode']==pc,'hex'])
    colors = colors[[variable,'hex']].drop_duplicates()
    legend=figure(width=100,height=1000)
    legend.toolbar_location=None
    legend.rect(x=0.5,y=colors[variable],color=colors['hex'],height=10,width=10)
    legend.xaxis.visible = None
    legend.xgrid.grid_line_color = None
    legend.ygrid.grid_line_color = None
    layout = hplot(p,legend)
    show(layout)

In [44]:
df.columns.values

array(['Postcode', 'Community and Personal Service Workers nfd',
       'Health and Welfare Support Workers nfd',
       'Ambulance Officers and Paramedics',
       'Dental Hygienists, Technicians and Therapists',
       'Diversional Therapists', 'Enrolled and Mothercraft Nurses',
       'Indigenous Health Workers', 'Massage Therapists',
       'Welfare Support Workers', 'Carers and Aides nfd', 'Child Carers',
       'Education Aides', 'Personal Carers and Assistants nfd',
       'Aged and Disabled Carers', 'Dental Assistants',
       'Nursing Support and Personal Care Workers', 'Special Care Workers',
       'Hospitality Workers nfd', 'Bar Attendants and Baristas',
       'Cafe Workers', 'Gaming Workers', 'Hotel Service Managers',
       'Waiters', 'Other Hospitality Workers',
       'Protective Service Workers nfd',
       'Defence Force Members, Fire Fighters and Police nfd',
       'Defence Force Members - Other Ranks', 'Fire and Emergency Workers',
       'Police', 'Prison and Sec