In [1]:
# # install correct version of plotly
# !pip install plotly==4.13.0
# # Important library for many geopython libraries
# !apt install gdal-bin python-gdal python3-gdal 
# # Install rtree - Geopandas requirment
# !apt install python3-rtree 
# # Install Geopandas
# !pip install git+git://github.com/geopandas/geopandas.git
# # Install descartes - Geopandas requirment
# !pip install descartes 
# # Install Folium for Geographic data visualization
# !pip install folium
# # Install plotlyExpress
# !pip install plotly_express
# # Install contextily to make our map
# !pip install contextily

# !pip install plotly-geo==1.0.0
# !pip install dash==1.17.0

# import contextily as ctx
import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point
import matplotlib
import matplotlib.pyplot as plt 
import folium

# plotly
import plotly_express as px
import plotly.graph_objects as go
# from plotly import graph_objects as go



# geojson must be unwound before plotting
import json
# !pip install geojson_rewind
from geojson_rewind import rewind

# UK

In [2]:
# Raw GitHub URLs of the notebook's input files.
# Grouped-by-region CSV loaded into `baseline` further down.
baseline_file="https://raw.githubusercontent.com/fedderw/uk-ubi-regional-maps/main/baseline_regions.csv"
# Grouped-by-region CSV loaded into `ubi` further down.
ubi_file="https://raw.githubusercontent.com/fedderw/uk-ubi-regional-maps/main/ubi_regions.csv"
# GeoJSON with the UK region boundaries.
# NOTE(review): import_uk_region_boundaries carries its own copy of this URL
# and does not read this variable.
region_geojson = "https://raw.githubusercontent.com/fedderw/uk-ubi-regional-maps/main/uk_regions.geojson"

In [3]:
def import_uk_region_boundaries(
    url="https://raw.githubusercontent.com/fedderw/uk-ubi-regional-maps/main/uk_regions.geojson",
):
  """Load the UK region boundaries as a GeoDataFrame indexed by region name.

  The values of the ``rgn19nm`` column are rewritten to the upper-case keys
  listed in ``region_name_dict`` (values without an entry are left as they
  are), the column is renamed to ``region_name`` and it becomes the index.

  Args:
    url: path or URL of the boundaries GeoJSON. Defaults to the file in the
      uk-ubi-regional-maps repository.

  Returns:
    GeoDataFrame with one row per feature of the file, indexed by
    ``region_name``.
  """
  # name as spelled in the GeoJSON -> key used as `region_name` in the
  # grouped CSV files
  region_name_dict = {
      'North East': 'NORTH_EAST',
      'North West': 'NORTH_WEST',
      'Yorkshire and the Humber': 'YORKSHIRE',
      'East Midlands': 'EAST_MIDLANDS',
      'West Midlands': 'WEST_MIDLANDS',
      'East': 'EAST_OF_ENGLAND',
      'London': 'LONDON',
      'South East': 'SOUTH_EAST',
      'South West': 'SOUTH_WEST',
      'Northern Ireland': 'NORTHERN_IRELAND',
      'Scotland': 'SCOTLAND',
      'Wales': 'WALES',
  }

  # read geojson file as GeoPandas GeoDataFrame
  gdf = gpd.read_file(url)

  # translate the region names so they line up with the CSV index
  gdf['rgn19nm'] = gdf['rgn19nm'].replace(region_name_dict)

  # rename to match the CSV column and use it as the index
  return gdf.rename(columns={'rgn19nm': 'region_name'}).set_index('region_name')

# Load the region boundaries once. `regions` is read as a notebook-level
# global by import_grouped_region_gdf and by the plotting cells below.
regions = import_uk_region_boundaries()
# preview the first rows
regions.head()

Unnamed: 0_level_0,objectid,rgn19cd,bng_e,bng_n,long,lat,st_areashape,st_lengthshape,ctry19nmw,geometry
region_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
NORTH_EAST,1,E12000001,417314,600356,-1.72888,55.297009,8597298000.0,1032344.0,,"MULTIPOLYGON (((-2.03000 55.77000, -2.02000 55..."
NORTH_WEST,2,E12000002,350014,506279,-2.77239,54.44944,14164870000.0,2387418.0,,"MULTIPOLYGON (((-2.53000 55.08000, -2.69000 55..."
YORKSHIRE,3,E12000003,446902,448736,-1.28714,53.93264,15408420000.0,1392569.0,,"MULTIPOLYGON (((-0.79000 54.56000, -0.80000 54..."
EAST_MIDLANDS,4,E12000004,477659,322635,-0.84969,52.795719,15645040000.0,1388752.0,,"MULTIPOLYGON (((-0.30000 53.61000, -0.31000 53..."
WEST_MIDLANDS,5,E12000005,386294,295477,-2.20358,52.556969,13003740000.0,953775.0,,"POLYGON ((-1.96000 53.22000, -1.98000 53.22000..."


In [4]:
# use geopandas' default plotting tool to check the region shapes
regions.plot()

<matplotlib.axes._subplots.AxesSubplot at 0x1ada53a5c88>

In [5]:
def import_grouped_region(url):
  """Read a grouped-by-region CSV and index it by its ``region_name`` column.

  Args:
    url: anything ``pd.read_csv`` accepts (path, URL or file-like object).

  Returns:
    DataFrame indexed by ``region_name``.
  """
  grouped = pd.read_csv(url)
  # region names are unique per row, so they serve as the index
  return grouped.set_index('region_name')

# Import Nikhil's ubi_regions data

In [6]:
def import_grouped_region_gdf(url):
  """Read a grouped-by-region CSV and attach the region code and geometry.

  Reads the notebook-level ``regions`` GeoDataFrame; rows are matched on the
  ``region_name`` index with a left join, so every CSV row is kept.

  Args:
    url: path or URL of the grouped CSV.

  Returns:
    GeoDataFrame with the CSV columns plus ``rgn19cd`` and ``geometry``,
    using the CRS of ``regions``.
  """
  # CSV indexed by region_name
  grouped = import_grouped_region(url)

  # only the region ID and the shapes are needed from the boundaries
  boundary_cols = regions[['rgn19cd', 'geometry']]
  with_geometry = grouped.merge(
      boundary_cols,
      how='left',
      left_index=True,
      right_index=True,
  )

  return gpd.GeoDataFrame(with_geometry, crs=regions.crs)




In [7]:
# import file for full ubi+dis (grouped CSV merged with region geometry)
ubi = import_grouped_region_gdf(ubi_file)
# import file for baseline; `baseline` is read as a notebook-level global
# by diff_cols and rel_change_reform below
baseline = import_grouped_region_gdf(baseline_file)

In [8]:
ubi.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 12 entries, EAST_MIDLANDS to YORKSHIRE
Data columns (total 32 columns):
adults_in_household                12 non-null float64
children_in_household              12 non-null float64
country                            12 non-null int64
household_equivalisation_ahc       12 non-null float64
household_equivalisation_bhc       12 non-null float64
household_id                       12 non-null float64
household_weight                   12 non-null float64
in_poverty_ahc                     12 non-null float64
in_poverty_bhc                     12 non-null float64
people_in_household                12 non-null float64
region                             12 non-null float64
seniors_in_household               12 non-null float64
working_age_adults_in_household    12 non-null float64
council_tax                        12 non-null float64
housing_costs                      12 non-null float64
housing_type                       12 non-null int64

In [9]:
baseline.info()

<class 'geopandas.geodataframe.GeoDataFrame'>
Index: 12 entries, EAST_MIDLANDS to YORKSHIRE
Data columns (total 31 columns):
adults_in_household                12 non-null float64
children_in_household              12 non-null float64
country                            12 non-null int64
household_equivalisation_ahc       12 non-null float64
household_equivalisation_bhc       12 non-null float64
household_id                       12 non-null float64
household_weight                   12 non-null float64
in_poverty_ahc                     12 non-null float64
in_poverty_bhc                     12 non-null float64
people_in_household                12 non-null float64
region                             12 non-null float64
seniors_in_household               12 non-null float64
working_age_adults_in_household    12 non-null float64
council_tax                        12 non-null float64
housing_costs                      12 non-null float64
housing_type                       12 non-null int64

In [10]:

baseline.head().T

region_name,EAST_MIDLANDS,EAST_OF_ENGLAND,LONDON,NORTHERN_IRELAND,NORTH_EAST
adults_in_household,1.76516,1.7635,1.88008,1.70203,1.67179
children_in_household,0.526439,0.485413,0.675048,0.553297,0.512821
country,0,0,0,0,0
household_equivalisation_ahc,0.945334,0.934413,0.941741,0.928052,0.904359
household_equivalisation_bhc,0.975078,0.964749,0.977601,0.962231,0.941282
household_id,1.09734e+06,1.09643e+06,1.0952e+06,1.09726e+06,1.09357e+06
household_weight,1556.18,1599.68,2268.67,363.697,1489.42
in_poverty_ahc,0.18818,0.170701,0.294004,0.147248,0.217949
in_poverty_bhc,0.16563,0.13532,0.176015,0.143778,0.182051
people_in_household,2.2916,2.24891,2.55513,2.25533,2.18462


I wasn't sure which columns were relevant, so here I looked for columns whose values changed between the two files and added those columns to a list.

In [11]:
def diff_cols(df, region="EAST_MIDLANDS", reference=None):
    """
    Identify which numeric columns of ``df`` differ from the baseline.

    Only the row for ``region`` is compared, so a column that changes in
    other regions but not in this one is not reported.

    args:
        df: dataframe indexed by region name
        region: region of the UK whose row is used for the comparison
        reference: dataframe to compare against, indexed like ``df``;
            defaults to the notebook-level ``baseline``

    returns:
        list of column names, in ``df`` column order, that are numeric in
        both frames and whose value for ``region`` differs
    """
    if reference is None:
        reference = baseline

    numeric_kinds = ['int', 'float']
    # computed once instead of on every loop iteration
    reference_numeric = set(reference.select_dtypes(numeric_kinds).columns)

    changed = []
    for col in df.select_dtypes(numeric_kinds).columns:
        if col not in reference_numeric:
            continue
        # NaN != 0 is True, so a NaN difference is reported as a change
        if df.loc[region, col] - reference.loc[region, col] != 0:
            changed.append(col)

    return changed

In [12]:
def rel_change_reform(data, cols=(), *args, reference=None):
  """Add percentage and net change columns relative to the baseline.

  For every name in ``cols`` two columns are appended, in this order:
  ``<col>_pct_change`` = (data - reference) / reference * 100 and
  ``<col>_net_change`` = data - reference. Rows are aligned on the index.
  A reference value of 0 yields inf/NaN in the percentage column.

  Args:
    data: (Geo)DataFrame of one simulation, indexed like the reference.
    cols: names of the columns to compare.
    *args: accepted for backward compatibility and ignored.
    reference: frame to compare against; defaults to the notebook-level
      ``baseline``.

  Returns:
    A deep copy of ``data`` with the new columns appended; ``data`` itself
    is not modified.
  """
  if reference is None:
    reference = baseline

  newgdf = data.copy(deep=True)
  for col in cols:
    net_change = newgdf[col] - reference[col]
    # pct column is inserted first, so column order matches earlier runs
    newgdf[str(col) + "_pct_change"] = net_change / reference[col] * 100
    newgdf[str(col) + "_net_change"] = net_change
  return newgdf

# rel_change_reform(ubi, diff_cols)

In [13]:
# Add pct/net change columns (vs. baseline) for every numeric column whose
# EAST_MIDLANDS value differs from the baseline (diff_cols' default region).
full_dis = rel_change_reform(ubi, diff_cols(ubi))
# transposed preview of the first five regions
full_dis.head().T

region_name,EAST_MIDLANDS,EAST_OF_ENGLAND,LONDON,NORTHERN_IRELAND,NORTH_EAST
adults_in_household,1.76516,1.7635,1.88008,1.70203,1.67179
children_in_household,0.526439,0.485413,0.675048,0.553297,0.512821
country,0,0,0,0,0
household_equivalisation_ahc,0.945334,0.934413,0.941741,0.928052,0.904359
household_equivalisation_bhc,0.975078,0.964749,0.977601,0.962231,0.941282
household_id,1.09734e+06,1.09643e+06,1.0952e+06,1.09726e+06,1.09357e+06
household_weight,1556.18,1599.68,2268.67,363.697,1489.42
in_poverty_ahc,0.0902022,0.0924891,0.168279,0.0505702,0.120513
in_poverty_bhc,0.0979782,0.0856611,0.110896,0.0614774,0.10641
people_in_household,2.2916,2.24891,2.55513,2.25533,2.18462


# Plotly Dropdown

In [14]:
import re

This cell currently works; I'll turn it into a function once I make some other changes.

In [15]:


# Dropdown options: the change columns that rel_change_reform appended to the
# columns already present in `ubi` (replaces the positional iloc[:, 32:]).
cols_dd = [col for col in full_dis.columns if col not in ubi.columns]

# poverty-rate changes get a sequential colour scale; everything else gets a
# diverging scale centred on 0
in_poverty_cols = ['in_poverty_ahc_pct_change',
'in_poverty_ahc_net_change',
'in_poverty_bhc_pct_change',
'in_poverty_bhc_net_change']

# define traces and buttons at once
# initialize empty lists
traces = []
buttons = []

# Populate regions['properties'] (i.e. convert to plotly-readable geojson-type)
regions_unwound = json.loads(regions.to_json())
# rewind the unwound geojson
regions_rewound = rewind(regions_unwound,rfc7946=False)

# one trace + one dropdown button per change column
for value in cols_dd:
  poverty_col = value in in_poverty_cols
  traces.append(
      go.Choropleth(
          # Region name of each z value. Plotly pairs locations[i] with z[i]
          # by position, so both must come from the same frame: `regions`
          # is in GeoJSON order while `full_dis` is in CSV order.
          locations=full_dis.index,
          # geojson for boundaries
          geojson=regions_rewound,
          # data to color code
          z=full_dis[value].astype(float).round(2),
          # set title of color bar
          colorbar_title=value,
          # set color scale
          colorscale="Viridis" if poverty_col else "PiYG",
          # centre the diverging scale on 0; leave it unset otherwise
          zmid=None if poverty_col else 0,
          # only the first column is displayed by default
          visible=(value == cols_dd[0]),
          # hovertemplate = 'Average Income Change: %{z:£.2f}<extra></extra>'if re.match(r'income?.*abs?', value) else z
          ))

  # the button shows exactly the trace of its own column and sets the title
  buttons.append(
      dict(
          label=value,
          method="update",
          args=[{"visible": [col == value for col in cols_dd]},
                {"title": f"<b>{value}</b>"}]))

# dropdown menu; "active": 0 pre-selects the first button
updatemenus = [{"active": 0,
                "buttons": buttons,
              }]

# now we define fig
fig = go.Figure(
    data=traces,
    layout=dict(updatemenus=updatemenus)
    )

# This is in order to get the first title displayed correctly
first_title = cols_dd[0]

# set projection type, turn off base map to just show regional boundaries
fig.update_geos(
      projection_type="mercator",
      fitbounds="locations",
      visible=False
  )

fig.update_layout(
      plot_bgcolor="#323130",
      # height=300,
      margin={"r":0,"t":0,"l":0,"b":0},
      title=f"<b>{first_title}</b>",
      title_x=0.5,
      title_y=0.05
      )

# display figure
fig.show()

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.


## Function

In [16]:
def dropdown_choropleth(data, columns=None):
  """Show a choropleth of the UK regions with a dropdown to switch columns.

  One ``go.Choropleth`` trace is built per column and a dropdown button
  toggles which trace is visible. Boundaries come from the notebook-level
  ``regions`` GeoDataFrame; ``data`` must be indexed by the same region
  names.

  Args:
    data: (Geo)DataFrame indexed by region name holding the columns to plot.
    columns: names of the columns offered in the dropdown. When omitted or
      empty, every column ending in ``_pct_change`` or ``_net_change`` is
      used.

  Returns:
    None. The figure is displayed with ``fig.show()``.

  Raises:
    ValueError: if there is no column to plot.
  """
  # make list of columns for dropdown
  if columns is None or len(columns) == 0:
    cols_dd = [col for col in data.columns
               if str(col).endswith(('_pct_change', '_net_change'))]
  else:
    cols_dd = list(columns)
  if not cols_dd:
    raise ValueError("dropdown_choropleth: no columns to plot")

  # poverty-rate changes get a sequential colour scale; the names follow the
  # `_pct_change` / `_net_change` suffixes produced by rel_change_reform
  in_poverty_cols = ['in_poverty_ahc_pct_change',
  'in_poverty_ahc_net_change',
  'in_poverty_bhc_pct_change',
  'in_poverty_bhc_net_change']

  # define traces and buttons at once
  # initialize empty lists
  traces = []
  buttons = []

  # Populate regions['properties'] (i.e. convert to plotly-readable geojson-type)
  regions_unwound = json.loads(regions.to_json())
  # rewind the unwound geojson
  regions_rewound = rewind(regions_unwound,rfc7946=False)

  # one trace + one dropdown button per column
  for value in cols_dd:
    poverty_col = value in in_poverty_cols
    traces.append(
        go.Choropleth(
            # Region name of each z value. Plotly pairs locations[i] with
            # z[i] by position, so both must come from `data`.
            locations=data.index,
            # geojson for boundaries
            geojson=regions_rewound,
            # data to color code
            z=data[value].astype(float).round(2),
            # set title of color bar
            colorbar_title=value,
            # set color scale
            colorscale="Viridis" if poverty_col else "PiYG",
            # centre the diverging scale on 0; leave it unset otherwise
            zmid=None if poverty_col else 0,
            # only the first column is displayed by default
            visible=(value == cols_dd[0]),
            # hovertemplate = 'Average Income Change: %{z:£.2f}<extra></extra>'if re.match(r'income?.*abs?', value) else z
            ))

    # the button shows exactly the trace of its own column and sets the title
    buttons.append(
        dict(
            label=value,
            method="update",
            args=[{"visible": [col == value for col in cols_dd]},
                  {"title": f"<b>{value}</b>"}]))

  # dropdown menu; "active": 0 pre-selects the first button
  updatemenus = [{"active": 0,
                  "buttons": buttons,
                }]

  # now we define fig
  fig = go.Figure(
      data=traces,
      layout=dict(updatemenus=updatemenus)
      )

  # This is in order to get the first title displayed correctly
  first_title = cols_dd[0]

  # set projection type, turn off base map to just show regional boundaries
  fig.update_geos(
        projection_type="mercator",
        fitbounds="locations",
        visible=False
    )

  fig.update_layout(
        plot_bgcolor="#323130",
        # height=300,
        margin={"r":0,"t":0,"l":0,"b":0},
        title=f"<b>{first_title}</b>",
        title_x=0.5,
        title_y=0.05
        )

  # display figure
  fig.show()

# Dash

In [17]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
from jupyter_dash import JupyterDash

In [18]:
# def echo(message=None, file=sys.stdout, nl=True, err=False, color=None):

In [23]:
# external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = JupyterDash(__name__)
# app = dash.Dash(__name__)

# Dropdown options: the change columns that rel_change_reform appended to the
# columns already present in `ubi` (replaces the positional iloc[:, 32:]).
cols_dd = [col for col in full_dis.columns if col not in ubi.columns]

# poverty-rate changes get a sequential colour scale; everything else gets a
# diverging scale centred on 0
in_poverty_cols = ['in_poverty_ahc_pct_change',
'in_poverty_ahc_net_change',
'in_poverty_bhc_pct_change',
'in_poverty_bhc_net_change']

# define traces and buttons at once
# initialize empty lists
traces = []
buttons = []

# Populate regions['properties'] (i.e. convert to plotly-readable geojson-type)
regions_unwound = json.loads(regions.to_json())
# rewind the unwound geojson
regions_rewound = rewind(regions_unwound,rfc7946=False)

# one trace + one dropdown button per change column
for value in cols_dd:
  poverty_col = value in in_poverty_cols
  traces.append(
      go.Choropleth(
          # Region name of each z value. Plotly pairs locations[i] with z[i]
          # by position, so both must come from the same frame: `regions`
          # is in GeoJSON order while `full_dis` is in CSV order.
          locations=full_dis.index,
          # geojson for boundaries
          geojson=regions_rewound,
          # data to color code
          z=full_dis[value].astype(float).round(2),
          # set title of color bar
          colorbar_title=value,
          # set color scale
          colorscale="Greens_r" if poverty_col else "PiYG",
          # centre the diverging scale on 0; leave it unset otherwise
          zmid=None if poverty_col else 0,
          # only the first column is displayed by default
          visible=(value == cols_dd[0]),
          # hovertemplate = 'Average Income Change: %{z:£.2f}<extra></extra>'if re.match(r'income?.*abs?', value) else z
          ))

  # the button shows exactly the trace of its own column and sets the title
  buttons.append(
      dict(
          label=value,
          method="update",
          args=[{"visible": [col == value for col in cols_dd]},
                {"title": f"<b>{value}</b>"}]))

# dropdown menu; "active": 0 pre-selects the first button
updatemenus = [{"active": 0,
                "buttons": buttons,
              }]

# now we define fig
fig = go.Figure(
    data=traces,
    layout=dict(updatemenus=updatemenus)
    )

# This is in order to get the first title displayed correctly
first_title = cols_dd[0]

# set projection type, turn off base map to just show regional boundaries
fig.update_geos(
      projection_type="mercator",
      fitbounds="locations",
      visible=False
  )

fig.update_layout(
      plot_bgcolor="#323130",
      # height=300,
      margin={"r":0,"t":0,"l":0,"b":0},
      title=f"<b>{first_title}</b>",
      title_x=0.5,
      title_y=0.05
      )

# app = dash.Dash()
app.layout = html.Div([
    dcc.Graph(figure=fig)
])

# Run app and display result inline in the notebook
app.run_server(mode='inline',port=8060)

# if __name__ == '__main__':
#     app.run_server(debug=True,
#                    use_reloader=False,
#                    port=8060
#                   )

# app.run_server(debug=True, use_reloader=False)  # Turn off reloader if inside Jupyter