In [1]:
import pandas as pd
import numpy as np
import geopandas

# Load and Prepare the Data

In [2]:
# Read the census profile CSV into a pandas.DataFrame indexed by (stat, DAUID).

# source column header -> short name used throughout the notebook
col_names = {
    'GEO_CODE (POR)': 'DAUID',
    'GEO_LEVEL': 'level',
    'DIM: Profile of Dissemination Areas (2247)': 'stat',
    'Dim: Sex (3): Member ID: [1]: Total - Sex': 'num',
}
# parse dtypes, in the same column order as col_names; the value column is read
# as text because it also holds non-numeric markers such as '...'
col_dtypes = dict(zip(col_names, ('uint32', 'uint8', 'str', 'str')))

df_raw = (
    pd.read_csv(
        '98-401-X2016044_BRITISH_COLUMBIA_English_CSV_data.csv',
        usecols=col_names.keys(),
        dtype=col_dtypes,
    )
    .rename(columns=col_names)
    # keep only geography level 4 (presumably the dissemination-area level -- confirm against the file's documentation)
    .loc[lambda frame: frame['level'] == 4]
    .set_index(['stat', 'DAUID'])
)

In [3]:
# preview the level-4 rows, indexed by (stat, DAUID); pandas truncates the display
df_raw

Unnamed: 0_level_0,Unnamed: 1_level_0,level,num
stat,DAUID,Unnamed: 2_level_1,Unnamed: 3_level_1
"Population, 2016",59010237,4,0
"Population, 2011",59010237,4,...
"Population percentage change, 2011 to 2016",59010237,4,...
Total private dwellings,59010237,4,0
Private dwellings occupied by usual residents,59010237,4,0
...,...,...,...
Migrants,59590015,4,15
Internal migrants,59590015,4,15
Intraprovincial migrants,59590015,4,15
Interprovincial migrants,59590015,4,0


In [4]:
# drop every row whose value is one of the non-numeric placeholder codes
placeholder_codes = ['...', '..', 'x', 'F']
df_raw = df_raw[~df_raw['num'].isin(placeholder_codes)]

# keep just the values, converted to floats, as a pandas.Series;
# the other columns are not needed from here on
data = df_raw['num'].astype('float')

In [5]:
# Read the dissemination-area boundary shapes into a geopandas.GeoDataFrame,
# keep only Richmond (Greater Vancouver), and index the result by DAUID.
gdf = (
    geopandas.read_file('lda_000b16a_e.shp')
    .loc[lambda areas: (areas['CCSNAME'] == 'Richmond') & (areas['CDNAME'] == 'Greater Vancouver')]
    # same integer dtype as the DAUID values parsed from the census CSV
    .assign(DAUID=lambda areas: areas['DAUID'].astype('uint32'))
    .set_index('DAUID')  # index by the ID of each dissemination area
)


In [6]:
# labels for the different stats we're interested in
l_dupa = 'dwelling units per acre'
l_occ = 'occupation rate'
l_afford = 'unaffordability rate'
l_commutetime = 'fraction with commute < 30 min'
l_commutedriving = 'fraction who drive to work'

# Divisor that turns people per square kilometre into an estimate of dwelling
# units per acre. One square kilometre is about 247.1 acres, so 543.0 presumably
# also assumes roughly 2.2 residents per dwelling unit -- TODO confirm the source.
PEOPLE_PER_KM2_PER_DUPA = 543.0

# Indexing `data` by a statistic name yields a Series keyed by DAUID, so each
# ratio below is computed per dissemination area.
households_total = data['Total -  Owner and tenant households with household total income greater than zero, in non-farm, non-reserve private dwellings by shelter-cost-to-income ratio - 25% sample data']
commute_duration_total = data['Total - Commuting duration for the employed labour force aged 15 years and over in private households with a usual place of work or no fixed workplace address - 25% sample data']
commute_mode_total = data['Total - Main mode of commuting for the employed labour force aged 15 years and over in private households with a usual place of work or no fixed workplace address - 25% sample data']

stats = pd.DataFrame({
    l_dupa: data['Population density per square kilometre'] / PEOPLE_PER_KM2_PER_DUPA,
    l_occ: data['Private dwellings occupied by usual residents'] / data['Total private dwellings'],
    l_afford: data['Spending 30% or more of income on shelter costs'] / households_total,
    l_commutetime: (data['Less than 15 minutes'] + data['15 to 29 minutes']) / commute_duration_total,
    # counts both drivers and passengers of cars, trucks and vans
    l_commutedriving: (data['Car, truck, van - as a driver'] + data['Car, truck, van - as a passenger']) / commute_mode_total,
})

# Add the statistics to the GeoDataFrame. Stat columns left over from an earlier
# run of this cell are dropped first: joining onto a frame that already has them
# raises an overlapping-columns error, which made the cell impossible to re-run.
gdf = gdf.drop(columns=stats.columns, errors='ignore').join(stats)

In [7]:
# look up the exact names of statistics by searching for a fragment of the name
vals = pd.Series(data.index.get_level_values('stat'))
matches = vals.str.contains('Total - Main mode of commuting')
vals.loc[matches].unique()

array(['Total - Main mode of commuting for the employed labour force aged 15 years and over in private households with a usual place of work or no fixed workplace address - 25% sample data'],
      dtype=object)

In [8]:
# DAUIDs of the dissemination areas we want to look at more closely
chosen_das = [
    59151095, 59151098, 59151038,
    59151050, 59151102, 59153488,
    59151057, 59151149, 59153408,
]

# Interactive Maps

### Density

In [9]:
# density across every dissemination area in gdf, colour scale fixed to 0-30
gdf.explore(column=l_dupa, tooltip=['DAUID', l_dupa], vmin=0, vmax=30)

In [10]:
# the same density values drawn with the 'PiYG' colormap over a -15 to 35 colour range
gdf.explore(column=l_dupa, cmap='PiYG', tooltip=['DAUID', l_dupa], vmin=-15, vmax=35)


In [11]:
# density for the chosen dissemination areas only, colour scale fixed to 0-18
gdf.loc[gdf.index.isin(chosen_das)].explore(column=l_dupa, tooltip=['DAUID', l_dupa], vmin=0, vmax=18)

### Occupation Rate

In [12]:
# share of private dwellings occupied by usual residents, per dissemination area
gdf.explore(column=l_occ, tooltip=['DAUID', l_occ])

### Unaffordability Rate

In [13]:
# share of households spending 30% or more of income on shelter, per dissemination area
gdf.explore(column=l_afford, tooltip=['DAUID', l_afford])

In [14]:
# the same unaffordability measure, restricted to the chosen dissemination areas
gdf.loc[gdf.index.isin(chosen_das)].explore(column=l_afford, tooltip=['DAUID', l_afford])

### Fraction who Commute by Car

In [15]:
# share of commuters travelling by car, truck or van (as driver or passenger)
gdf.explore(column=l_commutedriving, tooltip=['DAUID', l_commutedriving])

In [16]:
# the same car-commute share, restricted to the chosen dissemination areas
gdf.loc[gdf.index.isin(chosen_das)].explore(column=l_commutedriving, tooltip=['DAUID', l_commutedriving])

### Commute Time

In [17]:
# share of commuters whose trip takes under 30 minutes, per dissemination area
gdf.explore(column=l_commutetime, tooltip=['DAUID', l_commutetime])