This notebook uses the access library to calculate and plot spatial access metrics for every census tract in Brazil.

In [2]:
# Importing necessary libraries
import pandas as pd
import geopandas as gpd
import numpy as np
import matplotlib.pyplot as plt
from access import Access, weights, Datasets
import geopandas as gpd
from shapely import wkt
# Custom functions
# NOTE(review): the wildcard import hides which names are pulled in; `class_mod_three_stage_fca`
# (used on the next line) presumably comes from modified_access — confirm.
from modified_access import *
# Attach the custom routine to the Access class so it can be invoked as a method,
# i.e. `A.mod_three_stage_fca(...)`, later in the notebook.
Access.mod_three_stage_fca = class_mod_three_stage_fca


In [3]:
# Load the two local CSV extracts used throughout the notebook.
# In both files the first CSV column is taken as the row index.
dem_census_df = pd.read_csv(
    "/Users/feliphlvo/Documents/Minerva/Capstone/data/local/dem_census.csv",
    index_col=0,
)
school_census_df = pd.read_csv(
    "/Users/feliphlvo/Documents/Minerva/Capstone/data/local/school_census.csv",
    index_col=0,
)

In [4]:
# Preview the first rows of the school census table.
school_census_df.head()

Unnamed: 0,region_name,region_id,state_name,state_abbrev,state_id,city_name,city_id,mesoregion_name,mesoregion_id,microregion_name,...,ideb_school,saeb_school,output_indicator_school,ideb_city,saeb_city,output_indicator_city,ideb_state,saeb_state,output_indicator_state,ideb_filled
0,Norte,1,Rondônia,RO,11,Alta Floresta D'Oeste,1100015,Leste Rondoniense,1102,Cacoal,...,,,,4.5,4.863751,0.923238,4.0,4.693116,0.860582,4.5
1,Norte,1,Rondônia,RO,11,Alta Floresta D'Oeste,1100015,Leste Rondoniense,1102,Cacoal,...,5.5,5.676987,0.974683,4.5,4.863751,0.923238,4.0,4.693116,0.860582,5.5
2,Norte,1,Rondônia,RO,11,Alta Floresta D'Oeste,1100015,Leste Rondoniense,1102,Cacoal,...,4.1,4.497814,0.915462,4.5,4.863751,0.923238,4.0,4.693116,0.860582,4.1
3,Norte,1,Rondônia,RO,11,Alta Floresta D'Oeste,1100015,Leste Rondoniense,1102,Cacoal,...,,,,4.5,4.863751,0.923238,4.0,4.693116,0.860582,4.5
4,Norte,1,Rondônia,RO,11,Alta Floresta D'Oeste,1100015,Leste Rondoniense,1102,Cacoal,...,,,,4.5,4.863751,0.923238,4.0,4.693116,0.860582,4.5


In [4]:
# List the column names of both census tables (demographic first, then school).
print(dem_census_df.columns)
print(school_census_df.columns)

Index(['sector_id', 'state', 'n_households', 'n_people',
       'avg_monthly_earnings', 'n_people_15to17_white',
       'n_people_15to17_black', 'n_people_15to17_asian',
       'n_people_15to17_parda', 'n_people_15to17_indigenous', 'n_people_15',
       'n_people_16', 'n_people_17', 'n_people_15_men', 'n_people_16_men',
       'n_people_17_men', 'n_people_15_women', 'n_people_16_women',
       'n_people_17_women', 'n_people_15to17', 'n_people_15to17_alternative',
       'state_id', 'state_abbrev', 'city_id', 'city_name', 'district_id',
       'district_name', 'subdistrict_id', 'subdistrict_name',
       'neighborhood_name', 'neighborhood_id', 'zone', 'geometry',
       'microregion_id', 'microregion_name', 'mesoregion_id',
       'mesoregion_name'],
      dtype='object')
Index(['region_name', 'region_id', 'state_name', 'state_abbrev', 'state_id',
       'city_name', 'city_id', 'mesoregion_name', 'mesoregion_id',
       'microregion_name', 'microregion_id', 'sector_id', 'school_id',
   

In [9]:
# Demand side: keep only census sectors with at least one resident aged 15-17.
geo_dem_census_df = dem_census_df.loc[dem_census_df["n_people_15to17"].gt(0)]

# Supply side: keep only schools with at least one class and an admin_type other than 4.0.
# NOTE(review): the intent is "public schools with at least one high school class";
# presumably admin_type 4.0 marks private schools — confirm against the school census codebook.
geo_school_census_df = school_census_df.loc[
    school_census_df["n_classes"].gt(0) & school_census_df["admin_type"].ne(4.0)
]

In [10]:
# Load the precomputed origin/destination distance matrix (see "Creating Distance Matrix.ipynb").
dist_matrix = pd.read_csv(
    "/Users/feliphlvo/Documents/Minerva/Capstone/data/local/dist_matrix.csv",
    index_col=0,
)

# Lookup table: school_id -> ideb_filled, built from the filtered school table.
quality_dict = {
    school_id: ideb
    for school_id, ideb in zip(geo_school_census_df.school_id, geo_school_census_df.ideb_filled)
}

# Tag every origin/destination pair with the quality score of its destination school.
# Destinations missing from the lookup end up with NaN.
dist_matrix = dist_matrix.assign(quality=dist_matrix["dest"].map(quality_dict))
dist_matrix

Unnamed: 0,origin,dest,euclidean,quality
0,170025105000002,17010535,13894.563390,4.3
1,170030105000001,17004268,1524.518606,4.1
2,170030105000001,17002648,10449.349289,4.1
3,170030105000002,17004268,1450.603219,4.1
4,170030105000002,17002648,10129.682425,4.1
...,...,...,...,...
8043,352210905000161,35907935,377.659718,4.2
8044,352210905000161,35919986,4516.856908,4.4
8045,352210905000161,35036110,12842.463547,3.9
8046,352210905000161,35443980,9778.080034,4.2


In [11]:
# Creating the access object
# See the source of the Access library here: https://github.com/pysal/access/blob/main/access/access.py
#   demand: census sectors (keyed by sector_id), valued by the number of residents aged 15-17
#   supply: schools (keyed by school_id), valued by their number of classes
#   cost:   origin/destination pairs from dist_matrix, using the "euclidean" column as the cost
A = Access(
    demand_df=geo_dem_census_df,
    demand_index="sector_id",
    demand_value="n_people_15to17",
    supply_df=geo_school_census_df,
    supply_index="school_id",
    supply_value="n_classes",
    cost_df=dist_matrix,
    cost_origin="origin",
    cost_dest="dest",
    cost_name="euclidean"
    )

In [12]:
# Defining custom weight functions
# Gaussian distance-decay weight built with parameter 5000
# (presumably the decay width, in the same units as the "euclidean" cost — TODO confirm).

gaussian = weights.gaussian(5000)

# Access metrics to calculate
# max_cost is forwarded to the metric below (presumably the catchment cut-off on the
# "euclidean" cost — TODO confirm units).
max_cost = 16000
# Standard metrics from the access library, currently disabled:
#A.two_stage_fca(name="2sfca", weight_fn=gaussian, cost = "euclidean", max_cost=max_cost)
#A.enhanced_two_stage_fca(name="e2sfca", weight_fn=gaussian, cost = "euclidean", max_cost=max_cost)
#A.three_stage_fca(name = "3sfca", weight_fn=gaussian, cost = "euclidean", max_cost = max_cost)
# Custom modified three-stage FCA (attached to Access in the import cell). It reads the
# "quality" column of the cost table; its results appear in A.access_df as
# "mod_3sfca_n_classes" and "Q_n_classes".
A.mod_three_stage_fca(name = "mod_3sfca", quality_name = 'Q', quality='quality', weight_fn=gaussian, cost = "euclidean", max_cost = max_cost)


Calculating supply locations...
Calculating demand locations...
Calculating average quality...


Unnamed: 0_level_0,mod_3sfca_n_classes
sector_id,Unnamed: 1_level_1
110001505000001,0.050702
110001505000002,0.049770
110001505000003,0.050120
110001505000004,0.050636
110001505000005,0.048983
...,...
530010805300152,0.013612
530010805300153,0.008730
530010805300154,0.008148
530010805300155,0.007738


In [13]:
# Quality-adjusted access: the modified 3SFCA score multiplied element-wise by the
# quality column ("Q_n_classes") of the same table.
A.access_df["Q_adj_3sfca"] = A.access_df["mod_3sfca_n_classes"].mul(A.access_df["Q_n_classes"])

In [15]:
# Inspect the access table, now including the quality-adjusted column.
A.access_df

Unnamed: 0_level_0,n_people_15to17,mod_3sfca_n_classes,Q_n_classes,Q_adj_3sfca
sector_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
110001505000001,52.0,0.050702,4.512174,0.228777
110001505000002,59.0,0.049770,4.503192,0.224123
110001505000003,47.0,0.050120,4.505624,0.225824
110001505000004,21.0,0.050636,4.524315,0.229093
110001505000005,76.0,0.048983,4.509563,0.220892
...,...,...,...,...
530010805300152,10.0,0.013612,4.000000,0.054447
530010805300153,26.0,0.008730,4.004236,0.034958
530010805300154,18.0,0.008148,4.019074,0.032748
530010805300155,15.0,0.007738,4.010513,0.031032


In [20]:
# Previously saved access table; it supplies the "3sfca_n_classes" and "e2sfca_n_classes"
# columns that are merged in further down.
# NOTE(review): this is the same file the final cell overwrites, so the notebook depends on
# its own earlier output — a fresh run needs an existing access_df.csv with those columns.
old_access_df = pd.read_csv("/Users/feliphlvo/Documents/Minerva/Capstone/data/local/access_df.csv", index_col=0)

In [27]:
# Preview the demographic census table (note that "geometry" is stored as WKT text).
dem_census_df.head()

Unnamed: 0,sector_id,state,n_households,n_people,avg_monthly_earnings,n_people_15to17_white,n_people_15to17_black,n_people_15to17_asian,n_people_15to17_parda,n_people_15to17_indigenous,...,subdistrict_id,subdistrict_name,neighborhood_name,neighborhood_id,zone,geometry,microregion_id,microregion_name,mesoregion_id,mesoregion_name
0,170025105000002,TO,115.0,300.0,648.06,1.0,0.0,0.0,10.0,0.0,...,17002510000.0,,,,2.0,"POLYGON ((4316081.043813151 8936951.569070809,...",17003.0,Miracema do Tocantins,1701.0,Ocidental do Tocantins
1,170025105000003,TO,59.0,167.0,433.73,0.0,0.0,0.0,0.0,0.0,...,17002510000.0,,,,2.0,"POLYGON ((4300023.229366692 8964455.368041243,...",17003.0,Miracema do Tocantins,1701.0,Ocidental do Tocantins
2,170030105000001,TO,286.0,1123.0,927.45,10.0,2.0,3.0,59.0,0.0,...,17003010000.0,,,,1.0,"POLYGON ((4501590.424471468 9273374.76835, 450...",17001.0,Bico do Papagaio,1701.0,Ocidental do Tocantins
3,170030105000002,TO,294.0,1101.0,478.33,11.0,0.0,1.0,46.0,0.0,...,17003010000.0,,,,1.0,POLYGON ((4503050.0514419945 9272734.518656082...,17001.0,Bico do Papagaio,1701.0,Ocidental do Tocantins
4,170030105000003,TO,102.0,385.0,453.32,3.0,1.0,0.0,17.0,0.0,...,17003010000.0,,,,2.0,"POLYGON ((4502995.959709885 9275774.210479755,...",17001.0,Bico do Papagaio,1701.0,Ocidental do Tocantins


In [29]:
# Attach the newly computed access metrics to every census sector. This is a left merge on the
# sector_id index, so sectors without metrics are kept. Columns present on both sides receive
# the suffixes "dem" (census side) and "acc" (access side).
access_df = dem_census_df.set_index("sector_id").merge(
    A.access_df,
    how="left",
    left_index=True,
    right_index=True,
    suffixes=("dem", "acc"),
)
# Bring in the earlier 3SFCA / E2SFCA scores as well. The old table is the left side here,
# so the result keeps exactly the rows of old_access_df.
access_df = old_access_df[["3sfca_n_classes", "e2sfca_n_classes"]].merge(
    access_df,
    how="left",
    left_index=True,
    right_index=True,
    suffixes=("dem", "acc"),
)

In [None]:
# Derive the analysis variables on the merged table: demographic shares, rescaled outcome
# variables, population density, and dummy variables for zone, region and poverty status.

# --- Create percentage statistics -------------------------------------------------------
# The merge left the access-side copy of the demand column as "n_people_15to17acc";
# restore its plain name so it can serve as the denominator of every share below.
access_df = access_df.rename(columns={"n_people_15to17acc": "n_people_15to17"})

share_sources = {
    "pct_black": "n_people_15to17_black",
    "pct_white": "n_people_15to17_white",
    "pct_indigenous": "n_people_15to17_indigenous",
    "pct_pardos": "n_people_15to17_parda",
    "pct_asian": "n_people_15to17_asian",
}
for share_col, count_col in share_sources.items():
    access_df[share_col] = access_df[count_col] / access_df["n_people_15to17"]

men_15to17 = access_df["n_people_15_men"] + access_df["n_people_16_men"] + access_df["n_people_17_men"]
access_df["pct_men"] = men_15to17 / access_df["n_people_15to17"]

# --- Rename and rescale the outcome variables -------------------------------------------
# Q = average quality, A = modified 3SFCA access, H = quality-adjusted access (A * Q).
access_df = access_df.rename(columns={"Q_n_classes": "Q", "mod_3sfca_n_classes": "A", "Q_adj_3sfca": "H"})

Q_DIVISOR = 10     # presumably maps the 0-10 quality index onto 0-1 — TODO confirm
A_MULTIPLIER = 35  # presumably converts classes into seats (students per class) — TODO confirm

access_df["Q"] = access_df["Q"] / Q_DIVISOR
access_df["A"] = access_df["A"] * A_MULTIPLIER
# H is the product of A and Q, so it receives both rescalings.
access_df["H"] = access_df["H"] * A_MULTIPLIER / Q_DIVISOR


# --- Density (people/km^2) --------------------------------------------------------------
def _parse_geometry(value):
    """Return a shapely geometry for a WKT string, pass geometries through, else None."""
    if isinstance(value, str):
        return wkt.loads(value)
    if hasattr(value, "area"):
        return value
    return None  # missing geometry (e.g. NaN introduced by the left merge)


# The census table was read with pd.read_csv, so "geometry" holds WKT text and a plain pandas
# column has no `.area`. Parse it into a GeoSeries first. The stored column is left untouched.
sector_shapes = gpd.GeoSeries(access_df["geometry"].map(_parse_geometry))
# Coordinates look projected (presumably metres — confirm the CRS), hence m^2 -> km^2.
# Assign positionally so a non-unique index cannot disturb the alignment.
access_df["area"] = sector_shapes.area.to_numpy() / 10**6
access_df["density"] = access_df["n_people"] / access_df["area"]

# --- Zone: urban / rural ----------------------------------------------------------------
access_df["zone"] = access_df["zone"].replace({1.0: "urban", 2.0: "rural"})
# One-hot encoding zone
access_df = pd.get_dummies(access_df, columns=["zone"])

# --- Region (South, Southeast, Northeast, Midwest, North) -------------------------------
# "SP1"/"SP2" are extra codes for São Paulo that occur in the "state" column.
# Amapá (AP) belongs to the North region; it was previously mislabelled as Northeast.
states_by_region = {
    "North": ["AC", "AM", "AP", "PA", "RO", "RR", "TO"],
    "Northeast": ["AL", "BA", "CE", "MA", "PB", "PE", "PI", "RN", "SE"],
    "Midwest": ["DF", "GO", "MS", "MT"],
    "Southeast": ["ES", "MG", "RJ", "SP", "SP1", "SP2"],
    "South": ["PR", "RS", "SC"],
}
state_to_region = {
    state: region
    for region, states in states_by_region.items()
    for state in states
}
access_df["region"] = access_df["state"].replace(state_to_region)
# One-hot encoding region
access_df = pd.get_dummies(access_df, columns=["region"])

# --- Poverty line -----------------------------------------------------------------------
# Flag whether the sector's average monthly earnings are below the poverty line of R$ 406.00.
POVERTY_LINE_BRL = 406
earnings = access_df["avg_monthly_earnings"]
fpl_labels = np.where(earnings < POVERTY_LINE_BRL, "below", "above").astype(object)
# Missing earnings must not count as "above": leave them unlabelled so that both dummy
# columns are 0/False for those rows.
fpl_labels[earnings.isna().to_numpy()] = np.nan
access_df["fpl"] = fpl_labels
# One-hot encoding fpl
access_df = pd.get_dummies(access_df, columns=["fpl"])

In [31]:
# Saving access metrics to a csv file
# NOTE(review): this overwrites the file that is read into old_access_df earlier in the
# notebook, so later runs will read this run's output as the "old" metrics.
access_df.to_csv("/Users/feliphlvo/Documents/Minerva/Capstone/data/local/access_df.csv")