In [104]:
# import os
# os.environ["CALITP_BQ_MAX_BYTES"] = str(100_000_000_000)

import pandas as pd
import numpy as np
import geopandas as gpd
# import fiona
import datetime as dt

# from utils import *

import calitp
from calitp.tables import tbl
from siuba import *

### Metric

The % of [wheelchair-using] Californians that are within 1/4 mi of a transit stop that is indicated to be explicitly accessible in a static GTFS feed and served by at least one explicitly accessible trip.

In [2]:
accessible_stops = (tbl.gtfs_schedule.stops()
                    >> filter(_.wheelchair_boarding == '1')
                    >> select(_.calitp_itp_id, _.calitp_url_number, _.stop_id,
                              _.stop_lat, _.stop_lon, _.wheelchair_boarding)
                   )

In [3]:
accessible_stops

Unnamed: 0,calitp_itp_id,calitp_url_number,stop_id,stop_lat,stop_lon,wheelchair_boarding
0,1,0,2525339,38.726421,-120.836507,1
1,1,0,2525340,38.726059,-120.83456,1
2,1,0,2611969,38.727676,-120.806514,1
3,1,0,2611970,38.732288,-120.77828,1
4,1,0,2456753,38.744336,-120.65707,1


In [4]:
accessible_trips = (tbl.gtfs_schedule.trips()
                    >> filter(_.wheelchair_accessible == '1')
                    >> select(_.calitp_itp_id, _.calitp_url_number, _.trip_id,
                                _.wheelchair_accessible)
                   )

In [5]:
accessible_trips

Unnamed: 0,calitp_itp_id,calitp_url_number,trip_id,wheelchair_accessible
0,327,0,153247,1
1,327,0,153249,1
2,327,0,153226,1
3,327,0,153213,1
4,327,0,153214,1


In [6]:
tbl.gtfs_schedule.stop_times()

Unnamed: 0,calitp_itp_id,calitp_url_number,trip_id,stop_id,stop_sequence,arrival_time,departure_time,stop_headsign,pickup_type,drop_off_type,continuous_pickup,continuous_drop_off,shape_dist_traveled,timepoint,calitp_extracted_at
0,168,0,5024871,90,0,14:20:00,14:20:00,,,,,,,1,2021-10-25
1,106,0,4_Trip7_SP_HOL,2309331,0,14:45:00,14:45:00,,0.0,0.0,1.0,1.0,0.0,1,2021-11-08
2,218,0,21_21,2528998,0,16:15:00,16:15:00,,0.0,0.0,1.0,1.0,0.0,1,2021-11-03
3,228,0,t2CB5-sl14-pE37-r7,131,219,8:47:39,8:47:39,,0.0,0.0,,,6439.56011,0,2021-10-13
4,228,0,t2D79-sl16-pE36-r61,347,377,10:06:17,10:06:17,,0.0,0.0,,,12824.022236,0,2021-10-13


In [7]:
df = (tbl.gtfs_schedule.stop_times()
      >> select(_.calitp_itp_id, _.calitp_url_number, _.trip_id,
               _.stop_id)
      >> inner_join(_, accessible_trips, on=['calitp_itp_id',
                            'calitp_url_number', 'trip_id'])
      >> inner_join(_, accessible_stops, on=['calitp_itp_id',
                            'calitp_url_number', 'stop_id'])
      >> collect()
      ## actually a trip count could be cool? (another use for a frequency table...)
      >> distinct(_.stop_id, _keep_all = True)
      >> select(-_.trip_id)
     )

In [8]:
df

Unnamed: 0,calitp_itp_id,calitp_url_number,stop_id,wheelchair_accessible,stop_lon,wheelchair_boarding,stop_lat
0,217,0,3820402,1,-122.077278,1,37.394672
1,217,0,7269843,1,-122.111591,1,37.404887
2,217,0,3824530,1,-122.071799,1,37.423570
3,278,0,60088,1,-117.075024,1,32.569084
4,278,0,60041,1,-117.084383,1,32.576414
...,...,...,...,...,...,...,...
5724,199,0,CTMDCOB,1,-122.163742,1,37.478597
5725,56,0,bEMY,1,-122.291808,1,37.840446
5726,226,0,24986,1,-117.103392,1,33.096585
5727,199,0,CTALMOB,1,-122.172613,1,37.449520


### Census Data

In [118]:
import requests

In [119]:
ca_counties = requests.get('https://api.census.gov/data/2019/acs/acs5?get=NAME,B01001_001E&for=county:*&in=state:06')

In [144]:
ca_counties.json()[:3]

[['NAME', 'B01001_001E', 'state', 'county'],
 ['Merced County, California', '271382', '06', '047'],
 ['Mariposa County, California', '17420', '06', '043']]

In [123]:
ca_county_codes = [x[-1] for x in ca_counties.json()[1:]]

In [107]:
variable_range = ['0' + str(x) if x < 10 else str(x) for x in range(1,34)]

In [108]:
## https://api.census.gov/data/2019/acs/acs5/variables.html
ambulatory_disability_vars = [f'B18105_0{x}E' for x in variable_range]

In [109]:
variables = ','.join(ambulatory_disability_vars)

In [111]:
query = f'''\
https://api.census.gov/data/2019/acs/acs5?get=NAME,\
{variables}&for=block%20group:*&in=state:06%20county:015\
'''
r = requests.get(query)

In [112]:
query

'https://api.census.gov/data/2019/acs/acs5?get=NAME,B18105_001E,B18105_002E,B18105_003E,B18105_004E,B18105_005E,B18105_006E,B18105_007E,B18105_008E,B18105_009E,B18105_010E,B18105_011E,B18105_012E,B18105_013E,B18105_014E,B18105_015E,B18105_016E,B18105_017E,B18105_018E,B18105_019E,B18105_020E,B18105_021E,B18105_022E,B18105_023E,B18105_024E,B18105_025E,B18105_026E,B18105_027E,B18105_028E,B18105_029E,B18105_030E,B18105_031E,B18105_032E,B18105_033E&for=block%20group:*&in=state:06%20county:015'

In [125]:
census_df = pd.DataFrame()

for county in ca_county_codes:

    query = f'''\
    https://api.census.gov/data/2019/acs/acs5?get=NAME,\
{variables}&for=tract:*&in=state:06%20county:{county}\
'''
    r = requests.get(query)
    # print(query)
    # print(r.status_code)
    json = r.json()
    cols = json[0]
    data = json[1:]
    census_df = census_df.append(pd.DataFrame(data, columns=cols))

In [137]:
census_df = census_df.drop(columns=['NAME']).astype('int64')

In [143]:
# census_df.info()

In [139]:
census_df['total_pop'] = census_df['B18105_001E']
disability_cols = ['B18105_004E', 'B18105_007E', 'B18105_010E', 
                  'B18105_013E', 'B18105_016E', 'B18105_020E',
                   'B18105_023E', 'B18105_026E', 'B18105_029E',
                   'B18105_032E'
                  ]
census_df['total_disabled'] = census_df[disability_cols].sum(axis=1)
census_summary = census_df[['state', 'county', 'tract', 'total_pop', 'total_disabled']]

In [140]:
census_df

Unnamed: 0,B18105_001E,B18105_002E,B18105_003E,B18105_004E,B18105_005E,B18105_006E,B18105_007E,B18105_008E,B18105_009E,B18105_010E,...,B18105_029E,B18105_030E,B18105_031E,B18105_032E,B18105_033E,state,county,tract,total_pop,total_disabled
0,1901,1063,208,0,208,310,8,302,366,26,...,29,46,48,15,33,6,47,503,1901,180
1,6111,3287,871,30,841,996,0,996,1079,94,...,41,161,144,68,76,6,47,504,6111,423
2,6099,3242,887,36,851,833,0,833,1090,83,...,29,169,252,72,180,6,47,505,6099,387
3,2783,1401,402,0,402,409,4,405,494,32,...,9,69,87,53,34,6,47,701,2783,129
4,14787,7324,1718,44,1674,3354,54,3300,1821,71,...,183,319,443,290,153,6,47,1002,14787,1384
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3,3279,1529,138,0,138,232,21,211,777,48,...,20,108,164,64,100,6,15,201,3279,377
4,1088,471,84,0,84,55,3,52,207,14,...,21,34,103,54,49,6,15,203,1088,186
5,2220,969,302,0,302,219,0,219,364,14,...,49,72,79,66,13,6,15,104,2220,211
6,6093,2893,697,0,697,520,0,520,1284,173,...,149,148,212,38,174,6,15,105,6093,641


In [141]:
census_df.head(3)

Unnamed: 0,B18105_001E,B18105_002E,B18105_003E,B18105_004E,B18105_005E,B18105_006E,B18105_007E,B18105_008E,B18105_009E,B18105_010E,...,B18105_029E,B18105_030E,B18105_031E,B18105_032E,B18105_033E,state,county,tract,total_pop,total_disabled
0,1901,1063,208,0,208,310,8,302,366,26,...,29,46,48,15,33,6,47,503,1901,180
1,6111,3287,871,30,841,996,0,996,1079,94,...,41,161,144,68,76,6,47,504,6111,423
2,6099,3242,887,36,851,833,0,833,1090,83,...,29,169,252,72,180,6,47,505,6099,387


In [142]:
census_summary

Unnamed: 0,state,county,tract,total_pop,total_disabled
0,6,47,503,1901,180
1,6,47,504,6111,423
2,6,47,505,6099,387
3,6,47,701,2783,129
4,6,47,1002,14787,1384
...,...,...,...,...,...
3,6,15,201,3279,377
4,6,15,203,1088,186
5,6,15,104,2220,211
6,6,15,105,6093,641
