In [1]:
# import os
# os.environ["CALITP_BQ_MAX_BYTES"] = str(100_000_000_000)

import pandas as pd
import numpy as np
import geopandas as gpd
# import fiona
import datetime as dt

# from utils import *

import calitp
from calitp.tables import tbl
from siuba import *



### Metric

The percentage of [wheelchair-using] Californians who are within 1/4 mile of a transit stop that is explicitly marked as accessible in a static GTFS feed and is served by at least one explicitly accessible trip.

In [2]:
# Stops whose `wheelchair_boarding` field is '1', reduced to the feed
# identifiers, the stop id, and the stop coordinates.
accessible_stops = (
    tbl.gtfs_schedule.stops()
    >> filter(_.wheelchair_boarding == '1')
    >> select(
        _.calitp_itp_id,
        _.calitp_url_number,
        _.stop_id,
        _.stop_lat,
        _.stop_lon,
        _.wheelchair_boarding,
    )
)

In [3]:
accessible_stops

Unnamed: 0,calitp_itp_id,calitp_url_number,stop_id,stop_lat,stop_lon,wheelchair_boarding
0,2,6,12048536,37.736528,-122.256966,1
1,2,6,12048537,37.796869,-122.393209,1
2,2,6,12030041,37.662676,-122.377245,1
3,2,6,12149044,38.099884,-122.263077,1
4,2,6,12048538,37.809594,-122.412272,1


In [4]:
# Trips whose `wheelchair_accessible` field is '1', reduced to the feed
# identifiers and the trip id.
accessible_trips = (
    tbl.gtfs_schedule.trips()
    >> filter(_.wheelchair_accessible == '1')
    >> select(
        _.calitp_itp_id,
        _.calitp_url_number,
        _.trip_id,
        _.wheelchair_accessible,
    )
)

In [5]:
accessible_trips

Unnamed: 0,calitp_itp_id,calitp_url_number,trip_id,wheelchair_accessible
0,327,0,153231,1
1,327,0,153237,1
2,327,0,153245,1
3,327,0,153226,1
4,327,0,153260,1


In [6]:
tbl.gtfs_schedule.stop_times()

Unnamed: 0,calitp_itp_id,calitp_url_number,trip_id,stop_id,stop_sequence,arrival_time,departure_time,stop_headsign,pickup_type,drop_off_type,continuous_pickup,continuous_drop_off,shape_dist_traveled,timepoint,calitp_extracted_at
0,111,0,781453,11145,62,06:40:00,06:40:00,,1.0,,,,,,2021-04-15
1,111,0,781423,11159,60,05:35:00,05:35:00,,,,,,,,2021-04-15
2,2,0,BCT109 NB_MF.T03,161,65,8:25:00,8:25:00,,,,,,,,2021-04-15
3,2,0,BCT109 SB_MF.T14,262,71,17:11:00,17:11:00,,,,,,,,2021-04-15
4,2,0,BCT109 NB_SUN.T12,155,59,18:35:30,18:35:30,,,,,,,,2021-04-15


In [7]:
# Accessible stops that are served by at least one accessible trip.
# `stop_times` links trips to stops, so inner-joining it against both filtered
# tables keeps only (trip, stop) pairs where the trip and the stop are each
# flagged as accessible.
df = (
    tbl.gtfs_schedule.stop_times()
    >> select(_.calitp_itp_id, _.calitp_url_number, _.trip_id, _.stop_id)
    >> inner_join(_, accessible_trips,
                  on=['calitp_itp_id', 'calitp_url_number', 'trip_id'])
    >> inner_join(_, accessible_stops,
                  on=['calitp_itp_id', 'calitp_url_number', 'stop_id'])
    >> collect()
    ## actually a trip count could be cool? (another use for a frequency table...)
    # One row per stop. A stop is identified by the same three-column key used
    # in the joins above; de-duplicating on `stop_id` alone would drop stops
    # from different feeds that happen to share an id.
    >> distinct(_.calitp_itp_id, _.calitp_url_number, _.stop_id,
                _keep_all=True)
    >> select(-_.trip_id)
)

In [8]:
df

Unnamed: 0,calitp_itp_id,calitp_url_number,stop_id,wheelchair_accessible,wheelchair_boarding,stop_lon,stop_lat
0,217,0,3820402,1,1,-122.077278,37.394672
1,217,0,7269843,1,1,-122.111591,37.404887
2,217,0,6684616,1,1,-122.098777,37.431429
3,278,0,60088,1,1,-117.075024,32.569084
4,278,0,60427,1,1,-117.077824,32.566919
...,...,...,...,...,...,...,...
4553,278,0,98092,1,1,-116.484371,32.758215
4554,278,0,12402,1,1,-117.170922,32.905177
4555,324,0,227,1,1,-122.172729,37.432187
4556,278,0,98020,1,1,-116.883798,33.032510


### Census Data

In [9]:
import requests

In [10]:
# County-level request to the 2019 ACS 5-year endpoint for state 06; a later
# cell pulls the county codes out of this response.
ca_counties = requests.get(
    'https://api.census.gov/data/2019/acs/acs5?get=NAME,B01001_001E&for=county:*&in=state:06',
    timeout=60,  # do not hang the kernel indefinitely if the API stalls
)
# Surface HTTP errors here rather than as a JSON decode error further down.
ca_counties.raise_for_status()

In [11]:
ca_counties.json()[:3]

[['NAME', 'B01001_001E', 'state', 'county'],
 ['Merced County, California', '271382', '06', '047'],
 ['Mariposa County, California', '17420', '06', '043']]

In [12]:
# Skip the header row; the county code is the last field of each data row.
ca_county_codes = [fips for *_rest, fips in ca_counties.json()[1:]]

In [13]:
# Two-character, zero-padded strings '01' through '33'.
variable_range = [f'{n:02d}' for n in range(1, 34)]

In [14]:
## https://api.census.gov/data/2019/acs/acs5/variables.html
# One variable name per suffix in `variable_range`, all from table B18105.
ambulatory_disability_vars = [
    f'B18105_0{suffix}E'
    for suffix in variable_range
]

In [15]:
# Comma-separated variable names, interpolated after `get=NAME,` in the query URLs below.
variables = ','.join(ambulatory_disability_vars)

In [16]:
# Single-county request (county 015) for the B18105 variables at block-group
# level. Adjacent literals are concatenated into one URL string.
query = (
    'https://api.census.gov/data/2019/acs/acs5?get=NAME,'
    f'{variables}&for=block%20group:*&in=state:06%20county:015'
)
r = requests.get(query)

In [17]:
query

'https://api.census.gov/data/2019/acs/acs5?get=NAME,B18105_001E,B18105_002E,B18105_003E,B18105_004E,B18105_005E,B18105_006E,B18105_007E,B18105_008E,B18105_009E,B18105_010E,B18105_011E,B18105_012E,B18105_013E,B18105_014E,B18105_015E,B18105_016E,B18105_017E,B18105_018E,B18105_019E,B18105_020E,B18105_021E,B18105_022E,B18105_023E,B18105_024E,B18105_025E,B18105_026E,B18105_027E,B18105_028E,B18105_029E,B18105_030E,B18105_031E,B18105_032E,B18105_033E&for=block%20group:*&in=state:06%20county:015'

In [30]:
# census_df = pd.DataFrame()

# for county in ca_county_codes:

#     query = f'''\
#     https://api.census.gov/data/2019/acs/acs5?get=NAME,\
# {variables}&for=tract:*&in=state:06%20county:{county}\
# '''
#     r = requests.get(query)
#     # print(query)
#     # print(r.status_code)
#     json = r.json()
#     cols = json[0]
#     data = json[1:]
#     census_df = census_df.append(pd.DataFrame(data, columns=cols))

In [19]:
# census_df = census_df.drop(columns=['NAME']).astype('int64')

In [20]:
# census_df.info()

In [21]:
# census_df['total_pop'] = census_df['B18105_001E']
# disability_cols = ['B18105_004E', 'B18105_007E', 'B18105_010E', 
#                   'B18105_013E', 'B18105_016E', 'B18105_020E',
#                    'B18105_023E', 'B18105_026E', 'B18105_029E',
#                    'B18105_032E'
#                   ]
# census_df['total_disabled'] = census_df[disability_cols].sum(axis=1)
# census_summary = census_df[['state', 'county', 'tract', 'total_pop', 'total_disabled']]

In [32]:
# census_df

In [33]:
# The cells that build `census_summary` from the Census API are commented out,
# so on a fresh kernel the name does not exist yet. Load the cached parquet
# copy before displaying it so the notebook survives Restart & Run All.
census_summary = pd.read_parquet('tract_disability.parquet')
census_summary

Unnamed: 0,state,county,tract,total_pop,total_disabled
0,6,47,503,1901,180
1,6,47,504,6111,423
2,6,47,505,6099,387
3,6,47,701,2783,129
4,6,47,1002,14787,1384
...,...,...,...,...,...
3,6,15,201,3279,377
4,6,15,203,1088,186
5,6,15,104,2220,211
6,6,15,105,6093,641


In [34]:
# census_summary.to_parquet('tract_disability.parquet')

In [35]:
# Load the tract-level summary from its parquet cache (the cell that writes
# this file is commented out, so the file must already exist on disk).
census_summary = pd.read_parquet('tract_disability.parquet')

## Block Group Pop

In [36]:
# ACS variable requested per block group below; it is renamed to `population`
# when the result is written to parquet.
total_pop_var = 'B01001_001E'

In [37]:
# Download `total_pop_var` for every block group in state 06, one request per
# county code, and stack the per-county tables into `census_df2`.
county_frames = []

for county in ca_county_codes:
    # Built from adjacent literals so the URL carries no stray leading
    # whitespace (the previous triple-quoted literal began with four spaces).
    query = (
        'https://api.census.gov/data/2019/acs/acs5?get=NAME,'
        f'{total_pop_var}&for=block%20group:*&in=state:06%20county:{county}'
    )
    r = requests.get(query, timeout=60)
    # Fail on an HTTP error here instead of on a confusing JSON decode error.
    r.raise_for_status()
    # The first row of the payload is the header; the rest are data rows.
    cols, *data = r.json()
    county_frames.append(pd.DataFrame(data, columns=cols))

# Concatenate once at the end: `DataFrame.append` was removed in pandas 2.0 and
# re-copied the accumulated frame on every iteration. Each county keeps its own
# 0..n-1 index, exactly as the append-based version produced.
census_df2 = pd.concat(county_frames) if county_frames else pd.DataFrame()

In [40]:
# Save the block-group table, with the ACS variable column renamed to
# `population`, to a parquet file.
(
    census_df2
    .rename(columns={'B01001_001E': 'population'})
    .to_parquet('pop_block_grp.parquet')
)

In [41]:
# Read the block-group population table back from the parquet file written in the previous cell.
blockgrp_pop = pd.read_parquet('./pop_block_grp.parquet')

In [42]:
blockgrp_pop

Unnamed: 0,NAME,population,state,county,tract,block group
0,"Block Group 2, Census Tract 12, Merced County,...",1388,06,047,001200,2
1,"Block Group 4, Census Tract 12, Merced County,...",1531,06,047,001200,4
2,"Block Group 1, Census Tract 12, Merced County,...",1405,06,047,001200,1
3,"Block Group 2, Census Tract 14.01, Merced Coun...",2410,06,047,001401,2
4,"Block Group 1, Census Tract 15.02, Merced Coun...",1148,06,047,001502,1
...,...,...,...,...,...,...
15,"Block Group 2, Census Tract 1.01, Del Norte Co...",2048,06,015,000101,2
16,"Block Group 3, Census Tract 2.01, Del Norte Co...",3762,06,015,000201,3
17,"Block Group 4, Census Tract 2.02, Del Norte Co...",1059,06,015,000202,4
18,"Block Group 2, Census Tract 1.05, Del Norte Co...",1508,06,015,000105,2


In [43]:
census_summary

Unnamed: 0,state,county,tract,total_pop,total_disabled
0,6,47,503,1901,180
1,6,47,504,6111,423
2,6,47,505,6099,387
3,6,47,701,2783,129
4,6,47,1002,14787,1384
...,...,...,...,...,...
3,6,15,201,3279,377
4,6,15,203,1088,186
5,6,15,104,2220,211
6,6,15,105,6093,641


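GEOID example: '482012231001'

48 = state, 201 = county, 223100 = tract, 1 = block group (2 + 3 + 6 + 1 digits, matching the widths of the `state`, `county`, `tract`, and `block group` columns in the block-group table above).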