In [1]:
import pandas as pd, numpy as np
from shapely.geometry import Polygon
from shapely.geometry import Point
import geopandas
import matplotlib.pyplot as plot

In [3]:
# Folder that holds the data files. The trailing slash is required because
# file names are appended to it by plain string concatenation.
path = 'Neighborhoods/'

# Parse identifier columns as strings instead of letting pandas infer a
# numeric dtype for them.
dtypes = dict.fromkeys(
    ['Geo_STATE', 'Geo_FIPS', 'Geo_COUNTY', 'h_tract', 'FIPS_County'],
    'str',
)

tract_csv = f'{path}tract_data.csv'
tract_data = pd.read_csv(tract_csv, sep=',', dtype=dtypes)

# Preview the first rows of the tract table.
tract_data.head()

Unnamed: 0.1,Unnamed: 0,Geo_FIPS,alljobs,ltbajobs,lt40kjobs,county_labor,county_unemp,county_med_rent,county_med_value,CBSA Code,...,bach_degree,pop_over_25,hh_kids,total_hh,med_rent,med_housevalue,unemp_civ,civ_labor_force,area_sqmi,HU_density
0,0,6001400100,763651.0,358749.0,301972.0,862353,61327,1432,593500,41860.0,...,1981.0,2478.0,241.0,1292.0,3202.0,1074100.0,75,1643,2.657,486.262695
1,1,6001400200,948962.0,453334.0,376760.0,862353,61327,1432,593500,41860.0,...,1278.0,1559.0,184.0,813.0,1770.0,978900.0,41,1270,0.23,3534.782471
2,2,6001400300,986396.0,472646.0,397943.0,862353,61327,1432,593500,41860.0,...,2817.0,4124.0,499.0,2439.0,1208.0,912700.0,204,3402,0.427,5711.943848
3,3,6001400400,960234.0,459758.0,387879.0,862353,61327,1432,593500,41860.0,...,2512.0,3303.0,437.0,1798.0,1584.0,848900.0,114,2678,0.272,6610.293945
4,4,6001400500,985696.0,471892.0,399812.0,862353,61327,1432,593500,41860.0,...,1822.0,2943.0,288.0,1643.0,1438.0,683500.0,82,2545,0.227,7237.885742


In [5]:
# Parse identifier columns as strings instead of letting pandas infer a
# numeric dtype for them.
dtypes2 = dict.fromkeys(
    ['GEOID', 'Agency', 'Stop ID', 'h_tract', 'FIPS_County'],
    'str',
)

# NOTE(review): this file is read relative to the working directory, not from
# a data folder -- confirm that is where best_matches.csv is meant to live.
station_data = pd.read_csv('best_matches.csv', sep=',', dtype=dtypes2)

# Narrow the frame to the identifier, area and geometry columns listed here.
station_cols = [
    'GEOID', 'Agency', 'Stop ID',
    'station_area', 'tract_area', 'geometry', 'overlap_area',
]
station_small = station_data[station_cols]
station_small.head()

Unnamed: 0,GEOID,Agency,Stop ID,station_area,tract_area,geometry,overlap_area
0,6043000400,Yosemite Valley Shuttle System,766882,0.506879,1025.605497,"POLYGON ((801244.7131922718 4182865.720460022,...",0.506879
1,6071009910,Victor Valley Transit Authority,813390,0.506879,4.403783,"POLYGON ((1019267.793245256 3829647.707041118,...",0.440036
2,6085508602,VTA,864,0.506879,1.342321,"POLYGON ((585443.0943166042 4136955.64879438, ...",0.326389
3,6085502101,VTA,818,0.506879,1.919669,"POLYGON ((593887.8008224856 4129715.55469357, ...",0.387196
4,6085502201,VTA,810,0.506879,0.971958,"POLYGON ((595778.9889680255 4129751.222022584,...",0.207806


In [6]:
# Attach the tract attributes to every station row, matching the station's
# GEOID against the tract's Geo_FIPS. A left join keeps station rows even when
# no tract record matches (their tract columns are then NaN).
merge_data = station_small.merge(
    tract_data,
    how="left",
    left_on="GEOID",
    right_on="Geo_FIPS",
)
merge_data.head()

Unnamed: 0.1,GEOID,Agency,Stop ID,station_area,tract_area,geometry,overlap_area,Unnamed: 0,Geo_FIPS,alljobs,...,bach_degree,pop_over_25,hh_kids,total_hh,med_rent,med_housevalue,unemp_civ,civ_labor_force,area_sqmi,HU_density
0,6043000400,Yosemite Valley Shuttle System,766882,0.506879,1025.605497,"POLYGON ((801244.7131922718 4182865.720460022,...",0.506879,3601,6043000400,2003.0,...,568.0,1579.0,175.0,520.0,533.0,363100.0,108,1466,394.127991,1.319368
1,6071009910,Victor Valley Transit Authority,813390,0.506879,4.403783,"POLYGON ((1019267.793245256 3829647.707041118,...",0.440036,5581,6071009910,66164.0,...,283.0,3153.0,637.0,1378.0,1264.0,142400.0,365,2332,1.693,813.939758
2,6085508602,VTA,864,0.506879,1.342321,"POLYGON ((585443.0943166042 4136955.64879438, ...",0.326389,7178,6085508602,793861.0,...,2368.0,3633.0,625.0,2016.0,1983.0,848500.0,219,2865,0.518,3891.891846
3,6085502101,VTA,818,0.506879,1.919669,"POLYGON ((593887.8008224856 4129715.55469357, ...",0.387196,6948,6085502101,731358.0,...,1973.0,3853.0,541.0,1935.0,1716.0,657000.0,117,2942,0.749,2583.44458
4,6085502201,VTA,810,0.506879,0.971958,"POLYGON ((595778.9889680255 4129751.222022584,...",0.207806,6950,6085502201,706077.0,...,1946.0,4784.0,752.0,3032.0,1618.0,476200.0,351,4245,0.374,8106.95166


In [8]:
# Reduce to one row per GEOID: when several rows share a GEOID, only the first
# one encountered is kept.
unique_tract = merge_data.drop_duplicates(subset=['GEOID'], keep='first')

# Number of distinct GEOIDs remaining.
unique_tract.shape[0]

2037

In [9]:
# Tract-level count columns that are summed within each CBSA before any share
# is computed. Shares are always derived from the summed counts, never by
# averaging tract-level shares.
cols = ['renters', 'occ_HU', 'hispanic', 'black',
        'white', 'asian', 'nhpi', 'total_pop', 'below200pov',
        'total_pov_status', 'rent_occ_HU', 'low_inc_rent_burden',
        'white_pov_tot', 'white_pov', 'black_pov_tot', 'black_pov',
        'asian_pov_tot', 'asian_pov', 'nhpi_pov_tot', 'nhpi_pov',
        'hispanic_pov_tot', 'hispanic_pov', 'sfdetach', 'smallmf', 'medmf',
        'bigmf', 'total_hu', 'vacant_hu', 'total_hu2', 'since2000',
        'before1950', 'total_structure', 'bach_degree', 'pop_over_25',
        'hh_kids', 'total_hh', 'unemp_civ', 'civ_labor_force', 'area_sqmi']

# (output column, numerator column, denominator column), listed in the order
# the columns appear in the saved CSV.
# Gotcha: the four '<group>_pov' outputs reuse the names of count columns in
# `cols`, but in the stats table they hold rates (count / '<group>_pov_tot').
RATIO_SPECS = [
    ('pct_rent', 'renters', 'occ_HU'),

    ('pct_white', 'white', 'total_pop'),
    ('pct_hispanic', 'hispanic', 'total_pop'),
    ('pct_black', 'black', 'total_pop'),
    ('pct_asian', 'asian', 'total_pop'),

    ('pct_below200pov', 'below200pov', 'total_pov_status'),

    ('hispanic_pov', 'hispanic_pov', 'hispanic_pov_tot'),
    ('black_pov', 'black_pov', 'black_pov_tot'),
    ('asian_pov', 'asian_pov', 'asian_pov_tot'),
    ('white_pov', 'white_pov', 'white_pov_tot'),

    ('pct_sfdetach', 'sfdetach', 'total_hu'),
    ('pct_smallmf', 'smallmf', 'total_hu'),
    ('pct_bigmf', 'bigmf', 'total_hu'),
    ('pct_medmf', 'medmf', 'total_hu'),
    # Vacancy is the one housing share measured against 'total_hu2' rather
    # than 'total_hu'.
    ('pct_vacant', 'vacant_hu', 'total_hu2'),

    ('pct_since2000', 'since2000', 'total_structure'),
    ('pct_before1950', 'before1950', 'total_structure'),

    ('pct_bach_degree', 'bach_degree', 'pop_over_25'),
    ('pct_hh_kids', 'hh_kids', 'total_hh'),
    ('pct_unemp_civ', 'unemp_civ', 'civ_labor_force'),
    # Population per unit of 'area_sqmi' (presumably square miles -- confirm).
    ('density', 'total_pop', 'area_sqmi'),
]

# Sum the counts within each 'CBSA Title'. Rows whose 'CBSA Title' is missing
# are left out, because groupby drops NaN keys by default.
qtracts_tots = unique_tract.groupby('CBSA Title')[cols].sum()

# Start the stats table from total population, selected by label. The previous
# positional lookup (.iloc[:, 7]) would silently pick a different column if
# `cols` were ever reordered or extended.
qtracts_stats = qtracts_tots[['total_pop']].assign(**{
    stat_name: qtracts_tots[numerator] / qtracts_tots[denominator]
    for stat_name, numerator, denominator in RATIO_SPECS
})

# Persist the per-CBSA summary next to the input data.
qtracts_stats.to_csv(path + 'qtracts_stats.csv')

qtracts_stats.head()

Unnamed: 0_level_0,total_pop,pct_rent,pct_white,pct_hispanic,pct_black,pct_asian,pct_below200pov,hispanic_pov,black_pov,asian_pov,...,pct_smallmf,pct_bigmf,pct_medmf,pct_vacant,pct_since2000,pct_before1950,pct_bach_degree,pct_hh_kids,pct_unemp_civ,density
CBSA Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Bakersfield, CA",91716.0,0.529941,0.296306,0.607789,0.049381,0.020978,0.564073,0.313048,0.337901,0.179255,...,0.127202,0.041196,0.088919,0.060671,0.176925,0.116991,0.08093,0.487131,0.135765,3400.919632
"Fresno, CA",161117.0,0.634228,0.263455,0.519883,0.091573,0.097519,0.617283,0.42478,0.499259,0.370059,...,0.1745,0.089086,0.194364,0.076157,0.086132,0.187395,0.170654,0.391932,0.140613,5680.734789
"Los Angeles-Long Beach-Anaheim, CA",5270425.0,0.661975,0.220629,0.530934,0.092593,0.129324,0.46602,0.262157,0.25259,0.16253,...,0.110713,0.247724,0.201377,0.061344,0.072076,0.307686,0.282013,0.333762,0.093806,9667.09894
"Merced, CA",17792.0,0.537338,0.380789,0.374157,0.067558,0.141918,0.443744,0.324188,0.144863,0.136275,...,0.042209,0.048382,0.066066,0.075075,0.379213,0.093594,0.277091,0.373918,0.184378,2327.57715
"Modesto, CA",7600.0,0.695426,0.530395,0.312237,0.044211,0.058816,0.403111,0.22481,0.380054,0.134091,...,0.138398,0.170309,0.147416,0.067291,0.004509,0.183143,0.179587,0.363704,0.133315,4051.172607
