In [1]:
import pandas as pd, numpy as np
from shapely.geometry import Polygon
from shapely.geometry import Point
import geopandas
import matplotlib.pyplot as plot

In [2]:
# Folder that holds the input data; the summary CSV is written here as well.
path = 'Neighborhoods/'

# Parse the identifier columns as text instead of letting pandas infer a
# numeric dtype for them.
dtypes = dict.fromkeys(
    ['Geo_STATE', 'Geo_FIPS', 'Geo_COUNTY', 'h_tract', 'FIPS_County'],
    'str',
)

tract_data = pd.read_csv(f'{path}tract_data.csv', sep=',', dtype=dtypes)

tract_data.head()

Unnamed: 0.1,Unnamed: 0,Geo_FIPS,alljobs,ltbajobs,lt40kjobs,county_labor,county_unemp,county_med_rent,county_med_value,CBSA Code,...,bach_degree,pop_over_25,hh_kids,total_hh,med_rent,med_housevalue,unemp_civ,civ_labor_force,area_sqmi,HU_density
0,0,6001400100,763651.0,358749.0,301972.0,862353,61327,1432,593500,41860.0,...,1981.0,2478.0,241.0,1292.0,3202.0,1074100.0,75,1643,2.657,486.262695
1,1,6001400200,948962.0,453334.0,376760.0,862353,61327,1432,593500,41860.0,...,1278.0,1559.0,184.0,813.0,1770.0,978900.0,41,1270,0.23,3534.782471
2,2,6001400300,986396.0,472646.0,397943.0,862353,61327,1432,593500,41860.0,...,2817.0,4124.0,499.0,2439.0,1208.0,912700.0,204,3402,0.427,5711.943848
3,3,6001400400,960234.0,459758.0,387879.0,862353,61327,1432,593500,41860.0,...,2512.0,3303.0,437.0,1798.0,1584.0,848900.0,114,2678,0.272,6610.293945
4,4,6001400500,985696.0,471892.0,399812.0,862353,61327,1432,593500,41860.0,...,1822.0,2943.0,288.0,1643.0,1438.0,683500.0,82,2545,0.227,7237.885742


In [3]:
# Parse the tract / agency / stop identifiers as text.
dtypes2 = dict.fromkeys(
    ['GEOID', 'Agency', 'Stop ID', 'h_tract', 'FIPS_County'],
    'str',
)

# NOTE(review): this file is opened relative to the working directory, not
# under `path` like tract_data.csv -- confirm that is intended.
station_data = pd.read_csv('tract_stop.csv', sep=',', dtype=dtypes2)

# Narrow the table to the identifier, area and geometry columns.
station_columns = [
    'GEOID',
    'Agency',
    'Stop ID',
    'station_area',
    'tract_area',
    'geometry',
    'overlap_area',
]
station_small = station_data[station_columns]
station_small.head()

Unnamed: 0,GEOID,Agency,Stop ID,station_area,tract_area,geometry,overlap_area
0,6001400600,AC Transit,1020280,0.506879,0.301476,"POLYGON ((564978.9780164432 4188758.12380546, ...",0.165578
1,6001400400,AC Transit,1020280,0.506879,0.72077,"POLYGON ((564950.9881445128 4188518.22503095, ...",0.050073
2,6001400300,AC Transit,1020280,0.506879,1.105797,"POLYGON ((564744.9932283605 4188317.651189192,...",0.263398
3,6001401100,AC Transit,1020280,0.506879,0.888693,"POLYGON ((564663.8718861116 4188061.494909854,...",0.027829
4,6001400600,AC Transit,1020370,0.506879,0.301476,"POLYGON ((564738.711670726 4188834.218121335, ...",0.179564


In [4]:
# Attach the tract-level attributes to every station/tract row. The left join
# keeps all rows of station_small, including any whose GEOID has no matching
# Geo_FIPS in tract_data.
merge_data = station_small.merge(
    tract_data,
    how='left',
    left_on='GEOID',
    right_on='Geo_FIPS',
)
merge_data.head()

Unnamed: 0.1,GEOID,Agency,Stop ID,station_area,tract_area,geometry,overlap_area,Unnamed: 0,Geo_FIPS,alljobs,...,bach_degree,pop_over_25,hh_kids,total_hh,med_rent,med_housevalue,unemp_civ,civ_labor_force,area_sqmi,HU_density
0,6001400600,AC Transit,1020280,0.506879,0.301476,"POLYGON ((564978.9780164432 4188758.12380546, ...",0.165578,5,6001400600,1013276.0,...,742.0,1233.0,162.0,713.0,1221.0,572300.0,104,1116,0.115,6200.0
1,6001400400,AC Transit,1020280,0.506879,0.72077,"POLYGON ((564950.9881445128 4188518.22503095, ...",0.050073,3,6001400400,960234.0,...,2512.0,3303.0,437.0,1798.0,1584.0,848900.0,114,2678,0.272,6610.293945
2,6001400300,AC Transit,1020280,0.506879,1.105797,"POLYGON ((564744.9932283605 4188317.651189192,...",0.263398,2,6001400300,986396.0,...,2817.0,4124.0,499.0,2439.0,1208.0,912700.0,204,3402,0.427,5711.943848
3,6001401100,AC Transit,1020280,0.506879,0.888693,"POLYGON ((564663.8718861116 4188061.494909854,...",0.027829,10,6001401100,1025859.0,...,2178.0,3545.0,186.0,2032.0,1324.0,596200.0,306,3182,0.342,5941.520508
4,6001400600,AC Transit,1020370,0.506879,0.301476,"POLYGON ((564738.711670726 4188834.218121335, ...",0.179564,5,6001400600,1013276.0,...,742.0,1233.0,162.0,713.0,1221.0,572300.0,104,1116,0.115,6200.0


In [5]:
# Keep only the first row encountered for each GEOID, so every tract appears
# once no matter how many rows of merge_data reference it.
unique_tract = merge_data.drop_duplicates(subset=['GEOID'], keep='first')

In [6]:
# Count columns that are summed over the tracts of each CBSA.
cols = ['renters', 'occ_HU', 'hispanic', 'black',
       'white', 'asian', 'nhpi', 'total_pop', 'below200pov',
       'total_pov_status', 'rent_occ_HU', 'low_inc_rent_burden',
       'white_pov_tot', 'white_pov', 'black_pov_tot', 'black_pov',
       'asian_pov_tot', 'asian_pov', 'nhpi_pov_tot', 'nhpi_pov',
       'hispanic_pov_tot', 'hispanic_pov', 'sfdetach', 'smallmf', 'medmf',
       'bigmf', 'total_hu', 'vacant_hu', 'total_hu2', 'since2000',
       'before1950', 'total_structure', 'bach_degree', 'pop_over_25',
       'hh_kids', 'total_hh','unemp_civ','civ_labor_force','area_sqmi']

# One row per 'CBSA Title', holding the sum of every count column.
qtracts_tots = unique_tract.groupby('CBSA Title')[cols].sum()

# (output column, numerator, denominator), listed in the order the columns
# appear in the output file. Each statistic is a ratio of two summed totals,
# not an average of per-tract ratios.
ratio_specs = [
    ('pct_rent', 'renters', 'occ_HU'),

    ('pct_white', 'white', 'total_pop'),
    ('pct_hispanic', 'hispanic', 'total_pop'),
    ('pct_black', 'black', 'total_pop'),
    ('pct_asian', 'asian', 'total_pop'),

    ('pct_below200pov', 'below200pov', 'total_pov_status'),

    ('hispanic_pov', 'hispanic_pov', 'hispanic_pov_tot'),
    ('black_pov', 'black_pov', 'black_pov_tot'),
    ('asian_pov', 'asian_pov', 'asian_pov_tot'),
    ('white_pov', 'white_pov', 'white_pov_tot'),

    ('pct_sfdetach', 'sfdetach', 'total_hu'),
    ('pct_smallmf', 'smallmf', 'total_hu'),
    ('pct_bigmf', 'bigmf', 'total_hu'),
    ('pct_medmf', 'medmf', 'total_hu'),
    ('pct_vacant', 'vacant_hu', 'total_hu2'),

    ('pct_since2000', 'since2000', 'total_structure'),
    ('pct_before1950', 'before1950', 'total_structure'),

    ('pct_bach_degree', 'bach_degree', 'pop_over_25'),
    ('pct_hh_kids', 'hh_kids', 'total_hh'),
    ('pct_unemp_civ', 'unemp_civ', 'civ_labor_force'),
    ('density', 'total_pop', 'area_sqmi'),
]

# Seed the summary with total_pop, selected by label rather than by position
# so that editing `cols` cannot silently pick a different column.
qtracts_stats = qtracts_tots[['total_pop']].copy()

for stat_name, numerator, denominator in ratio_specs:
    qtracts_stats[stat_name] = qtracts_tots[numerator] / qtracts_tots[denominator]

qtracts_stats.to_csv(path + 'qtracts_stats.csv')

qtracts_stats.head()

Unnamed: 0_level_0,total_pop,pct_rent,pct_white,pct_hispanic,pct_black,pct_asian,pct_below200pov,hispanic_pov,black_pov,asian_pov,...,pct_smallmf,pct_bigmf,pct_medmf,pct_vacant,pct_since2000,pct_before1950,pct_bach_degree,pct_hh_kids,pct_unemp_civ,density
CBSA Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Bakersfield, CA",156071.0,0.484354,0.308642,0.585054,0.048901,0.031287,0.52652,0.294597,0.318256,0.218983,...,0.132536,0.045147,0.085313,0.065724,0.17377,0.102253,0.116326,0.467344,0.125855,1220.649282
"Fresno, CA",246998.0,0.583973,0.284897,0.49917,0.08192,0.104495,0.579718,0.396618,0.501319,0.352107,...,0.156929,0.079092,0.1744,0.074074,0.089726,0.179157,0.192785,0.384259,0.133464,5342.461025
"Los Angeles-Long Beach-Anaheim, CA",7110398.0,0.620383,0.244352,0.50965,0.082093,0.13663,0.437514,0.248827,0.242873,0.150154,...,0.103567,0.226439,0.185767,0.059767,0.068787,0.284797,0.297419,0.336596,0.090482,8899.589218
"Merced, CA",23479.0,0.594872,0.34533,0.44099,0.05605,0.1259,0.522545,0.389812,0.150918,0.194055,...,0.099827,0.036909,0.079514,0.082239,0.294897,0.191727,0.232093,0.382996,0.192073,2732.34025
"Modesto, CA",23104.0,0.555098,0.429017,0.370369,0.035622,0.110457,0.396569,0.175732,0.244922,0.239249,...,0.109762,0.138647,0.096793,0.055294,0.162226,0.11436,0.201564,0.350056,0.10287,5058.900775
