In [2]:
import pandas as pd
import geopandas as gpd

### Remove Duplicate Shapes and Shapes without Population

In [None]:
# Population table; its "CENSUS BLOCK" column supplies the ids used to filter block shapes below.
pop_df = pd.read_csv("Data/Population_by_2010_Census_Block.csv", header = 0)

In [None]:
# Original block shapes, before duplicate and no-population rows are removed.
blocks = gpd.read_file("Data/blocks_original/blocks.shp")

In [None]:
# Ids present in the population table, held in a set for membership tests.
# Despite the name, the values come from the "CENSUS BLOCK" column.
valid_tracts = set(pop_df["CENSUS BLOCK"])

In [None]:
# Row count before cleaning, for comparison with the count after filtering.
len(blocks)

In [None]:
# Keep the first shape for every block id that has population data; report
# duplicate shapes and shapes whose id is absent from the population table.
passed_ids = set()
new_gdf = []
missing_pop = []

for i, row in blocks.iterrows():
    block_id = row["tract_bloc"]
    # Cast once and reuse the result so the "keep" test and the "missing
    # population" test agree on the id's type.
    # NOTE(review): assumes the population ids were parsed as integers - confirm.
    has_population = int(block_id) in valid_tracts

    if block_id in passed_ids:
        print("Row " + str(i) + " is a duplicate")
    elif has_population:
        passed_ids.add(block_id)
        new_gdf.append(row)

    if not has_population:
        missing_pop.append(row)
        print("Row " + str(i) + " has no population data")

In [None]:
# Rebuild the layer from the kept rows. The CRS is passed explicitly because a
# frame built from a list of rows does not inherit it from the source layer.
blocks = gpd.GeoDataFrame(new_gdf, columns=list(blocks.columns), crs=blocks.crs)

In [None]:
# Row count after cleaning.
len(blocks)

In [None]:
# Persist the cleaned block layer as a shapefile.
blocks.to_file(driver="ESRI Shapefile", filename = "Data/blocks_cleaned/blocks.shp")

The cleaned block shapefile was then merged with the population file in GeoDa, outside this notebook.

### Spatial Join with Parks

In [70]:
# Load the cleaned block layer and the original park layer for the spatial join.
blocks = gpd.read_file("Data/blocks_cleaned/blocks.shp")
parks = gpd.read_file("Data/parks_orig/parks.shp")

In [None]:
# Number of blocks before the join.
len(blocks)

In [4]:
# Left join keeps every block; a block gets one output row per park it
# intersects, so the result can have more rows than `blocks`.
# NOTE(review): newer geopandas releases rename `op` to `predicate` - confirm the installed version.
bp = gpd.sjoin(blocks, parks, how="left", op='intersects')

In [5]:
# Row count of the joined frame.
len(bp)

46334

In [None]:
# Collapse the join result to one row per block by keeping the first row seen
# for each "tract_bloc" id. drop_duplicates keeps the frame's columns, dtypes,
# index labels and CRS, which rebuilding from a list of rows would not.
bp = bp.drop_duplicates(subset="tract_bloc", keep="first")

In [None]:
# Row count after collapsing to one row per block id.
len(bp)

In [17]:
# 1 when the block matched a park in the spatial join ("index_right" is set),
# 0 when the left join found no park for it.
has_park = bp["index_right"].notnull().astype(int).tolist()

# Assigned by position: this relies on `bp` holding exactly one row per row of
# `blocks` (presumably in the same order - confirm); a length mismatch raises.
blocks["has_park"] = has_park

In [None]:
# Save the block layer (now carrying "has_park"); note it is `blocks`, not the
# joined frame `bp`, that is written to bp.shp.
blocks.to_file(driver="ESRI Shapefile", filename = "Data/blocks_and_parks/bp.shp")

### Adding Crime Points

In [22]:
# Crime layer (joined with op="within" below) and the block layer saved with "has_park".
crime = gpd.read_file("Data/crime_original/crime.shp")
blocks = gpd.read_file("Data/blocks_and_parks/bp.shp")

In [25]:
# Tag each crime point with the block that contains it. The join uses the
# freshly loaded `blocks` layer: the in-memory `bp` frame still carries the
# "index_right" column from the parks join, which sjoin does not accept on input.
crime_join = gpd.sjoin(crime, blocks, how="left", op="within")



In [47]:
# Number of crime points per block id. Crimes that fell inside no block have a
# missing "tract_bloc" after the left join and are left out by groupby.
crime_df = (
    crime_join.groupby("tract_bloc")
    .size()
    .reset_index(name="crime_count")
)

In [66]:
# Attach crime counts to the block layer that carries "has_park".
# NOTE(review): this is an inner merge, so blocks with no recorded crime are
# dropped from `bpc` - confirm that is intended.
bpc = blocks.merge(crime_df, on="tract_bloc")

In [68]:
# Save blocks + parks + crime counts.
# NOTE(review): the next section reads "Data/BlocksParksCrimes/bpc.shp", not this
# path - confirm whether the file is moved or renamed by hand in between.
bpc.to_file(driver="ESRI Shapefile", filename = "Data/blocks_parks_crimes/bpc.shp")

### Calculate Distance to Nearest Park for Every Block

In [92]:
# Block layer with park and crime attributes, plus a park layer read from a
# different file ("parks_final") than the one used for the spatial join earlier.
# NOTE(review): no cell in this notebook writes either path - presumably prepared elsewhere; confirm.
blocks = gpd.read_file("Data/BlocksParksCrimes/bpc.shp")
parks = gpd.read_file("Data/parks_final/parks_final.shp")

Warning: The following takes an extremely long time to run

In [94]:
def _distance_to_nearest_park(block_geom):
    """Return the smallest distance from ``block_geom`` to any geometry in ``parks``."""
    return parks.distance(block_geom).min()


# One pass over every park per block, hence the long runtime.
# NOTE(review): distances are in the units of the layers' CRS - confirm both
# layers share a projected CRS.
blocks['min_dist_to_park'] = blocks.geometry.apply(_distance_to_nearest_park)

In [95]:
# Save the block layer with the new "min_dist_to_park" column.
blocks.to_file(driver="ESRI Shapefile", filename = "Data/test_parkdistance/parkdist.shp")

### Calculate Block Area

In [2]:
# NOTE(review): no cell in this notebook writes "Data/final_data/final_data.shp";
# presumably it is produced outside the notebook - confirm.
blocks = gpd.read_file("Data/final_data/final_data.shp")

In [7]:
# Area of each block geometry.
# NOTE(review): units are those of the layer's CRS - confirm it is projected.
blocks["Area"] = blocks.geometry.area

In [8]:
# Save the block layer with the new "Area" column.
blocks.to_file("Data/final_area/final_area.shp")

### Assign Blocks to Community Area

In [9]:
# Reload the block layer written at the end of the area step.
blocks = gpd.read_file("Data/final_area/final_area.shp")

In [10]:
# Layer joined to the blocks below; unlike the other inputs, this path is not under "Data/".
ca = gpd.read_file("ComArea/ComArea_ACS14_f.shp")

In [14]:
# Left join keeps every block; a block intersecting several `ca` shapes yields
# one output row per match, so duplicates by "tract_bloc" are expected.
# NOTE(review): newer geopandas releases rename `op` to `predicate` - confirm the installed version.
blocks_with_ca = gpd.sjoin(blocks, ca, how="left", op='intersects')

In [16]:
# Keep the first joined row for every block id and report the rest.
passed_ids = set()
new_gdf = []

for i, row in blocks_with_ca.iterrows():
    block_id = row["tract_bloc"]
    if block_id in passed_ids:
        # The left join emits one row per match, so a block that intersects
        # several shapes of the joined layer appears more than once.
        print("Row " + str(i) + " is a duplicate")
    else:
        passed_ids.add(block_id)
        new_gdf.append(row)

Row 9 is a duplicate
Row 10 is a duplicate
Row 46 is a duplicate
Row 59 is a duplicate
Row 103 is a duplicate
Row 103 is a duplicate
Row 124 is a duplicate
Row 148 is a duplicate
Row 148 is a duplicate
Row 149 is a duplicate
Row 161 is a duplicate
Row 163 is a duplicate
Row 166 is a duplicate
Row 171 is a duplicate
Row 185 is a duplicate
Row 189 is a duplicate
Row 200 is a duplicate
Row 221 is a duplicate
Row 227 is a duplicate
Row 229 is a duplicate
Row 230 is a duplicate
Row 265 is a duplicate
Row 266 is a duplicate
Row 286 is a duplicate
Row 293 is a duplicate
Row 307 is a duplicate
Row 311 is a duplicate
Row 347 is a duplicate
Row 348 is a duplicate
Row 349 is a duplicate
Row 360 is a duplicate
Row 430 is a duplicate
Row 438 is a duplicate
Row 445 is a duplicate
Row 446 is a duplicate
Row 466 is a duplicate
Row 502 is a duplicate
Row 509 is a duplicate
Row 515 is a duplicate
Row 515 is a duplicate
Row 515 is a duplicate
Row 545 is a duplicate
Row 565 is a duplicate
Row 574 is a dup

In [21]:
# Only needed when the loop above reported duplicates; otherwise skip this cell.
# The CRS is passed explicitly because a frame built from a list of rows does
# not inherit it, and the result is written to a shapefile next.
blocks_with_ca = gpd.GeoDataFrame(
    new_gdf,
    columns=list(blocks_with_ca.columns),
    crs=blocks_with_ca.crs,
)

In [22]:
# Save the block layer with the joined `ca` attributes attached.
blocks_with_ca.to_file("Data/final_commarea/final_commarea.shp")