# Cleaning geospatial boundary data
This notebook links to the raw geospatial boundary data for the city of Chicago and shows exactly how I cleaned that data for use in analysis.

In [33]:
import os

import geopandas as gpd
# Raw shapefiles are read from this directory.
raw_data_path = "./boundaries_geospatial_raw/"
# Cleaned shapefiles are written to this directory.
clean_data_path = "./boundaries_geospatial_clean/"

# Create the output directory up front so the save cells below do not fail
# on a fresh checkout where it does not exist yet.
os.makedirs(clean_data_path, exist_ok=True)

### Community Areas boundary shape data
Source: https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Community-Areas-current-/cauq-8yn6

Reference Map of Chicago community areas: https://robparal.com/wp-content/uploads/CommunityAreaReferenceMap.pdf

In [55]:
# Load the raw Chicago community-area boundaries from the chi_commareas_2018 download
commareas_shp = raw_data_path + "chi_commareas_2018/geo_export_a4ae8b3f-a0a1-4b16-a202-eaf896cc4214.shp"
commareas = gpd.read_file(commareas_shp)

In [56]:
# List the column names to see which attributes the raw shapefile provides
commareas.columns

Index(['area', 'area_num_1', 'area_numbe', 'comarea', 'comarea_id',
       'community', 'perimeter', 'shape_area', 'shape_len', 'geometry'],
      dtype='object')

In [57]:
# Remove redundant attributes and rename the remaining area-number column.
# Reassigning (instead of inplace=True) together with errors="ignore" keeps this
# cell re-runnable: a second run no longer raises KeyError for columns that were
# already dropped, and renaming an already-renamed column is a no-op.
commareas = (
    commareas
    .drop(
        columns=["area", "area_num_1", "comarea", "comarea_id", "perimeter"],
        errors="ignore",
    )
    .rename(columns={"area_numbe": "area_num"})
)
commareas.columns

Index(['area_num', 'community', 'shape_area', 'shape_len', 'geometry'], dtype='object')

In [58]:
# Store the community-area number as a 64-bit integer, then show the resulting dtypes
commareas["area_num"] = commareas["area_num"].astype("int64")
commareas.dtypes

area_num         int64
community       object
shape_area     float64
shape_len      float64
geometry      geometry
dtype: object

In [59]:
# Save cleaned data.
# GeoDataFrame.to_file has no `index_label` parameter (that is a pandas `to_csv`
# option); the supported switch for leaving the index out of the file is `index`.
commareas.to_file(clean_data_path + "chi_commareas_2018_clean.shp", index=False)

In [62]:
# Show the community names contained in the cleaned data
commareas["community"].values

array(['DOUGLAS', 'OAKLAND', 'FULLER PARK', 'GRAND BOULEVARD', 'KENWOOD',
       'LINCOLN SQUARE', 'WASHINGTON PARK', 'HYDE PARK', 'WOODLAWN',
       'ROGERS PARK', 'JEFFERSON PARK', 'FOREST GLEN', 'NORTH PARK',
       'ALBANY PARK', 'PORTAGE PARK', 'IRVING PARK', 'DUNNING',
       'MONTCLARE', 'BELMONT CRAGIN', 'WEST RIDGE', 'HERMOSA', 'AVONDALE',
       'LOGAN SQUARE', 'HUMBOLDT PARK', 'WEST TOWN', 'AUSTIN',
       'WEST GARFIELD PARK', 'EAST GARFIELD PARK', 'NEAR WEST SIDE',
       'NORTH LAWNDALE', 'UPTOWN', 'SOUTH LAWNDALE', 'LOWER WEST SIDE',
       'NEAR SOUTH SIDE', 'ARMOUR SQUARE', 'NORWOOD PARK',
       'NEAR NORTH SIDE', 'LOOP', 'SOUTH SHORE', 'CHATHAM', 'AVALON PARK',
       'SOUTH CHICAGO', 'BURNSIDE', 'MCKINLEY PARK', 'LAKE VIEW',
       'CALUMET HEIGHTS', 'ROSELAND', 'NORTH CENTER', 'PULLMAN',
       'SOUTH DEERING', 'EAST SIDE', 'WEST PULLMAN', 'RIVERDALE',
       'HEGEWISCH', 'GARFIELD RIDGE', 'ARCHER HEIGHTS', 'BRIGHTON PARK',
       'BRIDGEPORT', 'NEW CITY', 'WEST EL

### Zip codes boundary shape data
Source: https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-ZIP-Codes/gdcf-axmw

In [63]:
# Load the raw Chicago zip-code boundaries from the chi_zip_codes_2020 download
zip_codes_shp = raw_data_path + "chi_zip_codes_2020/geo_export_d4cf5267-d480-4b46-bde6-3996533d98ab.shp"
zip_codes = gpd.read_file(zip_codes_shp)

In [64]:
# Check out the attributes in this dataset:
# print the column names, then show the table itself as the cell output
print(zip_codes.columns)
zip_codes

Index(['objectid', 'shape_area', 'shape_len', 'zip', 'geometry'], dtype='object')


Unnamed: 0,objectid,shape_area,shape_len,zip,geometry
0,33.0,1.060523e+08,42720.044406,60647,"POLYGON ((-87.67762 41.91776, -87.67761 41.917..."
1,34.0,1.274761e+08,48103.782721,60639,"POLYGON ((-87.72683 41.92265, -87.72693 41.922..."
2,35.0,4.506904e+07,27288.609612,60707,"POLYGON ((-87.78500 41.90915, -87.78531 41.909..."
3,36.0,7.085383e+07,42527.989679,60622,"POLYGON ((-87.66707 41.88885, -87.66707 41.888..."
4,37.0,9.903962e+07,47970.140153,60651,"POLYGON ((-87.70656 41.89555, -87.70672 41.895..."
...,...,...,...,...,...
56,57.0,1.552855e+08,53406.915617,60623,"POLYGON ((-87.69479 41.83008, -87.69486 41.830..."
57,58.0,2.111148e+08,58701.325375,60629,"POLYGON ((-87.68306 41.75786, -87.68306 41.757..."
58,59.0,2.116961e+08,58466.160298,60620,"POLYGON ((-87.62373 41.72167, -87.62388 41.721..."
59,60.0,1.254243e+08,52377.854541,60637,"POLYGON ((-87.57691 41.79511, -87.57700 41.795..."


In [29]:
# Keep only the listed columns, with zip first (objectid is left out)
zip_columns = ["zip", "shape_area", "shape_len", "geometry"]
zip_codes = zip_codes[zip_columns]
zip_codes

Unnamed: 0,zip,shape_area,shape_len,geometry
0,60647,1.060523e+08,42720.044406,"POLYGON ((-87.67762 41.91776, -87.67761 41.917..."
1,60639,1.274761e+08,48103.782721,"POLYGON ((-87.72683 41.92265, -87.72693 41.922..."
2,60707,4.506904e+07,27288.609612,"POLYGON ((-87.78500 41.90915, -87.78531 41.909..."
3,60622,7.085383e+07,42527.989679,"POLYGON ((-87.66707 41.88885, -87.66707 41.888..."
4,60651,9.903962e+07,47970.140153,"POLYGON ((-87.70656 41.89555, -87.70672 41.895..."
...,...,...,...,...
56,60623,1.552855e+08,53406.915617,"POLYGON ((-87.69479 41.83008, -87.69486 41.830..."
57,60629,2.111148e+08,58701.325375,"POLYGON ((-87.68306 41.75786, -87.68306 41.757..."
58,60620,2.116961e+08,58466.160298,"POLYGON ((-87.62373 41.72167, -87.62388 41.721..."
59,60637,1.254243e+08,52377.854541,"POLYGON ((-87.57691 41.79511, -87.57700 41.795..."


In [30]:
# Save cleaned data.
# GeoDataFrame.to_file has no `index_label` parameter (that is a pandas `to_csv`
# option); the supported switch for leaving the index out of the file is `index`.
zip_codes.to_file(clean_data_path + "chi_zip_2020_clean.shp", index=False)

### City Boundary Shape Data
Source: https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-City/ewy2-6yfk

No major cleaning is needed at this time, so the data is saved to the clean directory as-is. Feel free to extend this notebook if the data needs cleaning in the future!

In [31]:
# Chicago city boundaries: read the raw shapefile and write it unchanged under the clean name.
city = gpd.read_file(raw_data_path + "chi_city_boundaries/geo_export_800a0aed-6a3b-4680-9314-96031821fd31.shp")
# `index=False` replaces `index_label`, which GeoDataFrame.to_file does not define.
city.to_file(clean_data_path + "chi_city_2017_clean.shp", index=False)

### Census Tracts Shape Data
Source: https://data.cityofchicago.org/Facilities-Geographic-Boundaries/Boundaries-Census-Tracts-2010/5jrd-6zik

No major cleaning is needed at this time, so the data is saved to the clean directory as-is.

In [32]:
# Chicago census tracts: read the raw shapefile and write it unchanged under the clean name.
# A dedicated variable is used so the `city` GeoDataFrame holding the city
# boundaries is no longer overwritten with census-tract data.
census_tracts = gpd.read_file(raw_data_path + "chi_census_tracts_2010/geo_export_7a79e542-afd7-4706-bfac-74bae2f5488c.shp")
# `index=False` replaces `index_label`, which GeoDataFrame.to_file does not define.
census_tracts.to_file(clean_data_path + "chi_census_tracts_2010_clean.shp", index=False)