In [27]:
import geopandas as gpd
import pandas as pd
import os

In [28]:
# Schema: source column name -> output column name
col_names = {'COMMUNITY': 'Community', 'AREA_NUMBE': 'AreaNumber'}

# Load the community-area boundaries from the bronze layer, reading only the
# geometry plus the attribute columns named in the schema, then rename them.
filepath = 'C:\\Users\\estel\\Documents\\python\\projects\\cta\\data\\bronze\\CommAreas_20250202.parquet'
gdf = gpd.read_parquet(filepath, columns=['geometry', *col_names])
gdf = gdf.rename(columns=col_names)

# Quick sanity check: row count, dtypes, first few rows
print('Loaded', len(gdf), 'rows')
print(gdf.dtypes)
display(gdf.head())

Loaded 77 rows
geometry      geometry
Community       object
AreaNumber       int64
dtype: object


Unnamed: 0,geometry,Community,AreaNumber
0,"MULTIPOLYGON (((-87.60914 41.84469, -87.60915 ...",DOUGLAS,35
1,"MULTIPOLYGON (((-87.59215 41.81693, -87.59231 ...",OAKLAND,36
2,"MULTIPOLYGON (((-87.6288 41.80189, -87.62879 4...",FULLER PARK,37
3,"MULTIPOLYGON (((-87.60671 41.81681, -87.6067 4...",GRAND BOULEVARD,38
4,"MULTIPOLYGON (((-87.59215 41.81693, -87.59215 ...",KENWOOD,39


In [29]:
# Tidy the attribute columns: readable community names and a fixed-width integer dtype.

# Source names are upper-case (e.g. 'FULLER PARK'); capitalise each space-separated
# word individually. str.capitalize() also lower-cases the rest of each word.
gdf['Community'] = gdf['Community'].apply(
    lambda name: ' '.join(word.capitalize() for word in name.split(' '))
)

# Pin the integer width explicitly: astype(int) resolves to the platform default
# integer, which is 32-bit on Windows with NumPy < 2 and would silently narrow a
# column that was loaded as int64.
gdf['AreaNumber'] = gdf['AreaNumber'].astype('int64')

display(gdf.head())

Unnamed: 0,geometry,Community,AreaNumber
0,"MULTIPOLYGON (((-87.60914 41.84469, -87.60915 ...",Douglas,35
1,"MULTIPOLYGON (((-87.59215 41.81693, -87.59231 ...",Oakland,36
2,"MULTIPOLYGON (((-87.6288 41.80189, -87.62879 4...",Fuller Park,37
3,"MULTIPOLYGON (((-87.60671 41.81681, -87.6067 4...",Grand Boulevard,38
4,"MULTIPOLYGON (((-87.59215 41.81693, -87.59215 ...",Kenwood,39


In [30]:
# Final column order for the output: AreaNumber, Community, geometry.
# Any column not listed here is dropped; the frame shown by the previous cell
# already has exactly these three, so in practice this only reorders them.
select_cols = ['AreaNumber', 'Community', 'geometry']
gdf = gdf[select_cols]

display(gdf)

Unnamed: 0,AreaNumber,Community,geometry
0,35,Douglas,"MULTIPOLYGON (((-87.60914 41.84469, -87.60915 ..."
1,36,Oakland,"MULTIPOLYGON (((-87.59215 41.81693, -87.59231 ..."
2,37,Fuller Park,"MULTIPOLYGON (((-87.6288 41.80189, -87.62879 4..."
3,38,Grand Boulevard,"MULTIPOLYGON (((-87.60671 41.81681, -87.6067 4..."
4,39,Kenwood,"MULTIPOLYGON (((-87.59215 41.81693, -87.59215 ..."
...,...,...,...
72,74,Mount Greenwood,"MULTIPOLYGON (((-87.69646 41.70714, -87.69644 ..."
73,75,Morgan Park,"MULTIPOLYGON (((-87.64215 41.68508, -87.64249 ..."
74,76,Ohare,"MULTIPOLYGON (((-87.83658 41.9864, -87.83658 4..."
75,77,Edgewater,"MULTIPOLYGON (((-87.65456 41.99817, -87.65456 ..."


In [32]:
# Write the community areas out as GeoJSON for easier plotting.
# Note: filepath is a directory here and already ends with a path separator.
filename = 'DimCommunityArea.json'
filepath = 'C:\\Users\\estel\\Documents\\python\\projects\\cta\\data\\silver\\'
gdf.to_file(f'{filepath}{filename}', driver='GeoJSON')

# Verify: list the output directory so the new file shows up in the cell output
os.listdir(filepath)

['DimCommunityArea.json',
 'DimCommunityArea.parquet',
 'DimParkRide.parquet',
 'DimRailLine.parquet',
 'DimRailStation.parquet',
 'FactStationEntries.parquet']