Analysis for 311 flooding story in this notebook.

## LOAD AND CLEAN DATA

In [71]:
import pandas as pd
import geopandas as gpd

In [72]:
# water in basement complaint short code = AAF
basement_source_file = 'https://data.cityofchicago.org/resource/v6vf-nfxy.json?sr_short_code=AAF&$limit=1000000'

# water on street complaint short code = AAE
street_source_file = 'https://data.cityofchicago.org/resource/v6vf-nfxy.json?sr_short_code=AAE&$limit=1000000'

In [73]:
# load basement data
basement = pd.read_json(basement_source_file)
print("Number of rows:", basement.shape[0])
print("Columns:", list(basement.columns))

Number of rows: 36472
Columns: ['sr_number', 'sr_type', 'sr_short_code', 'owner_department', 'status', 'origin', 'created_date', 'last_modified_date', 'closed_date', 'street_address', 'zip_code', 'street_number', 'street_direction', 'street_name', 'street_type', 'duplicate', 'legacy_record', 'community_area', 'ward', 'electricity_grid', 'police_sector', 'police_district', 'police_beat', 'precinct', 'created_hour', 'created_day_of_week', 'created_month', 'x_coordinate', 'y_coordinate', 'latitude', 'longitude', 'location', 'created_department', 'parent_sr_number', 'city', 'state', 'electrical_district', 'sanitation_division_days']


In [7]:
# load street data
street = pd.read_json(street_source_file)
print("Number of rows:", street.shape[0])
print("Columns:", list(street.columns))

Number of rows: 39307
Columns: ['sr_number', 'sr_type', 'sr_short_code', 'owner_department', 'status', 'origin', 'created_date', 'last_modified_date', 'closed_date', 'street_address', 'street_number', 'street_direction', 'street_name', 'street_type', 'duplicate', 'legacy_record', 'community_area', 'ward', 'electricity_grid', 'police_sector', 'police_district', 'police_beat', 'precinct', 'created_hour', 'created_day_of_week', 'created_month', 'x_coordinate', 'y_coordinate', 'latitude', 'longitude', 'location', 'created_department', 'zip_code', 'parent_sr_number', 'city', 'state', 'electrical_district', 'sanitation_division_days']


In [74]:
# append the two datasets
complaints = pd.concat([basement,street], ignore_index=True)

In [75]:
# change data types and add time cols
for date_col in ['created_date', 'last_modified_date', 'closed_date']:
    complaints[date_col] = pd.to_datetime(complaints[date_col])
    complaints['created_year'] = complaints['created_date'].dt.year
    complaints['created_month'] = complaints['created_date'].dt.month
    complaints['created_month_year'] = complaints['created_date'].dt.strftime('%m/%Y')
    complaints['created_day'] = complaints['created_date'].dt.strftime('%Y-%m-%d')
    
for date_col in ['created_date', 'last_modified_date', 'closed_date']:
    street[date_col] = pd.to_datetime(street[date_col])
    street['created_year'] = street['created_date'].dt.year
    street['created_month'] = street['created_date'].dt.month
    street['created_month_year'] = street['created_date'].dt.strftime('%m/%Y')
    street['created_day'] = street['created_date'].dt.strftime('%Y-%m-%d')
    
for date_col in ['created_date', 'last_modified_date', 'closed_date']:
    basement[date_col] = pd.to_datetime(basement[date_col])
    basement['created_year'] = basement['created_date'].dt.year
    basement['created_month'] = basement['created_date'].dt.month
    basement['created_month_year'] = basement['created_date'].dt.strftime('%m/%Y')
    basement['created_day'] = basement['created_date'].dt.strftime('%Y-%m-%d')

In [76]:
# first and last basement flooding complaint date?
basement.sort_values('created_date', ascending=True).tail(1)

Unnamed: 0,sr_number,sr_type,sr_short_code,owner_department,status,origin,created_date,last_modified_date,closed_date,street_address,...,location,created_department,parent_sr_number,city,state,electrical_district,sanitation_division_days,created_year,created_month_year,created_day
19382,SR23-01169952,Water in Basement Complaint,AAF,DWM - Department of Water Management,Open,Alderman's Office,2023-07-19 12:13:57,2023-07-19 12:13:59,NaT,5019 S ROCKWELL ST,...,"{'latitude': '41.80223700094014', 'longitude':...",Alderman,,Chicago,Illinois,,,2023,07/2023,2023-07-19


In [77]:
basement.sort_values('created_date', ascending=True).head(1)

Unnamed: 0,sr_number,sr_type,sr_short_code,owner_department,status,origin,created_date,last_modified_date,closed_date,street_address,...,location,created_department,parent_sr_number,city,state,electrical_district,sanitation_division_days,created_year,created_month_year,created_day
14994,SR18-00201178,Water in Basement Complaint,AAF,DWM - Department of Water Management,Completed,Phone Call,2018-12-19 18:02:57,2020-10-30 08:50:06,2020-10-30 08:50:06,5402 S JUSTINE ST,...,"{'latitude': '41.795669', 'longitude': '-87.66...",,,,,11.0,Wednesday,2018,12/2018,2018-12-19


In [12]:
# first and last street flooding complaint date?
street.sort_values('created_date', ascending=True).tail(1)

Unnamed: 0,sr_number,sr_type,sr_short_code,owner_department,status,origin,created_date,last_modified_date,closed_date,street_address,...,created_department,zip_code,parent_sr_number,city,state,electrical_district,sanitation_division_days,created_year,created_month_year,created_day
3582,SR23-01164849,Water On Street Complaint,AAE,DWM - Department of Water Management,Open,Internet,2023-07-18 17:59:14,2023-07-18 18:02:42,NaT,3853 N AVERS AVE,...,,60618.0,SR23-01042408,Chicago,Illinois,,,2023,07/2023,2023-07-18


In [78]:
street.sort_values('created_date', ascending=True).head(1)

Unnamed: 0,sr_number,sr_type,sr_short_code,owner_department,status,origin,created_date,last_modified_date,closed_date,street_address,...,parent_sr_number,city,state,electrical_district,sanitation_division_days,created_year,created_month_year,created_day,community,area_num_1
9616,SR19-00001023,Water On Street Complaint,AAE,DWM - Department of Water Management,Completed,Mobile Device,2019-01-01 10:33:02,2020-02-13 18:11:15,2019-12-05 09:37:55,4638 N MONTICELLO AVE,...,,,,1.0,Friday,2019,01/2019,2019-01-01,ALBANY PARK,14.0


In [79]:
# check date data for completeness
print("Missing created date:", complaints['created_date'].isna().sum())
print("Missing last modified date:", complaints['last_modified_date'].isna().sum())
print("Missing closed date:", complaints['closed_date'].isna().sum())
print("Missing community area:", complaints['community_area'].isna().sum())

Missing created date: 0
Missing last modified date: 0
Missing closed date: 1085
Missing community area: 31


In [80]:
# add community area name
areas_gdf = gpd.read_file('../geographies/Boundaries - Community Areas (current).geojson')
areas_df = areas_gdf[['community', 'area_num_1']].copy()
areas_df['area_num_1'] = areas_df['area_num_1'].astype(float)

# merge with complaints
complaints = complaints.merge(areas_df, left_on = 'community_area', right_on = 'area_num_1')

# merge with streets and basements
street = street.merge(areas_df, left_on = 'community_area', right_on = 'area_num_1')
basement = basement.merge(areas_df, left_on = 'community_area', right_on = 'area_num_1')

In [81]:
# make dataframes just start in 2019
basement = basement[basement['created_date'] > '2019-01-01'].copy()
street = street[street['created_date'] > '2019-01-01'].copy()
complaints = complaints[complaints['created_date'] > '2019-01-01'].copy()

## FINDINGS

### Top level

In [82]:
# how many basement vs street flooding complaints in total have there been?
g = complaints.groupby('sr_type').size().reset_index()
g['pct'] = g[0]/g[0].sum()
g

Unnamed: 0,sr_type,0,pct
0,Water On Street Complaint,39176,0.518112
1,Water in Basement Complaint,36437,0.481888


In [83]:
len(complaints)

75613

In [84]:
# aggregate by day
complaints_by_day = complaints.groupby('created_day').size().reset_index(name='complaints')

July 5th topped July 2nd for the most complaints in the last four years.

In [85]:
# top ten complaint days?
complaints_by_day.sort_values('complaints', ascending=False).head(10)

Unnamed: 0,created_day,complaints
1634,2023-07-05,2286
1631,2023-07-02,2003
490,2020-05-17,1767
1632,2023-07-03,1510
1337,2022-09-11,1457
1635,2023-07-06,1392
1641,2023-07-12,1194
1640,2023-07-11,996
1633,2023-07-04,932
1636,2023-07-07,875


In [86]:
# aggregate by month
complaints_by_month = complaints.groupby('created_month_year').size().reset_index(name='complaints')

In [89]:
complaints_by_month.sort_values('complaints', ascending=False).head(10)

Unnamed: 0,created_month_year,complaints
34,07/2023,14363
21,05/2020,5577
20,05/2019,3762
42,09/2022,3699
27,06/2021,2873
9,02/2023,2196
33,07/2022,2082
25,06/2019,1819
15,04/2019,1772
23,05/2022,1724


There were more than double the number of basement flooding and street flooding complaints in the first half of July than the next highest month in the last four years. 

In [88]:
# July 2023 (as of July 19) out of the next highest month which was May 2020
14363/5577

2.5753989600143448

In [90]:
# aggregate by year
complaints_by_year = complaints.groupby('created_year').size().reset_index(name='complaints')

In [91]:
complaints_by_year.sort_values('complaints', ascending=False).head(10)

Unnamed: 0,created_year,complaints
4,2023,20698
3,2022,15608
1,2020,14830
0,2019,14507
2,2021,9970


In [92]:
# aggregate basement complaints by day
basement_by_day = basement.groupby('created_day').size().reset_index(name='complaints')

There were more than 2000 basement complaints on July 5th.

In [93]:
# top 10 basement complaints by day
basement_by_day.sort_values('complaints', ascending=False).head(10)

Unnamed: 0,created_day,complaints
1582,2023-07-05,2090
1579,2023-07-02,1483
1580,2023-07-03,1410
1583,2023-07-06,1237
1289,2022-09-11,1081
1581,2023-07-04,907
1589,2023-07-12,857
1584,2023-07-07,818
453,2020-05-17,713
1587,2023-07-10,697


In [94]:
basement_by_day.describe()

Unnamed: 0,complaints
count,1597.0
mean,22.815905
std,103.387517
min,1.0
25%,4.0
50%,8.0
75%,12.0
max,2090.0


In [95]:
# aggregate basement complaints by month
basement_by_month = basement.groupby('created_month_year').size().reset_index(name='complaints')

No other month since 2019, the earliest year 311 flooding data is available publicly, comes close to July’s current volume of reports. 


In [105]:
12174/2988

4.07429718875502

In [96]:
# top 10 basement complaints by month
basement_by_month.sort_values('complaints', ascending=False).head(10)

Unnamed: 0,created_month_year,complaints
34,07/2023,12174
42,09/2022,2988
21,05/2020,2785
33,07/2022,1316
20,05/2019,1312
27,06/2021,1110
16,04/2020,623
9,02/2023,598
43,10/2019,595
15,04/2019,594


In [31]:
# aggregate basement complaints by year
basement_by_year = basement.groupby('created_year').size().reset_index(name='complaints')

In [97]:
# basement complaints by year
basement_by_year.sort_values('complaints', ascending=False)

Unnamed: 0,created_year,complaints
4,2023,14042
3,2022,7401
1,2020,6202
0,2019,4517
2,2021,4173


There were nearly the same number of complaints about basement flooding in this month alone than in the last two years combined

In [34]:
# number of basement flooding complaints in 2022 and 2021
7401 + 4173

11574

In [99]:
12174 - 11574

600

Green wasn’t alone. On July 2, 2023, a record-setting rainstorm dumped 9 inches of rain on some parts of the Chicago area. Over 1,400 Chicagoans filed reports of flooded basements to 311, the city’s non-emergency helpline. 


In [100]:
# aggregate basement flooding complaints by day for July 2023

july_basement_by_day = basement[basement['created_month_year'] == '07/2023'].groupby('created_day').size().reset_index(name='complaints')

july_basement_by_day['pct'] = july_basement_by_day['complaints']/july_basement_by_day['complaints'].sum()
july_basement_by_day

Unnamed: 0,created_day,complaints,pct
0,2023-07-01,13,0.001068
1,2023-07-02,1483,0.121817
2,2023-07-03,1410,0.115821
3,2023-07-04,907,0.074503
4,2023-07-05,2090,0.171677
5,2023-07-06,1237,0.10161
6,2023-07-07,818,0.067192
7,2023-07-08,311,0.025546
8,2023-07-09,207,0.017003
9,2023-07-10,697,0.057253


From July 2 to July 18, over 12,000 basement flooding reports were filed with 311

In [102]:
# get july 2 through july 18 
july_basement_by_day['complaints'].sum() - 13 - 64

12097

In [103]:
# aggregate street complaints by month
street_by_month = street.groupby('created_month_year').size().reset_index(name='complaints')

In [106]:
# export monthly counts for basement and street flooding to csv for FLOURISH

# sort by date
basement_by_month = basement_by_month.sort_values('created_month_year')
street_by_month = street_by_month.sort_values('created_month_year')
complaints_by_month = street_by_month.sort_values('created_month_year')

# export to csv
basement_by_month.to_csv('processed/basement_by_month_7.19.23.csv')
street_by_month.to_csv('processed/street_by_month_7.19.23.csv')
complaints_by_month.to_csv('processed/complaints_by_month_7.19.23.csv')

# THEN COPY INTO GOOGLE SHEETS TO SORT BY DATE FOR FLOURISH

That represents more than 8,000 unique addresses in just July 2023.

In [107]:
# de-dupe basement complaints by address
basement[basement['created_month_year'] == '07/2023']['street_address'].nunique()

10029

In [108]:
# percent that are unique calls
basement[basement['created_month_year'] == '07/2023']['street_address'].nunique() / len(basement[basement['created_month_year'] == '07/2023'])

0.8238048299655002

In [109]:
july = basement[basement['created_month_year'] == '07/2023'].copy()

In [110]:
# aggregate street complaints by month
street_by_month = street.groupby('created_month_year').size().reset_index(name='complaints')

Storms in May of 2020 and 2019 produced more water on street complaints. 

In [111]:
street_by_month.sort_values('complaints', ascending=False).head(10)

Unnamed: 0,created_month_year,complaints
21,05/2020,2792
20,05/2019,2450
34,07/2023,2189
27,06/2021,1763
9,02/2023,1598
25,06/2019,1372
23,05/2022,1351
15,04/2019,1178
45,10/2021,1167
18,04/2022,1077


### Ranking of community areas - OLD

In [142]:
basement_top_10_daylist = b['created_day'].tolist()
street_top_10_daylist = s['created_day'].tolist()

In [143]:
# create a df for just top 10 days
top_10_days_basement_df = basement[basement['created_day'].isin(basement_top_10_daylist)]
top_10_days_street_df = street[street['created_day'].isin(street_top_10_daylist)]

In [144]:
# check
len(top_10_days_street_df)

4764

In [145]:
len(top_10_days_basement_df)

11105

In [146]:
# create a pivot for top 10 rain event days and CA
basement_table = pd.pivot_table(top_10_days_basement_df, values='sr_number', index=['community'],
                       columns=['created_day'], aggfunc='count')

In [147]:
basement_table['total'] = basement_table.sum(axis=1)

It does not seem like there's one CA across different rain event days that has gotten more basement flooding than others.

In [148]:
basement_table.sort_values('total', ascending=False).head(15)

created_day,2020-05-17,2022-09-11,2022-09-12,2023-07-02,2023-07-03,2023-07-04,2023-07-05,2023-07-06,2023-07-07,2023-07-10,total
community,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AUSTIN,65.0,6.0,4.0,465.0,475.0,468.0,872.0,546.0,399.0,346.0,3646.0
PORTAGE PARK,37.0,322.0,201.0,195.0,95.0,36.0,53.0,32.0,15.0,19.0,1005.0
WEST GARFIELD PARK,15.0,2.0,1.0,86.0,84.0,63.0,164.0,58.0,53.0,79.0,605.0
HUMBOLDT PARK,19.0,6.0,1.0,59.0,58.0,51.0,109.0,58.0,36.0,28.0,425.0
BELMONT CRAGIN,12.0,8.0,6.0,139.0,57.0,31.0,45.0,26.0,27.0,26.0,377.0
NORTH LAWNDALE,11.0,,1.0,47.0,40.0,32.0,114.0,41.0,28.0,22.0,336.0
AVALON PARK,4.0,,,34.0,120.0,35.0,47.0,26.0,15.0,9.0,290.0
CALUMET HEIGHTS,16.0,,,28.0,57.0,30.0,47.0,27.0,13.0,11.0,229.0
WEST RIDGE,9.0,71.0,123.0,7.0,2.0,,4.0,1.0,1.0,,218.0
EDGEWATER,4.0,149.0,54.0,2.0,,,3.0,2.0,,,214.0


In [149]:
# create a pivot for year and CA
basement_year = pd.pivot_table(basement, values='sr_number', index=['community'],
                       columns=['created_year'], aggfunc='count')

In [150]:
basement_year.sort_values(2023, ascending=False)

created_year,2018,2019,2020,2021,2022,2023
community,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
AUSTIN,,208.0,421.0,317.0,215.0,4458.0
WEST GARFIELD PARK,,22.0,51.0,31.0,32.0,785.0
HUMBOLDT PARK,,68.0,103.0,130.0,77.0,520.0
PORTAGE PARK,,106.0,187.0,86.0,856.0,516.0
BELMONT CRAGIN,,89.0,118.0,109.0,102.0,468.0
...,...,...,...,...,...,...
FULLER PARK,,12.0,8.0,8.0,5.0,8.0
WASHINGTON PARK,,14.0,24.0,11.0,15.0,7.0
MCKINLEY PARK,,17.0,20.0,15.0,19.0,6.0
OAKLAND,,3.0,13.0,4.0,6.0,3.0


In [151]:
# add CA ranks + avg rank
basement_year['2019_rank'] = basement_year[2019].rank(ascending=False)
basement_year['2020_rank'] = basement_year[2020].rank(ascending=False)
basement_year['2021_rank'] = basement_year[2021].rank(ascending=False)
basement_year['2022_rank'] = basement_year[2022].rank(ascending=False)
basement_year['2023_rank'] = basement_year[2023].rank(ascending=False)
basement_year['average_rank'] = basement_year[['2019_rank', '2020_rank', '2021_rank', '2022_rank','2023_rank']].mean(axis=1)

In [153]:
# create total columns
basement_year['total'] = basement_year[2019] + basement_year[2020] + basement_year[2021] + basement_year[2022] + basement_year[2023]
street_year['total'] = street_year[2019] + street_year[2020] + street_year[2021] + street_year[2022] + street_year[2023]

South side neighborhoods on average across the last 4.5 years ranked the highest for basement flooding complaints.

In [171]:
basement_year.sort_values('total', ascending=False).head(20)

created_year,2018,2019,2020,2021,2022,2023,2019_rank,2020_rank,2021_rank,2022_rank,2023_rank,average_rank,total
community,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
AUSTIN,,208.0,421.0,317.0,215.0,4458.0,5.0,1.0,1.0,7.0,1.0,3.0,5619.0
PORTAGE PARK,,106.0,187.0,86.0,856.0,516.0,13.0,9.0,19.0,1.0,4.0,9.2,1751.0
ROSELAND,,250.0,283.0,219.0,180.0,295.0,2.0,3.0,2.0,12.0,8.0,5.4,1227.0
AUBURN GRESHAM,,232.0,215.0,147.0,207.0,138.0,4.0,6.0,4.0,8.5,18.0,8.1,939.0
MOUNT GREENWOOD,,79.0,31.0,13.0,738.0,60.0,18.0,57.0,66.0,2.0,36.5,35.9,921.0
WEST GARFIELD PARK,,22.0,51.0,31.0,32.0,785.0,55.0,40.0,46.5,49.5,2.0,38.6,921.0
HUMBOLDT PARK,,68.0,103.0,130.0,77.0,520.0,21.5,19.0,5.0,28.5,3.0,15.4,898.0
BELMONT CRAGIN,,89.0,118.0,109.0,102.0,468.0,17.0,16.0,7.0,19.0,5.0,12.8,886.0
WASHINGTON HEIGHTS,,239.0,242.0,96.0,133.0,125.0,3.0,5.0,15.5,17.0,20.0,12.1,835.0
WEST PULLMAN,,123.0,195.0,148.0,150.0,200.0,9.0,7.0,3.0,15.0,11.0,9.0,816.0


In [172]:
basement_year['total'].describe()

count      77.000000
mean      448.857143
std       678.045405
min         9.000000
25%       133.000000
50%       267.000000
75%       585.000000
max      5619.000000
Name: total, dtype: float64

In [155]:
street_year = pd.pivot_table(street, values='sr_number', index=['community'],
                       columns=['created_year'], aggfunc='count')

In [156]:
# add CA ranks + avg rank
street_year['2019_rank'] = street_year[2019].rank(ascending=False)
street_year['2020_rank'] = street_year[2020].rank(ascending=False)
street_year['2021_rank'] = street_year[2021].rank(ascending=False)
street_year['2022_rank'] = street_year[2022].rank(ascending=False)
street_year['2023_rank'] = street_year[2023].rank(ascending=False)
street_year['average_rank'] = street_year[['2019_rank', '2020_rank', '2021_rank', '2022_rank','2023_rank']].mean(axis=1)

West Town ranked first in the number of street flooding complaints in the last 4.5 years. After that it's mostly north side and northwest side neighborhoods.

In [157]:
street_year.sort_values('average_rank').head(10)

created_year,2018,2019,2020,2021,2022,2023,2019_rank,2020_rank,2021_rank,2022_rank,2023_rank,average_rank
community,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
WEST TOWN,2.0,473.0,394.0,251.0,386.0,277.0,1.0,1.0,1.0,1.0,2.0,1.2
AUSTIN,1.0,382.0,322.0,199.0,291.0,283.0,2.0,2.0,3.0,3.0,1.0,2.2
LAKE VIEW,4.0,380.0,308.0,176.0,265.0,193.0,3.0,4.0,7.0,4.5,5.0,4.7
LINCOLN PARK,1.0,325.0,239.0,193.0,293.0,180.0,4.0,8.5,5.0,2.0,6.0,5.1
LOGAN SQUARE,3.0,261.0,321.0,192.0,245.0,244.0,9.0,3.0,6.0,7.0,3.0,5.6
BELMONT CRAGIN,1.0,284.0,299.0,195.0,205.0,234.0,5.0,6.0,4.0,13.0,4.0,6.4
PORTAGE PARK,,236.0,200.0,153.0,265.0,167.0,12.0,11.0,9.0,4.5,7.0,8.7
IRVING PARK,,223.0,239.0,205.0,240.0,142.0,14.0,8.5,2.0,8.0,11.0,8.7
WEST RIDGE,2.0,271.0,307.0,154.0,260.0,110.0,6.0,5.0,8.0,6.0,20.0,9.0
LINCOLN SQUARE,,266.0,278.0,136.0,226.0,111.0,7.0,7.0,12.0,10.0,19.0,11.0


In [158]:
# export as csv
basement_year.to_csv('processed/basement_year_7.12.23.csv')

In [161]:
# where did all of the basement flooding happen in July 2023?
g = basement[basement['created_month_year'] == '07/2023'].groupby('community').size().reset_index(name = 'count').sort_values('count', ascending=False).head(10)
g['pct'] = g['count']/g['count'].sum()
g

Unnamed: 0,community,count,pct
5,AUSTIN,4324,0.557504
71,WEST GARFIELD PARK,761,0.098118
32,HUMBOLDT PARK,489,0.063048
57,PORTAGE PARK,473,0.060985
8,BELMONT CRAGIN,425,0.054796
52,NORTH LAWNDALE,394,0.050799
6,AVALON PARK,306,0.039453
13,CALUMET HEIGHTS,228,0.029397
61,ROSELAND,184,0.023724
14,CHATHAM,172,0.022176


### Chronic flooding addresses

In [112]:
# chronic flooding addresses by the number of days
basement_chronic_addresses = basement.groupby('street_address')['created_day'].nunique().reset_index(name='count of days').sort_values('count of days', ascending=False)
basement_chronic_addresses.head(10)

Unnamed: 0,street_address,count of days
4742,1517 N LOCKWOOD AVE,12
22419,7224 S COTTAGE GROVE AVE,10
5452,1654 N SPRINGFIELD AVE,10
24715,832 E 87TH PL,9
21682,6806 S CHAPPEL AVE,9
27101,9611 S UNION AVE,9
15103,4930 W RACE AVE,9
11352,3912 N CLARK ST,9
2219,11226 S VERNON AVE,8
23638,7819 S UNION AVE,8


In [113]:
# number of days
basement_chronic_addresses['count of days'].sum()

34828

In [114]:
# how many addresses have called more than twice?
len(basement_chronic_addresses[basement_chronic_addresses['count of days'] > 1])

5209

In [115]:
# total number of unique addresses
basement['street_address'].nunique()

27571

In [116]:
# percent of addresses
len(basement_chronic_addresses[basement_chronic_addresses['count of days'] > 1]) / basement['street_address'].nunique()

0.18893039788183236

In [117]:
# figure out what community areas they are on
basement_chronic_addresses = basement_chronic_addresses.merge(basement[['street_address', 'community']].drop_duplicates('street_address'), on = 'street_address', how = 'left')

In [118]:
basement_chronic_addresses['count of days'].sum()

34828

In [121]:
# export list of addresses
basement_chronic_addresses.to_csv('processed/basement_chronic_addresses_7.19.23.csv')

In [122]:
# how many households are in what community area?
g = basement_chronic_addresses.groupby('community').size().reset_index(name = 'count of address').sort_values('count of address', ascending=False)
g['pct'] = g['count of address']/g['count of address'].sum()
g.head(30)

Unnamed: 0,community,count of address,pct
5,AUSTIN,4523,0.164049
57,PORTAGE PARK,1304,0.047296
61,ROSELAND,899,0.032607
8,BELMONT CRAGIN,802,0.029089
32,HUMBOLDT PARK,773,0.028037
46,MOUNT GREENWOOD,752,0.027275
71,WEST GARFIELD PARK,748,0.02713
4,AUBURN GRESHAM,688,0.024954
67,WASHINGTON HEIGHTS,610,0.022125
73,WEST PULLMAN,596,0.021617


In [123]:
# export grouped data for flourish
g.to_csv('processed/multi_flood_addresses_by_ca_7.19.23.csv')

In [124]:
# chronic flooding addresses by the year
basement_chronic_addresses_year = basement.groupby('street_address')['created_year'].nunique().reset_index(name='count of years').sort_values('count of years', ascending=False)

# take only addresses that have flooded more  than one year
basement_chronic_addresses_year = basement_chronic_addresses_year[basement_chronic_addresses_year['count of years'] > 1].copy()

basement_chronic_addresses_year

Unnamed: 0,street_address,count of years
15103,4930 W RACE AVE,5
5452,1654 N SPRINGFIELD AVE,5
2210,11221 S DR MARTIN LUTHER KING JR DR,4
1112,10605 S KEDZIE AVE,4
10419,3538 N RETA AVE,4
...,...,...
8346,2642 N NEW ENGLAND AVE,2
6862,212 E 89TH PL,2
13487,4605 N LAVERGNE AVE,2
26739,936 N LOCKWOOD AVE,2


In [125]:
# number of multi year addresses
len(basement_chronic_addresses_year)

2180

In [126]:
# total number of unique addresses
basement['street_address'].nunique()

27571

In [127]:
# percent 
len(basement_chronic_addresses_year)/basement['street_address'].nunique()

0.07906858655834029

A small percentage of addresses reported basement flooding to 311 almost every year in the last five years.

In [128]:
# figure out what percentage of these chronic addresses flooded nearly every year
len(basement_chronic_addresses_year[basement_chronic_addresses_year['count of years'] > 2])

255

In [129]:
# chronic flooding addresses by the month
basement_chronic_addresses_year = basement.groupby('street_address')['created_month_year'].nunique().reset_index(name='count of months').sort_values('count of months', ascending=False)

# take only addresses that have flooded more  than one year
basement_chronic_addresses_month = basement_chronic_addresses_year[basement_chronic_addresses_year['count of months'] > 1].copy()

basement_chronic_addresses_month

Unnamed: 0,street_address,count of months
21682,6806 S CHAPPEL AVE,8
15103,4930 W RACE AVE,8
22419,7224 S COTTAGE GROVE AVE,8
24715,832 E 87TH PL,8
27101,9611 S UNION AVE,7
...,...,...
1878,11029 S MILLARD AVE,2
19471,5917 W NEWPORT AVE,2
8392,2657 S KOLIN AVE,2
15802,5038 W DAKIN ST,2


### Communities reporting the most basement floods

In [130]:
basement_by_ca = basement.groupby('community').size().reset_index(name='count').sort_values('count', ascending=False)
basement_by_ca.head(10)

Unnamed: 0,community,count
5,AUSTIN,6396
57,PORTAGE PARK,1786
61,ROSELAND,1253
71,WEST GARFIELD PARK,1047
32,HUMBOLDT PARK,1004
8,BELMONT CRAGIN,996
4,AUBURN GRESHAM,965
46,MOUNT GREENWOOD,926
67,WASHINGTON HEIGHTS,852
73,WEST PULLMAN,841


In [131]:
basement_by_ca.describe()

Unnamed: 0,count
count,77.0
mean,473.207792
std,761.897101
min,9.0
25%,140.0
50%,283.0
75%,596.0
max,6396.0


In [132]:
# add in the population
ca = pd.read_csv('../geographies/cmap_community_area_pop_2020.csv')

In [133]:
# make geog upper case
ca['community'] = ca['GEOG'].str.upper()

In [134]:
# add in race pcts
ca['PctLatino'] = ca['HISP']/ca['TOT_POP']
ca['PctWhite'] = ca['WHITE']/ca['TOT_POP']
ca['PctBlack'] = ca['BLACK']/ca['TOT_POP']
ca['PctAsian'] = ca['ASIAN']/ca['TOT_POP']

In [135]:
# add majority race
ca['Majority'] = ''

for index, row in ca.iterrows():
    if ca['PctLatino'][index] > 0.5:
        ca['Majority'][index] = 'Latino'
    elif ca['PctWhite'][index] > 0.5:
        ca['Majority'][index] = 'White'
    elif ca['PctBlack'][index] > 0.5:
        ca['Majority'][index] = 'Black'
    elif ca['PctAsian'][index] > 0.5:
        ca['Majority'][index] = 'Asian'
    else:
        ca['Majority'][index] = 'None'

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ca['Majority'][index] = 'None'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ca['Majority'][index] = 'White'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ca['Majority'][index] = 'Latino'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ca['Majority'][index] = 'Black'
A value is trying to be set on a copy of

In [136]:
ca.tail()

Unnamed: 0,OBJECTID,GEOID,GEOG,TOT_POP,POP_HH,POP_GQ,HISP,WHITE,BLACK,ASIAN,...,HU_TOT,TOT_HH,VAC_HU,HH_SIZE,community,PctLatino,PctWhite,PctBlack,PctAsian,Majority
72,73,73,Washington Heights,25065,24901,164,544,95,23849,31,...,10451,9538,913,2.610715,WASHINGTON HEIGHTS,0.021704,0.00379,0.951486,0.001237,Black
73,74,74,Mount Greenwood,18628,18280,348,1994,15256,753,116,...,7298,6850,448,2.668613,MOUNT GREENWOOD,0.107043,0.818982,0.040423,0.006227,White
74,75,75,Morgan Park,21186,20636,550,964,5838,13649,73,...,8865,8175,690,2.524281,MORGAN PARK,0.045502,0.275559,0.644246,0.003446,Black
75,76,76,O'Hare,13418,13407,11,1492,9174,385,1958,...,6560,6158,402,2.177168,O'HARE,0.111194,0.683708,0.028693,0.145923,White
76,77,77,Edgewater,56296,53265,3031,8466,30962,6820,7316,...,33216,30466,2750,1.748342,EDGEWATER,0.150384,0.549986,0.121145,0.129956,White


In [137]:
# merge pop and majority
basement_by_ca = basement_by_ca.merge(ca[['community', 'TOT_POP', 'Majority']], on='community')

In [138]:
basement_by_ca['count per 1k'] = (basement_by_ca['count']/basement_by_ca['TOT_POP'])*1000

In [139]:
basement_by_ca.sort_values('count per 1k', ascending=False)

Unnamed: 0,community,count,TOT_POP,Majority,count per 1k
0,AUSTIN,6396,96557,Black,66.240666
20,AVALON PARK,574,9458,Black,60.689364
3,WEST GARFIELD PARK,1047,17433,Black,60.058510
7,MOUNT GREENWOOD,926,18628,White,49.710114
22,CALUMET HEIGHTS,547,13088,Black,41.794010
...,...,...,...,...,...
54,NEAR WEST SIDE,167,67881,,2.460188
68,NEAR SOUTH SIDE,60,28795,,2.083695
46,LAKE VIEW,200,103050,White,1.940805
70,HYDE PARK,56,29456,,1.901141


Since 2019, the communities with the most flooded basement reports were Austin, Portage Park, Roseland, and Garfield Park. All have had more than 1,000 reports filed. 

In [140]:
basement_by_ca.sort_values('count', ascending=False)

Unnamed: 0,community,count,TOT_POP,Majority,count per 1k
0,AUSTIN,6396,96557,Black,66.240666
1,PORTAGE PARK,1786,63020,,28.340209
2,ROSELAND,1253,38816,Black,32.280503
3,WEST GARFIELD PARK,1047,17433,Black,60.058510
4,HUMBOLDT PARK,1004,54165,Latino,18.535955
...,...,...,...,...,...
71,RIVERDALE,54,7262,Black,7.435968
72,BURNSIDE,53,2527,Black,20.973486
73,ARMOUR SQUARE,50,13890,Asian,3.599712
74,FULLER PARK,41,2567,Black,15.971952


In [147]:
# export basement_by_ca for flourish
basement_by_ca.to_csv('processed/basement_by_community_7.19.23.csv')

In [144]:
# group by majority race
g = basement_by_ca.groupby('Majority')['count'].sum().reset_index(name='count')
g['pct'] = g['count']/g['count'].sum()
g

Unnamed: 0,Majority,count,pct
0,Asian,50,0.001373
1,Black,18781,0.515565
2,Latino,5937,0.162979
3,,5065,0.139041
4,White,6595,0.181042


About four in ten reports of flooded basements to 311 in July came from the Austin neighborhood, according to WBEZ’s analysis.

In [146]:
j = basement[basement['created_month_year'] == '07/2023'].groupby('community').size().reset_index(name='count').sort_values('count', ascending=False)
j['pct'] = j['count']/j['count'].sum()
j.head(10)

Unnamed: 0,community,count,pct
5,AUSTIN,5101,0.419008
71,WEST GARFIELD PARK,887,0.07286
32,HUMBOLDT PARK,595,0.048875
8,BELMONT CRAGIN,535,0.043946
57,PORTAGE PARK,508,0.041728
52,NORTH LAWNDALE,484,0.039757
6,AVALON PARK,367,0.030146
13,CALUMET HEIGHTS,248,0.020371
64,SOUTH LAWNDALE,216,0.017743
61,ROSELAND,210,0.01725
