# Health Data
https://chicagohealthatlas.org/how-to

And here is the API: https://chicagohealthatlas.org/api/v1/

In [2]:
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
path = "./" # Your path to crocus_heat/datasets/
raw_data_prefix = "chi_health_atlas_raw/Chicago Health Atlas Data Download - "
clean_data_prefix = "chi_health_atlas_clean/"

### Template for community areas
Using adult diabetes in community areas as an example.

In [51]:
# Read in raw data and merge
adult_diabetes_counts = pd.read_csv(path + raw_data_prefix + "Adult diabetes in Community Areas.csv")
adult_diabetes_rate = pd.read_csv(path + raw_data_prefix + "Adult diabetes rate in Community Areas.csv")
adult_diabetes = pd.merge(left=adult_diabetes_counts, right=adult_diabetes_rate, how="outer", left_on=["Layer", "GEOID", "Name"], right_on=["Layer", "GEOID", "Name"])

# Rename columns to be more descriptive, and to match boundary shapefiles
# For community areas, "GEOID" is the area number
adult_diabetes.rename(columns={"Name": "community", "GEOID": "area_num",\
                               "HCSDIA_2021-2022": "diabetes_count", "HCSDIA_2021-2022_moe": "diabetes_count_moe",\
                               "HCSDIAP_2021-2022":"diabetes_rate", "HCSDIAP_2021-2022_moe": "diabetes_rate_moe"},\
                      inplace=True)
adult_diabetes.drop("Layer", axis=1, inplace=True)

# Standardize community area names
adult_diabetes["community"] = adult_diabetes["community"].str.upper()
adult_diabetes.loc[adult_diabetes["community"] == "O'HARE", "community"] = "OHARE"

# Drop the first row (which is informational)
print("Dropping the following informational row")
print(adult_diabetes.loc[adult_diabetes.index == 0])
adult_diabetes.drop([0], axis=0, inplace=True)

# Save
adult_diabetes.to_csv(path + clean_data_prefix + "adult_diabetes_commareas.csv", index=False)
adult_diabetes.head()

Dropping the following informational row
  community  area_num                               diabetes_count  \
0       NaN       NaN  Adult diabetes (count of adults), 2021-2022   

      diabetes_count_moe                                 diabetes_rate  \
0  (90% margin of error)  Adult diabetes rate (% of adults), 2021-2022   

       diabetes_rate_moe  
0  (90% margin of error)  


Unnamed: 0,community,area_num,diabetes_count,diabetes_count_moe,diabetes_rate,diabetes_rate_moe
1,ROGERS PARK,1.0,2600.0,1698.131024275,6.1299524989,3.953103167034
2,NORWOOD PARK,10.0,2400.0,1424.1970668534,8.5093049364,4.9318937486385
3,JEFFERSON PARK,11.0,1900.0,1599.46145264605,8.8099700233,6.8320810010145
4,FOREST GLEN,12.0,1500.0,1103.6614933792,11.359085406,7.5745882992955
5,ALBANY PARK,14.0,6300.0,3207.188527284,14.339056125,6.8486054829955005


### Template for Zip Codes
Using Asthma emergency department visits as an example.

In [55]:
# Read in raw data and merge (so that counts and rates are together)
template_counts = pd.read_csv(path + raw_data_prefix + "Asthma ED visits in ZIP Codes.csv")
template_rate = pd.read_csv(path + raw_data_prefix + "Asthma ED visit rate in ZIP Codes.csv")
template = pd.merge(left=template_counts, right=template_rate, how="outer", left_on=["Layer", "GEOID", "Name"], right_on=["Layer", "GEOID", "Name"])

# Rename columns to be more descriptive, and to match boundary shapefiles
# For zip codes, "GEOID" is not needed because it is the same as the zip code
# You will need to change these names for each data set
print(behavioral_hosp.columns)
template.rename(columns={"Name": "zip_code", "HDED_2017": "template_count",\
                          "HDED_2017_moe": "template_count_moe", "HDEDR_2017": "template_rate",\
                          "HDEDR_2017_moe": "template_rate_moe"
                         }, inplace=True)
template.drop(["Layer", "GEOID"], axis=1, inplace=True)
template

# Drop the first row (which is informational)
print("Dropping the following informational row")
print(template.loc[template.index == 0])
template.drop([0], axis=0, inplace=True)

# Adjust dtypes
template = template.apply(pd.to_numeric, args=("raise", "integer"))

# Save - don't actually do this for the template
#template.to_csv(path + clean_data_prefix + "template_visits_zip.csv", index=False)
template

Dropping the following informational row
   zip_code                                         data_count  \
0       NaN  Asthma ED visits (count of hospitalizations), ...   

          data_count_moe                                          data_rate  \
0  (90% margin of error)  Asthma ED visit rate (per 10,000 population), ...   

           data_rate_moe  
0  (90% margin of error)  


Unnamed: 0,zip_code,data_count,data_count_moe,data_rate,data_rate_moe
1,60601,19,,51.8,26.185714
2,60604,18,,388.2,234.286607
3,60605,43,,29.1,9.483929
4,60606,19,,352.3,224.928571
5,60607,113,,76.0,14.016071
6,60608,483,,60.9,4.7
7,60609,565,,85.9,6.042857
8,60610,175,,84.0,11.708036
9,60611,21,,15.2,7.595536
10,60612,632,,192.8,12.799107


In [None]:
# COPY
# Read in raw data and merge (so that counts and rates are together)
template_counts = pd.read_csv(path + raw_data_prefix + "Asthma ED visits in ZIP Codes.csv")
template_rate = pd.read_csv(path + raw_data_prefix + "Asthma ED visit rate in ZIP Codes.csv")
template = pd.merge(left=template_counts, right=template_rate, how="outer", left_on=["Layer", "GEOID", "Name"], right_on=["Layer", "GEOID", "Name"])

# Rename columns to be more descriptive, and to match boundary shapefiles
# For zip codes, "GEOID" is not needed because it is the same as the zip code
print(behavioral_hosp.columns)
template.rename(columns={"Name": "zip_code", "HDED_2017": "template_count",\
                          "HDED_2017_moe": "template_count_moe", "HDEDR_2017": "template_rate",\
                          "HDEDR_2017_moe": "template_rate_moe"
                         }, inplace=True)
template.drop(["Layer", "GEOID"], axis=1, inplace=True)
template

# Drop the first row (which is informational)
print("Dropping the following informational row")
print(template.loc[template.index == 0])
template.drop([0], axis=0, inplace=True)

# Adjust dtypes
template = template.apply(pd.to_numeric, args=("raise", "integer"))

# Save
template.to_csv(path + clean_data_prefix + "template_visits_zip.csv", index=False)
template

### Adult Asthma in Community Areas
Source info available at the following links (click "Sources")
- https://chicagohealthatlas.org/indicators/HCSATH?topic=adult-asthma
- https://chicagohealthatlas.org/indicators/HCSATHP?topic=adult-asthma-rate

In [4]:
# Adult Asthma
adult_asthma_counts = pd.read_csv(path + raw_data_prefix + "Adult asthma in Community Areas.csv")
adult_asthma_rate = pd.read_csv(path + raw_data_prefix + "Adult asthma rate in Community Areas.csv")
adult_asthma = pd.merge(left=adult_asthma_counts, right=adult_asthma_rate, how="outer", left_on=["Layer", "GEOID", "Name"], right_on=["Layer", "GEOID", "Name"])
adult_asthma.rename(columns={"Name": "community_area"}, inplace=True)
adult_asthma.drop("Layer", axis=1, inplace=True)
adult_asthma.to_csv(path + clean_data_prefix + "adult_asthma_commareas.csv", index=False)
adult_asthma

Unnamed: 0,community_area,GEOID,HCSATH_2021-2022,HCSATH_2021-2022_moe,HCSATHP_2021-2022,HCSATHP_2021-2022_moe
0,,,"Adult asthma (count of adults), 2021-2022",(90% margin of error),"Adult asthma rate (% of adults), 2021-2022",(90% margin of error)
1,Rogers Park,1.0,4900.0,2904.498116408,11.687814795,6.439035275897
2,Norwood Park,10.0,3000.0,1625.9889338147,10.697054662,5.54126845783
3,Jefferson Park,11.0,600.0,325.2073188273,2.4396925606,1.429608704748
4,Forest Glen,12.0,1100.0,740.95782808635,8.6175984924,5.437838144522001
5,Albany Park,14.0,3500.0,2073.3267721425,8.0608019141,4.6966871541695
6,Portage Park,15.0,4100.0,3281.585009738,7.4030969465,5.6888906142215
7,Irving Park,16.0,2800.0,2284.7833094800003,7.8393584543,6.068874899842999
8,Dunning,17.0,4500.0,3128.2313400694998,13.407961162,8.651347995556
9,Montclare,18.0,1400.0,786.5122182379,8.2530643898,4.989078619616


### Adult Diabetes in Community Areas
Source info available at the following links (click "Sources")
- https://chicagohealthatlas.org/indicators/HCSDIA?topic=adult-diabetes
- https://chicagohealthatlas.org/indicators/HCSDIAP?topic=adult-diabetes-rate

In [19]:
# Read in raw data and merge
adult_diabetes_counts = pd.read_csv(path + raw_data_prefix + "Adult diabetes in Community Areas.csv")
adult_diabetes_rate = pd.read_csv(path + raw_data_prefix + "Adult diabetes rate in Community Areas.csv")
adult_diabetes = pd.merge(left=adult_diabetes_counts, right=adult_diabetes_rate, how="outer", left_on=["Layer", "GEOID", "Name"], right_on=["Layer", "GEOID", "Name"])

# Rename columns (to match boundary shape files)
adult_diabetes.rename(columns={"Name": "community", "GEOID": "area_num",\
                               "HCSDIA_2021-2022": "diabetes_count", "HCSDIA_2021-2022_moe": "diabetes_count_moe",\
                               "HCSDIAP_2021-2022":"diabetes_rate", "HCSDIAP_2021-2022_moe": "diabetes_rate_moe"},\
                      inplace=True)
adult_diabetes.drop("Layer", axis=1, inplace=True)

# Standardize community area names
adult_diabetes["community"] = adult_diabetes["community"].str.upper()
adult_diabetes.loc[adult_diabetes["community"] == "O'HARE", "community"] = "OHARE"

# Drop the first row (which is informational)
print("Dropping the following informational row")
print(adult_diabetes.loc[adult_diabetes.index == 0])
adult_diabetes.drop([0], axis=0, inplace=True)

# Save
adult_diabetes.to_csv(path + clean_data_prefix + "adult_diabetes_commareas.csv", index=False)

Dropping the following informational row
  community  area_num                               diabetes_count  \
0       NaN       NaN  Adult diabetes (count of adults), 2021-2022   

      diabetes_count_moe                                 diabetes_rate  \
0  (90% margin of error)  Adult diabetes rate (% of adults), 2021-2022   

       diabetes_rate_moe  
0  (90% margin of error)  


Unnamed: 0,community,area_num,diabetes_count,diabetes_count_moe,diabetes_rate,diabetes_rate_moe
1,ROGERS PARK,1.0,2600.0,1698.131024275,6.1299524989,3.953103167034
2,NORWOOD PARK,10.0,2400.0,1424.1970668534,8.5093049364,4.9318937486385
3,JEFFERSON PARK,11.0,1900.0,1599.46145264605,8.8099700233,6.8320810010145
4,FOREST GLEN,12.0,1500.0,1103.6614933792,11.359085406,7.5745882992955
5,ALBANY PARK,14.0,6300.0,3207.188527284,14.339056125,6.8486054829955005
...,...,...,...,...,...,...
67,MORGAN PARK,75.0,3000.0,1477.3660592089,19.706104702,9.3659723143595
68,OHARE,76.0,1200.0,993.5532481217001,11.506201864,9.308925018022
69,EDGEWATER,77.0,8900.0,3844.6151648555,16.262917876,6.2420459494889995
70,NEAR NORTH SIDE,8.0,2800.0,1892.3809433689999,3.6433123235,2.4073854797615


### Adult Loneliness in Community Areas
Source info available at the following links (click "Sources")
- https://chicagohealthatlas.org/indicators/CHAJVUB?topic=adult-loneliness
- https://chicagohealthatlas.org/indicators/CHAVQOS?topic=adult-loneliness-rate

In [57]:
# Loneliness
adult_loneliness_counts = pd.read_csv(path + raw_data_prefix + "Adult loneliness in Community Areas.csv")
adult_loneliness_rate = pd.read_csv(path + raw_data_prefix + "Adult loneliness rate in Community Areas.csv")
adult_loneliness = pd.merge(left=adult_loneliness_counts, right=adult_loneliness_rate, how="outer", left_on=["Layer", "GEOID", "Name"], right_on=["Layer", "GEOID", "Name"])
adult_loneliness.rename(columns={"Name": "community_area"}, inplace=True)
adult_loneliness.drop("Layer", axis=1, inplace=True)
adult_loneliness.to_csv(path + clean_data_prefix + "adult_loneliness_commareas.csv", index=False)
#adult_loneliness

### Asthma Emergency Department Visits in Zip Codes
Source info available at the following links (click "Sources")
- https://chicagohealthatlas.org/indicators/HDED?topic=asthma-ed-visits
- https://chicagohealthatlas.org/indicators/HDEDR?topic=asthma-ed-visit-rate

In [53]:
# Read in raw data and merge (so that counts and rates are together)
asthma_ed_counts = pd.read_csv(path + raw_data_prefix + "Asthma ED visits in ZIP Codes.csv")
asthma_ed_rate = pd.read_csv(path + raw_data_prefix + "Asthma ED visit rate in ZIP Codes.csv")
asthma_ed = pd.merge(left=asthma_ed_counts, right=asthma_ed_rate, how="outer", left_on=["Layer", "GEOID", "Name"], right_on=["Layer", "GEOID", "Name"])

# Rename columns to be more descriptive, and to match boundary shapefiles
# For zip codes, "GEOID" is not needed because it is the same as the zip code
asthma_ed.rename(columns={"Name": "zip_code", "HDED_2017": "asthma_ed_count",\
                          "HDED_2017_moe": "asthma_ed_count_moe", "HDEDR_2017": "asthma_ed_rate",\
                          "HDEDR_2017_moe": "asthma_ed_rate_moe"
                         }, inplace=True)
asthma_ed.drop(["Layer", "GEOID"], axis=1, inplace=True)
asthma_ed

# Drop the first row (which is informational)
print("Dropping the following informational row")
print(asthma_ed.loc[asthma_ed.index == 0])
asthma_ed.drop([0], axis=0, inplace=True)

# Adjust dtypes
asthma_ed = asthma_ed.apply(pd.to_numeric, args=("raise", "integer"))

# Save
asthma_ed.to_csv(path + clean_data_prefix + "asthma_ed_visits_zip.csv", index=False)
#asthma_ed

Dropping the following informational row
   zip_code                                    asthma_ed_count  \
0       NaN  Asthma ED visits (count of hospitalizations), ...   

     asthma_ed_count_moe                                     asthma_ed_rate  \
0  (90% margin of error)  Asthma ED visit rate (per 10,000 population), ...   

      asthma_ed_rate_moe  
0  (90% margin of error)  


### Behavioral Health hosp in Zip Codes
Source info available at the following links (click "Sources")
- https://chicagohealthatlas.org/indicators/HDBH?topic=behavioral-health-hosp
- https://chicagohealthatlas.org/indicators/HDBHR?topic=behavioral-health-hospitalization-rate

In [67]:
# Read in raw data and merge (so that counts and rates are together)
behavioral_hosp_counts = pd.read_csv(path + raw_data_prefix + "Behavioral health hospitalizations in ZIP Codes.csv")
behavioral_hosp_rate = pd.read_csv(path + raw_data_prefix + "Behavioral health hospitalization rate in ZIP Codes.csv")
behavioral_hosp = pd.merge(left=behavioral_hosp_counts, right=behavioral_hosp_rate, how="outer", left_on=["Layer", "GEOID", "Name"], right_on=["Layer", "GEOID", "Name"])

# Rename columns to be more descriptive, and to match boundary shapefiles
# For zip codes, "GEOID" is not needed because it is the same as the zip code
print(behavioral_hosp.columns)
behavioral_hosp.rename(columns={"Name": "zip_code", "HDED_2017": "behavioral_hosp_count",\
                          "HDED_2017_moe": "behavioral_hosp_count_moe", "HDEDR_2017": "behavioral_hosp_rate",\
                          "HDEDR_2017_moe": "behavioral_hosp_rate_moe"
                         }, inplace=True)
behavioral_hosp.drop(["Layer", "GEOID"], axis=1, inplace=True)
behavioral_hosp

# Drop the first row (which is informational)
print("Dropping the following informational row")
print(behavioral_hosp.loc[behavioral_hosp.index == 0])
behavioral_hosp.drop([0], axis=0, inplace=True)

# Adjust dtypes
behavioral_hosp = behavioral_hosp.apply(pd.to_numeric, args=("raise", "integer"))

# Save
behavioral_hosp.to_csv(path + clean_data_prefix + "behavioral_hosp_visits_zip.csv", index=False)
#behavioral_hosp

Index(['Layer', 'Name', 'GEOID', 'HDBH_2017', 'HDBH_2017_moe', 'HDBHR_2017',
       'HDBHR_2017_moe'],
      dtype='object')
Dropping the following informational row
   zip_code                                          HDBH_2017  \
0       NaN  Behavioral health hospitalizations (count of h...   

           HDBH_2017_moe                                         HDBHR_2017  \
0  (90% margin of error)  Behavioral health hospitalization rate (per 10...   

          HDBHR_2017_moe  
0  (90% margin of error)  


### Behavioral Health Treatment in Community Areas
Source info available at the following links (click "Sources")
- https://chicagohealthatlas.org/indicators/HCSBH?topic=behavioral-health-treatment
- https://chicagohealthatlas.org/indicators/HCSBHP?topic=behavioral-health-treatment-rate

In [60]:
# Behavioral health treatment
behavioral_treatment_counts = pd.read_csv(path + raw_data_prefix + "Behavioral health treatment in Community Areas.csv")
behavioral_treatment_rate = pd.read_csv(path + raw_data_prefix + "Behavioral health treatment rate in Community Areas.csv")
print(len(behavioral_treatment_counts), len(behavioral_treatment_rate))
print(behavioral_treatment_counts.columns)
behavioral_treatment = pd.merge(left=behavioral_treatment_counts, right=behavioral_treatment_rate, how="outer", left_on=["Layer", "GEOID", "Name"], right_on=["Layer", "GEOID", "Name"])
behavioral_treatment.rename(columns={"Name": "zip_code"}, inplace=True)
behavioral_treatment.drop("Layer", axis=1, inplace=True)
behavioral_treatment.to_csv(path + clean_data_prefix + "behavioral_treatment_commareas.csv", index=False)
#behavioral_treatment
print(behavioral_treatment.columns)

30 30
Index(['Layer', 'Name', 'GEOID', 'HCSBH_2021-2022', 'HCSBH_2021-2022_moe'], dtype='object')
Index(['zip_code', 'GEOID', 'HCSBH_2021-2022', 'HCSBH_2021-2022_moe',
       'HCSBHP_2021-2022', 'HCSBHP_2021-2022_moe'],
      dtype='object')


### Diabetes-related hosp in Zip Codes
Source info available at the following links (click "Sources")
- https://chicagohealthatlas.org/indicators/HDDIA?topic=diabetes-related-hosp
- https://chicagohealthatlas.org/indicators/HDDIAR?topic=diabetes-related-hospitalization-rate

In [66]:
# Read in raw data and merge (so that counts and rates are together)
diabetes_hosp_counts = pd.read_csv(path + raw_data_prefix + "Diabetes-related hospitalizations in ZIP Codes.csv")
diabetes_hosp_rate = pd.read_csv(path + raw_data_prefix + "Diabetes-related hospitalization rate in ZIP Codes.csv")
diabetes_hosp = pd.merge(left=diabetes_hosp_counts, right=diabetes_hosp_rate, how="outer", left_on=["Layer", "GEOID", "Name"], right_on=["Layer", "GEOID", "Name"])

# Rename columns to be more descriptive, and to match boundary shapefiles
# For zip codes, "GEOID" is not needed because it is the same as the zip code
print(diabetes_hosp.columns)
diabetes_hosp.rename(columns={"Name": "zip_code", "HDDIA_2017": "diabetes_hosp_count",\
                          "HDDIA_2017_moe": "diabetes_hosp_count_moe", "HDDIAR_2017": "diabetes_hosp_rate",\
                          "HDDIAR_2017_moe": "diabetes_hosp_rate_moe"
                         }, inplace=True)
diabetes_hosp.drop(["Layer", "GEOID"], axis=1, inplace=True)
diabetes_hosp

# Drop the first row (which is informational)
print("Dropping the following informational row")
print(diabetes_hosp.loc[diabetes_hosp.index == 0])
diabetes_hosp.drop([0], axis=0, inplace=True)

# Adjust dtypes
diabetes_hosp = diabetes_hosp.apply(pd.to_numeric, args=("raise", "integer"))

# Save
diabetes_hosp.to_csv(path + clean_data_prefix + "diabetes_hospital_visits_zip.csv", index=False)
#diabetes_hosp

Index(['Layer', 'Name', 'GEOID', 'HDDIA_2017', 'HDDIA_2017_moe', 'HDDIAR_2017',
       'HDDIAR_2017_moe'],
      dtype='object')
Dropping the following informational row
   zip_code                                diabetes_hosp_count  \
0       NaN  Diabetes-related hospitalizations (count of ho...   

  diabetes_hosp_count_moe                                 diabetes_hosp_rate  \
0   (90% margin of error)  Diabetes-related hospitalization rate (per 10,...   

  diabetes_hosp_rate_moe  
0  (90% margin of error)  


### Hypertension in Community Areas
Source info available at the following links (click "Sources")
- https://chicagohealthatlas.org/indicators/HCSHYT?topic=hypertension
- https://chicagohealthatlas.org/indicators/HCSHYTP?topic=hypertension-rate

In [44]:
# Hypertension
hypertension_counts = pd.read_csv(path + raw_data_prefix + "Hypertension in Community Areas.csv")
hypertension_rate = pd.read_csv(path + raw_data_prefix + "Hypertension rate in Community Areas.csv")
hypertension = pd.merge(left=hypertension_counts, right=hypertension_rate, how="outer", left_on=["Layer", "GEOID", "Name"], right_on=["Layer", "GEOID", "Name"])
hypertension.rename(columns={"Name": "community_area"}, inplace=True)
hypertension.drop("Layer", axis=1, inplace=True)
hypertension.to_csv(path + clean_data_prefix + "hypertension_commareas.csv", index=False)
#hypertension

### Mood and Depressive Disorder Related hosp
Source info available at the following links (click "Sources")
- https://chicagohealthatlas.org/indicators/HDMD?topic=mood-and-depressive-disorder-hosp
- https://chicagohealthatlas.org/indicators/HDMDR?topic=mood-and-depressive-disorder-hospitalization-rate

In [62]:
# Mood and depressive disorder hosp
mood_hosp_counts = pd.read_csv(path + raw_data_prefix + "Mood and depressive disorder hosp in ZIP Codes.csv")
mood_hosp_rate = pd.read_csv(path + raw_data_prefix + "Mood and depressive disorder hospitalization rate in ZIP Codes.csv")
print(len(mood_hosp_counts), len(mood_hosp_rate))
print(mood_hosp_counts.columns)
mood_hosp = pd.merge(left=mood_hosp_counts, right=mood_hosp_rate, how="outer", left_on=["Layer", "GEOID", "Name"], right_on=["Layer", "GEOID", "Name"])
mood_hosp.rename(columns={"Name": "zip_code"}, inplace=True)
mood_hosp.drop("Layer", axis=1, inplace=True)
mood_hosp.to_csv(path + clean_data_prefix + "mood_hosp_zip.csv", index=False)
#mood_hosp

57 57
Index(['Layer', 'Name', 'GEOID', 'HDMD_2017', 'HDMD_2017_moe'], dtype='object')


### Schizophrenic Disorder hosp
Source info available at the following links (click "Sources")
- https://chicagohealthatlas.org/indicators/HDSZ?topic=schizophrenic-disorder-hosp
- https://chicagohealthatlas.org/indicators/HDSZR?topic=schizophrenic-disorder-hospitalization-rate

In [63]:
# Schizophrenic disorder hosp
scizophrenic_hosp_counts = pd.read_csv(path + raw_data_prefix + "Schizophrenic disorder hosp in ZIP Codes.csv")
scizophrenic_hosp_rate = pd.read_csv(path + raw_data_prefix + "Schizophrenic disorder hospitalization rate in ZIP Codes.csv")
print(len(scizophrenic_hosp_counts), len(scizophrenic_hosp_rate))
print(scizophrenic_hosp_counts.columns)
scizophrenic_hosp = pd.merge(left=scizophrenic_hosp_counts, right=scizophrenic_hosp_rate, how="outer", left_on=["Layer", "GEOID", "Name"], right_on=["Layer", "GEOID", "Name"])
scizophrenic_hosp.rename(columns={"Name": "zip_code"}, inplace=True)
scizophrenic_hosp.drop("Layer", axis=1, inplace=True)
scizophrenic_hosp.to_csv(path + clean_data_prefix + "scizophrenic_hosp_zip.csv", index=False)
#scizophrenic_hosp

55 55
Index(['Layer', 'Name', 'GEOID', 'HDSZ_2017', 'HDSZ_2017_moe'], dtype='object')


### Serious Psychological Distress
Source info available at the following links (click "Sources")
- https://chicagohealthatlas.org/indicators/HCSSPD?topic=serious-psychological-distress
- https://chicagohealthatlas.org/indicators/HCSSPDP?topic=serious-psychological-distress-rate

In [64]:
# Serious psychological distress
psych_distress_counts = pd.read_csv(path + raw_data_prefix + "Serious psychological distress in Community Areas.csv")
psych_distress_rate = pd.read_csv(path + raw_data_prefix + "Serious psychological distress rate in Community Areas.csv")
psych_distress = pd.merge(left=psych_distress_counts, right=psych_distress_rate, how="outer", left_on=["Layer", "GEOID", "Name"], right_on=["Layer", "GEOID", "Name"])
psych_distress.rename(columns={"Name": "community_area"}, inplace=True)
psych_distress.drop("Layer", axis=1, inplace=True)
psych_distress.to_csv(path + clean_data_prefix + "psych_distress_commareas.csv", index=False)
#psych_distress