# IS 477 Midterm Notebook
### Project: Mapping and Modeling Drug-Related Deaths in Cook County

This notebook demonstrates interim progress on data acquisition, cleaning, integration, and exploratory analysis. 
It documents the steps taken so far to prepare datasets for spatial and demographic analysis, as outlined in our Project Plan.


In [None]:
# imports

import pandas as pd
import requests
import hashlib
import os

In [28]:
# scripts/acquire_data.py

df_me = pd.read_csv("data/raw/Medical_Examiner_Case_Archive_20251104.csv", low_memory=False)

In [41]:
os.makedirs("data/raw", exist_ok=True)

# ACS 5-Year 2023 (data collected 2019–2023)
url = "https://api.census.gov/data/2023/acs/acs5"

variables = [
    # Population + race
    "B01003_001E",  # total population
    "B03002_003E",  # white (non-Hispanic)
    "B03002_004E",  # black
    "B03002_006E",  # asian
    "B03002_012E",  # hispanic

    # Income & poverty
    "B19013_001E",  # median income
    "B17001_002E",  # below poverty

    # Education
    "B15003_022E",  # bachelor's degree or higher

    # Age structure (male + female counts)
    "B01001_003E", "B01001_027E",  # under 18
    "B01001_007E", "B01001_031E",  # 18–34
    "B01001_010E", "B01001_034E",  # 35–64
    "B01001_020E", "B01001_044E",  # 65+

    # New: Unemployment, Health Insurance, Rent
    "B23025_002E",  # labor force
    "B23025_005E",  # unemployed
    "B27010_017E",  # no health insurance (all people)
    "B25064_001E"   # median gross rent
]

params = {
    "get": ",".join(["NAME"] + variables),
    "for": "tract:*",
    "in": "county:031 state:17"  # Cook County, IL
}

r = requests.get(url, params=params)
r.raise_for_status()

data = r.json()
cols = data[0]
rows = data[1:]
df_census = pd.DataFrame(rows, columns=cols)

# Rename columns for clarity
df_census = df_census.rename(columns={
    "B01003_001E": "TotalPop",
    "B03002_003E": "White_NonHisp",
    "B03002_004E": "Black",
    "B03002_006E": "Asian",
    "B03002_012E": "Hispanic",
    "B19013_001E": "MedianIncome",
    "B17001_002E": "BelowPoverty",
    "B15003_022E": "BachelorsOrHigher",
    "B01001_003E": "Male_Under18",
    "B01001_027E": "Female_Under18",
    "B01001_007E": "Male_18_34",
    "B01001_031E": "Female_18_34",
    "B01001_010E": "Male_35_64",
    "B01001_034E": "Female_35_64",
    "B01001_020E": "Male_65plus",
    "B01001_044E": "Female_65plus",
    "B23025_002E": "LaborForce",
    "B23025_005E": "Unemployed",
    "B27010_017E": "NoHealthInsurance",
    "B25064_001E": "MedianRent",
    "state": "STATE",
    "county": "COUNTY",
    "tract": "TRACT"
})

# # Convert numeric columns
# num_cols = [c for c in df_census.columns if c not in ["NAME", "STATE", "COUNTY", "TRACT"]]
# df_census[num_cols] = df_census[num_cols].apply(pd.to_numeric, errors="coerce")

# # Derived totals & percentages
# df_census["Age_Under18"] = df_census["Male_Under18"] + df_census["Female_Under18"]
# df_census["Age_18_34"] = df_census["Male_18_34"] + df_census["Female_18_34"]
# df_census["Age_35_64"] = df_census["Male_35_64"] + df_census["Female_35_64"]
# df_census["Age_65plus"] = df_census["Male_65plus"] + df_census["Female_65plus"]

# df_census["Pct_Black"] = df_census["Black"] / df_census["TotalPop"] * 100
# df_census["Pct_Hispanic"] = df_census["Hispanic"] / df_census["TotalPop"] * 100
# df_census["Pct_Asian"] = df_census["Asian"] / df_census["TotalPop"] * 100
# df_census["Pct_WhiteNonHisp"] = df_census["White_NonHisp"] / df_census["TotalPop"] * 100
# df_census["Pct_BelowPoverty"] = df_census["BelowPoverty"] / df_census["TotalPop"] * 100
# df_census["Pct_BachelorsPlus"] = df_census["BachelorsOrHigher"] / df_census["TotalPop"] * 100

# # Unemployment and uninsured rates
# df_census["Pct_Unemployed"] = (df_census["Unemployed"] / df_census["LaborForce"]) * 100
# df_census["Pct_Uninsured"] = (df_census["NoHealthInsurance"] / df_census["TotalPop"]) * 100

# Unique tract ID for merging
df_census["TRACT_FIPS"] = df_census["STATE"] + df_census["COUNTY"] + df_census["TRACT"].str.zfill(6)

df_census.to_csv("data/raw/census_tract_data.csv", index=False)
print(f"Saved enriched Census data for {len(df_census)} tracts.")


Saved enriched Census data for 1332 tracts.


In [38]:
df_census

Unnamed: 0,NAME,TotalPop,White_NonHisp,Black,Asian,Hispanic,MedianIncome,BelowPoverty,BachelorsOrHigher,Male_Under18,...,Male_18_34,Female_18_34,Male_35_64,Female_35_64,Male_65plus,Female_65plus,STATE,COUNTY,TRACT,TRACT_FIPS
0,Census Tract 101; Cook County; Illinois,3726,1297,1376,137,809,69460,508,775,88,...,52,141,128,46,108,56,17,031,010100,17031010100
1,Census Tract 102.01; Cook County; Illinois,7588,1406,2301,376,2622,49639,1892,684,455,...,43,62,161,317,143,119,17,031,010201,17031010201
2,Census Tract 102.02; Cook County; Illinois,2609,967,949,212,423,55119,524,656,9,...,48,0,43,90,24,31,17,031,010202,17031010202
3,Census Tract 103; Cook County; Illinois,6311,3094,1298,342,1426,65871,431,1585,148,...,33,35,53,179,26,84,17,031,010300,17031010300
4,Census Tract 104; Cook County; Illinois,4282,3173,296,324,340,49017,463,999,25,...,358,688,91,100,83,18,17,031,010400,17031010400
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1327,Census Tract 8446; Cook County; Illinois,2274,114,1700,6,315,56848,434,314,46,...,52,0,0,158,6,23,17,031,844600,17031844600
1328,Census Tract 8447; Cook County; Illinois,1906,33,1292,28,527,69583,536,278,47,...,6,7,113,24,0,18,17,031,844700,17031844700
1329,Census Tract 9800; Cook County; Illinois,0,0,0,0,0,-666666666,0,0,0,...,0,0,0,0,0,0,17,031,980000,17031980000
1330,Census Tract 9801; Cook County; Illinois,0,0,0,0,0,-666666666,0,0,0,...,0,0,0,0,0,0,17,031,980100,17031980100


In [31]:
df_me.head()

Unnamed: 0,Case Number,Date of Incident,Date of Death,Age,Gender,Race,Latino,Manner of Death,Primary Cause,Primary Cause Line A,...,Incident Zip Code,longitude,latitude,location,Residence City,Residence Zip,OBJECTID,Chicago Ward,Chicago Community Area,COVID Related
0,ME2025-05835,11/04/2025 02:09:00 PM,11/04/2025 01:57:00 PM,57.0,Male,Black,False,,,,...,60621,-87.647293,41.779635,"(41.77963481, -87.64729284)",Chicago,60621,92724,16.0,ENGLEWOOD,False
1,ME2025-05834,09/24/2025 01:48:00 PM,11/04/2025 12:20:00 PM,55.0,Female,White,True,,,,...,60609,-87.662232,41.806638,"(41.806638, -87.662232)",Chicago,60609,92725,20.0,NEW CITY,False
2,ME2025-05833,11/04/2025 12:00:00 AM,11/04/2025 12:40:00 PM,69.0,Female,White,True,,,,...,60622,-87.696022,41.906421,"(41.906421, -87.6960225)",Chicago,60612,92720,26.0,WEST TOWN,False
3,ME2025-05832,11/04/2025 01:12:00 PM,11/04/2025 12:40:00 PM,75.0,Male,White,False,,,,...,60622,-87.696022,41.906421,"(41.906421, -87.6960225)",Chicago,60622,92719,26.0,WEST TOWN,False
4,ME2025-05831,11/04/2025 01:41:00 PM,11/04/2025 09:48:00 AM,30.0,Male,Black,False,,,,...,60429,-87.659707,41.58108,"(41.58108, -87.6597075)",Hazel Crest,60429,92722,,,False


### Exploratory Data Analysis 

In [33]:
len(df_me)

92642

In [34]:
df_me.dtypes

Case Number                object
Date of Incident           object
Date of Death              object
Age                       float64
Gender                     object
Race                       object
Latino                       bool
Manner of Death            object
Primary Cause              object
Primary Cause Line A       object
Primary Cause Line B       object
Primary Cause Line C       object
Secondary Cause            object
Gun Related                object
Opioid Related             object
Cold Related                 bool
Heat Related                 bool
Commissioner District     float64
Incident Address           object
Incident City              object
Incident Zip Code          object
longitude                 float64
latitude                  float64
location                   object
Residence City             object
Residence Zip              object
OBJECTID                    int64
Chicago Ward              float64
Chicago Community Area     object
COVID Related 

In [32]:
df_me.columns

Index(['Case Number', 'Date of Incident', 'Date of Death', 'Age', 'Gender',
       'Race', 'Latino', 'Manner of Death', 'Primary Cause',
       'Primary Cause Line A', 'Primary Cause Line B', 'Primary Cause Line C',
       'Secondary Cause', 'Gun Related', 'Opioid Related', 'Cold Related',
       'Heat Related', 'Commissioner District', 'Incident Address',
       'Incident City', 'Incident Zip Code', 'longitude', 'latitude',
       'location', 'Residence City', 'Residence Zip', 'OBJECTID',
       'Chicago Ward', 'Chicago Community Area', 'COVID Related'],
      dtype='object')

In [25]:
# AFTER CLEANING, save ME data to this!!!


# df_me = pd.read_csv('data/raw/cook_county_medical_examiner.csv')
# df_census = pd.read_csv('data/raw/census_tract_data.csv')

# # Save processed copy and compute checksum
# df_me.to_csv('data/processed/me_clean.csv', index=False)
# with open('data/processed/me_clean.csv', 'rb') as f:
#     checksum = hashlib.sha256(f.read()).hexdigest()
# print("Checksum:", checksum)