In [1]:
# --- Imports ---
import pandas as pd

# --- Load Arrest Rate Data ---
# (Again, recreate if needed or just reload CleanedCrime.csv)
crime_df = pd.read_csv('../data/CleanedCrime.csv')

# Group by Community Area
arrest_by_area = crime_df.groupby('Community Area').agg(
    total_crimes=('Date', 'count'),
    total_arrests=('Arrest', 'sum')
).reset_index()

# Calculate Arrest Rate
arrest_by_area['arrest_rate'] = arrest_by_area['total_arrests'] / arrest_by_area['total_crimes']

# --- Load Economic Data (Public Health Statistics) ---
income_df = pd.read_csv('../data/Public_Health_Statistics_-_Selected_public_health_indicators_by_Chicago_community_area_-_Historical.csv')

# Quick Peek
print("Income Data Columns:", income_df.columns.tolist())
print(income_df.head())

# --- Merge: Arrest Data + Per Capita Income ---
merged_df = arrest_by_area.merge(
    income_df[['Community Area', 'Per Capita Income']], 
    on='Community Area', 
    how='left'
)

# --- Preview Merged Data ---
print("\nMerged Data Preview:")
print(merged_df.head())

# --- Sanity Check: Any missing incomes?
missing_income = merged_df['Per Capita Income'].isnull().sum()
print(f"\nMissing Per Capita Income values: {missing_income}")

# (Optional) Filter out rows with missing income if needed
# merged_df = merged_df.dropna(subset=['Per Capita Income'])


Income Data Columns: ['Community Area', 'Community Area Name', 'Birth Rate', 'General Fertility Rate', 'Low Birth Weight', 'Prenatal Care Beginning in First Trimester', 'Preterm Births', 'Teen Birth Rate', 'Assault (Homicide)', 'Breast cancer in females', 'Cancer (All Sites)', 'Colorectal Cancer', 'Diabetes-related', 'Firearm-related', 'Infant Mortality Rate', 'Lung Cancer', 'Prostate Cancer in Males', 'Stroke (Cerebrovascular Disease)', 'Childhood Blood Lead Level Screening', 'Childhood Lead Poisoning', 'Gonorrhea in Females', 'Gonorrhea in Males', 'Tuberculosis', 'Below Poverty Level', 'Crowded Housing', 'Dependency', 'No High School Diploma', 'Per Capita Income', 'Unemployment']
   Community Area Community Area Name  Birth Rate  General Fertility Rate  \
0               1         Rogers Park        16.4                    62.0   
1               2          West Ridge        17.3                    83.3   
2               3              Uptown        13.1                    50.5   
3

  merged_df = arrest_by_area.merge(


In [2]:
# --- Make sure Community Area is INT before merging ---
arrest_by_area['Community Area'] = arrest_by_area['Community Area'].astype(int)
income_df['Community Area'] = income_df['Community Area'].astype(int)

# --- Now safe to merge ---
merged_df = arrest_by_area.merge(
    income_df[['Community Area', 'Per Capita Income']], 
    on='Community Area', 
    how='left'
)
