# Question 3: What barriers exist in their current living situations?

This notebook analyzes all barriers facing elderly residents in Allston-Brighton:

1. **Financial Barriers**: Income levels, property values
2. **Building Condition Barriers**: Interior/exterior condition, property grade, building age
3. **Property Violations**: Open violations, violation types
4. **Accessibility Barriers**: Store and park access limitations
5. **Combined Barriers**: Multiple barriers per resident

## Data Overview
- Total elderly: 7,396
- Mapped to buildings: 5,390
- Mapped with condition data: ~5,391


In [None]:
import sys
import os

current_dir = os.getcwd()
if os.path.basename(current_dir) == 'notebooks':
    project_dir = os.path.dirname(current_dir)
else:
    parts = current_dir.split(os.sep)
    if 'fa25-team-a' in parts:
        idx = parts.index('fa25-team-a')
        project_dir = os.sep.join(parts[:idx+1])
    else:
        project_dir = os.path.dirname(os.path.dirname(current_dir))
web_app_path = os.path.join(project_dir, 'web_app')
sys.path.append(web_app_path)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from config.database import get_db_connection, execute_query

sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)


## 1. Overall Elderly Population Statistics


In [3]:
query = """
SELECT 
    COUNT(DISTINCT v.res_id) as total_elderly,
    COUNT(DISTINCT CASE WHEN vbm.res_id IS NOT NULL THEN v.res_id END) as mapped_to_buildings,
    AVG(v.age) as avg_age,
    PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY v.age) as median_age
FROM voters v
LEFT JOIN voters_buildings_map vbm ON v.res_id = vbm.res_id
WHERE v.is_elderly = true
"""

df_overall = pd.DataFrame(execute_query(query, fetch_all=True))
print("Overall Elderly Population:")
print(df_overall.to_string(index=False))
print(f"\nNote: This counts DISTINCT elderly residents to avoid duplicates.")


Overall Elderly Population:
 total_elderly  mapped_to_buildings             avg_age  median_age
          7396                 5391 75.1146804835924007        75.0

Note: This counts DISTINCT elderly residents to avoid duplicates.


## 2. Financial Barriers Analysis


In [4]:
query = """
SELECT DISTINCT ON (v.res_id)
    v.res_id,
    v.age,
    v.ward_id,
    v.precinct_id,
    COALESCE(ct.median_income, 0) as median_income,
    CASE 
        WHEN COALESCE(ct.median_income, 0) < 50000 THEN 'Low Income (<$50k)'
        WHEN COALESCE(ct.median_income, 0) < 75000 THEN 'Moderate Income ($50k-$75k)'
        ELSE 'Higher Income ($75k+)'
    END as income_category,
    CASE 
        WHEN COALESCE(ct.median_income, 0) < 50000 THEN true
        WHEN COALESCE(ct.median_income, 0) < 75000 THEN true
        ELSE false
    END as has_financial_barrier,
    ehc.fy2025_total_assessed_value_numeric as property_value
FROM voters v
INNER JOIN voters_buildings_map vbm ON v.res_id = vbm.res_id
LEFT JOIN census_tracts ct ON ST_Within(
    ST_SetSRID(ST_MakePoint(v.longitude, v.latitude), 4326),
    ct.geometry
)
LEFT JOIN elderly_housing_conditions ehc ON v.res_id = ehc.res_id
WHERE v.is_elderly = true
    AND v.latitude IS NOT NULL
    AND v.longitude IS NOT NULL
ORDER BY v.res_id, ct.median_income
"""

df_financial = pd.DataFrame(execute_query(query, fetch_all=True))

print("Financial Barriers Analysis:")
print(f"Total elderly analyzed: {len(df_financial):,}")
print(f"\nIncome Category Distribution:")
print(df_financial['income_category'].value_counts())
print(f"\nElderly with Financial Barriers (Low/Moderate Income): {df_financial['has_financial_barrier'].sum():,} ({df_financial['has_financial_barrier'].sum()/len(df_financial)*100:.1f}%)")
print(f"\nMedian Income Statistics:")
print(df_financial['median_income'].describe())
if df_financial['property_value'].notna().any():
    print(f"\nProperty Value Statistics (where available):")
    print(df_financial['property_value'].describe())


Financial Barriers Analysis:
Total elderly analyzed: 5,388

Income Category Distribution:
income_category
Higher Income ($75k+)          3997
Low Income (<$50k)              916
Moderate Income ($50k-$75k)     475
Name: count, dtype: int64

Elderly with Financial Barriers (Low/Moderate Income): 1,391 (25.8%)

Median Income Statistics:
count         5388
unique          22
top       80556.00
freq           965
Name: median_income, dtype: object

Property Value Statistics (where available):
count     4328
unique    1690
top       0.00
freq       640
Name: property_value, dtype: object


## 3. Building Condition Barriers Analysis


In [5]:
query = """
SELECT DISTINCT ON (v.res_id)
    v.res_id,
    v.age,
    v.ward_id,
    v.precinct_id,
    ehc.interior_condition,
    ehc.exterior_condition,
    ehc.grade,
    ehc.year_built,
    CASE 
        WHEN ehc.year_built IS NOT NULL THEN 2025 - ehc.year_built
        ELSE NULL
    END as building_age,
    CASE 
        WHEN ehc.interior_condition IN ('Poor', 'Fair') OR 
             ehc.exterior_condition IN ('Poor', 'Fair') OR
             ehc.grade IN ('Poor', 'Fair') THEN true
        ELSE false
    END as has_condition_barrier,
    CASE 
        WHEN ehc.interior_condition IN ('Poor', 'Fair') THEN true
        ELSE false
    END as has_interior_barrier,
    CASE 
        WHEN ehc.exterior_condition IN ('Poor', 'Fair') THEN true
        ELSE false
    END as has_exterior_barrier,
    CASE 
        WHEN ehc.grade IN ('Poor', 'Fair') THEN true
        ELSE false
    END as has_grade_barrier
FROM voters v
INNER JOIN voters_buildings_map vbm ON v.res_id = vbm.res_id
LEFT JOIN elderly_housing_conditions ehc ON v.res_id = ehc.res_id
WHERE v.is_elderly = true
ORDER BY v.res_id
"""

df_conditions = pd.DataFrame(execute_query(query, fetch_all=True))

print("Building Condition Barriers Analysis:")
print(f"Total elderly analyzed: {len(df_conditions):,}")
print(f"Elderly with condition data: {df_conditions['interior_condition'].notna().sum():,}")
print(f"\nInterior Condition Distribution:")
print(df_conditions['interior_condition'].value_counts(dropna=False))
print(f"\nExterior Condition Distribution:")
print(df_conditions['exterior_condition'].value_counts(dropna=False))
print(f"\nProperty Grade Distribution:")
print(df_conditions['grade'].value_counts(dropna=False))
print(f"\nElderly with Condition Barriers: {df_conditions['has_condition_barrier'].sum():,} ({df_conditions['has_condition_barrier'].sum()/len(df_conditions)*100:.1f}%)")
print(f"  - Interior barriers: {df_conditions['has_interior_barrier'].sum():,}")
print(f"  - Exterior barriers: {df_conditions['has_exterior_barrier'].sum():,}")
print(f"  - Grade barriers: {df_conditions['has_grade_barrier'].sum():,}")
if df_conditions['building_age'].notna().any():
    print(f"\nBuilding Age Statistics:")
    print(df_conditions['building_age'].describe())


Building Condition Barriers Analysis:
Total elderly analyzed: 5,391
Elderly with condition data: 4,331

Interior Condition Distribution:
interior_condition
Unknown      1969
Average      1880
None         1060
Good          398
Fair           74
Excellent       6
Poor            4
Name: count, dtype: int64

Exterior Condition Distribution:
exterior_condition
Average      2094
Unknown      1969
None         1060
Good          196
Fair           69
Excellent       3
Name: count, dtype: int64

Property Grade Distribution:
grade
Average    2062
Unknown    1969
None       1060
Good        297
Fair          3
Name: count, dtype: int64

Elderly with Condition Barriers: 131 (2.4%)
  - Interior barriers: 78
  - Exterior barriers: 69
  - Grade barriers: 3

Building Age Statistics:
count       4196
unique       124
top       105.00
freq         692
Name: building_age, dtype: object


## 4. Property Violations Barriers Analysis


In [6]:
query = """
SELECT 
    v.res_id,
    v.age,
    v.ward_id,
    v.precinct_id,
    COALESCE(evs.open_violations, 0) as open_violations,
    COALESCE(evs.total_violations, 0) as total_violations,
    COALESCE(evs.closed_violations, 0) as closed_violations,
    CASE 
        WHEN COALESCE(evs.open_violations, 0) > 0 THEN true
        ELSE false
    END as has_violation_barrier,
    CASE 
        WHEN COALESCE(evs.open_violations, 0) = 0 THEN 'No Violations'
        WHEN COALESCE(evs.open_violations, 0) = 1 THEN '1 Open Violation'
        WHEN COALESCE(evs.open_violations, 0) <= 3 THEN '2-3 Open Violations'
        ELSE '4+ Open Violations'
    END as violation_severity
FROM voters v
LEFT JOIN elderly_violations_one_to_one_summary evs ON v.res_id = evs.res_id
WHERE v.is_elderly = true
"""

df_violations = pd.DataFrame(execute_query(query, fetch_all=True))

print("Property Violations Barriers Analysis:")
print(f"Total elderly analyzed: {len(df_violations):,}")
print(f"Elderly with Open Violations: {df_violations['has_violation_barrier'].sum():,} ({df_violations['has_violation_barrier'].sum()/len(df_violations)*100:.1f}%)")
print(f"\nViolation Severity Distribution:")
print(df_violations['violation_severity'].value_counts())
print(f"\nOpen Violations Statistics:")
print(df_violations[df_violations['open_violations'] > 0]['open_violations'].describe())


Property Violations Barriers Analysis:
Total elderly analyzed: 7,396
Elderly with Open Violations: 35 (0.5%)

Violation Severity Distribution:
violation_severity
No Violations          7361
1 Open Violation         34
2-3 Open Violations       1
Name: count, dtype: int64

Open Violations Statistics:
count    35.000000
mean      1.028571
std       0.169031
min       1.000000
25%       1.000000
50%       1.000000
75%       1.000000
max       2.000000
Name: open_violations, dtype: float64


## 5. Violation Types Analysis


In [7]:
query = """
SELECT 
    v.res_id,
    ev.case_no,
    ev.status,
    ev.description,
    CASE 
        WHEN ev.description ILIKE '%unsafe%' OR ev.description ILIKE '%dangerous%' OR ev.description ILIKE '%hazard%' THEN 'Safety Issue'
        WHEN ev.description ILIKE '%maintenance%' OR ev.description ILIKE '%repair%' THEN 'Maintenance'
        WHEN ev.description ILIKE '%permit%' OR ev.description ILIKE '%code%' THEN 'Permit/Code Issue'
        WHEN ev.description ILIKE '%sanitation%' OR ev.description ILIKE '%trash%' OR ev.description ILIKE '%garbage%' THEN 'Sanitation'
        WHEN ev.description ILIKE '%fire%' OR ev.description ILIKE '%smoke%' THEN 'Fire Safety'
        ELSE 'Other'
    END as violation_category
FROM voters v
INNER JOIN elderly_violations_one_to_one ev ON v.res_id = ev.res_id
WHERE v.is_elderly = true
    AND ev.status = 'Open'
"""

df_violation_types = pd.DataFrame(execute_query(query, fetch_all=True))

if len(df_violation_types) > 0:
    print("Open Violation Types Analysis:")
    print(f"Total open violations: {len(df_violation_types):,}")
    print(f"Elderly with open violations: {df_violation_types['res_id'].nunique():,}")
    print(f"\nViolation Category Distribution:")
    print(df_violation_types['violation_category'].value_counts())
    print(f"\nTop 10 Violation Descriptions:")
    print(df_violation_types['description'].value_counts().head(10))
else:
    print("No open violations found in detailed records.")


Open Violation Types Analysis:
Total open violations: 36
Elderly with open violations: 35

Violation Category Distribution:
violation_category
Safety Issue         26
Maintenance           4
Other                 3
Permit/Code Issue     3
Name: count, dtype: int64

Top 10 Violation Descriptions:
description
Unsafe Structures           26
Maintenance                  4
Testing & Certification      3
Failure to Obtain Permit     3
Name: count, dtype: int64


## 6. Accessibility Barriers Analysis


In [8]:
query = """
SELECT 
    v.res_id,
    v.age,
    v.ward_id,
    v.precinct_id,
    MIN(vsn.distance_meters) as nearest_store_distance_meters,
    COUNT(vsn.store_id) as nearby_stores_count,
    CASE 
        WHEN MIN(vsn.distance_meters) <= 500 THEN 'Excellent (≤500m)'
        WHEN MIN(vsn.distance_meters) <= 1000 THEN 'Good (500-1000m)'
        WHEN MIN(vsn.distance_meters) IS NOT NULL THEN 'Limited (>1000m)'
        ELSE 'No Store Data'
    END as store_accessibility,
    CASE 
        WHEN MIN(vsn.distance_meters) IS NOT NULL AND MIN(vsn.distance_meters) > 1000 THEN true
        ELSE false
    END as has_store_accessibility_barrier
FROM voters v
LEFT JOIN voter_store_nearby vsn ON v.res_id = vsn.res_id
WHERE v.is_elderly = true
GROUP BY v.res_id, v.age, v.ward_id, v.precinct_id
"""

df_store_access = pd.DataFrame(execute_query(query, fetch_all=True))

print("Store Accessibility Barriers Analysis:")
print(f"Total elderly analyzed: {len(df_store_access):,}")
print(f"Elderly with store data: {df_store_access['nearest_store_distance_meters'].notna().sum():,}")
print(f"\nStore Accessibility Distribution:")
print(df_store_access['store_accessibility'].value_counts())
print(f"Elderly with Limited Store Access (>1000m): {df_store_access['has_store_accessibility_barrier'].sum():,} ({df_store_access['has_store_accessibility_barrier'].sum()/len(df_store_access)*100:.1f}%)")
if df_store_access['nearest_store_distance_meters'].notna().any():
    print(f"\nDistance to Nearest Store Statistics (meters):")
    print(df_store_access['nearest_store_distance_meters'].describe())


Store Accessibility Barriers Analysis:
Total elderly analyzed: 7,396
Elderly with store data: 3,641

Store Accessibility Distribution:
store_accessibility
No Store Data        3755
Excellent (≤500m)    3434
Good (500-1000m)      203
Limited (>1000m)        4
Name: count, dtype: int64
Elderly with Limited Store Access (>1000m): 4 (0.1%)

Distance to Nearest Store Statistics (meters):
count      3641
unique     1942
top       71.23
freq         59
Name: nearest_store_distance_meters, dtype: object


In [9]:
query = """
SELECT 
    v.res_id,
    v.age,
    v.ward_id,
    v.precinct_id,
    MIN(
        ST_Distance(
            ST_SetSRID(ST_MakePoint(v.longitude, v.latitude), 4326)::geography,
            ST_Transform(ST_SetSRID(p.geom, 26986), 4326)::geography
        )
    ) as nearest_park_distance_meters
FROM voters v
LEFT JOIN geo_parks p ON ST_DWithin(
    ST_SetSRID(ST_MakePoint(v.longitude, v.latitude), 4326)::geography,
    ST_Transform(ST_SetSRID(p.geom, 26986), 4326)::geography,
    5000
)
WHERE v.is_elderly = true
    AND v.latitude IS NOT NULL 
    AND v.longitude IS NOT NULL
GROUP BY v.res_id, v.age, v.ward_id, v.precinct_id, v.latitude, v.longitude
"""

try:
    df_park_access = pd.DataFrame(execute_query(query, fetch_all=True))
    
    if len(df_park_access) == 0:
        raise ValueError("Query returned empty result")
    
    df_park_access['park_accessibility'] = df_park_access['nearest_park_distance_meters'].apply(
        lambda x: 'Excellent (≤300m)' if pd.notna(x) and x <= 300
        else 'Good (300-600m)' if pd.notna(x) and x <= 600
        else 'Limited (>600m)' if pd.notna(x)
        else 'No Park Data'
    )
    
    df_park_access['has_park_accessibility_barrier'] = (
        df_park_access['nearest_park_distance_meters'].notna() & 
        (df_park_access['nearest_park_distance_meters'] > 600)
    )
    
    print("Park Accessibility Barriers Analysis:")
    print(f"Total elderly analyzed: {len(df_park_access):,}")
    print(f"Elderly with park data: {df_park_access['nearest_park_distance_meters'].notna().sum():,}")
    print(f"\nPark Accessibility Distribution:")
    print(df_park_access['park_accessibility'].value_counts())
    print(f"Elderly with Limited Park Access (>600m): {df_park_access['has_park_accessibility_barrier'].sum():,} ({df_park_access['has_park_accessibility_barrier'].sum()/len(df_park_access)*100:.1f}%)")
    if df_park_access['nearest_park_distance_meters'].notna().any():
        print(f"\nDistance to Nearest Park Statistics (meters):")
        print(df_park_access['nearest_park_distance_meters'].describe())
except Exception as e:
    print(f"Park accessibility data not available: {e}")
    df_park_access = pd.DataFrame(columns=['res_id', 'age', 'ward_id', 'precinct_id', 'nearest_park_distance_meters', 'park_accessibility', 'has_park_accessibility_barrier'])


Park Accessibility Barriers Analysis:
Total elderly analyzed: 7,371
Elderly with park data: 7,371

Park Accessibility Distribution:
park_accessibility
Excellent (≤300m)    6784
Good (300-600m)       583
Limited (>600m)         4
Name: count, dtype: int64
Elderly with Limited Park Access (>600m): 4 (0.1%)

Distance to Nearest Park Statistics (meters):
count    7371.000000
mean      141.704965
std        99.227318
min         0.000000
25%        60.057064
50%       127.388811
75%       204.597701
max       724.881071
Name: nearest_park_distance_meters, dtype: float64


## 7. Combined Barriers Analysis


In [10]:
df_combined = df_financial[['res_id', 'has_financial_barrier']].copy()

df_combined = df_combined.merge(
    df_conditions[['res_id', 'has_condition_barrier']],
    on='res_id',
    how='left'
)

df_combined = df_combined.merge(
    df_violations[['res_id', 'has_violation_barrier']],
    on='res_id',
    how='left'
)

df_combined = df_combined.merge(
    df_store_access[['res_id', 'has_store_accessibility_barrier']],
    on='res_id',
    how='left'
)

if not df_park_access.empty:
    df_combined = df_combined.merge(
        df_park_access[['res_id', 'has_park_accessibility_barrier']],
        on='res_id',
        how='left'
    )
else:
    df_combined['has_park_accessibility_barrier'] = False

df_combined = df_combined.fillna(False)

df_combined['barrier_count'] = (
    df_combined['has_financial_barrier'].astype(int) +
    df_combined['has_condition_barrier'].astype(int) +
    df_combined['has_violation_barrier'].astype(int) +
    df_combined['has_store_accessibility_barrier'].astype(int) +
    df_combined['has_park_accessibility_barrier'].astype(int)
)

df_combined['has_any_barrier'] = df_combined['barrier_count'] > 0
df_combined['has_multiple_barriers'] = df_combined['barrier_count'] >= 2

print("Combined Barriers Analysis:")
print(f"Total elderly analyzed: {len(df_combined):,}")
print(f"\nElderly with Any Barrier: {df_combined['has_any_barrier'].sum():,} ({df_combined['has_any_barrier'].sum()/len(df_combined)*100:.1f}%)")
print(f"Elderly with Multiple Barriers (2+): {df_combined['has_multiple_barriers'].sum():,} ({df_combined['has_multiple_barriers'].sum()/len(df_combined)*100:.1f}%)")
print(f"\nBarrier Count Distribution:")
print(df_combined['barrier_count'].value_counts().sort_index())
print(f"\nBarrier Type Prevalence:")
print(f"  - Financial barriers: {df_combined['has_financial_barrier'].sum():,} ({df_combined['has_financial_barrier'].sum()/len(df_combined)*100:.1f}%)")
print(f"  - Condition barriers: {df_combined['has_condition_barrier'].sum():,} ({df_combined['has_condition_barrier'].sum()/len(df_combined)*100:.1f}%)")
print(f"  - Violation barriers: {df_combined['has_violation_barrier'].sum():,} ({df_combined['has_violation_barrier'].sum()/len(df_combined)*100:.1f}%)")
print(f"  - Store access barriers: {df_combined['has_store_accessibility_barrier'].sum():,} ({df_combined['has_store_accessibility_barrier'].sum()/len(df_combined)*100:.1f}%)")
if not df_park_access.empty:
    print(f"  - Park access barriers: {df_combined['has_park_accessibility_barrier'].sum():,} ({df_combined['has_park_accessibility_barrier'].sum()/len(df_combined)*100:.1f}%)")


Combined Barriers Analysis:
Total elderly analyzed: 5,388

Elderly with Any Barrier: 1,512 (28.1%)
Elderly with Multiple Barriers (2+): 53 (1.0%)

Barrier Count Distribution:
barrier_count
0    3876
1    1459
2      53
Name: count, dtype: int64

Barrier Type Prevalence:
  - Financial barriers: 1,391 (25.8%)
  - Condition barriers: 131 (2.4%)
  - Violation barriers: 35 (0.6%)
  - Store access barriers: 4 (0.1%)
  - Park access barriers: 4 (0.1%)


## 8. Comprehensive Barrier Profile


In [11]:
df_comprehensive = df_financial[['res_id', 'age', 'ward_id', 'precinct_id', 'median_income', 'income_category', 'has_financial_barrier']].copy()

df_comprehensive = df_comprehensive.merge(
    df_conditions[['res_id', 'interior_condition', 'exterior_condition', 'grade', 'building_age', 'has_condition_barrier']],
    on='res_id',
    how='left'
)

df_comprehensive = df_comprehensive.merge(
    df_violations[['res_id', 'open_violations', 'total_violations', 'violation_severity', 'has_violation_barrier']],
    on='res_id',
    how='left'
)

df_comprehensive = df_comprehensive.merge(
    df_store_access[['res_id', 'store_accessibility', 'has_store_accessibility_barrier']],
    on='res_id',
    how='left'
)

if not df_park_access.empty:
    df_comprehensive = df_comprehensive.merge(
        df_park_access[['res_id', 'park_accessibility', 'has_park_accessibility_barrier']],
        on='res_id',
        how='left'
    )

df_comprehensive = df_comprehensive.merge(
    df_combined[['res_id', 'barrier_count', 'has_any_barrier', 'has_multiple_barriers']],
    on='res_id',
    how='left'
)

df_comprehensive['barrier_severity'] = pd.cut(
    df_comprehensive['barrier_count'],
    bins=[-1, 0, 1, 2, 5],
    labels=['No Barriers', 'Low (1 barrier)', 'Medium (2 barriers)', 'High (3+ barriers)']
)

print("Comprehensive Barrier Profile:")
print(f"Total elderly with complete barrier profile: {len(df_comprehensive):,}")
print(f"\nBarrier Severity Distribution:")
print(df_comprehensive['barrier_severity'].value_counts())
print(f"\nSummary Statistics:")
print(f"  - Average barriers per person: {df_comprehensive['barrier_count'].mean():.2f}")
print(f"  - Median barriers per person: {df_comprehensive['barrier_count'].median():.0f}")
print(f"  - Max barriers: {df_comprehensive['barrier_count'].max()}")


Comprehensive Barrier Profile:
Total elderly with complete barrier profile: 5,388

Barrier Severity Distribution:
barrier_severity
No Barriers            3876
Low (1 barrier)        1459
Medium (2 barriers)      53
High (3+ barriers)        0
Name: count, dtype: int64

Summary Statistics:
  - Average barriers per person: 0.29
  - Median barriers per person: 0
  - Max barriers: 2


## 9. Barrier Analysis by Geographic Area


In [12]:
df_ward = df_comprehensive.groupby('ward_id').agg({
    'res_id': 'count',
    'has_financial_barrier': 'sum',
    'has_condition_barrier': 'sum',
    'has_violation_barrier': 'sum',
    'has_store_accessibility_barrier': 'sum',
    'barrier_count': 'mean',
    'has_multiple_barriers': 'sum'
}).round(2)

df_ward.columns = ['total_elderly', 'financial_barriers', 'condition_barriers', 'violation_barriers', 
                   'store_access_barriers', 'avg_barriers_per_person', 'multiple_barriers']
df_ward = df_ward.sort_values('total_elderly', ascending=False)

print("Barriers by Ward:")
print(df_ward.to_string())


Barriers by Ward:
         total_elderly  financial_barriers  condition_barriers  violation_barriers  store_access_barriers  avg_barriers_per_person  multiple_barriers
ward_id                                                                                                                                              
21                3091                 755                  38                   6                      0                     0.26                 18
22                2297                 636                  93                  29                      4                     0.33                 35


In [13]:
df_tract = df_comprehensive.groupby(['precinct_id']).agg({
    'res_id': 'count',
    'has_financial_barrier': 'sum',
    'has_condition_barrier': 'sum',
    'has_violation_barrier': 'sum',
    'barrier_count': 'mean',
    'has_multiple_barriers': 'sum'
}).round(2)

df_tract.columns = ['total_elderly', 'financial_barriers', 'condition_barriers', 
                    'violation_barriers', 'avg_barriers_per_person', 'multiple_barriers']
df_tract = df_tract.sort_values('total_elderly', ascending=False).head(15)

print("Top 15 Precincts by Elderly Count - Barriers:")
print(df_tract.to_string())


Top 15 Precincts by Elderly Count - Barriers:
             total_elderly  financial_barriers  condition_barriers  violation_barriers  avg_barriers_per_person  multiple_barriers
precinct_id                                                                                                                       
13                     967                  11                  13                   2                     0.03                  1
12                     643                 207                  13                   0                     0.34                  2
10                     529                 171                  11                   0                     0.34                  1
7                      465                  65                   3                   0                     0.15                  2
9                      414                 296                  10                   1                     0.74                 11
11                     328           

## 10. Export Results


In [None]:
output_dir = os.path.join(project_dir, 'data', 'processed', 'elderly_analysis')
os.makedirs(output_dir, exist_ok=True)

df_financial.to_csv(f'{output_dir}/barriers_financial.csv', index=False)
df_conditions.to_csv(f'{output_dir}/barriers_conditions.csv', index=False)
df_violations.to_csv(f'{output_dir}/barriers_violations.csv', index=False)
if len(df_violation_types) > 0:
    df_violation_types.to_csv(f'{output_dir}/barriers_violation_types.csv', index=False)
df_store_access.to_csv(f'{output_dir}/barriers_store_access.csv', index=False)
if not df_park_access.empty:
    df_park_access.to_csv(f'{output_dir}/barriers_park_access.csv', index=False)
df_combined.to_csv(f'{output_dir}/barriers_combined.csv', index=False)
df_comprehensive.to_csv(f'{output_dir}/barriers_comprehensive.csv', index=False)
df_ward.to_csv(f'{output_dir}/barriers_by_ward.csv', index=True)
df_tract.to_csv(f'{output_dir}/barriers_by_precinct.csv', index=True)

print("✅ All barrier analysis files exported to:", output_dir)


✅ All barrier analysis files exported to: /Users/Studies/Projects/ds-abcdc-allston/fa25-team-a/data/processed/elderly_analysis
