# Question 5: What would be the impact on the housing market if they transitioned to senior housing?

This notebook analyzes the housing market impact of transitioning elderly residents to a new 50-60 unit affordable senior housing project:

1. **Top Candidates Identification**: Identify 50-60 priority candidates from eligibility analysis
2. **Property Availability Analysis**: Properties that would become available (elderly homeowners)
3. **Market Impact**: Property values, units available, neighborhood effects
4. **Outreach Strategy**: Geographic and demographic targeting for outreach program

## Project Context
- **New Housing Project**: 50-60 units
- **Target Population**: High and Medium priority eligible elderly residents
- **Outreach Program**: Need to identify best candidates for targeted outreach


In [50]:
import sys
import os

current_dir = os.getcwd()
if os.path.basename(current_dir) == 'notebooks':
    project_dir = os.path.dirname(current_dir)
else:
    parts = current_dir.split(os.sep)
    if 'fa25-team-a' in parts:
        idx = parts.index('fa25-team-a')
        project_dir = os.sep.join(parts[:idx+1])
    else:
        project_dir = os.path.dirname(os.path.dirname(current_dir))
web_app_path = os.path.join(project_dir, 'web_app')
sys.path.append(web_app_path)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from config.database import get_db_connection, execute_query

sns.set_style("whitegrid")
plt.rcParams['figure.figsize'] = (12, 6)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

# Project parameters
TARGET_UNITS = 60  # Target 50-60 units, using 60 as upper bound


In [51]:
# Load comprehensive eligibility analysis
data_dir = os.path.join(project_dir, 'data', 'processed', 'elderly_analysis')
eligibility_file = os.path.join(data_dir, 'comprehensive_eligibility_analysis.csv')

if os.path.exists(eligibility_file):
    df_eligibility = pd.read_csv(eligibility_file)
    print(f"✅ Loaded eligibility data: {len(df_eligibility):,} residents")
    print(f"   Priority distribution:")
    print(df_eligibility['priority_level'].value_counts())
else:
    print(f"❌ Error: Eligibility file not found at {eligibility_file}")
    print("   Please run elderly_housing_eligibility.ipynb first to generate the data.")

# Identify top candidates (High and Medium priority, sorted by score)
df_top_candidates = df_eligibility[
    df_eligibility['priority_level'].isin(['High', 'Medium'])
].copy()

df_top_candidates = df_top_candidates.sort_values('eligibility_score', ascending=False)

print(f"\nAvailable High/Medium Priority Candidates: {len(df_top_candidates):,}")
print(f"  - High Priority: {len(df_top_candidates[df_top_candidates['priority_level'] == 'High']):,}")
print(f"  - Medium Priority: {len(df_top_candidates[df_top_candidates['priority_level'] == 'Medium']):,}")

# Select top 60 candidates for the project
df_project_candidates = df_top_candidates.head(TARGET_UNITS).copy()

print(f"\nSelected Top {TARGET_UNITS} Candidates for Project:")
print(f"  - High Priority: {len(df_project_candidates[df_project_candidates['priority_level'] == 'High']):,}")
print(f"  - Medium Priority: {len(df_project_candidates[df_project_candidates['priority_level'] == 'Medium']):,}")
print(f"  - Average Eligibility Score: {df_project_candidates['eligibility_score'].mean():.1f}")
print(f"  - Score Range: {df_project_candidates['eligibility_score'].min():.0f} - {df_project_candidates['eligibility_score'].max():.0f}")


✅ Loaded eligibility data: 6,958 residents
   Priority distribution:
priority_level
Low          5868
Medium        921
High          164
Very High       5
Name: count, dtype: int64

Available High/Medium Priority Candidates: 1,085
  - High Priority: 164
  - Medium Priority: 921

Selected Top 60 Candidates for Project:
  - High Priority: 60
  - Medium Priority: 0
  - Average Eligibility Score: 31.1
  - Score Range: 28 - 35


In [52]:
# Add tenure status to candidates
query = """
SELECT DISTINCT ON (v.res_id)
    v.res_id,
    v.ward_id,
    v.precinct_id,
    b.owner_occ,
    CASE 
        WHEN b.owner_occ = 'Y' THEN 'Homeowner'
        WHEN b.owner_occ IS NULL THEN 'Unknown'
        ELSE 'Renter'
    END as tenure_status,
    b.total_value as property_value,
    CASE 
        WHEN b.total_value < 500000 THEN 'Low Value (<$500k)'
        WHEN b.total_value < 1000000 THEN 'Moderate Value ($500k-$1M)'
        WHEN b.total_value >= 1000000 THEN 'High Value ($1M+)'
        ELSE 'Unknown'
    END as value_category
FROM voters v
INNER JOIN voters_buildings_map vbm ON v.res_id = vbm.res_id
LEFT JOIN buildings b ON vbm.struct_id = b.struct_id
WHERE v.is_elderly = true
ORDER BY v.res_id
"""

df_tenure = pd.DataFrame(execute_query(query, fetch_all=True))

# Merge tenure data into project candidates
df_project_candidates = df_project_candidates.merge(
    df_tenure[['res_id', 'ward_id', 'precinct_id', 'tenure_status', 'property_value', 'value_category']],
    on='res_id',
    how='left'
)

# Filter to homeowners only (properties that would become available)
df_homeowner_candidates = df_project_candidates[
    df_project_candidates['tenure_status'] == 'Homeowner'
].copy()

print(f"Tenure data merged. Homeowner candidates: {len(df_homeowner_candidates):,}")


Tenure data merged. Homeowner candidates: 43


In [53]:
print("Housing Market Impact Analysis:")
print("=" * 60)

print(f"\n1. PROJECT SCALE:")
print(f"   - New Senior Housing Units: {TARGET_UNITS}")
print(f"   - Total Candidates Selected: {len(df_project_candidates):,}")

print(f"\n2. PROPERTIES THAT WOULD BECOME AVAILABLE:")
print(f"   - Homeowner Candidates: {len(df_homeowner_candidates):,}")
if len(df_homeowner_candidates) > 0:
    property_values = pd.to_numeric(df_homeowner_candidates['property_value'], errors='coerce')
    print(f"   - Estimated Total Property Value: ${property_values.sum():,.0f}")
    print(f"   - Average Property Value: ${property_values.mean():,.0f}")

print(f"\n3. TENURE BREAKDOWN OF CANDIDATES:")
tenure_breakdown = df_project_candidates['tenure_status'].value_counts(dropna=False)
for tenure, count in tenure_breakdown.items():
    pct = count / len(df_project_candidates) * 100
    print(f"   - {tenure}: {count} ({pct:.1f}%)")

print(f"\n4. GEOGRAPHIC DISTRIBUTION:")
print(df_project_candidates.groupby('ward_id').size())
print(f"\nBy Census Tract (Top 10):")
print(df_project_candidates.groupby('tract_name').size().sort_values(ascending=False).head(10))


Housing Market Impact Analysis:

1. PROJECT SCALE:
   - New Senior Housing Units: 60
   - Total Candidates Selected: 60

2. PROPERTIES THAT WOULD BECOME AVAILABLE:
   - Homeowner Candidates: 43
   - Estimated Total Property Value: $45,032,000
   - Average Property Value: $1,047,256

3. TENURE BREAKDOWN OF CANDIDATES:
   - Homeowner: 43 (71.7%)
   - Renter: 17 (28.3%)

4. GEOGRAPHIC DISTRIBUTION:
ward_id
21     8
22    52
dtype: int64

By Census Tract (Top 10):
tract_name
Census Tract 6.03; Suffolk County; Massachusetts      27
Census Tract 101.03; Suffolk County; Massachusetts    22
Census Tract 7.03; Suffolk County; Massachusetts       5
Census Tract 5.06; Suffolk County; Massachusetts       3
Census Tract 8.05; Suffolk County; Massachusetts       3
dtype: int64


In [54]:
# Expanded Outreach Pool Analysis (200-300 candidates)
OUTREACH_POOL_MIN = 200
OUTREACH_POOL_MAX = 300

print("=" * 70)
print("EXPANDED OUTREACH POOL ANALYSIS")
print("=" * 70)

print(f"\nProject Goal: Fill {TARGET_UNITS} units")
print(f"Outreach Pool Needed: {OUTREACH_POOL_MIN}-{OUTREACH_POOL_MAX} candidates")
print(f"Expected Response Rate: 20-30% (to get {TARGET_UNITS} acceptances)")

print(f"\nAvailable High/Medium Priority Candidates: {len(df_top_candidates):,}")
print(f"  - High Priority: {len(df_top_candidates[df_top_candidates['priority_level'] == 'High']):,}")
print(f"  - Medium Priority: {len(df_top_candidates[df_top_candidates['priority_level'] == 'Medium']):,}")

# Select top 300 candidates for expanded outreach pool
df_outreach_pool = df_top_candidates.head(OUTREACH_POOL_MAX).copy()

# Add tenure status to outreach pool
df_outreach_pool = df_outreach_pool.merge(
    df_tenure[['res_id', 'tenure_status', 'property_value']],
    on='res_id',
    how='left'
)

print(f"\nExpanded Outreach Pool (Top {OUTREACH_POOL_MAX}):")
print(f"  - High Priority: {len(df_outreach_pool[df_outreach_pool['priority_level'] == 'High']):,}")
print(f"  - Medium Priority: {len(df_outreach_pool[df_outreach_pool['priority_level'] == 'Medium']):,}")
print(f"  - Average Eligibility Score: {df_outreach_pool['eligibility_score'].mean():.1f}")
print(f"  - Score Range: {df_outreach_pool['eligibility_score'].min():.0f} - {df_outreach_pool['eligibility_score'].max():.0f}")
print(f"  - 25th percentile score: {df_outreach_pool['eligibility_score'].quantile(0.25):.1f}")
print(f"  - 50th percentile score: {df_outreach_pool['eligibility_score'].quantile(0.50):.1f}")
print(f"  - 75th percentile score: {df_outreach_pool['eligibility_score'].quantile(0.75):.1f}")

print(f"\nScore Distribution in Outreach Pool:")
print(df_outreach_pool['eligibility_score'].describe())

print(f"\n\nExpanded Outreach Pool - Tenure Status:")
print(df_outreach_pool['tenure_status'].value_counts(dropna=False))

tenure_counts = df_outreach_pool['tenure_status'].value_counts()
print(f"\nTenure Breakdown:")
if 'Homeowner' in tenure_counts:
    print(f"  - Homeowner: {tenure_counts['Homeowner']} ({tenure_counts['Homeowner']/len(df_outreach_pool)*100:.1f}%)")
if 'Renter' in tenure_counts:
    print(f"  - Renter: {tenure_counts['Renter']} ({tenure_counts['Renter']/len(df_outreach_pool)*100:.1f}%)")
if 'Unknown' in tenure_counts:
    print(f"  - Unknown: {tenure_counts['Unknown']} ({tenure_counts['Unknown']/len(df_outreach_pool)*100:.1f}%)")

# Analyze homeowners in outreach pool
df_outreach_homeowners = df_outreach_pool[df_outreach_pool['tenure_status'] == 'Homeowner'].copy()

if len(df_outreach_homeowners) > 0:
    property_values = pd.to_numeric(df_outreach_homeowners['property_value'], errors='coerce')
    print(f"\n\nHomeowner Candidates in Outreach Pool: {len(df_outreach_homeowners):,}")
    print(f"Total Estimated Property Value: ${property_values.sum():,.0f}")
    print(f"Average Property Value: ${property_values.mean():,.0f}")
    print(f"Median Property Value: ${property_values.median():,.0f}")
    print(f"Property Value Range: ${property_values.min():,.0f} - ${property_values.max():,.0f}")
    
    # Estimate property value if 20-30% accept
    top_homeowners = df_outreach_homeowners.sort_values('eligibility_score', ascending=False)
    top_20_pct = int(len(top_homeowners) * 0.20)
    top_30_pct = int(len(top_homeowners) * 0.30)
    
    if top_20_pct > 0:
        value_20pct = pd.to_numeric(top_homeowners.head(top_20_pct)['property_value'], errors='coerce').sum()
        print(f"\nEstimated Property Value if 20-30% Accept (based on top scores):")
        print(f"  - 20% acceptance ({top_20_pct} homeowners): ${value_20pct:,.0f}")
    if top_30_pct > 0:
        value_30pct = pd.to_numeric(top_homeowners.head(top_30_pct)['property_value'], errors='coerce').sum()
        print(f"  - 30% acceptance ({top_30_pct} homeowners): ${value_30pct:,.0f}")


EXPANDED OUTREACH POOL ANALYSIS

Project Goal: Fill 60 units
Outreach Pool Needed: 200-300 candidates
Expected Response Rate: 20-30% (to get 60 acceptances)

Available High/Medium Priority Candidates: 1,085
  - High Priority: 164
  - Medium Priority: 921

Expanded Outreach Pool (Top 300):
  - High Priority: 164
  - Medium Priority: 136
  - Average Eligibility Score: 27.1
  - Score Range: 25 - 35
  - 25th percentile score: 25.0
  - 50th percentile score: 27.0
  - 75th percentile score: 28.0

Score Distribution in Outreach Pool:
count    300.000000
mean      27.063333
std        2.773378
min       25.000000
25%       25.000000
50%       27.000000
75%       28.000000
max       35.000000
Name: eligibility_score, dtype: float64


Expanded Outreach Pool - Tenure Status:
tenure_status
Homeowner    158
Renter        82
Unknown       60
Name: count, dtype: int64

Tenure Breakdown:
  - Homeowner: 158 (52.7%)
  - Renter: 82 (27.3%)
  - Unknown: 60 (20.0%)


Homeowner Candidates in Outreach Pool: 

In [55]:
# Analyze where candidates are located for outreach targeting
df_outreach = df_project_candidates.groupby(['ward_id', 'precinct_id', 'tract_name', 'income_category']).agg({
    'res_id': 'count',
    'eligibility_score': 'mean',
    'age': 'mean'
}).round(2)

df_outreach.columns = ['candidate_count', 'avg_eligibility_score', 'avg_age']
df_outreach = df_outreach.sort_values('candidate_count', ascending=False)

print("Outreach Targeting - Top Areas by Candidate Count:")
print(df_outreach.head(15).to_string())

# Census tract level analysis
df_tract_outreach = df_project_candidates.groupby(['tract_id', 'tract_name', 'median_income']).agg({
    'res_id': 'count',
    'eligibility_score': 'mean',
    'tenure_status': lambda x: x.value_counts().to_dict()
}).round(2)

df_tract_outreach.columns = ['candidate_count', 'avg_score', 'tenure_breakdown']
df_tract_outreach = df_tract_outreach.sort_values('candidate_count', ascending=False)

print(f"\n\nTop Census Tracts for Outreach (by candidate count):")
print(df_tract_outreach.head(10).to_string())


Outreach Targeting - Top Areas by Candidate Count:
                                                                                        candidate_count  avg_eligibility_score  avg_age
ward_id precinct_id tract_name                                         income_category                                                 
22      5           Census Tract 6.03; Suffolk County; Massachusetts   Low Income                    18                  29.94    72.11
        1           Census Tract 101.03; Suffolk County; Massachusetts Low Income                    16                  31.50    75.00
        2           Census Tract 6.03; Suffolk County; Massachusetts   Low Income                     7                  28.00    72.57
                    Census Tract 101.03; Suffolk County; Massachusetts Low Income                     6                  33.83    74.67
21      4           Census Tract 8.05; Suffolk County; Massachusetts   Moderate Income                3                  28.00    71.

In [56]:
print("Top 60 Candidates Profile:")
print("=" * 60)

print(f"\n1. DEMOGRAPHICS:")
print(f"   - Average Age: {df_project_candidates['age'].mean():.1f} years")
print(f"   - Age Range: {df_project_candidates['age'].min():.0f} - {df_project_candidates['age'].max():.0f} years")
print(f"\n   Age Group Distribution:")
print(df_project_candidates['age_group'].value_counts())

print(f"\n2. INCOME PROFILE:")
print(df_project_candidates['income_category'].value_counts())
print(f"\n   Average Census Tract Median Income: ${df_project_candidates['median_income'].mean():,.0f}")

print(f"\n3. HOUSING CONDITIONS:")
print(f"   - With Poor/Fair Conditions: {df_project_candidates['has_poor_conditions'].sum():,}")
print(f"   - With Open Violations: {df_project_candidates['has_violations'].sum():,}")

print(f"\n4. AMENITY ACCESS:")
print(f"   - Store Access:")
print(df_project_candidates['store_accessibility'].value_counts(dropna=False))
print(f"   - Park Access:")
print(df_project_candidates['park_accessibility'].value_counts(dropna=False))


Top 60 Candidates Profile:

1. DEMOGRAPHICS:
   - Average Age: 74.0 years
   - Age Range: 62 - 93 years

   Age Group Distribution:
age_group
62-69    22
70-79    20
80-89    16
90+       2
Name: count, dtype: int64

2. INCOME PROFILE:
income_category
Low Income         54
Moderate Income     6
Name: count, dtype: int64

   Average Census Tract Median Income: $41,730

3. HOUSING CONDITIONS:
   - With Poor/Fair Conditions: 28
   - With Open Violations: 3

4. AMENITY ACCESS:
   - Store Access:
store_accessibility
Excellent (≤500m)    58
Good (500-1000m)      2
Name: count, dtype: int64
   - Park Access:
park_accessibility
Good (300-600m)      32
Excellent (≤300m)    28
Name: count, dtype: int64


In [57]:
# Add tenure status to df_top_candidates for alternative strategies (if not already merged)
if 'tenure_status' not in df_top_candidates.columns:
    df_top_candidates = df_top_candidates.merge(
        df_tenure[['res_id', 'tenure_status']],
        on='res_id',
        how='left'
    )

# If we need more candidates or want to prioritize differently
# Create alternative selection strategies

# Strategy 1: Prioritize renters (they need housing more urgently)
df_renter_priority = df_top_candidates[
    df_top_candidates['tenure_status'] == 'Renter'
].sort_values('eligibility_score', ascending=False).head(TARGET_UNITS)

print("Alternative Strategy 1: Prioritize Renters")
print(f"Selected {len(df_renter_priority):,} renter candidates")
if len(df_renter_priority) > 0:
    print(f"Average eligibility score: {df_renter_priority['eligibility_score'].mean():.1f}")
else:
    print("No renter candidates available in top priority pool")

# Strategy 2: Prioritize by income need
df_income_priority = df_top_candidates[
    df_top_candidates['income_category'].isin(['Low Income (<$50k)', 'Moderate Income ($50k-$75k)'])
].sort_values('eligibility_score', ascending=False).head(TARGET_UNITS)

print(f"\nAlternative Strategy 2: Prioritize Low/Moderate Income")
print(f"Selected {len(df_income_priority):,} low/moderate income candidates")
if len(df_income_priority) > 0:
    print(f"Average eligibility score: {df_income_priority['eligibility_score'].mean():.1f}")

# Strategy 3: Prioritize by housing conditions
df_condition_priority = df_top_candidates[
    (df_top_candidates['has_poor_conditions'] == True) | 
    (df_top_candidates['has_violations'] == True)
].sort_values('eligibility_score', ascending=False).head(TARGET_UNITS)

print(f"\nAlternative Strategy 3: Prioritize Poor Conditions/Violations")
print(f"Selected {len(df_condition_priority):,} candidates with housing issues")
if len(df_condition_priority) > 0:
    print(f"Average eligibility score: {df_condition_priority['eligibility_score'].mean():.1f}")


Alternative Strategy 1: Prioritize Renters
Selected 60 renter candidates
Average eligibility score: 27.4

Alternative Strategy 2: Prioritize Low/Moderate Income
Selected 0 low/moderate income candidates

Alternative Strategy 3: Prioritize Poor Conditions/Violations
Selected 60 candidates with housing issues
Average eligibility score: 28.0


In [58]:
# Analyze impact by neighborhood (census tract)
df_neighborhood_impact = df_project_candidates.groupby(['tract_id', 'tract_name', 'median_income']).agg({
    'res_id': 'count',
    'tenure_status': lambda x: (x == 'Homeowner').sum(),
    'property_value': lambda x: pd.to_numeric(x, errors='coerce').sum()
}).round(2)

df_neighborhood_impact.columns = ['candidates', 'homeowners', 'total_property_value']
df_neighborhood_impact = df_neighborhood_impact.sort_values('candidates', ascending=False)

print("Neighborhood-Level Impact (by Census Tract):")
print(df_neighborhood_impact.head(10).to_string())

print(f"\n\nSummary:")
print(f"  - Total neighborhoods affected: {len(df_neighborhood_impact)}")
print(f"  - Neighborhoods with homeowners: {(df_neighborhood_impact['homeowners'] > 0).sum()}")
print(f"  - Total estimated property value: ${df_neighborhood_impact['total_property_value'].sum():,.0f}")


Neighborhood-Level Impact (by Census Tract):
                                                                               candidates  homeowners  total_property_value
tract_id     tract_name                                         median_income                                              
2.502500e+10 Census Tract 6.03; Suffolk County; Massachusetts   33229.0                27          18           575177500.0
2.502501e+10 Census Tract 101.03; Suffolk County; Massachusetts 45000.0                22          16            20490800.0
2.502500e+10 Census Tract 7.03; Suffolk County; Massachusetts   46985.0                 5           5             6431700.0
             Census Tract 5.06; Suffolk County; Massachusetts   73403.0                 3           1             3905300.0
             Census Tract 8.05; Suffolk County; Massachusetts   53824.0                 3           3             2266800.0


Summary:
  - Total neighborhoods affected: 5
  - Neighborhoods with homeowners: 5
  -

In [59]:
# Export Results
output_dir = os.path.join(project_dir, 'data', 'processed', 'elderly_analysis')
os.makedirs(output_dir, exist_ok=True)

# Export project candidates
df_project_candidates.to_csv(f'{output_dir}/project_candidates_60.csv', index=False)
print(f"✅ Exported project candidates: {len(df_project_candidates):,} residents")

# Export homeowner candidates (properties that would become available)
if len(df_homeowner_candidates) > 0:
    df_homeowner_candidates.to_csv(f'{output_dir}/project_homeowner_candidates.csv', index=False)
    print(f"✅ Exported homeowner candidates: {len(df_homeowner_candidates):,} residents")

# Export expanded outreach pool
if 'df_outreach_pool' in locals():
    df_outreach_pool.to_csv(f'{output_dir}/outreach_pool_expanded_300.csv', index=False)
    print(f"✅ Exported expanded outreach pool: {len(df_outreach_pool):,} residents")
    
    # Export homeowners from outreach pool
    if 'df_outreach_homeowners' in locals() and len(df_outreach_homeowners) > 0:
        df_outreach_homeowners.to_csv(f'{output_dir}/outreach_pool_homeowners.csv', index=False)
        print(f"✅ Exported outreach pool homeowners: {len(df_outreach_homeowners):,} residents")

# Export neighborhood impact
if 'df_neighborhood_impact' in locals():
    df_neighborhood_impact.to_csv(f'{output_dir}/neighborhood_market_impact.csv', index=False)
    print(f"✅ Exported neighborhood impact analysis")

print(f"\nAll files exported to: {output_dir}/")


✅ Exported project candidates: 60 residents
✅ Exported homeowner candidates: 43 residents
✅ Exported expanded outreach pool: 300 residents
✅ Exported outreach pool homeowners: 158 residents
✅ Exported neighborhood impact analysis

All files exported to: /Users/Studies/Projects/ds-abcdc-allston/fa25-team-a/data/processed/elderly_analysis/
