In [1]:
import sys
from pathlib import Path

# Setup paths
# The project root is taken to be the parent of the current working directory.
PROJECT_ROOT = Path().resolve().parents[0]
# Guard the append so that re-running this cell does not pile up duplicate
# entries on sys.path.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.append(str(PROJECT_ROOT))

In [2]:
from config import *

### Analysis of Chronic Disease prevalence by Disease Burden Clusters
States are grouped into regions that reflect known chronic disease patterns (for example, the Diabetes Belt and the Stroke Belt).

In [10]:
import altair as alt
import pandas as pd
import numpy as np
from vega_datasets import data
from typing import Dict, List, Tuple

def create_health_outcomes_analysis(
    df: pd.DataFrame,
    health_outcomes: Dict[str, str]
) -> alt.Chart:
    """
    Build an interactive view of chronic disease prevalence by disease-burden region.

    Two charts driven by one dropdown are stacked vertically: a bar chart of
    the mean prevalence per region and a dot plot of the mean prevalence per
    state. Rows of ``df`` are averaged with equal weight.

    The region definitions overlap (for example 'AL' is listed in four of
    them), so every region is aggregated over all of its listed states. In the
    state chart a state that belongs to several regions is a single point
    whose tooltip lists every region and whose colour follows its last-listed
    region.

    Args:
        df: Table with ``StateAbbr``, ``StateDesc`` and one
            ``<measure>_CrudePrev`` column per key of ``health_outcomes``.
            It is only read; no columns are added to it.
        health_outcomes: Measure id -> description. The text before the word
            'among' in each description becomes the dropdown label.

    Returns:
        The vertically concatenated Altair chart with the dropdown attached.

    Raises:
        ValueError: If ``health_outcomes`` is empty.
        KeyError: If ``df`` lacks any required column.
    """
    if not health_outcomes:
        raise ValueError('health_outcomes must contain at least one measure')

    measures = list(health_outcomes)
    required_cols = ['StateAbbr', 'StateDesc'] + [f'{m}_CrudePrev' for m in measures]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise KeyError(f'df is missing required columns: {missing_cols}')

    disease_burden_regions = {
        'Diabetes_Belt': ['AL', 'MS', 'GA', 'SC', 'NC', 'TN', 'KY'],    # High diabetes prevalence
        'Stroke_Belt': ['NC', 'SC', 'GA', 'AL', 'MS', 'TN', 'AR', 'LA'], # High stroke rates
        'Heart_Disease_Zone': ['WV', 'KY', 'TN', 'OK', 'MS', 'AL'],      # High cardiovascular disease
        'Low_Mortality_West': ['CA', 'WA', 'OR', 'CO', 'UT'],            # Better health outcomes
        'Metropolitan_Health': ['NY', 'NJ', 'MA', 'CT', 'MD'],           # Urban health patterns
        'Rural_Challenge': ['MT', 'ID', 'WY', 'ND', 'SD'],               # Access challenges
        'Obesity_Belt': ['MS', 'AL', 'WV', 'KY', 'AR', 'LA']            # High obesity rates
    }

    # Region types for colour coding; their concatenation is the bar order.
    favorable_regions = ['Low_Mortality_West', 'Metropolitan_Health']
    challenged_regions = ['Diabetes_Belt', 'Stroke_Belt', 'Heart_Disease_Zone',
                          'Rural_Challenge', 'Obesity_Belt']
    region_order = favorable_regions + challenged_regions
    region_type = {
        region: 'Better Health Outcomes' if region in favorable_regions else 'Health Challenged'
        for region in disease_burden_regions
    }

    # One row per (state, region) pair. Inverting the dict into a
    # state -> region lookup would keep only the last region of each state
    # and leave 'Diabetes_Belt' without any state at all.
    membership = pd.DataFrame(
        [
            (state, region, region_type[region])
            for region, states in disease_burden_regions.items()
            for state in states
        ],
        columns=['StateAbbr', 'HealthRegion', 'RegionType'],
    )

    # One label row per state for the state chart: every region it belongs
    # to, coloured by the last-listed one.
    regions_by_state: Dict[str, List[str]] = {}
    for region, states in disease_burden_regions.items():
        for state in states:
            regions_by_state.setdefault(state, []).append(region)
    state_labels = pd.DataFrame({
        'StateAbbr': list(regions_by_state),
        'HealthRegion': [', '.join(regions) for regions in regions_by_state.values()],
        'RegionType': [region_type[regions[-1]] for regions in regions_by_state.values()],
    })

    # Column subset of the input; the caller's frame is never written to.
    observations = df[required_cols]

    # '<measure>_<stat>' output columns, matching what the charts look up.
    agg_spec = {
        f'{measure}_{stat}': (f'{measure}_CrudePrev', stat)
        for measure in measures
        for stat in ('mean', 'std', 'count')
    }

    # Regional statistics: the inner merge repeats a state's rows once per
    # region it belongs to and drops states that belong to none.
    regional_data = (
        observations.merge(membership, on='StateAbbr')
        .groupby(['HealthRegion', 'RegionType'])
        .agg(**agg_spec)
        .reset_index()
    )

    # 95% confidence intervals (normal approximation: mean +/- 1.96 * standard error)
    for measure in measures:
        margin = 1.96 * (
            regional_data[f'{measure}_std'] / np.sqrt(regional_data[f'{measure}_count'])
        )
        regional_data[f'{measure}_CI_lower'] = regional_data[f'{measure}_mean'] - margin
        regional_data[f'{measure}_CI_upper'] = regional_data[f'{measure}_mean'] + margin

    # State-level statistics, restricted to states that belong to a region.
    state_data = (
        observations.groupby(['StateAbbr', 'StateDesc'])
        .agg(**agg_spec)
        .reset_index()
        .merge(state_labels, on='StateAbbr')
    )

    # Create measure selection with descriptive labels
    measure_select = alt.binding_select(
        options=measures,
        labels=[desc.split('among')[0].strip() for desc in health_outcomes.values()],
        name="Health Condition: "
    )

    selection = alt.param(
        name='health_outcome',
        value=measures[0],
        bind=measure_select
    )

    # Define color schemes for different region types
    color_scale = alt.Scale(
        domain=['Better Health Outcomes', 'Health Challenged'],
        range=['#2166ac', '#b2182b']  # Blue for better outcomes, Red for challenged
    )

    # Create regional comparison chart. The calculate transforms pick the
    # '<selected measure>_<stat>' column at render time via the dropdown param.
    regional_chart = alt.Chart(regional_data).mark_bar().encode(
        x=alt.X('value:Q',
                title='Disease Prevalence (%)',
                scale=alt.Scale(zero=False)),
        y=alt.Y('HealthRegion:N',
                title='Disease Burden Region',
                sort=region_order),
        color=alt.Color('RegionType:N',
                        scale=color_scale,
                        legend=alt.Legend(title='Regional Health Status')),
        tooltip=[
            alt.Tooltip('HealthRegion:N', title='Region'),
            alt.Tooltip('RegionType:N', title='Health Status'),
            alt.Tooltip('value:Q', title='Prevalence (%)', format='.1f'),
            alt.Tooltip('CI_lower:Q', title='95% CI Lower', format='.1f'),
            alt.Tooltip('CI_upper:Q', title='95% CI Upper', format='.1f')
        ]
    ).transform_calculate(
        value=f'datum[{selection.name} + "_mean"]',
        CI_lower=f'datum[{selection.name} + "_CI_lower"]',
        CI_upper=f'datum[{selection.name} + "_CI_upper"]'
    ).properties(
        width=800,
        height=200,
        title='Regional Disease Burden Patterns'
    )

    # Create state-level detail chart
    state_chart = alt.Chart(state_data).mark_circle(size=100).encode(
        x=alt.X('value:Q',
                title='Disease Prevalence (%)',
                scale=alt.Scale(zero=False)),
        y=alt.Y('StateDesc:N',
                title='State',
                sort=alt.EncodingSortField(field='value', op='mean', order='ascending')),
        color=alt.Color('RegionType:N',
                        scale=color_scale,
                        legend=alt.Legend(title='Regional Health Status')),
        tooltip=[
            alt.Tooltip('StateDesc:N', title='State'),
            alt.Tooltip('HealthRegion:N', title='Region'),
            alt.Tooltip('RegionType:N', title='Health Status'),
            alt.Tooltip('value:Q', title='Prevalence (%)', format='.1f'),
            alt.Tooltip('std:Q', title='Std Dev', format='.2f')
        ]
    ).transform_calculate(
        value=f'datum[{selection.name} + "_mean"]',
        std=f'datum[{selection.name} + "_std"]'
    ).properties(
        width=800,
        height=500,
        title='State-Level Disease Burden Analysis'
    )

    # Combine visualizations
    final_viz = alt.vconcat(
        regional_chart,
        state_chart,
        spacing=30
    ).properties(
        title={
            'text': 'Geographic Patterns of Chronic Disease Burden in the United States',
            'subtitle': [
                'Analysis of health outcomes across regions with distinct disease burden profiles',
                'Examining variations in chronic conditions and their regional distribution',
                'Use dropdown to explore different health conditions'
            ],
            'fontSize': 24,
            'anchor': 'middle'
        }
    ).add_params(
        selection
    )

    return final_viz

def main():
    """
    Render the chronic disease chart in the notebook and return it.

    Reads the CSV located by ``get_file_path(2024)``, builds the chart with
    ``create_health_outcomes_analysis`` and shows it with ``display``.
    """
    # Enable Altair to work in Jupyter
    alt.renderers.enable('default')

    # Load data (2024 is presumably the data release year -- confirm against config)
    frame = pd.read_csv(get_file_path(2024))

    # Measure id -> description; the part before "among" labels the dropdown
    outcome_descriptions = {
        'ARTHRITIS': 'Arthritis among adults aged ≥18 years',
        'BPHIGH': 'High blood pressure among adults aged ≥18 years',
        'CANCER': 'Cancer (excluding skin cancer) among adults aged ≥18 years',
        'CASTHMA': 'Current asthma among adults aged ≥18 years',
        'CHD': 'Coronary heart disease among adults aged ≥18 years',
        'COPD': 'Chronic obstructive pulmonary disease among adults aged ≥18 years',
        'DEPRESSION': 'Depression among adults aged ≥18 years',
        'DIABETES': 'Diagnosed diabetes among adults aged ≥18 years',
        'HIGHCHOL': 'High cholesterol among adults aged ≥18 years',
        'STROKE': 'Stroke among adults aged ≥18 years'
    }

    # Build the figure, show it in the notebook, and hand it back for saving
    figure = create_health_outcomes_analysis(frame, outcome_descriptions)
    display(figure)
    return figure

if __name__ == "__main__":
    chart = main()
    # Save chart to HTML file. The output folder is created first because
    # writing the file fails when the folder does not exist yet.
    (PROJECT_ROOT / 'results' / 'regional_analysis').mkdir(parents=True, exist_ok=True)
    chart.save(PROJECT_ROOT / 'results' / 'regional_analysis' / 'health_outcomes_analysis.html')

### Health Risk Behaviors analysis by social determinants of health
States are grouped by social determinants of health (such as income, education, and poverty), and health risk behaviors are compared across those groups.

In [11]:
import altair as alt
import pandas as pd
import numpy as np
from vega_datasets import data
from typing import Dict, List, Tuple

def create_health_behaviors_analysis(
    df: pd.DataFrame,
    health_risk_behaviors: Dict[str, str]
) -> alt.Chart:
    """
    Build an interactive view of health risk behaviors by socioeconomic region.

    Two charts driven by one dropdown are stacked vertically: a bar chart of
    the mean prevalence per region and a dot plot of the mean prevalence per
    state. Rows of ``df`` are averaged with equal weight.

    The region definitions overlap ('CA' is listed in both 'Western_Diverse'
    and 'Coastal_Tech'), so every region is aggregated over all of its listed
    states. In the state chart a state that belongs to several regions is a
    single point whose tooltip lists every region and whose colour follows
    its last-listed region.

    Args:
        df: Table with ``StateAbbr``, ``StateDesc`` and one
            ``<measure>_CrudePrev`` column per key of
            ``health_risk_behaviors``. It is only read; no columns are added.
        health_risk_behaviors: Measure id -> description. The text before the
            word 'among' in each description becomes the dropdown label.

    Returns:
        The vertically concatenated Altair chart with the dropdown attached.

    Raises:
        ValueError: If ``health_risk_behaviors`` is empty.
        KeyError: If ``df`` lacks any required column.
    """
    if not health_risk_behaviors:
        raise ValueError('health_risk_behaviors must contain at least one measure')

    measures = list(health_risk_behaviors)
    required_cols = ['StateAbbr', 'StateDesc'] + [f'{m}_CrudePrev' for m in measures]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise KeyError(f'df is missing required columns: {missing_cols}')

    socioeconomic_regions = {
        'Urban_Dense': ['NY', 'NJ', 'MA', 'CT', 'RI', 'MD', 'DC'],     # High income, education
        'Industrial_Legacy': ['PA', 'OH', 'MI', 'IN', 'IL', 'WI'],      # Economic transition
        'Rural_South': ['AL', 'MS', 'AR', 'LA', 'KY', 'WV'],           # Higher poverty rates
        'Growth_Corridor': ['GA', 'NC', 'SC', 'TN', 'TX'],             # Economic expansion
        'Western_Diverse': ['CA', 'NV', 'AZ', 'NM'],                    # Population diversity
        'Mountain_Rural': ['MT', 'ID', 'WY', 'CO'],                     # Geographic isolation
        'Coastal_Tech': ['WA', 'OR', 'CA'],                            # High income tech
        'Plains_Agricultural': ['ND', 'SD', 'NE', 'KS', 'OK']          # Rural economics
    }

    # Region types for colour coding; their concatenation is the bar order.
    advantaged_regions = ['Urban_Dense', 'Coastal_Tech', 'Growth_Corridor', 'Western_Diverse']
    vulnerable_regions = ['Rural_South', 'Industrial_Legacy', 'Mountain_Rural', 'Plains_Agricultural']
    region_order = advantaged_regions + vulnerable_regions
    region_type = {
        region: 'Economically Advantaged' if region in advantaged_regions else 'Economically Vulnerable'
        for region in socioeconomic_regions
    }

    # One row per (state, region) pair. Inverting the dict into a
    # state -> region lookup would keep only the last region of each state,
    # silently removing 'CA' from 'Western_Diverse'.
    membership = pd.DataFrame(
        [
            (state, region, region_type[region])
            for region, states in socioeconomic_regions.items()
            for state in states
        ],
        columns=['StateAbbr', 'SocioRegion', 'RegionType'],
    )

    # One label row per state for the state chart: every region it belongs
    # to, coloured by the last-listed one.
    regions_by_state: Dict[str, List[str]] = {}
    for region, states in socioeconomic_regions.items():
        for state in states:
            regions_by_state.setdefault(state, []).append(region)
    state_labels = pd.DataFrame({
        'StateAbbr': list(regions_by_state),
        'SocioRegion': [', '.join(regions) for regions in regions_by_state.values()],
        'RegionType': [region_type[regions[-1]] for regions in regions_by_state.values()],
    })

    # Column subset of the input; the caller's frame is never written to.
    observations = df[required_cols]

    # '<measure>_<stat>' output columns, matching what the charts look up.
    agg_spec = {
        f'{measure}_{stat}': (f'{measure}_CrudePrev', stat)
        for measure in measures
        for stat in ('mean', 'std', 'count')
    }

    # Regional statistics: the inner merge repeats a state's rows once per
    # region it belongs to and drops states that belong to none.
    regional_data = (
        observations.merge(membership, on='StateAbbr')
        .groupby(['SocioRegion', 'RegionType'])
        .agg(**agg_spec)
        .reset_index()
    )

    # 95% confidence intervals (normal approximation: mean +/- 1.96 * standard error)
    for measure in measures:
        margin = 1.96 * (
            regional_data[f'{measure}_std'] / np.sqrt(regional_data[f'{measure}_count'])
        )
        regional_data[f'{measure}_CI_lower'] = regional_data[f'{measure}_mean'] - margin
        regional_data[f'{measure}_CI_upper'] = regional_data[f'{measure}_mean'] + margin

    # State-level statistics, restricted to states that belong to a region.
    state_data = (
        observations.groupby(['StateAbbr', 'StateDesc'])
        .agg(**agg_spec)
        .reset_index()
        .merge(state_labels, on='StateAbbr')
    )

    # Create measure selection
    measure_select = alt.binding_select(
        options=measures,
        labels=[desc.split('among')[0].strip() for desc in health_risk_behaviors.values()],
        name="Health Risk Behavior: "
    )

    selection = alt.param(
        name='health_risk',
        value=measures[0],
        bind=measure_select
    )

    # Define color schemes for different region types
    color_scale = alt.Scale(
        domain=['Economically Advantaged', 'Economically Vulnerable'],
        range=['#2166ac', '#b2182b']  # Blue for advantaged, Red for vulnerable
    )

    # Create regional comparison chart. The calculate transforms pick the
    # '<selected measure>_<stat>' column at render time via the dropdown param.
    regional_chart = alt.Chart(regional_data).mark_bar().encode(
        x=alt.X('value:Q',
                title='Prevalence Rate (%)',
                scale=alt.Scale(zero=False)),
        y=alt.Y('SocioRegion:N',
                title='Socioeconomic Region',
                sort=region_order),
        color=alt.Color('RegionType:N',
                        scale=color_scale,
                        legend=alt.Legend(title='Regional Economic Status')),
        tooltip=[
            alt.Tooltip('SocioRegion:N', title='Region'),
            alt.Tooltip('RegionType:N', title='Economic Status'),
            alt.Tooltip('value:Q', title='Rate (%)', format='.1f'),
            alt.Tooltip('CI_lower:Q', title='95% CI Lower', format='.1f'),
            alt.Tooltip('CI_upper:Q', title='95% CI Upper', format='.1f')
        ]
    ).transform_calculate(
        value=f'datum[{selection.name} + "_mean"]',
        CI_lower=f'datum[{selection.name} + "_CI_lower"]',
        CI_upper=f'datum[{selection.name} + "_CI_upper"]'
    ).properties(
        width=800,
        height=200,
        title='Regional Health Risk Behavior Patterns'
    )

    # Create state-level detail chart
    state_chart = alt.Chart(state_data).mark_circle(size=100).encode(
        x=alt.X('value:Q',
                title='Prevalence Rate (%)',
                scale=alt.Scale(zero=False)),
        y=alt.Y('StateDesc:N',
                title='State',
                sort=alt.EncodingSortField(field='value', op='mean', order='ascending')),
        color=alt.Color('RegionType:N',
                        scale=color_scale,
                        legend=alt.Legend(title='Regional Economic Status')),
        tooltip=[
            alt.Tooltip('StateDesc:N', title='State'),
            alt.Tooltip('SocioRegion:N', title='Region'),
            alt.Tooltip('RegionType:N', title='Economic Status'),
            alt.Tooltip('value:Q', title='Rate (%)', format='.1f'),
            alt.Tooltip('std:Q', title='Std Dev', format='.2f')
        ]
    ).transform_calculate(
        value=f'datum[{selection.name} + "_mean"]',
        std=f'datum[{selection.name} + "_std"]'
    ).properties(
        width=800,
        height=500,
        title='State-Level Health Risk Behavior Analysis'
    )

    # Combine visualizations
    final_viz = alt.vconcat(
        regional_chart,
        state_chart,
        spacing=30
    ).properties(
        title={
            'text': 'Health Risk Behaviors Across Socioeconomic Regions',
            'subtitle': [
                'Analysis of behavioral health patterns in economically advantaged and vulnerable regions',
                'Examining regional variations in risk behaviors and their relationship to economic status',
                'Use dropdown to explore different health risk indicators'
            ],
            'fontSize': 24,
            'anchor': 'middle'
        }
    ).add_params(
        selection
    )

    return final_viz

def main():
    """
    Render the health risk behavior chart in the notebook and return it.

    Reads the CSV located by ``get_file_path(2024)``, builds the chart with
    ``create_health_behaviors_analysis`` and shows it with ``display``.
    """
    # Enable Altair to work in Jupyter
    alt.renderers.enable('default')

    # Load data (2024 is presumably the data release year -- confirm against config)
    frame = pd.read_csv(get_file_path(2024))

    # Measure id -> description; the part before "among" labels the dropdown
    behavior_descriptions = {
        'BINGE': 'Binge drinking among adults aged ≥18 years',
        'CSMOKING': 'Current smoking among adults aged ≥18 years',
        'LPA': 'No leisure-time physical activity among adults aged ≥18 years',
        'OBESITY': 'Obesity among adults aged ≥18 years',
        'SLEEP': 'Sleeping less than 7 hours among adults aged ≥18 years'
    }

    # Build the figure, show it in the notebook, and hand it back for saving
    figure = create_health_behaviors_analysis(frame, behavior_descriptions)
    display(figure)
    return figure

if __name__ == "__main__":
    chart = main()
    # Save chart to HTML file. The output folder is created first because
    # writing the file fails when the folder does not exist yet.
    (PROJECT_ROOT / 'results' / 'regional_analysis').mkdir(parents=True, exist_ok=True)
    chart.save(PROJECT_ROOT / 'results' / 'regional_analysis' / 'health_risk_behaviors.html')

### Social Needs analysis by social determinants of health
States are grouped by social determinants of health, and social needs indicators (such as food insecurity and housing insecurity) are compared across those groups.

In [12]:
import altair as alt
import pandas as pd
import numpy as np
from vega_datasets import data
from typing import Dict, List, Tuple

def create_social_needs_analysis(
    df: pd.DataFrame,
    social_needs: Dict[str, str]
) -> alt.Chart:
    """
    Build an interactive view of social needs indicators by socioeconomic region.

    Two charts driven by one dropdown are stacked vertically: a bar chart of
    the mean prevalence per region and a dot plot of the mean prevalence per
    state. Rows of ``df`` are averaged with equal weight.

    The region definitions overlap ('CA' is listed in both 'Western_Diverse'
    and 'Coastal_Tech'), so every region is aggregated over all of its listed
    states. In the state chart a state that belongs to several regions is a
    single point whose tooltip lists every region and whose colour follows
    its last-listed region.

    Args:
        df: Table with ``StateAbbr``, ``StateDesc`` and one
            ``<measure>_CrudePrev`` column per key of ``social_needs``.
            It is only read; no columns are added to it.
        social_needs: Measure id -> description. The text before the word
            'among' in each description becomes the dropdown label.

    Returns:
        The vertically concatenated Altair chart with the dropdown attached.

    Raises:
        ValueError: If ``social_needs`` is empty.
        KeyError: If ``df`` lacks any required column.
    """
    if not social_needs:
        raise ValueError('social_needs must contain at least one measure')

    measures = list(social_needs)
    required_cols = ['StateAbbr', 'StateDesc'] + [f'{m}_CrudePrev' for m in measures]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise KeyError(f'df is missing required columns: {missing_cols}')

    # Define socioeconomic regions with strategic significance
    socioeconomic_regions = {
        'Urban_Dense': ['NY', 'NJ', 'MA', 'CT', 'RI', 'MD', 'DC'],     # High income, education
        'Industrial_Legacy': ['PA', 'OH', 'MI', 'IN', 'IL', 'WI'],      # Economic transition
        'Rural_South': ['AL', 'MS', 'AR', 'LA', 'KY', 'WV'],           # Higher poverty rates
        'Growth_Corridor': ['GA', 'NC', 'SC', 'TN', 'TX'],             # Economic expansion
        'Western_Diverse': ['CA', 'NV', 'AZ', 'NM'],                    # Population diversity
        'Mountain_Rural': ['MT', 'ID', 'WY', 'CO'],                     # Geographic isolation
        'Coastal_Tech': ['WA', 'OR', 'CA'],                            # High income tech
        'Plains_Agricultural': ['ND', 'SD', 'NE', 'KS', 'OK']          # Rural economics
    }

    # Region types for colour coding; their concatenation is the bar order.
    advantaged_regions = ['Urban_Dense', 'Coastal_Tech', 'Growth_Corridor', 'Western_Diverse']
    vulnerable_regions = ['Rural_South', 'Industrial_Legacy', 'Mountain_Rural', 'Plains_Agricultural']
    region_order = advantaged_regions + vulnerable_regions
    region_type = {
        region: 'Economically Advantaged' if region in advantaged_regions else 'Economically Vulnerable'
        for region in socioeconomic_regions
    }

    # One row per (state, region) pair. Inverting the dict into a
    # state -> region lookup would keep only the last region of each state,
    # silently removing 'CA' from 'Western_Diverse'.
    membership = pd.DataFrame(
        [
            (state, region, region_type[region])
            for region, states in socioeconomic_regions.items()
            for state in states
        ],
        columns=['StateAbbr', 'SocioRegion', 'RegionType'],
    )

    # One label row per state for the state chart: every region it belongs
    # to, coloured by the last-listed one.
    regions_by_state: Dict[str, List[str]] = {}
    for region, states in socioeconomic_regions.items():
        for state in states:
            regions_by_state.setdefault(state, []).append(region)
    state_labels = pd.DataFrame({
        'StateAbbr': list(regions_by_state),
        'SocioRegion': [', '.join(regions) for regions in regions_by_state.values()],
        'RegionType': [region_type[regions[-1]] for regions in regions_by_state.values()],
    })

    # Column subset of the input; the caller's frame is never written to.
    observations = df[required_cols]

    # '<measure>_<stat>' output columns, matching what the charts look up.
    agg_spec = {
        f'{measure}_{stat}': (f'{measure}_CrudePrev', stat)
        for measure in measures
        for stat in ('mean', 'std', 'count')
    }

    # Regional statistics: the inner merge repeats a state's rows once per
    # region it belongs to and drops states that belong to none.
    regional_data = (
        observations.merge(membership, on='StateAbbr')
        .groupby(['SocioRegion', 'RegionType'])
        .agg(**agg_spec)
        .reset_index()
    )

    # State-level statistics, restricted to states that belong to a region.
    state_data = (
        observations.groupby(['StateAbbr', 'StateDesc'])
        .agg(**agg_spec)
        .reset_index()
        .merge(state_labels, on='StateAbbr')
    )

    # Create measure selection
    measure_select = alt.binding_select(
        options=measures,
        labels=[desc.split('among')[0].strip() for desc in social_needs.values()],
        name="Social Need Indicator: "
    )

    selection = alt.param(
        name='social_need',
        value=measures[0],
        bind=measure_select
    )

    # Define color schemes for different region types
    color_scale = alt.Scale(
        domain=['Economically Advantaged', 'Economically Vulnerable'],
        range=['#2166ac', '#b2182b']  # Blue for advantaged, Red for vulnerable
    )

    # Create regional comparison chart. The calculate transforms pick the
    # '<selected measure>_<stat>' column at render time via the dropdown param.
    regional_chart = alt.Chart(regional_data).mark_bar().encode(
        x=alt.X('value:Q',
                title='Prevalence Rate (%)',
                scale=alt.Scale(zero=False)),
        y=alt.Y('SocioRegion:N',
                title='Socioeconomic Region',
                sort=region_order),
        color=alt.Color('RegionType:N',
                        scale=color_scale,
                        legend=alt.Legend(title='Regional Economic Status')),
        tooltip=[
            alt.Tooltip('SocioRegion:N', title='Region'),
            alt.Tooltip('RegionType:N', title='Economic Status'),
            alt.Tooltip('value:Q', title='Rate (%)', format='.1f'),
            alt.Tooltip('std:Q', title='Std Dev', format='.2f')
        ]
    ).transform_calculate(
        value=f'datum[{selection.name} + "_mean"]',
        std=f'datum[{selection.name} + "_std"]'
    ).properties(
        width=800,
        height=200,
        title='Regional Social Needs Patterns'
    )

    # Create state-level detail chart
    state_chart = alt.Chart(state_data).mark_circle(size=100).encode(
        x=alt.X('value:Q',
                title='Prevalence Rate (%)',
                scale=alt.Scale(zero=False)),
        y=alt.Y('StateDesc:N',
                title='State',
                sort=alt.EncodingSortField(field='value', op='mean', order='ascending')),
        color=alt.Color('RegionType:N',
                        scale=color_scale,
                        legend=alt.Legend(title='Regional Economic Status')),
        tooltip=[
            alt.Tooltip('StateDesc:N', title='State'),
            alt.Tooltip('SocioRegion:N', title='Region'),
            alt.Tooltip('RegionType:N', title='Economic Status'),
            alt.Tooltip('value:Q', title='Rate (%)', format='.1f'),
            alt.Tooltip('std:Q', title='Std Dev', format='.2f')
        ]
    ).transform_calculate(
        value=f'datum[{selection.name} + "_mean"]',
        std=f'datum[{selection.name} + "_std"]'
    ).properties(
        width=800,
        height=500,
        title='State-Level Social Needs Analysis'
    )

    # Combine visualizations
    final_viz = alt.vconcat(
        regional_chart,
        state_chart,
        spacing=30
    ).properties(
        title={
            'text': 'Social Needs and Economic Vulnerability Across U.S. Regions',
            'subtitle': [
                'Analysis of social determinants of health across economically advantaged and vulnerable regions',
                'Examining regional variations in social needs and economic stability',
                'Use dropdown to explore different social need indicators'
            ],
            'fontSize': 24,
            'anchor': 'middle'
        }
    ).add_params(
        selection
    )

    return final_viz

def main():
    """
    Render the social needs chart in the notebook and return it.

    Reads the CSV located by ``get_file_path(2024)``, builds the chart with
    ``create_social_needs_analysis`` and shows it with ``display``.
    """
    # Enable Altair to work in Jupyter
    alt.renderers.enable('default')

    # Load data (2024 is presumably the data release year -- confirm against config)
    frame = pd.read_csv(get_file_path(2024))

    # Measure id -> description; the part before "among" labels the dropdown
    need_descriptions = {
        'ISOLATION': 'Social isolation among adults aged ≥18 years',
        'FOODINSECU': 'Food insecurity among adults aged ≥18 years',
        'HOUSINSECU': 'Housing insecurity among adults aged ≥18 years',
        'SHUTUTILITY': 'Utility shutoff among adults aged ≥18 years',
        'LACKTRPT': 'Lack of transportation among adults aged ≥18 years',
        'EMOTIONSPT': 'Lack of emotional support among adults aged ≥18 years'
    }

    # Build the figure, show it in the notebook, and hand it back for saving
    figure = create_social_needs_analysis(frame, need_descriptions)
    display(figure)
    return figure

if __name__ == "__main__":
    chart = main()
    # Save chart to HTML file. The output folder is created first because
    # writing the file fails when the folder does not exist yet.
    (PROJECT_ROOT / 'results' / 'regional_analysis').mkdir(parents=True, exist_ok=True)
    chart.save(PROJECT_ROOT / 'results' / 'regional_analysis' / 'social_needs_analysis.html')

### Preventive Health Services prevalence by Health Resource Regions
States are grouped by healthcare infrastructure and resource distribution, and the use of preventive health services is compared across those groups.

In [13]:
import altair as alt
import pandas as pd
import numpy as np
from vega_datasets import data
from typing import Dict, List, Tuple

def create_healthcare_access_analysis(
    df: pd.DataFrame,
    prevention_measures: Dict[str, str]
) -> alt.Chart:
    """
    Build an interactive view of preventive-care measures by health resource region.

    Two charts driven by one dropdown are stacked vertically: a bar chart of
    the mean rate per region and a dot plot of the mean rate per state. Rows
    of ``df`` are averaged with equal weight.

    The region definitions overlap ('CA' is listed in both 'Southwest_Border'
    and 'Pacific_Tech'), so every region is aggregated over all of its listed
    states. In the state chart a state that belongs to several regions is a
    single point whose tooltip lists every region and whose colour follows
    its last-listed region.

    Args:
        df: Table with ``StateAbbr``, ``StateDesc`` and one
            ``<measure>_CrudePrev`` column per key of
            ``prevention_measures``. It is only read; no columns are added.
        prevention_measures: Measure id -> description. The text before the
            word 'among' in each description becomes the dropdown label.

    Returns:
        The vertically concatenated Altair chart with the dropdown attached.

    Raises:
        ValueError: If ``prevention_measures`` is empty.
        KeyError: If ``df`` lacks any required column.
    """
    if not prevention_measures:
        raise ValueError('prevention_measures must contain at least one measure')

    measures = list(prevention_measures)
    required_cols = ['StateAbbr', 'StateDesc'] + [f'{m}_CrudePrev' for m in measures]
    missing_cols = [col for col in required_cols if col not in df.columns]
    if missing_cols:
        raise KeyError(f'df is missing required columns: {missing_cols}')

    # Define health resource regions with strategic significance
    health_resource_regions = {
        'Northeast_Metro': ['NY', 'NJ', 'MA', 'CT', 'RI'],  # High hospital density
        'Mid_Atlantic': ['PA', 'MD', 'DC', 'VA', 'DE'],     # Research corridor
        'Southeast_Rural': ['AL', 'MS', 'AR', 'LA', 'SC'],  # Limited healthcare access
        'Southeast_Urban': ['GA', 'NC', 'TN', 'FL'],        # Major medical centers
        'Industrial_Midwest': ['OH', 'MI', 'IN', 'IL', 'WI'],  # Environmental factors
        'Agricultural_Belt': ['IA', 'KS', 'NE', 'MO'],      # Rural health challenges
        'Southwest_Border': ['TX', 'NM', 'AZ', 'CA'],       # Cross-border health
        'Pacific_Tech': ['WA', 'OR', 'CA'],                 # Innovation hubs
    }

    # Region types for colour coding; their concatenation is the bar order.
    high_resource_regions = ['Northeast_Metro', 'Mid_Atlantic', 'Southeast_Urban', 'Pacific_Tech']
    limited_resource_regions = ['Southeast_Rural', 'Industrial_Midwest', 'Agricultural_Belt', 'Southwest_Border']
    region_order = high_resource_regions + limited_resource_regions
    region_type = {
        region: 'High Resource' if region in high_resource_regions else 'Limited Resource'
        for region in health_resource_regions
    }

    # One row per (state, region) pair. Inverting the dict into a
    # state -> region lookup would keep only the last region of each state,
    # silently removing 'CA' from 'Southwest_Border'.
    membership = pd.DataFrame(
        [
            (state, region, region_type[region])
            for region, states in health_resource_regions.items()
            for state in states
        ],
        columns=['StateAbbr', 'HealthRegion', 'RegionType'],
    )

    # One label row per state for the state chart: every region it belongs
    # to, coloured by the last-listed one ('CA' stays 'High Resource').
    regions_by_state: Dict[str, List[str]] = {}
    for region, states in health_resource_regions.items():
        for state in states:
            regions_by_state.setdefault(state, []).append(region)
    state_labels = pd.DataFrame({
        'StateAbbr': list(regions_by_state),
        'HealthRegion': [', '.join(regions) for regions in regions_by_state.values()],
        'RegionType': [region_type[regions[-1]] for regions in regions_by_state.values()],
    })

    # Column subset of the input; the caller's frame is never written to.
    observations = df[required_cols]

    # '<measure>_<stat>' output columns, matching what the charts look up.
    agg_spec = {
        f'{measure}_{stat}': (f'{measure}_CrudePrev', stat)
        for measure in measures
        for stat in ('mean', 'std', 'count')
    }

    # Regional statistics: the inner merge repeats a state's rows once per
    # region it belongs to and drops states that belong to none.
    regional_data = (
        observations.merge(membership, on='StateAbbr')
        .groupby(['HealthRegion', 'RegionType'])
        .agg(**agg_spec)
        .reset_index()
    )

    # State-level statistics, restricted to states that belong to a region.
    state_data = (
        observations.groupby(['StateAbbr', 'StateDesc'])
        .agg(**agg_spec)
        .reset_index()
        .merge(state_labels, on='StateAbbr')
    )

    # Create measure selection
    measure_select = alt.binding_select(
        options=measures,
        labels=[desc.split('among')[0].strip() for desc in prevention_measures.values()],
        name="Prevention Measure: "
    )

    selection = alt.param(
        name='prevention_measure',
        value=measures[0],
        bind=measure_select
    )

    # Define color schemes for different region types
    color_scale = alt.Scale(
        domain=['High Resource', 'Limited Resource'],
        range=['#2166ac', '#b2182b']  # Blue for high resource, Red for limited resource
    )

    # Create regional comparison chart. The calculate transforms pick the
    # '<selected measure>_<stat>' column at render time via the dropdown param.
    regional_chart = alt.Chart(regional_data).mark_bar().encode(
        x=alt.X('value:Q',
                title='Access/Utilization Rate (%)',
                scale=alt.Scale(zero=False)),
        y=alt.Y('HealthRegion:N',
                title='Health Resource Region',
                sort=region_order),
        color=alt.Color('RegionType:N',
                        scale=color_scale,
                        legend=alt.Legend(title='Region Type')),
        tooltip=[
            alt.Tooltip('HealthRegion:N', title='Region'),
            alt.Tooltip('RegionType:N', title='Region Type'),
            alt.Tooltip('value:Q', title='Rate (%)', format='.1f'),
            alt.Tooltip('std:Q', title='Std Dev', format='.2f')
        ]
    ).transform_calculate(
        value=f'datum[{selection.name} + "_mean"]',
        std=f'datum[{selection.name} + "_std"]'
    ).properties(
        width=800,
        height=200,
        title='Regional Healthcare Access Patterns'
    )

    # Create state-level detail chart
    state_chart = alt.Chart(state_data).mark_circle(size=100).encode(
        x=alt.X('value:Q',
                title='Access/Utilization Rate (%)',
                scale=alt.Scale(zero=False)),
        y=alt.Y('StateDesc:N',
                title='State',
                sort=alt.EncodingSortField(field='value', op='mean', order='descending')),
        color=alt.Color('RegionType:N',
                        scale=color_scale,
                        legend=alt.Legend(title='Region Type')),
        tooltip=[
            alt.Tooltip('StateDesc:N', title='State'),
            alt.Tooltip('HealthRegion:N', title='Region'),
            alt.Tooltip('RegionType:N', title='Region Type'),
            alt.Tooltip('value:Q', title='Rate (%)', format='.1f'),
            alt.Tooltip('std:Q', title='Std Dev', format='.2f')
        ]
    ).transform_calculate(
        value=f'datum[{selection.name} + "_mean"]',
        std=f'datum[{selection.name} + "_std"]'
    ).properties(
        width=800,
        height=500,
        title='State-Level Healthcare Access Analysis'
    )

    # Combine visualizations
    final_viz = alt.vconcat(
        regional_chart,
        state_chart,
        spacing=30
    ).properties(
        title={
            'text': 'Healthcare Access and Prevention Disparities Across U.S. Health Resource Regions',
            'subtitle': [
                'Analysis of preventive care utilization patterns in high-resource vs. limited-resource regions',
                'Examining regional variations in healthcare infrastructure and preventive service adoption',
                'Use dropdown to explore different prevention measures'
            ],
            'fontSize': 24,
            'anchor': 'middle'
        }
    ).add_params(
        selection
    )

    return final_viz

def main():
    """
    Render the preventive-care chart in the notebook and return it.

    Reads the CSV located by ``get_file_path(2024)``, builds the chart with
    ``create_healthcare_access_analysis`` and shows it with ``display``.
    """
    # Enable Altair to work in Jupyter
    alt.renderers.enable('default')

    # Load data (2024 is presumably the data release year -- confirm against config)
    frame = pd.read_csv(get_file_path(2024))

    # Measure id -> description; the part before "among" labels the dropdown
    measure_descriptions = {
        'ACCESS2': 'Current lack of health insurance among adults aged 18-64 years',
        'CHECKUP': 'Visits to doctor for routine checkup within the past year among adults aged ≥18 years',
        'CHOLSCREEN': 'Cholesterol screening among adults aged ≥18 years',
        'COLON_SCREEN': 'Fecal occult blood test, sigmoidoscopy, or colonoscopy among adults aged 50-75 years',
        'DENTAL': 'Visits to dentist or dental clinic among adults aged ≥18 years',
        'MAMMOUSE': 'Mammography use among women aged 50-74 years',
        'BPMED': 'Taking medicine for high blood pressure control among adults aged ≥18 years with high blood pressure'
    }

    # Build the figure, show it in the notebook, and hand it back for saving
    figure = create_healthcare_access_analysis(frame, measure_descriptions)
    display(figure)
    return figure

if __name__ == "__main__":
    chart = main()
    # Save chart to HTML file. The output folder is created first because
    # writing the file fails when the folder does not exist yet.
    (PROJECT_ROOT / 'results' / 'regional_analysis').mkdir(parents=True, exist_ok=True)
    chart.save(PROJECT_ROOT / 'results' / 'regional_analysis' / 'healthcare_access_analysis.html')