In [13]:
# comprehensive data prep for advanced frontend
import pandas as pd
import numpy as np
import json
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from scipy.stats import percentileofscore
import warnings
warnings.filterwarnings('ignore')

# load the data from explore notebook
try:
    df_health = pd.read_pickle('.objects/healthcare_access_scores_clean.pkl')
except:
    print("need to run explore notebook first")
    %run explore.ipynb
    df_health = pd.read_pickle('.objects/healthcare_access_scores_clean.pkl')

print(f"loaded {len(df_health)} counties")

loaded 3222 counties


In [24]:
# create comprehensive county profiles with everything
# Work on a copy so the frame loaded into df_health is left untouched; the
# derived columns added by the cells below are all written to county_profiles.
# NOTE: re-running this cell resets county_profiles to the raw copy, which
# discards every column added by later cells (clusters, comparative stats, ...).
county_profiles = df_health.copy()

# region -> member states; anything not listed here ends up in 'Other'
region_mapping = {
    'Northeast': [
        'Connecticut', 'Maine', 'Massachusetts', 'New Hampshire', 'Rhode Island',
        'Vermont', 'New Jersey', 'New York', 'Pennsylvania',
    ],
    'Southeast': [
        'Delaware', 'Florida', 'Georgia', 'Maryland', 'North Carolina',
        'South Carolina', 'Virginia', 'District of Columbia', 'West Virginia',
        'Alabama', 'Kentucky', 'Mississippi', 'Tennessee', 'Arkansas',
        'Louisiana', 'Oklahoma', 'Texas',
    ],
    'Midwest': [
        'Illinois', 'Indiana', 'Michigan', 'Ohio', 'Wisconsin', 'Iowa',
        'Kansas', 'Minnesota', 'Missouri', 'Nebraska', 'North Dakota',
        'South Dakota',
    ],
    'Southwest': ['Arizona', 'New Mexico', 'Nevada', 'Utah'],
    'West': [
        'Alaska', 'California', 'Colorado', 'Hawaii', 'Idaho', 'Montana',
        'Oregon', 'Washington', 'Wyoming',
    ],
}


def get_region(state):
    """Return the first region in region_mapping whose list contains `state`.

    Falls back to 'Other' when no region lists the given value.
    """
    matches = (name for name, members in region_mapping.items() if state in members)
    return next(matches, 'Other')

# tag each county with its region (states missing from region_mapping -> 'Other')
county_profiles['Region'] = county_profiles['State'].map(get_region)

# check what columns we have
print("Available columns:")
print(list(county_profiles.columns))
print("\nRegion distribution:")
print(county_profiles['Region'].value_counts())

# percentile rank (0-100) for every metric that is actually present
metrics = ['Healthcare_Access', 'Insurance_Rate', 'Education_Rate', 'Income_Normalized',
           'Poverty_Rate', 'No_Vehicle_Rate', 'Disability_Rate', 'Population']

available_metrics = [name for name in metrics if name in county_profiles.columns]
for metric_name in available_metrics:
    pct_rank = county_profiles[metric_name].rank(pct=True)
    county_profiles[f'{metric_name}_Percentile'] = pct_rank * 100

# opportunity score - weighted blend, then limited to 0-100
# (terms are summed left to right in the same order as the weights below)
access_gap_term = (100 - county_profiles['Healthcare_Access']) * 0.4      # more room for improvement
population_term = (
    county_profiles['Population'] / county_profiles['Population'].max() * 100
) * 0.3                                                                   # more people affected
income_gap_term = (100 - county_profiles['Income_Normalized'] * 100) * 0.2  # lower income = more need
poverty_term = (county_profiles['Poverty_Rate']) * 0.1                    # higher poverty = more need
opportunity_raw = access_gap_term + population_term + income_gap_term + poverty_term
county_profiles['Opportunity_Score'] = opportunity_raw.clip(0, 100)

# vulnerability index - weighted sum of risk factors, limited to 0-100
vulnerability_raw = (
    county_profiles['Poverty_Rate'] * 0.25
    + county_profiles['No_Vehicle_Rate'] * 0.2
    + county_profiles['Disability_Rate'] * 0.2
    + (100 - county_profiles['Insurance_Rate']) * 0.2
    + (100 - county_profiles['Education_Rate']) * 0.15
)
county_profiles['Vulnerability_Index'] = vulnerability_raw.clip(0, 100)

# resilience score - weighted sum of protective factors, limited to 0-100
resilience_raw = (
    county_profiles['Education_Rate'] * 0.3
    + county_profiles['Insurance_Rate'] * 0.25
    + county_profiles['Income_Normalized'] * 100 * 0.25
    + (100 - county_profiles['Poverty_Rate']) * 0.2
)
county_profiles['Resilience_Score'] = resilience_raw.clip(0, 100)

print("added derived metrics")

Available columns:
['NAME', 'Population', 'Median_Income', 'Commute_Minutes', 'College_Educated', 'Owner_Occupied', 'Total_Health_Insurance', 'Male_with_Insurance', 'Female_with_Insurance', 'Poverty_Total', 'Poverty_Below', 'No_Vehicle', 'Total_Households', 'Broadband_Rate', 'LEP_Rate', 'Disability_Total', 'Disability_With', 'state', 'county', 'FIPS', 'State', 'County', 'Insurance_Rate', 'Education_Rate', 'Income_Normalized', 'Commute_Score', 'No_Vehicle_Rate', 'Poverty_Rate', 'Disability_Rate', 'Healthcare_Access_Classic', 'Healthcare_Access', 'Score_Diff', 'Region']

Region distribution:
Region
Southeast    1422
Midwest      1055
West          355
Northeast     218
Southwest      94
Other          78
Name: count, dtype: int64
added derived metrics


In [15]:
# create detailed cluster analysis
cluster_features = ['Healthcare_Access', 'Income_Normalized', 'Education_Rate', 'Insurance_Rate', 
                   'Poverty_Rate', 'Population', 'Vulnerability_Index', 'Resilience_Score']

# normalize for clustering
# NOTE: missing feature values are replaced with 0 on the raw scale before
# standardizing, so a county with a missing value is treated as having 0 there.
scaler = StandardScaler()
cluster_data = scaler.fit_transform(county_profiles[cluster_features].fillna(0))

# create multiple clustering solutions
# n_init is pinned explicitly: its default changed between scikit-learn
# releases, and warnings are silenced at the top of this notebook, so relying
# on the default would let cluster assignments drift between environments
# without any visible notice.
for k in [5, 7, 10]:
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    county_profiles[f'Cluster_{k}'] = kmeans.fit_predict(cluster_data)

# detailed cluster descriptions
# NOTE(review): these names are hand-written and keyed by the raw KMeans label
# id. Label ids are arbitrary, and nothing in this cell checks that cluster i
# actually has the profile its name claims - verify against the per-cluster
# averages before showing these labels to users.
cluster_descriptions = {
    0: {"name": "Metro Powerhouses", "desc": "Major urban centers with excellent access but high costs"},
    1: {"name": "Suburban Stability", "desc": "Well-resourced suburban communities with good outcomes"},
    2: {"name": "Rural Resilience", "desc": "Small towns with moderate access, strong community ties"},
    3: {"name": "Urban Challenges", "desc": "Dense cities with mixed outcomes, high inequality"},
    4: {"name": "Healthcare Deserts", "desc": "Underserved areas with critical access gaps"},
    5: {"name": "Growth Corridors", "desc": "Rapidly developing areas with emerging needs"},
    6: {"name": "College Towns", "desc": "Education-focused communities with unique demographics"}
}

# add cluster names (looked up from the 7-cluster solution's label id)
county_profiles['Cluster_Name_Detailed'] = county_profiles['Cluster_7'].map(
    {i: cluster_descriptions[i]["name"] for i in range(7)}
)
county_profiles['Cluster_Description'] = county_profiles['Cluster_7'].map(
    {i: cluster_descriptions[i]["desc"] for i in range(7)}
)

print("created advanced clustering")

created advanced clustering


In [16]:
# create policy impact simulations - more sophisticated
def advanced_policy_simulation(county_row, policy_changes, max_income=None):
    """Simulate multiple policy changes with interactions for one county.

    Parameters
    ----------
    county_row : mapping (dict or DataFrame row)
        Must provide 'Healthcare_Access', 'Insurance_Rate', 'Poverty_Rate',
        'Education_Rate' and 'Median_Income'.
    policy_changes : dict or None
        Percentage changes keyed by 'insurance_pct', 'poverty_pct',
        'education_pct', 'income_pct'. Missing keys mean no change; None is
        treated as an empty dict.
    max_income : float, optional
        Income used to scale the income term to 0-100. Defaults to the maximum
        'Median_Income' in the global county_profiles frame (the original
        behavior). Pass it explicitly when simulating many counties so the
        maximum is not recomputed on every call.

    Returns
    -------
    dict
        'new_score' (limited to 0-100), 'improvement' (new_score minus the
        current score), and the adjusted 'new_insurance', 'new_poverty',
        'new_education', 'new_income' values.
    """
    changes = policy_changes or {}
    if max_income is None:
        max_income = county_profiles['Median_Income'].max()

    base_score = county_row['Healthcare_Access']

    def _adjusted(column, key):
        # apply a percentage change to the county's current value
        return county_row[column] * (1 + changes.get(key, 0) / 100)

    # base factors
    insurance = _adjusted('Insurance_Rate', 'insurance_pct')
    poverty = _adjusted('Poverty_Rate', 'poverty_pct')
    education = _adjusted('Education_Rate', 'education_pct')
    income = _adjusted('Median_Income', 'income_pct')

    # policy interactions - some policies boost others
    if changes.get('education_pct', 0) > 10:
        income *= 1.05  # education boosts income slightly
    if changes.get('income_pct', 0) > 15:
        insurance *= 1.03  # higher income improves insurance access

    # calculate new score with interactions (weights sum to 1.0)
    weighted_score = (
        np.clip(insurance, 0, 100) * 0.25 +
        np.clip(education, 0, 80) * 0.15 +
        np.clip((income / max_income) * 100, 0, 100) * 0.15 +
        np.clip((100 - poverty), 0, 100) * 0.15 +
        base_score * 0.30  # existing infrastructure
    )
    new_score = np.clip(weighted_score, 0, 100)

    return {
        'new_score': new_score,
        # computed from the clipped score so it always equals new_score - current score
        'improvement': new_score - base_score,
        'new_insurance': np.clip(insurance, 0, 100),
        'new_poverty': np.clip(poverty, 0, 60),
        'new_education': np.clip(education, 0, 80),
        'new_income': income
    }

print("created advanced policy simulation")

created advanced policy simulation


In [17]:
# create comparative analysis data
comparative_cols = [
    'rank_in_cluster', 'total_in_cluster', 'cluster_avg_score', 'cluster_avg_income',
    'performance_vs_cluster', 'best_in_cluster_fips', 'worst_in_cluster_fips'
]
# Drop results left over from a previous run of this cell; otherwise the merge
# below would produce suffixed duplicates (rank_in_cluster_x / rank_in_cluster_y)
# and later cells would fail to find the plain column names.
county_profiles = county_profiles.drop(columns=comparative_cols, errors='ignore')

comparative_data = []

for _, county in county_profiles.iterrows():
    # find similar counties: same 7-way cluster and a population within +/-50%
    # of this county's population (the county itself is part of its peer group)
    similar_mask = (
        (county_profiles['Cluster_7'] == county['Cluster_7']) &
        ((county_profiles['Population'] - county['Population']).abs() < county['Population'] * 0.5)
    )
    similar_counties = county_profiles[similar_mask]
    peer_scores = similar_counties['Healthcare_Access']
    peer_avg_score = peer_scores.mean()

    if len(similar_counties) > 1:
        # rank 1 = best score: count the peers that score strictly HIGHER.
        # (Counting lower-scoring peers made the worst county "#1".)
        rank_in_cluster = int((peer_scores > county['Healthcare_Access']).sum()) + 1
        total_in_cluster = len(similar_counties)
    else:
        rank_in_cluster = 1
        total_in_cluster = 1

    if peer_scores.notna().any():
        best_fips = similar_counties.loc[peer_scores.idxmax(), 'FIPS']
        worst_fips = similar_counties.loc[peer_scores.idxmin(), 'FIPS']
    else:
        # empty peer group (e.g. zero or missing population) or no usable
        # scores: idxmax/idxmin would raise, so fall back to the county itself
        best_fips = worst_fips = county['FIPS']

    comparative_data.append({
        'FIPS': county['FIPS'],
        'rank_in_cluster': rank_in_cluster,
        'total_in_cluster': total_in_cluster,
        'cluster_avg_score': peer_avg_score,
        'cluster_avg_income': similar_counties['Median_Income'].mean(),
        'performance_vs_cluster': county['Healthcare_Access'] - peer_avg_score,
        'best_in_cluster_fips': best_fips,
        'worst_in_cluster_fips': worst_fips
    })

comparative_df = pd.DataFrame(comparative_data)
county_profiles = county_profiles.merge(comparative_df, on='FIPS', how='left')

print("created comparative analysis")

created comparative analysis


In [18]:
# create time series projections (simulated)
# growth scenarios are the same for every county, so define them once
scenarios = {
    'status_quo': {'growth': 0.01, 'policy_investment': 0},
    'moderate_investment': {'growth': 0.02, 'policy_investment': 0.15},
    'aggressive_investment': {'growth': 0.035, 'policy_investment': 0.30}
}
projection_years = 5   # year 0 (current score) plus 5 projected years
score_cap = 95         # projected growth never pushes a score past this

projection_data = []

for _, county in county_profiles.iterrows():
    county_projections = {'FIPS': county['FIPS']}

    for scenario_name, params in scenarios.items():
        current_score = county['Healthcare_Access']
        projected_scores = [current_score]

        # A county that already scores above the cap keeps its score; capping
        # with a plain min(95, ...) would project it DOWN to 95 in year 1.
        ceiling = max(score_cap, current_score)

        for _year in range(projection_years):
            # growth with diminishing returns; investment amplifies the base
            # growth rate. (Multiplying by policy_investment directly made the
            # status_quo growth rate dead: 0 investment -> always 0 growth.)
            growth_factor = (
                params['growth']
                * (1 - (current_score / 100))
                * (1 + params['policy_investment'])
            )
            current_score = min(ceiling, current_score + growth_factor * 10)
            projected_scores.append(current_score)

        county_projections[f'{scenario_name}_projection'] = projected_scores

    projection_data.append(county_projections)

projections_df = pd.DataFrame(projection_data)
print("created projection scenarios")

created projection scenarios


In [19]:
# per-state summary statistics (grouped by State and Region)
state_agg_spec = {
    'Healthcare_Access': ['mean', 'median', 'std', 'min', 'max'],
    'Population': ['sum', 'mean'],
    'Opportunity_Score': ['mean', 'max'],
    'Vulnerability_Index': ['mean', 'max'],
    'Resilience_Score': ['mean', 'min'],
    'Median_Income': ['mean', 'median'],
    'Insurance_Rate': ['mean', 'min'],
    'Poverty_Rate': ['mean', 'max'],
    'Education_Rate': ['mean', 'max'],
    'County': 'count',
}
state_analysis = (
    county_profiles
    .groupby(['State', 'Region'])
    .agg(state_agg_spec)
    .round(2)
)

# collapse the two-level (column, statistic) header into 'column_statistic'
state_analysis.columns = [
    f"{column}_{statistic}".strip() for column, statistic in state_analysis.columns
]
state_analysis = state_analysis.reset_index()

# state rankings: rank 1 = highest mean value
rank_sources = (
    ('Healthcare_Rank', 'Healthcare_Access_mean'),
    ('Opportunity_Rank', 'Opportunity_Score_mean'),
)
for rank_column, source_column in rank_sources:
    state_analysis[rank_column] = state_analysis[source_column].rank(ascending=False)

# spread of county scores within the state; higher = more inequality
state_analysis['Inequality_Score'] = state_analysis['Healthcare_Access_std']

print("created state analysis")

created state analysis


In [20]:
# create policy recommendation engine data
def generate_policy_recommendations(county_row):
    """Build the list of policy recommendations that apply to one county.

    A recommendation is emitted for each rule the county triggers:
    insurance rate below 80, education rate below 30, poverty rate above 20.
    Each entry carries the policy name, a High/Medium priority, a capped
    impact score, an annual cost estimate, the cost per impact point (impact
    floored at 1 for the division) and a short rationale string.
    """
    def _entry(policy, is_high, impact, impact_cap, cost, rationale):
        # shared record layout for all three policy types
        return {
            'policy': policy,
            'priority': 'High' if is_high else 'Medium',
            'impact_score': min(impact, impact_cap),
            'annual_cost': cost,
            'cost_per_point': cost / max(impact, 1),
            'rationale': rationale,
        }

    recommendations = []

    # insurance expansion
    insured = county_row['Insurance_Rate']
    if insured < 80:
        recommendations.append(_entry(
            'Insurance Expansion',
            insured < 70,
            (80 - insured) * 0.25,
            20,
            county_row['Population'] * (100 - insured) / 100 * 3000,
            f"Current coverage at {insured:.1f}% leaves significant gaps",
        ))

    # education investment
    educated = county_row['Education_Rate']
    if educated < 30:
        recommendations.append(_entry(
            'Education Investment',
            educated < 20,
            (30 - educated) * 0.15,
            15,
            county_row['Population'] * 500,
            f"Education rate of {educated:.1f}% limits economic mobility",
        ))

    # poverty reduction
    poor = county_row['Poverty_Rate']
    if poor > 20:
        recommendations.append(_entry(
            'Poverty Reduction Programs',
            poor > 30,
            (poor - 15) * 0.15,
            18,
            county_row['Population'] * poor / 100 * 2000,
            f"Poverty rate of {poor:.1f}% creates multiple barriers to care",
        ))

    return recommendations

# one flat record per (county, recommendation), tagged with the county's identifiers
all_recommendations = [
    {**rec, 'FIPS': county['FIPS'], 'County': county['County'], 'State': county['State']}
    for _, county in county_profiles.iterrows()
    for rec in generate_policy_recommendations(county)
]

recommendations_df = pd.DataFrame(all_recommendations)
print(f"generated {len(recommendations_df)} policy recommendations")

generated 7004 policy recommendations


In [21]:
# export all data for frontend
import os
# make sure the output directory exists before any file below is written
os.makedirs('.objects', exist_ok=True)

# main county data with everything
# Columns written to the main county export (lat/lng are appended at export
# time). Every name listed here must already exist on county_profiles, i.e. the
# derived-metric, clustering and comparative cells must have been run first.
export_columns = [
    'FIPS', 'County', 'State', 'Region', 'Population', 'Median_Income',
    'Healthcare_Access', 'Insurance_Rate', 'Education_Rate', 'Poverty_Rate',
    'No_Vehicle_Rate', 'Disability_Rate', 'Broadband_Rate', 'LEP_Rate',
    'Healthcare_Access_Percentile', 'Insurance_Rate_Percentile', 'Education_Rate_Percentile',
    'Opportunity_Score', 'Vulnerability_Index', 'Resilience_Score',
    'Cluster_5', 'Cluster_7', 'Cluster_10', 'Cluster_Name_Detailed', 'Cluster_Description',
    'rank_in_cluster', 'total_in_cluster', 'performance_vs_cluster', 'cluster_avg_score'
]

# add approximate coordinates - enhanced with jitter for better visualization
# One approximate (lat, lng) reference point per state-level entity. Counties
# are later scattered around their state's point; these are NOT real county
# centroids.
state_coords = {
    'California': (36.7783, -119.4179), 'Texas': (31.9686, -99.9018), 'Florida': (27.7663, -82.6404),
    'New York': (40.7589, -73.9851), 'Pennsylvania': (40.2677, -76.8759), 'Illinois': (40.6331, -89.3985),
    'Ohio': (40.2732, -82.7850), 'Georgia': (32.1656, -82.9001), 'North Carolina': (35.7596, -79.0193),
    'Michigan': (44.3148, -85.6024), 'New Jersey': (40.0583, -74.4057), 'Virginia': (37.4316, -78.6569),
    'Washington': (47.7511, -120.7401), 'Arizona': (34.0489, -111.0937), 'Massachusetts': (42.2373, -71.5314),
    'Tennessee': (35.5175, -86.5804), 'Indiana': (40.2677, -86.1349), 'Missouri': (37.9643, -91.8318),
    'Maryland': (39.0458, -76.6413), 'Wisconsin': (43.7844, -88.7879), 'Colorado': (39.7392, -104.9903),
    'Minnesota': (46.7296, -94.6859), 'South Carolina': (33.8361, -81.1637), 'Alabama': (32.3182, -86.9023),
    'Louisiana': (30.9843, -91.9623), 'Kentucky': (37.8393, -84.2700), 'Oregon': (44.9778, -120.7376),
    'Oklahoma': (35.0078, -97.0929), 'Connecticut': (41.6032, -73.0877), 'Iowa': (41.8780, -93.0977),
    'Arkansas': (34.7465, -92.2824), 'Mississippi': (32.3547, -89.3985), 'Kansas': (39.0119, -98.4842),
    'Utah': (39.3210, -111.0937), 'Nevada': (38.8026, -116.4194), 'New Mexico': (34.5199, -105.8701),
    'West Virginia': (38.5976, -80.4549), 'Nebraska': (41.4925, -99.9018), 'Idaho': (44.0682, -114.7420),
    'Hawaii': (19.8968, -155.5828), 'New Hampshire': (43.1939, -71.5724), 'Maine': (45.2538, -69.4455),
    'Rhode Island': (41.5801, -71.4774), 'Montana': (46.8797, -110.3626), 'Delaware': (38.9108, -75.5277),
    'South Dakota': (43.9695, -99.9018), 'North Dakota': (47.5515, -101.0020), 'Alaska': (64.0685, -152.2782),
    'Vermont': (44.2601, -72.5806), 'Wyoming': (43.0750, -107.2903),
    # The data holds 52 state-level entities, not 50. Without these two entries
    # their counties fell back to the (39, -98) default and were drawn in
    # central Kansas.
    'District of Columbia': (38.9072, -77.0369), 'Puerto Rico': (18.2208, -66.5901)
}

# scatter each county around its state's reference point
# (seeded so the jitter is the same on every run; lat noise is drawn before lng noise)
np.random.seed(42)
n_counties = len(county_profiles)

lat_center = county_profiles['State'].map(lambda name: state_coords.get(name, (39, -98))[0])
lat_jitter = np.random.normal(0, 1.0, n_counties)
county_profiles['lat'] = lat_center + lat_jitter

lng_center = county_profiles['State'].map(lambda name: state_coords.get(name, (39, -98))[1])
lng_jitter = np.random.normal(0, 1.5, n_counties)
county_profiles['lng'] = lng_center + lng_jitter

# main dataset: selected columns plus coordinates, missing values -> 0, 2 decimals
export_data = (
    county_profiles[export_columns + ['lat', 'lng']]
    .copy()
    .fillna(0)
    .round(2)
)

export_data.to_json('.objects/comprehensive_county_data.json', orient='records', indent=2)
export_data.to_pickle('.objects/comprehensive_county_data.pkl')

# remaining datasets share the same JSON layout
secondary_exports = (
    (state_analysis, '.objects/detailed_state_analysis.json'),
    (recommendations_df, '.objects/policy_recommendations.json'),
    (projections_df, '.objects/projection_scenarios.json'),
)
for frame, target_path in secondary_exports:
    frame.to_json(target_path, orient='records', indent=2)

# export cluster definitions
# Values coming out of pandas are numpy scalars. json.dump cannot serialize
# numpy integers (np.int64), so everything is cast to a native Python type
# here; otherwise this cell only works while the columns are float dtype.
cluster_definitions = []
for cluster_id, meta in cluster_descriptions.items():
    cluster_counties = county_profiles[county_profiles['Cluster_7'] == cluster_id]
    access = cluster_counties['Healthcare_Access']
    population = cluster_counties['Population']
    top_states = cluster_counties['State'].value_counts().head(3)

    cluster_definitions.append({
        'id': int(cluster_id),
        'name': meta['name'],
        'description': meta['desc'],
        'county_count': len(cluster_counties),
        'avg_healthcare_access': float(access.mean()),
        'avg_population': float(population.mean()),
        'avg_income': float(cluster_counties['Median_Income'].mean()),
        'characteristics': {
            'healthcare_range': [float(access.min()), float(access.max())],
            'population_range': [float(population.min()), float(population.max())],
            'top_states': {state: int(count) for state, count in top_states.items()}
        }
    })

with open('.objects/cluster_definitions.json', 'w') as f:
    json.dump(cluster_definitions, f, indent=2)

print(f"\nExported comprehensive datasets:")
print(f"- comprehensive_county_data.json: {len(export_data)} counties with {len(export_columns)+2} features")
print(f"- detailed_state_analysis.json: {len(state_analysis)} states")
print(f"- policy_recommendations.json: {len(recommendations_df)} recommendations")
print(f"- projection_scenarios.json: {len(projections_df)} county projections")
print(f"- cluster_definitions.json: {len(cluster_definitions)} detailed cluster definitions")


Exported comprehensive datasets:
- comprehensive_county_data.json: 3222 counties with 31 features
- detailed_state_analysis.json: 52 states
- policy_recommendations.json: 7004 recommendations
- projection_scenarios.json: 3222 county projections
- cluster_definitions.json: 7 detailed cluster definitions


In [22]:
# create gradient color scales and thematic mappings
def create_color_scales():
    """Return the color scale definitions keyed by theme.

    Each theme maps to a dict with a display 'name', a list of hex 'colors',
    the numeric 'domain' breakpoints and the text 'labels'. A new dict is
    built on every call.
    """
    def _scale(name, colors, domain, labels):
        # common layout of one scale entry
        return {'name': name, 'colors': colors, 'domain': domain, 'labels': labels}

    scales = {}

    scales['healthcare_access'] = _scale(
        'Healthcare Access Score',
        ['#1a0000', '#4d1100', '#802200', '#b33300', '#e64400', '#ff7700',
         '#ffaa44', '#ffdd77', '#ffffaa', '#ccffcc', '#66ff66', '#00ff00'],
        [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 95, 100],
        ['Critical', 'Very Poor', 'Poor', 'Below Average', 'Fair', 'Average',
         'Good', 'Very Good', 'Excellent', 'Outstanding', 'Elite'],
    )

    scales['opportunity'] = _scale(
        'Improvement Opportunity',
        ['#000033', '#003366', '#006699', '#0099cc', '#00ccff', '#66ddff', '#99eeff', '#ccf7ff'],
        [0, 20, 40, 60, 75, 85, 95, 100],
        ['Limited', 'Low', 'Moderate', 'Good', 'High', 'Very High', 'Exceptional'],
    )

    scales['vulnerability'] = _scale(
        'Vulnerability Index',
        ['#000', '#330000', '#660000', '#990000', '#cc0000', '#ff3333',
         '#ff6666', '#ff9999', '#ffcccc'],
        [0, 10, 20, 30, 40, 50, 60, 70, 80],
        ['Very Low', 'Low', 'Below Average', 'Average', 'Above Average',
         'High', 'Very High', 'Critical'],
    )

    scales['population'] = _scale(
        'Population Size',
        ['#f0f0f0', '#d9d9d9', '#bdbdbd', '#969696', '#737373', '#525252', '#252525'],
        [0, 10000, 50000, 100000, 500000, 1000000, 5000000],
        ['Very Small', 'Small', 'Medium', 'Large', 'Very Large', 'Major', 'Mega'],
    )

    return scales

# build the scale definitions once and persist them for the frontend
color_scales = create_color_scales()

color_scales_json = json.dumps(color_scales, indent=2)
with open('.objects/color_scales.json', 'w') as scales_file:
    scales_file.write(color_scales_json)

print("created advanced color scales")

created advanced color scales


In [23]:
# create insight templates for the frontend
# Format-string templates grouped by insight type; the {placeholders} are meant
# to be filled per county.
# NOTE(review): no later cell visible here references insights_templates, and
# it is not written to any file - the per-county texts below are built from
# separate inline f-strings. Confirm whether this dict should be exported.
insights_templates = {
    'comparative': [
        "This county ranks #{rank_in_cluster} out of {total_in_cluster} similar counties",
        "Performance is {performance_vs_cluster:+.1f} points {direction} the cluster average",
        "Similar counties achieve an average score of {cluster_avg_score:.1f}"
    ],
    'opportunity': [
        "High opportunity for improvement with score of {opportunity_score:.1f}",
        "With targeted interventions, could reach {potential_score:.1f} within 3-5 years",
        "Priority areas: {top_intervention_areas}"
    ],
    'vulnerability': [
        "Vulnerability index of {vulnerability_index:.1f} indicates {vulnerability_level} risk",
        "Key risk factors: {top_risk_factors}",
        "Resilience score of {resilience_score:.1f} shows {resilience_level} protective factors"
    ],
    'outlier': [
        "This county is an outlier - {outlier_reason}",
        "Despite {limiting_factor}, achieves {positive_outcome}",
        "Could serve as a model for {similar_counties} similar counties"
    ]
}

# build the per-county insight texts
def _insights_for(county):
    """Return the list of insight dicts (type, text, importance) for one county row."""
    rank = county['rank_in_cluster']
    peer_total = county['total_in_cluster']
    gap = county['performance_vs_cluster']
    peer_avg = county['cluster_avg_score']
    opportunity = county['Opportunity_Score']
    vulnerability = county['Vulnerability_Index']
    resilience = county['Resilience_Score']

    # comparative insight (always present)
    if gap > 0:
        direction = "above"
    else:
        direction = "below"
    items = [{
        'type': 'comparative',
        'text': f"This county ranks #{rank} out of {peer_total} similar counties. Performance is {gap:+.1f} points {direction} the cluster average of {peer_avg:.1f}.",
        'importance': 'medium'
    }]

    # opportunity insight (only for high-opportunity counties)
    if opportunity > 70:
        items.append({
            'type': 'opportunity',
            'text': f"High opportunity for improvement with opportunity score of {opportunity:.1f}. Strategic investments could yield significant population health gains.",
            'importance': 'high'
        })

    # vulnerability insight (always present)
    if vulnerability > 60:
        vuln_level = "high"
    elif vulnerability > 30:
        vuln_level = "moderate"
    else:
        vuln_level = "low"
    items.append({
        'type': 'vulnerability',
        'text': f"Vulnerability index of {vulnerability:.1f} indicates {vuln_level} risk. Resilience score of {resilience:.1f} shows protective factors.",
        'importance': 'high' if vulnerability > 60 else 'medium'
    })

    return items


county_insights = [
    {'FIPS': county['FIPS'], 'insights': _insights_for(county)}
    for _, county in county_profiles.iterrows()
]

with open('.objects/county_insights.json', 'w') as insights_file:
    json.dump(county_insights, insights_file, indent=2)

print(f"created insights for {len(county_insights)} counties")
print("\n=== DATA PREPARATION COMPLETE ===")
print("Ready to create symlinks and build advanced frontend!")

created insights for 3222 counties

=== DATA PREPARATION COMPLETE ===
Ready to create symlinks and build advanced frontend!
