In [27]:
import pandas as pd
import altair as alt

# Load the commit-pinned enrollment data and total the three enrollment
# counts for each county.
enrollment_df = (
    pd.read_json('https://raw.githubusercontent.com/APWright/CSC477-Fall2025/7e0863102337a1227224f23e668e9a1699cf237a/Assignment-3/csforca-enrollment.json')
    .rename(columns={'county': 'County'})
    .groupby('County', as_index=False)[['AP CS', 'Non-AP CS', 'Overall Enrollment']]
    .sum()
)

# Keep only counties with students in both tracks, so the ratio below never
# divides by zero and is never trivially zero.
has_both_tracks = enrollment_df['AP CS'].gt(0) & enrollment_df['Non-AP CS'].gt(0)
enrollment_df = enrollment_df.loc[has_both_tracks].copy()
enrollment_df['Non-AP to AP Ratio'] = enrollment_df['Non-AP CS'] / enrollment_df['AP CS']

# Attach median household income; the inner join drops counties missing from
# either table.
income_df = pd.read_json('ca_county_median_income.json')
merged_df = pd.merge(enrollment_df, income_df, on='County', how='inner')

# Lake and Imperial are charted separately from every other county.
is_outlier = merged_df['County'].isin(['Lake', 'Imperial'])
merged_df_no_outliers = merged_df[~is_outlier]
merged_df_only_outliers = merged_df[is_outlier]

# Income range of the non-outlier counties (used as the charts' color domain).
incomes_no_outliers = merged_df_no_outliers['Median Household Income']
min_income = incomes_no_outliers.min()
max_income = incomes_no_outliers.max()

def create_ap_to_non_ap_ratio_chart(df, income_domain=None, bar_height=14):
    """Build a horizontal bar chart of the Non-AP to AP CS ratio per county.

    One bar is drawn per row of ``df``; bars are ordered by descending ratio
    and colored by median household income on a green sequential scale.

    Args:
        df: DataFrame with the columns 'County', 'AP CS', 'Non-AP CS',
            'Non-AP to AP Ratio' and 'Median Household Income'.
        income_domain: Optional ``(low, high)`` pair used as the color-scale
            domain. When omitted, the notebook-level ``min_income`` and
            ``max_income`` are used, so charts built from different subsets
            share one color scale.
        bar_height: Pixels of chart height allotted to each row of ``df``.

    Returns:
        The configured ``alt.Chart``.
    """
    if income_domain is None:
        # Fall back to the notebook globals, looked up at call time.
        income_domain = (min_income, max_income)
    low_income, high_income = income_domain

    return (
        alt.Chart(df)
        .mark_bar()
        .encode(
            x=alt.X('Non-AP to AP Ratio:Q', title='Non-AP to AP CS Student Ratio'),
            # '-x' orders the counties by descending ratio.
            y=alt.Y('County:N', title='County', sort='-x'),
            color=alt.Color(
                'Median Household Income:Q',
                scale=alt.Scale(scheme='greens', domain=[low_income, high_income])
            ),
            tooltip=[
                'County',
                'AP CS',
                'Non-AP CS',
                'Non-AP to AP Ratio',
                'Median Household Income'
            ],
        )
        .properties(
            title='Ratio of Non-AP to AP CS Students by County (Colored by Median Household Income)',
            width=800,
            # Height grows with the row count so every county label stays legible.
            height=bar_height * len(df),
        )
    )

In [28]:
# Ratio chart for every merged county except Lake and Imperial (split out
# earlier as outliers). An SVG copy is saved next to the notebook, and the
# trailing expression renders the chart as the cell output.
no_outliers_ratio_chart = create_ap_to_non_ap_ratio_chart(merged_df_no_outliers)
no_outliers_ratio_chart.save('no_outliers_ratio_chart.svg')
no_outliers_ratio_chart

In [29]:
# Ratio chart for only Lake and Imperial. The helper colors bars using the
# income range of the non-outlier counties, so this chart and the non-outlier
# chart share one color scale. Saved as SVG, then rendered as the cell output.
outliers_ratio_chart = create_ap_to_non_ap_ratio_chart(merged_df_only_outliers)
outliers_ratio_chart.save('outliers_ratio_chart.svg')
outliers_ratio_chart


In [30]:
from vega_datasets import data

# Rebuild the per-county totals from the commit-pinned source. Unlike the
# ratio analysis, no counties are filtered out here.
enrollment_df = (
    pd.read_json('https://raw.githubusercontent.com/APWright/CSC477-Fall2025/7e0863102337a1227224f23e668e9a1699cf237a/Assignment-3/csforca-enrollment.json')
    .rename(columns={'county': 'County'})
    .groupby('County', as_index=False)[['AP CS', 'Non-AP CS', 'Overall Enrollment']]
    .sum()
)

# Share of all enrolled students taking any CS course (AP or not), in percent.
cs_students = enrollment_df['AP CS'] + enrollment_df['Non-AP CS']
enrollment_df['CS Enrollment %'] = cs_students / enrollment_df['Overall Enrollment'] * 100

# Attach median household income; the inner join drops counties missing from
# either table.
income_df = pd.read_json('ca_county_median_income.json')
enrollment_df = pd.merge(enrollment_df, income_df, on='County', how='inner')

# The median of the county incomes splits the map into its two color layers.
median_income = enrollment_df['Median Household Income'].median()

# Observed enrollment range, used as the domain of both color scales.
cs_enrollment_pct = enrollment_df['CS Enrollment %']
min_cs_enrollment = cs_enrollment_pct.min()
max_cs_enrollment = cs_enrollment_pct.max()

# County name -> FIPS code. The codes are the odd numbers 6001..6115 handed
# out in alphabetical order of county name, so the mapping is generated from
# the ordered name list instead of being spelled out pair by pair.
ca_county_names = [
    'Alameda', 'Alpine', 'Amador', 'Butte',
    'Calaveras', 'Colusa', 'Contra Costa', 'Del Norte',
    'El Dorado', 'Fresno', 'Glenn', 'Humboldt',
    'Imperial', 'Inyo', 'Kern', 'Kings',
    'Lake', 'Lassen', 'Los Angeles', 'Madera',
    'Marin', 'Mariposa', 'Mendocino', 'Merced',
    'Modoc', 'Mono', 'Monterey', 'Napa',
    'Nevada', 'Orange', 'Placer', 'Plumas',
    'Riverside', 'Sacramento', 'San Benito', 'San Bernardino',
    'San Diego', 'San Francisco', 'San Joaquin', 'San Luis Obispo',
    'San Mateo', 'Santa Barbara', 'Santa Clara', 'Santa Cruz',
    'Shasta', 'Sierra', 'Siskiyou', 'Solano',
    'Sonoma', 'Stanislaus', 'Sutter', 'Tehama',
    'Trinity', 'Tulare', 'Tuolumne', 'Ventura',
    'Yolo', 'Yuba',
]
county_fips = dict(zip(ca_county_names, range(6001, 6116, 2)))
# Key each county row by its FIPS code so it can be joined to the map shapes.
enrollment_df['id'] = enrollment_df['County'].map(county_fips)

# County shapes from the us_10m TopoJSON, restricted to ids in the range
# covered by county_fips.
us_counties = alt.topo_feature(data.us_10m.url, 'counties')
in_california = (alt.datum.id >= 6001) & (alt.datum.id <= 6115)

# Columns copied onto each shape whose id matches a row of enrollment_df.
county_stats = alt.LookupData(
    data=enrollment_df,
    key='id',
    fields=[
        'County',
        'CS Enrollment %',
        'AP CS',
        'Non-AP CS',
        'Overall Enrollment',
        'Median Household Income',
    ],
)

base = (
    alt.Chart(us_counties)
    .mark_geoshape(stroke='black', strokeWidth=0.5)
    .transform_filter(in_california)
    .transform_lookup(lookup='id', from_=county_stats)
)

# Tooltip shared by both map layers: (field shorthand, display options).
tooltip_specs = [
    ('County:N', dict(title='County')),
    ('CS Enrollment %:Q', dict(title='CS Enrollment %', format='.2f')),
    ('Median Household Income:Q', dict(title='Income', format='$,.0f')),
    ('AP CS:Q', dict(title='AP CS Students')),
    ('Non-AP CS:Q', dict(title='Non-AP CS Students')),
    ('Overall Enrollment:Q', dict(title='Overall Enrollment', format=',')),
]
tooltips = [alt.Tooltip(field, **options) for field, options in tooltip_specs]

def _enrollment_layer(income_predicate, scheme, legend_title):
    """Return the base map limited to one income group, colored by CS enrollment %.

    Every layer uses the same enrollment domain, so a given color intensity
    stands for the same enrollment percentage whichever scheme draws it.
    """
    enrollment_scale = alt.Scale(
        scheme=scheme,
        domain=[min_cs_enrollment, max_cs_enrollment],
        nice=True,
    )
    return base.transform_filter(income_predicate).encode(
        color=alt.Color('CS Enrollment %:Q', scale=enrollment_scale, title=legend_title),
        tooltip=tooltips,
    )


county_income = alt.datum['Median Household Income']

# Counties below the median income are drawn in reds, the rest in greens.
below_median = _enrollment_layer(
    county_income < median_income, 'reds', 'Enrollment % (Low Income)'
)
above_median = _enrollment_layer(
    county_income >= median_income, 'greens', 'Enrollment % (High Income)'
)

# Stack the two income layers into one map. The color scales are resolved
# independently so each layer keeps its own scheme and legend instead of being
# merged into a single shared scale.
choropleth = (
    (below_median + above_median)
    .project(type='albersUsa')
    .properties(
        # The above-median layer is drawn with the 'greens' scheme, so the
        # title names green (it previously said "Blue", which matched no layer).
        title='CS Enrollment % by County (Red: Below Median Income | Green: Above)',
        width=800,
        height=600
    )
    .resolve_scale(color='independent')
)

# Save an SVG copy, then render the map as the cell output.
choropleth.save('choropleth.svg')
choropleth