# [Research Request - 5311 Apportionment Formula #1710](https://github.com/cal-itp/data-analyses/issues/1710)

- Double check work done by Sara.
- Cross reference all available types of census data.
- Can we use the Census API to read data into a notebook and apply the apportionment formula?

**Possible explanation of how population data was determined**
via email:
>Previous calculations used a combination of population and demographic information received from DOTP along with these FTA maps to determine the rural population in each of the CA regions
>
>We previously had discussed updating this formula to match how the FTA funds have been apportioned to California, but that conversation was put on hold for a much deeper discussion on the formula. 

**Notes from 5311 apportionment excel sheet**

1. Alpine and Sierra Counties (collectively, 'Minimum Counties') receive a minimum funding amount of $48,000 due to low population (under 7,590)
2. Orange County does not receive Rural funds


In [1]:
import pandas as pd
import altair as alt
from calitp_data_analysis.sql import query_sql

### County-level Urban and Rural information for the 2010 & 2020 Census

In [2]:
# Census Bureau county-level urban/rural workbooks, one per decennial census
county_rural_2020_url = "https://www2.census.gov/geo/docs/reference/ua/2020_UA_COUNTY.xlsx"
county_rural_2010_url = "https://www2.census.gov/geo/docs/reference/ua/PctUrbanRural_County.xls"


def _read_county_rural(url, year_suffix):
    """Read one workbook, lower-case its column names and append `year_suffix` to each."""
    frame = pd.read_excel(url)
    frame.columns = frame.columns.str.lower()
    # the suffix distinguishes the census years once the tables are combined
    return frame.add_suffix(year_suffix)


county_rural_data_2020 = _read_county_rural(county_rural_2020_url, "_2020")
county_rural_data_2010 = _read_county_rural(county_rural_2010_url, "_2010")

# preview both tables
display(county_rural_data_2020.head(), county_rural_data_2010.head())

Unnamed: 0,state_2020,county_2020,state_name_2020,county_name_2020,pop_cou_2020,hou_cou_2020,aland_cou_2020,aland_mi²_cou_2020,popden_cou_2020,houden_cou_2020,...,pop_rur_2020,poppct_rur_2020,hou_rur_2020,houpct_rur_2020,aland_rur_2020,aland_mi²_rur_2020,aland_pct_rur_2020,popden_rur_2020,houden_rur_2020,ruralblocks_2020
0,1,1,Alabama,Autauga,58805,24350,1539634184,594.452758,98.922916,40.962044,...,23920,0.406768,9991,0.410308,1483727020,572.867002,0.963688,41.754892,17.440348,991
1,1,3,Alabama,Baldwin,231767,124148,4117656199,1589.827058,145.781265,78.088997,...,87113,0.375865,40740,0.328157,3762600021,1452.739868,0.913772,59.964624,28.043562,3181
2,1,5,Alabama,Barbour,25223,11618,2292160149,885.003034,28.500467,13.127639,...,16627,0.6592,7538,0.648821,2276027730,878.774307,0.992962,18.920672,8.577857,1011
3,1,7,Alabama,Bibb,22293,9002,1612188717,622.466064,35.814001,14.461833,...,22293,1.0,9002,1.0,1612188717,622.466064,1.0,35.814001,14.461833,1090
4,1,9,Alabama,Blount,59134,24622,1670259090,644.887035,91.69668,38.18033,...,53510,0.904894,22337,0.907197,1658933117,640.514076,0.993219,83.54227,34.873551,2207


Unnamed: 0,state_2010,county_2010,statename_2010,countyname_2010,pop_cou_2010,area_cou_2010,pop_urban_2010,poppct_urban_2010,area_urban_2010,areapct_urban_2010,...,pop_uc_2010,poppct_uc_2010,area_uc_2010,areapct_uc_2010,popden_uc_2010,pop_rural_2010,poppct_rural_2010,area_rural_2010,areapct_rural_2010,popden_rural_2010
0,1,1,Alabama,Autauga,54571,1539582278,31650,58.0,50882539,3.3,...,0,0.0,0,0.0,,22921,42.0,1488699739,96.7,39.9
1,1,3,Alabama,Baldwin,182265,4117521611,105205,57.72,275001368,6.68,...,41556,22.8,111381848,2.71,966.3,77060,42.28,3842520243,93.32,51.9
2,1,5,Alabama,Barbour,27457,2291818968,8844,32.21,17447656,0.76,...,8844,32.21,17447656,0.76,1312.8,18613,67.79,2274371312,99.24,21.2
3,1,7,Alabama,Bibb,22915,1612480789,7252,31.65,18903236,1.17,...,7252,31.65,18903236,1.17,993.6,15663,68.35,1593577553,98.83,25.5
4,1,9,Alabama,Blount,57322,1669961855,5760,10.05,10874700,0.65,...,5374,9.38,9851721,0.59,1412.8,51562,89.95,1659087155,99.35,80.5


### filter for just California

In [3]:
# keep only the California rows of each census table
ca_county_2010 = county_rural_data_2010[county_rural_data_2010["statename_2010"] == "California"]
ca_county_2020 = county_rural_data_2020[county_rural_data_2020["state_name_2020"] == "California"]

# Sanity check: both census years must list the same California county names,
# because the two tables are later inner-joined on county name and a mismatch
# would silently drop counties.
assert set(ca_county_2010["countyname_2010"]) == set(ca_county_2020["county_name_2020"]), (
    "2010 and 2020 California county names do not match"
)

### aggregate by county name, sum population columns

In [4]:
# columns totalled per county for each census year
sum_cols_2010 = ["pop_rural_2010", "pop_cou_2010", "poppct_rural_2010"]
sum_cols_2020 = ["pop_rur_2020", "pop_cou_2020", "poppct_rur_2020"]

county_2010 = ca_county_2010.groupby("countyname_2010", as_index=False)[sum_cols_2010].sum()

county_2020 = (
    ca_county_2020
    .groupby("county_name_2020", as_index=False)[sum_cols_2020]
    .sum()
    # scale the 2020 rural share by 100 and round to 2 decimals
    .assign(poppct_rur_2020=lambda frame: (frame["poppct_rur_2020"] * 100).round(2))
)

display(county_2010.head(), county_2020.head())

Unnamed: 0,countyname_2010,pop_rural_2010,pop_cou_2010,poppct_rural_2010
0,Alameda,5869,1510271,0.39
1,Alpine,1175,1175,100.0
2,Amador,23016,38091,60.42
3,Butte,41584,220000,18.9
4,Calaveras,34370,45578,75.41


Unnamed: 0,county_name_2020,pop_rur_2020,pop_cou_2020,poppct_rur_2020
0,Alameda,8253,1682353,0.49
1,Alpine,1204,1204,100.0
2,Amador,28020,40474,69.23
3,Butte,44478,211632,21.02
4,Calaveras,37128,45292,81.97


### merge 2010 and 2020 county census data

In [5]:
# inner-join the two census years on county name
both_years = pd.merge(
    county_2010,
    county_2020,
    how="inner",
    left_on="countyname_2010",
    right_on="county_name_2020",
)

# keep the county name and the two rural population columns, under consistent names
pop_2010_2020 = both_years[["countyname_2010", "pop_rural_2010", "pop_rur_2020"]].rename(
    columns={
        "countyname_2010": "county",
        "pop_rural_2010": "pop_rur_2010",
    }
)
pop_2010_2020.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 58 entries, 0 to 57
Data columns (total 3 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   county        58 non-null     object
 1   pop_rur_2010  58 non-null     int64 
 2   pop_rur_2020  58 non-null     int64 
dtypes: int64(2), object(1)
memory usage: 1.8+ KB


### Read in and attach CT district numbers

In [6]:
# table queried for the county name -> Caltrans district pairs
county_table = "mart_transit_database.dim_county_geography"

# distinct (name, caltrans_district) pairs from rows where `_is_current` is true,
# capped at 1000 rows
county_query = f"""
SELECT
  DISTINCT name, caltrans_district
FROM
  {county_table}
WHERE
  _is_current is TRUE
LIMIT
  1000
"""

# run the query; with `as_df=True` the result is presumably a pandas DataFrame
# (it is merged as one in the next cell) — confirm against query_sql's documentation
county_district = query_sql(county_query, as_df=True)

In [7]:
# left-join the district lookup on county name, then discard the duplicate key column
# (an earlier `indicator=True` check showed every row as `both`)
pop_2010_2020 = pd.merge(
    pop_2010_2020,
    county_district,
    how="left",
    left_on="county",
    right_on="name",
).drop(columns="name")

pop_2010_2020.head()

Unnamed: 0,county,pop_rur_2010,pop_rur_2020,caltrans_district
0,Alameda,5869,8253,4
1,Alpine,1175,1204,10
2,Amador,23016,28020,10
3,Butte,41584,44478,3
4,Calaveras,34370,37128,10


### consolidate specific counties to MTC and SACOG

In [8]:
# counties reported together as the single row "MTC"
mtc = [
    "Sonoma",
    "Napa",
    "Solano",
    "Contra Costa",
    "Alameda",
    "Marin",
    "San Francisco",
    "San Mateo",
    "Santa Clara",
]

# counties reported together as the single row "SACOG"
sacog = [
    "Yuba",
    "Sutter",
    "Yolo",
    "Sacramento",
]

# (row label, member counties, Caltrans district assigned to the combined row)
regions = [
    ("MTC", mtc, 4),
    ("SACOG", sacog, 3),
]
rural_pop_cols = ["pop_rur_2010", "pop_rur_2020"]

region_rows = []
for region_name, member_counties, district_number in regions:
    is_member = pop_2010_2020["county"].isin(member_counties)
    # Skip a region whose member counties are already gone, so re-running this
    # cell does not append a second, all-zero row for it.
    if not is_member.any():
        continue
    totals = pop_2010_2020.loc[is_member, rural_pop_cols].sum()
    region_rows.append(
        {
            "county": region_name,
            "pop_rur_2010": int(totals["pop_rur_2010"]),
            "pop_rur_2020": int(totals["pop_rur_2020"]),
            "caltrans_district": district_number,
        }
    )

# append all combined rows in one concat
if region_rows:
    pop_2010_2020 = pd.concat(
        [pop_2010_2020, pd.DataFrame(region_rows)],
        ignore_index=True,
    )

# Remove the individual MTC and SACOG counties. `.copy()` makes the result an
# independent frame so later column assignments do not raise SettingWithCopyWarning.
pop_2010_2020 = pop_2010_2020[~pop_2010_2020["county"].isin(mtc + sacog)].copy()

In [9]:
# last rows of the frame, where the appended MTC and SACOG rows sit
pop_2010_2020.tail()

Unnamed: 0,county,pop_rur_2010,pop_rur_2020,caltrans_district
53,Tulare,68449,94889,6
54,Tuolumne,27110,26607,10
55,Ventura,25725,28411,7
58,MTC,156971,181435,4
59,SACOG,76115,94530,3


## Applying OTGC formula
- via apportionment worksheet

`apportionment = (county rural population / total net population) * total apportionment available to counties - 197.27`


In [10]:
# California total, taken from FTA Table 9 (FY 2025 Section 5311 and Section 5340):
# https://www.transit.dot.gov/funding/apportionments/table-9-fy-2025-section-5311-and-section-5340-rural-area-formula-0
ca_total_5311_apportionment = 43540762

# amount left for the population-based split after removing admin, minimum-county and 5311(f) amounts
total_available_to_counties = 32559572

# counties left out of the net population (their amounts are set manually afterwards)
excluded_county_pop = ["Alpine", "Orange", "Sierra"]

# net population = rural population summed over every row that is not excluded
counts_toward_net = ~pop_2010_2020["county"].isin(excluded_county_pop)
net_pop_2010 = pop_2010_2020.loc[counts_toward_net, "pop_rur_2010"].sum()
net_pop_2020 = pop_2010_2020.loc[counts_toward_net, "pop_rur_2020"].sum()
# function to replicate formula
def otgc_formula(
    rural_pop: int,
    total_net_pop: int,
    total_county_funds: int,
    adjustment: float = 197.27,
):
    """Replicate the apportionment worksheet formula for one county.

    apportionment = (rural_pop / total_net_pop) * total_county_funds - adjustment

    Args:
        rural_pop: rural population of the county (or region row).
        total_net_pop: net rural population the share is measured against.
        total_county_funds: total amount available to distribute to counties.
        adjustment: flat amount subtracted from every result. Defaults to
            197.27, the constant in the worksheet formula, so existing
            three-argument calls are unchanged.

    Returns:
        The unrounded apportionment amount.
    """
    population_share = rural_pop / total_net_pop
    return (population_share * total_county_funds) - adjustment

# calculate 2010 apportionment
# The formula is plain arithmetic, so it is evaluated on whole columns at once.
# Each census year uses its own rural population column and net population.
for census_year, net_population in (("2010", net_pop_2010), ("2020", net_pop_2020)):
    pop_2010_2020[f"apportionment_calc_{census_year}"] = otgc_formula(
        rural_pop=pop_2010_2020[f"pop_rur_{census_year}"],
        total_net_pop=net_population,
        total_county_funds=total_available_to_counties,
    ).round(2)

## manual overwrite for orange, sierra, alpine
per instructions on apportionment worksheet

In [11]:
# fixed amounts that replace the formula result for these counties
min_county_edit = {
    "Alpine": 48000.00,
    "Orange": 0,
    "Sierra": 48000.00,
}

# overwrite both calculated columns for each listed county in one .loc assignment
calc_cols = ["apportionment_calc_2020", "apportionment_calc_2010"]
for county_name, fixed_amount in min_county_edit.items():
    is_target = pop_2010_2020["county"] == county_name
    pop_2010_2020.loc[is_target, calc_cols] = fixed_amount

## re-order columns

In [12]:
# final column order: identifiers first, then each census year's population beside its apportionment
column_order = [
    "county",
    "caltrans_district",
    "pop_rur_2010",
    "apportionment_calc_2010",
    "pop_rur_2020",
    "apportionment_calc_2020",
]
pop_2010_2020 = pop_2010_2020.loc[:, column_order]

# columns of the final cleaned dataframe
pop_2010_2020.columns

Index(['county', 'caltrans_district', 'pop_rur_2010',
       'apportionment_calc_2010', 'pop_rur_2020', 'apportionment_calc_2020'],
      dtype='object')

## Compare against DLA apportionment

In [13]:
# DLA FFY25 apportionment table, with columns renamed to match this notebook's naming
dla_fy25 = pd.read_csv("gs://calitp-analytics-data/data-analyses/5311 /fy25_apportionment_dla.csv")

dla_fy25 = dla_fy25.rename(columns={
    'County/Region':'county',
    'ID':'id',
    'Population (9)': 'pop_rur_2025_dla',
    'FFY25 Apportionment': 'apportionment_dla_2025'
})

# `.info()` prints its summary and returns None, so it is called directly;
# wrapping it in display() only adds a stray "None" to the output.
dla_fy25.info()

# Inner join on county name. `indicator=True` is kept because the `_merge`
# column is dropped by name when the full table is shown at the end.
compare_merge = pop_2010_2020.merge(
    dla_fy25,
    on="county",
    how="inner",
    indicator=True,
)

compare_merge.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47 entries, 0 to 46
Data columns (total 4 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   county                  47 non-null     object 
 1   id                      45 non-null     float64
 2   pop_rur_2025_dla        47 non-null     int64  
 3   apportionment_dla_2025  47 non-null     float64
dtypes: float64(2), int64(1), object(1)
memory usage: 1.6+ KB


None

<class 'pandas.core.frame.DataFrame'>
Int64Index: 47 entries, 0 to 46
Data columns (total 10 columns):
 #   Column                   Non-Null Count  Dtype   
---  ------                   --------------  -----   
 0   county                   47 non-null     object  
 1   caltrans_district        47 non-null     int64   
 2   pop_rur_2010             47 non-null     int64   
 3   apportionment_calc_2010  47 non-null     float64 
 4   pop_rur_2020             47 non-null     int64   
 5   apportionment_calc_2020  47 non-null     float64 
 6   id                       45 non-null     float64 
 7   pop_rur_2025_dla         47 non-null     int64   
 8   apportionment_dla_2025   47 non-null     float64 
 9   _merge                   47 non-null     category
dtypes: category(1), float64(4), int64(4), object(1)
memory usage: 3.8+ KB


None

In [14]:
def _pct_diff(new_values, base_values):
    """Percent change from `base_values` to `new_values`, rounded to whole percent.

    The ratio is scaled to percent before rounding; rounding first and then
    multiplying by 100 leaves float artifacts (0.29 * 100 == 28.999999999999996).
    Rows whose base value is 0 yield NaN rather than dividing by zero.
    """
    nonzero_base = base_values.where(base_values != 0)
    return ((new_values - base_values) / nonzero_base * 100).round()


# absolute differences: DLA FFY25 figure minus the 2020-census-based figure
compare_merge["2020_fy25_amt_diff"] = compare_merge["apportionment_dla_2025"] - compare_merge["apportionment_calc_2020"]
compare_merge["2020_fy25_pop_diff"] = compare_merge["pop_rur_2025_dla"] - compare_merge["pop_rur_2020"]

# percent differences relative to the 2020-census-based figure
compare_merge["2020_fy25_amt_%_diff"] = _pct_diff(
    compare_merge["apportionment_dla_2025"], compare_merge["apportionment_calc_2020"]
)
compare_merge["2020_fy25_pop_%_diff"] = _pct_diff(
    compare_merge["pop_rur_2025_dla"], compare_merge["pop_rur_2020"]
)

display(
    compare_merge[[
        "2020_fy25_amt_diff",
        "2020_fy25_pop_diff",
        "2020_fy25_amt_%_diff",
        "2020_fy25_pop_%_diff"
    ]].describe(),
    compare_merge.head()
)

Unnamed: 0,2020_fy25_amt_diff,2020_fy25_pop_diff,2020_fy25_amt_%_diff,2020_fy25_pop_%_diff
count,47.0,47.0,46.0,47.0
mean,184.635532,41175.212766,12.152174,102.382979
std,277530.592009,41980.759419,43.741141,83.932052
min,-996202.87,-7699.0,-51.0,-9.0
25%,-99623.91,13275.5,-19.5,40.0
50%,14929.49,30210.0,2.5,89.0
75%,106611.35,54716.5,36.0,145.0
max,647155.09,180305.0,158.0,379.0


Unnamed: 0,county,caltrans_district,pop_rur_2010,apportionment_calc_2010,pop_rur_2020,apportionment_calc_2020,id,pop_rur_2025_dla,apportionment_dla_2025,_merge,2020_fy25_amt_diff,2020_fy25_pop_diff,2020_fy25_amt_%_diff,2020_fy25_pop_%_diff
0,Alpine,10,1175,48000.0,1204,48000.0,1.0,1119,48000.0,both,0.0,-85,0.0,-7.0
1,Amador,10,23016,400197.85,28020,402349.01,2.0,46118,357467.0,both,-44882.01,18098,-11.0,65.0
2,Butte,3,41584,723213.86,44478,638791.07,3.0,136143,1055644.0,both,416852.93,91665,65.0,206.0
3,Calaveras,10,34370,597716.36,37128,533198.1,4.0,50990,395251.0,both,-137947.1,13862,-26.0,37.0
4,Colusa,3,6795,118011.14,9326,133783.69,5.0,27483,212945.0,both,79161.31,18157,59.0,195.0


In [15]:
# column dtypes and non-null counts after adding the difference columns
compare_merge.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 47 entries, 0 to 46
Data columns (total 14 columns):
 #   Column                   Non-Null Count  Dtype   
---  ------                   --------------  -----   
 0   county                   47 non-null     object  
 1   caltrans_district        47 non-null     int64   
 2   pop_rur_2010             47 non-null     int64   
 3   apportionment_calc_2010  47 non-null     float64 
 4   pop_rur_2020             47 non-null     int64   
 5   apportionment_calc_2020  47 non-null     float64 
 6   id                       45 non-null     float64 
 7   pop_rur_2025_dla         47 non-null     int64   
 8   apportionment_dla_2025   47 non-null     float64 
 9   _merge                   47 non-null     category
 10  2020_fy25_amt_diff       47 non-null     float64 
 11  2020_fy25_pop_diff       47 non-null     int64   
 12  2020_fy25_amt_%_diff     46 non-null     float64 
 13  2020_fy25_pop_%_diff     47 non-null     float64 
dtypes: category(

## Visuals

### melt dataframe

In [24]:
# columns stacked into long form (one row per county and metric) for plotting
metric_cols = [
    "pop_rur_2010",
    "apportionment_calc_2010",
    "pop_rur_2020",
    "apportionment_calc_2020",
    "pop_rur_2025_dla",
    "apportionment_dla_2025",
    "2020_fy25_amt_diff",
    "2020_fy25_pop_diff",
    "2020_fy25_amt_%_diff",
    "2020_fy25_pop_%_diff",
]

melt = pd.melt(
    compare_merge,
    id_vars=["county", "caltrans_district"],
    value_vars=metric_cols,
    value_name="metric",
)

# preview the long frame and confirm every metric has the same number of rows
display(
    melt.sort_values("county").head(),
    melt["variable"].value_counts(),
)

Unnamed: 0,county,caltrans_district,variable,metric
0,Alpine,10,pop_rur_2010,1175.0
47,Alpine,10,apportionment_calc_2010,48000.0
188,Alpine,10,pop_rur_2025_dla,1119.0
376,Alpine,10,2020_fy25_amt_%_diff,0.0
235,Alpine,10,apportionment_dla_2025,48000.0


pop_rur_2010               47
apportionment_calc_2010    47
pop_rur_2020               47
apportionment_calc_2020    47
pop_rur_2025_dla           47
apportionment_dla_2025     47
2020_fy25_amt_diff         47
2020_fy25_pop_diff         47
2020_fy25_amt_%_diff       47
2020_fy25_pop_%_diff       47
Name: variable, dtype: int64

### 5311 Apportionments per County ($)

In [86]:
# apportionment series drawn side by side for every county
apportionment_cols =[
    "apportionment_calc_2010",
    "apportionment_calc_2020",
    "apportionment_dla_2025",
    # "2020_fy25_amt_diff"
]

alt.Chart(melt[melt["variable"].isin(apportionment_cols)]).mark_bar().encode(
    x = "county:N",
    y = "metric:Q",
    xOffset="variable:N",
    color = "variable:N",
    tooltip = ["county","variable","metric"]
).properties(
    width="container",
    # this chart covers all counties at once and has no district breakdown,
    # so the title no longer claims "per District"
    title= "5311 Apportionments per County ($)"
)

In [87]:
# one grouped bar chart of apportionment amounts per Caltrans district
for district in melt["caltrans_district"].unique():
    amt_county_district = alt.Chart(melt[
        (melt["variable"].isin(apportionment_cols)) &
        (melt["caltrans_district"]==district)
    ]).mark_bar().encode(
        x = "county:N",
        y = "metric:Q",
        xOffset="variable:N",
        row = "caltrans_district",
        color = "variable:N",
        tooltip = ["county","variable","metric"]
    ).properties(
        width= 600,
        # the bars are dollar apportionments, not populations
        title= "5311 Apportionments per County ($) by District"
    )
    
    display(amt_county_district)

### Rural Populations per County

In [88]:
# population series drawn side by side for every county
# ("2020_fy25_pop_diff" is deliberately not plotted here)
pop_cols = ["pop_rur_2010", "pop_rur_2020", "pop_rur_2025_dla"]

population_rows = melt[melt["variable"].isin(pop_cols)]
population_bars = alt.Chart(population_rows).mark_bar().encode(
    x="county:N",
    y="metric:Q",
    xOffset="variable:N",
    color="variable:N",
    tooltip=["county", "variable", "metric"],
)
population_bars.properties(width="container", title="Rural Populations per County")

In [89]:
# one grouped bar chart of rural populations per Caltrans district
is_population_metric = melt["variable"].isin(pop_cols)

for district in melt["caltrans_district"].unique():
    district_rows = melt[is_population_metric & (melt["caltrans_district"] == district)]

    pop_county_district = (
        alt.Chart(district_rows)
        .mark_bar()
        .encode(
            x="county:N",
            y="metric:Q",
            xOffset="variable:N",
            row="caltrans_district",
            color="variable:N",
            tooltip=["county", "variable", "metric"],
        )
        .properties(width=600, title="Rural Populations per County by Districts")
    )

    display(pop_county_district)

### Apportionment Differences per county
Difference from calculated 2020 to stated FY2025 apportionment

In [90]:
# display the apportionment column list defined for the apportionment charts
apportionment_cols

['apportionment_calc_2010',
 'apportionment_calc_2020',
 'apportionment_dla_2025']

In [91]:
# per district: calculated-2020 vs. DLA FFY25 apportionments beside their % difference
for district in melt["caltrans_district"].unique():
    district_rows = melt[melt["caltrans_district"] == district]

    # the two apportionment series, bars grouped per county
    amt_diff = alt.Chart(
        district_rows[district_rows["variable"].isin(["apportionment_calc_2020", "apportionment_dla_2025"])]
    ).mark_bar().encode(
        x="county:N",
        y="metric:Q",
        color="variable:N",
        xOffset="variable:N",
        row="caltrans_district",
        tooltip=["county", "variable", "metric"],
    ).properties(
        width=500,
        title="Difference in Apportionments per County. Apportionment calculated using 2020 census compared to stated FFY25 apportionments",
    )

    # percent difference between the two series
    amt_pct_diff = alt.Chart(
        district_rows[district_rows["variable"] == "2020_fy25_amt_%_diff"]
    ).mark_bar().encode(
        x="county:N",
        y="metric:Q",
        color="variable:N",
        row="caltrans_district",
        tooltip=["county", "variable", "metric"],
    ).properties(
        width=500,
        # title typo fixed ("diffference" -> "difference")
        title="% difference in Apportionments per County",
    )

    display(
        alt.hconcat(amt_diff, amt_pct_diff),
    )

### Population Differences per county
Difference from 2020 rural county census to stated FY2025 population.

As stated above:
>Previous Grant Manager used a combination of population and demographic information she received from DOTP along with these FTA maps to determine the rural population in each of the CA regions
>
>We previously had discussed updating this formula to match how the FTA funds have been apportioned to California, but that conversation was put on hold for a much deeper discussion on the formula.

In [65]:
# display the population column list defined for the population charts
pop_cols

['pop_rur_2010', 'pop_rur_2020', 'pop_rur_2025_dla']

In [93]:
# names of the absolute-difference metrics (not plotted in this cell)
diff_cols = ["2020_fy25_amt_diff", "2020_fy25_pop_diff"]

# per district: 2020 census vs. DLA FFY25 rural population beside their % difference
for district in melt["caltrans_district"].unique():
    district_rows = melt[melt["caltrans_district"] == district]

    compared_rows = district_rows[district_rows["variable"].isin(["pop_rur_2020", "pop_rur_2025_dla"])]
    pop_diff = (
        alt.Chart(compared_rows)
        .mark_bar()
        .encode(
            x="county:N",
            y="metric:Q",
            xOffset="variable:N",
            row="caltrans_district",
            color="variable:N",
            tooltip=["county", "variable", "metric"],
        )
        .properties(width=600, title="Rural Populations per County in Districts")
    )

    pct_rows = district_rows[district_rows["variable"] == "2020_fy25_pop_%_diff"]
    pop_pct_diff = (
        alt.Chart(pct_rows)
        .mark_bar()
        .encode(
            x="county:N",
            y="metric:Q",
            color="variable:N",
            tooltip=["county", "variable", "metric"],
        )
        .properties(width=600, title="% Difference in Rural Populations per County")
    )

    display(
        alt.hconcat(pop_diff, pop_pct_diff),
    )

### DEPRECATED! updated charts above might have solved this
~~5311 Apportionments ($) and Rural Populations per County~~
Chart to compare 2020 to 2025 apportionment and population data and their differences, for each county

In [96]:
def _district_bar_chart(rows, title, width, grouped=False, facet=False):
    """Bar chart of `metric` per county for the given long-form rows.

    grouped: offset the bars by `variable` so the series sit side by side.
    facet:   add a `caltrans_district` row facet.
    """
    encodings = {
        "x": "county:N",
        "y": "metric:Q",
        "color": "variable:N",
        "tooltip": ["county", "variable", "metric"],
    }
    if grouped:
        encodings["xOffset"] = "variable:N"
    if facet:
        encodings["row"] = "caltrans_district"
    return alt.Chart(rows).mark_bar().encode(**encodings).properties(width=width, title=title)


# per district: apportionment comparison and % difference, then population comparison and % difference
for district in melt["caltrans_district"].unique():
    district_rows = melt[melt["caltrans_district"] == district]

    amt_diff = _district_bar_chart(
        district_rows[district_rows["variable"].isin(["apportionment_calc_2020", "apportionment_dla_2025"])],
        title="Difference in Apportionments per County. Apportionment calculated using 2020 census compared to stated FFY25 apportionments",
        width=500,
        grouped=True,
        facet=True,
    )

    amt_pct_diff = _district_bar_chart(
        district_rows[district_rows["variable"] == "2020_fy25_amt_%_diff"],
        # title typo fixed ("diffference" -> "difference")
        title="% difference in Apportionments per County",
        width=500,
        facet=True,
    )

    display(
        alt.hconcat(amt_diff, amt_pct_diff),
    )

    pop_diff = _district_bar_chart(
        district_rows[district_rows["variable"].isin(["pop_rur_2020", "pop_rur_2025_dla"])],
        title="Rural Populations per County in Districts",
        width=600,
        grouped=True,
        facet=True,
    )

    pop_pct_diff = _district_bar_chart(
        district_rows[district_rows["variable"] == "2020_fy25_pop_%_diff"],
        title="% Difference in Rural Populations per County",
        width=600,
    )

    display(
        alt.hconcat(pop_diff, pop_pct_diff),
    )

In [None]:


# app_chart = alt.Chart(melt[melt["variable"].isin(
#     [
#         "apportionment_calc_2020",
#         "apportionment_dla_2025",
#         "2020_fy25_amt_diff",
#         # "2020_fy25_amt_%_diff" # too small for chart
#     ]
# )]).mark_bar().encode(
#     x = alt.X("variable"),
#     y = alt.Y("metric"),
#     row = "county",
#     tooltip=["county","variable","metric"],
#     color = "variable",
# ).properties(title = "apportionments",width=300)#.resolve_scale(y="independent")

# pop_chart = alt.Chart(melt[melt["variable"].isin([
#     "pop_rur_2020",
#     "pop_rur_2025_dla",
#     "2020_fy25_pop_diff",
#     # "2020_fy25_pop_%_diff" # too small for chart
# ])]).mark_bar().encode(
#     x = alt.X("variable"),
#     y = alt.Y("metric"),
#     row = "county",
#     tooltip=["county","variable","metric"],
#     color = "variable",
# ).properties(title = "populations",width=300)#.resolve_scale(y="independent")

# # diff_chart = alt.Chart(melt[melt["variable"].isin(diff_cols)]).mark_bar().encode(
# #     x = alt.X("variable"),
# #     y = alt.Y("metric"),
# #     row = "county",
# #     tooltip=["county","variable","metric"],
# #     color = "variable"
# # ).resolve_scale(y="independent").properties(title = "differences",width=300)

# app_pop_chart = alt.hconcat(
#     app_chart, 
#     pop_chart, 
#     # diff_chart
# )


# print(f"""
# County with the largest population difference, from 2020 rural census to DLA 2025 population:
# {melt[melt["variable"]=="2020_fy25_pop_diff"].loc[[melt[melt["variable"]=="2020_fy25_pop_diff"]["metric"].abs().idxmax()]]}

# County with the largest apportionment difference, from calculated 2020 apportionment to DLA 2025 apportionment:
# {melt[melt["variable"]=="2020_fy25_amt_diff"].loc[[melt[melt["variable"]=="2020_fy25_amt_diff"]["metric"].abs().idxmax()]]}

# """)
# app_pop_chart

### Full Data Table

In [22]:
# full comparison table, shown without the `id` and `_merge` columns
compare_merge.drop(columns=["id","_merge"])

Unnamed: 0,county,caltrans_district,pop_rur_2010,apportionment_calc_2010,pop_rur_2020,apportionment_calc_2020,pop_rur_2025_dla,apportionment_dla_2025,2020_fy25_amt_diff,2020_fy25_pop_diff,2020_fy25_amt_%_diff,2020_fy25_pop_%_diff
0,Alpine,10,1175,48000.0,1204,48000.0,1119,48000.0,0.0,-85,0.0,-7.0
1,Amador,10,23016,400197.85,28020,402349.01,46118,357467.0,-44882.01,18098,-11.0,65.0
2,Butte,3,41584,723213.86,44478,638791.07,136143,1055644.0,416852.93,91665,65.0,206.0
3,Calaveras,10,34370,597716.36,37128,533198.1,50990,395251.0,-137947.1,13862,-26.0,37.0
4,Colusa,3,6795,118011.14,9326,133783.69,27483,212945.0,79161.31,18157,59.0,195.0
5,Del Norte,1,9634,167399.47,12123,173966.49,33612,260478.0,86511.51,21489,50.0,177.0
6,El Dorado,3,62827,1092765.19,70112,1007059.14,100322,777840.0,-229219.14,30210,-23.0,43.0
7,Fresno,6,100537,1748782.73,113625,1632183.91,293930,2279339.0,647155.09,180305,40.0,159.0
8,Glenn,3,11494,199756.74,11917,171007.01,34561,267838.0,96830.99,22644,57.0,190.0
9,Humboldt,1,40062,696736.56,43033,618031.64,136463,1058125.0,440093.36,93430,71.0,217.0
