In [1]:
import pandas as pd

In [76]:
# first, let's deal with all of the employed, unemployed columns
def pivot_year(df_to_pivot, column): # this function combines the year and column, putting the year first for sorting order
    to_return = df_to_pivot.pivot(index="FIPS_Code",columns="Year",values=column)
    to_return.columns = to_return.columns.astype("str")+"_"+column
    return to_return
def flip_year(name):
    parts = name.split("_")
    return "_".join(parts[1:])+"_"+parts[0]
    
df = pd.read_csv("data/county_unemployment.csv",dtype={"FIPS_Code":"str"})
df["Civilian_labor_force"] = df.Employed + df.Unemployed
df["Unemployment_rate"] = df.Unemployed/df.Civilian_labor_force
df_buildup = pd.concat([pivot_year(df,"Employed"),pivot_year(df,"Unemployed"),pivot_year(df,"Civilian_labor_force"),pivot_year(df,"Unemployment_rate")],axis=1)
# fixing the sorting order is kind of tricky.  You'll notice that the pivot year puts the year first
df_buildup = df_buildup[sorted(df_buildup.columns)] # so we sort the column names
df_buildup.columns = [flip_year(x) for x in df_buildup.columns] # and then flip them around for the original order
# next, we'll add back the Median Household income as percentage of state total
# we have to get the state total first
df_buildup = pd.concat([df.groupby("FIPS_Code")[["State","Area_Name","Rural_Urban_Continuum_Code_2013","Urban_Influence_Code_2013","Metro_2013"]].first(),
                        df_buildup,
                        df.groupby("FIPS_Code")[["Median_Household_Income_2021"]].first()
                        ],axis=1).reset_index()
# recovering the Median Household income is not entirely possible, but we can try to get close
# Start by computing state values for the labor force in 2021
state_labor_force = df_buildup.groupby("State")["Civilian_labor_force_2021"].sum()
# Now we can add a column that's the percentage of the state labor force
df_buildup["percent_of_state_labor_force"] = df_buildup.apply(lambda x: x["Civilian_labor_force_2021"]/state_labor_force[x["State"]],axis=1)
# Now we can multiply that column by the median household income, and sum the counties within each state to get the weighted average
df_buildup["county_weighted_mhhi"] = df_buildup["percent_of_state_labor_force"] * df_buildup["Median_Household_Income_2021"]
state_mhi_2021 = df_buildup.groupby("State")["county_weighted_mhhi"].sum()
# Lastly, compute the percentage of the state
df_buildup["Med_HH_Income_Percent_of_State_Total_2021"] = df_buildup.apply(lambda x: x["Median_Household_Income_2021"]/state_mhi_2021[x["State"]],axis=1)
df_buildup["State_FIPS"] = df_buildup["FIPS_Code"].apply(lambda x: x[0:2]+"000")
# Now we can add in the county and state level information
state_level = pd.concat([df_buildup.groupby("State").sum(),df_buildup.groupby("State")["State_FIPS"].first()],axis=1).reset_index()
state_level = state_level.drop(columns=["Rural_Urban_Continuum_Code_2013","Urban_Influence_Code_2013","Metro_2013"])
state_level.rename(columns={"State_FIPS":"FIPS_Code"},inplace=True)
combined = pd.concat([df_buildup,state_level])
# fix the area name
combined["Area_Name"] = combined.apply(lambda x: x["Area_Name"] if not pd.isna(x["Area_Name"]) else x["State"],axis=1)
# add in the country level data
country_level = state_level.sum()
country_level.FIPS_Code = "00000"
country_level.State = "US"
country_level["Area_Name"]="United States"
country_level = pd.DataFrame(country_level).T
combined = pd.concat([combined,country_level],axis=0).sort_values("FIPS_Code")
combined.drop(columns=["State_FIPS","percent_of_state_labor_force","county_weighted_mhhi"],inplace=True)
combined # this is pretty close...I give up (should also fix the median household income columns).

Unnamed: 0,FIPS_Code,State,Area_Name,Rural_Urban_Continuum_Code_2013,Urban_Influence_Code_2013,Metro_2013,Civilian_labor_force_2000,Employed_2000,Unemployed_2000,Unemployment_rate_2000,...,Civilian_labor_force_2021,Employed_2021,Unemployed_2021,Unemployment_rate_2021,Civilian_labor_force_2022,Employed_2022,Unemployed_2022,Unemployment_rate_2022,Median_Household_Income_2021,Med_HH_Income_Percent_of_State_Total_2021
0,00000,US,United States,,,,142279084.0,136601138.0,5677946.0,136.834665,...,161847646.0,153188350.0,8659296.0,145.975994,164388718.0,158392355.0,5996363.0,112.762234,184954223.0,2702.308778
1,01000,AL,AL,,,,2147180.0,2047733.0,99447.0,3.732976,...,2259355.0,2183333.0,76022.0,2.549921,2286036.0,2226673.0,59363.0,1.995848,3214393.0,56.970229
0,01001,AL,Autauga,2.0,2.0,1.0,21861.0,20971.0,890.0,0.040712,...,26545.0,25809.0,736.0,0.027727,26789.0,26181.0,608.0,0.022696,66444.0,1.177619
1,01003,AL,Baldwin,3.0,2.0,1.0,69979.0,67370.0,2609.0,0.037283,...,99953.0,97034.0,2919.0,0.029204,102849.0,100432.0,2417.0,0.0235,65658.0,1.163688
2,01005,AL,Barbour,6.0,6.0,0.0,11449.0,10812.0,637.0,0.055638,...,8280.0,7821.0,459.0,0.055435,8241.0,7906.0,335.0,0.04065,38649.0,0.684995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3134,56037,WY,Sweetwater,5.0,8.0,0.0,20623.0,19822.0,801.0,0.03884,...,19354.0,18243.0,1111.0,0.057404,19507.0,18696.0,811.0,0.041575,74677.0,1.1056
3135,56039,WY,Teton,7.0,8.0,0.0,14126.0,13791.0,335.0,0.023715,...,15906.0,15375.0,531.0,0.033384,16647.0,16193.0,454.0,0.027272,102709.0,1.520617
3136,56041,WY,Uinta,7.0,8.0,0.0,10414.0,10004.0,410.0,0.03937,...,8812.0,8355.0,457.0,0.051861,8863.0,8524.0,339.0,0.038249,70162.0,1.038755
3137,56043,WY,Washakie,7.0,11.0,0.0,4287.0,4093.0,194.0,0.045253,...,3939.0,3776.0,163.0,0.041381,3858.0,3704.0,154.0,0.039917,62176.0,0.920522
