In [8]:
import pandas as pd
import matplotlib.pyplot as plt
from pandas.api.types import is_numeric_dtype
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
import datetime
from homebrewedFunctions.functions import *
all_keys = pd.read_csv("StateGovFinances2005to2021.csv", index_col = ["State", "Year", "Format"], low_memory =False).sort_index().keys()
all_keys = [k for k in all_keys if "NAN" not in k]
keys_dict = {"Revenue": all_keys[:52],
            "Expenditure": [k for k in all_keys[52:121] if "CAPITAL OUTLAY" not in k],
            "Debt": [k for k in all_keys[121:] if "CAPITAL OUTLAY" not in k]}
            

stack_dfs = {"Expenditure": pd.read_csv("StateGovFinances2005to2021.csv", index_col = ["State", "Year", "Format"], usecols = ["State", "Year", "Format", "1GENERAL REVENUE", "1DEBT OUTSTANDING"] + keys_dict["Expenditure"], low_memory =False).sort_index(),
             "Revenue":pd.read_csv("StateGovFinances2005to2021.csv", index_col = ["State", "Year", "Format"], usecols = ["State", "Year", "Format", "1EXPENDITURE", "1DEBT OUTSTANDING"] + keys_dict["Revenue"], low_memory =False).sort_index(),
             "Debt":pd.read_csv("StateGovFinances2005to2021.csv", index_col = ["State", "Year", "Format"], usecols = ["State", "Year", "Format", "1EXPENDITURE", "1GENERAL REVENUE"] + keys_dict["Debt"], low_memory =False).sort_index()}

panel_dfs_dict = {}
for rev_exp in stack_dfs.keys():
    panel_dfs_dict[rev_exp] = {}
    for k in stack_dfs[rev_exp].index.get_level_values(2).unique():
        panel_dfs_dict[rev_exp][k] = stack_dfs[rev_exp][stack_dfs[rev_exp].index.get_level_values(2)==k].reset_index().set_index(["State","Year"]).sort_index()
        del panel_dfs_dict[rev_exp][k]["Format"]
    panel_dfs_dict[rev_exp] = {k:panel_dfs_dict[rev_exp][k] for k in ["Local government amount",'State & local government amount', 'State government amount']}

for key, dct in panel_dfs_dict.items():
    for k, df in dct.items():
        for col in df.columns:
            if is_numeric_dtype(df[col]):
                df[col] = df[col].fillna(0)

            else:
                try:
                    df[col] = pd.to_numeric(df[col], errors = "coerce").fillna(0).astype(int)
                except:
                    pass
        if key == "Revenue":
            df["1TOTAL INCOME"] = df["1INDIVIDUAL INCOME"].add(df["1CORPORATE INCOME"])
            df["1PROPERTY AND SPECIAL ASSESSMENTS"] = df[["1PROPERTY", "1SPECIAL ASSESSMENTS"]].sum(axis = 1)
            df["1DEFICIT"] = df["1EXPENDITURE"].sub(df["1GENERAL REVENUE"])
            
        # warning indicates that copying dataframe will defragment it;
        #  not sure if this actually fixes the problem
        df = df.copy()                
cpi_code = {"CPI":"CPIAUCSL"}
start = datetime.datetime(1947,1,1)
end = datetime.datetime.now()
cpi = gather_data(cpi_code, start, end, freq = "A").reset_index().rename(columns = {"DATE": "Year"})
cpi["Year"] = pd.to_datetime(cpi["Year"].astype(str).str[:4], format = "%Y")
cpi["Year"] = cpi["Year"].astype(str)
cpi.set_index("Year", inplace = True)
cpi["CPI"] = cpi["CPI"].div(cpi["CPI"].iloc[-2]).astype(float)
efnagdp = pd.read_csv("EFNAGDPTaxes.csv", parse_dates = ["Year"]).set_index(["State", "Year"]).sort_index()
map_keys = ["GENERAL REVENUE","GENERAL REVENUE FROM OWN SOURCES", "EXPENDITURE", "DEFICIT", 
        "FROM FEDERAL GOVERNMENT", "FROM STATE GOVERNMENT", "FROM LOCAL GOVERNMENTS" ,
        "TAXES", "OTHER TAXES", "CURRENT CHARGES", "OTHER CHARGES", 'INDIVIDUAL INCOME', 'CORPORATE INCOME', #'TOTAL INCOME',
        "PROPERTY", "SPECIAL ASSESSMENTS"]

# keys = ["GENERAL REVENUE","GENERAL REVENUE FROM OWN SOURCES", "EXPENDITURE", "DEFICIT", 
#         "FROM FEDERAL GOVERNMENT", "FROM STATE GOVERNMENT", "FROM LOCAL GOVERNMENTS" ,
#         "TAXES", "OTHER TAXES", "CURRENT CHARGES", "OTHER CHARGES", 'INDIVIDUAL INCOME', 'CORPORATE INCOME', 'TOTAL INCOME',
#         "PROPERTY", "SPECIAL ASSESSMENTS", "PROPERTY AND SPECIAL ASSESSMENTS",  
#         "SALES AND GROSS RECEIPTS", "GENERAL SALES", "MOTOR FUEL", "GAS SUPPLY", "EDUCATION", "HIGHER EDUCATION", "PUBLIC WELFARE"]	


def set_plot_dfs(panel_dfs_dict, keys, efnagdp, cpi):
    # keys = ["1" + k for k in keys]
    plot_dfs = {}
    for key in panel_dfs_dict.keys():
        plot_dfs[key] = {}
        plot_dfs[key]["Level"] = panel_dfs_dict[key].mul(10**3).copy()
        plot_dfs[key]["Level"].rename(columns = {k:k.replace("1", "").title() for k in plot_dfs[key]["Level"].keys()}, inplace = True)
    #     plot_dfs[key]["Level"]["Total Income"] = plot_dfs[key]["Level"]["Individual Income"].add(plot_dfs[key]["Level"]["Corporate Income"])
    #     plot_dfs[key]["Level"]["Property and Special Assessments"] = plot_dfs[key]["Level"][["Property", "Special Assessments"]].sum(axis = 1)
    #     plot_dfs[key]["Level"]["Deficit"] = plot_dfs[key]["Level"]["Expenditure"].sub(plot_dfs[key]["Level"]["General Revenue"])
        plot_dfs[key]["Level"]["GDP"] = efnagdp["GDP"]
        plot_dfs[key]["Real Level"] = plot_dfs[key]["Level"].div(cpi["CPI"], level = "Year", axis = 0)
        plot_dfs[key]["Level"]["Population"] = efnagdp["Population"]
        plot_dfs[key]["Percent of General Revenue"] = plot_dfs[key]["Level"].apply(lambda x: pd.to_numeric(x).div(plot_dfs[key]["Level"]["General Revenue"]).mul(100))    
        plot_dfs[key]["Percent of GDP"] = plot_dfs[key]["Level"].apply(lambda x: pd.to_numeric(x).div(plot_dfs[key]["Level"]["GDP"]).mul(100))
        
        plot_dfs[key]["Real Value Per Capita"] = plot_dfs[key]["Real Level"].apply(lambda x: pd.to_numeric(x).div(plot_dfs[key]["Level"]["Population"]))    
        for k in plot_dfs[key].keys():
            plot_dfs[key][k]["EFNA"] = efnagdp["EFNA"]
    return plot_dfs
plot_dfs = {}

for key in panel_dfs_dict.keys():
    plot_dfs[key] = set_plot_dfs(panel_dfs_dict[key], keys_dict[key], efnagdp, cpi)


import os
for revexp_key in plot_dfs.keys():
    for key in plot_dfs[revexp_key].keys():
        try:
            os.mkdir(f"outputs/{key}")
        except:
            pass

regions_df = pd.read_csv("USCensusRegions.csv")#.set_index("State")
# scatter_figs = {}
for revexp_key in plot_dfs.keys():
    for key, p_dfs in plot_dfs[revexp_key].items():
        for p_dfskey, df in p_dfs.items():
            map_figs = {}
            html_path = f"outputs/{key}/ScatterPlots{revexp_key}{key}{p_dfskey}.html"
            create_scatter_dropdown(df, regions_df=regions_df,
                                    filename = html_path, 
                                    show_fig = False)
            for name in df.keys():
                map_figs[name] = create_map(df.reset_index(), name, time_name = "Year")
            combined_map_fig = combine_map_figs(map_figs)
            html_path = f"outputs/{key}/MapPlotsByVariableAndYear{revexp_key}{key}{p_dfskey}.html"
            combined_map_fig.write_html(html_path)
regions_df = pd.read_csv("USCensusRegions.csv")#.set_index("State")

for revexp_key in plot_dfs.keys():
    for key, p_dfs in plot_dfs[revexp_key].items():
        # figs = {k: line_dropdown(dataframe, regions_df) for k, dataframe in p_dfs.items()}
        # fig = dict_of_line_figs_to_dropdown_fig(figs, show_fig = False, use_sliders = True)
        filename = f"outputs/{key}/LinePlotsStateFinances{revexp_key}{key}.html"
        fig = line_dropdown(p_dfs, regions_df)
        fig.write_html(filename, config=dict(displayModeBar=True))


In [9]:
### NEED TO IDENTIFY SUBCOMPONENTS OF EXPENDITURES

start_year = 2005
areas = {"Revenue":{"Revenue Source by Government" : ["General Revenue From Own Sources", "From Federal Government", "From State Government", "From Local Governments"],
         "Taxes": ["Intergovernmental Revenue", 'Property', 'Sales And Gross Receipts', 
                   'Individual Income', 'Corporate Income', 'Motor Vehicle License', 'Other Taxes', 
                   'Current Charges', 'Interest Earnings', 'Special Assessments', 'Sale Of Property', 'Other General Revenue']},
        "Expenditure": {"Expenditures":[
            "Intergovernmental Expenditure", "Education", "Libraries", "Public Welfare", "Hospitals", 
            "Health", "Health", "Employment Security Administration", "Veterans' Services",
            "Highways", "Air Transportation (Airports)", "Parking Facilities", "Sea and Inland Port Facilities",
            "Police Protection", "Fire Protection", "Correction", "Protective Inspection And Regulation",
            "Natural Resources", "Parks and Recreation", "Housing and Community Development", "Sewarage",
            "Solid Waste Management", "Financial Administration","Judicial And Legal", "General Public Buildings",
            "Other Governmental Administration", "Interest On General Debt", "Miscellaneous Commercial Activities",
            "Other And Unallocable", "Utility Expenditure", "Liquor Stores Expenditure", 
            "Insurance Trust Expenditure"]},
            # "Debt": {""}
}
figs = {}
for revexp_key in plot_dfs.keys():
    for key, p_dfs in plot_dfs[revexp_key].items():
        figs[key] = {}
        for form, plot_df in p_dfs.items():# ["Level", "Real Level", "Percent of General Revenue", "Percent of GDP", "Real Value Per Capita"]:
            figs[key][form] = {}
            df = plot_df.copy()
            df = df.reset_index().melt(id_vars=["State","Year"],
                var_name="Name", 
                value_name="Value")
            df = df.set_index(["State", "Year"]).round(2)
            states = df.index.get_level_values("State").unique()

            for components_group, components in areas[key].items():


                figs[key][form][components_group] = {}

                for state in states:
                    plot_df = df.loc[state].reset_index()#.loc[start_year:].reset_index()
                    plot_df = plot_df[plot_df["Name"].isin(components)].dropna()
                    plot_df["Value"] = pd.to_numeric(plot_df["Value"])
                    title_key = f"{components_group} {key}<br>{form}<br>{state}"
                    px_fig = px.area(
                        plot_df, x="Year", y="Value", color="Name", title = title_key)

                    figs[key][form][components_group][state] = px_fig
                figs[key][form][components_group]  = dict_of_figs_to_dropdown_fig(figs[key][form][components_group], 
                                                                                show_fig = False,
                                                                                use_sliders = True)
                figs[key][form][components_group].write_html(f"outputs/{key}/AreaPlots{rev_exp_key}{key}{form}{components_group}Figs.html")

AttributeError: 'dict' object has no attribute 'reset_index'

In [4]:
for i, k in enumerate(all_keys):
    print(i, k)

0 1GENERAL REVENUE
1 1INTERGOVERNMENTAL REVENUE
2 1FROM FEDERAL GOVERNMENT
3 1FROM STATE GOVERNMENT
4 1FROM LOCAL GOVERNMENTS
5 1GENERAL REVENUE FROM OWN SOURCES
6 1TAXES
7 1PROPERTY
8 1SALES AND GROSS RECEIPTS
9 1GENERAL SALES
10 1SELECTIVE SALES
11 1MOTOR FUEL
12 1ALCOHOLIC BEVERAGE
13 1TOBACCO PRODUCTS
14 1PUBLIC UTILITIES
15 1OTHER SELECTIVE SALES
16 1INDIVIDUAL INCOME
17 1CORPORATE INCOME
18 1MOTOR VEHICLE LICENSE
19 1OTHER TAXES
20 1CHARGES AND MISCELLANEOUS GENERAL  REVENUE
21 1CURRENT CHARGES
22 1EDUCATION
23 1INSTITUTIONS  OF HIGHER EDUCATION
24 1SCHOOL LUNCH SALES (GROSS)
25 1HOSPITALS
26 1HIGHWAYS
27 1AIR TRANSPORTATION (AIRPORTS)
28 1PARKING FACILITIES
29 1SEA AND INLAND PORT FACILITIES
30 1NATURAL RESOURCES
31 1PARKS AND RECREATION
32 1HOUSING AND COMMUNITY DEVELOPMENT
33 1SEWERAGE
34 1SOLID WASTE MANAGEMENT
35 1OTHER CHARGES
36 1MISCELLANEOUS GENERAL REVENUE
37 1INTEREST EARNINGS
38 1SPECIAL ASSESSMENTS
39 1SALE OF PROPERTY
40 1OTHER GENERAL REVENUE
41 1UTILITY REVENUE
42

In [48]:
import pandas as pd
import matplotlib.pyplot as plt
from pandas.api.types import is_numeric_dtype
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots
import datetime
from homebrewedFunctions.functions import *
stack_dfs = pd.read_csv("StateGovFinances2005to2021.csv", index_col = ["State", "Year", "Format"], low_memory =False).sort_index()
panel_dfs_dict = {k:stack_dfs[stack_dfs.index.get_level_values(2)==k].reset_index().set_index(["State","Year"]).sort_index() for k in stack_dfs.index.get_level_values(2).unique()}
panel_dfs_dict = {k:panel_dfs_dict[k] for k in ["Local government amount",'State & local government amount', 'State government amount']}
# panel_dfs_dict["State government amount"][]


In [49]:
for key, df in panel_dfs_dict.items():
    for col in df.columns:
        if is_numeric_dtype(df[col]):
            df[col] = df[col].fillna(0)

        else:
            try:
                df[col] = pd.to_numeric(df[col], errors = "coerce").fillna(0).astype(int)
            except:
                pass
    df["1TOTAL INCOME"] = df["1INDIVIDUAL INCOME"].add(df["1CORPORATE INCOME"])
    df["1PROPERTY AND SPECIAL ASSESSMENTS"] = df[["1PROPERTY", "1SPECIAL ASSESSMENTS"]].sum(axis = 1)
    df["1DEFICIT"] = df["1EXPENDITURE"].sub(df["1GENERAL REVENUE"])
    # warning indicates that copying dataframe will defragment it;
    #  not sure if this actually fixes the problem
    df = df.copy()


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented frame, use `newframe = frame.copy()`


DataFrame is highly fragmented.  This is usually the result of calling `frame.insert` many times, which has poor performance.  Consider joining all columns at once using pd.concat(axis=1) instead. To get a de-fragmented fr

In [50]:
cpi_code = {"CPI":"CPIAUCSL"}
start = datetime.datetime(1947,1,1)
end = datetime.datetime.now()
cpi = gather_data(cpi_code, start, end, freq = "A").reset_index().rename(columns = {"DATE": "Year"})
cpi["Year"] = pd.to_datetime(cpi["Year"].astype(str).str[:4], format = "%Y")
cpi["Year"] = cpi["Year"].astype(str)
cpi.set_index("Year", inplace = True)
cpi["CPI"] = cpi["CPI"].div(cpi["CPI"].iloc[-2]).astype(float)
cpi


Unnamed: 0_level_0,CPI
Year,Unnamed: 1_level_1
1947-01-01,0.073290
1948-01-01,0.078913
1949-01-01,0.078140
1950-01-01,0.078971
1951-01-01,0.085242
...,...
2020-01-01,0.849509
2021-01-01,0.889285
2022-01-01,0.960354
2023-01-01,1.000000


In [51]:
efnagdp = pd.read_csv("EFNAGDPTaxes.csv", parse_dates = ["Year"]).set_index(["State", "Year"]).sort_index()
efnagdp[["EFNA", "GDP"]]



Unnamed: 0_level_0,Unnamed: 1_level_0,EFNA,GDP
State,Year,Unnamed: 2_level_1,Unnamed: 3_level_1
AK,1985-01-01,8.04,
AK,1986-01-01,,
AK,1987-01-01,,
AK,1988-01-01,,
AK,1989-01-01,,
...,...,...,...
WY,2019-01-01,8.12,3.997140e+10
WY,2020-01-01,7.97,3.667550e+10
WY,2021-01-01,,4.217620e+10
WY,2022-01-01,,4.908060e+10


In [52]:
map_keys = ["GENERAL REVENUE","GENERAL REVENUE FROM OWN SOURCES", "EXPENDITURE", "DEFICIT", 
        "FROM FEDERAL GOVERNMENT", "FROM STATE GOVERNMENT", "FROM LOCAL GOVERNMENTS" ,
        "TAXES", "OTHER TAXES", "CURRENT CHARGES", "OTHER CHARGES", 'INDIVIDUAL INCOME', 'CORPORATE INCOME', #'TOTAL INCOME',
        "PROPERTY", "SPECIAL ASSESSMENTS"]
        #"PROPERTY AND SPECIAL ASSESSMENTS",  
        # "SALES AND GROSS RECEIPTS", "GENERAL SALES", "MOTOR FUEL", "GAS SUPPLY", "EDUCATION", "HIGHER EDUCATION", "PUBLIC WELFARE"]	

# keys = ["GENERAL REVENUE","GENERAL REVENUE FROM OWN SOURCES", "EXPENDITURE", "DEFICIT", 
#         "FROM FEDERAL GOVERNMENT", "FROM STATE GOVERNMENT", "FROM LOCAL GOVERNMENTS" ,
#         "TAXES", "OTHER TAXES", 'PROPERTY', 'SPECIAL ASSESSMENTS', 'PROPERTY AND SPECIAL ASSESSMENTS', 'SALES AND GROSS RECEIPTS', 'GENERAL SALES', 'SELECTIVE SALES', 'MOTOR FUEL', 'ALCOHOLIC BEVERAGE', 
#         'TOBACCO PRODUCTS', 'PUBLIC UTILITIES', 'OTHER SELECTIVE SALES', 'INDIVIDUAL INCOME', 'CORPORATE INCOME', "TOTAL INCOME", 'MOTOR VEHICLE LICENSE', 
#         'OTHER TAXES', 'CHARGES AND MISCELLANEOUS GENERAL  REVENUE', 'CURRENT CHARGES', 'EDUCATION', 'INSTITUTIONS  OF HIGHER EDUCATION', 
#         'SCHOOL LUNCH SALES (GROSS)', 'HOSPITALS', 'HIGHWAYS', 'AIR TRANSPORTATION (AIRPORTS)', 'PARKING FACILITIES', 'SEA AND INLAND PORT FACILITIES', 
#         'NATURAL RESOURCES', 'PARKS AND RECREATION', 'HOUSING AND COMMUNITY DEVELOPMENT', 'SEWERAGE', 'SOLID WASTE MANAGEMENT', 'OTHER CHARGES', 
#         'MISCELLANEOUS GENERAL REVENUE', 'INTEREST EARNINGS',  'SALE OF PROPERTY', 'OTHER GENERAL REVENUE', 'UTILITY REVENUE', 
#         'WATER SUPPLY', 'ELECTRIC POWER', 'GAS SUPPLY', 'TRANSIT', 'LIQUOR STORE REVENUE', 'INSURANCE TRUST REVENUE', 'UNEMPLOYMENT COMPENSATION', 
#         'EMPLOYEE RETIREMENT', "WORKERS' COMPENSATION", 'OTHER INSURANCE TRUST REVENUE',"EDUCATION", "HIGHER EDUCATION", "PUBLIC WELFARE"]
# keys = ["1" + k for k in keys]

In [53]:
plot_dfs = {}
keys = ["GENERAL REVENUE","GENERAL REVENUE FROM OWN SOURCES", "EXPENDITURE", "DEFICIT", 
        "FROM FEDERAL GOVERNMENT", "FROM STATE GOVERNMENT", "FROM LOCAL GOVERNMENTS" ,
        "TAXES", "OTHER TAXES", "CURRENT CHARGES", "OTHER CHARGES", 'INDIVIDUAL INCOME', 'CORPORATE INCOME', 'TOTAL INCOME',
        "PROPERTY", "SPECIAL ASSESSMENTS", "PROPERTY AND SPECIAL ASSESSMENTS",  
        "SALES AND GROSS RECEIPTS", "GENERAL SALES", "MOTOR FUEL", "GAS SUPPLY", "EDUCATION", "HIGHER EDUCATION", "PUBLIC WELFARE"]	
def set_plot_dfs(panel_dfs_dict, keys, efnagdp, cpi):
    keys = ["1" + k for k in keys]
    for key in panel_dfs_dict.keys():
        plot_dfs[key] = {}
        plot_dfs[key]["Level"] = panel_dfs_dict[key][keys].mul(10**3).copy()
        plot_dfs[key]["Level"].rename(columns = {k:k.replace("1", "").title() for k in plot_dfs[key]["Level"].keys()}, inplace = True)
    #     plot_dfs[key]["Level"]["Total Income"] = plot_dfs[key]["Level"]["Individual Income"].add(plot_dfs[key]["Level"]["Corporate Income"])
    #     plot_dfs[key]["Level"]["Property and Special Assessments"] = plot_dfs[key]["Level"][["Property", "Special Assessments"]].sum(axis = 1)
    #     plot_dfs[key]["Level"]["Deficit"] = plot_dfs[key]["Level"]["Expenditure"].sub(plot_dfs[key]["Level"]["General Revenue"])
        plot_dfs[key]["Level"]["GDP"] = efnagdp["GDP"]
        plot_dfs[key]["Real Level"] = plot_dfs[key]["Level"].div(cpi["CPI"], level = "Year", axis = 0)
        plot_dfs[key]["Level"]["Population"] = efnagdp["Population"]
        plot_dfs[key]["Percent of General Revenue"] = plot_dfs[key]["Level"].apply(lambda x: pd.to_numeric(x).div(plot_dfs[key]["Level"]["General Revenue"]).mul(100))    
        plot_dfs[key]["Percent of GDP"] = plot_dfs[key]["Level"].apply(lambda x: pd.to_numeric(x).div(plot_dfs[key]["Level"]["GDP"]).mul(100))
        
        plot_dfs[key]["Real Value Per Capita"] = plot_dfs[key]["Real Level"].apply(lambda x: pd.to_numeric(x).div(plot_dfs[key]["Level"]["Population"]))    
        for key in plot_dfs.keys():
            plot_dfs[key]["Level"]["EFNA"] = efnagdp["EFNA"]
    return plot_dfs
plot_dfs = set_plot_dfs(panel_dfs_dict, keys, efnagdp, cpi)


In [54]:
import os
for key in plot_dfs:
    try:
        os.mkdir(f"outputs/{key}")
    except:
        pass

In [55]:
regions_df = pd.read_csv("USCensusRegions.csv")#.set_index("State")
# scatter_figs = {}
for key, p_dfs in plot_dfs.items():
    for p_dfskey, df in p_dfs.items():
        map_figs = {}
        html_path = f"outputs/{key}/ScatterPlotsIncomePropertyAssessmentSalesFuelTaxesPctTotalRevenue{key}{p_dfskey}.html"
        create_scatter_dropdown(df, regions_df=regions_df,
                                filename = html_path, 
                                show_fig = False)
        for name in df.keys():
            map_figs[name] = create_map(df.reset_index(), name, time_name = "Year")
        combined_map_fig = combine_map_figs(map_figs)
        html_path = f"outputs/{key}/MapPlotsByVariableAndYear{key}{p_dfskey}.html"
        combined_map_fig.write_html(html_path)

In [56]:
plot_dfs = {}
keys = ["GENERAL REVENUE","GENERAL REVENUE FROM OWN SOURCES", "EXPENDITURE", "DEFICIT", 
        "FROM FEDERAL GOVERNMENT", "FROM STATE GOVERNMENT", "FROM LOCAL GOVERNMENTS" , "INTERGOVERNMENTAL REVENUE",
        "TAXES", "OTHER TAXES", 'PROPERTY', 'SPECIAL ASSESSMENTS', 'PROPERTY AND SPECIAL ASSESSMENTS', 'SALES AND GROSS RECEIPTS', 'GENERAL SALES', 'SELECTIVE SALES', 'MOTOR FUEL', 'ALCOHOLIC BEVERAGE', 
        'TOBACCO PRODUCTS', 'PUBLIC UTILITIES', 'OTHER SELECTIVE SALES', 'INDIVIDUAL INCOME', 'CORPORATE INCOME', "TOTAL INCOME", 'MOTOR VEHICLE LICENSE', 
        'OTHER TAXES', 'CHARGES AND MISCELLANEOUS GENERAL  REVENUE', 'CURRENT CHARGES', 'EDUCATION', 'INSTITUTIONS  OF HIGHER EDUCATION', 
        'SCHOOL LUNCH SALES (GROSS)', 'HOSPITALS', 'HIGHWAYS', 'AIR TRANSPORTATION (AIRPORTS)', 'PARKING FACILITIES', 'SEA AND INLAND PORT FACILITIES', 
        'NATURAL RESOURCES', 'PARKS AND RECREATION', 'HOUSING AND COMMUNITY DEVELOPMENT', 'SEWERAGE', 'SOLID WASTE MANAGEMENT', 'OTHER CHARGES', 
        'MISCELLANEOUS GENERAL REVENUE', 'INTEREST EARNINGS',  'SALE OF PROPERTY', 'OTHER GENERAL REVENUE', 'UTILITY REVENUE', 
        'WATER SUPPLY', 'ELECTRIC POWER', 'GAS SUPPLY', 'TRANSIT', 'LIQUOR STORE REVENUE', 'INSURANCE TRUST REVENUE', 'UNEMPLOYMENT COMPENSATION', 
        'EMPLOYEE RETIREMENT', "WORKERS' COMPENSATION", 'OTHER INSURANCE TRUST REVENUE',"EDUCATION", "HIGHER EDUCATION", "PUBLIC WELFARE"]
plot_dfs = set_plot_dfs(panel_dfs_dict, keys, efnagdp, cpi)

In [57]:
regions_df = pd.read_csv("USCensusRegions.csv")#.set_index("State")

for key, p_dfs in plot_dfs.items():
    # figs = {k: line_dropdown(dataframe, regions_df) for k, dataframe in p_dfs.items()}
    # fig = dict_of_line_figs_to_dropdown_fig(figs, show_fig = False, use_sliders = True)
    filename = f"outputs/{key}/LinePlotsStateFinancesAsPercentRevenuePercentGDPAndPerCapitaFigs{key}.html"
    fig = line_dropdown(p_dfs, regions_df)
    fig.write_html(filename, config=dict(displayModeBar=True))

In [58]:
start_year = 2005
areas = {"Revenue Source by Government" : ["General Revenue From Own Sources", "From Federal Government", "From State Government", "From Local Governments"],
         
    #  Omit revenue sources according to distinction of subcomponents in revenue data 
    #  Some sources additionally needed to be removed as their values perfectly explained
    #  accounting that sums to greater than 100 percent
     
         "Taxes": ["Intergovernmental Revenue", 'Property', 'Sales And Gross Receipts', 
                   'Individual Income', 'Corporate Income', 'Motor Vehicle License', 'Other Taxes', 
                #    'Charges And Miscellaneous General  Revenue', 
                   'Current Charges', 
                #    'Education', 'Institutions  Of Higher Education', 'School Lunch Sales (Gross)',
                #      'Hospitals', 'Highways', 'Air Transportation (Airports)', 'Parking Facilities', 'Sea And Inland Port Facilities', 'Natural Resources',
                #       'Parks And Recreation', 'Housing And Community Development', 'Sewerage', 'Solid Waste Management', 'Other Charges', 
                    #   'Miscellaneous General Revenue', 
                      'Interest Earnings', 'Special Assessments', 'Sale Of Property', 'Other General Revenue']}
                    #   'Utility Revenue']}
                    #   'Water Supply', 'Electric Power', 'Gas Supply', 'Transit', 
                    # 'Liquor Store Revenue',
                #    'Insurance Trust Revenue',
                #    'Unemployment Compensation', 'Employee Retirement', "Workers' Compensation", 'Other Insurance Trust Revenue']}
figs = {}
for key in plot_dfs.keys():
    figs[key] = {}
    for form, plot_df in plot_dfs[key].items():# ["Level", "Real Level", "Percent of General Revenue", "Percent of GDP", "Real Value Per Capita"]:
        figs[key][form] = {}
        df = plot_df.copy()
        df = df.reset_index().melt(id_vars=["State","Year"],
            var_name="Name", 
            value_name="Value")
        df = df.set_index(["State", "Year"]).round(2)
        states = df.index.get_level_values("State").unique()

        for components_group, components in areas.items():


            figs[key][form][components_group] = {}

            for state in states:
                plot_df = df.loc[state].reset_index()#.loc[start_year:].reset_index()
                plot_df = plot_df[plot_df["Name"].isin(components)].dropna()
                plot_df["Value"] = pd.to_numeric(plot_df["Value"])
                title_key = f"{components_group} {key}<br>{form}<br>{state}"
                px_fig = px.area(
                    plot_df, x="Year", y="Value", color="Name", title = title_key)

                figs[key][form][components_group][state] = px_fig
            figs[key][form][components_group]  = dict_of_figs_to_dropdown_fig(figs[key][form][components_group], 
                                                                              show_fig = False,
                                                                              use_sliders = True)
            figs[key][form][components_group].write_html(f"outputs/{key}/AreaPlots{key}{form}{components_group}Figs.html")

In [59]:
# spliced_revenue = pd.read_csv("SplicedStateGovFinances.csv")
# spliced_revenue.set_index(["State", "Year"]).sort_index()

In [13]:
# import os
# folder = "State government amount"
# frmt = "LevelDiscrepancies"
# regions_df = pd.read_csv("USCensusRegions.csv")#.set_index("State")
# # scatter_figs = {}
# df = diff_df.copy()
# map_figs = {}
# create_scatter_dropdown(df, regions_df=regions_df,
#                         filename = f"outputs/{folder}/ScatterPlotsIncomePropertyAssessmentSalesFuelTaxesPctTotalRevenue{folder}{frmt}.html", 
#                         show_fig = False)
# for name in df.keys():
#     map_figs[name] = create_map(df.reset_index(), name, time_name = "Year")
# combined_map_fig = combine_map_figs(map_figs)
# combined_map_fig.write_html(f"outputs/{folder}/MapPlotsByVariableAndYear{folder}{frmt}.html")

#     # combined_scatter_fig = dict_of_figs_line_figs_to_dropdown_fig(scatter_figs,regions_df)


# fig = line_dropdown(df, regions_df)
# # fig.show()
# fig.write_html(f"outputs/{folder}/LinePlotsStateFinancesAsPercentRevenuePercentGDPAndPerCapitaFigs{folder}{frmt}.html")