# Automate the Excel workbook linked to PMP Dashboard

In [1]:
import geopandas as gpd
import numpy as np
import pandas as pd
from calitp import *
from shared_utils import utils

# Formatting the nb
pd.options.display.max_columns = 100
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)



In [2]:
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/pmp_dashboard/"

## Load in data
### Load manually cleaned df so I can reference 

In [3]:
# DF that William manipulated
FILE_NAME_1 = "PMP Summary Report Data.xlsx"

In [4]:
# Sheets in William's notebook that I want
sheets_list = ["Fund by Division Data", "TPSOE Data", "Timeline Data", "PSOE Timeline"]

In [5]:
dict_df1 = pd.read_excel(f"{GCS_FILE_PATH}{FILE_NAME_1}", sheet_name=sheets_list)

In [6]:
division_df = to_snakecase(dict_df1.get("Fund by Division Data"))
tpsoe_df = to_snakecase(dict_df1.get("TPSOE Data"))
timeline_df = to_snakecase(dict_df1.get("Timeline Data"))
psoe_df = to_snakecase(dict_df1.get("PSOE Timeline"))

### Load in & clean data for each accounting period

In [7]:
div_crosswalks = {
    "State & Fed Mass Trans": "DRMT",
    "Statewide Planning": "DOTP",
    "Research": "DRISI",
    "PSR/PSSR Development": "DOTP",
    "Rail": "DRMT",
    "Planning Administration": "DOTP",
    "Regional Planning": "DOTP",
}

In [8]:
"""
Each time I receive new data for each accounting period and 
load data into function import_raw_data, the dataframe will be 
appended onto this list
"""
my_clean_dataframes = []

In [9]:
unwanted_appropriations = ["1850522", "22102", "22105"]

In [10]:
def import_raw_data(file_name: str, name_of_sheet: str, appropriations_to_filter: list):

    """Load the raw data and clean it up.

    Args:
        file_name: the Excel workbook
        name_of_sheet: the name of the sheet
        appropriations_to_filter: list of all the appropriations to be filtered out

    Returns:
        The cleaned df. Input the results into a list.

    """
    df = pd.read_excel(f"{GCS_FILE_PATH}{file_name}", sheet_name=name_of_sheet)

    # Get rid of the unnecessary header info
    # Stuff like "Enterprise Datalink Production download as of 05/23/2022"
    df = df.iloc[13:].reset_index(drop=True)

    # The first row contains column names - update it to the column
    df.columns = df.iloc[0]

    # Drop the first row as they are now column names
    df = df.drop(df.index[0]).reset_index(drop=True)

    # Drop rows with NA in PEC Class
    # Since those are probably the grand totals tagged at the end of the Excel sheet
    df = df.dropna(subset=["PEC Class"])

    # Snakecase
    df = to_snakecase(df)

    # Rename columns to mimc dashboard
    df = df.rename(
        columns={
            "ps_alloc": "ps_allocation",
            "ps_exp": "ps_expenditure",
            "ps_bal": "ps_balance",
            "total_projected_%": "total_%_expended",
            "oe_alloc": "oe_allocation",
            "oe_enc": "oe_encumbrance",
            "oe_exp": "oe_expenditure",
            "appr": "appropriation",
            "total_expended___encumbrance": "total_expenditure",
            "oe_bal_excl_pre_enc": "oe_balance",
            "oe__enc_+_oe_exp_projection": "oe_projection",
        }
    )

    # Certain appropriation(s) are filtered out:
    df = df[~df.appropriation.isin(appropriations_to_filter)]

    # Narrow down division names inot a new column
    df["division"] = df["pec_class_description"].replace(div_crosswalks)

    # Adding dataframe to an empty list called my_clean_dataframes
    my_clean_dataframes.append(df)

    """
    Drop AP
    """
    try:
        df = df.drop(columns="ap")
    except:
        pass
    return df

In [11]:
ap4 = import_raw_data("AP4.xls", "Download", unwanted_appropriations)

In [12]:
ap7 = import_raw_data("AP7.xls", "Download", unwanted_appropriations)

In [14]:
ap11 = import_raw_data(
    "FY 2122 AP11_Closed_PMP Summary Report.022822_Updated.xlsx",
    "Raw Data AP11 Closed",
    unwanted_appropriations,
)

In [13]:
ap4.head()

Unnamed: 0,appr_catg,fund,fund_description,appropriation,pec_class,pec_class_description,ps_allocation,ps_expenditure,ps_balance,ps_projection,ps_%_expended,py_pos_alloc,act__hours,oe_allocation,oe_encumbrance,oe_expenditure,oe_balance,oe_projection,oe_%_expended,total_allocation,total_expenditure,total_balance,total_projection,total_%_expended,division
0,2122,1,General Fund,22002,2030,Local Assistance,1500000,0.0,1500000.0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0%,1500000,0.0,1500000.0,0.0,0.0,Local Assistance
1,2122,1,General Fund,22030,2030,Local Assistance,4001000,135511.93,3865488.07,406535.79,0.03387,0,1334.5,237000,9857.31,0,227142.69,0,0,4238000,145369.24,4092630.76,406535.79,0.034301,Local Assistance
2,2122,1,General Fund,22030,2041,Research,210000,0.0,210000.0,0.0,0.0,0,0.0,9000,0.0,0,9000.0,0,0,219000,0.0,219000.0,0.0,0.0,DRISI
3,2122,1,General Fund,22030,3010,State & Fed Mass Trans,718000,16389.15,701610.85,49167.45,0.022826,0,168.0,55000,0.0,0,55000.0,0,0,773000,16389.15,756610.85,49167.45,0.021202,DRMT
4,2122,1,General Fund,22030,3020,Rail,1000,0.0,1000.0,0.0,0.0,0,0.0,0,0.0,0,0.0,0,0%,1000,0.0,1000.0,0.0,0.0,DRMT


In [82]:
ap11.head()

Unnamed: 0,appr_catg,fund,fund_description,appropriation,pec_class,pec_class_description,ps_allocation,ps_expenditure,ps_balance,ps_projection,year_end_expendded_pace,ps_%_expended,py_pos_alloc,act__hours,oe_allocation,oe_encumbrance,oe_expenditure,oe_balance,oe_projection,oe_%_expended,total_allocation,total_expenditure,total_balance,total_projection,total_%_expended,division
0,2122,1,General Fund,22002,2030,Local Assistance,1500000,0.0,1500000.0,0.0,0.0,0.0,0,0.0,0,0.0,0.0,0.0,0.0,0%,1500000,0.0,1500000.0,0.0,0.0,Local Assistance
1,2122,1,General Fund,22030,2030,Local Assistance,2625000,1265708.14,1359291.86,1380772.516364,0.526009,0.482175,0,13875.9,27000,1593.74,10119.4,15286.86,12633.085455,0.467892,2652000,1277421.28,1374578.72,1393405.601818,0.481682,Local Assistance
2,2122,1,General Fund,22030,3010,State & Fed Mass Trans,150000,145312.79,4687.21,158523.043636,1.05682,0.968752,0,1747.0,2000,0.0,0.0,2000.0,0.0,0,152000,145312.79,6687.21,158523.043636,0.956005,DRMT
3,2122,1,General Fund,22030,4010,Statewide Planning,150000,166142.55,-16142.55,181246.418182,1.208309,1.107617,0,2006.5,2000,0.0,0.0,2000.0,0.0,0,152000,166142.55,-14142.55,181246.418182,1.093043,DOTP
4,2122,41,Aeronautics Account STF,22001R,1000,Aeronautics,59000,26017.94,32982.06,28383.207273,0.481071,0.440982,0,299.0,0,0.0,13866.74,-13866.74,15127.352727,0%,59000,39884.68,19115.32,43510.56,0.676012,Aeronautics


In [84]:
#ap11.to_csv('./testing.csv')

In [15]:
set(ap4.columns).difference(set(ap11.columns))

set()

In [16]:
# ap7["total_balance"].sum()

In [17]:
# ap4["total_balance"].sum()

In [18]:
# ap12[['fund','appropriation','pec_class','ps_allocation']].sort_values(['ps_allocation',])

In [19]:
len(my_clean_dataframes)

3

## Fund by Division Data
* Attempt to mimic William's processed sheet called "division df"
* This dataframe is AP 11 with some stuff filtered

In [20]:
def create_fund_by_division(df):
    # Drop excluded cols
    excluded_cols = ["appr_catg", "act__hours", "py_pos_alloc"]
    df = ap11.drop(columns=excluded_cols)

    # Add a blank column for notes
    df["notes"] = np.nan

    return df

In [21]:
division_test = create_fund_by_division(ap11)

### Double checking: William's vs mine

In [22]:
test = set(division_test.appropriation.unique().tolist())
actual = set(division_df.appropriation.unique().tolist())

In [23]:
actual - test

set()

In [24]:
test - actual

set()

In [25]:
# division_test[['appropriation','ps_allocation']].sort_values(['appropriation', 'ps_allocation'])

In [26]:
# division_df[['appropriation','ps_allocation']].sort_values(['appropriation', 'ps_allocation'])

In [27]:
# division_test.loc[division_test["appropriation"] == "22030"]

In [28]:
# division_df.loc[division_df["appropriation"] == "22030"]

In [29]:
testing_cols = [
    "ps_allocation",
    "ps_expenditure",
    "ps_balance",
    "ps_projection",
    "oe_allocation",
    "oe_encumbrance",
    "oe_expenditure",
    "total_allocation",
    "total_expenditure",
    "total_balance",
]

In [30]:
for i in testing_cols:
    print("\n" + i)
    print(division_df[i].sum())
    print(division_test[i].sum())


ps_allocation
230928000
230928000

ps_expenditure
183147623.15
183147623.15000004

ps_balance
47780376.85
47780376.85

ps_projection
199797407.0727273
199797407.07272723

oe_allocation
232735000
232735000

oe_encumbrance
104147405.69
104147405.69000001

oe_expenditure
33504730.379999995
33504730.38

total_allocation
463663000
463663000

total_expenditure
320799759.2200001
320799759.22

total_balance
142863240.78
142863240.77999997


## TPSOE Data

In [31]:
# Reference point
# tpsoe_df.loc[tpsoe_df["division"] == "Aeronautics"]

In [32]:
# ap11.loc[ap11["division"] == "Aeronautics"]

### Function to clean

In [33]:
def cleaning_psoe_tpsoe(df, ps_or_oe: str):
    """
    Fill in the column type for either ps: personal services
    or oe: operating expense.
    """
    df["type"] = ps_or_oe

    """
    Strip away the prefixes from column names
    https://stackoverflow.com/questions/54097284/removing-suffix-from-dataframe-column-names-python
    Create suffix
    """
    suffix = f"{ps_or_oe}_"
    df.columns = df.columns.str.replace(suffix, "", regex=True)

    return df

### Function to create sheet

In [34]:
# Cols: for PS
tpsoe_ps_list = [
    "fund",
    "fund_description",
    "appropriation",
    "pec_class",
    "division",
    "ps_allocation",
    "ps_expenditure",
    "ps_balance",
    "ps_projection",
    "year_end_expendded_pace",
    "ps_%_expended",
]

# Cols for OE
tpsoe_oe_list = [
    "fund",
    "fund_description",
    "appropriation",
    "pec_class",
    "division",
    "oe_allocation",
    "oe_encumbrance",
    "oe_expenditure",
    "oe_balance",
    "oe_projection",
]

monetary_cols = [
    "allocation",
    "expenditure",
    "balance",
    "encumbrance",
    "projection",
]

order_of_cols = [
    "pec_class",
    "division",
    "fund",
    "fund_description",
    "appropriation",
    "type",
    "allocation",
    "expenditure",
    "balance",
    "encumbrance",
    "projection",
    "year_end_expendded_pace",
    "%_expended",
]

In [35]:
# chocolate

In [36]:
def create_tpsoe(df, ps_list: list, oe_list: list):
    """
    ps_list: a list of all the ps related columns.
    oe_list: a list of all the oe related columns.
    Use this to subset out the whole dataframe,
    one for personal services, one for operating expenses.
    """

    # Clean up and subset out the dataframe
    tpsoe_oe = cleaning_psoe_tpsoe(df[oe_list], "oe")
    tpsoe_ps = cleaning_psoe_tpsoe(df[ps_list], "ps")

    # Concat the two dataframes together
    c1 = pd.concat([tpsoe_ps, tpsoe_oe], sort=False)

    # Rearrange the columns to the right order
    c1 = c1[order_of_cols]

    # Add a notes column
    c1["notes"] = np.nan

    # Correct data types of monetary columns from objects to float
    c1[monetary_cols] = c1[monetary_cols].astype("float64")

    return c1

In [37]:
c1 = create_tpsoe(ap11, tpsoe_ps_list, tpsoe_oe_list)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


### Double checking

In [38]:
c1.shape

(96, 14)

In [39]:
tpsoe_df.shape

(100, 14)

In [40]:
# c1[['division', 'type','pec_class', 'allocation']].sort_values(['pec_class', 'allocation'])

In [41]:
# tpsoe_df[['division','type','pec_class', 'allocation']].sort_values(['pec_class', 'allocation'])

In [42]:
for i in monetary_cols:
    print("\n" + i)
    print(c1[i].sum())
    print(tpsoe_df[i].sum())


allocation
463663000.0
463663000

expenditure
216652353.53000003
216652353.52999997

balance
142863240.78
142863240.78

encumbrance
104147405.69
104147405.69

projection
340495427.72272724
340495427.72272724


In [43]:
test = set(c1.appropriation.unique().tolist())
actual = set(tpsoe_df.appropriation.unique().tolist())

In [44]:
actual - test

set()

In [45]:
test - actual

set()

In [46]:
test = set(c1.pec_class.unique().tolist())
actual = set(tpsoe_df.pec_class.unique().tolist())

In [47]:
actual - test

{1000, 2030, 2041, 3010, 3020, 4010, 4020, 4030, 4050}

In [48]:
test - actual

{'1000', '2030', '2041', '3010', '3020', '4010', '4020', '4030', '4050'}

## Timeline 

In [49]:
def create_timeline():
    """
    Loop through all the cleaned dfs in the list
    my_clean_dataframes. Tag each dataframe as 1,
    2,3,etc to fill in the column "ap" (aka accounting
    period) that differentiates which accounting period each df comes from.
    """
    keys_list = []

    for i, item in enumerate(my_clean_dataframes):
        keys_list.append(i + 1)

    """
    Stack all the dfs in my_clean_dataframes, starting 
    https://stackoverflow.com/questions/59267129/how-to-concatenate-multiple-dataframes-from-multiple-sources-in-pandas
    """
    c1 = (
        pd.concat(my_clean_dataframes, keys=keys_list)
        .rename_axis(("source", "tmp"))
        .reset_index(level=0)
        .reset_index(drop=True)
    )

    # Drop original accounting period column & replace the new source col
    c1 = c1.drop(
        columns=[
            "ap",
        ]
    ).rename(columns={"source": "ap"})
    return c1

In [50]:
keys_list = []

In [51]:
for i, item in enumerate(my_clean_dataframes):
    keys_list.append(i + 1)

In [52]:
keys_list

[1, 2, 3]

In [53]:
# https://stackoverflow.com/questions/59267129/how-to-concatenate-multiple-dataframes-from-multiple-sources-in-pandas
time_test = (
    pd.concat(my_clean_dataframes, keys=keys_list)
    .rename_axis(("source", "tmp"))
    .reset_index(level=0)
    .reset_index(drop=True)
)

In [54]:
time_test = time_test.drop(
    columns=[
        "ap",
    ]
).rename(columns={"source": "ap"})

In [55]:
# Certain appropriation(s) are filtered out:
# time_test = time_test[~time_test.appropriation.isin(unwanted_appropriations)]

In [56]:
# time_test.loc[time_test['appropriation'] == '22030']

In [57]:
timeline_df = timeline_df[timeline_df["ap"].isin([4, 7, 11])]

In [58]:
timeline_df["ap"].value_counts()

4     50
7     50
11    48
Name: ap, dtype: int64

In [59]:
timeline_df_cols = [
    "ps_alloc",
    "ps_exp",
    "ps_bal",
    "ps_projection",
    "oe_alloc",
    "oe_enc",
    "oe_exp",
    "oe_projection",
]

In [60]:
for i in timeline_df_cols:
    print("\n" + i)
    print(timeline_df[i].sum())


ps_alloc
694532000

ps_exp
362067639.31

ps_bal
332464360.69

ps_projection
590649886.2798702

oe_alloc
701335000

oe_enc
340648152.73999995

oe_exp
64541248.23

oe_projection
211249725.08


In [61]:
timeline_test_cols = [
    "ps_allocation",
    "ps_expenditure",
    "ps_balance",
    "ps_projection",
    "oe_allocation",
    "oe_encumbrance",
    "oe_expenditure",
    "oe_projection",
]

In [62]:
for i in timeline_test_cols:
    print("\n" + i)
    print(time_test[i].sum())


ps_allocation
694532000

ps_expenditure
362067639.3099998

ps_balance
332464360.69000024

ps_projection
590649886.27987

oe_allocation
701335000

oe_encumbrance
561198815.5199999

oe_expenditure
89307659.13

oe_projection
254044580.5271429


## PSOE Timeline

In [63]:
psoe_ps_cols = [
    "appr_catg",
    "fund",
    "fund_description",
    "appropriation",
    "pec_class",
    "division",
    "ps_allocation",
    "ps_expenditure",
    "ps_balance",
    "ps_projection",
    "ps_%_expended",
    "ap",
    "pec_class_description",
]

psoe_oe_cols = [
    "appr_catg",
    "fund",
    "fund_description",
    "appropriation",
    "pec_class",
    "division",
    "oe_allocation",
    "oe_encumbrance",
    "oe_expenditure",
    "oe_balance",
    "oe_projection",
    "oe_%_expended",
    "ap",
    "pec_class_description",
]

psoe_right_col_order = [
    "appr_catg",
    "fund",
    "fund_description",
    "appropriation",
    "division",
    "pec_class",
    "pec_class_description",
    "allocation",
    "expense",
    "balance",
    "projection",
    "%_expended",
    "ap",
    "type",
    "encumbrance",
]

In [64]:
def create_psoe_timeline(df, ps_list: list, oe_list: list):

    # Create 2 dataframes that subsets out OE and PS
    psoe_oe = cleaning_psoe_tpsoe(df[oe_list], "oe")
    psoe_ps = cleaning_psoe_tpsoe(df[ps_list], "ps")

    # Stack both dataframes on top of each other
    c1 = pd.concat([psoe_ps, psoe_oe], sort=False)

    # Rename column to mimic William's
    c1 = c1.rename(columns={"expenditure": "expense"})

    # Rearrange the dataframe in the right order
    c1 = c1[psoe_right_col_order]

    return c1

In [65]:
pose_test = create_psoe_timeline(time_test, psoe_ps_cols, psoe_oe_cols)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [66]:
pose_test = pose_test.loc[pose_test["appropriation"] != "22102F"]

In [67]:
pose_test.shape

(296, 15)

In [68]:
pose_test.head(2)

Unnamed: 0,appr_catg,fund,fund_description,appropriation,division,pec_class,pec_class_description,allocation,expense,balance,projection,%_expended,ap,type,encumbrance
0,2122,1,General Fund,22002,Local Assistance,2030,Local Assistance,1500000,0.0,1500000.0,0.0,0.0,1,ps,
1,2122,1,General Fund,22030,Local Assistance,2030,Local Assistance,4001000,135511.93,3865488.07,406535.79,0.03387,1,ps,


In [69]:
# time_test[['ap','appr_catg','appropriation','pec_class_description','oe_allocation','oe_projection','ap']].loc[time_test["pec_class_description"] == "Aeronautics"]

In [70]:
# test_ps.loc[test_ps["pec_class_description"] == "Aeronautics"]

### Double checking

In [71]:
psoe_df = psoe_df[psoe_df["ap"].isin([4, 7, 11])]

In [72]:
aero_og = psoe_df.loc[psoe_df["division"] == "Aeronautics"]

In [73]:
aero_og

Unnamed: 0,appr_catg,fund,fund_description,appr,division,pec_class,pec_class_description,allocation,expense,balance,projection,%_expended,ap,type,encumbrance
133,2122,41,Aeronautics Account STF,22001R,Aeronautics,1000,Aeronautics,58000,0.0,58000.0,0.0,0,4,PS,
134,2122,41,Aeronautics Account STF,22001,Aeronautics,1000,Aeronautics,3620000,1024426.6,2595573.4,3073280.0,0.282991,4,PS,
157,2122,890,Federal Trust Fund,22001F,Aeronautics,1000,Aeronautics,86000,0.0,86000.0,0.0,0,4,PS,
283,2122,41,Aeronautics Account STF,22001R,Aeronautics,1000,Aeronautics,59000,11037.67,47962.33,18921.72,0.187079,7,PS,
284,2122,41,Aeronautics Account STF,22001,Aeronautics,1000,Aeronautics,3742000,1696346.27,2045653.73,2908022.0,0.453326,7,PS,
307,2122,890,Federal Trust Fund,22001F,Aeronautics,1000,Aeronautics,89000,0.0,89000.0,0.0,0,7,PS,
481,2122,41,Aeronautics Account STF,22001R,Aeronautics,1000,Aeronautics,59000,26017.94,32982.06,28383.21,0.440982,11,PS,
482,2122,41,Aeronautics Account STF,22001,Aeronautics,1000,Aeronautics,3742000,2668503.64,1073496.36,2911095.0,0.713122,11,PS,
505,2122,890,Federal Trust Fund,22001F,Aeronautics,1000,Aeronautics,89000,0.0,89000.0,0.0,0,11,PS,
658,2122,41,Aeronautics Account STF,22001R,Aeronautics,1000,Aeronautics,0,0.0,0.0,0.0,0%,4,OE,0.0


In [74]:
aero_test = pose_test.loc[pose_test["division"] == "Aeronautics"]

In [75]:
aero_test

Unnamed: 0,appr_catg,fund,fund_description,appropriation,division,pec_class,pec_class_description,allocation,expense,balance,projection,%_expended,ap,type,encumbrance
6,2122,41,Aeronautics Account STF,22001R,Aeronautics,1000,Aeronautics,58000,0.0,58000.0,0.0,0,1,ps,
7,2122,41,Aeronautics Account STF,22001,Aeronautics,1000,Aeronautics,3620000,1024426.6,2595573.4,3073279.8,0.282991,1,ps,
31,2122,890,Federal Trust Fund,22001F,Aeronautics,1000,Aeronautics,86000,0.0,86000.0,0.0,0,1,ps,
57,2122,41,Aeronautics Account STF,22001R,Aeronautics,1000,Aeronautics,59000,11037.67,47962.33,18921.72,0.187079,2,ps,
58,2122,41,Aeronautics Account STF,22001,Aeronautics,1000,Aeronautics,3742000,1696346.27,2045653.73,2908022.177143,0.453326,2,ps,
83,2122,890,Federal Trust Fund,22001F,Aeronautics,1000,Aeronautics,89000,0.0,89000.0,0.0,0,2,ps,
107,2122,41,Aeronautics Account STF,22001R,Aeronautics,1000,Aeronautics,59000,26017.94,32982.06,28383.207273,0.440982,3,ps,
108,2122,41,Aeronautics Account STF,22001,Aeronautics,1000,Aeronautics,3742000,2668503.64,1073496.36,2911094.88,0.713122,3,ps,
131,2122,890,Federal Trust Fund,22001F,Aeronautics,1000,Aeronautics,89000,0.0,89000.0,0.0,0,3,ps,
6,2122,41,Aeronautics Account STF,22001R,Aeronautics,1000,Aeronautics,0,0.0,0.0,0.0,0%,1,oe,0.0


In [76]:
# aero_test

In [77]:
test = set(pose_test.appropriation.unique().tolist())
actual = set(psoe_df.appr.unique().tolist())

In [78]:
test - actual

set()

In [79]:
actual - test

set()

In [80]:
cols_to_check = ["expense", "balance", "projection", "encumbrance"]

In [81]:
for i in cols_to_check:
    print("\n" + i)
    print(aero_test[i].sum())
    print(aero_og[i].sum())


expense
5675670.43
5675670.430000002

balance
8093318.239999999
8093318.24

projection
9422214.17792208
9422214.177922077

encumbrance
346011.32999999996
346011.32999999996
