In [1]:
import geopandas as gpd
import numpy as np
import pandas as pd
from calitp import *
from shared_utils import utils

# Formatting the nb
pd.options.display.max_columns = 100
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

# Specific to this project
import A1_utilities as utils



In [2]:
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/pmp_dashboard/"

In [3]:
# DF that William manipulated
FILE_NAME_1 = "PMP Summary Report Data.xlsx"

# Sheets in William's notebook that I want
sheets_list = ["Fund by Division Data", "TPSOE Data", "Timeline Data", "PSOE Timeline"]

dict_df1 = pd.read_excel(f"{GCS_FILE_PATH}{FILE_NAME_1}", sheet_name=sheets_list)

division_df = to_snakecase(dict_df1.get("Fund by Division Data"))
tpsoe_df = to_snakecase(dict_df1.get("TPSOE Data"))
timeline_df = to_snakecase(dict_df1.get("Timeline Data"))
psoe_df = to_snakecase(dict_df1.get("PSOE Timeline"))

In [4]:
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/pmp_dashboard/"

In [5]:
appropriations_unwanted = ["22105"]

In [6]:
ap11 = utils.import_raw_data(
    "FY 2122 AP11_Closed_PMP Summary Report.022822_Updated.xlsx",
    "Raw Data AP11 Closed",
    appropriations_unwanted,
)

In [7]:
ap10 = utils.import_raw_data(
    "FY 2122 AP10_Closed_PMP Summary Report.022822_Andrew Updated.xlsx",
    "Raw Data AP10 Closed",
    appropriations_unwanted,
)

In [8]:
'''
ap12 = utils.import_raw_data(
    "FY 2122 AP12_Closed_PMP Summary Report.022822_Updated.xlsx",
    "Raw Data AP12 Closed",
    appropriations_unwanted,
)
'''

'\nap12 = utils.import_raw_data(\n    "FY 2122 AP12_Closed_PMP Summary Report.022822_Updated.xlsx",\n    "Raw Data AP12 Closed",\n    appropriations_unwanted,\n)\n'

In [9]:
def pmp_dashboard_sheets(
    df, accounting_period: str, unwanted_timeline_appropriations: str
):

    # Running scripts for each sheet
    fund_by_div = utils.create_fund_by_division(df)
    tspoe = utils.create_tpsoe(df, utils.tpsoe_ps_list, utils.tpsoe_oe_list)
    timeline = utils.create_timeline(utils.my_clean_dataframes)
    psoe = utils.create_psoe_timeline(timeline, utils.psoe_ps_cols, utils.psoe_oe_cols)

    # Filter out timeline
    unwanted = timeline[
        (timeline["appropriation"] == unwanted_timeline_appropriations)
        & (timeline["ps_allocation"] == 0)
        & (timeline["oe_allocation"] == 0)
    ]
    timeline = timeline.drop(index=unwanted.index)
    timeline = timeline.reset_index(drop=True)

    # How to save this to sharepoint?
    with pd.ExcelWriter(
        f"{GCS_FILE_PATH}{accounting_period}_cleaned_data.xlsx"
    ) as writer:
        fund_by_div.to_excel(writer, sheet_name="fund_by_div", index=False)
        tspoe.to_excel(writer, sheet_name="tspoe", index=False)
        timeline.to_excel(writer, sheet_name="timeline", index=False)
        psoe.to_excel(writer, sheet_name="psoe", index=False)

    return fund_by_div, tspoe, timeline, psoe

In [10]:
def pmp_dashboard_sheets2(file_name: str, name_of_sheet: str, appropriations_to_filter: list, 
                          accounting_period: str, unwanted_timeline_appropriations: str):
    
    # The original sheet
    df = utils.import_raw_data(file_name,name_of_sheet,appropriations_to_filter)
    
    # Running scripts for each sheet
    fund_by_div = utils.create_fund_by_division(df)
    tspoe = utils.create_tpsoe(df, utils.tpsoe_ps_list, utils.tpsoe_oe_list)
    timeline = utils.create_timeline(utils.my_clean_dataframes)
    psoe = utils.create_psoe_timeline(timeline, utils.psoe_ps_cols, utils.psoe_oe_cols)

    # Filter out stuff for timeline
    unwanted = timeline[
        (timeline["appropriation"] == unwanted_timeline_appropriations)
        & (timeline["ps_allocation"] == 0)
        & (timeline["oe_allocation"] == 0)
    ]
    timeline = timeline.drop(index=unwanted.index)
    timeline = timeline.reset_index(drop=True)

    # How to save this to sharepoint?
    with pd.ExcelWriter(
        f"{GCS_FILE_PATH}{accounting_period}_cleaned_data.xlsx"
    ) as writer:
        fund_by_div.to_excel(writer, sheet_name="fund_by_div", index=False)
        tspoe.to_excel(writer, sheet_name="tspoe", index=False)
        timeline.to_excel(writer, sheet_name="timeline", index=False)
        psoe.to_excel(writer, sheet_name="psoe", index=False)

    return fund_by_div, tspoe, timeline, psoe

In [11]:
df1, df2, df3, df4 = pmp_dashboard_sheets2("FY 2122 AP12_Closed_PMP Summary Report.022822_Updated.xlsx",
    "Raw Data AP12 Closed",
    appropriations_unwanted, "accountingperiod12", "22030")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [12]:
#df1, df2, df3, df4 = pmp_dashboard_sheets(ap11, "accountingperiod11", "22030")

# Fund by Div

In [13]:
len(df1)

54

In [14]:
df1["appropriation"].unique()

array(['22002', '22030', '22001R', '22001', '22102', '22008', '22001F',
       '1850522', '22004'], dtype=object)

In [15]:
set(df1.columns).difference(set(division_df.columns))

{'oe_projection', 'year_expended_pace'}

In [16]:
df1.head()

Unnamed: 0,fund,fund_description,appropriation,pec_class,ps_allocation,ps_expenditure,ps_balance,ps_projection,ps_%_expended,oe_allocation,oe_encumbrance,oe_expenditure,oe_balance,oe_%_expended,total_allocation,total_expenditure,total_balance,total_projection,total_%_expended,year_expended_pace,oe_projection,division,notes
0,1,General Fund,22002,2030,1500000,0,1500000,0,0.0,0,0,0,0,0%,1500000,0,1500000,0,0.0,0.0,0,Local Assistance,
1,1,General Fund,22030,2030,2625000,1405981,1219018,1405981,0.535612,27000,1593,10119,15286,0.43382,2652000,1417694,1234305,1417694,0.534576,0.535612,11712,Local Assistance,
2,1,General Fund,22030,3010,150000,149442,557,149442,0.996283,2000,0,0,2000,0,152000,149442,2557,149442,0.983174,0.99628,0,DRMT,
3,1,General Fund,22030,4010,150000,153219,-3219,153219,1.02146,2000,0,0,2000,0,152000,153219,-1219,153219,1.00802,1.02146,0,DOTP,
4,41,Aeronautics Account STF,22001R,1000,59000,30208,28791,30208,0.512012,0,0,17273,-17273,0%,59000,47482,11517,47482,0.804781,0.512,17273,Aeronautics,


In [17]:
division_df.head()

Unnamed: 0,pec_class,division,fund,fund_description,appropriation,ps_allocation,ps_expenditure,ps_balance,ps_projection,year_end_expendded_pace,ps_%_expended,oe_allocation,oe_encumbrance,oe_expenditure,oe_balance,oe_enc_+_oe_exp_projection,oe_%_expended,total_allocation,total_expenditure,total_balance,total_projection,total_%_expended,notes
0,1000,Aeronautics,41,Aeronautics Account STF,22001,3742000,2668503.64,1073496.36,2911095.0,0.777952,0.713122,487000,110463.37,119766.31,256770.32,241117.526364,0.495108,4229000,2898733.32,1330266.68,3152212.0,0.685442,
1,1000,Aeronautics,41,Aeronautics Account STF,22001R,59000,26017.94,32982.06,28383.21,0.481071,0.440982,0,0.0,13866.74,-13866.74,15127.352727,0.0,59000,39884.68,19115.32,43510.56,0.676012,
2,1000,Aeronautics,890,Federal Trust Fund,22001F,89000,0.0,89000.0,0.0,0.0,0.0,370000,0.0,3000.0,367000.0,3272.727273,0.008845,459000,3000.0,456000.0,3272.727,0.006536,
3,2030,Local Assistance,1,General Fund,22030,2625000,1265708.14,1359291.86,1380773.0,0.526009,0.482175,27000,1593.74,10119.4,15286.86,12633.085455,0.467892,2652000,1277421.28,1374578.72,1393406.0,0.481682,
4,2030,Local Assistance,1,General Fund,22002,1500000,0.0,1500000.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,1500000,0.0,1500000.0,0.0,0.0,


In [18]:
df1_cols = [
    "ps_allocation",
    "ps_expenditure",
    "ps_balance",
    "ps_projection",
    "oe_allocation",
    "oe_encumbrance",
    "oe_expenditure",
    "oe_balance",
    "total_allocation",
    "total_expenditure",
    "total_balance",
]

In [19]:
for i in df1_cols:
    print("\n" + i)
    print(df1[i].sum())


ps_allocation
244928000

ps_expenditure
211313041

ps_balance
33614927

ps_projection
211313041

oe_allocation
233766000

oe_encumbrance
249639526

oe_expenditure
93943496

oe_balance
-109817058

total_allocation
478694000

total_expenditure
554896086

total_balance
-76202122


# TPSOE Data

In [20]:
df2.head()

Unnamed: 0,pec_class,division,fund,fund_description,appropriation,type,allocation,expenditure,balance,encumbrance,projection,year_expended_pace,%_expended,notes
0,2030,Local Assistance,1,General Fund,22002,ps,1500000.0,0.0,1500000.0,,0.0,0.0,0.0,
1,2030,Local Assistance,1,General Fund,22030,ps,2625000.0,1405981.0,1219018.0,,1405981.0,0.535612,0.535612,
2,3010,DRMT,1,General Fund,22030,ps,150000.0,149442.0,557.0,,149442.0,0.99628,0.996283,
3,4010,DOTP,1,General Fund,22030,ps,150000.0,153219.0,-3219.0,,153219.0,1.02146,1.02146,
4,1000,Aeronautics,41,Aeronautics Account STF,22001R,ps,59000.0,30208.0,28791.0,,30208.0,0.512,0.512012,


In [21]:
tpsoe_df.head()

Unnamed: 0,pec_class,division,fund,fund_description,appropriation,type,allocation,expenditure,balance,encumbrance,projection,year_end_expendded_pace,%_expended,notes
0,1000,Aeronautics,41,Aeronautics Account STF,22001,PS,3742000,2668503.64,1073496.36,,2911095.0,0.777952,0.713122,
1,1000,Aeronautics,41,Aeronautics Account STF,22001R,PS,59000,26017.94,32982.06,,28383.21,0.481071,0.440982,
2,1000,Aeronautics,890,Federal Trust Fund,22001F,PS,89000,0.0,89000.0,,0.0,0.0,0.0,
3,2030,Local Assistance,1,General Fund,22030,PS,2625000,1265708.14,1359291.86,,1380773.0,0.526009,0.482175,
4,2030,Local Assistance,1,General Fund,22002,PS,1500000,0.0,1500000.0,,0.0,0.0,0.0,


In [22]:
df2_cols = [
    "allocation",
    "expenditure",
    "balance",
    "encumbrance",
    "projection",
]

In [23]:
for i in df2_cols:
    print("\n" + i)
    print(df2[i].sum())


allocation
478694000.0

expenditure
305256537.0

balance
-76202131.0

encumbrance
249639526.0

projection
554896063.0


# Timeline Data

In [24]:
len(df3)

169

In [25]:
df3["appropriation"].unique()

array(['22002', '22030', '22001R', '22001', '22102', '22008', '22001F',
       '1850522', '22004', '22102F'], dtype=object)

In [26]:
set(df3.columns).difference(set(timeline_df.columns))

{'appropriation',
 'oe_allocation',
 'oe_balance',
 'oe_encumbrance',
 'oe_expenditure',
 'ps_allocation',
 'ps_balance',
 'ps_expenditure',
 'total_%_expended',
 'total_expenditure',
 'year_expended_pace'}

In [27]:
set(timeline_df.columns).difference(set(df3.columns))

{'appr',
 'oe_alloc',
 'oe_bal_excl_pre_enc',
 'oe_enc',
 'oe_exp',
 'ps_alloc',
 'ps_bal',
 'ps_exp',
 'total_expended___encumbrance',
 'total_projected_%'}

In [28]:
df3["ap"].value_counts()

10    60
11    55
12    54
Name: ap, dtype: int64

In [29]:
for i in df3_cols:
    print("\n" + i)
    print(df3[i].sum())

NameError: name 'df3_cols' is not defined

In [None]:
for i in df3_cols:
    print("\n" + i)
    print(ap10[i].sum() + ap11[i].sum() + ap12[i].sum())

In [None]:
for i in df3_cols:
    print("\n" + i)
    print(ap10[i].sum())

In [None]:
for i in df3_cols:
    print("\n" + i)
    print(ap11[i].sum())

# PSOE Timeline

In [None]:
df4.shape

In [None]:
df4.head()

In [None]:
psoe_df.head()

In [None]:
df4_cols = ["allocation", "expense", "balance", "projection", "encumbrance"]