In [1]:
import geopandas as gpd
import numpy as np
import pandas as pd
from calitp import *
from shared_utils import utils

# Notebook display settings: show up to 100 columns; do not truncate rows or cell text.
pd.set_option("display.max_columns", 100)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

# Specific to this project (NOTE: this rebinds `utils`, shadowing shared_utils.utils imported above)
import A1_utilities as utils



In [2]:
# GCS folder for the PMP dashboard files; prepended to the workbook names read and written below.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/pmp_dashboard/"

In [3]:
# Excel workbook that William manipulated; its sheets are compared against this notebook's output below
FILE_NAME_1 = "PMP Summary Report Data.xlsx"

# Sheets in William's workbook that I want
sheets_list = [
    "Fund by Division Data",
    "TPSOE Data",
    "Timeline Data",
    "PSOE Timeline",
]

# Passing a list of sheet names makes read_excel return a dict keyed by sheet name.
dict_df1 = pd.read_excel(f"{GCS_FILE_PATH}{FILE_NAME_1}", sheet_name=sheets_list)

# Run each sheet through to_snakecase (presumably provided by the calitp star import);
# the unpacking order follows the order of sheets_list.
division_df, tpsoe_df, timeline_df, psoe_df = (
    to_snakecase(dict_df1.get(sheet)) for sheet in sheets_list
)

In [4]:
# NOTE(review): repeats the identical GCS_FILE_PATH assignment from cell In [2]; this cell is redundant.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/pmp_dashboard/"

In [5]:
# Appropriation codes handed to utils.import_raw_data as its third argument
# (presumably the appropriations to exclude from the raw data -- confirm in A1_utilities).
appropriations_unwanted = ['22105']

In [6]:
# Raw data for AP11, loaded with the project helper and the unwanted-appropriations list.
ap11 = utils.import_raw_data(
    "FY 2122 AP11_Closed_PMP Summary Report.022822_Updated.xlsx",
    "Raw Data AP11 Closed",
    appropriations_unwanted,
)

In [7]:
# Raw data for AP10, loaded with the project helper and the unwanted-appropriations list.
ap10 = utils.import_raw_data(
    "FY 2122 AP10_Closed_PMP Summary Report.022822_Andrew Updated.xlsx",
    "Raw Data AP10 Closed",
    appropriations_unwanted,
)

In [8]:
# Raw data for AP12, loaded with the project helper and the unwanted-appropriations list.
ap12 = utils.import_raw_data(
    "FY 2122 AP12_Closed_PMP Summary Report.022822_Updated.xlsx",
    "Raw Data AP12 Closed",
    appropriations_unwanted,
)

In [9]:
def pmp_dashboard_sheets(df, accounting_period:str, unwanted_timeline_appropriations: str):
    """
    Build the four PMP dashboard tables and save them into one Excel workbook.

    The workbook is written to
    f"{GCS_FILE_PATH}{accounting_period}_cleaned_data.xlsx" with one sheet per
    table, named fund_by_div, tspoe, timeline and psoe.

    Args:
        df: data passed to utils.create_fund_by_division and utils.create_tpsoe
            (presumably a frame returned by utils.import_raw_data -- confirm).
        accounting_period: prefix of the output workbook's file name.
        unwanted_timeline_appropriations: accepted but never read in this
            function body. NOTE(review): confirm whether it should be
            forwarded to utils.create_timeline.

    Returns:
        Tuple of (fund_by_div, tspoe, timeline, psoe), in that order.
    """
    # The timeline is built from utils.my_clean_dataframes, not from `df`.
    sheets = {
        "fund_by_div": utils.create_fund_by_division(df),
        "tspoe": utils.create_tpsoe(df, utils.tpsoe_ps_list, utils.tpsoe_oe_list),
        "timeline": utils.create_timeline(utils.my_clean_dataframes),
    }
    # The PSOE table is derived from the timeline, so it is built last.
    sheets["psoe"] = utils.create_psoe_timeline(
        sheets["timeline"], utils.psoe_ps_cols, utils.psoe_oe_cols
    )

    # How to save this to sharepoint?
    output_path = f"{GCS_FILE_PATH}{accounting_period}_cleaned_data.xlsx"
    with pd.ExcelWriter(output_path) as writer:
        for sheet_name, table in sheets.items():
            table.to_excel(writer, sheet_name=sheet_name, index=False)

    return tuple(sheets.values())

In [41]:
# df1 = fund by division, df2 = TPSOE, df3 = timeline, df4 = PSOE timeline.
# NOTE(review): the input frame is ap11 but the label is 'accountingperiod12', so the
# workbook is saved as accountingperiod12_cleaned_data.xlsx -- confirm which period is intended.
# NOTE(review): '22030' is bound to unwanted_timeline_appropriations, which the function
# body never reads, so it has no effect on the returned tables.
df1, df2, df3, df4 = pmp_dashboard_sheets(ap11, 'accountingperiod12', '22030')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

# Fund by Division Data

In [42]:
len(df1)

55

In [43]:
df1['appropriation'].unique()

array(['22002', '22030', '22001R', '22001', '22102', '22008', '22001F',
       '1850522', '22004'], dtype=object)

In [44]:
set(df1.columns).difference(set(division_df.columns))

{'ap', 'oe_projection', 'pec_class_description', 'year_expended_pace'}

In [45]:
df1.head()

Unnamed: 0,fund,fund_description,appropriation,pec_class,pec_class_description,ps_allocation,ps_expenditure,ps_balance,ps_projection,ps_%_expended,oe_allocation,oe_encumbrance,oe_expenditure,oe_balance,oe_%_expended,total_allocation,total_expenditure,total_balance,total_projection,total_%_expended,ap,year_expended_pace,oe_projection,division,notes
0,1,General Fund,22002,2030,Local Assistance,1500000,0,1500000,0,0.0,0,0,0,0,0%,1500000,0,1500000,0,0.0,11,0.0,0,Local Assistance,
1,1,General Fund,22030,2030,Local Assistance,2625000,1265708,1359291,1380772,0.482175,27000,1593,10119,15286,0.467892,2652000,1277421,1374578,1393405,0.481682,11,0.526008,12631,Local Assistance,
2,1,General Fund,22030,3010,State & Fed Mass Trans,150000,145312,4687,158523,0.968752,2000,0,0,2000,0,152000,145312,6687,158523,0.956005,11,1.05682,0,DRMT,
3,1,General Fund,22030,4010,Statewide Planning,150000,166142,-16142,181246,1.107617,2000,0,0,2000,0,152000,166142,-14142,181246,1.093043,11,1.208307,0,DOTP,
4,41,Aeronautics Account STF,22001R,1000,Aeronautics,59000,26017,32982,28383,0.440982,0,0,13866,-13866,0%,59000,39884,19115,43510,0.676012,11,0.481068,15126,Aeronautics,


In [46]:
division_df.head()

Unnamed: 0,pec_class,division,fund,fund_description,appropriation,ps_allocation,ps_expenditure,ps_balance,ps_projection,year_end_expendded_pace,ps_%_expended,oe_allocation,oe_encumbrance,oe_expenditure,oe_balance,oe_enc_+_oe_exp_projection,oe_%_expended,total_allocation,total_expenditure,total_balance,total_projection,total_%_expended,notes
0,1000,Aeronautics,41,Aeronautics Account STF,22001,3742000,2668503.64,1073496.36,2911095.0,0.777952,0.713122,487000,110463.37,119766.31,256770.32,241117.526364,0.495108,4229000,2898733.32,1330266.68,3152212.0,0.685442,
1,1000,Aeronautics,41,Aeronautics Account STF,22001R,59000,26017.94,32982.06,28383.21,0.481071,0.440982,0,0.0,13866.74,-13866.74,15127.352727,0.0,59000,39884.68,19115.32,43510.56,0.676012,
2,1000,Aeronautics,890,Federal Trust Fund,22001F,89000,0.0,89000.0,0.0,0.0,0.0,370000,0.0,3000.0,367000.0,3272.727273,0.008845,459000,3000.0,456000.0,3272.727,0.006536,
3,2030,Local Assistance,1,General Fund,22030,2625000,1265708.14,1359291.86,1380773.0,0.526009,0.482175,27000,1593.74,10119.4,15286.86,12633.085455,0.467892,2652000,1277421.28,1374578.72,1393406.0,0.481682,
4,2030,Local Assistance,1,General Fund,22002,1500000,0.0,1500000.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,1500000,0.0,1500000.0,0.0,0.0,


In [47]:
# Amount columns of the fund-by-division table whose totals are printed below.
df1_cols = [
    "ps_allocation",
    "ps_expenditure",
    "ps_balance",
    "ps_projection",
    "oe_allocation",
    "oe_encumbrance",
    "oe_expenditure",
    "oe_balance",
    "oe_projection",
    "total_allocation",
    "total_expenditure",
    "total_balance",
    "total_projection",
]

In [48]:
# Print the total of each amount column in df1 (the generated fund-by-division table).
for column in df1_cols:
    print(f"\n{column}", df1[column].sum(), sep="\n")


ps_allocation
244928000

ps_expenditure
191016752

ps_balance
53911217

ps_projection
208381911

oe_allocation
233766000

oe_encumbrance
184226516

oe_expenditure
76845500

oe_balance
-27306049

oe_projection
268057952

total_allocation
478694000

total_expenditure
452088791

total_balance
26605176

total_projection
476439907


# TPSOE Data

In [49]:
df2.head()

Unnamed: 0,pec_class,division,fund,fund_description,appropriation,type,allocation,expenditure,balance,encumbrance,projection,year_expended_pace,%_expended,notes
0,2030,Local Assistance,1,General Fund,22002,ps,1500000.0,0.0,1500000.0,,0.0,0.0,0.0,
1,2030,Local Assistance,1,General Fund,22030,ps,2625000.0,1265708.0,1359291.0,,1380772.0,0.526008,0.482175,
2,3010,DRMT,1,General Fund,22030,ps,150000.0,145312.0,4687.0,,158523.0,1.05682,0.968752,
3,4010,DOTP,1,General Fund,22030,ps,150000.0,166142.0,-16142.0,,181246.0,1.208307,1.107617,
4,1000,Aeronautics,41,Aeronautics Account STF,22001R,ps,59000.0,26017.0,32982.0,,28383.0,0.481068,0.440982,


In [50]:
tpsoe_df.head()

Unnamed: 0,pec_class,division,fund,fund_description,appropriation,type,allocation,expenditure,balance,encumbrance,projection,year_end_expendded_pace,%_expended,notes
0,1000,Aeronautics,41,Aeronautics Account STF,22001,PS,3742000,2668503.64,1073496.36,,2911095.0,0.777952,0.713122,
1,1000,Aeronautics,41,Aeronautics Account STF,22001R,PS,59000,26017.94,32982.06,,28383.21,0.481071,0.440982,
2,1000,Aeronautics,890,Federal Trust Fund,22001F,PS,89000,0.0,89000.0,,0.0,0.0,0.0,
3,2030,Local Assistance,1,General Fund,22030,PS,2625000,1265708.14,1359291.86,,1380773.0,0.526009,0.482175,
4,2030,Local Assistance,1,General Fund,22002,PS,1500000,0.0,1500000.0,,0.0,0.0,0.0,


In [51]:
# Amount columns of the TPSOE table whose totals are printed below.
df2_cols = [
    "allocation",
    "expenditure",
    "balance",
    "encumbrance",
    "projection",
]

In [52]:
# Print the total of each amount column in df2 (the generated TPSOE table).
for column in df2_cols:
    column_total = df2[column].sum()
    print(f"\n{column}")
    print(column_total)


allocation
478694000.0

expenditure
267862252.0

balance
26605168.0

encumbrance
184226516.0

projection
476439863.0


# Timeline Data

In [53]:
len(df3)

169

In [54]:
df3['appropriation'].unique()

array(['22002', '22030', '22001R', '22001', '22102', '22008', '22001F',
       '1850522', '22004', '22102F'], dtype=object)

In [55]:
set(df3.columns).difference(set(timeline_df.columns))

{'appropriation',
 'oe_allocation',
 'oe_balance',
 'oe_encumbrance',
 'oe_expenditure',
 'ps_allocation',
 'ps_balance',
 'ps_expenditure',
 'total_%_expended',
 'total_expenditure',
 'year_expended_pace'}

In [56]:
set(timeline_df.columns).difference(set(df3.columns))

{'appr',
 'oe_alloc',
 'oe_bal_excl_pre_enc',
 'oe_enc',
 'oe_exp',
 'ps_alloc',
 'ps_bal',
 'ps_exp',
 'total_expended___encumbrance',
 'total_projected_%'}

In [57]:
df3['ap'].value_counts()

10    60
11    55
12    54
Name: ap, dtype: int64

In [58]:
# Amount columns shared by the timeline table (df3) and the raw ap10/ap11/ap12 frames;
# used for the total comparisons below.
df3_cols = [
    "ps_allocation",
    "ps_expenditure",
    "ps_balance",
    "py_pos_alloc",
    "ps_projection",
    "oe_allocation",
    "oe_encumbrance",
    "oe_expenditure",
    "oe_balance",
    "oe_projection",
]

In [95]:
# Timeline rows for appropriation 22030 with a PS allocation of 2,625,000 and an OE allocation of 27,000.
test = df3.loc[
    (df3["appropriation"] == "22030")
    & (df3["ps_allocation"] == 2625000)
    & (df3["oe_allocation"] == 27000)
]

In [108]:
test.reset_index(drop = True)

Unnamed: 0,appr_catg,fund,fund_description,appropriation,pec_class,pec_class_description,ps_allocation,ps_expenditure,ps_balance,ps_projection,ps_%_expended,py_pos_alloc,act__hours,oe_allocation,oe_encumbrance,oe_expenditure,oe_balance,oe_%_expended,total_allocation,total_expenditure,total_balance,total_projection,total_%_expended,ap,year_expended_pace,oe_projection,division
0,2122,1,General Fund,22030,2030,Local Assistance,2625000,1265708,1359291,1380772,0.482175,0,13875,27000,1593,10119,15286,0.467892,2652000,1277421,1374578,1393405,0.481682,11,0.526008,12631,Local Assistance
1,2122,1,General Fund,22030,2030,Local Assistance,2625000,1405981,1219018,1405981,0.535612,0,15500,27000,1593,10119,15286,0.43382,2652000,1417694,1234305,1417694,0.534576,12,0.535612,11712,Local Assistance


In [105]:
test.index

Int64Index([1, 1], dtype='int64')

In [None]:
# Show df3 without the rows selected in `test`.
# df3's index labels repeat across accounting periods (test.index is [1, 1]), so dropping
# by label with df3.drop(index=test.index) would also remove every other row labelled 1.
# Filter on the row conditions themselves instead. The result is only displayed; df3 is
# left unchanged, as in the original cell.
rows_to_drop = (
    (df3["appropriation"] == "22030")
    & (df3["ps_allocation"] == 2625000)
    & (df3["oe_allocation"] == 27000)
)
df3.loc[~rows_to_drop]

In [104]:
df3.head()

Unnamed: 0,appr_catg,fund,fund_description,appropriation,pec_class,pec_class_description,ps_allocation,ps_expenditure,ps_balance,ps_projection,ps_%_expended,py_pos_alloc,act__hours,oe_allocation,oe_encumbrance,oe_expenditure,oe_balance,oe_%_expended,total_allocation,total_expenditure,total_balance,total_projection,total_%_expended,ap,year_expended_pace,oe_projection,division
0,2122,1,General Fund,22002,2030,Local Assistance,1500000,0,1500000,0,0.0,0,0,0,0,0,0,0%,1500000,0,1500000,0,0.0,11,0.0,0,Local Assistance
1,2122,1,General Fund,22030,2030,Local Assistance,2625000,1265708,1359291,1380772,0.482175,0,13875,27000,1593,10119,15286,0.467892,2652000,1277421,1374578,1393405,0.481682,11,0.526008,12631,Local Assistance
2,2122,1,General Fund,22030,3010,State & Fed Mass Trans,150000,145312,4687,158523,0.968752,0,1747,2000,0,0,2000,0,152000,145312,6687,158523,0.956005,11,1.05682,0,DRMT
3,2122,1,General Fund,22030,4010,Statewide Planning,150000,166142,-16142,181246,1.107617,0,2006,2000,0,0,2000,0,152000,166142,-14142,181246,1.093043,11,1.208307,0,DOTP
4,2122,41,Aeronautics Account STF,22001R,1000,Aeronautics,59000,26017,32982,28383,0.440982,0,299,0,0,13866,-13866,0%,59000,39884,19115,43510,0.676012,11,0.481068,15126,Aeronautics


In [61]:
# Print the total of each amount column in df3 (the generated timeline table).
for column in df3_cols:
    print(f"\n{column}", df3[column].sum(), sep="\n")


ps_allocation
739447000

ps_expenditure
574215692

ps_balance
165231212

py_pos_alloc
0

ps_projection
625958038

oe_allocation
935858000

oe_encumbrance
1064461360

oe_expenditure
315342594

oe_balance
-678098056

oe_projection
1415700597


In [62]:
# Combined totals of the raw ap10, ap11 and ap12 frames, for comparison with the df3 totals above.
for column in df3_cols:
    combined_total = sum(frame[column].sum() for frame in (ap10, ap11, ap12))
    print(f"\n{column}")
    print(combined_total)


ps_allocation
739447000

ps_expenditure
574215692

ps_balance
165231212

py_pos_alloc
0

ps_projection
625958038

oe_allocation
935858000

oe_encumbrance
1064461360

oe_expenditure
315342594

oe_balance
-678098056

oe_projection
1415700597


In [63]:
# Per-column totals of the raw ap10 frame alone.
for column in df3_cols:
    ap10_total = ap10[column].sum()
    print(f"\n{column}")
    print(ap10_total)


ps_allocation
249591000

ps_expenditure
171885899

ps_balance
77705068

py_pos_alloc
0

ps_projection
206263086

oe_allocation
468326000

oe_encumbrance
630595318

oe_expenditure
144553598

oe_balance
-540974949

oe_projection
804059623


In [64]:
# Per-column totals of the raw ap11 frame alone.
for column in df3_cols:
    print(f"\n{column}", ap11[column].sum(), sep="\n")


ps_allocation
244928000

ps_expenditure
191016752

ps_balance
53911217

py_pos_alloc
0

ps_projection
208381911

oe_allocation
233766000

oe_encumbrance
184226516

oe_expenditure
76845500

oe_balance
-27306049

oe_projection
268057952


# PSOE Timeline

In [65]:
df4.shape

(338, 15)

In [66]:
df4.head()

Unnamed: 0,appr_catg,fund,fund_description,appropriation,division,pec_class,pec_class_description,allocation,expense,balance,projection,%_expended,ap,type,encumbrance
0,2122,1,General Fund,22002,Local Assistance,2030,Local Assistance,1500000,0,1500000,0,0.0,11,ps,
1,2122,1,General Fund,22030,Local Assistance,2030,Local Assistance,2625000,1265708,1359291,1380772,0.482175,11,ps,
2,2122,1,General Fund,22030,DRMT,3010,State & Fed Mass Trans,150000,145312,4687,158523,0.968752,11,ps,
3,2122,1,General Fund,22030,DOTP,4010,Statewide Planning,150000,166142,-16142,181246,1.107617,11,ps,
4,2122,41,Aeronautics Account STF,22001R,Aeronautics,1000,Aeronautics,59000,26017,32982,28383,0.440982,11,ps,


In [67]:
psoe_df.head()

Unnamed: 0,appr_catg,fund,fund_description,appr,division,pec_class,pec_class_description,allocation,expense,balance,projection,%_expended,ap,type,encumbrance
0,2122,41,Aeronautics Account STF,22001,Aeronautics,1000,Aeronautics,0,259497.99,-259497.99,3113975.88,0,1,PS,
1,2122,42,"Highway Account, State, STF",22001R,DOTP,4050,PSR/PSSR Development,0,242438.21,-242438.21,2909258.52,0,1,PS,
2,2122,42,"Highway Account, State, STF",22001,Local Assistance,2030,Local Assistance,0,3496261.2,-3496261.2,41955134.4,0,1,PS,
3,2122,42,"Highway Account, State, STF",22001,DRISI,2041,Research,0,1142909.74,-1142909.74,13714916.88,0,1,PS,
4,2122,42,"Highway Account, State, STF",22001,DOTP,4010,Statewide Planning,0,331050.38,-331050.38,3972604.56,0,1,PS,


In [68]:
# Amount columns of the PSOE timeline table (df4).
# NOTE(review): no cell visible in this notebook reads df4_cols after this point.
df4_cols = [
    "allocation",
    "expense",
    "balance",
    "projection",
    "encumbrance",
]

In [69]:
# Combined totals of the raw ap10, ap11 and ap12 frames over df3_cols.
# NOTE(review): this repeats the raw-total check from the Timeline Data section and never
# touches df4 or df4_cols -- confirm whether df4 totals were meant to be printed here.
for column in df3_cols:
    combined_total = sum(frame[column].sum() for frame in (ap10, ap11, ap12))
    print(f"\n{column}")
    print(combined_total)


ps_allocation
739447000

ps_expenditure
574215692

ps_balance
165231212

py_pos_alloc
0

ps_projection
625958038

oe_allocation
935858000

oe_encumbrance
1064461360

oe_expenditure
315342594

oe_balance
-678098056

oe_projection
1415700597
