In [None]:
import geopandas as gpd
import numpy as np
import pandas as pd
from calitp import *
from shared_utils import utils

# Notebook display settings: show up to 100 columns, and never truncate rows or cell text
pd.set_option("display.max_columns", 100)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

# Specific to this project
# NOTE: the import below rebinds the name `utils`, shadowing `shared_utils.utils` imported above
import A1_utilities as utils

In [None]:
# GCS folder for this project: the reference workbook is read from it and the
# cleaned workbook is written back to it by later cells.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/pmp_dashboard/"

In [None]:
# Workbook that William prepared; loaded here as the reference to compare against
FILE_NAME_1 = "PMP Summary Report Data.xlsx"

# The only sheets needed from William's workbook
sheets_list = ["Fund by Division Data", "TPSOE Data", "Timeline Data", "PSOE Timeline"]

# Passing a list of sheet names makes read_excel return {sheet name: DataFrame}
dict_df1 = pd.read_excel(GCS_FILE_PATH + FILE_NAME_1, sheet_name=sheets_list)

# One snake-cased frame per sheet, unpacked in the same order as sheets_list
division_df, tpsoe_df, timeline_df, psoe_df = (
    to_snakecase(dict_df1[sheet]) for sheet in sheets_list
)

In [None]:
# NOTE(review): re-assigns the same value already set in an earlier cell of this
# notebook; this cell looks redundant — confirm and remove.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/pmp_dashboard/"

In [None]:
# Passed as `appropriations_to_filter` to pmp_dashboard_sheets further down.
# Presumably appropriation codes to exclude from the raw data — confirm against
# A1_utilities.import_raw_data.
appropriations_unwanted = ["22105"]

In [None]:
def pmp_dashboard_sheets(
    file_name: str,
    name_of_sheet: str,
    appropriations_to_filter: list,
    accounting_period: int,
    unwanted_timeline_appropriations: str,
) -> tuple:
    """Build the four PMP dashboard sheets from a raw export and save them to GCS.

    Args:
        file_name: forwarded to ``utils.import_raw_data``.
        name_of_sheet: forwarded to ``utils.import_raw_data``.
        appropriations_to_filter: forwarded to ``utils.import_raw_data``.
        accounting_period: forwarded to ``utils.import_raw_data`` and also used
            in the name of the output workbook.
        unwanted_timeline_appropriations: value of the ``appropriation`` column
            whose rows are removed from the timeline when both ``ps_allocation``
            and ``oe_allocation`` are 0.

    Returns:
        ``(fund_by_div, tpsoe, timeline, psoe)`` in that order.

    Side effects:
        Writes ``AP_{accounting_period}_cleaned_data.xlsx`` under
        ``GCS_FILE_PATH`` with one sheet per returned frame.
    """
    # The original sheet
    raw = utils.import_raw_data(
        file_name, name_of_sheet, appropriations_to_filter, accounting_period
    )

    # Running scripts for each sheet
    fund_by_div = utils.create_fund_by_division(raw)
    tpsoe = utils.create_tpsoe(raw, utils.tpsoe_ps_list, utils.tpsoe_oe_list)
    # NOTE(review): the timeline is built from utils.my_clean_dataframes rather
    # than from the frame imported above — confirm this is intended.
    timeline = utils.create_timeline(utils.my_clean_dataframes)
    # NOTE(review): psoe is derived from the timeline *before* the unwanted rows
    # are filtered out below; order kept as in the original.
    psoe = utils.create_psoe_timeline(timeline, utils.psoe_ps_cols, utils.psoe_oe_cols)

    # Filter out stuff for timeline.
    # A boolean mask is used instead of drop(index=...): dropping by label removes
    # every row sharing that label, which would discard non-matching rows whenever
    # the timeline index is not unique.
    is_unwanted = (
        (timeline["appropriation"] == unwanted_timeline_appropriations)
        & (timeline["ps_allocation"] == 0)
        & (timeline["oe_allocation"] == 0)
    )
    # Missing comparison results count as "not unwanted", so those rows are kept.
    timeline = timeline.loc[~is_unwanted.fillna(False)].reset_index(drop=True)

    # Save
    output_path = f"{GCS_FILE_PATH}AP_{accounting_period}_cleaned_data.xlsx"
    with pd.ExcelWriter(output_path) as writer:
        fund_by_div.to_excel(writer, sheet_name="fund_by_div", index=False)
        # Sheet name spelling ("tspoe") kept as-is so the output workbook is unchanged.
        tpsoe.to_excel(writer, sheet_name="tspoe", index=False)
        timeline.to_excel(writer, sheet_name="timeline", index=False)
        psoe.to_excel(writer, sheet_name="psoe", index=False)

    return fund_by_div, tpsoe, timeline, psoe

In [None]:
# Build and save the four sheets for accounting period 12
df1, df2, df3, df4 = pmp_dashboard_sheets(
    file_name="AP12 June.xls",
    name_of_sheet="Download",
    appropriations_to_filter=appropriations_unwanted,
    accounting_period=12,
    unwanted_timeline_appropriations="22030",
)

In [None]:
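# NOTE: commented-out call kept for reference only; its 3 arguments do not match
# the current 5-parameter signature of pmp_dashboard_sheets.
# df1, df2, df3, df4 = pmp_dashboard_sheets(ap11, "accountingperiod11", "22030")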

# Fund by Division

In [None]:
# Row count of df1, the first frame returned by pmp_dashboard_sheets (fund by division)
len(df1)

In [None]:
# (rows, columns) of the regenerated fund-by-division frame
df1.shape

In [None]:
# (rows, columns) of the "Fund by Division Data" sheet from the reference workbook, for comparison
division_df.shape

In [None]:
# Distinct values of the appropriation column in the regenerated frame
df1["appropriation"].unique()

In [None]:
# Columns present in the regenerated frame but missing from the reference sheet
set(df1.columns) - set(division_df.columns)

In [None]:
# Columns present in the reference sheet but missing from the regenerated frame
set(division_df.columns) - set(df1.columns)

In [None]:
# Preview the first rows of the regenerated fund-by-division frame
df1.head()

In [None]:
# Columns of df1 that get totalled in the following cell, grouped by prefix
df1_cols = (
    ["ps_allocation", "ps_expenditure", "ps_balance", "ps_projection"]
    + ["oe_allocation", "oe_encumbrance", "oe_expenditure", "oe_balance"]
    + ["total_allocation", "total_expenditure", "total_balance"]
)

In [None]:
# Print each listed column name followed by that column's total in df1
for col_name in df1_cols:
    print(f"\n{col_name}")
    print(df1[col_name].sum())

# TPSOE Data

In [None]:
# Preview df2, the second frame returned by pmp_dashboard_sheets (TPSOE)
df2.head()

In [None]:
# Preview the "TPSOE Data" sheet from the reference workbook, for comparison
tpsoe_df.head()

In [None]:
# Columns of df2 that get totalled in the following cell
df2_cols = ["allocation", "expenditure", "balance", "encumbrance", "projection"]

In [None]:
# Print each listed column name followed by that column's total in df2
for col_name in df2_cols:
    print(f"\n{col_name}")
    print(df2[col_name].sum())

# Timeline Data

In [None]:
# Row count of df3, the third frame returned by pmp_dashboard_sheets (timeline)
len(df3)

In [None]:
# Preview the "Timeline Data" sheet from the reference workbook, for comparison
timeline_df.head()

In [None]:
# Distinct values of the appropriation column in the regenerated timeline
df3["appropriation"].unique()

In [None]:
# Columns present in the regenerated timeline but missing from the reference sheet
set(df3.columns) - set(timeline_df.columns)

In [None]:
# Columns present in the reference sheet but missing from the regenerated timeline
set(timeline_df.columns) - set(df3.columns)

In [None]:
# Number of timeline rows per distinct value of the "ap" column
df3["ap"].value_counts()

In [None]:
# Print each listed column name followed by that column's total in df3.
# NOTE(review): df3_cols is not defined in any cell above in the visible notebook,
# so this raises NameError on a fresh kernel unless it is defined elsewhere —
# confirm and define it before this cell.
for i in df3_cols:
    print("\n" + i)
    print(df3[i].sum())

In [None]:
# Print, per listed column, the combined total across the ap10, ap11 and ap12 frames.
# NOTE(review): ap10, ap11, ap12 and df3_cols are not defined in any cell above in
# the visible notebook — this cell fails on a fresh kernel unless they are defined
# elsewhere; confirm.
for i in df3_cols:
    print("\n" + i)
    print(ap10[i].sum() + ap11[i].sum() + ap12[i].sum())

In [None]:
# Print each listed column name followed by that column's total in ap10.
# NOTE(review): ap10 and df3_cols are not defined in any cell above in the visible
# notebook — confirm where they come from.
for i in df3_cols:
    print("\n" + i)
    print(ap10[i].sum())

In [None]:
# Print each listed column name followed by that column's total in ap11.
# NOTE(review): ap11 and df3_cols are not defined in any cell above in the visible
# notebook — confirm where they come from.
for i in df3_cols:
    print("\n" + i)
    print(ap11[i].sum())

# PSOE Timeline

In [None]:
# (rows, columns) of df4, the fourth frame returned by pmp_dashboard_sheets (PSOE timeline)
df4.shape

In [None]:
# Preview the regenerated PSOE timeline frame
df4.head()

In [None]:
# Preview the "PSOE Timeline" sheet from the reference workbook, for comparison
psoe_df.head()

In [None]:
# Presumably the df4 columns to total, mirroring df1_cols / df2_cols — not used
# in the visible part of the notebook; confirm the names against df4.columns.
df4_cols = ["allocation", "expense", "balance", "projection", "encumbrance"]