In [1]:
import pandas as pd
from calitp import *
from shared_utils import rt_utils

# Notebook display settings: show up to 100 columns, and never truncate the
# number of rows or the width of a cell when a frame is rendered.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

# Specific to this project
import A1_utilities as utils



In [2]:
# Base GCS folder for this project. The functions below build paths by plain
# string concatenation (f"{GCS_FILE_PATH}running_list.xlsx", etc.), so the
# trailing slash is required.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/pmp_dashboard/"

In [3]:
# Passed as the appropriations argument to utils.import_and_clean and to
# pmp_dashboard_sheets. Left empty for this run.
# NOTE(review): presumably an empty list means "filter nothing" — confirm in
# A1_utilities.
appropriations_unwanted = []

In [4]:
# Run the project's import/clean helper on a single export.
# NOTE(review): going by how pmp_dashboard_sheets calls the same helper, the
# positional arguments look like (file name, sheet name, appropriations to
# filter, accounting period) — confirm against A1_utilities.
# `ap1` is only referenced by the commented-out CSV dump in the next cell.
ap1 = utils.import_and_clean(
    "exp_bud_actual_prog_622608.xls",
    "Download",
    appropriations_unwanted,
    1,
)

In [5]:
# Optional debugging aid: write the cleaned frame to a local CSV for manual inspection
# ap1.to_csv("./test.csv", index=False)

In [6]:
def clean_up_columns(df):
    """Return ``df`` with presentation-style column names.

    Every column label has its underscores replaced by spaces, is title-cased,
    and is stripped of leading/trailing whitespace, e.g. ``"ps_allocation"``
    becomes ``"Ps Allocation"``.

    The caller's frame is left untouched: the new labels are attached to a new
    DataFrame object instead of being assigned to ``df.columns``, so code that
    keeps using the raw names on the original frame is not affected.

    Args:
        df: DataFrame whose column labels are strings.

    Returns:
        A new DataFrame carrying the cleaned column labels.
    """
    # regex=False: "_" is meant literally, and being explicit keeps the result
    # independent of the pandas default for `regex`.
    pretty_names = (
        df.columns.str.replace("_", " ", regex=False).str.title().str.strip()
    )
    return df.set_axis(pretty_names, axis=1)

In [7]:
"""
Timeline Sheet
"""
# Column selection and order applied by create_timeline before the column
# names are cleaned up. Names are the raw snake_case ones.
# NOTE(review): "division" sits between "total_expenditure" and
# "total_balance" — confirm that position is intended.
timeline_right_order = [
    "appr_catg",
    "fund",
    "fund_description",
    "appropriation",
    "pec_class",
    "pec_class_description",
    "ps_allocation",
    "ps_expenditure",
    "ps_balance",
    "ps_%_expended",
    "ps_projection",
    "py_pos_alloc",
    "act__hours",
    "oe_allocation",
    "oe_encumbrance",
    "oe_expenditure",
    "oe_balance",
    "oe_enc_+_oe_exp_projection",
    "oe_%_expended",
    "total_allocation",
    "total_expenditure",
    "division",
    "total_balance",
    "total_projection",
    "total_%_expended",
    "ap",
]


def create_timeline(df):
    """Stack the current period's timeline rows on top of the saved history.

    Args:
        df: Frame for the current accounting period. It must contain
            ``year_expended_pace`` (which is dropped) and every column listed
            in ``timeline_right_order``.

    Returns:
        DataFrame with the current period's rows first, followed by the rows
        of the ``timeline`` sheet of ``running_list.xlsx``, on a fresh
        0..n-1 index.
    """
    # History of earlier accounting periods, kept in the project's GCS folder.
    previous_periods = pd.read_excel(
        f"{GCS_FILE_PATH}running_list.xlsx", sheet_name="timeline"
    )

    # Current period: drop the unused column, put the remaining columns in the
    # dashboard order, then switch to display-style column names.
    current_period = clean_up_columns(
        df.drop(columns="year_expended_pace")[timeline_right_order]
    )

    # Newest rows go on top; the index is rebuilt so it is unique again.
    stacked = pd.concat([current_period, previous_periods], sort=False)
    return stacked.reset_index(drop=True)

In [8]:
def create_psoe_timeline(df, ps_list: list, oe_list: list):
    """Build the stacked PS/OE sheet and put it on top of the saved history.

    Args:
        df: Frame for the current accounting period. After
            ``oe_enc_+_oe_exp_projection`` is renamed to ``oe_projection`` it
            must contain every column named in ``ps_list`` and ``oe_list``.
        ps_list: Columns handed to ``utils.cleaning_psoe_tpsoe`` with ``"ps"``.
        oe_list: Columns handed to ``utils.cleaning_psoe_tpsoe`` with ``"oe"``.

    Returns:
        DataFrame holding this period's PS rows, then its OE rows, then the
        rows of the ``psoe`` sheet of ``running_list.xlsx``, on a fresh index
        and with column names passed through ``clean_up_columns``.
    """
    # Give the OE projection a name that lines up with the PS one so the two
    # subsets can be stacked.
    renamed = df.rename(columns={"oe_enc_+_oe_exp_projection": "oe_projection"})

    # One subset per category (OE first, then PS, as before).
    oe_rows = utils.cleaning_psoe_tpsoe(renamed[oe_list], "oe")
    ps_rows = utils.cleaning_psoe_tpsoe(renamed[ps_list], "ps")

    # PS on top of OE, "expenditure" renamed to "expense", columns put in the
    # sheet order, and missing values replaced with 0.
    current_period = pd.concat([ps_rows, oe_rows], sort=False)
    current_period = current_period.rename(columns={"expenditure": "expense"})
    current_period = current_period[utils.psoe_right_col_order].fillna(0)

    # History of earlier accounting periods.
    previous_periods = pd.read_excel(
        f"{GCS_FILE_PATH}running_list.xlsx", sheet_name="psoe"
    )

    # NOTE(review): column names are cleaned only after the history is
    # appended (create_timeline cleans them before), so the stacked columns
    # line up only if the "psoe" sheet uses the same raw names — confirm.
    stacked = pd.concat([current_period, previous_periods], sort=False)
    return clean_up_columns(stacked.reset_index(drop=True))

In [12]:
def pmp_dashboard_sheets(
    file_name: str,
    name_of_sheet: str,
    appropriations_to_filter: list,
    accounting_period: int,
    year: str,
):
    """Build every PMP dashboard sheet for one accounting period and save them.

    Two workbooks are written under ``GCS_FILE_PATH``:

    * ``all_accounting_periods.xlsx`` with the ``timeline`` and ``psoe`` sheets.
    * ``AP_{accounting_period}_{year}.xlsx`` with the ``fund_by_div``,
      ``tspoe``, ``timeline`` and ``psoe`` sheets.

    Args:
        file_name: First argument forwarded to ``utils.import_and_clean``.
        name_of_sheet: Second argument forwarded to ``utils.import_and_clean``.
        appropriations_to_filter: Third argument forwarded to
            ``utils.import_and_clean``.
        accounting_period: Fourth argument forwarded to
            ``utils.import_and_clean``; also used in the output file name.
        year: Only used in the output file name.

    Returns:
        None. The results are the two workbooks described above.
    """
    cleaned = utils.import_and_clean(
        file_name, name_of_sheet, appropriations_to_filter, accounting_period
    )

    # One entry per output sheet, keyed by sheet name and built in the order
    # the sheets are written to the monthly workbook.
    sheets = {
        "fund_by_div": clean_up_columns(utils.create_fund_by_division(cleaned)),
        "tspoe": clean_up_columns(
            utils.create_tpsoe(cleaned, utils.tpsoe_ps_list, utils.tpsoe_oe_list)
        ),
        "timeline": create_timeline(cleaned),
        "psoe": create_psoe_timeline(
            cleaned, utils.psoe_ps_cols, utils.psoe_oe_cols
        ),
    }

    # TODO: an extra timeline filter (dropping rows for an unwanted
    # appropriation whose PS and OE allocations are both 0) used to sit here
    # in disabled form; it is not applied.

    # Workbook holding every accounting period so far.
    with pd.ExcelWriter(f"{GCS_FILE_PATH}all_accounting_periods.xlsx") as writer:
        for sheet_name in ("timeline", "psoe"):
            sheets[sheet_name].to_excel(writer, sheet_name=sheet_name, index=False)

    # Workbook for this accounting period only.
    with pd.ExcelWriter(f"{GCS_FILE_PATH}AP_{accounting_period}_{year}.xlsx") as writer:
        for sheet_name, frame in sheets.items():
            frame.to_excel(writer, sheet_name=sheet_name, index=False)

In [13]:
# Produce and save the workbooks for accounting period 2 of 21_22.
pmp_dashboard_sheets(
    file_name="AP2 August.xls",
    name_of_sheet="Download",
    appropriations_to_filter=appropriations_unwanted,
    accounting_period=2,
    year="21_22",
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

In [14]:
# NOTE(review): `df1` is never assigned in the cells shown here, and the saved
# output of this cell is a NameError — define or load it before this point.
len(df1)

NameError: name 'df1' is not defined

In [None]:
# Shape of df1. NOTE(review): df1 is not assigned in the visible cells.
df1.shape

In [None]:
# Distinct values of df1's "Appropriation" column.
# NOTE(review): df1 is not assigned in the visible cells.
df1["Appropriation"].unique()

In [None]:
# First two rows of df1. NOTE(review): df1 is not assigned in the visible cells.
df1.head(2)

In [None]:
# Columns whose totals are printed by the comparison loop in the next cell.
df1_cols = [
    "ps_allocation",
    "ps_expenditure",
    "ps_balance",
    "ps_projection",
    "year_expended_pace",
    "ps_%_expended",
    "oe_allocation",
    "oe_encumbrance",
    "oe_expenditure",
    "oe_balance",
    "oe_enc_+_oe_exp_projection",
    "oe_%_expended",
    "total_allocation",
    "total_expenditure",
    "total_balance",
    "total_projection",
    "total_%_expended",
]

In [None]:
# Print, column by column, the total in `ap12` followed by the total in
# `ap_12_test` so the two can be compared by eye.
# NOTE(review): neither frame is assigned in the cells shown here.
for column_name in df1_cols:
    print(f"\n{column_name}")
    print(ap12[column_name].sum())
    print(ap_12_test[column_name].sum())

In [None]:
# Shape of ap_12_test. NOTE(review): not assigned in the visible cells.
ap_12_test.shape

In [None]:
# Shape of ap12. NOTE(review): not assigned in the visible cells.
ap12.shape

In [None]:
# First rows of df2. NOTE(review): df2 is not assigned in the visible cells.
df2.head()

In [None]:
# Columns of df2 whose totals are printed by the loop in the next cell.
df2_cols = [
    "allocation",
    "expenditure",
    "balance",
    "encumbrance",
    "projection",
]

In [None]:
# Print the total of each df2 column listed in df2_cols.
# NOTE(review): df2 is not assigned in the cells shown here.
for column_name in df2_cols:
    print(f"\n{column_name}")
    print(df2[column_name].sum())

In [None]:
# Length of df3. NOTE(review): df3 is not assigned in the visible cells.
len(df3)

In [None]:
# Distinct values of df3's "appropriation" column.
# NOTE(review): df3 is not assigned in the visible cells.
df3["appropriation"].unique()

In [None]:
# Column labels of df3. NOTE(review): df3 is not assigned in the visible cells.
df3.columns

In [None]:
# NOTE(review): this list is not referenced by any cell shown here —
# presumably meant for a column-sum check on df3; confirm or remove.
df3_cols = [
    "ps_allocation",
    "ps_expenditure",
    "ps_balance",
    "oe_allocation",
    "ps_projection",
    "oe_expenditure",
    "oe_balance",
    "total_allocation",
    "total_expenditure",
    "total_balance",
    "total_projection",
    "oe_enc_+_oe_exp_projection",
    "total_%_expended",
]

In [None]:
# Number of rows per value of df3's "ap" column.
# NOTE(review): df3 is not assigned in the visible cells.
df3["ap"].value_counts()

In [None]:
# Shape of df4. NOTE(review): df4 is not assigned in the visible cells.
df4.shape

In [None]:
# Number of rows per value of df4's "ap" column.
# NOTE(review): df4 is not assigned in the visible cells.
df4["ap"].value_counts()

In [None]:
# NOTE(review): this list is not referenced by any cell shown here —
# presumably meant for a column-sum check on df4; confirm or remove.
df4_cols = ["allocation", "expense", "balance", "projection", "encumbrance"]