## Quarterly billing or new project report 

Request
* Pull a quarterly billing or new project report from the TIRCP spreadsheet that lists any new allocations (new project IDs) made since the previous report.
* Highlight new projects in yellow.
* Sort by Project ID, from smallest to largest.
* Include existing projects as well (no highlight).
* Give each cycle its own tab.


Columns
* Project ID	
* EA	
* Ph.	
* Dist.	
* Recipient	
* Project	
* Amount Available	
* Allocation Amount	
* Fund Type	
* Budget Year	
* Appropriation	
* CTC Allocation Date

In [None]:
import A1_data_prep
import A2_tableau
import A7_accounting_analysis
import numpy as np
import pandas as pd
from babel.numbers import format_currency
from calitp import *

# Notebook display settings: show up to 100 columns, print floats with
# two decimals, and never truncate rows or cell contents.
pd.set_option("display.max_columns", 100)
pd.set_option("display.float_format", "{:.2f}".format)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

### Function 1: Load the "Previous" Allocation Sheet to Get Its Project IDs

In [None]:
def previous_project_ids(previous_file_name: str, previous_sheet_name: str) -> set:
    """
    Load the allocation tab of a previous TIRCP Tracking Workbook
    and return the set of unique, numeric Project IDs found in it.

    Args:
        previous_file_name: workbook file name; it is appended to
            A1_data_prep.GCS_FILE_PATH to build the path that is read.
        previous_sheet_name: name of the allocation sheet in that workbook.

    Returns:
        Set of numeric project IDs. IDs that cannot be parsed as a
        number are left out.
    """
    # Load in previous allocation sheet
    previous_allocation = to_snakecase(
        pd.read_excel(
            f"{A1_data_prep.GCS_FILE_PATH}{previous_file_name}",
            sheet_name=previous_sheet_name,
        )
    )

    # Clean project ID
    previous_allocation = A7_accounting_analysis.clean_project_ids(
        previous_allocation, "project_id"
    )

    # Coerce the whole column in one call; anything that is not a
    # number becomes NaN.
    numeric_ids = pd.to_numeric(previous_allocation["project_id"], errors="coerce")

    # NaN never compares equal to another NaN, so keeping it in the set
    # could not match anything in a later comparison. Drop it here.
    return set(numeric_ids.dropna().unique().tolist())

In [None]:
# test_set = previous_project_ids("fake_allocation_sheet.xlsx", "fake_aa")

In [None]:
# len(test_set)

### Function 2: Load "Current" Allocation Sheet

In [None]:
# Allocation-sheet columns kept for the report. Every column name on
# that sheet carries the "allocation_" prefix, so only the distinct
# part of each name is spelled out here.
alloc_subset = [
    f"allocation_{field}"
    for field in (
        "award_year",
        "ppno",
        "project_id",
        "ea",
        "grant_recipient",
        "phase",
        "allocation_amount",
        "sb1_funding",
        "sb1_budget_year",
        "ggrf_funding",
        "ggrf_budget_year",
        "allocation_date",
    )
]

In [None]:
def prepare_current_allocation() -> pd.DataFrame:
    """
    Return the current allocation sheet, limited to the columns in
    alloc_subset, with project IDs converted to numbers.

    Rows whose cleaned project ID is the string "None" are removed.
    Any remaining ID that cannot be parsed as a number becomes NaN;
    those rows are kept.
    """
    # Take an explicit copy so the cleaning steps below work on an
    # independent frame rather than on a column slice of the sheet.
    alloc = A1_data_prep.clean_allocation()[alloc_subset].copy()

    # Clean Project Ids
    alloc = A7_accounting_analysis.clean_project_ids(
        alloc,
        "allocation_project_id",
    )

    # Filter out any project IDs that are none
    alloc = alloc.loc[alloc.allocation_project_id != "None"].reset_index(drop=True)

    # Coerce the whole column in one call instead of calling
    # pd.to_numeric once per row through Series.apply.
    alloc["allocation_project_id"] = pd.to_numeric(
        alloc["allocation_project_id"], errors="coerce"
    )

    return alloc

In [None]:
# alloc2 = prepare_current_allocation()

In [None]:
# alloc2.shape

In [None]:
# Clean up Project IDs
# alloc2 = A7_accounting_analysis.clean_project_ids(
#    alloc2,
#    "allocation_project_id",
# )

In [None]:
# Filter out any project IDs that are none.
# alloc2 = (alloc2.loc[alloc2.allocation_project_id != "None"]).reset_index(drop=True)

In [None]:
# len(alloc2), len(alloc.loc[alloc.allocation_project_id == "None"]), len(alloc)

### Function 3:  Find the new project ids

In [None]:
def get_new_project_ids(previous_TIRCP_file_name: str, previous_TIRCP_sheet_name: str) -> list:
    """
    Compare the project IDs in the most recent TIRCP tracking sheet
    with those in the previous one to see which projects are new.

    Args:
        previous_TIRCP_file_name: file name of the previous workbook,
            passed through to previous_project_ids().
        previous_TIRCP_sheet_name: allocation sheet name in that workbook.

    Returns:
        Sorted list of project IDs that are in the current allocation
        sheet but not in the previous one. Missing (NaN) IDs are never
        reported as new.
    """
    # Get the previous workbook's project ids in a set
    previous_ids = previous_project_ids(
        previous_TIRCP_file_name, previous_TIRCP_sheet_name
    )

    # Get current workbook's project ids
    current_id_column = prepare_current_allocation()["allocation_project_id"]

    # NaN is not equal to any other NaN, so a NaN in the current sheet
    # would survive the set difference and be flagged as a new project.
    # Remove missing IDs from both sides before comparing.
    current_ids = set(current_id_column.dropna().unique().tolist())
    known_ids = {project_id for project_id in previous_ids if not pd.isna(project_id)}

    # Sort so the result does not depend on set iteration order.
    return sorted(current_ids - known_ids)

In [None]:
# new_project_ids_list = get_new_project_ids("fake_allocation_sheet.xlsx", "fake_aa")

In [None]:
# new_project_ids_list

In [None]:
#def prepare_project():
    #project = A1_data_prep.clean_project()[project_subset]
    # project = project[project_subset]
    #return project

In [None]:
# project = prepare_project()

In [None]:
# project2.groupby(['project_award_year']).agg({'project_ppno':'nunique'})

In [None]:
# project2

In [None]:
# project2.shape

### Functions 4-6: Merge 
* One function for the first merge
* One for the melt 
* One for merging the original merged df with the melted values


In [None]:
# Project-sheet columns needed for the merge with the allocation sheet.
# Every column name on that sheet carries the "project_" prefix, so
# only the distinct part of each name is spelled out here.
project_subset = [
    f"project_{field}"
    for field in (
        "grant_recipient",
        "project_title",
        "tircp_award_amount__$_",
        "ppno",
        "district",
        "award_year",
    )
]

In [None]:
def merge1_allocation_project():
    """
    Left-join the current allocation sheet to the project sheet.

    Every allocation row is kept. Project columns (those listed in
    project_subset) are attached where PPNO and award year match.
    The result includes pandas' "_merge" indicator column.
    """
    join_keys = {
        "left_on": ["allocation_ppno", "allocation_award_year"],
        "right_on": ["project_ppno", "project_award_year"],
    }

    project_info = A1_data_prep.clean_project()[project_subset]
    current_alloc = prepare_current_allocation()

    return current_alloc.merge(
        project_info,
        how="left",
        indicator=True,
        **join_keys,
    )

In [None]:
# project_alloc = merge1_allocation_project()

In [None]:
# project_alloc.groupby(['allocation_award_year']).agg({'allocation_ppno':'nunique'})

In [None]:
def melt_ggrf_sb1(first_merged_dataframe):
    """
    Reshape the SB1 and GGRF funding columns into long format.

    The allocation sheet stores SB1 and GGRF funding in two separate
    columns. This melts them by project ID so the fund name lands in
    a "Fund Type" column ("Sb1" or "GGRF") and the dollar value in an
    "Allocation Amount" column. Only rows with an amount greater than
    zero are kept.

    first_merged_dataframe (df): input results from merge1_allocation_project()
    """
    # Source column -> label shown in "Fund Type".
    fund_labels = {
        "allocation_sb1_funding": "Sb1",
        "allocation_ggrf_funding": "GGRF",
    }

    long_funding = first_merged_dataframe.melt(
        id_vars=["allocation_project_id"],
        value_vars=list(fund_labels),
    )

    # The melted frame has one row per project ID per fund column;
    # most of them carry no money, so keep only positive amounts.
    has_funding = long_funding["value"] > 0.00
    funded = long_funding[has_funding].reset_index(drop=True)
    funded = funded.rename(
        columns={"variable": "Fund Type", "value": "Allocation Amount"}
    )

    funded["Fund Type"] = funded["Fund Type"].replace(fund_labels)

    return funded

In [None]:
def full_merge():
    """
    Build the report's base table: allocation + project columns with
    one "Fund Type" / "Allocation Amount" pair per funded source.

    Steps: run merge1_allocation_project(), melt its SB1/GGRF columns
    with melt_ggrf_sb1(), join the melted amounts back on project ID,
    drop exact duplicate rows and the redundant project columns, tidy
    the column names, and format the allocation date as "MM-YYYY".

    NOTE(review): the join key is allocation_project_id alone, so if a
    project ID appears on more than one allocation row, every one of
    those rows is paired with every funding row of that ID. Dropping
    exact duplicates does not remove such cross-pairings -- confirm
    that project IDs are unique per allocation row.
    """
    allocation_project = merge1_allocation_project()
    fund_amounts = melt_ggrf_sb1(allocation_project)

    # Join the long-format amounts back, then remove exact duplicate
    # rows and the project columns that duplicate allocation columns.
    merged = (
        allocation_project.drop(columns=["_merge"])
        .merge(fund_amounts, how="left", on=["allocation_project_id"])
        .drop_duplicates()
        .reset_index(drop=True)
        .drop(
            columns=[
                "project_grant_recipient",
                "project_ppno",
                "project_award_year",
            ]
        )
    )

    # Tidy the column names, then give the two columns the report
    # calls out by name their final labels.
    merged = A1_data_prep.clean_up_columns(merged).rename(
        columns={"Date": "CTC Allocation Date", "Id": "Project ID"}
    )

    # Show the allocation date as month-year text.
    allocation_dates = pd.to_datetime(merged["CTC Allocation Date"])
    merged["CTC Allocation Date"] = allocation_dates.dt.strftime("%m-%Y")

    return merged

In [None]:
#test = full_merge()

In [None]:
#test.columns

In [None]:
# Merge the allocation w/ project sheet
"""
m1 = pd.merge(
    alloc2,
    project2,
    how="inner",
    left_on=["allocation_ppno", "allocation_award_year"],
    right_on=["project_ppno", "project_award_year"],
    indicator=True,
)
"""

In [None]:
# m1.shape

In [None]:
# Melt based on project id
"""ggrf_sb1_values = pd.melt(
    m1,
    id_vars=["allocation_project_id"],
    value_vars=["allocation_sb1_funding", "allocation_ggrf_funding"],
)"""

In [None]:
# Keep only values above 1
"""ggrf_sb1_values = (
    (ggrf_sb1_values.loc[ggrf_sb1_values["value"] > 0.00])
    .reset_index(drop=True)
    .rename(columns={"variable": "Fund Type", "value": "Allocation Amount"})
)"""

In [None]:
# ggrf_sb1_values['Fund Type'].value_counts()

In [None]:
# Merge the m1 w/  ggrf_sb1_values
"""m2 = pd.merge(
    m1.drop(columns=["_merge"]),
    ggrf_sb1_values,
    how="left",
    on=["allocation_project_id"],
)"""

In [None]:
# m3 = m2.drop_duplicates().sort_values("allocation_project_id")

In [None]:
# new_project_ids = get_new_project_ids("fake_allocation_sheet.xlsx", "fake_aa")

In [None]:
# grouped_test.style.apply(HIGHLIGHT_COLOR)

### Function 7: Groupby & Highlight to create sheet 
* Change "Date" from 2023-01-01 to January 2023.

In [None]:
# Columns group_highlight() groups the merged table by; after the
# groupby they become the row index of each report tab.
# "Project ID" and "CTC Allocation Date" are the names assigned in
# full_merge(); the others are presumably the names produced by
# A1_data_prep.clean_up_columns -- TODO confirm against that helper.
groupby_cols = [
    "Title",
    "Grant Recipient",
    "District",
    "Tircp Award Amount  $",
    "Ea",
    "Phase",
    "Project ID",
    "CTC Allocation Date",
    "Sb1 Budget Year",
    "Ggrf Budget Year",
    "Fund Type",
]

In [None]:
def group_highlight(df, previous_TIRCP_file_name, previous_TIRCP_sheet_name):
    """
    Group the merged table for one report tab and highlight new projects.

    Args:
        df: merged allocation/project table (output of full_merge(),
            usually filtered to one award year). It is not modified.
        previous_TIRCP_file_name: file name of the previous workbook,
            used to work out which project IDs are new.
        previous_TIRCP_sheet_name: allocation sheet name in that workbook.

    Returns:
        A pandas Styler over the grouped table. The "Project ID 2"
        cell of every project that is new since the previous workbook
        has a yellow background; the others are white.
    """
    # "Project ID" becomes part of the index after the groupby, and
    # Styler.apply only styles data cells, so carry a copy of the ID
    # as a regular column. assign() returns a new frame, leaving the
    # caller's dataframe untouched.
    df = df.assign(**{"Project ID 2": df["Project ID"]})

    # NOTE(review): groupby drops rows with NaN in any key column by
    # default -- confirm the budget-year columns are never NaN here.
    grouped_df = df.groupby(groupby_cols).agg(
        {"Allocation Amount": "max", "Project ID 2": "max"}
    )

    # A set gives constant-time membership checks per cell.
    new_project_ids = set(
        get_new_project_ids(previous_TIRCP_file_name, previous_TIRCP_sheet_name)
    )

    def highlight_new(project_ids):
        """Return one background-color style per project ID in the column."""
        styles = []
        for project_id in project_ids:
            color = "yellow" if project_id in new_project_ids else "white"
            styles.append(f"background-color: {color}")
        return styles

    # Restrict the highlighter to the ID column. Without `subset` it
    # also runs over "Allocation Amount", where a dollar amount that
    # happens to equal a new project ID would be highlighted by mistake.
    return grouped_df.style.apply(highlight_new, subset=["Project ID 2"])

### Function 8: Wrap everything up. 
* Projects should be in different tabs based on whatever cycle they correspond with

In [None]:
def create_quarterly_billing(previous_TIRCP_file_name: str, previous_TIRCP_sheet_name: str):
    """
    Build the quarterly billing workbook and save it to GCS.

    The merged allocation/project table is split by award year (2015,
    2016, 2018 and 2020). Each slice is grouped and highlighted with
    group_highlight() and written to its own sheet of
    "quarterly_billing.xlsx" under A1_data_prep.GCS_FILE_PATH.

    Args:
        previous_TIRCP_file_name: file name of the previous workbook,
            used to decide which project IDs are new.
        previous_TIRCP_sheet_name: allocation sheet name in that workbook.

    Returns:
        The styled table for award year 2015.
    """
    # Call the merged Allocation-Project sheet
    all_projects = full_merge()

    # Build every tab before opening the writer.
    styled_by_year = {}
    for award_year in (2015, 2016, 2018, 2020):
        in_cycle = all_projects["Award Year"] == award_year
        cycle_projects = all_projects.loc[in_cycle].reset_index(drop=True)
        styled_by_year[award_year] = group_highlight(
            cycle_projects, previous_TIRCP_file_name, previous_TIRCP_sheet_name
        )

    # One sheet per award cycle.
    # https://stackoverflow.com/questions/14225676/save-list-of-dataframes-to-multisheet-excel-spreadsheet
    with pd.ExcelWriter(f"{A1_data_prep.GCS_FILE_PATH}quarterly_billing.xlsx") as writer:
        for award_year, styled_table in styled_by_year.items():
            styled_table.to_excel(
                writer,
                sheet_name=f"TIRCP_Selected_Projects_{award_year}",
                index=True,
            )
    print("Saved to GCS.")

    return styled_by_year[2015]

In [None]:
# Run the full report against the given previous workbook and sheet.
# This call writes quarterly_billing.xlsx and keeps the styled 2015
# table that create_quarterly_billing returns.
test1 =  create_quarterly_billing("fake_allocation_sheet.xlsx", "fake_aa")

In [None]:
# merge2["Award Year"].unique()

In [None]:
#for year in project_years:
#    filtered =  (merge2.loc[merge2["Award Year"] == year]).reset_index(drop = True)
#    exec(f"df_{year} = group_sheet(filtered)")

In [None]:
# df_2015

### Scratch Area

In [None]:
def create_quarterly_billing(previous_TIRCP_file_name: str, previous_TIRCP_sheet_name: str):
    """
    Scratch variant of the report builder: builds the highlighted
    table for every award cycle but does not write the workbook.

    NOTE: this reuses the name create_quarterly_billing, so running
    this cell replaces the earlier definition that saves to GCS.

    Args:
        previous_TIRCP_file_name: file name of the previous workbook,
            used to decide which project IDs are new.
        previous_TIRCP_sheet_name: allocation sheet name in that workbook.

    Returns:
        The styled table for award year 2015.
    """
    # Call the merged Allocation-Project sheet
    all_projects = full_merge()

    # The earlier draft built per-year frames with exec() inside an
    # f-string whose inner quotes were unescaped (a SyntaxError), looped
    # over an undefined `project_years`, and called the undefined
    # `group_sheet`. A plain loop over the award cycles that calls
    # group_highlight() does the same job.
    styled_by_year = {}
    for award_year in (2015, 2016, 2018, 2020):
        in_cycle = all_projects["Award Year"] == award_year
        cycle_projects = all_projects.loc[in_cycle].reset_index(drop=True)
        styled_by_year[award_year] = group_highlight(
            cycle_projects, previous_TIRCP_file_name, previous_TIRCP_sheet_name
        )

    # Saving to GCS is intentionally left out of this scratch version.
    return styled_by_year[2015]

In [None]:
def create_quarterly_billing2(previous_TIRCP_file_name: str, previous_TIRCP_sheet_name: str):
    """
    Scratch dry run of the quarterly billing report: builds the
    highlighted table for every award cycle without writing a workbook.

    Args:
        previous_TIRCP_file_name: file name of the previous workbook,
            used to decide which project IDs are new.
        previous_TIRCP_sheet_name: allocation sheet name in that workbook.

    Returns:
        The styled table for award year 2015.
    """
    # Call the merged Allocation-Project sheet
    all_projects = full_merge()

    # group_highlight() replaces the undefined `group_sheet` the draft
    # called. It looks up the new project IDs itself, so the separate
    # (unused) lookup the draft did up front is gone.
    styled_by_year = {}
    for award_year in (2015, 2016, 2018, 2020):
        in_cycle = all_projects["Award Year"] == award_year
        cycle_projects = all_projects.loc[in_cycle].reset_index(drop=True)
        styled_by_year[award_year] = group_highlight(
            cycle_projects, previous_TIRCP_file_name, previous_TIRCP_sheet_name
        )

    # Saving to GCS is intentionally left out of this scratch version.
    return styled_by_year[2015]