## Master Agreement/PSA Report 
* Goal: Make it easy for branch chiefs to know whether a Master Agreement and/or Program Supplement is needed or is going to expire in the next year.
* This will be a monthly report.


In [1]:
import A1_data_prep
import A2_tableau
import A5_crosswalks
import numpy as np
import pandas as pd
from babel.numbers import format_currency
from calitp import *

# Notebook display settings: show up to 100 columns, render floats with two
# decimals, and do not truncate rows or cell contents when a frame is displayed.
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

### Project Sheet
* Column A - Award Year  
* Column B - Project #       
* Column C - Grant Recipient          
* Column D - Project Title 
* Column E - PPNO             
* Column I - Master Agreement Number
* Column J - Master Agreement Expiration Date
* Column K - Project Manager

### Criteria
* If the master agreement field (column I Master Agreement Number, tab 1) is empty, the information should appear on the report
* If there is less than a year until the Master Agreement Expiration Date, the information should appear on the report

In [2]:
# Columns kept from the project sheet for the Master Agreement report
# (used by create_master_agreement to subset the frame).
master_agreement_cols = [
    "project_award_year",
    "project_project_#",
    "project_grant_recipient",
    "project_project_title",
    "project_ppno",
    "project_master_agreement_number",
    "project_master_agreement_expiration_date",
    "project_project_manager",
]

In [5]:
# Tag projects with the appropriate comment
def ma_comments(row):
    """
    Return the report tag for a single project row.

    Rows whose ``project_master_agreement_number`` equals the string "None"
    or "Pending" are tagged as lacking a Master Agreement number; every other
    row receives the expiration tag.
    """
    agreement_number = row["project_master_agreement_number"]
    missing_markers = ("None", "Pending")

    if any(agreement_number == marker for marker in missing_markers):
        return "None/Pending Master Agreement Number"
    return "<1 a year until Master Agreement Expiration Date"

In [6]:
# project = A1_data_prep.clean_up_columns(project)

In [7]:
def create_master_agreement(expiration_year: int):
    """
    Build the Master Agreement portion of the report.

    Keeps project rows that either have a Master Agreement expiration date
    falling in ``expiration_year`` or whose Master Agreement number is the
    string "None" or "Pending", then tags each kept row with ``ma_comments``.

    Args:
        expiration_year: calendar year matched against the year of
            ``project_master_agreement_expiration_date``.

    Returns:
        DataFrame of the flagged projects with an added ``Tag`` column,
        passed through ``A1_data_prep.clean_up_columns``.
    """
    # Work on an explicit copy of the column subset so the assignments below
    # modify this frame instead of a slice of the frame clean_project() returned
    # (avoids pandas' SettingWithCopyWarning).
    project = A1_data_prep.clean_project()[master_agreement_cols].copy()

    # Coerce to datetime; values that cannot be parsed become NaT
    project["project_master_agreement_expiration_date"] = project[
        "project_master_agreement_expiration_date"
    ].apply(pd.to_datetime, errors="coerce")

    # Condition 1: Master Agreement expires in the requested year.
    # NOTE(review): the written criterion is "less than a year until the
    # expiration date", but this matches on calendar year only - confirm.
    expires_in_year = (
        project["project_master_agreement_expiration_date"].dt.year == expiration_year
    )

    # Condition 2: Master Agreement number is recorded as "None" or "Pending"
    missing_number = project["project_master_agreement_number"].isin(
        ["None", "Pending"]
    )

    # Keep rows meeting either condition
    project = project[expires_in_year | missing_number].reset_index(drop=True)

    project["Tag"] = project.apply(ma_comments, axis=1)

    return A1_data_prep.clean_up_columns(project)

In [8]:
# Build the Master Agreement report for agreements expiring in 2023
project_test = create_master_agreement(2023)

  warn(msg)


In [9]:
# Number of projects flagged for the Master Agreement report
len(project_test)

16

### Allocation Sheet
* Column F - Project ID
* Column E - EA
* Column I - Phase 
* Column J - Allocation Amount
* Column T - Allocation Date 
* Column V - PSA #
* Column Q - Allocation Amendment
* Column AB - Date Branch Chief Receives PSA       
* Column AC - Date Regional Coordinator Receives PSA      
* Column AD - Date OC Receives PSA          
* Column AE - Date OPM Receives PSA      
* Column AF - Date Legal Receives PSA      
* Column AG - Date Returned to PM           
* Column AH - Date PSA Sent to Local Agency         
* Column AI - Date PSA Approved by Local Agency 
* Column AJ - Date Signed by DRMT
* Column AK – PSA Expiry Date


In [10]:
# Load the allocation sheet via the data prep module
allocation = A1_data_prep.clean_allocation()

In [11]:
# allocation.columns

In [12]:
# Allocation-sheet columns (still carrying the "allocation_" prefix) that
# PSA_report keeps when it loads the allocation sheet.
alloc_cols = [
    "allocation_grant_recipient",
    'allocation_project_#',
    "allocation_components",
    "allocation_ppno",
    "allocation_project_id",
    "allocation_ea",
    "allocation_phase",
    "allocation_allocation_amount",
    "allocation_allocation_date",
    "allocation_psa_#",
    "allocation_date_branch_chief_receives_psa",
    "allocation_date_regional_coordinator_receives_psa",
    "allocation_date_oc_receives_psa",
    "allocation_date_opm_receives_psa",
    "allocation_date_legal_receives_psa",
    "allocation_date_returned_to_pm",
    "allocation_date_psa_sent_to_local_agency",
    "allocation_date_psa_approved_by_local_agency",
    "allocation_ctc_allocation_amendment",
    "allocation_date_signed_by_drmt",
    "allocation_psa_expiry_date",
]

In [13]:
# alloc2 = alloc[alloc_cols]

#### Criteria #1
* If there is new information in column Q (Allocation Amendment) and no change in column AJ (Date Signed by DRMT), then the information should be on the report
* https://stackoverflow.com/questions/54879260/how-to-highlight-differences-in-pandas-data-frame-after-concatenating-them

In [14]:
def summarize_rows(df, col_to_group: list, col_to_summarize: str):
    """
    Collapse rows that share the same ``col_to_group`` values into one row.

    The string values of ``col_to_summarize`` within each group are joined
    with commas.

    Args:
        df: frame to collapse.
        col_to_group: column names that identify a group.
        col_to_summarize: string column whose values are comma-joined.

    Returns:
        DataFrame with one row per group: the grouping columns plus the
        joined ``col_to_summarize`` column.
    """
    values_by_group = df.groupby(col_to_group)[col_to_summarize]
    joined = values_by_group.apply(",".join)
    return joined.reset_index()

In [15]:
def create_unique_id(df):
    """
    Add a lower-cased ``unique_id`` column to ``df`` and return ``df``.

    The id is built from the string forms of ``project_id``, ``ppno``,
    ``components`` and ``phase`` joined by "-", immediately followed by
    ``allocation_amount``. The column is written onto the frame that was
    passed in.
    """

    def as_text(column: str):
        return df[column].astype(str)

    dashed_part = (
        as_text("project_id")
        + "-"
        + as_text("ppno")
        + "-"
        + as_text("components")
        + "-"
        + as_text("phase")
    )

    # NOTE(review): the amount is appended without a "-" separator, unlike the
    # other parts. Left as-is so existing ids keep the same value.
    df["unique_id"] = (dashed_part + as_text("allocation_amount")).str.lower()
    return df

In [16]:
def clean_date_signed_ctc_drmt(df, my_prefix: str):
    """
    Reduce an allocation sheet to the fields used by the PSA condition
    "CTC allocation amendment changed but Date Signed by DRMT did not".

    Args:
        df: allocation sheet with snake_case column names.
        my_prefix: prefix added to every returned column name.

    Returns:
        DataFrame with the prefixed columns ``unique_id``,
        ``date_signed_by_drmt`` and ``ctc_allocation_amendment``.
    """
    required_populated = ["award_year", "grant_recipient", "ppno"]
    comparison_cols = [
        "ppno",
        "project_id",
        "phase",
        "date_signed_by_drmt",
        "components",
        "allocation_amount",
        "ctc_allocation_amendment",
    ]
    output_cols = ["unique_id", "date_signed_by_drmt", "ctc_allocation_amendment"]

    # Rows missing any of these fields are not completely filled in: drop them
    populated = df.dropna(subset=required_populated)

    # Manual cleaning from the data prep module
    cleaned = A1_data_prep.clean_allocation_manual(populated)

    # Keep only the needed columns; missing values become the string "None"
    trimmed = cleaned[comparison_cols].fillna("None")

    # Build the id, then keep just the id and the two compared columns
    with_id = create_unique_id(trimmed)

    return with_id[output_cols].add_prefix(my_prefix)

In [17]:
# alloc_test = clean_date_signed_drmt(allocation, 'test')

In [18]:
def date_signed_drmt_ctc_condition(previous_sheet_path: str, previous_sheet_name: str):
    """
    Find allocation rows whose CTC allocation amendment differs between a
    previous allocation sheet and the current one while Date Signed by DRMT
    is the same in both.

    Args:
        previous_sheet_path: workbook file name, appended to
            ``A1_data_prep.GCS_FILE_PATH``.
        previous_sheet_name: name of the sheet to read from that workbook.

    Returns:
        List of unique ids (as built by ``create_unique_id``) of the rows
        meeting the condition.
    """
    # Current allocation sheet
    new = A1_data_prep.load_allocation()

    # Earlier version of the allocation sheet to compare against
    previous = to_snakecase(
        pd.read_excel(
            f"{A1_data_prep.GCS_FILE_PATH}{previous_sheet_path}",
            sheet_name=previous_sheet_name,
        )
    )

    previous = clean_date_signed_ctc_drmt(previous, "previous_")
    new = clean_date_signed_ctc_drmt(new, "new_")

    # Pair each previous row with its current counterpart and drop duplicates
    merged = pd.merge(
        previous,
        new,
        how="inner",
        left_on="previous_unique_id",
        right_on="new_unique_id",
    ).drop_duplicates()

    # Conditions are kept as local masks instead of being written onto the
    # de-duplicated frame, which avoids pandas' SettingWithCopyWarning.
    # First condition: did date signed by DRMT remain the same?
    dates_match = (
        merged["previous_date_signed_by_drmt"] == merged["new_date_signed_by_drmt"]
    )
    # Second condition: did the CTC allocation amendment column stay the same?
    ctc_match = (
        merged["previous_ctc_allocation_amendment"]
        == merged["new_ctc_allocation_amendment"]
    )

    # New information in the CTC column and no change in Date Signed by DRMT
    flagged = merged[~ctc_match & dates_match]

    return flagged["previous_unique_id"].tolist()

In [19]:
# date_signed_change_list = date_signed_drmt_ctc_condition("fake_allocation_sheet.xlsx")

In [20]:
# m2 = (m2[(m2["does_ctc_match"] == False) & (m2["do_dates_match"] == True)]).reset_index(drop = True)

In [21]:
# Tag: if the two previous dates match
# m2["Tag"] = ((m2["previousdate_signed_by_drmt"] == m2['newdate_signed_by_drmt'])).astype('str')

#### Criteria #2 and #3
* If there is a date in the “Allocation Date” field (column T) and no date in the “Date Signed by DRMT” (column AJ), then the information should be on the report
* If there are 6 months or less until the expiry date (column AK), then the information should be on the report


In [22]:
# Strip the "allocation_" prefix from the column names. The pattern is a
# regular expression (anchored with "^"), so regex=True is passed explicitly:
# relying on the default raises a FutureWarning and, once the default is
# regex=False, the pattern would be treated literally and match nothing.
allocation.columns = allocation.columns.str.replace("^allocation_", "", regex=True)

  allocation.columns = allocation.columns.str.replace("^allocation_", "")


In [23]:
# allocation = create_unique_id(allocation)

In [24]:
# allocation = (allocation[allocation.unique_id.isin(date_signed_change_list)]).reset_index(drop = True)

In [25]:
# Title-cased column names (matching the renaming done in PSA_report) passed
# to summarize_rows as the grouping keys, so that a project meeting several
# conditions ends up on one row with its tags joined.
cols_to_group = [
    "Grant Recipient",
    "Project #",
    "Components",
    "Ppno",
    "Project Id",
    "Ea",
    "Phase",
    "Allocation Amount",
    "Allocation Date",
    "Psa #",
    "Date Branch Chief Receives Psa",
    "Date Regional Coordinator Receives Psa",
    "Date Oc Receives Psa",
    "Date Opm Receives Psa",
    "Date Legal Receives Psa",
    "Date Returned To Pm",
    "Date Psa Sent To Local Agency",
    "Date Psa Approved By Local Agency",
    "Ctc Allocation Amendment",
    "Date Signed By Drmt",
    "Psa Expiry Date",
    "Unique Id",
]

In [26]:
# Project-sheet columns PSA_report merges in to attach the project manager
psa_project_cols = ["project_ppno", "project_project_#", "project_project_manager"]

In [27]:
def PSA_report(
    start_date: str,
    end_date: str,
    previous_sheet_path: str = "fake_allocation_sheet.xlsx",
    previous_sheet_name: str = "fake_aa",
):
    """
    Build the Program Supplement Agreement (PSA) report.

    An allocation row is included when it meets at least one condition:
      1. its CTC allocation amendment changed versus the previous allocation
         sheet while Date Signed by DRMT did not change;
      2. its PSA expiry date falls strictly between ``start_date`` and
         ``end_date``;
      3. its allocation date is filled but Date Signed by DRMT is empty.
    Rows meeting several conditions appear once, with the tags comma-joined.

    Args:
        start_date: lower bound (exclusive) for the PSA expiry date,
            e.g. "2023-01-01".
        end_date: upper bound (exclusive) for the PSA expiry date.
        previous_sheet_path: workbook holding the previous allocation sheet,
            forwarded to ``date_signed_drmt_ctc_condition``.
        previous_sheet_name: sheet name within that workbook.

    Returns:
        DataFrame with title-cased allocation columns, a ``Tag`` column and a
        ``Project Manager`` column taken from the project sheet.
    """
    # Load sheets. The allocation subset is copied because it is modified
    # below (renamed columns, new unique_id column); without the copy pandas
    # warns about setting values on a slice.
    allocation = A1_data_prep.clean_allocation()[alloc_cols].copy()
    project = A1_data_prep.clean_project()[psa_project_cols]

    # Remove prefixes. "^allocation_" is a regular expression, so regex=True
    # must be explicit; with regex=False it would be matched literally.
    allocation.columns = allocation.columns.str.replace(
        "^allocation_", "", regex=True
    )

    # Create unique ids
    allocation = create_unique_id(allocation)

    # Condition 1: new information in the CTC column and no change in
    # Date Signed by DRMT.
    # NOTE(review): the ids in this list are built after missing values were
    # replaced by "None", while the ids built above are not - rows with
    # missing id fields may never match. Confirm against clean_allocation().
    date_signed_drmt_list = date_signed_drmt_ctc_condition(
        previous_sheet_path, previous_sheet_name
    )
    date_signed_drmt = allocation[
        allocation.unique_id.isin(date_signed_drmt_list)
    ].reset_index(drop=True)
    date_signed_drmt[
        "Tag"
    ] = "New info in CTC Alloc Ammendment but date signed by DRMT didn't change"

    # Condition 2: PSA expires inside the requested window. Dates are compared
    # as datetimes (time of day dropped) rather than as strings; missing
    # expiry dates never satisfy the comparison.
    expiry_day = allocation.psa_expiry_date.dt.normalize()
    expiration_condition = (expiry_day > pd.Timestamp(start_date)) & (
        expiry_day < pd.Timestamp(end_date)
    )
    will_expire = allocation[expiration_condition].reset_index(drop=True)
    will_expire["Tag"] = "PSA will expire in < 6 months"

    # Condition 3: allocation date is filled but Date Signed by DRMT is empty
    allocation_drmt_date_condition = (
        allocation.allocation_date.notna() & allocation.date_signed_by_drmt.isna()
    )
    allocation_drmt_date = allocation[allocation_drmt_date_condition].reset_index(
        drop=True
    )
    allocation_drmt_date[
        "Tag"
    ] = "Allocation date is filled but date signed by DRMT is empty"

    # Concat them
    c1 = pd.concat([date_signed_drmt, will_expire, allocation_drmt_date])

    # Clean up: fill missing values and title-case the column names
    c1 = c1.fillna("None")
    c1.columns = (
        c1.columns.str.replace("_", " ", regex=False).str.title().str.strip()
    )

    # Projects meeting multiple conditions appear more than once:
    # place the conditions they meet on the same line
    c1 = summarize_rows(c1, cols_to_group, "Tag")

    # Replace PPNO values using the crosswalk
    c1["Ppno"] = c1["Ppno"].replace(A5_crosswalks.ppno_crosswalk_allocation)

    # Merge with project tracking tab to get project manager info
    m1 = pd.merge(
        c1,
        project,
        how="left",
        left_on=["Ppno", "Project #"],
        right_on=["project_ppno", "project_project_#"],
    )

    m1 = m1.drop(columns=["project_project_#", "project_ppno", "Unique Id"])
    m1 = m1.rename(columns={"project_project_manager": "Project Manager"})

    return m1

In [28]:
# Build the PSA report for PSAs expiring between 2023-01-01 and 2023-06-01
m1 = PSA_report("2023-01-01", "2023-06-01")

  allocation.columns = allocation.columns.str.replace("^allocation_", "")
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["_3rd_party_award_date"] = df["_3rd_party_award_date"].replace(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["led"] = df["led"].replace(crosswalks.allocation_led)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["pha

In [29]:
# Number of report rows per tag (or combination of tags)
m1.Tag.value_counts()

Allocation date is filled but date signed by DRMT is empty                                                                           48
New info in CTC Alloc Ammendment but date signed by DRMT didn't change                                                               17
New info in CTC Alloc Ammendment but date signed by DRMT didn't change,Allocation date is filled but date signed by DRMT is empty     5
PSA will expire in < 6 months                                                                                                         2
Name: Tag, dtype: int64

In [30]:
#m1[['Grant Recipient', 'Project #','Ppno', 'project_project_#', 'project_project_title',
#       'project_ppno', 'project_project_manager']]

In [31]:
# exporting 2 dataframes into a single workbook
#with pd.ExcelWriter(f"{A1_data_prep.GCS_FILE_PATH}MA_PSA.xlsx") as writer:
#    m1.to_excel(writer, sheet_name="PSA", index=False)