## Master Agreement/PSA Report 
* Goal: Make it easy for branch chiefs to know whether a Master Agreement and/or Program Supplement is needed or is going to expire in the next year.
* This will be a monthly report.


In [1]:
import A1_data_prep
import A2_tableau
import numpy as np
import pandas as pd
from babel.numbers import format_currency
from calitp import *

pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

### Project Sheet
* Column A - Award Year  
* Column B - Project #       
* Column C - Grant Recipient          
* Column D - Project Title 
* Column E - PPNO             
* Column I - Master Agreement Number
* Column J - Master Agreement Expiration Date
* Column K - Project Manager

#### Criteria
* If the master agreement field (column I Master Agreement Number, tab 1) is empty, the information should appear on the report
* If there is less than a year until the Master Agreement Expiration Date, the information should appear on the report

In [2]:
# Subset of cols
# Column names kept from the cleaned project sheet for the Master Agreement
# report; they correspond to the Project Sheet columns listed in the markdown above.
project_cols = [
    "project_award_year",
    "project_project_#",
    "project_grant_recipient",
    "project_project_title",
    "project_ppno",
    "project_master_agreement_number",
    "project_master_agreement_expiration_date",
    "project_project_manager",
]

In [3]:
# Tag projects with the appropriate comment
def ma_comments(row):
    """Return the report comment for one project row.

    Args:
        row: mapping-like object (e.g. a DataFrame row) exposing the key
            "project_master_agreement_number".

    Returns:
        "None/Pending Master Agreement Number" when the agreement number is the
        string "None" or "Pending"; otherwise the expiration comment.
    """
    agreement_number = row["project_master_agreement_number"]

    # Placeholder values mean no master agreement number has been recorded.
    if agreement_number in ("None", "Pending"):
        return "None/Pending Master Agreement Number"

    # Every other row gets the expiration comment.
    return "<1 a year until Master Agreement Expiration Date"

In [4]:
def master_agreement(expiration_year: int):
    """Build the Master Agreement section of the report.

    Loads the cleaned project sheet, keeps the columns in `project_cols`, and
    returns the projects that either have no master agreement number yet
    ("None" or "Pending") or whose master agreement expires during
    `expiration_year`.

    Args:
        expiration_year: calendar year compared against the year of the
            master agreement expiration date.

    Returns:
        DataFrame restricted to `project_cols` plus a "Comments" column
        produced by `ma_comments`, with a fresh 0..n-1 index.
    """
    # Take an explicit copy of the column subset so the datetime coercion below
    # writes to an independent frame rather than to a slice of the loaded data
    # (avoids pandas' SettingWithCopyWarning / chained-assignment ambiguity).
    project = A1_data_prep.clean_project()[project_cols].copy()

    # Coerce to datetime; values that cannot be parsed become NaT.
    project["project_master_agreement_expiration_date"] = project[
        "project_master_agreement_expiration_date"
    ].apply(pd.to_datetime, errors="coerce")

    # Conditions
    # "Less than a year until expiration" is implemented as: the expiration
    # date falls in the requested calendar year.
    expires_in_year = (
        project["project_master_agreement_expiration_date"].dt.year == expiration_year
    )
    # The master agreement number (column I, tab 1) has not been filled in.
    missing_number = project["project_master_agreement_number"].isin(
        ["None", "Pending"]
    )

    # Keep rows meeting either criterion.
    project = project[expires_in_year | missing_number].reset_index(drop=True)

    project["Comments"] = project.apply(ma_comments, axis=1)

    return project

In [5]:
project_test = master_agreement(2023)

  warn(msg)


In [6]:
len(project_test)

16

### Allocation Sheet
* Column F - Project ID
* Column E - EA
* Column I - Phase 
* Column J - Allocation Amount
* Column T - Allocation Date 
* Column V - PSA #
* Column Q - Allocation Amendment
* Column AB - Date Branch Chief Receives PSA       
* Column AC - Date Regional Coordinator Receives PSA      
* Column AD - Date OC Receives PSA          
* Column AE - Date OPM Receives PSA      
* Column AF - Date Legal Receives PSA      
* Column AG - Date Returned to PM           
* Column AH - Date PSA Sent to Local Agency         
* Column AI - Date PSA Approved by Local Agency 
* Column AJ - Date Signed by DRMT
* Column AK - PSA Expiry Date


In [57]:
alloc = A1_data_prep.clean_allocation()

In [86]:
# Column names kept from the cleaned allocation sheet for the PSA report.
# Every name carries the "allocation_" prefix.
alloc_cols = [
    "allocation_grant_recipient",
    "allocation_components",
    "allocation_ppno",
    "allocation_project_id",
    "allocation_ea",
    "allocation_phase",
    "allocation_allocation_amount",
    "allocation_allocation_date",
    "allocation_psa_#",
    "allocation_date_branch_chief_receives_psa",
    "allocation_date_regional_coordinator_receives_psa",
    "allocation_date_oc_receives_psa",
    "allocation_date_opm_receives_psa",
    "allocation_date_legal_receives_psa",
    "allocation_date_returned_to_pm",
    "allocation_date_psa_sent_to_local_agency",
    "allocation_date_psa_approved_by_local_agency",
    "allocation_ctc_allocation_amendment",
    "allocation_date_signed_by_drmt",
    "allocation_psa_expiry_date",
]


In [87]:
# Keep only the report columns. The alloc_cond1 / alloc_cond2 cells further
# down read `alloc2`, so it must be defined for the notebook to run top to bottom.
alloc2 = alloc[alloc_cols]

#### If there is new information in column AJ (Date Signed by DRMT) and no change in column [TODO: specify which column], then the information should be on the report
* https://stackoverflow.com/questions/54879260/how-to-highlight-differences-in-pandas-data-frame-after-concatenating-them

In [96]:
def clean_date_signed_drmt(df, my_prefix: str):
    """Reduce an allocation sheet to a unique id plus its "Date Signed by DRMT".

    Relies on `create_unique_id`, which this notebook defines in a later cell;
    that cell must have been run before this function is called.

    Args:
        df: allocation sheet with snake_case column names.
        my_prefix: text prepended to both output column names. No separator is
            added, so "old" yields "oldunique_id" and "olddate_signed_by_drmt".

    Returns:
        Two-column DataFrame: the prefixed unique id and the prefixed
        date_signed_by_drmt.
    """
    required_cols = ["award_year", "grant_recipient", "ppno"]
    id_source_cols = [
        "ppno",
        "project_id",
        "phase",
        "date_signed_by_drmt",
        "components",
        "allocation_amount",
    ]

    # Rows missing any of the required columns are treated as incomplete and dropped.
    populated = df.dropna(subset=required_cols)

    # Manual clean-up shared with the rest of the allocation prep.
    # (Organization-name de-duplication via A1_data_prep.organization_cleaning
    # is currently disabled.)
    cleaned = A1_data_prep.clean_allocation_manual(populated)

    # Every remaining missing value, whatever the column, becomes the string "None".
    trimmed = cleaned[id_source_cols].fillna("None")

    # Build the id used to line up the old and new sheets.
    with_id = create_unique_id(trimmed)

    return with_id[["unique_id", "date_signed_by_drmt"]].add_prefix(my_prefix)

In [97]:
def date_signed_drmt():
    """Load the old and new allocation sheets, reduced for comparison.

    Returns:
        Tuple (old, new) of DataFrames produced by `clean_date_signed_drmt`,
        with column names prefixed "old" and "new" respectively.
    """
    # Original allocation sheet
    original_sheet = A1_data_prep.load_allocation()

    # Stand-in for an updated sheet: a copy populated with fake values
    # in the Date signed by DRMT column.
    fake_sheet_path = f"{A1_data_prep.GCS_FILE_PATH}fake_allocation_sheet.xlsx"
    updated_sheet = to_snakecase(pd.read_excel(fake_sheet_path, sheet_name="fake_aa"))

    return (
        clean_date_signed_drmt(original_sheet, "old"),
        clean_date_signed_drmt(updated_sheet, "new"),
    )

In [98]:
old_allocation_df, new_allocation_df = date_signed_drmt()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["_3rd_party_award_date"] = df["_3rd_party_award_date"].replace(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["led"] = df["led"].replace(crosswalks.allocation_led)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["phase_completion_date"] = df["phase_completion_date"].replace(
A value is try

In [99]:
# Why are there 4 more rows in the new sheet than in the old one?
old_allocation_df.shape, new_allocation_df.shape

((370, 2), (374, 2))

In [100]:
new_allocation_df.newunique_id.nunique(), old_allocation_df.oldunique_id.nunique()

(370, 366)

In [101]:
# Inner join: keep only the ids that appear in both the old and the new sheet.
# Ids repeat on each side (each frame has more rows than unique ids, per the
# counts displayed above), so a repeated id can pair up more than once.
m1 = pd.merge(
    old_allocation_df,
    new_allocation_df,
    how="inner",
    left_on="oldunique_id",
    right_on="newunique_id",
)

In [102]:
m1.shape

(246, 4)

In [103]:
# Why are there so many duplicates? 463 to 228.
# NOTE(review): the counts above do not match the shapes displayed in this run
# (m1 has 246 rows, m2 has 232) - presumably left over from an earlier run.
m2 = m1.drop_duplicates()

In [104]:
m2.shape

(232, 4)

In [105]:
# Keep only rows whose Date Signed by DRMT is filled in on BOTH sheets
# (missing values were replaced with the string "None" during cleaning).
# NOTE(review): this also drops rows that are empty in the old sheet but filled
# in the new one - confirm that "new information" rows should be excluded.
m2 = m2.loc[
    m2["olddate_signed_by_drmt"].ne("None") & m2["newdate_signed_by_drmt"].ne("None")
].reset_index(drop=True)

In [106]:
# True where the old and new Date Signed by DRMT values are identical
m2["Tag"] = m2["olddate_signed_by_drmt"].eq(m2["newdate_signed_by_drmt"])

In [107]:
# Turn the booleans into the strings "True" / "False"
m2["Tag"] = m2["Tag"].astype(str)

In [108]:
# Swap the stringified booleans for the labels shown on the report
m2["Tag"] = m2["Tag"].replace(
    {
        "False": "Date Signed by DRMT was Changed",
        "True": "No change to Date Signed by DRMT",
    }
)

In [109]:
# m2["Tag"] ="If there is new information and no change in column in Date Signed by DRMT"

In [110]:
m2

Unnamed: 0,oldunique_id,olddate_signed_by_drmt,newunique_id,newdate_signed_by_drmt,Tag
0,16000048-cp005-purchase 13 60-foot articulated brt buses and 16 45-foot electric commuter buses-const24403000.0,2021-02-02 00:00:00,16000048-cp005-purchase 13 60-foot articulated brt buses and 16 45-foot electric commuter buses-const24403000.0,2021-02-02 00:00:00,No change to Date Signed by DRMT
1,16000276-cp012-track and curve improvements between san jose and martinez for faster journeys benefiting capitol corridor passengers-const4620000.0,2016-12-13 00:00:00,16000276-cp012-track and curve improvements between san jose and martinez for faster journeys benefiting capitol corridor passengers-const4620000.0,2016-12-13 00:00:00,No change to Date Signed by DRMT
2,"16000329-cp015-replace blue line signal system; install new track crossovers, new train controls at 15 locations, new led signals and power switches, 19 turnouts, new track, overhead catenary, and a communications upgrade.-const38494000.0",2017-08-04 00:00:00,"16000329-cp015-replace blue line signal system; install new track crossovers, new train controls at 15 locations, new led signals and power switches, 19 turnouts, new track, overhead catenary, and a communications upgrade.-const38494000.0",2017-08-04 00:00:00,No change to Date Signed by DRMT
3,0016000119-cp007-lossan and 12 transit agencies from san luis obispo to san diego counties to use seamless ticketing to increase use of transit-const1675000.0,2020-01-31 00:00:00,0016000119-cp007-lossan and 12 transit agencies from san luis obispo to san diego counties to use seamless ticketing to increase use of transit-const1675000.0,2020-01-31 00:00:00,No change to Date Signed by DRMT
4,16000275-cp013-renovation and expansion of the monterey maintenance and operations facility.-const10000000.0,2016-11-21 00:00:00,16000275-cp013-renovation and expansion of the monterey maintenance and operations facility.-const10000000.0,2016-11-21 00:00:00,No change to Date Signed by DRMT
5,16000041-cp004-purchase five 40-foot cng buses for brt route linking sartc to metrolink/amtrak-const2320000.0,2016-05-14 00:00:00,16000041-cp004-purchase five 40-foot cng buses for brt route linking sartc to metrolink/amtrak-const2320000.0,2016-05-14 00:00:00,No change to Date Signed by DRMT
6,16000007-cp001-refurbishment of seven utdc light rail vehicles used throughout the system-const6427000.0,2019-10-25 00:00:00,16000007-cp001-refurbishment of seven utdc light rail vehicles used throughout the system-const6427000.0,2019-10-25 00:00:00,No change to Date Signed by DRMT
7,16000008-cp003-bus rapid transit route between downtown san diego and the otay mesa crossing-const4000000.0,2016-05-09 00:00:00,16000008-cp003-bus rapid transit route between downtown san diego and the otay mesa crossing-const4000000.0,2016-05-09 00:00:00,No change to Date Signed by DRMT
8,16000188-cp008-courthouse trolley station (design)-ps&e713000.0,2017-02-01 00:00:00,16000188-cp008-courthouse trolley station (design)-ps&e713000.0,2017-02-01 00:00:00,No change to Date Signed by DRMT
9,17000174-cp008-courthouse trolley station (construction)-const3760000.0,2018-06-11 00:00:00,17000174-cp008-courthouse trolley station (construction)-const3760000.0,2018-06-11 00:00:00,No change to Date Signed by DRMT


#### Criteria
* If there is a date in the “Allocation Date” field (column T) and no date in the “Date Signed by DRMT” (column AJ), then the information should be on the report
* If there are 6 months or fewer until the expiry date (column AK), then the information should be on the report


In [88]:
# Allocation Date is filled in but Date Signed by DRMT is still empty
alloc_cond1 = (alloc2.allocation_allocation_date.notna()) & (
    alloc2.allocation_date_signed_by_drmt.isna()
)

In [89]:
# Replace with variables later not hard coded values
# PSA expiry date falls strictly between the two hard-coded dates; the dates
# are compared as "YYYY-MM-DD" strings rather than as datetimes.
alloc_cond2 = (alloc2.allocation_psa_expiry_date.dt.date.astype(str) > "2022-12-31") & (
    alloc2.allocation_psa_expiry_date.dt.date.astype(str) < "2023-06-12"
)

In [90]:
# alloc3 = (alloc2[alloc_cond1 | (alloc_cond2)]).reset_index(drop=True)

In [91]:
# len(alloc3)

In [92]:
def create_unique_id(df):
    """Add a lower-cased "unique_id" column built from five identifying columns.

    The id is project_id, ppno, components and phase joined with "-", followed
    directly (no separator) by allocation_amount; every part is cast to str.

    Args:
        df: DataFrame with columns project_id, ppno, components, phase and
            allocation_amount.

    Returns:
        The same DataFrame object, which is modified in place.
    """
    as_text = {
        col: df[col].astype(str)
        for col in ("project_id", "ppno", "components", "phase", "allocation_amount")
    }

    combined = (
        as_text["project_id"]
        + "-"
        + as_text["ppno"]
        + "-"
        + as_text["components"]
        + "-"
        + as_text["phase"]
        + as_text["allocation_amount"]
    )

    df["unique_id"] = combined.str.lower()
    return df

In [93]:
def PSA_allocation_expiry_date(start_date: str, end_date:str):
    """Build the PSA section of the report from the cleaned allocation sheet.

    Two groups of allocations are selected and stacked:

    * PSAs whose expiry date falls strictly between `start_date` and `end_date`
      (tagged "PSA will expire in <6 months").
    * Allocations with an allocation date but no "Date Signed by DRMT"
      (tagged "Allocation date is filled, date signed by DRMT is empty").

    An allocation meeting both criteria appears once per group.

    Args:
        start_date: lower bound of the expiry window (exclusive), e.g. "2023-01-01".
        end_date: upper bound of the expiry window (exclusive), e.g. "2023-06-01".

    Returns:
        DataFrame with the `alloc_cols` columns (without the "allocation_"
        prefix), a "unique_id" column and a "Tag" column. Each group keeps its
        own 0..n-1 index, so index labels can repeat in the result.
    """
    # Explicit copy of the column subset: the rename and the unique_id column
    # added below then modify an independent frame, not a slice of the loaded data.
    allocation = A1_data_prep.clean_allocation()[alloc_cols].copy()

    # Remove prefixes. regex=True is required for "^" to act as an anchor;
    # newer pandas versions treat the pattern literally by default, which
    # would leave every column name unchanged.
    allocation.columns = allocation.columns.str.replace(
        "^allocation_", "", regex=True
    )

    # Create unique ids
    allocation = create_unique_id(allocation)

    # Find projects with a PSA that will expire inside the window. Compare
    # calendar dates as datetimes (time of day dropped) instead of as strings,
    # so bounds such as "2023-6-1" are handled too; missing dates never match.
    window_start = pd.Timestamp(start_date)
    window_end = pd.Timestamp(end_date)
    expiry_day = allocation.psa_expiry_date.dt.normalize()
    expiration_condition = (expiry_day > window_start) & (expiry_day < window_end)

    will_expire = (
        allocation[expiration_condition]
        .reset_index(drop=True)
        .assign(Tag="PSA will expire in <6 months")
    )

    # Find projects where the allocation date is filled
    # but the date signed by DRMT is empty
    allocation_drmt_date_condition = (
        allocation.allocation_date.notna() & allocation.date_signed_by_drmt.isna()
    )

    allocation_drmt_date = (
        allocation[allocation_drmt_date_condition]
        .reset_index(drop=True)
        .assign(Tag="Allocation date is filled, date signed by DRMT is empty")
    )

    # Stack the two groups
    c1 = pd.concat([will_expire, allocation_drmt_date])

    return c1

In [94]:
c1 = PSA_allocation_expiry_date("2023-01-01", "2023-06-01")

  allocation.columns = allocation.columns.str.replace('^allocation_', '')


In [95]:
c1.head(3)

Unnamed: 0,grant_recipient,components,ppno,project_id,ea,phase,allocation_amount,allocation_date,psa_#,date_branch_chief_receives_psa,date_regional_coordinator_receives_psa,date_oc_receives_psa,date_opm_receives_psa,date_legal_receives_psa,date_returned_to_pm,date_psa_sent_to_local_agency,date_psa_approved_by_local_agency,ctc_allocation_amendment,date_signed_by_drmt,psa_expiry_date,unique_id,Tag
0,Los Angeles-San Diego-San Luis Obispo Rail Corridor Agency,Island Centralized Traffic Control,CP043,21000152,R401GC,CONST,5860000.0,2021-06-24,VARLOSSANPS-06 A1,NaT,NaT,2021-04-12,2021-05-18,NaT,NaT,,2021-05-24,TIRCP-2021-29 A\n6/24/21,2021-05-28,2023-02-18,21000152-cp043-island centralized traffic control-const5860000.0,PSA will expire in <6 months
1,Los Angeles-San Diego-San Luis Obispo Rail Corridor Agency,Upgrade of Non-Powered Switches,CP043,21000154,R401GD,CONST,1000000.0,2021-01-28,VARLOSSANPS-06-A1,NaT,NaT,2021-04-12,2021-05-18,NaT,NaT,,2021-05-24,,2021-05-28,2023-02-18,21000154-cp043-upgrade of non-powered switches-const1000000.0,PSA will expire in <6 months
0,City Of Fresno,Bus Stop Improvements,CP016,20000215,T357GB,CONST,3917000.0,2020-06-25,06FRESNOPS-01 A4,NaT,NaT,NaT,2022-03-11,2022-03-16,2022-03-16,4/5/2022\nResent 7/27/22,NaT,TIRCP-2122-18 A\n3/17/2022 Modify Project Description,NaT,NaT,20000215-cp016-bus stop improvements -const3917000.0,"Allocation date is filled, date signed by DRMT is empty"
