# Invoice 

In [35]:
import A1_data_prep
import A5_crosswalks as crosswalks
import numpy as np
import pandas as pd
from calitp import *

# Notebook display settings: show up to 100 columns, and do not truncate
# the number of rows or the width of cell contents.
pd.options.display.max_columns = 100
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

In [36]:
# Clean up project sheet
def clean_project_manual(df):
    """Apply hand-maintained corrections to the project sheet.

    Overwrites ``ppno`` for a few specific grant recipients and recodes the
    ``award_cycle`` label ``"FY 21/22"`` to ``4``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``grant_recipient``, ``award_year``, ``ppno`` and
        ``award_cycle`` columns. The frame is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, after the corrections.
    """
    recipient = df["grant_recipient"]

    # Replace agencies with the right PPNO
    # NOTE(review): this PPNO is written as an int while the others are strings —
    # confirm downstream joins on ppno expect that.
    df.loc[
        recipient == "San Bernardino County Transportation Authority (SBCTA)",
        "ppno",
    ] = 1230
    df.loc[recipient == "Bay Area Rapid Transit District (BART)", "ppno"] = "CP060"
    df.loc[recipient == "Santa Monica Big Blue Bus", "ppno"] = "CP071"
    # AVTA is only corrected for its 2020 award.
    df.loc[
        (recipient == "Antelope Valley Transit Authority (AVTA)")
        & (df["award_year"] == 2020),
        "ppno",
    ] = "CP059"

    # Replace FY 21/22 with Cycle 4.
    # Assign the result back instead of calling replace(..., inplace=True) on
    # df["award_cycle"]: that form is chained assignment, which pandas warns
    # about and which does not update df when Copy-on-Write is enabled.
    df["award_cycle"] = df["award_cycle"].replace({"FY 21/22": 4})

    return df

In [37]:
# Clean up project sheet
def clean_project():
    """Load the project sheet and return it cleaned, with ``project_``-prefixed columns.

    Steps, in order: normalise grant recipient names, fill nulls by dtype,
    recode the ``"FY 21/22"`` award cycle, coerce ``other_funds_involved`` to
    numeric, apply the manual corrections in ``clean_project_manual``, then
    prefix every column name with ``project_``.

    Returns
    -------
    pandas.DataFrame
        The cleaned project sheet.
    """
    df = A1_data_prep.load_project()

    # Some grant recipients have multiple spellings of their name,
    # e.g. BART versus Bay Area Rapid Transit.
    df = A1_data_prep.organization_cleaning(df, "grant_recipient")
    df["grant_recipient"] = df["grant_recipient"].replace(
        crosswalks.grant_recipients_projects
    )

    # Fill in nulls based on data type
    df = df.fillna(df.dtypes.replace({"float64": 0.0, "object": "None", "int64": 0}))

    # Replace FY 21/22 with Cycle 4.
    # Assign back rather than using replace(..., inplace=True) on the selected
    # column: that is chained assignment and does not update df under
    # pandas Copy-on-Write.
    df["award_cycle"] = df["award_cycle"].replace({"FY 21/22": 4})

    # Coerce cols that are supposed to be numeric; anything unparseable becomes NaN.
    # NOTE(review): this runs after the null fill, so if this column is
    # object-typed its missing entries are "None" by now and end up as NaN
    # here — confirm that is intended.
    df["other_funds_involved"] = pd.to_numeric(
        df["other_funds_involved"], errors="coerce"
    )

    # As this is manual data, correct in a separate function
    df = clean_project_manual(df)

    # Add prefix
    df = df.add_prefix("project_")

    return df

In [38]:
# Run the project cleaning pipeline and keep the result for inspection.
project_test = clean_project()



In [44]:
# Allocation-sheet columns that hold dates. clean_allocation() coerces each of
# them to datetime, turning values that cannot be parsed into NaT.
# NOTE(review): the allocation output also has a "date_psa_sent_to_local_agency"
# column that is not listed here and displays 0.0 rather than a date — confirm
# whether it belongs in this list (its nulls are currently filled with 0.0
# before the date coercion runs, so the fill step would need to skip it).
date_columns = [
    "allocation_date",
    "completion_date",
    "_3rd_party_award_date",
    "led",
    "date_regional_coordinator_receives_psa",
    "date_oc_receives_psa",
    "date_opm_receives_psa",
    "date_legal_receives_psa",
    "date_returned_to_pm",
    "date_psa_approved_by_local_agency",
    "date_signed_by_drmt",
    "psa_expiry_date",
    "date_branch_chief_receives_psa",
]

In [51]:
def clean_allocation_manual(df):
    """Apply hand-maintained corrections to the allocation sheet.

    Runs the crosswalk replacements on three date columns, overwrites the
    PPNO of the 2016 SBCTA rows, sorts by award year and grant recipient, and
    finally maps PPNOs through the allocation crosswalk.

    The date-column and SBCTA corrections are written into ``df`` itself; the
    returned frame is the sorted result.
    """
    # Placeholder values found in date columns -> their crosswalk replacements.
    date_fixes = {
        "_3rd_party_award_date": crosswalks.allocation_3rd_party_date,
        "led": crosswalks.allocation_led,
        "completion_date": crosswalks.allocation_completion_date,
    }
    for column, mapping in date_fixes.items():
        df[column] = df[column].replace(mapping)

    # Replace PPNO using clean project as the source of truth
    is_sbcta_2016 = df["grant_recipient"].eq(
        "San Bernardino County Transportation Authority (SBCTA)"
    ) & df["award_year"].eq(2016)
    df.loc[is_sbcta_2016, "ppno"] = 1230

    # Order rows by award year, then grant recipient.
    # NOTE(review): the original comment said this sort prepares a backward
    # fill of missing PPNOs, but no fill is performed here.
    ordered = df.sort_values(["award_year", "grant_recipient"])

    # Map PPNOs through the allocation crosswalk.
    ordered["ppno"] = ordered["ppno"].replace(crosswalks.ppno_crosswalk_allocation)

    return ordered

In [52]:
def clean_allocation():
    """Load the allocation sheet and return it cleaned, with ``allocation_``-prefixed columns.

    Steps, in order: drop incomplete rows, convert ``expended_amount`` to
    integers, fill nulls by dtype, normalise grant recipient names, apply the
    manual corrections in ``clean_allocation_manual``, coerce the columns in
    ``date_columns`` to datetime, then prefix every column name with
    ``allocation_``.

    Returns
    -------
    pandas.DataFrame
        The cleaned allocation sheet.
    """
    df = A1_data_prep.load_allocation()

    # Some rows are not completely filled: drop them based on whether or not
    # some cols are populated. The copy gives an independent frame so the
    # column assignments below do not trigger SettingWithCopyWarning.
    df = df.dropna(subset=["award_year", "grant_recipient", "ppno"]).copy()

    # Correcting string to 0
    df["expended_amount"] = (
        df["expended_amount"].replace({"Deallocation": 0}).astype("int64")
    )

    # Fill in NA based on data type
    df = df.fillna(df.dtypes.replace({"float64": 0.0, "object": "None"}))

    # Clean organization name/de duplicate
    df = A1_data_prep.organization_cleaning(df, "grant_recipient")

    # Do some manual cleaning. This has to run BEFORE the datetime coercion:
    # clean_allocation_manual replaces string values found in date columns, and
    # coercing first with errors="coerce" would already have turned those
    # strings into NaT, leaving nothing for the replacements to match.
    df = clean_allocation_manual(df)

    # Coerce dates to datetime; values that cannot be parsed become NaT.
    # Kept element-wise so every value is parsed on its own.
    for c in date_columns:
        df[c] = df[c].apply(pd.to_datetime, errors="coerce")

    # Add prefix
    df = df.add_prefix("allocation_")

    return df

In [53]:
# Run the allocation cleaning pipeline and keep the result for inspection.
alloc_test = clean_allocation()

In [58]:
# Preview the first rows of the cleaned allocation table.
alloc_test.head()

Unnamed: 0,allocation_award_year,allocation_project_#,allocation_grant_recipient,allocation_implementing_agency,allocation_ppno,allocation_project_id,allocation_ea,allocation_components,allocation_phase,allocation_allocation_amount,allocation_expended_amount,allocation_sb1_funding,allocation_sb1_budget_year,allocation_ggrf_funding,allocation_ggrf_budget_year,allocation_ctc_financial_resolution,allocation_ctc_allocation_amendment,allocation_ctc_waiver,allocation_calsta_waiver,allocation_allocation_date,allocation_completion_date,allocation_psa_#,allocation_ct_document_#,allocation__3rd_party_award_date,allocation_led,allocation_date_branch_chief_receives_psa,allocation_date_regional_coordinator_receives_psa,allocation_date_oc_receives_psa,allocation_date_opm_receives_psa,allocation_date_legal_receives_psa,allocation_date_returned_to_pm,allocation_date_psa_sent_to_local_agency,allocation_date_psa_approved_by_local_agency,allocation_date_signed_by_drmt,allocation_psa_expiry_date,allocation_lonp,allocation_prior_fiscal_years_to_2020,allocation_fiscal_year_2020_2021,allocation_fiscal_year_2021_2022,allocation_fiscal_year_2022_2023,allocation_fiscal_year_2023_2024,allocation_fiscal_year_2024_2025,allocation_fiscal_year_2025_2026,allocation_fiscal_year_2026_2027,allocation_fiscal_year_2027_2028,allocation_fiscal_year_2028_2029,allocation_fiscal_year_2029_2030,allocation_allocation_comments,allocation_non_network_integration_allocations_unique_percentage_split,allocation_psa_comments
0,2015.0,1.0,Antelope Valley Transit Authority,Antelope Valley Transit Authority,CP005,,T343GA,Purchase 13 60-foot articulated BRT buses and 16 45-foot electric commuter buses,CONST,24403000.0,21714177,0.0,,24403000.0,2015-16,TIRCP-1516-02,,,Waiver-1920-17,2015-10-22,2022-03-30,07AVTA2015PS-01 A1 \n\n07AVTA2015PS-05,07AVTA2015PS\n*Listed under Unit 3040,2016-03-14,2022-03-31,NaT,NaT,NaT,NaT,NaT,NaT,0.0,NaT,2021-02-02,NaT,,24403000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"Program Supplement be Amended to show a correction in the invoicing section and to include language that confirms funding for "" supporting infrastructure"" includes WAVE.",,
1,2015.0,2.0,Capitol Corridor Joint Powers Authority,Capitol Corridor Joint Powers Authority,CP012,,R350GA,Track and curve improvements between San Jose and Martinez for faster journeys benefiting Capitol Corridor passengers,CONST,4620000.0,4619999,4620000.0,2015-16,0.0,2012-13,TIRCP-1516-07\nTech. Correction June 2017,TIRCP-1920-17A\n6/25/2020,,,2016-05-19,NaT,VARCCJPAPS-01\n,VARCCJPAPS-01,2016-06-01,2019-06-01,NaT,NaT,NaT,NaT,NaT,NaT,0.0,NaT,2016-12-13,2019-06-01,,4620000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,
2,2015.0,3.0,Los Angeles County Metropolitan Transportation Authority,Los Angeles County Metropolitan Transportation Authority,CP015,,R353GA,"Replace Blue Line signal system; install new track crossovers, new train controls at 15 locations, new LED signals and power switches, 19 turnouts, new track, overhead catenary, and a communications upgrade.",CONST,38494000.0,38494000,35879000.0,2015-16,2615000.0,2014-15,TIRCP-1516-09,TIRCP-1617-02A\n10/20/2016,Waiver-17-49,,2016-06-30,2021-06-30,\n07LACMTAPS-01 A1\n,07LACMTAPS-01,2017-06-28,2020-06-28,NaT,NaT,NaT,NaT,NaT,NaT,0.0,NaT,2017-08-04,2020-06-28,,38494000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,"Time Extension (6 months) for third party contract approved Dec 8, 2016.\n\nWashing Siding is not moving forward. Pending submission of scope change request. Sent status request 9/10/19 to confirm when request will be submitted.",,
3,2015.0,4.0,Los Angeles-San Diego-San Luis Obispo Rail Corridor Agency,Los Angeles-San Diego-San Luis Obispo Rail Corridor Agency (LOSSAN),CP007,,R345GA,LOSSAN and 12 transit agencies from San Luis Obispo to San Diego counties to use seamless ticketing to increase use of transit,CONST,1675000.0,277840,0.0,,1675000.0,2015-16,TIRCP-1516-03,,,,2015-12-10,2021-06-30,VARLOSSANPS-01 A3\n,VARLOSSAN2015PS,2016-06-01,2021-06-30,NaT,NaT,NaT,NaT,NaT,NaT,0.0,2020-01-20,2020-01-31,2021-06-30,,1675000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,
4,2015.0,5.0,Monterey-Salinas Transit,Monterey-Salinas Transit,CP013,,T349GA,Renovation and expansion of the Monterey maintenance and operations facility.,CONST,10000000.0,10000000,10000000.0,2015-16,0.0,,TIRCP-1516-08,TIRCP-1617-06A\n1/19/17,,,2016-05-19,2018-09-30,05MST2015PS\n05MST2015PS-01,05MST2015PS,2016-11-03,2019-11-03,NaT,NaT,NaT,NaT,NaT,NaT,0.0,2016-11-11,2016-11-21,2019-11-03,,10000000.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,,


In [54]:
# Distinct project IDs on each side, held as sets so they can be compared
# with set operations.
# NOTE(review): `invoice` and `allocation` are not created in the cells shown
# above — confirm both are defined before this cell on a fresh
# Restart & Run All.
invoice_list = set(invoice["project_id"].unique().tolist())
allocation_list = set(allocation["allocation_project_id"].unique().tolist())

In [55]:
# Show the distinct project IDs found in the invoice data.
invoice_list

{'0016000007',
 '0016000008',
 '0016000009',
 '0016000048',
 '0016000119',
 '0016000121',
 '0016000188',
 '0016000237',
 '0016000238',
 '0016000275',
 '0016000276',
 '0016000277',
 '0016000329',
 '0017000040',
 '0017000077',
 '0017000128',
 '0017000129',
 '0017000130',
 '0017000174',
 '0017000181',
 '0017000182',
 '0017000183',
 '0017000206',
 '0017000233',
 '0017000234',
 '0017000240',
 '0018000010',
 '0018000149',
 '0018000170',
 '0018000175',
 '0018000237',
 '0018000278',
 '0018000287',
 '0018000288',
 '0018000321',
 '0018000323',
 '0018000324',
 '0018000354',
 '0018000355',
 '0018000356',
 '0018000357',
 '0018000358',
 '0018000359',
 '0019000014',
 '0019000017',
 '0019000021',
 '0019000063',
 '0019000064',
 '0019000068',
 '0019000069',
 '0019000073',
 '0019000075',
 '0019000078',
 '0019000079',
 '0019000080',
 '0019000081',
 '0019000086',
 '0019000087',
 '0019000088',
 '0019000089',
 '0019000090',
 '0019000091',
 '0019000092',
 '0019000093',
 '0019000095',
 '0019000096',
 '00190000

In [56]:
# Show the distinct project IDs found in the allocation data.
allocation_list

{'0019000021',
 '0019000087',
 '0019000120',
 '0021000177',
 '0021000249',
 '0021000320',
 '0022000240',
 '0022000241',
 '0022000242',
 '0022000260',
 '0022000268',
 '0022000269',
 '0022000335',
 'None'}

In [57]:
# Count the allocation project IDs that do not appear in the invoice data.
# NOTE(review): allocation_list contains the placeholder string 'None'; unless
# the invoice IDs contain it too, it is included in this count.
len(allocation_list - invoice_list)

10