## SB1 GIS Template
* Populate TIRCP GIS Template to create a map for TIRCP only projects.

In [1]:
import A1_data_prep
import A2_tableau
import numpy as np
import pandas as pd
from babel.numbers import format_currency
from calitp import *

In [2]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

### Load in sheets

#### TIRCP Completed Projects
* Sheet with completed TIRCP projects.
* Deleted 2015:13 SCCRA-Metrolink off Completed Projects, per Linda.

In [3]:
completed_projects = to_snakecase(pd.read_excel(f"{A1_data_prep.GCS_FILE_PATH}TIRCP - All Completed Projects_UPDATED 11-15-22.xlsx", sheet_name="Table 1"))

In [4]:
completed_projects.shape

(13, 12)

In [5]:
completed_projects.award_year.unique()

array([2015, 2016, 2018])

In [6]:
# Crosswalk for project titles to match Project Sheet 
completed_project_crosswalk = {'Monterey Bay Operations & Maintenance Facility/Salinas Transit Service Project':
 'Monterey Bay Operations and Maintenance Facility/Salinas Transit Service Project',
 'South Bay Bus Rapid Transit Project*': 'South Bay Bus Rapid Transit',
 'San Diego Metropolitan Transit System Trolley Capacity Improvements Project':
 'San Diego Metropolitan Transit System Trolley Capacity Improvements',
 'Expanding the SFMTA Light Rail Vehicle Fleet Project': 'SFMTA Light Rail Vehicle Fleet Expansion',
 'Altamont Corridor Express Wayside Power': 'ACE Wayside Power Project',
 'BRT Expansion: MLK Corridor and Crosstown Miner Corridor': 'Bus Rapid Transit – Martin Luther King Corridor and Crosstown Miner Corridor',}

In [7]:
# Replace 
completed_projects["project_title"] = completed_projects["project_title"].replace(completed_project_crosswalk)

In [8]:
completed_projects = completed_projects.add_prefix("completed_")


#### Project

In [9]:
df_project = A1_data_prep.clean_project()

  warn(msg)


In [10]:
project_cols_wanted = [
    "project_award_year",
    "project_project_#",
    "project_grant_recipient",
    "project_project_title",
    "project_project_description",
    "project_ppno",
    "project_total_project_cost",
    "project_allocated_amount",
    "project_tircp_award_amount__$_",
    "project_is_sb1?",
    "project_is_ggrf?",
    "project_is_iija?",
    "project_on_shs?",
    "project_calitp",
    "project_estimated_tircp_ghg_reductions",
    "project_estimated_tircp_ghg_reductions2",
    "project_increased_ridership",
    "project_service_integration",
    "project_improve_safety",
    "project_project_readiness",
    "project_funding_leverage",
    "project_multi_agency_coordination_integration",
    "project_ab_1550_community_benefits",
    "project_housing_co_benefits",
]

In [11]:
df_project2 = df_project[project_cols_wanted]

##### Project Status 
* Merge TIRCP with Completed Projects
* Create a function to tag all others.

In [12]:
project_m1 = pd.merge(
    df_project2,
    completed_projects,
    how="outer",
    left_on=["project_project_title"],
    right_on=["completed_project_title"],
    indicator=True,
)

In [13]:
project_m1._merge.value_counts()

left_only     83
both          13
right_only     0
Name: _merge, dtype: int64

In [14]:
merge_replace = {'left_only':'tbd',
 'both':'completed'} 

In [15]:
# Change merge values "both" to completed 
# Rename merge column
project_m1 = project_m1.rename(columns = {'_merge':'completed_projects'})

In [16]:
# Merge crosswalk 
project_m1['completed_projects'] = project_m1['completed_projects'].replace(merge_replace)

In [17]:
# project_m1.info()

In [18]:
# Tag if the lead agency is Caltrans or a partner or unknown.
def project_status_categorization(row):
    # If partner is none, return Unknown.
    if row.project_allocated_amount == 0.0:
        return "Planning"
    else:
        return "In Construction"

In [19]:
# Apply function to determine if a project is fully funded or not
project_m1["project_status"] = project_m1.apply(project_status_categorization, axis=1)

In [20]:
project_m1.project_status.value_counts()

In Construction    62
Planning           34
Name: project_status, dtype: int64

In [21]:
# Replace projects that are completed with the values 
project_m1.loc[project_m1['completed_projects'].str.startswith('comp'), 'project_status'] = "Completed"

In [22]:
# Preview
# project_m1[['project_allocated_amount','completed_projects','project_status']]

In [23]:
# Drop Completed Projects col
project_m1 = project_m1.drop(columns = ["completed_projects", 'completed_project_number', 'completed_award_year',
       'completed_applicant', 'completed_project_title',
       'completed_project_description', 'completed_award_amount',
       'completed_total_project_cost',
       'completed_tircp_expended__as_of_6_30_2022',
       'completed_percent_expended', 'completed_date_operational',
       'completed_fdr', 'completed_comments', 'completed_projects'])

#### Allocation

In [24]:
df_alloc = A1_data_prep.clean_allocation()

In [25]:
# Subset
alloc_cols_wanted = [
    "allocation_award_year",
    "allocation_project_#",
    "allocation_implementing_agency",
    "allocation_ppno",
    "allocation_components",
    "allocation_phase",
    "allocation_allocation_amount",
    "allocation_expended_amount",
    "allocation_sb1_funding",
    "allocation_ggrf_funding",
    "allocation_allocation_date",
    "allocation_grant_recipient",
]

In [26]:
df_alloc2 = df_alloc[alloc_cols_wanted]

#### GIS

In [27]:
df_gis = A1_data_prep.load_gis()

### Populate GIS Template
* With the following columns
    * Award Year: Project Number
    * Project Id
    * Project TItle 
    * Project Description	
    * State/SB 1 Program Code	
    * IIJA Program Code	
    * Project Status	
    * SB1 Funds	
    * GGRF Funds	
    * IIJA Funds	
    * General Fund Auxilary Funds	
    * TIRCP Award Amount	
    * Total Project Cost	
    * Fiscal Year	
    * Is SB1?	
    * Is GGRF?	
    * Is IIJA?	
    * ON SHS?	
    * CalITP	
    * Estimated TIRCP GHG Reductions	
    * Increased Ridership	
    * Service Integration	
    * Improved Safety	
    * Project Readiness	
    * Multi-Agency Coordination	
    * AB 1550 Community Benefits	
    * Housing Co-Benefits	
    * Caltrans District	
    * Assembly District	
    * Senate District	Congressional District	
    * City Code	
    * City Agency ID 	
    * County Code	
    * County Agency ID 	
    * Implementing Agency  ID 	
    * Implementing Agency Name

#### Merge sheets

In [28]:
# Merge project & allocation sheets first
m1 = pd.merge(
    project_m1,
    df_alloc2,
    how="outer",
    left_on=["project_award_year", "project_ppno", ],
    right_on=["allocation_award_year", "allocation_ppno",],
    indicator=True,
)

In [29]:
# Left only is 2022 projects that haven't begun
m1._merge.value_counts()

both          428
left_only      25
right_only      1
Name: _merge, dtype: int64

In [30]:
m1.shape

(454, 38)

In [31]:
m1.columns

Index(['project_award_year', 'project_project_#', 'project_grant_recipient',
       'project_project_title', 'project_project_description', 'project_ppno',
       'project_total_project_cost', 'project_allocated_amount',
       'project_tircp_award_amount__$_', 'project_is_sb1?', 'project_is_ggrf?',
       'project_is_iija?', 'project_on_shs?', 'project_calitp',
       'project_estimated_tircp_ghg_reductions',
       'project_estimated_tircp_ghg_reductions2',
       'project_increased_ridership', 'project_service_integration',
       'project_improve_safety', 'project_project_readiness',
       'project_funding_leverage',
       'project_multi_agency_coordination_integration',
       'project_ab_1550_community_benefits', 'project_housing_co_benefits',
       'project_status', 'allocation_award_year', 'allocation_project_#',
       'allocation_implementing_agency', 'allocation_ppno',
       'allocation_components', 'allocation_phase',
       'allocation_allocation_amount', 'allocation_e

In [32]:
# Drop merge
m1 = m1.drop(columns=["_merge"])

In [33]:
# Merge m1 with df_gis
m2 = pd.merge(
    m1,
    df_gis,
    how="outer",
    left_on=["project_award_year", "project_project_title"],
    right_on=["award_year", "project_title"],
    indicator=True,
)

In [34]:
m2._merge.value_counts()

both          453
left_only       1
right_only      0
Name: _merge, dtype: int64

#### Create sheet

##### Create Project ID
GIS program has to have a ten digit ID for the project ID.  Can you convert the project ID’s as follows:
* (four zeros, cycle year, two digit project number) 2015:01 = 0000201501


In [35]:
project_table = m2.rename(columns={"project_project_#": "project_number"})

In [36]:
project_table = project_table.dropna(subset=['project_award_year', 'project_project_title']).reset_index(drop = True)

In [37]:
# Turn award year to int64
project_table[["project_award_year", "project_number"]] = project_table[["project_award_year", "project_number"]].astype('int64')

In [38]:
# Pad single digits with a 0.
# https://stackoverflow.com/questions/67401497/add-a-zero-before-1-digit-number-in-a-column-pandas-dataframe
project_table["project_number"] = project_table.project_number.astype(str).str.zfill(2)

In [39]:
project_table["project_id"] = project_table["project_award_year"].astype(
    str
) + project_table["project_number"].astype(str)

In [40]:
project_table["project_id"] = "0000" + project_table["project_id"]

##### Edit Title to include Project Year & Number

In [41]:
# Create a new column: with year + project numer
project_table["Award Year: Project Number"] = (
    project_table["project_award_year"].astype(str)
    + ":"
    + project_table["project_number"]
)

In [42]:
# Edit title with new
project_table["project_project_title"] = (
    project_table["project_project_title"]
    + " "
    + project_table["Award Year: Project Number"]
)

#### Extract Fiscal Year

In [43]:
project_table["fiscal_year"] = project_table["allocation_allocation_date"].dt.year

In [44]:
project_table.head(1)

Unnamed: 0,project_award_year,project_number,project_grant_recipient,project_project_title,project_project_description,project_ppno,project_total_project_cost,project_allocated_amount,project_tircp_award_amount__$_,project_is_sb1?,project_is_ggrf?,project_is_iija?,project_on_shs?,project_calitp,project_estimated_tircp_ghg_reductions,project_estimated_tircp_ghg_reductions2,project_increased_ridership,project_service_integration,project_improve_safety,project_project_readiness,project_funding_leverage,project_multi_agency_coordination_integration,project_ab_1550_community_benefits,project_housing_co_benefits,project_status,allocation_award_year,allocation_project_#,allocation_implementing_agency,allocation_ppno,allocation_components,allocation_phase,allocation_allocation_amount,allocation_expended_amount,allocation_sb1_funding,allocation_ggrf_funding,allocation_allocation_date,allocation_grant_recipient,award_year,project_#,grant_recipient,project_title,ppno,county,caltransdistrict,assembly_districts,congressional_districts,senate_districts,city_code,city_agency_id_,county_code,county_agency_id_,_implementing_agency__id_,_merge,project_id,Award Year: Project Number,fiscal_year
0,2015,1,Antelope Valley Transit Authority,Regional Transit Interconnectivity & Environmental Sustability 2015:01,Purchase 13 60-foot articulated BRT buses and 16 45-foot electric commuter buses,CP005,39478000.0,24403000.0,24403000.0,N,Y,N,0.0,0.0,"195,380 MTCO2",,,,,,,,,,In Construction,2015.0,1.0,Antelope Valley Transit Authority,CP005,Purchase 13 60-foot articulated BRT buses and 16 45-foot electric commuter buses,CONST,24403000.0,23400943.0,0.0,24403000.0,2015-10-22,Antelope Valley Transit Authority,2015.0,1.0,Antelope Valley Transit Authority (AVTA),Regional Transit Interconnectivity & Environmental Sustability,CP005,LA,|07|,|36|,,|21|,|5419|5378|,,|5953|,,|6166|,both,201501,2015:01,2015.0


#### Columns

In [45]:
# Keep the columns I want.
cols_right_order = [
    "Award Year: Project Number",
    "project_id",
    "project_ppno",
    "project_project_title",
    "project_status",
    "project_project_description",
    "allocation_components",
    "allocation_phase",
    "project_tircp_award_amount__$_",
    "allocation_sb1_funding",
    "allocation_ggrf_funding",
    "allocation_allocation_amount",
    "project_total_project_cost",
    "fiscal_year",
    "project_is_sb1?",
    "project_is_ggrf?",
    "project_is_iija?",
    "project_on_shs?",
    "project_calitp",
    "project_estimated_tircp_ghg_reductions",
    "project_estimated_tircp_ghg_reductions2",
    "project_increased_ridership",
    "project_service_integration",
    "project_improve_safety",
    "project_project_readiness",
    "project_funding_leverage",
    "project_multi_agency_coordination_integration",
    "project_ab_1550_community_benefits",
    "project_housing_co_benefits",
    "caltransdistrict",
    "assembly_districts",
    "senate_districts",
    "city_code",
    "city_agency_id_",
    "county_code",
    "county_agency_id_",
    "_implementing_agency__id_",
    "allocation_implementing_agency",
]

In [46]:
project_table2 = project_table[cols_right_order]

In [47]:
project_table2.columns

Index(['Award Year: Project Number', 'project_id', 'project_ppno',
       'project_project_title', 'project_status',
       'project_project_description', 'allocation_components',
       'allocation_phase', 'project_tircp_award_amount__$_',
       'allocation_sb1_funding', 'allocation_ggrf_funding',
       'allocation_allocation_amount', 'project_total_project_cost',
       'fiscal_year', 'project_is_sb1?', 'project_is_ggrf?',
       'project_is_iija?', 'project_on_shs?', 'project_calitp',
       'project_estimated_tircp_ghg_reductions',
       'project_estimated_tircp_ghg_reductions2',
       'project_increased_ridership', 'project_service_integration',
       'project_improve_safety', 'project_project_readiness',
       'project_funding_leverage',
       'project_multi_agency_coordination_integration',
       'project_ab_1550_community_benefits', 'project_housing_co_benefits',
       'caltransdistrict', 'assembly_districts', 'senate_districts',
       'city_code', 'city_agency_id_', 

##### Create Missing Columns

In [48]:
project_table2["State_SB_1_Program_Code"] = "TIRCP"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  project_table2["State_SB_1_Program_Code"] = "TIRCP"


In [49]:
for i in ["IIJA_Program_Code", "IIJA_Funds", "General_Fund_Auxiliary_Funds"]:
    project_table2[i] = "N/A"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  project_table2[i] = "N/A"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  project_table2[i] = "N/A"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  project_table2[i] = "N/A"


In [50]:
# Clean up column names
project_table2 = A1_data_prep.clean_up_columns(project_table2)

In [51]:
project_table2 = project_table2.rename(
    columns={
        "Id": "Project ID",
        "Title": "Project Title",
        "Description": "Project Description",
        "Component": "TIRCP Component",
        "Phase": "Phase",
        "Sb1 Funding": "SB1 Funds",
        "Amount": "Allocation Amount",
        "Caltransdistrict": "Caltrans District",
    }
)

#### Final Checks

In [52]:
project_table2.Ppno.nunique() == df_project.project_ppno.nunique()

True

#### Groupby/Formatting V1 
* Splits out GGRF/SB1 by phase and components.

In [53]:
project_table2 = project_table2.fillna(
    project_table2.dtypes.replace({"float64": 0.0, "object": "None", "int64": 0})
)

In [54]:
project_table2["Components_Cumulative"] = project_table2["Components"]

In [55]:
# Append number of times a component repeats by project ID & status
# Because components-status combination appears more than once for some projects
# And the monetary info is summed up unintentionally in the group by step below.
# https://stackoverflow.com/questions/57605705/how-to-append-counter-number-to-each-repeated-string-value-in-pandas-column
project_table2["Components_Cumulative"] += (
    project_table2.groupby(["Components_Cumulative", "Project ID", "Phase"])
    .cumcount()
    .add(1)
    .astype(str)
)

In [56]:
# project_table2[["Project ID","Project Status",'Components']]

#### Groupby
* Splits out SB1 and GGRF funding by phases. 

In [57]:
groupby_cols_1 = [
    "Award Year: Project Number",
    "Project ID",
    "Ppno",
    "Project Title",
    "Status",
    "Project Description",
    'State Sb 1 Program Code',
    "Iija Program Code",
    "Tircp Award Amount  $",
    "Total  Cost",
    "Is Sb1?",
    "Is Ggrf?",
    "Is Iija?",
    "On Shs?",
    "Calitp",
    "Estimated Tircp Ghg Reductions",
    "Estimated Tircp Ghg Reductions2",
    "Increased Ridership",
    "Service Integration",
    "Improve Safety",
    "Readiness",
    "Funding Leverage",
    "Multi Agency Coordination Integration",
    "Ab 1550 Community Benefits",
    "Housing Co Benefits",
    "Caltrans District",
    "Assembly Districts",
    "Senate Districts",
    "City Code",
    "City Agency Id",
    "County Code",
    "County Agency Id",
    "Implementing Agency  Id",
    "Implementing Agency",
    "Components_Cumulative",
    "Phase",
]

In [58]:
sum_cols = ["SB1 Funds", "Ggrf Funding" ]

In [59]:
max_cols = [
    "Fiscal Year",
    "Iija Funds",
    "General Fund Auxiliary Funds",
]

In [60]:
grouped1 = project_table2.groupby(groupby_cols_1).agg(
    {**{e: "sum" for e in sum_cols}, **{e: "max" for e in max_cols}}
)

#### Groupby/Formatting V2 
* Aggregates GGRF and SB1 by project instead of phase/components.

In [61]:
# Took out phase
groupby_cols_2 = [
    "Award Year: Project Number",
    "Project ID",
    "Ppno",
    "Project Title",
    "Status",
    "Project Description",
    'State Sb 1 Program Code',
    "Iija Program Code",
    "Tircp Award Amount  $",
    "Total  Cost",
    "Is Sb1?",
    "Is Ggrf?",
    "Is Iija?",
    "On Shs?",
    "Calitp",
    "Estimated Tircp Ghg Reductions",
    "Estimated Tircp Ghg Reductions2",
    "Increased Ridership",
    "Service Integration",
    "Improve Safety",
    "Readiness",
    "Funding Leverage",
    "Multi Agency Coordination Integration",
    "Ab 1550 Community Benefits",
    "Housing Co Benefits",
    "Caltrans District",
    "Assembly Districts",
    "Senate Districts",
    "City Code",
    "City Agency Id",
    "County Code",
    "County Agency Id",
    "Implementing Agency  Id",
    "Implementing Agency",

    
]

In [62]:
grouped2 = project_table2.groupby(groupby_cols_2).agg(
    {**{e: "sum" for e in sum_cols}, **{e: "max" for e in max_cols}}
)

#### Save

In [63]:
#with pd.ExcelWriter(f"{A1_data_prep.GCS_FILE_PATH}gis_template.xlsx") as writer:
  #  grouped1.to_excel(writer, sheet_name="Projects Table V1", index=True)
  #  grouped2.to_excel(writer, sheet_name="Projects Table V2", index=True)