## SB1 GIS Template
* Populate TIRCP GIS Template to create a map for TIRCP only projects.

In [1]:
import A1_data_prep
import A2_tableau
import numpy as np
import pandas as pd
from babel.numbers import format_currency
from calitp import *

In [2]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

### Load in sheets

In [3]:
df1 = A2_tableau.tableau_dashboard()

  warn(msg)


In [5]:
df1.groupby(['Award Year']).agg({'Tircp':'sum',
       'Allocated Amount':'sum','Expended Amount':'sum'})

Unnamed: 0_level_0,Tircp,Allocated Amount,Expended Amount
Award Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2015,224278000.0,224278000,211472239.88
2016,390893000.0,327200000,210202526.62
2018,4325000000.0,1693434000,403221626.39
2020,500000000.0,82360000,29270980.28
2022,762637000.0,0,0.0


In [6]:
df1.Progress.value_counts()

No expenditures recorded         46
100% of allocated funds spent    17
Behind                           16
On Track                         12
Ahead                             5
Name: Progress, dtype: int64

In [9]:
df1[['Tircp','Expended Percent Group','Expended Amount', 'Progress']]

Unnamed: 0,Tircp,Expended Percent Group,Expended Amount,Progress
0,24403000.0,100,23400943.53,100% of allocated funds spent
1,4620000.0,100,4619999.9,100% of allocated funds spent
2,38494000.0,100,38494000.0,100% of allocated funds spent
3,1675000.0,26-50,648625.56,Behind
4,10000000.0,100,10000000.0,100% of allocated funds spent
5,2320000.0,100,2320000.0,100% of allocated funds spent
6,6427000.0,100,6315208.89,100% of allocated funds spent
7,4000000.0,100,4000000.0,100% of allocated funds spent
8,31936000.0,100,31936000.0,100% of allocated funds spent
9,41181000.0,100,41181000.0,100% of allocated funds spent


#### Project

In [3]:
df_project = A1_data_prep.clean_project()

  warn(msg)


In [4]:
project_cols_wanted = [
    "project_award_year",
    "project_project_#",
    "project_grant_recipient",
    "project_project_title",
    "project_project_description",
    "project_ppno",
    "project_total_project_cost",
    "project_tircp_award_amount__$_",
    "project_is_sb1?",
    "project_is_ggrf?",
    "project_is_iija?",
    "project_on_shs?",
    "project_calitp",
    "project_estimated_tircp_ghg_reductions",
    "project_estimated_tircp_ghg_reductions2",
    "project_increased_ridership",
    "project_service_integration",
    "project_improve_safety",
    "project_project_readiness",
    "project_funding_leverage",
    "project_multi_agency_coordination_integration",
    "project_ab_1550_community_benefits",
    "project_housing_co_benefits",
]

In [5]:
df_project2 = df_project[project_cols_wanted]

#### Allocation

In [6]:
df_alloc = A1_data_prep.clean_allocation()

In [7]:
# Subset
alloc_cols_wanted = [
    "allocation_award_year",
    "allocation_project_#",
    "allocation_implementing_agency",
    "allocation_ppno",
    "allocation_components",
    "allocation_phase",
    "allocation_allocation_amount",
    "allocation_expended_amount",
    "allocation_sb1_funding",
    "allocation_ggrf_funding",
    "allocation_allocation_date",
]

In [8]:
df_alloc2 = df_alloc[alloc_cols_wanted]

#### GIS

In [9]:
df_gis = A1_data_prep.load_gis()

### Populate GIS Template
* With the following columns
    * Award Year: Project Number
    * Project Id
    * Project TItle 
    * Project Description	
    * State/SB 1 Program Code	
    * IIJA Program Code	
    * Project Status	
    * SB1 Funds	
    * GGRF Funds	
    * IIJA Funds	
    * General Fund Auxilary Funds	
    * TIRCP Award Amount	
    * Total Project Cost	
    * Fiscal Year	
    * Is SB1?	
    * Is GGRF?	
    * Is IIJA?	
    * ON SHS?	
    * CalITP	
    * Estimated TIRCP GHG Reductions	
    * Increased Ridership	
    * Service Integration	
    * Improved Safety	
    * Project Readiness	
    * Multi-Agency Coordination	
    * AB 1550 Community Benefits	
    * Housing Co-Benefits	
    * Caltrans District	
    * Assembly District	
    * Senate District	Congressional District	
    * City Code	
    * City Agency ID 	
    * County Code	
    * County Agency ID 	
    * Implementing Agency  ID 	
    * Implementing Agency Name

#### Merge sheets

In [10]:
# Merge project & allocation sheets first
m1 = pd.merge(
    df_project2,
    df_alloc2,
    how="outer",
    left_on=["project_award_year", "project_ppno"],
    right_on=["allocation_award_year", "allocation_ppno"],
    indicator=True,
)

In [11]:
# Left only is 2022 projects that haven't begun
m1._merge.value_counts()

both          429
left_only      25
right_only      0
Name: _merge, dtype: int64

In [12]:
m1.shape

(454, 35)

In [13]:
# Drop merge
m1 = m1.drop(columns=["_merge"])

In [14]:
# Merge m1 with df_gis
m2 = pd.merge(
    m1,
    df_gis,
    how="outer",
    left_on=["project_award_year", "project_project_title"],
    right_on=["award_year", "project_title"],
    indicator=True,
)

In [15]:
m2._merge.value_counts()

both          454
left_only       0
right_only      0
Name: _merge, dtype: int64

#### Create sheet

##### Create Project ID
GIS program has to have a ten digit ID for the project ID.  Can you convert the project ID’s as follows:
* (four zeros, cycle year, two digit project number) 2015:01 = 0000201501


In [16]:
project_table = m2.rename(columns={"project_project_#": "project_number"})

In [17]:
# Pad single digits with a 0.
# https://stackoverflow.com/questions/67401497/add-a-zero-before-1-digit-number-in-a-column-pandas-dataframe
project_table["project_number"] = project_table.project_number.astype(str).str.zfill(2)

In [18]:
project_table["project_id"] = project_table["project_award_year"].astype(
    str
) + project_table["project_number"].astype(str)

In [19]:
project_table["project_id"] = "0000" + project_table["project_id"]

##### Edit Title to include Project Year & Number

In [20]:
# Create a new column: with year + project numer
project_table["Award Year: Project Number"] = (
    project_table["project_award_year"].astype(str)
    + ":"
    + project_table["project_number"]
)

In [21]:
# Edit title with new
project_table["project_project_title"] = (
    project_table["project_project_title"]
    + " "
    + project_table["Award Year: Project Number"]
)

#### Extract Fiscal Year

In [22]:
project_table["fiscal_year"] = project_table["allocation_allocation_date"].dt.year

#### Columns

In [23]:
# Keep the columns I want.
cols_right_order = [
    "Award Year: Project Number",
    "project_id",
    "project_ppno",
    "project_project_title",
    "project_project_description",
    "allocation_components",
    "allocation_phase",
    "project_tircp_award_amount__$_",
    "allocation_sb1_funding",
    "allocation_ggrf_funding",
    "allocation_allocation_amount",
    "project_total_project_cost",
    "fiscal_year",
    "project_is_sb1?",
    "project_is_ggrf?",
    "project_is_iija?",
    "project_on_shs?",
    "project_calitp",
    "project_estimated_tircp_ghg_reductions",
    "project_estimated_tircp_ghg_reductions2",
    "project_increased_ridership",
    "project_service_integration",
    "project_improve_safety",
    "project_project_readiness",
    "project_funding_leverage",
    "project_multi_agency_coordination_integration",
    "project_ab_1550_community_benefits",
    "project_housing_co_benefits",
    "caltransdistrict",
    "assembly_districts",
    "senate_districts",
    "city_code",
    "city_agency_id_",
    "county_code",
    "county_agency_id_",
    "_implementing_agency__id_",
    "allocation_implementing_agency",
]

In [24]:
project_table2 = project_table[cols_right_order]

##### Create Missing Columns

In [25]:
project_table2["State_SB_1_Program_Code"] = "TIRCP"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  project_table2["State_SB_1_Program_Code"] = "TIRCP"


In [26]:
for i in ["IIJA_Program_Code", "IIJA_Funds", "General_Fund_Auxiliary_Funds"]:
    project_table2[i] = "N/A"

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  project_table2[i] = "N/A"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  project_table2[i] = "N/A"
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  project_table2[i] = "N/A"


In [27]:
# Clean up column names
project_table2 = A1_data_prep.clean_up_columns(project_table2)

In [28]:
project_table2 = project_table2.rename(
    columns={
        "Id": "Project ID",
        "Title": "Project Title",
        "Description": "Project Description",
        "Component": "TIRCP Component",
        "Phase": "Project Status",
        "Sb1 Funding": "SB1 Funds",
        "Amount": "Allocation Amount",
        "Caltransdistrict": "Caltrans District",
    }
)

#### Final Checks

In [29]:
project_table2.Ppno.nunique() == df_project.project_ppno.nunique()

True

#### Groupby/Formatting

In [30]:
project_table2 = project_table2.fillna(
    project_table2.dtypes.replace({"float64": 0.0, "object": "None", "int64": 0})
)

In [31]:
# Append number of times a component repeats by project ID & status
# Because components-status combination appears more than once for some projects
# And the monetary info is summed up unintentionally in the group by step below.
# https://stackoverflow.com/questions/57605705/how-to-append-counter-number-to-each-repeated-string-value-in-pandas-column
project_table2["Components"] += (
    project_table2.groupby(["Components", "Project ID", "Project Status"])
    .cumcount()
    .add(1)
    .astype(str)
)

In [32]:
# project_table2[["Project ID","Project Status",'Components']]

#### Groupby

In [33]:
groupby_cols = [
    "Award Year: Project Number",
    "Project ID",
    "Ppno",
    "Project Title",
    "Project Description",
    'State Sb 1 Program Code',
    "Iija Program Code",
    "Tircp Award Amount  $",
    "Total  Cost",
    "Is Sb1?",
    "Is Ggrf?",
    "Is Iija?",
    "On Shs?",
    "Calitp",
    "Estimated Tircp Ghg Reductions",
    "Estimated Tircp Ghg Reductions2",
    "Increased Ridership",
    "Service Integration",
    "Improve Safety",
    "Readiness",
    "Funding Leverage",
    "Multi Agency Coordination Integration",
    "Ab 1550 Community Benefits",
    "Housing Co Benefits",
    "Caltrans District",
    "Assembly Districts",
    "Senate Districts",
    "City Code",
    "City Agency Id",
    "County Code",
    "County Agency Id",
    "Implementing Agency  Id",
    "Implementing Agency",
    "Components",
    "Project Status",
]

In [34]:
sum_cols = ["SB1 Funds", "Ggrf Funding" ]

In [35]:
max_cols = [
    "Fiscal Year",
    "Iija Funds",
    "General Fund Auxiliary Funds",
]

In [36]:
grouped = project_table2.groupby(groupby_cols).agg(
    {**{e: "sum" for e in sum_cols}, **{e: "max" for e in max_cols}}
)

In [37]:
grouped.sample(1)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,Unnamed: 9_level_0,Unnamed: 10_level_0,Unnamed: 11_level_0,Unnamed: 12_level_0,Unnamed: 13_level_0,Unnamed: 14_level_0,Unnamed: 15_level_0,Unnamed: 16_level_0,Unnamed: 17_level_0,Unnamed: 18_level_0,Unnamed: 19_level_0,Unnamed: 20_level_0,Unnamed: 21_level_0,Unnamed: 22_level_0,Unnamed: 23_level_0,Unnamed: 24_level_0,Unnamed: 25_level_0,Unnamed: 26_level_0,Unnamed: 27_level_0,Unnamed: 28_level_0,Unnamed: 29_level_0,Unnamed: 30_level_0,Unnamed: 31_level_0,Unnamed: 32_level_0,Unnamed: 33_level_0,Unnamed: 34_level_0,SB1 Funds,Ggrf Funding,Allocation Amount,Fiscal Year,Iija Funds,General Fund Auxiliary Funds
Award Year: Project Number,Project ID,Ppno,Project Title,Project Description,State Sb 1 Program Code,Iija Program Code,Tircp Award Amount $,Total Cost,Is Sb1?,Is Ggrf?,Is Iija?,On Shs?,Calitp,Estimated Tircp Ghg Reductions,Estimated Tircp Ghg Reductions2,Increased Ridership,Service Integration,Improve Safety,Readiness,Funding Leverage,Multi Agency Coordination Integration,Ab 1550 Community Benefits,Housing Co Benefits,Caltrans District,Assembly Districts,Senate Districts,City Code,City Agency Id,County Code,County Agency Id,Implementing Agency Id,Implementing Agency,Components,Project Status,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1
2018:27,201827,CP033,Southern California Optimized Rail Expansion (SCORE) 2018:27,"Delivers more frequent, more reliable rail services throughout Southern California, with station reconfiguration with run-though tracks for Metrolink and Pacific Surfliner trains at Los Angeles Union Station to improve train movement through the station, and 30-min services on multiple Metrolink corridors in the LA Basin. Includes significant investments to improve the frequency and performance of rail services to Moorpark, Santa Clarita, San Bernardino, Riverside, and Orange County. Part of a high-performance long-range vision.",TIRCP,,875708000.0,2049700000.0,Y,Y,N,0.0,0.0,"5,714,000 MTCO2e",High,High,High,High,Medium,Medium-High,High,Medium-High,,|07|08|11|12|,|36|37|38|39|40|41|42|43|44|45|46|47|48|50|51|52|53|54|55|56|57|58|59|60|61|62|63|64|65|66|67|68|69|70|71|72|73|74|75|76|77|78|79|80|,|16|18|19|20|21|22|23|24|25|26|27|28|29|30|31|32|33|34|35|36|37|38|39|40|,|5004|5445|5079|5274|5458|5451|5410|5271|5063|5073|5055|5104|5198|5487|5092|5070|5342|5247|5362|5133|5310|5347|5340|5006|5210|5323|5118|5070|5162|5326|5147|5420|5307|5205|5033|5144|5200|5202|5450|5378|5419|5405|5436|5393|5129|5026|5397|5007|5481|5380|,0.0,|5953|5955|5956|5954|5952|5957|,0.0,|6187|,Southern California Regional Rail Authority,El Monte Station Area Grade Crossing Safety Improvements Located in Los Angeles County1,CONST,0.0,0.0,0.0,0.0,,


#### Save

with pd.ExcelWriter(f"{A1_data_prep.GCS_FILE_PATH}gis_template.xlsx") as writer:
    grouped.to_excel(writer, sheet_name="Projects Table", index=True)
    