## SB1 GIS Template
* Populate TIRCP GIS Template to create a map for TIRCP only projects.

In [1]:
import A1_data_prep
import A2_tableau
import numpy as np
import pandas as pd
from babel.numbers import format_currency
from calitp import *

In [2]:
pd.options.display.max_columns = 100
pd.options.display.float_format = "{:.2f}".format
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)

### Load in sheets

#### Project

In [3]:
df_project = A1_data_prep.clean_project()

  warn(msg)


In [4]:
project_cols_wanted = [
    "project_award_year",
    "project_project_#",
    "project_grant_recipient",
    "project_project_title",
    "project_project_description",
    "project_ppno",
    "project_district",
    "project_county",
    "project_is_sb1?",
    "project_is_ggrf?",
    "project_is_iija?",
    "project_on_shs?",
]

In [5]:
df_project2 = df_project[project_cols_wanted]

#### Allocation

In [6]:
df_alloc = A1_data_prep.clean_allocation()

In [7]:
# Subset
alloc_cols_wanted = [
    "allocation_award_year",
    "allocation_project_#",
    "allocation_grant_recipient",
    "allocation_implementing_agency",
    "allocation_ppno",
    "allocation_components",
    "allocation_phase",
    "allocation_allocation_amount",
    "allocation_expended_amount",
    "allocation_sb1_funding",
    "allocation_allocation_date",
]

In [8]:
df_alloc2 = df_alloc[alloc_cols_wanted]

#### GIS

In [9]:
df_gis = A1_data_prep.load_gis()

### Populate GIS Template
* With the following columns
    * ProjectID	
    * PPNO 	
    * ProjectTitle
    * ProjectDescription	
    * TIRCP Component	
    * ProjectStatus	
    * SB1 Funds	
    * Total Project Cost	
    * Fiscal Year	
    * Allocated	
    * Is SB1?	
    * ON SHS?	
    * "Caltrans District"
    * "Assembly Districts"	
    * "Senate Districts"	
    * City Code	
    * City Agency ID 	
    * County Code	
    * County Agency ID 	 
    * Implementing Agency  ID 
 

#### Merge sheets

In [10]:
# Merge project & allocation sheets first
m1 = pd.merge(
    df_project2,
    df_alloc2,
    how="outer",
    left_on=["project_award_year", "project_ppno"],
    right_on=["allocation_award_year", "allocation_ppno"],
    indicator=True,
)

In [11]:
m1._merge.value_counts()

both          429
left_only      25
right_only      0
Name: _merge, dtype: int64

In [12]:
m1.shape

(454, 24)

In [13]:
# Check that left only values are for Cycle 5.
# m1.loc[m1['_merge'] == 'left_only']

In [14]:
# Drop merge
m1 = m1.drop(columns=["_merge"])

In [15]:
# Merge m1 with df_gis
m2 = pd.merge(
    m1,
    df_gis,
    how="outer",
    left_on=["project_award_year", "project_project_title"],
    right_on=["award_year", "project_title"],
    indicator=True,
)

In [16]:
m2._merge.value_counts()

both          454
left_only       0
right_only      0
Name: _merge, dtype: int64

#### Create sheet

In [17]:
# Subset columns
cols_wanted = [
    "project_award_year",
    "project_project_#",
    "project_ppno",
    "project_project_title",
    "project_project_description",
    "project_grant_recipient",
    "allocation_phase",
    "allocation_components",
    "allocation_sb1_funding",
    "allocation_allocation_amount",
    "allocation_allocation_date",
    "project_on_shs?",
    "allocation_phase",
    "caltransdistrict",
    "assembly_districts",
    "congressional_districts",
    "senate_districts",
    "city_code",
    "city_agency_id_",
    "county_code",
    "county_agency_id_",
    "_implementing_agency__id_",
]

In [18]:
gis_sheet = m2[cols_wanted]

#### Create Project ID
GIS program has to have a ten digit ID for the project ID.  Can you convert the project ID’s as follows:
* (four zeros, cycle year, two digit project number) 2015:01 = 0000201501


In [19]:
gis_sheet = gis_sheet.rename(columns={"project_project_#": "project_number"})

In [20]:
# Pad single digits with a 0.
# https://stackoverflow.com/questions/67401497/add-a-zero-before-1-digit-number-in-a-column-pandas-dataframe
gis_sheet["project_number"] = gis_sheet.project_number.astype(str).str.zfill(2)

In [21]:
gis_sheet["project_id"] = gis_sheet["project_award_year"].astype(str) + gis_sheet[
    "project_number"
].astype(str)

In [22]:
gis_sheet["project_id"] = "0000" + gis_sheet["project_id"]

#### Edit Title to include Project Year & Number

In [23]:
# Create a 2nd project identifier with year + project numer
gis_sheet['project_year_number'] = gis_sheet["project_award_year"].astype(str) + ":" + gis_sheet["project_number"]


In [24]:
gis_sheet["project_project_title"] = gis_sheet["project_project_title"] + ' ' +  gis_sheet["project_year_number"]

#### Extract Fiscal Year

In [25]:
gis_sheet["fiscal_year"] = gis_sheet["allocation_allocation_date"].dt.year

#### SB1 and Allocated funding

In [26]:
def sb1funding(row):
    if row.allocation_sb1_funding <= 0:
        return "No"
    else:
        return "Yes"

In [27]:
gis_sheet["is_sb1_funded"] = gis_sheet.apply(lambda x: sb1funding(x), axis=1)

In [28]:
def is_allocated(row):
    if row.allocation_allocation_amount <= 0:
        return "No"
    else:
        return "Yes"

In [29]:
gis_sheet["allocated"] = gis_sheet.apply(lambda x: is_allocated(x), axis=1)

#### Final Clean Up

In [30]:
# Arrange cols in right order
cols_right_order = [
    "project_year_number",
    "project_id",
    "project_ppno",
    "project_project_title",
    "project_project_description",
    "allocation_components",
    "allocation_phase",
    "allocation_sb1_funding",
    "allocation_allocation_amount",
    "fiscal_year",
    "allocated",
    "is_sb1_funded",
    "project_on_shs?",
    "caltransdistrict",
    "assembly_districts",
    "senate_districts",
    "city_code",
    "city_agency_id_",
    "county_code",
    "county_agency_id_",
    "_implementing_agency__id_",
]

In [31]:
final_gis = gis_sheet[cols_right_order]

In [32]:
final_gis = A1_data_prep.clean_up_columns(final_gis)

In [33]:
final_gis = final_gis.rename(
    columns={
        "Id": "Project ID",
        "Title": "Project Title",
        "Description": "Project Description",
        "Component": "TIRCP Component",
        "Phase": "Project Status",
        "Sb1 Funding": "SB1 Funds",
        "Amount": "Total Project Cost",
        "Caltransdistrict": "Caltrans District",
    }
)

In [34]:
final_gis = final_gis.fillna(
   gis_sheet.dtypes.replace({"float64": 0.0, "object": "None", "int64": 0})
 )

In [35]:
final_gis.sample(1)

Unnamed: 0,Year Number,Project ID,Ppno,Project Title,Project Description,Components,Project Status,Project Status.1,SB1 Funds,Total Project Cost,Fiscal Year,Allocated,Is Sb1 Funded,On Shs?,Caltrans District,Assembly Districts,Senate Districts,City Code,City Agency Id,County Code,County Agency Id,Implementing Agency Id
129,2018:12,201812,CP031,Building Up: LOSSAN North Improvement Program 2018:12,"Improve on-time performance and rail corridor capacity for Pacific Surfliner and Coaster trains by investing in signal optimization, a more robust capital maintenance program and new right of way fencing. These projects prepare the corridor for higher frequency services on the Pacific Surfliner and COASTER. Also includes study of San Diego maintenance/layover facility relocation.",On-Time Performance Incentive Program for Service Improvements,CONST,CONST,1274000.0,2299000.0,2018.0,Yes,Yes,,|05|07|08| 11| 12|,|35|37|38|39|40|41|43|44|45|46|47|48|50|51|52|53|54|55|57|58|59|60|61|62|63|64|65|66|67|68|69|70|72|73|74|75|76|77|78|79|80|,|18|19|20|21|22|23|24|25|26|27|29|30|31|32|33|34|35|36|37|38|39|40|,|5016|5007|5129|5405|5006|5133|5055|5410|5372|5079|5445|5004|,,|5953|5955|5957|5949|5951|5952|,,|0295|


#### Save

In [36]:
with pd.ExcelWriter(f"{A1_data_prep.GCS_FILE_PATH}gis_template.xlsx") as writer:
    final_gis.to_excel(writer, sheet_name="Projects Table", index=False)
