## Add SB1 geographic information.

In [None]:
import _utils
import geopandas as gpd
import numpy as np
import pandas as pd
from calitp.sql import to_snakecase

In [None]:
import fsspec
from calitp import *
from calitp.storage import get_fs

fs = get_fs()
import os

In [None]:
# Notebook display settings: show up to 200 columns, and put no limit on the
# number of rows or on the width of a cell when a DataFrame is rendered.
pd.set_option("display.max_columns", 200)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None

### Non SHOPP-ATP-TIRCP

In [None]:
# Read the 10-year non-SHOPP workbook (with ATP and TIRCP) from the folder
# given by `_utils.GCS_FILE_PATH`, then pass it through `to_snakecase`.
nonshopp = pd.read_excel(f"{_utils.GCS_FILE_PATH}cleaned_data_atp_tircp.xlsx")
nonshopp = to_snakecase(nonshopp)

In [None]:
# Subset to join.
non_shopp_subset = [
    "ppno",
    "ct_project_id",
    "ea",
    "project_name",
    "lead_agency",
    "previous_caltrans_nominations",
    "full_county_name",
    "district",
    "project_description",
    "current_phase",
    "primary_mode",
    "urban_rural",
    "total_project_cost__$1,000",
    "total_unfunded_need__$1,000",
    "notes",
    "shs_capacity_increase_detail",
    "current_phase",
]

In [None]:
# Keep only the join columns. The explicit copy makes `nonshopp` an
# independent frame, so the column assignments in later cells do not raise
# SettingWithCopyWarning.
nonshopp = nonshopp[non_shopp_subset].copy()

In [None]:
# Format every district value with the "02" spec (zero-pad to a width of two).
nonshopp["district"] = nonshopp["district"].map(lambda d: "{:02}".format(d))

### Sb1 Geo
* https://odpsvcs.dot.ca.gov/arcgis/rest/services/RCA/RCA_Projects_032022/FeatureServer

#### Step 1: Read in all projects
* Compare with CSV.
* Clean it up.

In [None]:
# Query against layer 22 of the RCA_Projects_032022 FeatureServer: every row
# (where=1=1), all fields, geometry included, returned as GeoJSON.
sb1_all_projects_url = (
    "https://odpsvcs.dot.ca.gov/arcgis/rest/services/RCA/RCA_Projects_032022"
    "/FeatureServer/22/query"
    "?where=1%3D1&objectIds=&time=&geometry="
    "&geometryType=esriGeometryEnvelope&inSR="
    "&spatialRel=esriSpatialRelIntersects&distance="
    "&units=esriSRUnit_Foot&relationParam=&outFields=*+"
    "&returnGeometry=true&maxAllowableOffset=&geometryPrecision="
    "&outSR=&gdbVersion=&historicMoment="
    "&returnDistinctValues=false&returnIdsOnly=false"
    "&returnCountOnly=false&returnExtentOnly=false"
    "&orderByFields=&groupByFieldsForStatistics=&outStatistics="
    "&returnZ=false&returnM=false&multipatchOption="
    "&resultOffset=&resultRecordCount=&returnTrueCurves=false"
    "&sqlFormat=none&f=geojson"
)

In [None]:
# Read in SB1 csv
# sb1_csv = to_snakecase(pd.read_csv(f"{_utils.GCS_FILE_PATH}RebuildingCA_map_Data.csv"))

In [None]:
# Download the "all projects" layer with geopandas, then pass it through
# `to_snakecase`.
sb1_all_projects = gpd.read_file(sb1_all_projects_url)
sb1_all_projects = to_snakecase(sb1_all_projects)

In [None]:
# Same rows, different columns.
# sb1_all_projects.shape, sb1_csv.shape

In [None]:
# Project ID matches
# csv_projectid = set(sb1_csv.project_id.unique().tolist())
# geojson_projectid = set(sb1_all_projects.projectid.unique().tolist())
# csv_projectid - geojson_projectid

In [None]:
# set(sb1_all_projects.columns).difference(set(sb1_csv.columns))

In [None]:
# Column names of interest in the "all projects" layer, in selection order.
subset = """
    projectid agencyids agencies programcodes fiscalyears projectstatuses
    sb1funds iijafunds totalcost
    assemblydistricts senatedistricts congressionaldistricts
    countynames citynames ct_districts
    issb1codes isiijacode isonshscodes
    geometry projecttitle projectdescription
""".split()

In [None]:
# Align funding
sb1_all_projects = _utils.align_funding_numbers(
    sb1_all_projects,
    [
        "totalcost",
        "sb1funds",
    ],
)

In [None]:
# Lower case and clean project names
sb1_all_projects.projecttitle = (
    sb1_all_projects.projecttitle.str.lower().str.strip().str.split("20").str[0]
)

In [None]:
# Get rid of |
for i in ["programcodes", "issb1code", "projecttitle", "isiijacode", "isonshscode"]:
    sb1_all_projects[i] = sb1_all_projects[i].str.replace("|", "")

In [None]:
# No geometry, just drop it
sb1_all_projects = sb1_all_projects.drop(columns=["geometry"])

In [None]:
# Show the distinct program-code strings.
# NOTE(review): `full_gdf2` is never created in the visible cells (it is only
# re-assigned from itself further down) — confirm where it comes from.
full_gdf2.programcodes.unique()

In [None]:
# Rows whose program codes mention TIRCP or ATP.
# na=False treats a missing program code as "no match"; without it the mask
# contains NaN and boolean indexing raises.
tircp_atp = full_gdf2.loc[
    full_gdf2.programcodes.str.contains("TIRCP|ATP", na=False)
].reset_index(drop=True)

In [None]:
# Non-SHOPP rows whose previous Caltrans nominations mention TIRCP or ATP.
# na=False treats a missing nomination as "no match"; without it the mask
# contains NaN and boolean indexing raises.
tircp_atp_nonshopp = nonshopp.loc[
    nonshopp.previous_caltrans_nominations.str.contains("TIRCP|ATP", na=False)
].reset_index(drop=True)

In [None]:
def clean_project_id(df, project_id_col: str):
    """Normalise a project-id column in place and return the same frame.

    Apostrophes are removed, the text is lower-cased, and leading/trailing
    whitespace is stripped.

    Args:
        df: DataFrame holding the column; it is modified in place.
        project_id_col: name of the string column to clean.

    Returns:
        The same `df` object, with `project_id_col` overwritten.
    """
    ids = df[project_id_col].str.replace("'", "")
    ids = ids.str.lower()
    df[project_id_col] = ids.str.strip()
    return df

In [None]:
# Fill in NA
# sb1_2 = sb1_2.fillna(sb1_2.dtypes.replace({"float64": 0.0, "object": "None"}))

#### Step 2: Read in files with geometry 

In [None]:
# Stopped at Trade Corridor Enhancement Program Pt for testing
url_list = [
    "https://odpsvcs.dot.ca.gov/arcgis/rest/services/RCA/RCA_Projects_032022/FeatureServer/1/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=*+&returnGeometry=true&maxAllowableOffset=&geometryPrecision=&outSR=&gdbVersion=&historicMoment=&returnDistinctValues=false&returnIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&multipatchOption=&resultOffset=&resultRecordCount=&returnTrueCurves=false&sqlFormat=none&f=geojson",
    "https://odpsvcs.dot.ca.gov/arcgis/rest/services/RCA/RCA_Projects_032022/FeatureServer/2/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=*+&returnGeometry=true&maxAllowableOffset=&geometryPrecision=&outSR=&gdbVersion=&historicMoment=&returnDistinctValues=false&returnIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&multipatchOption=&resultOffset=&resultRecordCount=&returnTrueCurves=false&sqlFormat=none&f=geojson",
    "https://odpsvcs.dot.ca.gov/arcgis/rest/services/RCA/RCA_Projects_032022/FeatureServer/3/query?where=1%3D1&objectIds=&time=&geometry=&geometryType=esriGeometryEnvelope&inSR=&spatialRel=esriSpatialRelIntersects&distance=&units=esriSRUnit_Foot&relationParam=&outFields=*+&returnGeometry=true&maxAllowableOffset=&geometryPrecision=&outSR=&gdbVersion=&historicMoment=&returnDistinctValues=false&returnIdsOnly=false&returnCountOnly=false&returnExtentOnly=false&orderByFields=&groupByFieldsForStatistics=&outStatistics=&returnZ=false&returnM=false&multipatchOption=&resultOffset=&resultRecordCount=&returnTrueCurves=false&sqlFormat=none&f=geojson",
]

In [None]:
# Empty frame that the per-layer results are concatenated onto in the next cell.
full_gdf = pd.DataFrame()

In [None]:
# Read every layer in `url_list`, keep the id/title/geometry/program columns,
# and stack the results under whatever `full_gdf` already holds.
layer_columns = ["object_id", "projectid", "projecttitle", "geometry", "programcodes"]
layer_frames = []
for layer_url in url_list:
    df = to_snakecase(gpd.read_file(layer_url))[layer_columns]
    layer_frames.append(df)
full_gdf = pd.concat([full_gdf, *layer_frames], axis=0)

In [None]:
# (rows, columns) of the stacked layer data.
full_gdf.shape

In [None]:
# Normalise the ids (apostrophes removed, lower-cased, trimmed) before merging.
# NOTE(review): `sb1_project_id` is not created in the visible cells — confirm
# where it is loaded.
sb1_project_id = clean_project_id(sb1_project_id, "project_id")

In [None]:
# Same id normalisation for the layer data, so both sides of the merge agree.
# NOTE(review): `full_gdf2` is not created in the visible cells; only
# `full_gdf` is — confirm where `full_gdf2` comes from.
full_gdf2 = clean_project_id(full_gdf2, "projectid")

In [None]:
# Distinct ids on each side, plus the row count of `sb1_project_id`.
full_gdf2.projectid.nunique(), sb1_project_id.project_id.nunique(), len(sb1_project_id)

In [None]:
# Outer-join on project id and count how many rows fall on the left side
# only, the right side only, or both.
pd.merge(
    left=sb1_project_id,
    right=full_gdf2,
    how="outer",
    left_on="project_id",
    right_on="projectid",
    indicator=True,
)[["_merge"]].value_counts()

In [None]:
# Keep every row of the layer data and attach the matching `sb1_project_id`
# columns by project id.
sb1_m = pd.merge(
    left=full_gdf2,
    right=sb1_project_id,
    how="left",
    left_on="projectid",
    right_on="project_id",
)

In [None]:
# Where the merge left `project_name` empty, fall back to the layer's title.
sb1_m["project_name"] = sb1_m["project_name"].fillna(sb1_m["projecttitle"])

In [None]:
# Outer-join the non-SHOPP table to the SB1 data by project name and count
# left-only / right-only / both.
pd.merge(
    left=nonshopp,
    right=sb1_m,
    how="outer",
    on="project_name",
    indicator=True,
)[["_merge"]].value_counts()

In [None]:
# Compare the row count with the number of distinct project names.
# NOTE(review): `sb1` is not created in the visible cells — confirm its source.
sb1.shape, sb1.project_name.nunique()

In [None]:
# Project titles are not always specific to a single project;
# they tend to be very general.
# sb1.project_name.value_counts()

In [None]:
# Columns kept from `sb1`, in selection order.
sb1_subset = """
    project_name implementing_agency sb1_program fiscal_year
    project_description total_cost sb1_funds is_sb1? project_status
    assembly_districts senate_districts congressional_districts
    counties cities caltrans_districts on_shs?
""".split()

In [None]:
# Narrow `sb1` to the columns listed in `sb1_subset`.
sb1_2 = sb1.loc[:, sb1_subset]

In [None]:
# Number of rows per SB1 program.
sb1.sb1_program.value_counts()

In [None]:
# (rows, columns) after narrowing to `sb1_subset`.
sb1_2.shape

In [None]:
# Fill in NA
sb1_2 = sb1_2.fillna(sb1_2.dtypes.replace({"float64": 0.0, "object": "None"}))

In [None]:
# Align funding
sb1_2 = _utils.align_funding_numbers(
    sb1_2,
    [
        "total_cost",
        "sb1_funds",
    ],
)

### Merge 9 Sample Non-SHOPP Projects with GeoJSON

In [None]:
# Names of the nine sample projects, as written in the source material.
# They are lower-cased in the next cell before being compared.
nine_projects_names = [
    "LA-210 Median Concrete Barrier Renovation",
    "SR-14 Widening Project",
    "US 395 Freight Mobility and Safety Project",
    "East Bay Greenway Multimodal Corridor Project",
    "Watsonville-Santa Cruz Multimodal Corridor Program",
    "SM 101 Woodside Road Interchange and Port Access Project",
    "I-710 Integrated Corridor Management",
    "Five Cities Multimodal Transportation Network Enhancement Project",
    "SR-86/Avenue 50 New Interchange (Phase II)",
]

In [None]:
# Lower-case the sample names so they compare against lower-cased project names.
nine_projects_names = list(map(str.lower, nine_projects_names))

In [None]:
# EA values for the sample projects.
# NOTE(review): only seven values are listed, and "4W850 " carries a trailing
# space that the lower-casing in the next cell does not remove — confirm
# whether the source data has the same trailing space.
nine_projects_ea = [
    "33560",
    "N/A (PID)",
    "0F633",
    "4W850 ",
    "0C734",
    "23536",
    "37510K",
]

In [None]:
# Lower-case the EA values (whitespace is left untouched).
nine_projects_ea = list(map(str.lower, nine_projects_ea))

In [None]:
# Caltrans project ids of the nine sample projects, used with `isin` against
# `ct_project_id`. Each id is listed once ("0414000032" used to appear twice,
# which made this "nine project" list ten entries long).
# NOTE(review): these are strings with a leading zero; `isin` only matches if
# `ct_project_id` holds the same string form — confirm the column's dtype.
nine_projects_id = [
    "0422000202",
    "0414000032",
    "0520000083",
    "0515000063",
    "0721000056",
    "0716000370",
    "0813000222",
    "0814000144",
    "0720000165",
]

In [None]:
# .str.split("20").str[0]
for i in ["project_name", "ea", "ppno"]:
    nonshopp[i] = nonshopp[i].str.lower()

In [None]:
# Which of the nine sample names appear in the non-SHOPP data?
name_hits = nonshopp.project_name.isin(nine_projects_names)
nonshopp.loc[name_hits, ["project_name"]].reset_index(drop=True)

* Two of the nine sample projects have no match by name: "SM 101 Woodside Road Interchange and Port Access Project" and "SR-14 Widening Project".

In [None]:
# Same lookup by Caltrans project id instead of by name.
id_hits = nonshopp.ct_project_id.isin(nine_projects_id)
nonshopp.loc[id_hits, ["project_name"]].reset_index(drop=True)

In [None]:
# Lower case and clean project names
for i in [sb1_2, df]:
    i["project_name"] = i["project_name"].str.lower().str.strip().str.split("20").str[0]

In [None]:
# 67 without accounting for districts
pd.merge(df, sb1_2, how="outer", on=["project_name"], indicator=True)[
    ["_merge"]
].value_counts()

In [None]:
# 62 matches
pd.merge(
    df,
    sb1_2,
    how="outer",
    left_on=["project_name", "district"],
    right_on=["project_name", "caltrans_districts"],
    indicator=True,
)[["_merge"]].value_counts()

In [None]:
# Keep every row of `df` and attach the SB1 columns where both the project
# name and the district agree; `_merge` records which rows found a match.
project_title_m = pd.merge(
    left=df,
    right=sb1_2,
    how="left",
    left_on=["project_name", "district"],
    right_on=["project_name", "caltrans_districts"],
    indicator=True,
)

In [None]:
# Columns to eyeball side by side when inspecting matched rows of
# `project_title_m`.
# NOTE(review): the `_x` / `_y` names are presumably pandas' default merge
# suffixes for a `project_description` column present on both sides — confirm.
preview = [
    "project_name",
    "district",
    "caltrans_districts",
    "counties",
    "full_county_name",
    "project_description_x",
    "project_description_y",
    "previous_caltrans_nominations",
    "sb1_program",
    "total_project_cost__$1,000",
    "total_cost",
]

In [None]:
# project_title_m.loc[project_title_m._merge == 'both'][preview]

In [None]:
# Number of merged rows per SB1 program.
project_title_m.sb1_program.value_counts()

In [None]:
# Distinct project names in the merged table, as a plain Python list
# (despite the variable name, this is not a CSV).
sb1_csv = project_title_m["project_name"].unique().tolist()

### TIRCP
* None of the TIRCP projects match in the merge, even though the project names appear to be the same.

In [None]:
# sb1_tircp = sb1_2.loc[sb1_2.sb1_program == "Transit and Intercity Rail Capital Program"].reset_index(drop = True)

In [None]:
# sb1_tircp[['project_name','caltrans_districts','counties']].sort_values('project_name').head(2)

In [None]:
# tircp_sb[['project_name']].sort_values(by = 'project_name')

In [None]:
# tircp_shopp[['project_name']].sort_values(by = 'project_name')

In [None]:
# Rows of `df` whose previous Caltrans nominations mention TIRCP.
# na=False treats a missing nomination as "no match"; without it the mask
# contains NaN and boolean indexing raises.
tircp_shopp = df.loc[
    df.previous_caltrans_nominations.str.contains("TIRCP", na=False)
].reset_index(drop=True)

In [None]:
# why are there no matches??
pd.merge(tircp_shopp, tircp_sb, how="outer", on=["project_name"], indicator=True)[
    ["_merge"]
].value_counts()

In [None]:
import urllib.parse

import requests

In [None]:
# Base endpoint for the commented-out point-query experiment in the cells below.
# NOTE(review): the path has no layer id and no trailing "?", so
# `url + urllib.parse.urlencode(params)` would not form a valid query URL —
# confirm before re-enabling that code.
url = r"https://odpsvcs.dot.ca.gov/arcgis/rest/services/RCA/RCA_Projects_032022/FeatureServer/query"

In [None]:
# params = {
#    'geometry': '-118.21637221791077, 34.094916196179504',
#    'geometryType': 'esriGeometryPoint',
#    'returnGeometry': 'true',
#    'f': 'pjson'
# }

In [None]:
# url_final = url + urllib.parse.urlencode(params)

In [None]:
# response = requests.get(url=url_final)

In [None]:
# data = response.text

In [None]:
# test = gpd.read_file(data)

In [None]:
# https://services.arcgisonline.com/arcgis/rest/services/Elevation/World_Hillshade/MapServer?f=json

In [None]:
# https://services.arcgisonline.com/arcgis/rest/services/Elevation/World_Hillshade/MapServer/tilemap/0/0/0/8/8?f=json