# {county_title}

In [None]:
%%capture

# Turn off warnings
import warnings
warnings.filterwarnings("ignore")

# Normal packages
import geopandas as gpd
import numpy as np
import pandas as pd

# Format
from babel.numbers import format_currency

# Display
from IPython.display import HTML, Image, Markdown, display, display_html

# Pandas display settings: show up to 100 columns, every row, untruncated cell
# text, and floats with thousands separators and two decimals.
pd.set_option("display.max_columns", 100)
pd.options.display.max_rows = None
pd.options.display.max_colwidth = None
pd.set_option("display.float_format", "{:,.2f}".format)

# Location of the input workbook on GCS. Delete later, since this will
# presumably be read from a script that cleans up the data.
FILE = "fake_data.xlsx"
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/project_prioritization/"

# My utilities
import _utils

In [None]:
# Parameter Cell - the county of interest.
# This value is matched against the "full_county_name" column to build the
# county dataframe, and it is interpolated into the report text.
parameter_county = "Inyo"

In [None]:
# Load the statewide project list from the "fake" sheet of the workbook on GCS
df_statewide = pd.read_excel(
    GCS_FILE_PATH + FILE,
    sheet_name="fake",
)

In [None]:
# Projects with no county name are labelled "Various"
df_statewide = df_statewide.fillna({"full_county_name": "Various"})

In [None]:
# Keep only the rows for the county chosen in the parameter cell,
# renumbered from zero so positional label lookups work later on.
is_parameter_county = df_statewide["full_county_name"].eq(parameter_county)
df_parameter = df_statewide[is_parameter_county].reset_index(drop=True)

In [None]:
# Fail early with a readable message when the parameter county matched no
# rows; otherwise the lookups below fail with an uninformative error.
if df_parameter.empty:
    raise ValueError(
        f"No projects found for county {parameter_county!r}; "
        "check the value set in the parameter cell."
    )

# Grab the district this county is in, read from the county's first row
# (assumes all of a county's projects share one district - TODO confirm).
parameter_county_district = df_parameter["district"].iloc[0]

# Grab the full district name from the same row
parameter_county_district_full_name = df_parameter["district_full_name"].iloc[0]

In [None]:
# Every statewide project located in the same district as the parameter county
in_parameter_district = df_statewide["district"].eq(parameter_county_district)
df_parameter_district = df_statewide[in_parameter_district].reset_index(drop=True)

In [None]:
# Statewide Objects/DF
# Number of distinct project names across the state
total_projects_statewide = df_statewide["project_name"].nunique()

# Number of distinct values in the county (abbreviation) column
unique_counties_statewide = df_statewide["county"].nunique()

# Count of projects across ALL counties - for mapping
# Using county abbreviations.
counties_gdf_statewide = _utils.summarize_by_project_names(df_statewide, "county")

# Count of projects by full county name
counties_df_statewide = _utils.summarize_by_project_names(df_statewide, "full_county_name")

# Rank counties by number of total projects (1 = most projects).
# method="min" gives tied counties the same whole-number rank; the default
# "average" method yields fractional ranks (e.g. 2.5) that the int64 cast
# would silently truncate.
counties_df_statewide["Project Rank"] = (
    counties_df_statewide["Total Projects"]
    .rank(ascending=False, method="min")
    .astype("int64")
)

# Rank counties by total project costs (1 = highest cost). Despite its name,
# the "Project Cost" column holds this rank, not a dollar amount.
counties_df_statewide["Project Cost"] = (
    counties_df_statewide["Total Project Cost  $1,000"]
    .rank(ascending=False, method="min")
    .astype("int64")
)

# Median benefit score across the state, truncated to a whole number by int()
statewide_benefit_score = int(df_statewide["fake_benefit_score"].median())

In [None]:
# County Objects/Df
# One summary table per grouping column of the county dataframe, all built
# with the same helper and in the same order as the names on the left.
(
    phases_df_county,
    rural_urban_df_county,
    projects_df_county,
    agency_df_county,
) = (
    _utils.summarize_by_project_names(df_parameter, group_column)
    for group_column in (
        "current_phase",  # phase each project is in
        "urban_rural",  # rural or urban
        "primary_mode",  # project type
        "lead_agency",  # lead agency
    )
)

In [None]:
# County Objects
# Number of distinct project names in this county
total_number_projects_county = df_parameter["project_name"].nunique()

# Median benefit score, truncated to a whole number by int()
median_benefit_score_county = int(df_parameter["fake_benefit_score"].median())

# Currency strings below pin locale="en_US", the same locale used for the
# project details table, so the output does not depend on the machine's
# locale settings.

# Total project cost
total_cost_county = format_currency(
    df_parameter["total_project_cost__$1,000_"].sum(),
    currency="USD",
    locale="en_US",
)

# Median project cost
median_cost_county = format_currency(
    df_parameter["total_project_cost__$1,000_"].median(),
    currency="USD",
    locale="en_US",
)

# Total Requested Funds
total_req_county = format_currency(
    df_parameter["current_fake_fund_requested"].sum(),
    currency="USD",
    locale="en_US",
)

# Median Requested Funds
median_req_county = format_currency(
    df_parameter["current_fake_fund_requested"].median(),
    currency="USD",
    locale="en_US",
)

# Row of the project-type summary with the highest total cost
# (ascending sort, so the last row is the largest).
top_cost_mode_row = projects_df_county.sort_values("Total Project Cost  $1,000").iloc[-1]

# Project category with the most funding. It gets its own name so it is no
# longer overwritten by the formatted cost assigned next.
project_mode_most_money_county = top_cost_mode_row["Primary Mode"]

# Formatted total cost of that category. This keeps the value the name held
# at the end of the original cell, so anything reading it sees no change.
project_cat_most_money_county = top_cost_mode_row["Total Project ($1000) Formatted"]

# Get a line of where the county ranks.
county_rank_columns = ["Full County Name", "Project Rank", "Project Cost"]
county_rank_county = (
    counties_df_statewide[county_rank_columns]
    .loc[counties_df_statewide["Full County Name"] == parameter_county]
    .reset_index(drop=True)
)

# Find the agency that has the highest project cost among a county
agency_most_money = (
    agency_df_county.sort_values("Total Project Cost  $1,000").iloc[-1]["Lead Agency"]
)

In [None]:
# District Objects
# Number of distinct project names in the parameter county's district
total_district_projects = df_parameter_district["project_name"].nunique()

# Count of projects by districts across the whole state
df_summary_district = _utils.summarize_by_project_names(
    df_statewide, "district_full_name"
)

# Rank districts by number of total projects (1 = most projects).
# method="min" gives tied districts the same whole-number rank; the default
# "average" method yields fractional ranks that the int64 cast would truncate.
df_summary_district["Project Rank"] = (
    df_summary_district["Total Projects"]
    .rank(ascending=False, method="min")
    .astype("int64")
)

# Rank districts by total project costs (1 = highest cost). Despite its name,
# the "Project Cost" column holds this rank, not a dollar amount.
df_summary_district["Project Cost"] = (
    df_summary_district["Total Project Cost  $1,000"]
    .rank(ascending=False, method="min")
    .astype("int64")
)

# Count of projects by district - for mapping
gdf_summary_district = _utils.create_caltrans_map(
    _utils.summarize_by_project_names(df_statewide, "district")
)

# Median benefit score of projects in the parameter county's district,
# truncated to a whole number by int()
district_benefit_score = int(df_parameter_district["fake_benefit_score"].median())

# Total Requested Funds in the district. locale="en_US" matches the project
# details table and keeps the output independent of the machine's locale.
total_district_req = format_currency(
    df_parameter_district["current_fake_fund_requested"].sum(),
    currency="USD",
    locale="en_US",
)

# Total project cost in the district
total_district_project_cost = format_currency(
    df_parameter_district["total_project_cost__$1,000_"].sum(),
    currency="USD",
    locale="en_US",
)

# Get a line of where the district ranks.
district_rank_columns = ["District Full Name", "Project Rank", "Project Cost"]
district_rank = (
    df_summary_district[district_rank_columns]
    .loc[
        df_summary_district["District Full Name"] == parameter_county_district_full_name
    ]
    .reset_index(drop=True)
)

In [None]:
# Headline narrative: an overview of the county followed by an overview of
# the district it belongs to. Index [0] reads the first row of each summary
# table (presumably the largest group - confirm the helper's sort order).
display(
    Markdown(
        f"""<h4>Overview for {parameter_county} County</h4>
         <li><b>NOTE</b>: the data below is partially composed of placeholder values.
        <li><b>{total_number_projects_county}</b> out of {total_projects_statewide} projects are in {parameter_county} County.
        <li>Agencies requested a total of <b>{total_req_county}</b> in funds.
        <li>The total cost of all the projects is <b>{total_cost_county}</b>.
        <li>The most common project category is <b>{projects_df_county['Primary Mode'][0]}</b>.
        <li>Most projects are in the <b>{phases_df_county['Current Phase'][0]}</b> phase.
        <li><b>{median_benefit_score_county}</b> is the median benefit score. 
        <h4>{parameter_county_district_full_name} Overview</h4>
        <li>There are <b>{total_district_projects}</b> projects in this district.
        <li><b>{format((total_number_projects_county)/(total_district_projects),".1%")}</b> of projects in District {parameter_county_district} 
      are located in {parameter_county} County
        <li>The total amount requested for projects in District {parameter_county_district} is <b>{total_district_req}</b>.
        <li>The total cost of all the projects is <b>{total_district_project_cost}</b>.
        <li><b>{district_benefit_score}</b> is the median benefit score of projects. 
        <li>District {parameter_county_district} ranks {district_rank['Project Rank'][0]} in total projects
        and {district_rank['Project Cost'][0]} in costs compared to other districts.
        """
    )
)

In [None]:
# Interactive map of the districts, colored by their total number of projects
district_map_options = {
    "cmap": "GnBu_r",
    "width": 800,
    "height": 400,
    "tooltip": ["District", "Total Projects", "Total Project ($1000) Formatted"],
    "highlight": True,
    "legend": True,
    "style_kwds": {"fillOpacity": 1},
}
gdf_summary_district.explore("Total Projects", **district_map_options)

In [None]:
# Narrative for the rural/urban section; both values come from the first row
# of the rural/urban summary table.
rural_urban_text = f"""<h4>Rural versus Urban</h4>
        Most projects in {parameter_county} County are in a(n) <b>{rural_urban_df_county['Urban Rural'][0]}</b> area, 
        totaling to <b>{rural_urban_df_county['Total Project ($1000) Formatted'][0]}</b> in project costs. 
        """
display(Markdown(rural_urban_text))

In [None]:
# Bar chart of the rural/urban summary, built by a shared _utils helper.
# The positional strings are presumably column names, a tooltip column and
# a chart title; their exact roles are set by the helper's signature in
# _utils - confirm there before reordering.
total_urban_rural_bar = _utils.basic_bar_chart_custom_tooltip(
    rural_urban_df_county,
    "Total Project Cost  $1,000",
    "Urban Rural",
    "Total Project ($1000) Formatted",
    "Urban Rural",
    "Cost of Projects",
)

In [None]:
# Pie chart of the rural/urban summary, built by a shared _utils helper.
# The ":Q" / ":N" suffixes look like Altair encoding type shorthands
# (quantitative / nominal) - confirm against the helper in _utils.
total_urban_rural_pie = _utils.basic_pie_chart(
    rural_urban_df_county,
    "Total Projects:Q",
    "Urban Rural:N",
    "Total Projects",
    "Total Projects",
)

In [None]:
# Display the pie and bar charts together. `|` combines the two chart objects
# (presumably Altair horizontal concatenation - confirm the helpers' return type).
total_urban_rural_pie | total_urban_rural_bar

In [None]:
# Narrative for the county comparison section; the ranks come from the single
# row of the statewide ranking table that matches the parameter county.
county_comparison_text = f"""<h4>County Comparison</h4>
        There are {unique_counties_statewide} different counties 
        (including projects that fall in multiple counties and are coded as 'Various'). 
        {parameter_county} County ranks <b>{county_rank_county['Project Rank'][0]}</b> in total number of projects and 
        <b>{county_rank_county['Project Cost'][0]}</b> in project costs among all the other counties. 
        """
display(Markdown(county_comparison_text))

In [None]:
# Build the county map table from the statewide per-county summary, then
# replace the "County" column with the "COUNTY_NAME" column (renamed to
# "County") for display.
county_map = (
    _utils.create_county_map(counties_gdf_statewide, "COUNTY_ABBREV", "County")
    .drop(columns="County")
    .rename(columns={"COUNTY_NAME": "County"})
)

In [None]:
# Interactive map of the counties, colored by their total number of projects
county_map_options = {
    "cmap": "Oranges",
    "width": 800,
    "height": 400,
    "tooltip": ["County", "Total Projects", "Total Project ($1000) Formatted"],
    "highlight": True,
    "style_kwds": {"fillOpacity": 1},
}
county_map.explore("Total Projects", **county_map_options)

In [None]:
# Row of the category summary with the highest total cost (ascending sort, so
# the last row is the largest). It is looked up here so the sentence below can
# show the category name and its formatted cost as two different values.
top_cost_category = projects_df_county.sort_values("Total Project Cost  $1,000").iloc[-1]

# Only mention a runner-up category when the county has more than one;
# indexing row 1 of a one-row summary would raise.
if len(projects_df_county) > 1:
    runner_up_text = f",\n        followed by <b>{projects_df_county['Primary Mode'][1]}</b>"
else:
    runner_up_text = ""

display(
    Markdown(
        f"""<h4>Project Categories</h4>
        Most projects ({projects_df_county['Total Projects'][0]}) are in the <b>{projects_df_county['Primary Mode'][0]}</b> category{runner_up_text}. 
        <b>{top_cost_category['Primary Mode']}</b> received the most money ({top_cost_category['Total Project ($1000) Formatted']}). 
        """
    )
)

In [None]:
# Two linked bar charts of the project-type summary: one on total cost, one on
# project counts (per the chart title, clicking the first highlights the second).
# Argument roles are defined by the helper's signature in _utils - confirm
# there before reordering.
_utils.dual_bar_chart(
    projects_df_county,
    "Primary Mode",
    "Primary Mode:N",
    "Total Project Cost  $1,000:Q",
    "Primary Mode:N",
    "Total Projects:Q",
    ["Total Project ($1000) Formatted"],
    ["Total Projects"],
    "Categories by Cost and Total Projects - Click on the first graph to highlight the second",
)

In [None]:
# Narrative for the project details section. district_benefit_score is the
# median over the parameter county's own district only, so the sentence names
# that district instead of implying a figure across all districts.
display(
    Markdown(
        f"""<h4>Project Details</h4>
       All the projects in {parameter_county} County are listed below, ranked by benefit score. 
       The median benefit score is <b>{median_benefit_score_county}</b>, 
       compared with {district_benefit_score} for projects in District {parameter_county_district} and {statewide_benefit_score} for projects across California.
        """
    )
)

In [None]:
# Subset: the columns shown in the project details table. The explicit copy
# makes df_subset independent of df_parameter, so the formatting below does
# not write into a slice of the county dataframe (SettingWithCopyWarning).
df_subset = df_parameter[
    [
        "fake_benefit_score",
        "lead_agency",
        "primary_mode",
        "project_name",
        "current_fake_fund_requested",
        "total_project_cost__$1,000_",
    ]
].copy()

# Format: round the score in the table itself. The rounding is applied to
# df_subset, the frame that is displayed, and df_parameter is left untouched.
df_subset["fake_benefit_score"] = df_subset["fake_benefit_score"].round(2)

# Format: render both money columns as en_US currency strings
for money_column in ("total_project_cost__$1,000_", "current_fake_fund_requested"):
    df_subset[money_column] = df_subset[money_column].apply(
        lambda amount: format_currency(amount, currency="USD", locale="en_US")
    )

# Clean up Col Names
df_subset = _utils.clean_up_columns(df_subset)

# Sort by score, highest first
df_subset = df_subset.sort_values("Fake Benefit Score", ascending=False)

In [None]:
# Render the project details table through the shared styling helper
_utils.styled_df(df_subset)

In [None]:
# Build the score card from the county dataframe and tidy its column names
score_card = _utils.clean_up_columns(
    _utils.create_fake_score_card(df_parameter)
)

In [None]:
# Give the generic score card value columns descriptive names
score_card_column_names = {
    "Value X": "Measure",
    "Value Y": "Monetary Values",
}
score_card = score_card.rename(columns=score_card_column_names)

In [None]:
# Distinct project names in the score card, as a plain list for the dropdown
project_dropdown = score_card["Project Name"].unique().tolist()

In [None]:
# Score card columns passed to the dropdown chart helper as a tooltip list
score_card_tooltip = [
    "Project Name", "Total Category Score",
    "Factor Weight", "Weighted Factor Value",
    "Category Description",
]

In [None]:
# Project description columns passed to the dropdown chart helper as a tooltip list
project_desc_tooltip = [
    "Project Name", "Project Description",
    "Monetary", "Monetary Values",
]

In [None]:
# Reader instructions for the dropdown chart that follows
# (fixes the user-facing typo "retrive").
display(
    Markdown(
        f"""Use the dropdown menu below to retrieve information for a specific project. Hover over the bars for more detail.
        """
    )
)

In [None]:
# Two charts of the score card driven by a project dropdown, built by a shared
# _utils helper. It receives the dropdown values, the two tooltip lists and
# the chart title; the roles of the remaining positional strings are defined
# by the helper's signature in _utils - confirm there before reordering.
_utils.dual_chart_with_dropdown(
    score_card,
    project_dropdown,
    "Project Name",
    "Monetary:N",
    "Monetary Values:Q",
    "Monetary:N",
    project_desc_tooltip,
    "Total Category Score:Q",
    "Category:N",
    "Category:N",
    score_card_tooltip,
    f"View Individual Projects in {parameter_county} County",
)

In [None]:
# Narrative for the lead agencies section: number of rows in the agency
# summary, the agency in its first row, and the agency with the highest cost.
lead_agencies_text = f"""<h4>Lead Agencies</h4>
        There are <b>{len(agency_df_county)}</b> unique agencies with projects located in {parameter_county} County. 
        <b>{agency_df_county['Lead Agency'][0]}</b> is the agency with the most projects and  
       <b>{agency_most_money}</b> is the agency with highest project costs. 
        """
display(Markdown(lead_agencies_text))

In [None]:
# Show the agency summary without the raw cost column
# (the table also carries a formatted cost column, used elsewhere in this notebook).
_utils.styled_df(agency_df_county.drop(columns=["Total Project Cost  $1,000"]))