In [116]:
%%capture
import numpy as np
import pandas as pd

import warnings
warnings.filterwarnings("ignore")

# Format 
from babel.numbers import format_currency
from calitp import to_snakecase

# Display
from IPython.display import HTML, Image, Markdown, display, display_html
from shared_utils import altair_utils, styleguide

# Pandas display settings: show wide frames in full and render floats with
# thousands separators and two decimals.
pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", None)
pd.set_option("display.max_colwidth", None)
pd.set_option("display.float_format", "{:,.2f}".format)

# GCS location of the input workbook.
# TODO: delete later, since the data will presumably be read from a script that cleans it up.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/project_prioritization/"
FILE = "fake_data.xlsx"

# My utilities
import _utils

In [117]:
# Load the project list. PPNOs mix all-digit and alphanumeric codes
# (e.g. 2589 vs. 0195J), so read that column as text; otherwise all-digit
# PPNOs may be parsed as numbers and never match the string set in the
# parameter cell.
df = pd.read_excel(
    f"{GCS_FILE_PATH}{FILE}", sheet_name="fake", dtype={"ppno": str}
)

In [118]:
# Parameter cell: PPNO of the project this report is generated for.
# Sample projects noted during development (name, district, PPNO):
#   'Eureka-Arcata Sea Level Rise Project 1.0'   - D1, 2589
#   'Marin Sonoma Narrows (Msn B7) Hov Lanes'    - D4, 0361M
#   'Us 395 Freight Mobility And Safety Project' - D8, 0195J
#   'Forrester Rd Widening And Realignment'      - D11 (no PPNO recorded)
# NOTE(review): the rendered output in this notebook shows 0195J as
# 'Sbd-210 Construct Victoria Ave Ic', so the names above may be stale.
ppno = "0195J"

In [119]:
# Objects derived from the project chosen in the parameter cell.

# Rows for the selected project.
df2 = df.loc[df["ppno"] == ppno].reset_index(drop=True)

# Fail early with a readable message instead of an IndexError on `.values[0]`
# when the PPNO does not match any row.
if df2.empty:
    raise ValueError(f"No project with PPNO {ppno!r} was found in the data.")

# District of the selected project.
district_object = df2["district"].values[0]

# All projects in the same district as the selected project.
df_district = df.loc[df["district"] == district_object]

# Median benefit score in the district, truncated to an integer.
district_median_benefit_score = int(df_district.fake_benefit_score.median())

# Median project cost in the district. The locale is pinned to en_US so the
# string matches the other currency values formatted in this notebook instead
# of depending on the environment's default locale.
district_median_cost = format_currency(
    df_district["total_project_cost__$1,000_"].median(),
    currency="USD",
    locale="en_US",
)

# Median amount requested in the district.
district_median_req = format_currency(
    df_district["current_fake_fund_requested"].median(),
    currency="USD",
    locale="en_US",
)

In [120]:
# Add display-ready USD strings next to the raw currency columns.
currency_columns = {
    "total_project_cost__$1,000_": "formatted_total_project",
    "current_fake_fund_requested": "formatted_requested",
}
for raw_col, formatted_col in currency_columns.items():
    df2[formatted_col] = df2[raw_col].apply(
        lambda amount: format_currency(amount, currency="USD", locale="en_US")
    )

In [121]:
# Report headline: project name, PPNO, lead agency, district, current phase
# and the project description, all taken from the first row of df2.
project_header = f"""<h1><b>{df2['project_name'].iloc[0]}</b></h1>
        <br> This project (PPNO:{df2['ppno'].iloc[0]}) is led by <b>{df2['lead_agency'].iloc[0]}</b> 
        in <b>District {df2['district_full_name'].iloc[0]}</b> and 
        is in its <b>{df2['current_phase'].iloc[0]}</b> phase.
        <br><br>{df2['project_description'].iloc[0]}
        """
display(Markdown(project_header))

<h1><b>Sbd-210 Construct Victoria Ave Ic</b></h1>
        <br> This project (PPNO:0195J) is led by <b>None</b> in <b>District 08 - San Bernardino</b> and 
        is in its <b>ON HOLD</b> phase.
        <br><br>The project proposes to construct a Type 8 cloverleaf interchange by either braiding on and off ramps with existing Highland Avenue interchange on and off ramps (Alternative 2) or by removing the existing highland Avenue eastbound entrance and westbound exit ramps (Alternative 3). Both Alternatives include aux lanes, ramp metering, CHP enforcement areas, maintenance pads, retaining walls, and proposed mainland, interchange, and local intersection improvements (At Highland Avenue Eastbound/westbound exit ramp, Victoria Avenue eastbound/westbound exit ramp, Arden Avenue eastbound entrance ramp, Victoria Avenue eastbound/westbound entrance ramps, and Highland Avenue at Victoria).
        

In [122]:
# Basic statistics and milestone dates for the selected project.
# The dates are read from df2 (the selected project); reading them from df
# would show the first row of the whole dataset, which is unrelated to the
# project being reported on.
display(
    Markdown(
        f"""<h2>Basic Project Statistics</h2>
        <li>Project Category: <b>{df2['primary_mode'].values[0]}</b></li>
        <li>Benefit Score: <b>{df2['fake_benefit_score'].values[0].astype('int64')}</b></li>
        <li>Statewide Benefit Rank: <b>{df2['statewide_rank'].values[0]}</b> out of {len(df)}</li>
        <li>District Benefit Rank: <b>{df2['district_rank'].values[0]}</b> out of {len(df_district)}</li>
        <br><b>Important Dates</b><br>
        <li>PID Approval: <b>{df2['pid_approval_date'].values[0]}</b>
        <li>PA/ED: <b>{df2['target_pa_ed'].values[0]}</b>
        <li>RTL: <b>{df2['rtl_date'].values[0]}</b>
        <li>Construction: <b>{df2['con_start_date'].values[0]}</b>
        """
    )
)

<h2>Basic Project Statistics</h2>
        <li>Project Category: <b>Interchange (New)</b></li>
        <li>Benefit Score: <b>8</b></li>
        <li>Statewide Benefit Rank: <b>407</b> out of 717</li>
        <li>District Benefit Rank: <b>40</b> out of 68</li>
        <br><b>Important Dates</b><br>
        <li>PID Approval: <b>datetime64[ns]</b>
        <li>PA/ED: <b>None</b>
        <li>RTL: <b>datetime64[ns]</b>
        <li>Construction: <b>datetime64[ns]</b>
        

In [123]:
# Share of the total project cost covered by the requested funds.
requested_share = df2["current_fake_fund_requested"].div(
    df2["total_project_cost__$1,000_"]
)
df2["funded"] = requested_share

# Share of the project cost that is left over.
df2["unfunded"] = 1 - df2["funded"]

# Whole-percent strings for display.
as_percent = "{:.0%}".format
df2["percentage_req_formatted"] = df2["funded"].astype(float).map(as_percent)
df2["unfunded_percentage_formatted"] = df2["unfunded"].astype(float).map(as_percent)



In [124]:
# Draft (not displayed): In comparison, the median total cost of projects in {df2.district[0]} is {district_median_cost} and the median amount requested is {district_median_req}.

In [125]:
# Funding narrative: total cost, amount requested, the share that request
# covers, and any other funding programs listed for the project.
funding_summary = f"""<h2>Funding</h2>
        {df2['project_name'].iloc[0]} costs <b>{df2['formatted_total_project'].iloc[0]}</b> in total.
        They have requested <b>{df2['formatted_requested'].iloc[0]}</b>, which covers about <b>{df2['percentage_req_formatted'].iloc[0]}</b> of the project's cost.
        <br> This project could potentially qualify for additional fund(s) from {df2['potential_funding_program_s_'].iloc[0]}.
        
        """
display(Markdown(funding_summary))

<h2>Funding</h2>
        Sbd-210 Construct Victoria Ave Ic costs <b>$888,888.00</b> in total.
        They have requested <b>$358,869.59</b>, which covers about <b>40%</b> of the project's cost.
        <br> This project could potentially qualify for additional fund(s) from Congestion Relief.
        
        

In [126]:
# Long-format frame with one row each for the funded and unfunded shares.
# Steps: transpose df2 so each column becomes a row, keep only the two share
# rows, turn the snake_case names into title-cased labels, then renumber.
percentage = (
    df2.T.reset_index()
    .rename(columns={"index": "Percentage", 0: "Value"})
    .loc[lambda long_df: long_df["Percentage"].isin(["unfunded", "funded"])]
    .assign(
        Percentage=lambda long_df: long_df["Percentage"]
        .str.replace("_", " ")
        .str.title()
    )
    .reset_index(drop=True)
)

In [127]:
# Build the funded vs. unfunded pie chart from the `percentage` frame.
# The meaning of each positional argument is defined by
# `_utils.basic_pie_chart`, which is not visible in this notebook.
funding_pie_chart = _utils.basic_pie_chart(percentage,  "Value:Q", "Percentage:N", "Percentage:N",
                  "Percentage Funded vs. Unfunded")

In [128]:
# Long-format frame holding the total project cost and the requested amount.
money_columns = [
    "total_project_cost__$1,000_",
    "current_fake_fund_requested",
]

# Transpose so each df2 column becomes a row, then keep the two money rows.
funding_long = df2.T.reset_index().rename(
    columns={"index": "Monetary Columns", 0: "Value"}
)
is_money_row = funding_long["Monetary Columns"].isin(money_columns)
funding = funding_long.loc[is_money_row].copy()

# Turn the snake_case column names into title-cased labels.
funding["Monetary Columns"] = (
    funding["Monetary Columns"].str.replace("_", " ").str.title()
)

# USD-formatted copy of the values.
funding["Total"] = funding["Value"].apply(
    lambda amount: format_currency(amount, currency="USD", locale="en_US")
)

# Renumber the remaining rows from zero.
funding = funding.reset_index(drop=True)

In [129]:
# Build the bar chart comparing total project cost with the amount requested,
# using the `funding` frame. The role of each positional argument is defined
# by `_utils.basic_bar_chart_custom_tooltip`, which is not visible here;
# presumably "Total" supplies the formatted value for the tooltip (TODO confirm).
requested_bar_chart = _utils.basic_bar_chart_custom_tooltip(
    funding,
    "Value",
    "Monetary Columns",
    "Total",
    "Monetary Columns",
    chart_title="Total Project Cost versus Requested",
)

In [130]:
# Show both funding charts as the cell output. Presumably these are Altair
# charts, where `|` concatenates horizontally (TODO confirm against _utils).
requested_bar_chart | funding_pie_chart

In [131]:
# Score card intro: the project's benefit score next to the district median.
score_summary = f"""<h2>Score Card</h2>
        The total score is <b>{df2['fake_benefit_score'].iloc[0].astype('int64')}</b>, compared to the median score of 
        <b>{district_median_benefit_score}</b> of other projects in District {df2['district'].iloc[0]}. 
        <br>Click on the bar for more detail.
        """
display(Markdown(score_summary))

<h2>Score Card</h2>
        The total score is <b>8</b>, compared to the median score of 
        <b>9</b> of other projects in District 8. 
        <br>Click on the bar for more detail.
        

In [132]:
# df2.drop(columns=["project_description"]).head()

In [133]:
# Columns holding the individual measure scores shown on the score card.
score_measures = [
    "increase_peak_person_throughput",
    "reduction_in_peak_period_delay",
    "reduction_in_fatal_and_injury_crashes",
    "reduction_in_injury_rates",
    "increase_access_to_jobs",
    "increase_access_jobs_to_DAC",
    "commercial_dev_developed",
    "tons_of_goods_impacted",
    "improve_air_quality",
    "impact_natural_resources",
    "support_of_trasnportation",
]

# Score card starts as the selected project's row restricted to those columns.
score_card = df2[score_measures]

In [134]:
# Title-cased measure label -> broader category it rolls up into.
measure_to_category = {
    "Increase Peak Person Throughput": "Congestion Mitigation",
    "Reduction In Peak Period Delay": "Congestion Mitigation",
    "Reduction In Fatal And Injury Crashes": "Safety",
    "Reduction In Injury Rates": "Safety",
    "Increase Access To Jobs": "Accessibility Increase",
    "Increase Access Jobs To Dac": "Accessibility Increase",
    "Commercial Dev Developed": "Economic Dev.",
    "Tons Of Goods Impacted": "Economic Dev.",
    "Improve Air Quality": "Environment",
    "Impact Natural Resources": "Environment",
    "Support Of Trasnportation": "Land Use",
}

# Reshape to one row per measure.
score_card = score_card.T.reset_index().rename(
    columns={"index": "Measure", 0: "Measure Score"}
)

# Turn the snake_case column names into title-cased labels.
score_card["Measure"] = score_card["Measure"].str.replace("_", " ").str.title()

# Broader category for each measure; labels missing from the mapping are kept as-is.
score_card["Category"] = score_card["Measure"].replace(measure_to_category)

# Sum of the measure scores within each category, attached to every measure row.
total = (
    score_card.groupby("Category")["Measure Score"]
    .sum()
    .to_frame("Total Category Score")
)
score_card = score_card.merge(total, how="left", on="Category")

In [135]:
# Descriptive columns that are filled with placeholder text for now.
placeholder_columns = [
    "Measure Description",
    "Factor Weight",
    "Weighted Factor Value",
    "Category Description",
]
for i in placeholder_columns:
    score_card[i] = "Text Here"

In [136]:
# Render the score card chart from `score_card`; the result is shown as the
# cell output. The role of each positional argument is defined by
# `_utils.dual_bar_chart`, which is not visible here. The two lists name
# category-level and measure-level columns, presumably used as tooltips for
# the two bar charts (TODO confirm against _utils).
_utils.dual_bar_chart(
    score_card,
    "Category",
    "Category:N",
    "Total Category Score:Q",
    "Measure:N",
    "Measure Score:Q",
    ['Category', 'Total Category Score', 'Category Description'],
    ['Measure', 'Measure Description', 'Measure Score', 'Factor Weight', 'Weighted Factor Value'],
    
)