# Additional styling to a dataframe. 
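* By default, `display_scrollbar` is False and the styled dataframe is displayed without a scrollbar.
* When `display_scrollbar` is True, the styled dataframe is displayed inside a scrollable container, so any overflow content gets a scrollbar.
* When the scrollbar is enabled, you can change the container's height and width and the table's font size with `scrollbar_height`, `scrollbar_width`, and `scrollbar_font`.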

In [1]:
import pandas as pd
import pandas.io.formats.style
from IPython.display import HTML, Image, Markdown, display, display_html

In [2]:
# Folder prefix (a gs:// URI) and workbook file name; the two are concatenated
# to form the path handed to pd.read_excel.
GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/project_prioritization/"
FILE = "fake_data.xlsx"

In [3]:
# Load the "fake" sheet of the workbook located at GCS_FILE_PATH + FILE.
df = pd.read_excel(GCS_FILE_PATH + FILE, sheet_name="fake")

In [4]:
# Keep only the columns used in the styling demo, then draw 20 rows at random.
# No random_state is passed, so the selected rows differ between runs.
df = df.loc[
    :,
    [
        "county",
        "district_full_name",
        "district_rank",
        "project_name",
        "total_project_cost__$1,000_",
        "reduction_in_fatal_and_injury_crashes",
        "statewide_rank",
    ],
].sample(20)

In [5]:
def edited_style_table(
    df: pd.DataFrame,
    rename_cols: dict = {},
    drop_cols: list = [],
    integer_cols: list = [],
    one_decimal_cols: list = [],
    two_decimal_cols: list = [],
    three_decimal_cols: list = [],
    currency_cols: list = [],
    percent_cols: list = [],
    left_align_cols: list = "first",  # by default, left align first col
    center_align_cols: list = "all",  # by default, center align all other cols
    right_align_cols: list = [],
    custom_format_cols: dict = {},
    display_table: bool = True,
    display_scrollbar: bool = False,
    scrollbar_font: str = "10px",
    scrollbar_height: str = "400px",
    scrollbar_width: str = "fit-content",
) -> pd.io.formats.style.Styler:
    """
    Returns a pandas Styler object with some basic formatting.
    Even if display_table is True, pandas Styler object returned,
    just with a display() happening in the notebook cell.
    Any other tweaks for currency, percentages, etc should be done before / after,
    if it can't be put into custom_format_cols.

    custom_format_cols = {
        '{:,.1%}': ['colA', 'colB']
    }

    Generalize with dict comprehension or list comprehension
    list comprehension: df.style.format(subset=percent_cols,  **{'formatter': '{:,.2%}'})
    dict comprehension: df.style.format(formatter = {c: '{:,.2%}' for c in percent_cols})
    """
    df = df.drop(columns=drop_cols).rename(columns=rename_cols)

    if len(integer_cols) > 0:
        df = df.astype({c: "Int64" for c in integer_cols})

    if left_align_cols == "first":
        left_align_cols = list(df.columns)[0]
    if center_align_cols == "all":
        center_align_cols = list(df.columns)
        # all other columns except first one is center aligned
        center_align_cols = [c for c in center_align_cols if c not in left_align_cols]

    df_style = (
        df.style.format(formatter={c: "{:,g}" for c in integer_cols})
        .format(formatter={c: "{:,.1f}" for c in one_decimal_cols})
        .format(formatter={c: "{:,.2f}" for c in two_decimal_cols})
        .format(formatter={c: "{:,.3f}" for c in three_decimal_cols})
        .format(formatter={c: "{:,.2%}" for c in percent_cols})
        .format(formatter={c: "$ {:,.2f}" for c in currency_cols})
        .set_properties(subset=left_align_cols, **{"text-align": "left"})
        .set_properties(subset=center_align_cols, **{"text-align": "center"})
        .set_properties(subset=right_align_cols, **{"text-align": "right"})
        .set_table_styles([dict(selector="th", props=[("text-align", "center")])])
        .hide(axis="index")
    )

    def add_custom_format(
        df_style: pd.io.formats.style.Styler,
        format_str: str,
        cols_to_format: list,
    ) -> pd.io.formats.style.Styler:
        """
        Appends any additional formatting needs.
            key: format string, such as '{:.1%}'
            value: list of columns to apply that formatter to.
        """
        new_styler = df_style.format(formatter={c: format_str for c in cols_to_format})

        return new_styler

    if len(list(custom_format_cols.keys())) > 0:
        for format_str, cols_to_format in custom_format_cols.items():
            df_style = add_custom_format(df_style, format_str, cols_to_format)

    if display_table is True:
        if display_scrollbar is True:
            display(
                HTML(
                    f"<div style='height: {scrollbar_height}; overflow: auto; width: {scrollbar_width}'>"
                    + (
                        (df_style)
                        .set_properties(
                            **{
                                "font-size": scrollbar_font,
                            }
                        )
                        .render()
                    )
                    + "</div>"
                )
            )
        else:
            display(HTML(df_style.to_html()))

    return df_style

#### With `display_scrollbar=True` and your own font size, height, and width

In [14]:
# Scrollable rendering: 9px font inside a 250px-high, 650px-wide container.
# NOTE(review): "fake_benefit_score" is not among the columns selected into df
# earlier in this notebook - confirm whether it is still needed here.
test1 = edited_style_table(
    df,
    rename_cols={"district_full_name": "renamed1", "project_name": "renamed2"},
    drop_cols=["reduction_in_fatal_and_injury_crashes"],
    integer_cols=["district_rank"],
    one_decimal_cols=["fake_benefit_score"],
    two_decimal_cols=[],
    three_decimal_cols=[],
    currency_cols=["total_project_cost__$1,000_"],
    percent_cols=["statewide_rank"],
    left_align_cols=[],
    center_align_cols=[],
    right_align_cols=[],
    custom_format_cols={},
    display_table=True,
    display_scrollbar=True,
    scrollbar_font="9px",
    scrollbar_height="250px",
    scrollbar_width="650px",
)

  .render()


county,renamed1,district_rank,renamed2,"total_project_cost__$1,000_",statewide_rank
STA,10 - Stockton,38,North County Corridor Tully Rd To Sr-120 (New Sr-108),"$ 163,000.00",372
LA,07 - Los Angeles,115,Grade Separation -- Industry/La County,"$ 86,200.00",689
SBD,08 - San Bernardino,44,Sbd-210 Construct Victoria Ave Ic,"$ 888,888.00",407
MER,10 - Stockton,29,Altamont Corridor Vision,"$ 888,888.00",297
LA,07 - Los Angeles,24,Aux Lane Construction,"$ 888,888.00",165
SAC,03 - Marysville,40,State Route 51 Corridor Improvements: J Street To Arden Way Capital City Freeway Managed Lanes,"$ 437,400.00",705
VEN,07 - Los Angeles,46,Port Capacity Expansion - Ln Widening,"$ 7,000.00",300
SJ,10 - Stockton,8,Sr 120/Guthmiller Rd. Interchange Improvement,"$ 888,888.00",57
SBD,08 - San Bernardino,40,Fontana Truck Parking,"$ 25,950.00",334
SD,11 - San Diego,22,Central Mobility Hub,"$ 888,888.00",340


#### Default (`display_scrollbar=False`): runs like the original style_table function

In [18]:
# Same formatting arguments as the scrollbar example, but display_scrollbar is
# left at its default (False), so the table is displayed without a container.
test2 = edited_style_table(
    df,
    rename_cols={"district_full_name": "renamed1", "project_name": "renamed2"},
    drop_cols=["reduction_in_fatal_and_injury_crashes"],
    integer_cols=["district_rank"],
    one_decimal_cols=["fake_benefit_score"],
    two_decimal_cols=[],
    three_decimal_cols=[],
    currency_cols=["total_project_cost__$1,000_"],
    percent_cols=["statewide_rank"],
    left_align_cols=[],
    center_align_cols=[],
    right_align_cols=[],
    custom_format_cols={},
    display_table=True,
)

county,renamed1,district_rank,renamed2,"total_project_cost__$1,000_",statewide_rank
STA,10 - Stockton,38,North County Corridor Tully Rd To Sr-120 (New Sr-108),"$ 163,000.00",372
LA,07 - Los Angeles,115,Grade Separation -- Industry/La County,"$ 86,200.00",689
SBD,08 - San Bernardino,44,Sbd-210 Construct Victoria Ave Ic,"$ 888,888.00",407
MER,10 - Stockton,29,Altamont Corridor Vision,"$ 888,888.00",297
LA,07 - Los Angeles,24,Aux Lane Construction,"$ 888,888.00",165
SAC,03 - Marysville,40,State Route 51 Corridor Improvements: J Street To Arden Way Capital City Freeway Managed Lanes,"$ 437,400.00",705
VEN,07 - Los Angeles,46,Port Capacity Expansion - Ln Widening,"$ 7,000.00",300
SJ,10 - Stockton,8,Sr 120/Guthmiller Rd. Interchange Improvement,"$ 888,888.00",57
SBD,08 - San Bernardino,40,Fontana Truck Parking,"$ 25,950.00",334
SD,11 - San Diego,22,Central Mobility Hub,"$ 888,888.00",340
