In [None]:
# math
import math
import numpy as np
import pandas as pd
import datetime

# plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.subplots as sbplt

# other
import copy
import os
from pathlib import Path
import warnings

from experiments_2022 import DATASETS_PATH, IMAGE_PATH
from experiments_2022.zone_level_analysis import (
    base,
    cleaning,
    viz,
    regression_functions,
    clustering,
)
from experiments_2022.datasets import (
    load_zones,
    load_weather,
    load_building,
    pull_from_dataset,
)

In [None]:
# Buildings in the 2022 experiment: seven offices followed by three labs.
PROJECTS_2022 = [f"OFF-{number}" for number in range(1, 8)] + [
    f"LAB-{number}" for number in range(1, 4)
]

# Subset of buildings listed for 2021.
PROJECTS_2021 = [
    "OFF-1",
    "OFF-3",
    "OFF-4",
    "OFF-6",
    "LAB-1",
    "LAB-3",
]

# Buildings listed as VAV projects: every office plus LAB-2.
PROJECTS_VAV = [f"OFF-{number}" for number in range(1, 8)] + ["LAB-2"]

In [None]:
# Per-project flag forwarded as `no_weekends` to the regression helpers
# (currently True for every building).
NO_WEEKENDS = {
    f"{prefix}-{number}": True
    for prefix, count in (("OFF", 7), ("LAB", 3))
    for number in range(1, count + 1)
}

# Per-project flag forwarded as `control_for_weekends` to the regression
# helpers (currently False for every building).
CONTROL_FOR_WEEKENDS = {
    f"{prefix}-{number}": False
    for prefix, count in (("OFF", 7), ("LAB", 3))
    for number in range(1, count + 1)
}

In [None]:
# Build, for every 2022 project, the zone inventory (split into VAVs and FCUs)
# and a 0/1 table flagging the zones listed in excluded_zones.csv.
ez_csv = pd.read_csv(DATASETS_PATH / "csvs/2022_experiment_csvs/excluded_zones.csv")
ALL_ZONES = {}  # project -> list of VAV + FCU zone names
EXCLUDED_ZONES = {}  # project -> DataFrame with a 0/1 "Excluded Zones" column
ZONES_BY_TYPE = {}  # project -> {"VAVs": [...], "FCUs": [...]}
for project in PROJECTS_2022:
    ZONES_BY_TYPE[project] = {}

    # Candidate zones are the union of the column labels of three zone datasets.
    tload_zones = list(load_zones("2022", project, "zone-tloads").columns)
    temp_zones = list(load_zones("2022", project, "zone-temps").columns)
    # NOTE(review): this is the only load here that reads the "2021" dataset
    # while the loop iterates PROJECTS_2022 — confirm the year is intentional.
    csp_zones = list(load_zones("2021", project, "zone-coolsp").columns)
    all_zones = list(set(tload_zones).union(set(temp_zones)).union(set(csp_zones)))

    # Classify by substring of the upper-cased name; anything that is neither
    # a VAV nor an FC/VVS unit is dropped from `all_zones` below.
    vavs = [zone for zone in all_zones if "VAV" in zone.upper()]
    fcus = [
        zone for zone in all_zones if ("FC" in zone.upper()) or ("VVS" in zone.upper())
    ]
    all_zones = list(set(vavs).union(set(fcus)))

    ZONES_BY_TYPE[project]["VAVs"] = vavs
    ZONES_BY_TYPE[project]["FCUs"] = fcus
    ALL_ZONES[project] = all_zones

    # Only excluded names that are actually known zones of this project are
    # flagged with 1; every other zone keeps the default 0.
    ezs = list(ez_csv[project].dropna())
    ezs = list(set(ezs).intersection(set(all_zones)))
    ez_df = pd.DataFrame(0, index=all_zones, columns=["Excluded Zones"])
    ez_df.loc[ezs, :] = 1
    EXCLUDED_ZONES[project] = ez_df

In [None]:
# Map each 2022 project to its AHU names, taken from the column labels of the
# cleaned "ahu-dat" frame.
AHUs = {}
for project in PROJECTS_2022:
    dat = load_zones("2022", project, "ahu-dat")
    AHUs[project] = list(cleaning.clean_df(dat, "ahu-dat").columns)

In [None]:
# Summer analysis windows: May 1 through October 1 of each year.
SUMMER_START_2021 = pd.Timestamp(year=2021, month=5, day=1)
SUMMER_END_2021 = pd.Timestamp(year=2021, month=10, day=1)

SUMMER_START_2022 = pd.Timestamp(year=2022, month=5, day=1)
SUMMER_END_2022 = pd.Timestamp(year=2022, month=10, day=1)

In [None]:
def run_building_regressions(
    df_dict,
    T,
    year="2022",
    mode="Percent Change",
    summary_statistic="Mean",
    y_axis_title="Y Axis Title",
    use_raw=False,
):
    """Run the setpoint regression for every building and collect the results.

    For each project in ``df_dict`` this builds the daily treatment indicator
    frame with ``regression_functions.get_2021_2022_binary_df`` (baseline
    column "CSP = 74F", dropped), fits ``regression_functions.general_Delta_fn``
    and draws the fit with ``viz.plot_experiment_regression``. The per-project
    figures are merged with ``viz.combine_figs``.

    Parameters:
        df_dict: mapping of project name -> DataFrame with a datetime-like
            index. Results are looked up with the row label ``project``, so
            each frame presumably has a single column named after the
            project — TODO confirm against general_Delta_fn.
        T: regressor forwarded unchanged to ``general_Delta_fn`` and the plot
            (the x axis is titled "Average Daytime OAT (C)").
        year: "2022" evaluates the 76F and 78F setpoints; any other value
            evaluates only 76F.
        mode: forwarded to ``general_Delta_fn`` and the plot.
        summary_statistic: forwarded to ``general_Delta_fn`` and the plot.
        y_axis_title: y-axis title of the individual and combined figures.
        use_raw: forwarded to ``get_2021_2022_binary_df``.

    Returns:
        Tuple ``(deltas, deltas_high, deltas_low, summary, regression_fig)``.
        The first three are DataFrames indexed by project with one column per
        setpoint ("76", and "78" for 2022). ``summary`` holds formatted
        "slope (std err)" strings, p-values and R2 per project, with
        capitalized column names, Celsius setpoint labels and all-NaN rows
        removed.
    """
    projects = list(df_dict.keys())

    summary = pd.DataFrame(
        index=[
            "CSP = 76F",
            "CSP = 78F",
            "OAT",
            "Weekend",
            "P-Value CSP = 76F",
            "P-Value CSP = 78F",
            "P-Value OAT",
            "P-Value Weekend",
            "R2",
        ],
        columns=projects,
    )
    # Legend keys stay in Fahrenheit (they match the regression column names);
    # only the displayed names are in Celsius.
    if year == "2022":
        sps = ["76", "78"]
        line_legend = {
            "name": {
                "Control": "CSP = 23.3C",
                "CSP = 76F": "CSP = 24.4C",
                "CSP = 78F": "CSP = 25.5C",
            },
            "color": {
                "Control": "RoyalBlue",
                "CSP = 76F": "DarkOrange",
                "CSP = 78F": "Firebrick",
            },
        }
    else:
        sps = ["76"]
        line_legend = {
            "name": {
                "Control": "CSP = 23.3C",
                "CSP = 76F": "CSP = 24.4C",
            },
            "color": {
                "Control": "RoyalBlue",
                "CSP = 76F": "DarkOrange",
            },
        }

    deltas = pd.DataFrame(index=projects, columns=sps)
    deltas_high = pd.DataFrame(index=projects, columns=sps)
    deltas_low = pd.DataFrame(index=projects, columns=sps)
    figs = {}

    for project in projects:
        # 0/1 series over the unique calendar days: 1 marks Saturday/Sunday.
        # Only used for the marker shapes when weekends are kept.
        days = pd.DatetimeIndex(df_dict[project].index.date).unique()
        shapes = pd.Series(0, index=days)
        shapes[days.dayofweek >= 5] = 1

        binary_df = regression_functions.get_2021_2022_binary_df(
            project=project,
            experiment_year=year,
            freq="daily",
            baseline_column="CSP = 74F",
            drop_baseline_column=True,
            no_weekends=NO_WEEKENDS[project],
            control_for_weekends=CONTROL_FOR_WEEKENDS[project],
            use_raw=use_raw,
        )
        reg_results = regression_functions.general_Delta_fn(
            df=df_dict[project],
            T=T,
            binary=binary_df,
            mode=mode,
            summary_statistic=summary_statistic,
        )

        # NOTE(review): here "high" is (Delta Low - delta) and "low" is
        # (delta - Delta High), the opposite pairing and sign from
        # run_equip_regressions / run_group_regressions — confirm this is the
        # convention the downstream error-bar plots expect.
        for sp in sps:
            deltas.loc[project, sp] = reg_results.loc[project, f"Delta CSP = {sp}F"]
            deltas_high.loc[project, sp] = (
                reg_results.loc[project, f"Delta Low CSP = {sp}F"]
                - deltas.loc[project, sp]
            )
            deltas_low.loc[project, sp] = deltas.loc[project, sp] - (
                reg_results.loc[project, f"Delta High CSP = {sp}F"]
            )
        # Choose which regression terms to report and how weekends are drawn.
        if NO_WEEKENDS[project]:
            if year == "2022":
                conditions = ["CSP = 76F", "CSP = 78F", "OAT"]
            else:
                conditions = ["CSP = 76F", "OAT"]
            shape_legend = None
            additive_column_dict = None
            dont_add_to_legend = []
        else:
            shape_legend = {
                "series": shapes,
                "name": {0: "Weekday", 1: "Weekend"},
                "shape": {0: "circle", 1: "x"},
            }
            dont_add_to_legend = ["Weekday"]
            if year == "2022":
                additive_column_dict = {
                    "Weekend": ["Control", "CSP = 76F", "CSP = 78F"]
                }
                conditions = ["CSP = 76F", "CSP = 78F", "OAT", "Weekend"]
            else:
                additive_column_dict = {"Weekend": ["Control", "CSP = 76F"]}
                conditions = ["CSP = 76F", "OAT", "Weekend"]

        # Summary cells are strings: "slope (std err)" and the p-value, all
        # rounded to three decimals.
        for condition in conditions:
            summary.loc[
                condition, project
            ] = f"{round(reg_results.loc[project, f'Slope {condition}'], 3)} ({round(reg_results.loc[project, f'Std Err {condition}'], 3)})"
            summary.loc[
                f"P-Value {condition}", project
            ] = f"{round(reg_results.loc[project, f'P-Value {condition}'], 3)}"
        summary.loc["R2", project] = round(reg_results.loc[project, "R2"], 3)
        fig = viz.plot_experiment_regression(
            experiment_results=reg_results,
            df=df_dict[project],
            T=T,
            binary=binary_df,
            line_legend=line_legend,
            shape_legend=shape_legend,
            additive_column_dict=additive_column_dict,
            mode=mode,
            summary_statistic=summary_statistic,
            marker_size=10,
            line_width=2.5,
            y_axis_title=y_axis_title,
            x_axis_title="Average Daytime OAT (C)",
            dont_add_to_legend=dont_add_to_legend,
        )
        figs[project] = fig

    # clean summary
    # Relabel the Fahrenheit setpoints in the row labels with Celsius values.
    new_index = list(summary.index)
    for i in range(len(new_index)):
        idx = new_index[i]
        if "CSP = 76F" in idx:
            new_index[i] = idx.replace("CSP = 76F", "CSP = 24.4C")
        if "CSP = 78F" in idx:
            new_index[i] = idx.replace("CSP = 78F", "CSP = 25.5C")

    summary.index = new_index
    # str.capitalize lower-cases everything after the first letter
    # (e.g. "OFF-1" becomes "Off-1").
    summary.columns = [word.capitalize() for word in list(summary.columns)]
    # Rows never filled for any project (e.g. "Weekend" terms) are removed.
    summary.dropna(inplace=True, how="all", axis=0)

    # combine regression figs
    regression_fig = viz.combine_figs(
        figs,
        y_axis_title=y_axis_title,
        x_axis_title="Average Daytime OAT (C)",
        force_same_yaxes=False,
        force_same_xaxes=False,
        num_cols=3,
        horizontal_spacing=0.1,
        vertical_spacing=0.075,
    )

    return deltas, deltas_high, deltas_low, summary, regression_fig

In [None]:
def run_equip_regressions(
    dict_df,
    T,
    mode,
    year="2022",
    summary_statistic="Mean",
    no_weekends=NO_WEEKENDS,
    control_for_weekends=CONTROL_FOR_WEEKENDS,
):
    """Run the setpoint regression separately for every zone of every project.

    Parameters:
        dict_df: mapping of project name -> DataFrame with one column per zone.
        T: regressor forwarded unchanged to ``general_Delta_fn``.
        mode: forwarded to ``general_Delta_fn``.
        year: "2022" evaluates the 76F and 78F setpoints; any other value
            evaluates only 76F.
        summary_statistic: forwarded to ``general_Delta_fn``.
        no_weekends: mapping of project name -> flag forwarded to
            ``get_2021_2022_binary_df``.
        control_for_weekends: mapping of project name -> flag forwarded to
            ``get_2021_2022_binary_df``.

    Returns:
        For 2022, a 6-tuple of dicts ``(deltas_76, low_76, high_76, deltas_78,
        low_78, high_78)``; otherwise only the three 76F dicts. Each dict maps
        project -> single-column DataFrame indexed by zone, the column being
        named after the setpoint. "low" is the estimate minus its "Delta Low"
        value and "high" is the "Delta High" value minus the estimate.
    """
    setpoints = ["76", "78"] if year == "2022" else ["76"]
    kinds = ("delta", "low", "high")
    # collected[kind][setpoint] maps project -> single-column frame.
    collected = {kind: {"76": {}, "78": {}} for kind in kinds}

    for project in dict_df:
        project_df = dict_df[project]
        zone_names = list(project_df.columns)

        # One zone-indexed series per (kind, setpoint), filled zone by zone.
        per_zone = {
            (kind, setpoint): pd.Series(index=zone_names)
            for setpoint in setpoints
            for kind in kinds
        }

        for zone in zone_names:
            binary_df = regression_functions.get_2021_2022_binary_df(
                project=project,
                experiment_year=year,
                freq="daily",
                baseline_column="CSP = 74F",
                drop_baseline_column=True,
                no_weekends=no_weekends[project],
                control_for_weekends=control_for_weekends[project],
                zone=zone,
            )
            reg_results = regression_functions.general_Delta_fn(
                df=project_df[zone].to_frame(),
                T=T,
                binary=binary_df,
                mode=mode,
                summary_statistic=summary_statistic,
            )
            # grab results
            for setpoint in setpoints:
                estimate = reg_results.loc[zone, f"Delta CSP = {setpoint}F"]
                lower = reg_results.loc[zone, f"Delta Low CSP = {setpoint}F"]
                upper = reg_results.loc[zone, f"Delta High CSP = {setpoint}F"]
                per_zone[("delta", setpoint)][zone] = estimate
                per_zone[("low", setpoint)][zone] = estimate - lower
                per_zone[("high", setpoint)][zone] = upper - estimate

        for (kind, setpoint), series in per_zone.items():
            frame = series.to_frame()
            frame.columns = [setpoint]
            collected[kind][setpoint][project] = frame
        print(f"Done with {project}")

    outputs = tuple(collected[kind]["76"] for kind in kinds)
    if year == "2022":
        outputs += tuple(collected[kind]["78"] for kind in kinds)
    return outputs

In [None]:
def run_group_regressions(
    dict_df,
    T,
    mode,
    year="2022",
    summary_statistic="Mean",
    no_weekends=NO_WEEKENDS,
    control_for_weekends=CONTROL_FOR_WEEKENDS,
    use_raw=False,
):
    """Run the setpoint regression once per project over all of its columns.

    Parameters:
        dict_df: mapping of project name -> DataFrame handed to
            ``general_Delta_fn`` as a whole.
        T: regressor forwarded unchanged to ``general_Delta_fn``.
        mode: forwarded to ``general_Delta_fn``.
        year: "2022" evaluates the 76F and 78F setpoints; any other value
            evaluates only 76F.
        summary_statistic: forwarded to ``general_Delta_fn``.
        no_weekends: mapping of project name -> flag forwarded to
            ``get_2021_2022_binary_df``.
        control_for_weekends: mapping of project name -> flag forwarded to
            ``get_2021_2022_binary_df``.
        use_raw: forwarded to ``get_2021_2022_binary_df``.

    Returns:
        For 2022, a 6-tuple of dicts ``(deltas_76, low_76, high_76, deltas_78,
        low_78, high_78)``; otherwise only the three 76F dicts. Each dict maps
        project -> single-column DataFrame whose column is named after the
        setpoint. "low" is the estimate minus its "Delta Low" value and "high"
        is the "Delta High" value minus the estimate.
    """
    setpoints = ("76", "78") if year == "2022" else ("76",)
    estimates = {"76": {}, "78": {}}
    lower_widths = {"76": {}, "78": {}}
    upper_widths = {"76": {}, "78": {}}

    for project in dict_df:
        indicators = regression_functions.get_2021_2022_binary_df(
            project=project,
            experiment_year=year,
            freq="daily",
            baseline_column="CSP = 74F",
            drop_baseline_column=True,
            no_weekends=no_weekends[project],
            control_for_weekends=control_for_weekends[project],
            use_raw=use_raw,
        )
        fit = regression_functions.general_Delta_fn(
            df=dict_df[project],
            T=T,
            binary=indicators,
            mode=mode,
            summary_statistic=summary_statistic,
        )
        # grab results
        for setpoint in setpoints:
            point = fit[f"Delta CSP = {setpoint}F"]
            below = (point - fit[f"Delta Low CSP = {setpoint}F"]).to_frame()
            above = (fit[f"Delta High CSP = {setpoint}F"] - point).to_frame()
            center = point.to_frame()
            for frame in (center, below, above):
                frame.columns = [setpoint]
            estimates[setpoint][project] = center
            lower_widths[setpoint][project] = below
            upper_widths[setpoint][project] = above

    result = (estimates["76"], lower_widths["76"], upper_widths["76"])
    if year == "2022":
        result += (estimates["78"], lower_widths["78"], upper_widths["78"])
    return result

In [None]:
def get_2022_control_data(
    project,
    variable,
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    no_weekends=True,
    only_business_hours=True,
    SI_units=True,
    resample_rule="1h",
    resample_statistic="Mean",
    remove_FCUs=False,
    clean_underyling_data=False,
):
    """Load one 2022 zone variable and filter it against the experiment hours.

    The variable is loaded with ``load_zones``, cleaned with
    ``cleaning.clean_df`` using the given window and options, and then passed
    through ``cleaning.clean_by_column`` with the hourly filter built for the
    "CSP = 76F" and "CSP = 78F" conditions and ``hourly_filter_reverse=True``
    (presumably this keeps only the hours outside those conditions, i.e. the
    control data — confirm against ``clean_by_column``).

    Parameters:
        project: project name.
        variable: dataset variable name, also used as ``this_var`` for cleaning.
        start_date, end_date: window forwarded to ``clean_df``.
        no_weekends: forwarded to ``clean_df``, the hourly-filter builder and
            ``clean_by_column``.
        only_business_hours, SI_units, resample_rule, resample_statistic,
        remove_FCUs: forwarded to ``clean_df``.
        clean_underyling_data: forwarded to ``load_zones`` as ``clean_data``.

    Returns:
        The filtered DataFrame returned by ``cleaning.clean_by_column``.
    """
    raw = load_zones("2022", project, variable, clean_data=clean_underyling_data)

    cleaned = cleaning.clean_df(
        df=raw,
        this_var=variable,
        start_date=start_date,
        end_date=end_date,
        only_business_hours=only_business_hours,
        no_weekends=no_weekends,
        remove_FCUs=remove_FCUs,
        SI_units=SI_units,
        resample_rule=resample_rule,
        resample_statistic=resample_statistic,
    )

    # Hourly filter for the two experimental setpoint conditions, one entry
    # per column of the cleaned frame.
    experiment_hours = cleaning.get_zonal_experiment_hourly_filter(
        project,
        list(cleaned.columns),
        ["CSP = 76F", "CSP = 78F"],
        no_weekends=no_weekends,
    )
    return cleaning.clean_by_column(
        cleaned,
        hourly_filter=experiment_hours,
        no_weekends=no_weekends,
        hourly_filter_reverse=True,
    )

In [None]:
def add_vertical_boxes(fig, x_points, background_color="lightgray"):
    """Draw alternating shaded / transparent vertical bands on a figure.

    One rectangle is added between every pair of consecutive entries of
    ``x_points``. Each spans the full plot height (paper coordinates 0 to 1)
    and is placed below the traces. Bands with an even position (the first,
    third, ...) use ``background_color``; the others are fully transparent.

    Parameters:
        fig: figure exposing ``add_shape``.
        x_points: sequence of band edges in x-axis coordinates.
        background_color: fill colour of the shaded bands.

    Returns:
        The same ``fig``, for chaining.
    """
    transparent = "rgba(0, 0, 0, 0)"
    edge_pairs = zip(x_points, x_points[1:])
    for band, (left, right) in enumerate(edge_pairs):
        is_shaded = band % 2 == 0
        fig.add_shape(
            type="rect",
            x0=left,
            x1=right,
            y0=0,
            y1=1,
            xref="x",
            yref="paper",
            fillcolor=background_color if is_shaded else transparent,
            line={"width": 0},  # no border around the rectangles
            layer="below",
            opacity=0.7,
        )
    return fig

In [None]:
def add_line_to_subplots(fig, x_range, y_range, total_subplots, dash="dash", width=4):
    """
    Draw the same black line segment on the first ``total_subplots`` subplots.

    Subplot axes are addressed by Plotly's reference names: "x"/"y" for the
    first subplot, then "x2"/"y2", "x3"/"y3", and so on.

    Parameters:
        fig: figure exposing ``add_shape``
        x_range: (x0, x1) pair with the segment's x coordinates
        y_range: (y0, y1) pair with the segment's y coordinates
        total_subplots: how many subplots receive the line, counted from the first
        dash: line dash style (default "dash")
        width: line width (default 4)

    Returns:
        The same ``fig``, for chaining.
    """
    for position in range(total_subplots):
        # The first subplot's axes carry no numeric suffix.
        suffix = "" if position == 0 else str(position + 1)
        fig.add_shape(
            type="line",
            x0=x_range[0],
            y0=y_range[0],
            x1=x_range[1],
            y1=y_range[1],
            line={"color": "black", "dash": dash, "width": width},
            xref=f"x{suffix}",
            yref=f"y{suffix}",
            layer="above",
        )

    return fig

## Create testbed table

In [None]:
# Empty testbed summary table: one row per attribute, one column per 2022
# project plus a trailing "TOTAL" column. Rows are filled in by the cells below.
testbed = pd.DataFrame(
    columns=PROJECTS_2022,
    index=[
        # building description
        "Lab Building",
        "1000m2",
        "Year of Construction",
        "Year of Last Retrofit",
        # equipment counts
        "# of AHUs",
        "# of VAVs + FCUs",
        "# of VAVs",
        "# of FCUs",
        "# of Excluded Zones",
        "% of Excluded Zones",
        # energy demand
        "Summer Daily Energy Demand (kWh/m2/day)",
        "% Cooling Demand",
        "% Electric Demand",
        "% Heating Demand",
        # experiment schedule
        "Total Experiment Weekdays",
        "# of 74F Weekdays",
        "# of 76F Weekdays",
        "# of 78F Weekdays",
    ],
).assign(TOTAL=np.nan)

### Lab building

In [None]:
# Flag lab buildings with "Y" and every other project with "N".
testbed.loc["Lab Building", PROJECTS_2022] = [
    "Y" if name in ("LAB-1", "LAB-2", "LAB-3") else "N" for name in PROJECTS_2022
]

### Floor area (1000 m2)

In [None]:
# Floor area per building, stored in thousands of m2. The hand-entered figure
# is multiplied by base.M2_PER_SF (presumably square feet converted to m2).
testbed.loc["1000m2", "OFF-1"] = 144982 * base.M2_PER_SF / 1000
testbed.loc["1000m2", "OFF-2"] = 135556 * base.M2_PER_SF / 1000
testbed.loc["1000m2", "OFF-3"] = 28420 * base.M2_PER_SF / 1000
testbed.loc["1000m2", "OFF-4"] = 169619 * base.M2_PER_SF / 1000
testbed.loc["1000m2", "OFF-5"] = 84501 * base.M2_PER_SF / 1000
testbed.loc["1000m2", "OFF-6"] = 105465 * base.M2_PER_SF / 1000
testbed.loc["1000m2", "OFF-7"] = 63439 * base.M2_PER_SF / 1000
testbed.loc["1000m2", "LAB-1"] = 75081 * base.M2_PER_SF / 1000
testbed.loc["1000m2", "LAB-2"] = 63670 * base.M2_PER_SF / 1000
testbed.loc["1000m2", "LAB-3"] = 76462 * base.M2_PER_SF / 1000
# Campus-wide area is the sum over all 2022 projects.
testbed.loc["1000m2", "TOTAL"] = testbed.loc["1000m2", PROJECTS_2022].sum()

### Year of construction

In [None]:
# Hand-entered construction year of each building (no TOTAL for this row).
testbed.loc["Year of Construction", "OFF-1"] = 2000
testbed.loc["Year of Construction", "OFF-2"] = 1996
testbed.loc["Year of Construction", "OFF-3"] = 1893
testbed.loc["Year of Construction", "OFF-4"] = 1996
testbed.loc["Year of Construction", "OFF-5"] = 2015
testbed.loc["Year of Construction", "OFF-6"] = 1998
testbed.loc["Year of Construction", "OFF-7"] = 1900
testbed.loc["Year of Construction", "LAB-1"] = 1965
testbed.loc["Year of Construction", "LAB-2"] = 1902
testbed.loc["Year of Construction", "LAB-3"] = 1963

### Year of last retrofit

In [None]:
# Hand-entered year of the most recent retrofit of each building.
testbed.loc["Year of Last Retrofit", "OFF-1"] = 2014
testbed.loc["Year of Last Retrofit", "OFF-2"] = 2021
testbed.loc["Year of Last Retrofit", "OFF-3"] = 2015
testbed.loc["Year of Last Retrofit", "OFF-4"] = 2015
# OFF-5 has no retrofit year recorded.
testbed.loc["Year of Last Retrofit", "OFF-5"] = np.nan
testbed.loc["Year of Last Retrofit", "OFF-6"] = 2012
testbed.loc["Year of Last Retrofit", "OFF-7"] = 2017
testbed.loc["Year of Last Retrofit", "LAB-1"] = 2020
testbed.loc["Year of Last Retrofit", "LAB-2"] = 2016
testbed.loc["Year of Last Retrofit", "LAB-3"] = 2018

### Number of equipment

In [None]:
# Fill the equipment-count rows from the inventories built earlier
# (AHUs, ZONES_BY_TYPE, EXCLUDED_ZONES).
# NOTE(review): `excluded` re-reads the same CSV as `ez_csv` and is not
# referenced again in this cell — confirm whether a later cell needs it.
excluded = pd.read_csv(DATASETS_PATH / "csvs/2022_experiment_csvs/excluded_zones.csv")
for project in PROJECTS_2022:
    # ahus
    testbed.loc["# of AHUs", project] = len(AHUs[project])
    # vavs
    testbed.loc["# of VAVs", project] = len(ZONES_BY_TYPE[project]["VAVs"])
    # fcus
    testbed.loc["# of FCUs", project] = len(ZONES_BY_TYPE[project]["FCUs"])
    # excluded
    # Masking with `== 1` turns non-excluded zones into NaN, so the rows left
    # after dropna() are exactly the excluded zones.
    testbed.loc["# of Excluded Zones", project] = len(
        (EXCLUDED_ZONES[project][EXCLUDED_ZONES[project] == 1]).dropna()
    )

# Totals across all projects for the plain count rows.
for row in ["# of VAVs", "# of FCUs", "# of AHUs", "# of Excluded Zones"]:
    testbed.loc[row, "TOTAL"] = testbed.loc[row, PROJECTS_2022].sum()

# Derived rows are computed for every column at once, TOTAL included.
testbed.loc["# of VAVs + FCUs", :] = (
    testbed.loc["# of VAVs", :] + testbed.loc["# of FCUs", :]
)
# Stored as a fraction rounded to two decimals, not multiplied by 100.
testbed.loc["% of Excluded Zones", :] = (
    (testbed.loc["# of Excluded Zones", :] / testbed.loc["# of VAVs + FCUs", :])
    .astype(float)
    .round(2)
)

### Cooling

In [None]:
# Average daily cooling demand per m2, from the 2023 building-level cooling
# data over May 1 - Oct 1 2023, weekdays only, all hours of the day.
cooling = cleaning.clean_df(
    df=load_building("2023", "C"),
    this_var="building-cooling",
    start_date=pd.Timestamp("05-01-2023"),
    end_date=pd.Timestamp("10-01-2023"),
    only_business_hours=False,
    no_weekends=True,
)
# Scale by base.MW_PER_TON and then by 10**3 (MW -> kW).
cooling = cooling * base.MW_PER_TON * (10**3)
cooling = cooling[PROJECTS_2022]
cooling["TOTAL"] = cooling.sum(axis=1, skipna=True)
# Aggregate to one total per calendar day.
cooling = cooling.groupby(cooling.index.date).sum()
# This row is not part of the initial testbed index, so .loc creates it here.
# The mean daily total is divided by the floor area in m2
# (1000 * the "1000m2" value).
for project in list(cooling.columns):
    testbed.loc["Average Cooling Demand (kWh/m2/day)", project] = cooling[
        project
    ].mean() / (1000 * testbed.loc["1000m2", project])

### Heating

In [None]:
# Average daily heating demand per m2, from the 2023 building-level heating
# data over May 1 - Oct 1 2023, weekdays only, all hours of the day.
heating = cleaning.clean_df(
    df=load_building("2023", "H"),
    this_var="building-heating",
    start_date=pd.Timestamp("05-01-2023"),
    end_date=pd.Timestamp("10-01-2023"),
    only_business_hours=False,
    no_weekends=True,
)
# NOTE(review): the data is scaled by base.WH_PER_BTU only, yet stored under a
# kWh label — confirm the source units (e.g. kBTU) make the result kWh.
heating = heating * base.WH_PER_BTU
heating = heating[PROJECTS_2022]
heating["TOTAL"] = heating.sum(axis=1, skipna=True)
# Aggregate to one total per calendar day.
heating = heating.groupby(heating.index.date).sum()
# This row is not part of the initial testbed index, so .loc creates it here.
for project in list(heating.columns):
    testbed.loc["Average Heating Demand (kWh/m2/day)", project] = heating[
        project
    ].mean() / (1000 * testbed.loc["1000m2", project])

### Electricity

In [None]:
# Average daily electric demand per m2, from the 2023 building-level
# electricity data over May 1 - Oct 1 2023, weekdays only, all hours of the
# day. No unit conversion is applied to this dataset.
electricity = cleaning.clean_df(
    df=load_building("2023", "E"),
    this_var="building-electricity",
    start_date=pd.Timestamp("05-01-2023"),
    end_date=pd.Timestamp("10-01-2023"),
    only_business_hours=False,
    no_weekends=True,
)
electricity = electricity[PROJECTS_2022]
electricity["TOTAL"] = electricity.sum(axis=1, skipna=True)
# Aggregate to one total per calendar day.
electricity = electricity.groupby(electricity.index.date).sum()
# This row is not part of the initial testbed index, so .loc creates it here.
for project in list(electricity.columns):
    testbed.loc["Average Electric Demand (kWh/m2/day)", project] = electricity[
        project
    ].mean() / (1000 * testbed.loc["1000m2", project])

### Total energy demand

In [None]:
# Per-building total demand is the sum of the three utility averages.
testbed.loc["Summer Daily Energy Demand (kWh/m2/day)", PROJECTS_2022] = (
    testbed.loc["Average Cooling Demand (kWh/m2/day)", PROJECTS_2022]
    + testbed.loc["Average Heating Demand (kWh/m2/day)", PROJECTS_2022]
    + testbed.loc["Average Electric Demand (kWh/m2/day)", PROJECTS_2022]
)
# The TOTAL column is the floor-area-weighted mean of the per-building values.
testbed.loc["Summer Daily Energy Demand (kWh/m2/day)", "TOTAL"] = (
    (
        testbed.loc["Summer Daily Energy Demand (kWh/m2/day)", PROJECTS_2022]
        * testbed.loc["1000m2", PROJECTS_2022]
        * 1000
    ).sum()
) / (testbed.loc["1000m2", PROJECTS_2022].sum() * 1000)

# Each utility's share of the total is stored as a fraction rounded to three
# decimals. The intermediate "Average ..." row is then dropped in place, so
# this cell raises KeyError if re-run without re-running the three utility
# cells above it.
for utility in ["Cooling", "Heating", "Electric"]:
    testbed.loc[f"% {utility} Demand", :] = (
        (
            testbed.loc[f"Average {utility} Demand (kWh/m2/day)", :]
            / testbed.loc["Summer Daily Energy Demand (kWh/m2/day)", :]
        )
        .astype(float)
        .round(3)
    )
    testbed.drop(index=f"Average {utility} Demand (kWh/m2/day)", inplace=True)

### Number of day types

In [None]:
# Count, per project, the weekdays scheduled at each cooling setpoint.
schedule = pd.read_csv(
    DATASETS_PATH / "csvs/2022_experiment_csvs/sp_schedule_daily.csv"
).set_index("Unnamed: 0")
schedule.index = pd.to_datetime(schedule.index)
for project in PROJECTS_2022:
    ser = schedule[project]
    for sp in [74, 76, 78]:
        this_ser = ser[ser == sp]
        # dayofweek 0-4 is Monday-Friday
        this_ser = this_ser[this_ser.index.dayofweek < 5]
        testbed.loc[f"# of {sp}F Weekdays", project] = len(this_ser)

# Campus-wide totals are computed once per setpoint, after every project has
# been filled in, rather than being recomputed on each pass of the project loop.
for sp in [74, 76, 78]:
    testbed.loc[f"# of {sp}F Weekdays", "TOTAL"] = testbed.loc[
        f"# of {sp}F Weekdays", PROJECTS_2022
    ].sum()

# Column-wise sum of the three setpoint rows, TOTAL column included.
testbed.loc["Total Experiment Weekdays", :] = testbed.loc[
    ["# of 74F Weekdays", "# of 76F Weekdays", "# of 78F Weekdays"], :
].sum()

In [None]:
# testbed

## Count rogue zones

In [None]:
# Load the "zone-simple_cooling_requests" dataset for every 2022 project and
# clean it to hourly, business-hours, weekday data within the 2022 summer
# window.
CRs = pull_from_dataset(
    "2022",
    PROJECTS_2022,
    "zone-simple_cooling_requests",
)
CRs = cleaning.clean_dfs(
    dfs=CRs,
    this_var="zone-dummy",
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    only_business_hours=True,
    no_weekends=True,
    resample_rule="1h",
)

# Filter each project's data with the hourly filter built for the "CSP = 74F"
# condition (presumably this keeps only the hours at that setpoint — confirm
# against clean_by_column).
CRs_74 = {}
for project in PROJECTS_2022:
    df = CRs[project]
    zones = list(df.columns)
    hourly_filters = cleaning.get_zonal_experiment_hourly_filter(
        project,
        zones,
        ["CSP = 74F"],
        no_weekends=False,
    )
    CRs_74[project] = cleaning.clean_by_column(
        df,
        hourly_filter=hourly_filters,
        no_weekends=False,
    )
    # progress indicator
    print(project)

# "Mean" test per project, reported under the column name "Time Sending CR".
CRs_74_mean = base.run_passive_test_on_dfs(
    dfs=CRs_74, this_test="Mean", col_name="Time Sending CR"
)

In [None]:
# Dot plot of the CRs_74_mean values per project, y axis fixed to [0, 1].
fig = viz.make_dot_plot(
    y_data=CRs_74_mean,
    y_axis_title="Fraction Time<br>Sending Estimated CR",
    vertical_spacing=0.1,
    y_range=[0, 1],
)

In [None]:
# fig

In [None]:
# Per project: number of rogue entries versus the total number of entries.
# An entry counts as rogue when its CRs_74_mean value is at least 0.7.
rogue = pd.DataFrame(index=list(CRs_74_mean.keys()), columns=["Rogue", "Total"])
for project in rogue.index:
    # Values below the threshold become NaN under the mask and are dropped.
    rogue.loc[project, "Rogue"] = len(
        CRs_74_mean[project][CRs_74_mean[project] >= 0.7].dropna()
    )
    rogue.loc[project, "Total"] = len(CRs_74_mean[project])

In [None]:
# Append (or reset) a "Total" row. It is NaN while the sums are taken, so it
# does not count itself when the cell is re-run.
rogue.loc["Total", :] = np.nan
rogue.loc["Total", "Rogue"] = rogue["Rogue"].sum()
rogue.loc["Total", "Total"] = rogue["Total"].sum()

In [None]:
# Percentage of rogue entries per project and overall, shown as the cell output.
100 * (rogue["Rogue"] / rogue["Total"])

## Min airflow

In [None]:
# VAV projects without LAB-2, built on a fresh list so PROJECTS_VAV itself is
# left untouched.
these_projects = list(PROJECTS_VAV)
these_projects.remove("LAB-2")

In [None]:
# Minimum-airflow table; the "Unnamed: 0" column is discarded.
min_airflow = pd.read_csv(DATASETS_PATH / "csvs/min_airflow_2023.csv").drop(
    columns=["Unnamed: 0"]
)

In [None]:
# project -> single-column frame of "Minimum Airflow (cfm)" indexed by VAV.
min_airflow_dict = {}
for project in these_projects:
    this_min_airflow = (
        min_airflow[min_airflow["Building"] == project]
        .set_index("VAV")["Minimum Airflow (cfm)"]
        .to_frame()
    )
    # Values outside the "zone-airflow" cleaning limits are replaced with NaN.
    this_min_airflow[this_min_airflow < cleaning.LOWER_LIMIT["zone-airflow"]] = np.nan
    this_min_airflow[this_min_airflow > cleaning.UPPER_LIMIT["zone-airflow"]] = np.nan
    min_airflow_dict[project] = this_min_airflow

In [None]:
# Dot plot of the minimum airflow per VAV, with a shared y axis across projects.
fig = viz.make_dot_plot(
    y_data=min_airflow_dict,
    y_axis_title="Min Airflow (cfm)",
    vertical_spacing=0.1,
    force_same_yaxes=True,
)

In [None]:
# fig

In [None]:
# Total minimum airflow of each building divided by its floor area. The
# "1000m2" value is scaled back to m2 and divided by base.M2_PER_SF, so the
# result is presumably cfm per square foot.
# The explicit dtype keeps the accumulator numeric on every pandas version.
min_airflow_summary = pd.Series(index=these_projects, dtype=float)
for project in these_projects:
    # DataFrame.sum() returns a Series labelled by column name, so the single
    # value is taken by position with .iloc[0]. A plain [0] relied on the
    # deprecated positional fallback of Series.__getitem__.
    min_airflow_summary[project] = min_airflow_dict[project].sum().iloc[0] / (
        testbed.loc["1000m2", project] * 1000 / base.M2_PER_SF
    )

In [None]:
# min_airflow_summary.to_frame()

## Building wide cooling

In [None]:
# 2022 building-level cooling for the experiment buildings, scaled by
# base.MW_PER_TON * 1000, then filtered per column and cleaned to the 2022
# summer window during business hours.
cooling_df = cleaning.clean_by_column(
    df=load_building("2022", "C")[PROJECTS_2022] * base.MW_PER_TON * 1000,  # kWh
    only_business_hours=True,
    no_weekends=NO_WEEKENDS,
)
cooling_df = cleaning.clean_df(
    df=cooling_df,
    this_var="building-cooling",
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    only_business_hours=True,
    no_weekends=False,
)
# NOTE: this rebinds `cooling` (a DataFrame in the testbed section above) to a
# dict of project -> single-column frame normalized by floor area (per 1000 m2).
cooling = {}
for project in PROJECTS_2022:
    cooling[project] = cooling_df[project].to_frame() / testbed.loc["1000m2", project]
# Blank out 2022-06-23 for every building.
for project in PROJECTS_2022:
    for day in ["06-23-2022"]:
        cooling[project].loc[day, :] = np.nan

In [None]:
# Regressor series: the 2022 weather "temperature" column cleaned as
# "weather-oat" over the 2022 summer window, business hours only, SI units,
# weekends kept.
T = cleaning.clean_df(
    df=load_weather("2022")["temperature"].to_frame(),
    this_var="weather-oat",
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    only_business_hours=True,
    no_weekends=False,
    SI_units=True,
)["temperature"]

In [None]:
# Building-level regressions in "Percent Change" mode. Returns the point
# deltas, the two error-bar widths, the summary table and the combined figure.
(
    percent_deltas_cooling,
    percent_deltas_high_cooling,
    percent_deltas_low_cooling,
    percent_summary,
    fig,
) = run_building_regressions(
    cooling,
    T,
    mode="Percent Change",
    summary_statistic="Mean",
    y_axis_title="Daytime Average<br>Cooling Demand (kW/1000m2)",
    use_raw=False,
)

In [None]:
# Horizontal legend centred below the plot area.
fig = fig.update_layout(
    legend=dict(
        x=0.5, y=-0.05, xanchor="center", yanchor="top", orientation="h", font_size=30
    ),
)

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/regression.png")

In [None]:
# run_building_regressions capitalizes the column names (e.g. "Off-1");
# restore the upper-case project identifiers.
percent_summary.columns = percent_summary.columns.str.upper()
# percent_summary

In [None]:
# Same building-level regressions in "Absolute Change" mode. This overwrites
# the notebook-level `summary` and `fig` variables.
(
    absolute_deltas_cooling,
    absolute_deltas_high_cooling,
    absolute_deltas_low_cooling,
    summary,
    fig,
) = run_building_regressions(
    cooling,
    T,
    mode="Absolute Change",
    summary_statistic="Mean",
    y_axis_title="Average Cooling Demand<br>(kW/1000m2)",
    use_raw=False,
)

In [None]:
# fig

In [None]:
# Summary plot of the percent change in cooling demand for the "78" setpoint
# column, one point per building, with error bars.
fig = viz.plot_experiment_summary(
    y_data=percent_deltas_cooling["78"].to_frame(),
    y_error_up_data=percent_deltas_high_cooling["78"].to_frame(),
    y_error_down_data=percent_deltas_low_cooling["78"].to_frame(),
    marker_legend={"color": {"78": "Black"}},
    y_axis_title="Percent Change in<br>Cooling Demand (%)",
    point_start=(1 / 2),
    offset_delta=(1 / 2),
    # ticks sit at the centre of each unit-wide building band
    tick_vals=[i + 0.5 for i in range(len(PROJECTS_2022))],
    width=1200,
    height=600,
    text_size=22,
    x_range=[-0.25, len(PROJECTS_2022)],
    y_range=[-70, 10],
    marker_size=10,
    error_thickness=2.5,
    whisker_len=8,
    dont_add_to_legend=["78"],
)
# Alternating gray / transparent bands, one per building, with edges at
# 0, 1, ..., len(PROJECTS_2022).
fig = add_vertical_boxes(
    fig, list(range(len(PROJECTS_2022) + 1)), background_color="lightgray"
)
# Horizontal legend centred below the plot area.
fig = fig.update_layout(
    legend=dict(
        x=0.5,
        y=-0.1,
        xanchor="center",
        yanchor="top",
        orientation="h",
    )
)

In [None]:
# fig

In [None]:
# Percent-change deltas for the "78" setpoint column, shown as the cell output.
percent_deltas_cooling["78"]

In [None]:
# fig.write_image(f"{IMAGE_PATH}/delta_cooling_percent.png")

In [None]:
# Summary plot of the absolute change in cooling demand for the "78" setpoint
# column. Same layout as the percent-change figure; only the data, y-axis
# title and y range differ.
fig = viz.plot_experiment_summary(
    y_data=absolute_deltas_cooling["78"].to_frame(),
    y_error_up_data=absolute_deltas_high_cooling["78"].to_frame(),
    y_error_down_data=absolute_deltas_low_cooling["78"].to_frame(),
    marker_legend={"color": {"78": "Black"}},
    y_axis_title="Absolute Change in<br>Cooling Demand (kW/1000m2)",
    point_start=(1 / 2),
    offset_delta=(1 / 2),
    # ticks sit at the centre of each unit-wide building band
    tick_vals=[i + 0.5 for i in range(len(PROJECTS_2022))],
    width=1200,
    height=600,
    text_size=22,
    x_range=[-0.25, len(PROJECTS_2022)],
    y_range=[-14, 2],
    marker_size=10,
    error_thickness=2.5,
    whisker_len=8,
    dont_add_to_legend=["78"],
)
# Alternating gray / transparent bands, one per building.
fig = add_vertical_boxes(
    fig, list(range(len(PROJECTS_2022) + 1)), background_color="lightgray"
)
# Horizontal legend centred below the plot area.
fig = fig.update_layout(
    legend=dict(
        x=0.5,
        y=-0.1,
        xanchor="center",
        yanchor="top",
        orientation="h",
    )
)

In [None]:
# fig

In [None]:
# absolute_deltas_cooling["78"]

In [None]:
# fig.write_image(f"{IMAGE_PATH}/delta_cooling_absolute.png")

### Check regression

In [None]:
# Building inspected in the single-project regression check below.
project = "OFF-2"

In [None]:
# Area-normalized cooling frame of the selected building.
df = cooling[project]

In [None]:
# Repeat for one building what run_building_regressions does per project:
# build the daily treatment indicators (baseline "CSP = 74F", dropped) ...
binary_df = regression_functions.get_2021_2022_binary_df(
    project=project,
    experiment_year="2022",
    freq="daily",
    baseline_column="CSP = 74F",
    drop_baseline_column=True,
    no_weekends=NO_WEEKENDS[project],
    control_for_weekends=CONTROL_FOR_WEEKENDS[project],
)

# ... and fit the regression in "Absolute Change" mode.
reg_results = regression_functions.general_Delta_fn(
    df=df,
    T=T,
    binary=binary_df,
    mode="Absolute Change",
    summary_statistic="Mean",
)

In [None]:
# Plot the single-building fit with the same legend names and colours that
# run_building_regressions uses for 2022.
fig = viz.plot_experiment_regression(
    experiment_results=reg_results,
    df=df,
    T=T,
    binary=binary_df,
    line_legend={
        "name": {
            "Control": "CSP = 23.3C",
            "CSP = 76F": "CSP = 24.4C",
            "CSP = 78F": "CSP = 25.5C",
        },
        "color": {
            "Control": "RoyalBlue",
            "CSP = 76F": "DarkOrange",
            "CSP = 78F": "Firebrick",
        },
    },
    mode="Absolute Change",
    summary_statistic="Mean",
    marker_size=10,
    line_width=2.5,
    # NOTE(review): `df` comes from `cooling`, which is normalized per 1000 m2,
    # while this title says kW — confirm the intended unit label.
    y_axis_title="Daily Average<br>Cooling Load (kW)",
    x_axis_title="Average Daytime OAT (C)",
    height=550,
)
# Horizontal legend centred below the plot area.
fig = fig.update_layout(
    legend=dict(
        x=0.5,
        y=-0.225,
        xanchor="center",
        yanchor="top",
        orientation="h",
    )
)

# Replace the annotation text (presumably the subplot title) in order. This
# assumes the figure has no more annotations than entries in `new_titles`;
# otherwise the lookup raises IndexError.
new_titles = ["Building Level"]
for i, annotation in enumerate(fig.layout.annotations):
    annotation.text = new_titles[i]

In [None]:
# fig

## MAT - SAT

In [None]:
# 2022 buildings that are also in the VAV list, excluding LAB-2, in alphabetical order.
these_projects = sorted(
    name
    for name in set(PROJECTS_2022) & set(PROJECTS_VAV)
    if name != "LAB-2"
)

In [None]:
def _load_clean_ahu(this_var):
    """Pull one AHU variable for `these_projects` and clean it with the shared options.

    Every AHU signal in this section uses the same window (SUMMER_START_2022 to
    SUMMER_END_2022), business hours only, SI units and a "1h" resample rule.
    """
    return cleaning.clean_dfs(
        dfs=pull_from_dataset("2022", these_projects, this_var),
        this_var=this_var,
        start_date=SUMMER_START_2022,
        end_date=SUMMER_END_2022,
        only_business_hours=True,
        no_weekends=NO_WEEKENDS,
        SI_units=True,
        resample_rule="1h",
    )


dat = _load_clean_ahu("ahu-dat")
mat = _load_clean_ahu("ahu-mat")

# Per-AHU temperature difference, taken before any airflow weighting.
mat_dat = {}
for project in these_projects:
    mat_dat[project] = mat[project] - dat[project]

airflow = _load_clean_ahu("ahu-airflow")

# Collapse each signal with the airflow-weighted average helper.
mat = base.calculate_airflow_weighted_average(mat, airflow)
dat = base.calculate_airflow_weighted_average(dat, airflow)
mat_dat = base.calculate_airflow_weighted_average(mat_dat, airflow)

In [None]:
# Outdoor air temperature series for the 2022 summer window (business hours, SI units).
# Weekends are kept here (no_weekends=False), unlike the building data above.
T = cleaning.clean_df(
    df=load_weather("2022")["temperature"].to_frame(),
    this_var="weather-oat",
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    only_business_hours=True,
    no_weekends=False,
    SI_units=True,
)["temperature"]

In [None]:
# Percent-change regressions of MAT - SAT against outdoor temperature.
# Results are unpacked as (deltas, upper bound, lower bound, summary, figure).
(
    percent_deltas,
    percent_deltas_high,
    percent_deltas_low,
    percent_summary,
    fig,
) = run_building_regressions(
    mat_dat,
    T,
    mode="Percent Change",
    summary_statistic="Mean",
    y_axis_title="Average MAT - SAT (C)",
    use_raw=False,
)

In [None]:
# percent_deltas

In [None]:
# Per-building summary of the percent change in MAT - SAT, using the "78" column of
# the delta tables. The upper whisker comes from percent_deltas_high and the lower
# whisker from percent_deltas_low (the lower whisker previously reused the high table,
# so the lower bound shown was wrong).
fig = viz.plot_experiment_summary(
    y_data=percent_deltas["78"].to_frame(),
    y_error_up_data=percent_deltas_high["78"].to_frame(),
    y_error_down_data=percent_deltas_low["78"].to_frame(),
    marker_legend={"color": {"78": "Black"}},
    y_axis_title="Percent Change in MAT - SAT (%)",
    point_start=(1 / 2),
    offset_delta=(1 / 2),
    # One tick per building, centred inside its shaded column.
    tick_vals=[i + 0.5 for i in range(len(these_projects))],
    width=1200,
    height=600,
    text_size=22,
    x_range=[-0.25, len(these_projects)],
    y_range=[-70, 20],
    marker_size=10,
    error_thickness=2.5,
    whisker_len=8,
    # Single series, so it is kept out of the legend.
    dont_add_to_legend=["78"],
)
fig = add_vertical_boxes(
    fig, list(range(len(these_projects) + 1)), background_color="lightgray"
)
# Horizontal legend centred underneath the plot area.
fig = fig.update_layout(
    legend=dict(
        x=0.5,
        y=-0.1,
        xanchor="center",
        yanchor="top",
        orientation="h",
    )
)

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/delta_mat_sat_percent.png")

In [None]:
# Absolute-change regressions for the three AHU temperature signals. Each call
# overwrites `fig`, so only the last figure (MAT) remains in scope afterwards.

# MAT - SAT
(
    absolute_deltas_mat_dat,
    absolute_deltas_high_mat_dat,
    absolute_deltas_low_mat_dat,
    absolute_summary_mat_dat,
    fig,
) = run_building_regressions(
    mat_dat,
    T,
    mode="Absolute Change",
    summary_statistic="Mean",
    y_axis_title="Average MAT - SAT (C)",
    use_raw=False,
)

# SAT (the "ahu-dat" signal)
(
    absolute_deltas_dat,
    absolute_deltas_high_dat,
    absolute_deltas_low_dat,
    absolute_summary_dat,
    fig,
) = run_building_regressions(
    dat,
    T,
    mode="Absolute Change",
    summary_statistic="Mean",
    y_axis_title="Average SAT (C)",
    use_raw=False,
)

# MAT
(
    absolute_deltas_mat,
    absolute_deltas_high_mat,
    absolute_deltas_low_mat,
    absolute_summary_mat,
    fig,
) = run_building_regressions(
    mat,
    T,
    mode="Absolute Change",
    summary_statistic="Mean",
    y_axis_title="Average MAT (C)",
    use_raw=False,
)

In [None]:
def _collect_78(mat_dat_table, dat_table, mat_table):
    """Gather the "78" column of three delta tables into one frame.

    The result is indexed like absolute_deltas_mat_dat and has one column per
    temperature signal, in the order MAT - SAT, SAT, MAT.
    """
    return pd.DataFrame(
        {
            "MAT - SAT (C)": mat_dat_table["78"],
            "SAT (C)": dat_table["78"],
            "MAT (C)": mat_table["78"],
        },
        index=absolute_deltas_mat_dat.index,
    )


# Point estimates, then the upper and lower bounds used for the error bars.
deltas = _collect_78(absolute_deltas_mat_dat, absolute_deltas_dat, absolute_deltas_mat)
deltas_high = _collect_78(
    absolute_deltas_high_mat_dat, absolute_deltas_high_dat, absolute_deltas_high_mat
)
deltas_low = _collect_78(
    absolute_deltas_low_mat_dat, absolute_deltas_low_dat, absolute_deltas_low_mat
)

In [None]:
# Per-building summary of the absolute change in MAT - SAT, SAT and MAT; the three
# series share each building column, offset by a quarter of its width.
fig = viz.plot_experiment_summary(
    y_data=deltas,
    y_error_up_data=deltas_high,
    y_error_down_data=deltas_low,
    y_axis_title="Absolute Change in Temperature (C)",
    y_range=[-6, 6],
    x_range=[-0.25, len(these_projects)],
    tick_vals=[idx + 0.5 for idx in range(len(these_projects))],
    point_start=0.25,
    offset_delta=0.25,
    marker_legend={
        "color": dict(
            zip(
                ["MAT - SAT (C)", "SAT (C)", "MAT (C)"],
                ["Black", "Coral", "Orchid"],
            )
        )
    },
    marker_size=10,
    error_thickness=2.5,
    whisker_len=8,
    text_size=22,
    width=1200,
    height=600,
)
fig = add_vertical_boxes(
    fig, [*range(len(these_projects) + 1)], background_color="lightgray"
)
# Horizontal legend centred underneath the plot area.
fig = fig.update_layout(
    legend={
        "x": 0.5,
        "y": -0.1,
        "xanchor": "center",
        "yanchor": "top",
        "orientation": "h",
    }
)

In [None]:
# deltas

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/delta_mat_sat_absolute.png")

### Check regression


In [None]:
# Building whose MAT - SAT regression is inspected in the following cells.
project = "OFF-2"

In [None]:
# Airflow-weighted MAT - SAT data for the selected building.
df = mat_dat[project]

In [None]:
# Daily experiment-indicator frame for the selected building; "CSP = 74F" is the
# baseline column and is dropped from the result.
binary_df = regression_functions.get_2021_2022_binary_df(
    project=project,
    experiment_year="2022",
    freq="daily",
    baseline_column="CSP = 74F",
    drop_baseline_column=True,
    no_weekends=NO_WEEKENDS[project],
    control_for_weekends=CONTROL_FOR_WEEKENDS[project],
)

# Regression of MAT - SAT against outdoor temperature `T` and the indicators above.
reg_results = regression_functions.general_Delta_fn(
    df=df,
    T=T,
    binary=binary_df,
    mode="Absolute Change",
    summary_statistic="Mean",
)

In [None]:
# Regression diagnostic for the selected building's MAT - SAT. Legend keys are the
# Fahrenheit experiment labels; display names are the same set points in Celsius.
fig = viz.plot_experiment_regression(
    experiment_results=reg_results,
    df=df,
    T=T,
    binary=binary_df,
    mode="Absolute Change",
    summary_statistic="Mean",
    line_legend={
        "name": {
            "Control": "CSP = 23.3C",
            "CSP = 76F": "CSP = 24.4C",
            "CSP = 78F": "CSP = 25.5C",
        },
        "color": {
            "Control": "RoyalBlue",
            "CSP = 76F": "DarkOrange",
            "CSP = 78F": "Firebrick",
        },
    },
    x_axis_title="Average Daytime OAT (C)",
    y_axis_title="Daily Average<br>MAT - SAT (C)",
    marker_size=10,
    line_width=2.5,
    height=550,
)
# Horizontal legend centred underneath the plot area.
fig = fig.update_layout(
    legend={
        "x": 0.5,
        "y": -0.225,
        "xanchor": "center",
        "yanchor": "top",
        "orientation": "h",
    }
)

# Replace the text of each layout annotation with the matching entry of new_titles
# (presumably the subplot titles - one title is supplied, so one annotation is expected).
new_titles = ["AHU Level"]
for idx, title_annotation in enumerate(fig.layout.annotations):
    title_annotation.text = new_titles[idx]

In [None]:
# fig

## Airflow 

In [None]:
# 2022 buildings that are also in the VAV list, excluding LAB-2, in alphabetical order.
these_projects = sorted(
    name
    for name in set(PROJECTS_2022) & set(PROJECTS_VAV)
    if name != "LAB-2"
)

In [None]:
# Zone airflow set points, cleaned over the 2022 summer window.
airflow = cleaning.clean_dfs(
    dfs=pull_from_dataset("2022", these_projects, "zone-airflowsp"),
    this_var="zone-airflowsp",
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    only_business_hours=True,
    no_weekends=NO_WEEKENDS,
    SI_units=True,
    resample_rule="1h",
)
# Sum across zones to get one building-level column, named after the building.
for project in these_projects:
    airflow[project] = airflow[project].sum(axis=1).to_frame(name=project)

In [None]:
# Outdoor air temperature series for the 2022 summer window (business hours, SI units).
# Same call as in the MAT - SAT section; repeated so this section runs on its own.
T = cleaning.clean_df(
    df=load_weather("2022")["temperature"].to_frame(),
    this_var="weather-oat",
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    only_business_hours=True,
    no_weekends=False,
    SI_units=True,
)["temperature"]

In [None]:
# Percent-change regressions of building airflow against outdoor temperature.
# Note: this rebinds percent_deltas / percent_deltas_high / percent_deltas_low,
# replacing the MAT - SAT results of the same names.
(
    percent_deltas,
    percent_deltas_high,
    percent_deltas_low,
    percent_airflow,
    fig,
) = run_building_regressions(
    airflow,
    T,
    mode="Percent Change",
    summary_statistic="Mean",
    y_axis_title="Average Airflow (m3/hr)",
    use_raw=False,
)

In [None]:
# fig

In [None]:
# Per-building summary of the percent change in airflow, using the "78" column of
# the delta tables for the markers and the high/low tables for the whiskers.
fig = viz.plot_experiment_summary(
    y_data=percent_deltas["78"].to_frame(),
    y_error_up_data=percent_deltas_high["78"].to_frame(),
    y_error_down_data=percent_deltas_low["78"].to_frame(),
    y_axis_title="Percent Change in Airflow (%)",
    y_range=[-70, 10],
    # One tick per building, centred inside its shaded column.
    x_range=[-0.25, len(these_projects)],
    tick_vals=[idx + 0.5 for idx in range(len(these_projects))],
    point_start=0.5,
    offset_delta=0.5,
    # Single series, so it is coloured but kept out of the legend.
    marker_legend={"color": {"78": "Black"}},
    dont_add_to_legend=["78"],
    marker_size=10,
    error_thickness=2.5,
    whisker_len=8,
    text_size=22,
    width=1200,
    height=600,
)
fig = add_vertical_boxes(
    fig, [*range(len(these_projects) + 1)], background_color="lightgray"
)
# Horizontal legend centred underneath the plot area.
fig = fig.update_layout(
    legend={
        "x": 0.5,
        "y": -0.1,
        "xanchor": "center",
        "yanchor": "top",
        "orientation": "h",
    }
)

In [None]:
# percent_deltas

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/delta_airflow_percent.png")

## Determine dominant zones

In [None]:
# Cut points passed to the 1D clustering of the change in zonal load (tload),
# and drawn as horizontal lines on the detailed plots.
SLICE1 = 10  # delta tload 10% or more
SLICE2 = -10  # delta tload -10% or less

# Cut points on the control-day zonal load, used to relabel zones after clustering.
HIGH_THRESH = 30  # control tload 30% or more
LOW_THRESH = -10  # control tload -10% or less

In [None]:
# Outdoor air temperature series for the 2022 summer window (business hours, SI units).
T = cleaning.clean_df(
    df=load_weather("2022")["temperature"].to_frame(),
    this_var="weather-oat",
    only_business_hours=True,
    no_weekends=False,
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    SI_units=True,
)["temperature"]

In [None]:
# Zonal loads for every 2022 building, cleaned over the summer window.
# FCU zones are kept (remove_FCUs=False).
tloads = pull_from_dataset("2022", PROJECTS_2022, "zone-tloads")
tloads = cleaning.clean_dfs(
    dfs=tloads,
    this_var="zone-tloads",
    only_business_hours=True,
    no_weekends=NO_WEEKENDS,
    remove_FCUs=False,
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    resample_rule="1h",
)

In [None]:
# Zone-level absolute-change regressions of zonal load against outdoor temperature.
# NOTE(review): the results are unpacked here as (delta, low, high), whereas
# run_building_regressions is unpacked as (delta, high, low) elsewhere in this
# notebook. Confirm the return order of run_equip_regressions; if it is
# (delta, high, low) the *_low_* and *_high_* names below are swapped.
(
    deltas_76_tloads_2022,
    deltas_low_76_tloads_2022,
    deltas_high_76_tloads_2022,
    deltas_78_tloads,
    deltas_low_78_tloads,
    deltas_high_78_tloads,
) = run_equip_regressions(
    tloads,
    T,
    "Absolute Change",
)

In [None]:
# Bin each zone's 78F change in zonal load using the two slice values.
# NOTE(review): `mapping` presumably translates the clustering output ids into the
# category codes used by the plot legends (0 = reduced, 1 = small change,
# 3 = increased) - confirm against clustering.run_1D_clustering_on_dict.
dominant_zones = clustering.run_1D_clustering_on_dict(
    deltas_78_tloads, slices=[SLICE1, SLICE2], mapping={2: 0, 1: 1, 0: 3}
)

## Corrections based on control days

In [None]:
# Zonal loads for every 2022 building over the summer window (weekdays, business
# hours, FCU zones kept).
tloads = cleaning.clean_dfs(
    dfs=pull_from_dataset("2022", PROJECTS_2022, "zone-tloads"),
    this_var="zone-tloads",
    remove_FCUs=False,
    only_business_hours=True,
    no_weekends=True,
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    resample_rule="1h",
)

# Restrict each building's zones to the hours selected by the "CSP = 74F" filter.
control_tloads = {}
for project in PROJECTS_2022:
    project_tloads = tloads[project]
    control_filter = cleaning.get_zonal_experiment_hourly_filter(
        project,
        list(project_tloads.columns),
        ["CSP = 74F"],
        no_weekends=False,
    )
    control_tloads[project] = cleaning.clean_by_column(
        project_tloads,
        hourly_filter=control_filter,
        no_weekends=False,
    )
    print(project)

# Reduce each zone to its mean load over those hours.
control_tloads = base.run_passive_test_on_dfs(
    dfs=control_tloads,
    this_test="Mean",
    col_name="Average Zonal Load (%)<br>Control Days",
)

In [None]:
# Relabel zones in place using their mean control-day load.
for project in PROJECTS_2022:
    zone_labels = dominant_zones[project]
    control_load = control_tloads[project].iloc[:, 0]

    # small change in tload, but remained high
    current = zone_labels.iloc[:, 0]
    small_change = current[current == 1].index
    stayed_high = set(control_load[control_load >= HIGH_THRESH].index)
    zone_labels.loc[[zone for zone in small_change if zone in stayed_high], :] = 2

    # in heating (applied last, so it overrides any label set above)
    in_heating = list(control_load[control_load <= LOW_THRESH].index)
    zone_labels.loc[in_heating, :] = 4  # np.nan

In [None]:
# Drop zones that have no category label (rows containing NaN).
for project in PROJECTS_2022:
    dominant_zones[project] = dominant_zones[project].dropna()

## Detailed plot 1

In [None]:
# Restrict every table to the zones they all share, working on deep copies so the
# originals stay untouched. The input list is given in the same order as the names
# it is unpacked into: (delta, high, low) for 76F, then for 78F, then the control
# loads. The 76F low/high inputs were previously passed in the opposite order to
# their target names, so this_deltas_high_76_tloads_2022 received the low table.
(
    this_deltas_76_tloads_2022,
    this_deltas_high_76_tloads_2022,
    this_deltas_low_76_tloads_2022,
    this_deltas_78_tloads,
    this_deltas_high_78_tloads,
    this_deltas_low_78_tloads,
    this_control_tloads,
) = base.make_common_index(
    [
        copy.deepcopy(deltas_76_tloads_2022),
        copy.deepcopy(deltas_high_76_tloads_2022),
        copy.deepcopy(deltas_low_76_tloads_2022),
        copy.deepcopy(deltas_78_tloads),
        copy.deepcopy(deltas_high_78_tloads),
        copy.deepcopy(deltas_low_78_tloads),
        copy.deepcopy(control_tloads),
    ]
)

In [None]:
# Scatter of each zone's 78F change in zonal load against its control-day load,
# one panel per building, coloured by category and shaped by exclusion status.
fig = viz.make_scatter_plot(
    y_data=this_deltas_78_tloads,
    y_error_up_data=this_deltas_high_78_tloads,
    y_error_down_data=this_deltas_low_78_tloads,
    y_axis_title="Absolute Change<br>Zonal Load (%)",
    x_data=this_control_tloads,
    x_axis_title="Zonal Load (%) Control Days",
    color_data=dominant_zones,
    color_legend={
        "name": {
            0: f"Reduced zonal load {abs(SLICE1)}% or more",
            1: "Small change zonal load",
            2: "Small change zonal load (remained high)",
            3: f"Increased zonal load {abs(SLICE2)}% or more",
            4: "Typically in heating",
        },
        "color": {
            0: "ForestGreen",
            1: "RoyalBlue",
            2: "DarkOrange",
            3: "Firebrick",
            4: "Gray",
        },
    },
    shape_data=EXCLUDED_ZONES,
    shape_legend={
        "name": {
            0: "Included",
            1: "Excluded",
        },
        "shape": {
            0: "circle",
            1: "x",
        },
    },
    num_cols=3,
    horizontal_spacing=0.1,
    vertical_spacing=0.075,
    height=500,
    width=800,
    y_range=[-110, 110],
    x_range=[-110, 110],
    title_size=34,
    text_size=26,
    legend_size=30,
)
# Horizontal legend centred underneath the plot area.
fig = fig.update_layout(
    legend=dict(
        x=0.5,
        y=-0.05,
        xanchor="center",
        yanchor="top",
        orientation="h",
    )
)

# Overlay the category boundaries on every building panel. Panels are addressed
# row by row, one per entry of deltas_78_tloads, so the grid size follows the
# number of buildings instead of being hard-coded.
n_cols = 3  # must match num_cols passed to make_scatter_plot above
boundary_style = dict(color="Black", width=3, dash="solid")
for p in range(len(deltas_78_tloads)):
    row, col = divmod(p, n_cols)
    row, col = row + 1, col + 1

    # Horizontal lines at the two delta cut points (loop variable deliberately not
    # called `slice`, which would shadow the builtin for the rest of the notebook).
    for y_cut in (SLICE1, SLICE2):
        fig.add_shape(
            type="line",
            x0=-110,
            x1=110,
            y0=y_cut,
            y1=y_cut,
            line=boundary_style,
            row=row,
            col=col,
        )

    # Full-height vertical line at the "in heating" threshold; drawn once per panel
    # (it used to be drawn once per slice, i.e. twice on top of itself).
    fig.add_shape(
        type="line",
        x0=LOW_THRESH,
        x1=LOW_THRESH,
        y0=-110,
        y1=110,
        line=boundary_style,
        row=row,
        col=col,
    )

    # Vertical segment at the "remained high" threshold, only between the two slices.
    fig.add_shape(
        type="line",
        x0=HIGH_THRESH,
        x1=HIGH_THRESH,
        y0=SLICE1,
        y1=SLICE2,
        line=boundary_style,
        row=row,
        col=col,
    )

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/dzs_detailed.png")

### Check regression

In [None]:
# Building and zone whose zonal-load regression is inspected in the following cells.
project = "OFF-2"
zone = "VAV 3-184"

In [None]:
# Outdoor air temperature series for the 2022 summer window (business hours, SI units).
T = cleaning.clean_df(
    df=load_weather("2022")["temperature"].to_frame(),
    this_var="weather-oat",
    only_business_hours=True,
    no_weekends=False,
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    SI_units=True,
)["temperature"]

In [None]:
# Zonal loads for the selected building only, cleaned over the 2022 summer window.
# clean_df handles a single frame, so it gets this building's weekend flag rather
# than the whole NO_WEEKENDS dict (every other clean_df call in this notebook
# passes a bool, and the regression cell below also uses NO_WEEKENDS[project]).
tloads = load_zones("2022", project, "zone-tloads")
tloads = cleaning.clean_df(
    df=tloads,
    this_var="zone-tloads",
    only_business_hours=True,
    no_weekends=NO_WEEKENDS[project],
    remove_FCUs=False,
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    resample_rule="1h",
)

In [None]:
# Load series of the selected zone, kept as a one-column frame.
df = tloads[zone].to_frame()

# Daily experiment-indicator frame for this specific zone; "CSP = 74F" is the
# baseline column and is dropped from the result.
binary_df = regression_functions.get_2021_2022_binary_df(
    project=project,
    experiment_year="2022",
    freq="daily",
    baseline_column="CSP = 74F",
    drop_baseline_column=True,
    no_weekends=NO_WEEKENDS[project],
    control_for_weekends=CONTROL_FOR_WEEKENDS[project],
    zone=zone,
)

# Regression of the zone's load against outdoor temperature `T` and the indicators.
reg_results = regression_functions.general_Delta_fn(
    df=df,
    T=T,
    binary=binary_df,
    mode="Absolute Change",
    summary_statistic="Mean",
)

In [None]:
# Regression diagnostic for the selected zone's load. Legend keys are the
# Fahrenheit experiment labels; display names are the same set points in Celsius.
fig = viz.plot_experiment_regression(
    experiment_results=reg_results,
    df=df,
    T=T,
    binary=binary_df,
    mode="Absolute Change",
    summary_statistic="Mean",
    line_legend={
        "name": {
            "Control": "CSP = 23.3C",
            "CSP = 76F": "CSP = 24.4C",
            "CSP = 78F": "CSP = 25.5C",
        },
        "color": {
            "Control": "RoyalBlue",
            "CSP = 76F": "DarkOrange",
            "CSP = 78F": "Firebrick",
        },
    },
    x_axis_title="Average Daytime OAT (C)",
    y_axis_title="Daily Average<br>Zonal Load (%)",
    marker_size=10,
    line_width=2.5,
    height=550,
)
# Horizontal legend centred underneath the plot area.
fig = fig.update_layout(
    legend={
        "x": 0.5,
        "y": -0.225,
        "xanchor": "center",
        "yanchor": "top",
        "orientation": "h",
    }
)

# Replace the text of each layout annotation with the matching entry of new_titles
# (presumably the subplot titles - one title is supplied, so one annotation is expected).
new_titles = ["Zonal Level"]
for idx, title_annotation in enumerate(fig.layout.annotations):
    title_annotation.text = new_titles[idx]

In [None]:
# fig

## Detailed plot 2

In [None]:
# Dot plot of each zone's 78F change in zonal load, one panel per building,
# coloured by category and shaped by exclusion status.
fig = viz.make_dot_plot(
    y_data=deltas_78_tloads,
    y_error_up_data=deltas_high_78_tloads,
    y_error_down_data=deltas_low_78_tloads,
    y_axis_title="Absolute Change in<br>Zonal Load (%)",
    x_axis_title="Rooms",
    color_data=dominant_zones,
    shape_data=EXCLUDED_ZONES,
    color_legend={
        "name": {
            0: f"Reduced zonal load {abs(SLICE1)}% or more",
            1: "Small change zonal load",
            2: "Small change zonal load (remained high)",
            3: f"Increased zonal load {abs(SLICE2)}% or more",
            4: "Typically in heating",
        },
        "color": {
            0: "ForestGreen",
            1: "RoyalBlue",
            2: "DarkOrange",
            3: "Firebrick",
            4: "Gray",
        },
    },
    shape_legend={
        "name": {
            0: "Included",
            1: "Excluded",
        },
        "shape": {
            0: "circle",
            1: "cross",
        },
    },
    num_cols=3,
    horizontal_spacing=0.1,
    vertical_spacing=0.1,
    # y_range=[-50, 50]
)

# Horizontal legend centred underneath the plot area.
fig = fig.update_layout(
    legend=dict(
        x=0.5,
        y=-0.1,
        xanchor="center",
        yanchor="top",
        orientation="h",
    )
)

# Draw the two delta cut points across every building panel. Panels are addressed
# row by row, one per entry of deltas_78_tloads. The previous loop covered only
# three rows (nine panels), so the last building never received its lines.
n_cols = 3  # must match num_cols passed to make_dot_plot above
for p, project in enumerate(deltas_78_tloads):
    row, col = divmod(p, n_cols)
    # Loop variable deliberately not called `slice`, which would shadow the builtin.
    for y_cut in (SLICE1, SLICE2):
        fig.add_shape(
            type="line",
            x0=0,
            x1=len(deltas_78_tloads[project]),  # span all zones of this building
            y0=y_cut,
            y1=y_cut,
            line=dict(color="Black", width=3.5),
            row=row + 1,
            col=col + 1,
        )

In [None]:
# fig

## Summary

In [None]:
# Category names, listed in the order of the integer codes 0-4 stored in
# dominant_zones.
categories = [
    f"Reduced zonal load {abs(SLICE1)}% or more",
    "Small change zonal load",
    "Small change zonal load (remained high)",
    f"Increased zonal load {abs(SLICE2)}% or more",
    "Typically in heating",
]

# Index j of this list is compared against the 0/1 codes in EXCLUDED_ZONES.
excluded_labels = ["included", "excluded"]

# One summary column per (category, exclusion status); all "included" columns first.
labels = [f"{cat} ({ex})" for ex in excluded_labels for cat in categories]

# Row labels: every building plus a pooled "TOTAL" row.
projects_total = [*PROJECTS_2022, "TOTAL"]

tot_zones = 0

# Float-typed from the start: the table is filled with fractions, and writing a
# float into an integer column is a deprecated upcast in recent pandas.
dz_summary = pd.DataFrame(0.0, index=projects_total, columns=labels)

In [None]:
# Fill dz_summary with the fraction of zones in each (category, exclusion) cell per
# building, and with the pooled fraction across all buildings in the "TOTAL" row.

# Reset the accumulators so that re-running this cell on its own does not add the
# counts on top of an already-normalised TOTAL row.
tot_zones = 0
dz_summary.loc["TOTAL", :] = 0

for project in PROJECTS_2022:
    these_dominant_zones = dominant_zones[project].iloc[:, 0]
    these_excluded_zones = EXCLUDED_ZONES[project].iloc[:, 0]

    # Only zones present in both tables can be cross-tabulated.
    common = list(
        set(these_dominant_zones.index).intersection(set(these_excluded_zones.index))
    )
    these_dominant_zones = these_dominant_zones[common]
    these_excluded_zones = these_excluded_zones[common]

    n_zones = len(these_dominant_zones)
    if n_zones == 0:
        # Nothing to count for this building; avoid dividing by zero.
        continue

    for i, category in enumerate(categories):
        group = set(these_dominant_zones[these_dominant_zones == i].index)
        for j, excluded_label in enumerate(excluded_labels):
            criticality = set(these_excluded_zones[these_excluded_zones == j].index)
            n_matching = len(group & criticality)
            column = f"{category} ({excluded_label})"
            dz_summary.loc[project, column] = n_matching / n_zones
            dz_summary.loc["TOTAL", column] += n_matching
    tot_zones += n_zones

# Turn the pooled counts into fractions of all counted zones.
dz_summary.loc["TOTAL", :] = dz_summary.loc["TOTAL", :] / tot_zones

In [None]:
# dz_summary

In [None]:
# Stacked bar chart of the per-building category fractions. The legend settings
# are derived from the five category names so each name is spelled only once.
bar_categories = [
    f"Reduced zonal load {abs(SLICE1)}% or more",
    "Small change zonal load",
    "Small change zonal load (remained high)",
    f"Increased zonal load {abs(SLICE2)}% or more",
    "Typically in heating",
]
bar_colors = ["ForestGreen", "RoyalBlue", "DarkOrange", "Firebrick", "Gray"]
included_cols = [f"{category} (included)" for category in bar_categories]
excluded_cols = [f"{category} (excluded)" for category in bar_categories]

fig = viz.make_bar_plot(
    y_data=dz_summary.loc[PROJECTS_2022, :],
    bar_legend={
        # Same colour for a category whether included or excluded.
        "color": {
            **dict(zip(included_cols, bar_colors)),
            **dict(zip(excluded_cols, bar_colors)),
        },
        # Included bars show the bare category name; excluded bars keep the suffix.
        "name": {
            **dict(zip(included_cols, bar_categories)),
            **{column: column for column in excluded_cols},
        },
        # Excluded bars are lighter and hatched.
        "opacity": {
            **dict.fromkeys(included_cols, 1),
            **dict.fromkeys(excluded_cols, 0.75),
        },
        "pattern": {
            **dict.fromkeys(included_cols, ""),
            **dict.fromkeys(excluded_cols, "/"),
        },
    },
    pattern_legend={"Excluded": ("/", 0.5)},
    dont_add_to_legend=list(excluded_cols),
    y_axis_title="Fraction of Zones",
    y_range=[0, 1],
    annotations=included_cols[:2],
    annotation_thresh=0.05,
    bar_width=0.75,
    bar_mode="stack",
    tick_vals=[idx + 0.5 for idx in range(len(PROJECTS_2022))],
    text_size=22,
    width=1200,
    height=600,
)
# Horizontal legend centred underneath the plot area; x axis spans all buildings.
fig = fig.update_layout(
    legend={
        "x": 0.5,
        "y": -0.1,
        "xanchor": "center",
        "yanchor": "top",
        "orientation": "h",
        "font_size": 20,
    },
    xaxis={"range": [-0.25, len(PROJECTS_2022)]},
)

In [None]:
# dz_summary * 100

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/dominant_zones.png")

## Deviation with dominant zones

In [None]:
# Zone "deviation_coolsp" data for every 2022 building, cleaned over the summer
# window (weekdays, business hours, SI units, FCU zones kept).
deviation = pull_from_dataset("2022", PROJECTS_2022, "zone-deviation_coolsp")

deviation = cleaning.clean_dfs(
    dfs=deviation,
    this_var="zone-deviation_coolsp",
    remove_FCUs=False,
    only_business_hours=True,
    no_weekends=True,
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    SI_units=True,
    resample_rule="1h",
)

# Restrict each building's zones to the hours selected by the "CSP = 74F" filter.
control_deviation = {}
for project in PROJECTS_2022:
    df = deviation[project]
    zones = list(df.columns)
    hourly_filters = cleaning.get_zonal_experiment_hourly_filter(
        project,
        zones,
        ["CSP = 74F"],
        no_weekends=False,
    )
    control_deviation[project] = cleaning.clean_by_column(
        df,
        hourly_filter=hourly_filters,
        no_weekends=False,
    )
    print(project)  # progress indicator

In [None]:
# Per-zone mean and standard deviation of the control-day deviation; both results
# use the same column name.
control_deviation_mean = base.run_passive_test_on_dfs(
    dfs=control_deviation, this_test="Mean", col_name="T - CSP (C)<br>Control Days"
)
control_deviation_std = base.run_passive_test_on_dfs(
    dfs=control_deviation, this_test="Std", col_name="T - CSP (C)<br>Control Days"
)

In [None]:
# Work on a copy of the category labels so dominant_zones itself is not trimmed.
dzs = copy.deepcopy(dominant_zones)
for project in PROJECTS_2022:
    # dzs[project] = dzs[project].dropna()
    # Drop zones without a mean / std value before aligning.
    control_deviation_mean[project] = control_deviation_mean[project].dropna()
    control_deviation_std[project] = control_deviation_std[project].dropna()
# Keep only the zones shared by all three tables.
control_deviation_mean, control_deviation_std, dzs = base.make_common_index(
    [control_deviation_mean, control_deviation_std, dzs]
)

In [None]:
# Dot plot of each zone's mean control-day deviation (+/- one standard deviation),
# one panel per building, coloured by category code 0-4.
fig = viz.make_dot_plot(
    y_data=control_deviation_mean,
    y_error_up_data=control_deviation_std,
    y_error_down_data=control_deviation_std,
    y_axis_title="T - Effective CSP (C)<br>Control Days",
    y_range=[-5, 5],  # [-8, 8],
    x_axis_title="Fraction of Zones",
    normalize_x=True,
    color_data=dzs,
    color_legend={
        "name": dict(
            enumerate(
                [
                    f"Reduced zonal load {abs(SLICE1)}% or more",
                    "Small change zonal load",
                    "Small change zonal load (remained high)",
                    f"Increased zonal load {abs(SLICE2)}% or more",
                    "Typically in heating",
                ]
            )
        ),
        "color": dict(
            enumerate(["ForestGreen", "RoyalBlue", "DarkOrange", "Firebrick", "Gray"])
        ),
    },
    num_cols=3,
    horizontal_spacing=0.1,
    vertical_spacing=0.075,
    marker_size=10,
    title_size=38,
    text_size=30,
    legend_size=38,
    width=1000,
    height=600,
)
# Horizontal legend centred underneath the plot area.
fig = fig.update_layout(
    legend={
        "x": 0.5,
        "y": -0.05,
        "xanchor": "center",
        "yanchor": "top",
        "orientation": "h",
    }
)

In [None]:
# Building-wide deviation on control days: average across zones at each timestamp,
# then take the mean and standard deviation of that series over time.
summary = pd.DataFrame(index=PROJECTS_2022, columns=["Mean", "Std"])
for project in PROJECTS_2022:
    building_wide = control_deviation[project].mean(axis=1)
    summary.loc[project, "Mean"] = building_wide.mean()
    summary.loc[project, "Std"] = building_wide.std()

In [None]:
# Add the building-wide mean +/- std as a text annotation on each building's panel.
# Panel p (0-based, in PROJECTS_2022 order) is addressed through axis ids
# "x{p+1}" / "y{p+1}". Iterating the buildings directly replaces the former nested
# row/column loops, whose indices were never used and which relied on a hard-coded
# "p > 9" guard to stop after the tenth building.
for p, project in enumerate(PROJECTS_2022):
    mean_dev = round(summary.loc[project, "Mean"], 1)
    std_dev = round(summary.loc[project, "Std"], 1)
    fig.add_annotation(
        text=f"Building Wide T - CSP: {mean_dev} +/- {std_dev} C",
        xref=f"x{p+1}",
        yref=f"y{p+1}",
        x=0.02,
        y=3,
        xanchor="left",
        yanchor="top",
        showarrow=False,
        font=dict(size=30, color="black"),
    )

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/deviation.png")

## Tloads with dominant zones

In [None]:
# Rebuild the control-day zonal loads with the same steps as the
# "Corrections based on control days" section, but stop before reducing to a
# single statistic so both mean and std can be computed below.
tloads = pull_from_dataset("2022", PROJECTS_2022, "zone-tloads")

tloads = cleaning.clean_dfs(
    dfs=tloads,
    this_var="zone-tloads",
    remove_FCUs=False,
    only_business_hours=True,
    no_weekends=True,
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    resample_rule="1h",
)

# Restrict each building's zones to the hours selected by the "CSP = 74F" filter.
control_tloads = {}
for project in PROJECTS_2022:
    df = tloads[project]
    zones = list(df.columns)
    hourly_filters = cleaning.get_zonal_experiment_hourly_filter(
        project,
        zones,
        ["CSP = 74F"],
        no_weekends=False,
    )
    control_tloads[project] = cleaning.clean_by_column(
        df,
        hourly_filter=hourly_filters,
        no_weekends=False,
    )
    print(project)  # progress indicator

In [None]:
# Per-zone mean and standard deviation of the control-day zonal load; both results
# use the same column name.
control_tloads_mean = base.run_passive_test_on_dfs(
    dfs=control_tloads, this_test="Mean", col_name="Zonal Load (%)<br>Control Days"
)
control_tloads_std = base.run_passive_test_on_dfs(
    dfs=control_tloads, this_test="Std", col_name="Zonal Load (%)<br>Control Days"
)

In [None]:
# Work on a copy of the category labels so dominant_zones itself is not trimmed.
dzs = copy.deepcopy(dominant_zones)
for project in PROJECTS_2022:
    # dzs[project] = dzs[project].dropna()
    # Drop zones without a mean / std value before aligning.
    control_tloads_mean[project] = control_tloads_mean[project].dropna()
    control_tloads_std[project] = control_tloads_std[project].dropna()
# Keep only the zones shared by all three tables.
control_tloads_mean, control_tloads_std, dzs = base.make_common_index(
    [control_tloads_mean, control_tloads_std, dzs]
)

In [None]:
# Dot plot of each zone's mean control-day load (+/- one standard deviation),
# one panel per building, coloured by category code 0-4.
fig = viz.make_dot_plot(
    y_data=control_tloads_mean,
    y_error_up_data=control_tloads_std,
    y_error_down_data=control_tloads_std,
    y_axis_title="Zonal Load (%)<br>Control Days",
    y_range=[-120, 120],
    x_axis_title="Fraction of Zones",
    normalize_x=True,
    color_data=dzs,
    color_legend={
        "name": dict(
            enumerate(
                [
                    f"Reduced zonal load {abs(SLICE1)}% or more",
                    "Small change zonal load",
                    "Small change zonal load (remained high)",
                    f"Increased zonal load {abs(SLICE2)}% or more",
                    "Typically in heating",
                ]
            )
        ),
        "color": dict(
            enumerate(["ForestGreen", "RoyalBlue", "DarkOrange", "Firebrick", "Gray"])
        ),
    },
    num_cols=3,
    horizontal_spacing=0.1,
    vertical_spacing=0.075,
    marker_size=10,
    title_size=38,
    text_size=30,
    legend_size=38,
    width=1000,
    height=600,
)
# Horizontal legend centred underneath the plot area.
fig = fig.update_layout(
    legend={
        "x": 0.5,
        "y": -0.05,
        "xanchor": "center",
        "yanchor": "top",
        "orientation": "h",
    }
)

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/tloads.png")

## Persistence, cooling requests

In [None]:
# Row labels of the persistence tables: the zone-fraction row, a blank spacer,
# then for each year the yearly total followed by its monthly entries. A second
# (two-space) spacer separates the 2022 block from the 2023 block.
years = ["2022", "2023"]
months = list(range(5, 10))  # May through September

periods = ["Frac Zones", " "]
for year in years:
    periods.extend([year] + [f"{month}/{year}" for month in months])
    if year == "2022":
        periods.append("  ")

# Zone-class labels; the list position is the class code (0-4) used in
# `dominant_zones`.
groups = [
    f"Reduced zonal load {abs(SLICE1)}% or more",
    "Small change zonal load",
    "Small change zonal load (remained high)",
    f"Increased zonal load {abs(SLICE2)}% or more",
    "Typically in heating",
]

# Column order for the stacked bars: same labels, with "remained high" moved
# ahead of the plain "small change" class.
order_cols = [groups[position] for position in (0, 2, 1, 3, 4)]

# Per-project persistence tables, filled in by the next cell.
persistence = {}

In [None]:
# For every project, build a table (rows = `periods`, columns = zone classes) with
#   * "Frac Zones": share of classified zones in each class, and
#   * per year / per month: share of summed cooling requests coming from each class.
for project in PROJECTS_2022:
    # 2022 control-period cooling requests, summed per calendar month.
    CRs_2022 = get_2022_control_data(
        project,
        "zone-simple_cooling_requests",
        no_weekends=True,
        only_business_hours=True,
        resample_rule="1h",
        resample_statistic="Mean",
        remove_FCUs=False,
        clean_underyling_data=True,  # cleans tload
    )
    CRs_2022 = CRs_2022.groupby(CRs_2022.index.month).sum()

    # Summer 2023 cooling requests, summed per calendar month.
    CRs_2023 = cleaning.clean_df(
        load_zones("2023", project, "zone-simple_cooling_requests", clean_data=True),
        this_var="zone-dummy",
        only_business_hours=True,
        no_weekends=True,
        remove_FCUs=False,
        start_date=pd.Timestamp("2023-05-01"),
        end_date=pd.Timestamp("2023-10-01"),
        resample_rule="1h",
        resample_statistic="Mean",
    )
    CRs_2023 = CRs_2023.groupby(CRs_2023.index.month).sum()

    # Keep only zones (columns) present in both years and in the classification.
    CRs_2022, CRs_2023, this_dzs = base.trim_to_common_elements(
        [CRs_2022, CRs_2023, dominant_zones[project].T],
        clean_cols=True,
        clean_idx=False,
    )
    CRs = {"2022": CRs_2022, "2023": CRs_2023}

    this_dzs = this_dzs.T
    all_zones = list(this_dzs.index)

    # Initialize with a float so the fractions written below do not have to
    # upcast int64 columns (pandas deprecates that silent upcasting).
    persistence_df = pd.DataFrame(0.0, index=periods, columns=groups)
    n_classified = len(dominant_zones[project].dropna())
    for group_i, group in enumerate(groups):
        zones = list(
            (dominant_zones[project][dominant_zones[project] == group_i]).dropna().index
        )
        persistence_df.loc["Frac Zones", group] = len(zones) / n_classified
        # edge case correction: only zones that survived the trimming above
        zones = list(set(zones).intersection(set(all_zones)))
        for year, year_CRs in CRs.items():
            persistence_df.loc[year, group] = (
                year_CRs.loc[:, zones].sum().sum()
                / year_CRs.loc[:, all_zones].sum().sum()
            )
            for month in months:
                persistence_df.loc[f"{month}/{year}", group] = (
                    year_CRs.loc[month, zones].sum()
                    / year_CRs.loc[month, all_zones].sum()
                )
    persistence[project] = persistence_df[order_cols]
    print(project)

In [None]:
# Stacked bar chart of the per-project persistence tables, one color per zone class.
fig = viz.make_bar_plot(
    y_data=persistence,
    bar_legend={
        "color": {
            f"Reduced zonal load {abs(SLICE1)}% or more": "ForestGreen",
            "Small change zonal load": "RoyalBlue",
            "Small change zonal load (remained high)": "DarkOrange",
            f"Increased zonal load {abs(SLICE2)}% or more": "Firebrick",
            "Typically in heating": "Gray",
        },
    },
    y_axis_title="Frac Zones or<br>Frac Zone-Hours<br>>70% Zonal Load",
    y_range=[0, 1],
    bar_width=0.6,
    bar_mode="stack",
    num_cols=3,
    horizontal_spacing=0.1,
    vertical_spacing=0.1,
    width=1000,
    height=650,
    title_size=38,
    text_size=34,
    legend_size=38,
    annotation_size=16,
    annotation_angle=90,
)
# Horizontal legend, centered below the subplots.
fig = fig.update_layout(
    legend=dict(
        x=0.5,
        y=-0.075,
        xanchor="center",
        yanchor="top",
        orientation="h",
    )
)
fig.update_xaxes(tickangle=45)

# Solid lines from y=0 to y=1.2 at x=1 and x=8 in every subplot.
# NOTE(review): positions 1 and 8 of `periods` are the blank spacer entries, so
# these presumably act as dividers between the blocks - confirm in the figure.
fig = add_line_to_subplots(
    fig, (1, 1), (0, 1.2), total_subplots=len(PROJECTS_2022), dash="solid", width=4
)
fig = add_line_to_subplots(
    fig, (8, 8), (0, 1.2), total_subplots=len(PROJECTS_2022), dash="solid", width=4
)

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/CRs_over_time.png")

## Total (for graphical abstract)

In [None]:
# Pool all projects: count zones per class and sum 2022 control-period cooling
# requests per class. Both rows are raw totals here; they are normalized later.
# Initialized with a float so accumulating float sums does not have to upcast
# int64 columns (pandas deprecates that silent upcasting).
persistence_total = pd.DataFrame(
    0.0, index=["Fraction of Zones", "Fraction of Cooling Requests"], columns=groups
)
total_zones = 0
for project in PROJECTS_2022:
    CRs_2022 = get_2022_control_data(
        project,
        "zone-simple_cooling_requests",
        no_weekends=True,
        only_business_hours=True,
        resample_rule="1h",
        resample_statistic="Mean",
        remove_FCUs=False,
        clean_underyling_data=True,  # cleans tload
    )

    # Keep only zones (columns) present in both the requests and the classification.
    CRs_2022, this_dzs = base.trim_to_common_elements(
        [CRs_2022, dominant_zones[project].T],
        clean_cols=True,
        clean_idx=False,
    )
    this_dzs = this_dzs.T
    all_zones = list(this_dzs.index)

    for group_i, group in enumerate(groups):
        zones = list(
            (dominant_zones[project][dominant_zones[project] == group_i]).dropna().index
        )
        # The zone count uses every classified zone ...
        persistence_total.loc["Fraction of Zones", group] += len(zones)
        # ... while the request total only uses zones that survived the trimming
        # (edge case correction).
        zones = list(set(zones).intersection(set(all_zones)))
        persistence_total.loc["Fraction of Cooling Requests", group] += (
            CRs_2022.loc[:, zones].sum().sum()
        )
    print(project)

In [None]:
# Collapse the five zone classes into the three groups of the graphical abstract:
#   Dominated = small change + typically in heating
#   Dominant  = reduced zonal load
#   Rogue     = small change (remained high) + increased zonal load
persistence_total_adj = pd.DataFrame(
    {
        "Dominated Zones": persistence_total["Small change zonal load"]
        + persistence_total["Typically in heating"],
        "Dominant Zones": persistence_total[
            f"Reduced zonal load {abs(SLICE1)}% or more"
        ],
        "Rogue Zones": persistence_total["Small change zonal load (remained high)"]
        + persistence_total[f"Increased zonal load {abs(SLICE2)}% or more"],
    }
)

In [None]:
# Turn each row of totals into fractions that sum to one across the groups.
persistence_total_norm = persistence_total_adj.div(
    persistence_total_adj.sum(axis="columns"), axis="index"
)

In [None]:
# Stacked bar chart of the pooled, normalized shares (zones vs. cooling requests)
# for the three aggregated zone groups.
fig = viz.make_bar_plot(
    y_data=persistence_total_norm,
    bar_legend={
        "color": {
            "Dominated Zones": "RoyalBlue",
            "Dominant Zones": "ForestGreen",
            "Rogue Zones": "DarkOrange",
            # "Other": "LightGray"
        },
    },
    y_axis_title="Fraction Zones or<br>Fraction Cooling Requests",
    y_range=[0, 1],
    bar_width=0.6,
    bar_mode="stack",
    num_cols=3,
    horizontal_spacing=0.1,
    vertical_spacing=0.1,
    width=1000,
    height=650,
    text_size=30,
    legend_size=26,
    # annotations=["Dominated Zones", "Dominant Zones", "Rogue Zones"],
    # annotation_size=18,
    # annotation_angle=0,
    # annotation_thresh=.05
)
# Horizontal legend, centered below the plot.
fig = fig.update_layout(
    legend=dict(
        x=0.5,
        y=-0.125,
        xanchor="center",
        yanchor="top",
        orientation="h",
    )
)

In [None]:
# Render the graphical-abstract bar chart as the cell output.
fig

## Change in cooling requests (CRs)

In [None]:
# Projects that are in both the 2022 list and the VAV list, excluding LAB-2,
# in alphabetical order.
these_projects = sorted(
    project
    for project in set(PROJECTS_2022) & set(PROJECTS_VAV)
    if project != "LAB-2"
)

In [None]:
# Outdoor air temperature series for the summer 2022 window, cleaned with the
# business-hours flag on, weekends kept, and SI units requested.
T = cleaning.clean_df(
    df=load_weather("2022")["temperature"].to_frame(),
    this_var="weather-oat",
    only_business_hours=True,
    no_weekends=False,
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    SI_units=True,
)["temperature"]

In [None]:
# Load the 2022 simple cooling requests for the selected projects and clean them
# over the summer 2022 window, resampled to hourly means.
CRs = pull_from_dataset(
    "2022", these_projects, "zone-simple_cooling_requests", clean_data=True
)
CRs = cleaning.clean_dfs(
    dfs=CRs,
    this_var="zone-dummy",
    remove_FCUs=False,
    only_business_hours=True,
    no_weekends=NO_WEEKENDS,  # per-project weekend flags
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    resample_rule="1h",
    resample_statistic="Mean",
)

In [None]:
# Zone-class code (as stored in `dominant_zones`) -> display label.
categories = dict(
    enumerate(
        [
            f"Reduced zonal load {abs(SLICE1)}% or more",
            "Small change zonal load",
            "Small change zonal load (remained high)",
            f"Increased zonal load {abs(SLICE2)}% or more",
            "Typically in heating",
        ]
    )
)

In [None]:
# Per project: sum the hourly cooling requests of all zones in each zone class,
# giving one column per class.
CRs_grouped = {}
for project in these_projects:
    project_CRs = CRs[project]
    zone_classes = dominant_zones[project]
    grouped = pd.DataFrame(
        index=project_CRs.index, columns=list(categories.values())
    )
    for class_code, class_name in categories.items():
        members = zone_classes[zone_classes == class_code].dropna().index
        # only zones that actually have cooling-request data
        shared = [zone for zone in members.unique() if zone in project_CRs.columns]
        grouped[class_name] = project_CRs[shared].sum(axis=1)
    CRs_grouped[project] = grouped

In [None]:
# Regress the class-level cooling requests against outdoor temperature.
# The helper returns six objects, unpacked here as (estimate, low, high) for the
# "76" case followed by (estimate, low, high) for the "78" case.
(
    deltas_76_CRs,
    deltas_low_76_CRs,
    deltas_high_76_CRs,
    deltas_78_CRs,
    deltas_low_78_CRs,
    deltas_high_78_CRs,
) = run_group_regressions(CRs_grouped, T, "Absolute Change", summary_statistic="Mean")

In [None]:
# Collect the "78" results into project x class tables: the point estimate plus
# the low and high bounds, one table each.
class_names = list(categories.values())
CRs_summary_df, CRs_summary_df_low, CRs_summary_df_high = (
    pd.DataFrame(index=these_projects, columns=class_names) for _ in range(3)
)
for summary_table, delta_source in (
    (CRs_summary_df, deltas_78_CRs),
    (CRs_summary_df_low, deltas_low_78_CRs),
    (CRs_summary_df_high, deltas_high_78_CRs),
):
    for project in these_projects:
        for category in class_names:
            summary_table.loc[project, category] = delta_source[project].loc[
                category, "78"
            ]

In [None]:
# Summary plot of the change in cooling requests per project, one marker per
# zone class, with the high/low tables passed as up/down error data.
fig = viz.plot_experiment_summary(
    y_data=CRs_summary_df,
    y_error_up_data=CRs_summary_df_high,
    y_error_down_data=CRs_summary_df_low,
    marker_legend={
        "color": {
            f"Reduced zonal load {abs(SLICE1)}% or more": "ForestGreen",
            "Small change zonal load": "RoyalBlue",
            "Small change zonal load (remained high)": "DarkOrange",
            f"Increased zonal load {abs(SLICE2)}% or more": "Firebrick",
            "Typically in heating": "Gray",
        },
    },
    y_axis_title="Absolute Change in<br>Estimated CRs/hour",
    point_start=(1 / 6),
    offset_delta=(1 / 6),
    tick_vals=[i + 0.5 for i in range(len(these_projects))],  # one tick per project
    width=1250,
    height=650,
    text_size=22,
    x_range=[-0.25, len(these_projects)],
    marker_size=10,
    error_thickness=2.5,
    whisker_len=8,
    y_range=[-14, 4],
)

# Background boxes at the integer x positions 0..len(these_projects).
fig = add_vertical_boxes(
    fig, list(range(len(these_projects) + 1)), background_color="lightgray"
)
# Horizontal legend, centered below the plot.
fig = fig.update_layout(
    legend=dict(
        x=0.5, y=-0.1, xanchor="center", yanchor="top", orientation="h", font_size=20
    )
)

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/zonal_delta_CRs.png")

## Change in airflow

In [None]:
# Alphabetical list of projects found in both PROJECTS_2022 and PROJECTS_VAV,
# with LAB-2 left out.
vav_projects_2022 = set(PROJECTS_2022).intersection(PROJECTS_VAV)
these_projects = sorted(vav_projects_2022 - {"LAB-2"})

In [None]:
# Outdoor air temperature series for the summer 2022 window (business-hours flag
# on, weekends kept, SI units requested); regressor for the airflow models below.
T = cleaning.clean_df(
    df=load_weather("2022")["temperature"].to_frame(),
    this_var="weather-oat",
    only_business_hours=True,
    no_weekends=False,
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    SI_units=True,
)["temperature"]

In [None]:
# Load the 2022 zone airflow setpoints for the selected projects and clean them
# over the summer 2022 window at hourly resolution, with SI units requested.
airflow = pull_from_dataset("2022", these_projects, "zone-airflowsp")
airflow = cleaning.clean_dfs(
    dfs=airflow,
    this_var="zone-airflowsp",
    remove_FCUs=False,
    only_business_hours=True,
    no_weekends=NO_WEEKENDS,  # per-project weekend flags
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    SI_units=True,
    resample_rule="1h",
)

In [None]:
# Display labels of the zone classes, keyed by the class code 0-4.
category_labels = (
    f"Reduced zonal load {abs(SLICE1)}% or more",
    "Small change zonal load",
    "Small change zonal load (remained high)",
    f"Increased zonal load {abs(SLICE2)}% or more",
    "Typically in heating",
)
categories = {code: label for code, label in enumerate(category_labels)}

In [None]:
# Per project: total hourly airflow setpoint of all zones in each zone class,
# giving one column per class.
airflow_grouped = {}
for project in these_projects:
    project_airflow = airflow[project]
    zone_classes = dominant_zones[project]
    grouped = pd.DataFrame(
        index=project_airflow.index, columns=list(categories.values())
    )
    for class_code, class_name in categories.items():
        members = zone_classes[zone_classes == class_code].dropna().index
        # only zones that actually have airflow data
        shared = [zone for zone in members.unique() if zone in project_airflow.columns]
        grouped[class_name] = project_airflow[shared].sum(axis=1)
    airflow_grouped[project] = grouped

In [None]:
# Regress the class-level airflow against outdoor temperature.
# The helper returns six objects, unpacked as (estimate, low, high) for the "76"
# case followed by (estimate, low, high) for the "78" case.
# NOTE(review): only the "76" names carry a `_2022` suffix here; the "78" names
# do not - naming inconsistency only.
(
    deltas_76_airflow_2022,
    deltas_low_76_airflow_2022,
    deltas_high_76_airflow_2022,
    deltas_78_airflow,
    deltas_low_78_airflow,
    deltas_high_78_airflow,
) = run_group_regressions(airflow_grouped, T, "Absolute Change")

In [None]:
# Collect the "78" airflow results into project x class tables: the point
# estimate plus the low and high bounds, one table each.
class_names = list(categories.values())
airflow_summary_df, airflow_summary_df_low, airflow_summary_df_high = (
    pd.DataFrame(index=these_projects, columns=class_names) for _ in range(3)
)
for summary_table, delta_source in (
    (airflow_summary_df, deltas_78_airflow),
    (airflow_summary_df_low, deltas_low_78_airflow),
    (airflow_summary_df_high, deltas_high_78_airflow),
):
    for project in these_projects:
        for category in class_names:
            summary_table.loc[project, category] = delta_source[project].loc[
                category, "78"
            ]

In [None]:
# Summary plot of the change in airflow per project, one marker per zone class,
# with the high/low tables passed as up/down error data.
fig = viz.plot_experiment_summary(
    y_data=airflow_summary_df,
    y_error_up_data=airflow_summary_df_high,
    y_error_down_data=airflow_summary_df_low,
    marker_legend={
        "color": {
            f"Reduced zonal load {abs(SLICE1)}% or more": "ForestGreen",
            "Small change zonal load": "RoyalBlue",
            "Small change zonal load (remained high)": "DarkOrange",
            f"Increased zonal load {abs(SLICE2)}% or more": "Firebrick",
            "Typically in heating": "Gray",
        },
    },
    y_axis_title="Absolute Change in Airflow (m3/h)",
    point_start=(1 / 6),
    offset_delta=(1 / 6),
    tick_vals=[i + 0.5 for i in range(len(these_projects))],  # one tick per project
    width=1200,
    height=680,
    text_size=22,
    x_range=[-0.25, len(these_projects)],
    marker_size=10,
    error_thickness=2.5,
    whisker_len=8,
    y_range=[-25000, 6000],
)

# Background boxes at the integer x positions 0..len(these_projects).
fig = add_vertical_boxes(
    fig, list(range(len(these_projects) + 1)), background_color="lightgray"
)
# Horizontal legend, centered below the plot.
fig = fig.update_layout(
    legend=dict(
        x=0.5, y=-0.1, xanchor="center", yanchor="top", orientation="h", font_size=20
    )
)

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/zonal_delta_airflow.png")

## Normalized airflow

In [None]:
# Mapping from zone-class code (0-4) to its display label.
categories = dict(
    zip(
        range(5),
        (
            f"Reduced zonal load {abs(SLICE1)}% or more",
            "Small change zonal load",
            "Small change zonal load (remained high)",
            f"Increased zonal load {abs(SLICE2)}% or more",
            "Typically in heating",
        ),
    )
)

In [None]:
# Projects in both PROJECTS_2022 and PROJECTS_VAV except LAB-2, sorted by name.
these_projects = sorted(
    name for name in set(PROJECTS_2022) if name in set(PROJECTS_VAV) and name != "LAB-2"
)

In [None]:
# Same airflow load/clean as in the previous section, but additionally restricted
# by an hourly filter built from the "CSP = 74F" schedule column of the 2022
# experiment (the control setpoint, per the plot title used below).
airflow = pull_from_dataset("2022", these_projects, "zone-airflowsp")
airflow = cleaning.clean_dfs(
    dfs=airflow,
    this_var="zone-airflowsp",
    remove_FCUs=False,
    only_business_hours=True,
    no_weekends=NO_WEEKENDS,
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    SI_units=True,
    resample_rule="1h",
    hourly_filter=cleaning.get_experiment_hourly_filter(
        projects=these_projects,
        experiment_year="2022",
        filter_columns=["CSP = 74F"],
        no_weekends=True,
    ),
)

In [None]:
# Per project: sum the hourly airflow of the zones in each class, then divide
# every row by its total so each class column holds its share of the summed
# airflow for that hour.
airflow_grouped = {}
for project in these_projects:
    project_airflow = airflow[project]
    zone_classes = dominant_zones[project]
    grouped = pd.DataFrame(
        index=project_airflow.index, columns=list(categories.values())
    )
    for class_code, class_name in categories.items():
        members = zone_classes[zone_classes == class_code].dropna().index
        # only zones that actually have airflow data
        shared = [zone for zone in members.unique() if zone in project_airflow.columns]
        grouped[class_name] = project_airflow[shared].sum(axis=1)
    airflow_grouped[project] = grouped.div(grouped.sum(axis=1), axis=0)

In [None]:
# Project x class tables of the airflow share: the mean as the plotted value and
# the standard deviation as both the lower and the upper error.
class_names = list(categories.values())
airflow_summary_df, airflow_summary_df_low, airflow_summary_df_high = (
    pd.DataFrame(index=these_projects, columns=class_names) for _ in range(3)
)
for summary_table, statistic in (
    (airflow_summary_df, "mean"),
    (airflow_summary_df_low, "std"),
    (airflow_summary_df_high, "std"),
):
    for project in these_projects:
        for category in class_names:
            share = airflow_grouped[project][category]
            summary_table.loc[project, category] = getattr(share, statistic)()

In [None]:
# Summary plot of each zone class's share of airflow on control days, per
# project; the standard-deviation tables are passed as the up/down error data.
fig = viz.plot_experiment_summary(
    y_data=airflow_summary_df,
    y_error_up_data=airflow_summary_df_high,
    y_error_down_data=airflow_summary_df_low,
    marker_legend={
        "color": {
            f"Reduced zonal load {abs(SLICE1)}% or more": "ForestGreen",
            "Small change zonal load": "RoyalBlue",
            "Small change zonal load (remained high)": "DarkOrange",
            f"Increased zonal load {abs(SLICE2)}% or more": "Firebrick",
            "Typically in heating": "Gray",
        },
    },
    y_axis_title="Fraction of Building-Wide Airflow<br>Control Days",
    point_start=(1 / 6),
    offset_delta=(1 / 6),
    tick_vals=[i + 0.5 for i in range(len(these_projects))],  # one tick per project
    width=1200,
    height=680,
    text_size=22,
    x_range=[-0.25, len(these_projects)],
    marker_size=10,
    error_thickness=2.5,
    whisker_len=8,
    y_range=[-0.1, 0.8],
)

# Background boxes at the integer x positions 0..len(these_projects).
fig = add_vertical_boxes(
    fig, list(range(len(these_projects) + 1)), background_color="lightgray"
)
# Horizontal legend, centered below the plot.
fig = fig.update_layout(
    legend=dict(
        x=0.5, y=-0.1, xanchor="center", yanchor="top", orientation="h", font_size=20
    )
)

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/airflow_norm.png")

## CSP = 76F results

### Change in cooling

In [None]:
# Outdoor air temperature series for the summer 2022 window (business-hours flag
# on, weekends kept, SI units requested).
T_2022 = cleaning.clean_df(
    df=load_weather("2022")["temperature"].to_frame(),
    this_var="weather-oat",
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    only_business_hours=True,
    no_weekends=False,
    SI_units=True,
)["temperature"]

In [None]:
# Outdoor air temperature series for 2021 between the explicit 05-01-2021 and
# 10-01-2021 timestamps (business-hours flag on, weekends kept, SI units).
T_2021 = cleaning.clean_df(
    df=load_weather("2021")["temperature"].to_frame(),
    this_var="weather-oat",
    start_date=pd.Timestamp("05-01-2021"),
    end_date=pd.Timestamp("10-01-2021"),
    only_business_hours=True,
    no_weekends=False,
    SI_units=True,
)["temperature"]

In [None]:
# Building-level cooling for 2022, cleaned in two passes: first column by column
# with the per-project weekend flags, then as a whole frame over the summer
# 2022 window.
cooling_df_2022 = cleaning.clean_by_column(
    # scaled by MW_PER_TON * 1000 - presumably tons -> kW; the original note said
    # kWh, TODO confirm units
    df=load_building("2022", "C") * base.MW_PER_TON * 1000,  # kWh
    only_business_hours=True,
    no_weekends=NO_WEEKENDS,
)
cooling_df_2022 = cleaning.clean_df(
    df=cooling_df_2022,
    this_var="building-cooling",
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    only_business_hours=True,
    no_weekends=False,
)
# One single-column frame per project, with the listed day(s) blanked out.
cooling_2022 = {}
for project in PROJECTS_2022:
    cooling_2022[project] = cooling_df_2022[project].to_frame()
    for day in ["06-23-2022"]:
        # all rows on this date are set to NaN (reason not recorded here)
        cooling_2022[project].loc[day, :] = np.nan

In [None]:
# Building-level cooling for 2021, cleaned in two passes like the 2022 data but
# over the explicit 05-01-2021 .. 10-01-2021 window.
# NOTE(review): columns are selected with PROJECTS_2022 while only the
# PROJECTS_2021 subset is used in the loop below - confirm the 2021 dataset
# really contains every PROJECTS_2022 column.
cooling_df_2021 = cleaning.clean_by_column(
    # scaled by MW_PER_TON * 1000 - presumably tons -> kW; the original note said
    # kWh, TODO confirm units
    df=load_building("2021", "C")[PROJECTS_2022] * base.MW_PER_TON * 1000,  # kWh
    only_business_hours=True,
    no_weekends=NO_WEEKENDS,
)
cooling_df_2021 = cleaning.clean_df(
    df=cooling_df_2021,
    this_var="building-cooling",
    start_date=pd.Timestamp("05-01-2021"),
    end_date=pd.Timestamp("10-01-2021"),
    only_business_hours=True,
    no_weekends=False,
)
# One single-column frame per 2021 project.
cooling_2021 = {}
for project in PROJECTS_2021:
    cooling_2021[project] = cooling_df_2021[project].to_frame()

In [None]:
# Building-level regression of 2022 cooling against outdoor temperature, in
# "Percent Change" mode.
# NOTE: the unpacking order here is (estimate, high, low), unlike the
# (estimate, low, high) order used for the group/equipment regressions.
(
    deltas_cooling,
    deltas_high_cooling,
    deltas_low_cooling,
    percent_summary,
    fig,
) = run_building_regressions(
    cooling_2022,
    T_2022,
    mode="Percent Change",
    summary_statistic="Mean",
    y_axis_title="Average Cooling Demand<br>(kW/1000m2)",
    use_raw=False,
)

In [None]:
# Same building-level regression for the 2021 data.
# NOTE: `percent_summary` and `fig` from the 2022 call are overwritten here.
(
    deltas_cooling_2021,
    deltas_high_cooling_2021,
    deltas_low_cooling_2021,
    percent_summary,
    fig,
) = run_building_regressions(
    cooling_2021,
    T_2021,
    year="2021",
    mode="Percent Change",
    summary_statistic="Mean",
    y_axis_title="Average Cooling Demand<br>(kW/1000m2)",
    use_raw=False,
)

In [None]:
# Relabel the two 2022 result columns by setpoint, add an empty 2021 column, and
# put the columns in plotting order. The frames are relabelled in place, so this
# cell expects the two-column frames from the regression cell above.
deltas_list = []
for frame in (deltas_cooling, deltas_high_cooling, deltas_low_cooling):
    frame.columns = ["CSP = 24.4C (2022)", "CSP = 25.5C (2022)"]
    frame["CSP = 24.4C (2021)"] = np.nan
    deltas_list.append(
        frame[["CSP = 25.5C (2022)", "CSP = 24.4C (2022)", "CSP = 24.4C (2021)"]]
    )
deltas_cooling, deltas_high_cooling, deltas_low_cooling = deltas_list

In [None]:
# previously reported results
beta = {
    "OFF-1": -0.24,  # previously CONF-1
    "OFF-3": -0.33,  # previously OFF-2
    "OFF-4": -0.14,  # previously LIB-3
    "OFF-6": -0.23,  # previously OFF-4
    "LAB-1": -0.044,  # previously LAB-5
    "LAB-3": -0.034,  # previously LAB-6
}

err = {
    "OFF-1": 0.048,  # previously CONF-1
    "OFF-3": 0.028,  # previously OFF-2
    "OFF-4": 0.031,  # previously LIB-3
    "OFF-6": 0.022,  # previously OFF-4
    "LAB-1": 0.017,  # previously LAB-5
    "LAB-3": 0.0092,  # previously LAB-6
}

for project in beta:
    deltas_cooling.loc[project, "CSP = 24.4C (2021)"] = 100 * (
        np.exp(beta[project]) - 1
    )
    deltas_high_cooling.loc[project, "CSP = 24.4C (2021)"] = (
        100 * (np.exp(beta[project] - 1.96 * err[project]) - 1)
        - deltas_cooling.loc[project, "CSP = 24.4C (2021)"]
    )
    deltas_low_cooling.loc[project, "CSP = 24.4C (2021)"] = deltas_cooling.loc[
        project, "CSP = 24.4C (2021)"
    ] - 100 * (np.exp(beta[project] + 1.96 * err[project]) - 1)

In [None]:
# Summary plot of the percent change in cooling demand per project for the three
# setpoint/year columns; the 2021 series shares the 24.4C color at half opacity.
fig = viz.plot_experiment_summary(
    y_data=deltas_cooling,
    y_error_up_data=deltas_high_cooling,
    y_error_down_data=deltas_low_cooling,
    marker_legend={
        "color": {
            "CSP = 24.4C (2021)": "Blue",
            "CSP = 24.4C (2022)": "Blue",
            "CSP = 25.5C (2022)": "Black",
        },
        "opacity": {
            "CSP = 24.4C (2021)": 0.5,
            "CSP = 24.4C (2022)": 1,
            "CSP = 25.5C (2022)": 1,
        },
    },
    y_axis_title="Percent Change in<br>Cooling Demand (%)",
    point_start=(1 / 4),
    offset_delta=(1 / 4),
    tick_vals=[i + 0.5 for i in range(len(PROJECTS_2022))],  # one tick per project
    width=1200,
    height=600,
    text_size=22,
    x_range=[-0.25, len(PROJECTS_2022)],
    y_range=[-60, 30],
    marker_size=10,
    error_thickness=2.5,
    whisker_len=8,
)
# Background boxes at the integer x positions 0..len(PROJECTS_2022).
fig = add_vertical_boxes(
    fig, list(range(len(PROJECTS_2022) + 1)), background_color="lightgray"
)
# Horizontal legend, centered below the plot.
fig = fig.update_layout(
    legend=dict(
        x=0.5,
        y=-0.1,
        xanchor="center",
        yanchor="top",
        orientation="h",
    )
)

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/delta_cooling_sps.png")

### Fraction of responding zones

In [None]:
# Thresholds for classifying zones in this section.
# NOTE(review): SLICE1/SLICE2 are also read by cells earlier in the notebook
# (legend labels, `groups`, `categories`); re-running those after this cell
# picks up the values assigned here.
SLICE1 = 10  # delta tload 10% or more
SLICE2 = -10  # delta tload -10% or less

HIGH_THRESH = 30  # control tload 30% or more
LOW_THRESH = -10  # control tload -10% or less

In [None]:
# Result table: one row per project and one column per setpoint/year, to hold
# the fraction of zones in class 0; entries start out empty (NaN).
dominant_zones_all = pd.DataFrame(
    index=PROJECTS_2022,
    columns=["CSP = 25.5C (2022)", "CSP = 24.4C (2022)", "CSP = 24.4C (2021)"],
)
# Extra pooled row, initialized to zero for all columns.
dominant_zones_all.loc["TOTAL", :] = 0

#### CSP = 78F, 2022

In [None]:
# Outdoor air temperature series for the summer 2022 window (business-hours flag
# on, weekends kept, SI units requested).
T = cleaning.clean_df(
    df=load_weather("2022")["temperature"].to_frame(),
    this_var="weather-oat",
    only_business_hours=True,
    no_weekends=False,
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    SI_units=True,
)["temperature"]

# Zonal loads for all 2022 projects, cleaned over the same window at hourly
# resolution with the per-project weekend flags.
tloads = pull_from_dataset("2022", PROJECTS_2022, "zone-tloads")
tloads = cleaning.clean_dfs(
    dfs=tloads,
    this_var="zone-tloads",
    only_business_hours=True,
    no_weekends=NO_WEEKENDS,
    remove_FCUs=False,
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    resample_rule="1h",
)

In [None]:
# Per-zone regression of zonal load against outdoor temperature for 2022.
# The helper returns six objects, unpacked as (estimate, low, high) for the "76"
# case followed by (estimate, low, high) for the "78" case.
(
    deltas_76_tloads_2022,
    deltas_low_76_tloads_2022,
    deltas_high_76_tloads_2022,
    deltas_78_tloads,
    deltas_low_78_tloads,
    deltas_high_78_tloads,
) = run_equip_regressions(
    tloads,
    T,
    "Absolute Change",
)

In [None]:
# Classify zones by their "78" zonal-load change, cutting at SLICE1 and SLICE2.
# `mapping` relabels the helper's output codes 2/1/0 to classes 0/1/3; classes 2
# and 4 are assigned in a later cell.
# NOTE: this overwrites the notebook-wide `dominant_zones`.
dominant_zones = clustering.run_1D_clustering_on_dict(
    deltas_78_tloads, slices=[SLICE1, SLICE2], mapping={2: 0, 1: 1, 0: 3}
)

In [None]:
# Reload and clean the 2022 zonal loads, this time with weekends dropped for
# every project (no_weekends=True rather than the per-project flags).
tloads = pull_from_dataset("2022", PROJECTS_2022, "zone-tloads")

tloads = cleaning.clean_dfs(
    dfs=tloads,
    this_var="zone-tloads",
    remove_FCUs=False,
    only_business_hours=True,
    no_weekends=True,
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    resample_rule="1h",
)

# Restrict each project's zones to the hours selected by the zonal
# "CSP = 74F" filter.
control_tloads = {}
for project in PROJECTS_2022:
    df = tloads[project]
    zones = list(df.columns)
    hourly_filters = cleaning.get_zonal_experiment_hourly_filter(
        project,
        zones,
        ["CSP = 74F"],
        no_weekends=False,
    )
    control_tloads[project] = cleaning.clean_by_column(
        df,
        hourly_filter=hourly_filters,
        no_weekends=False,
    )
    print(project)  # progress indicator

# Collapse each project's filtered loads with the "Mean" test; the name
# `control_tloads` now refers to these summaries instead of the time series.
control_tloads = base.run_passive_test_on_dfs(
    dfs=control_tloads,
    this_test="Mean",
    col_name="Average Zonal Load (%)<br>Control Days",
)

In [None]:
# Refine the zone classes of every project using the control-day mean zonal load:
#   class 1 zones whose control load is >= HIGH_THRESH become class 2
#     (small change in tload, but remained high);
#   any zone whose control load is <= LOW_THRESH becomes class 4 (in heating);
#   zones without a class are dropped.
for project in PROJECTS_2022:
    zone_classes = dominant_zones[project]
    first_class_col = zone_classes.iloc[:, 0]
    control_mean = control_tloads[project].iloc[:, 0]

    # small change in tload, but remained high
    small_change = first_class_col[first_class_col == 1].index
    remained_high = control_mean[control_mean >= HIGH_THRESH].index
    zone_classes.loc[small_change.intersection(remained_high), :] = 2

    # in heating; restricted to zones that were classified, so a zone that has
    # control data but no class cannot raise a KeyError in `.loc`
    in_heating = control_mean[control_mean <= LOW_THRESH].index
    zone_classes.loc[in_heating.intersection(zone_classes.index), :] = 4  # np.nan

    dominant_zones[project] = zone_classes.dropna()

In [None]:
# Fraction of zones in class 0 (reduced zonal load) per project, plus the pooled
# fraction over all projects in the "TOTAL" row.
# The pooled count is accumulated locally and assigned once, so re-running this
# cell gives the same result instead of compounding the stored value.
total = 0  # number of classified zones over all projects
n_reduced_total = 0
for project in PROJECTS_2022:
    zone_classes = dominant_zones[project]
    n_zones = len(zone_classes)
    n_reduced = len(zone_classes[zone_classes == 0].dropna())
    # a project without classified zones gets NaN rather than a ZeroDivisionError
    dominant_zones_all.loc[project, "CSP = 25.5C (2022)"] = (
        n_reduced / n_zones if n_zones else np.nan
    )
    n_reduced_total += n_reduced
    total += n_zones
dominant_zones_all.loc["TOTAL", "CSP = 25.5C (2022)"] = (
    n_reduced_total / total if total else np.nan
)

#### CSP = 76F, 2022

In [None]:
# Classify zones by their "76" zonal-load change from the 2022 regression,
# cutting at SLICE1 and SLICE2; `mapping` relabels the helper's output codes
# 2/1/0 to classes 0/1/3.
# NOTE: this overwrites the notebook-wide `dominant_zones`.
dominant_zones = clustering.run_1D_clustering_on_dict(
    deltas_76_tloads_2022, slices=[SLICE1, SLICE2], mapping={2: 0, 1: 1, 0: 3}
)

In [None]:
# Refine the zone classes of every project using the control-day mean zonal load:
#   class 1 zones whose control load is >= HIGH_THRESH become class 2
#     (small change in tload, but remained high);
#   any zone whose control load is <= LOW_THRESH becomes class 4 (in heating);
#   zones without a class are dropped.
for project in PROJECTS_2022:
    zone_classes = dominant_zones[project]
    first_class_col = zone_classes.iloc[:, 0]
    control_mean = control_tloads[project].iloc[:, 0]

    # small change in tload, but remained high
    small_change = first_class_col[first_class_col == 1].index
    remained_high = control_mean[control_mean >= HIGH_THRESH].index
    zone_classes.loc[small_change.intersection(remained_high), :] = 2

    # in heating; restricted to zones that were classified, so a zone that has
    # control data but no class cannot raise a KeyError in `.loc`
    in_heating = control_mean[control_mean <= LOW_THRESH].index
    zone_classes.loc[in_heating.intersection(zone_classes.index), :] = 4  # np.nan

    dominant_zones[project] = zone_classes.dropna()

In [None]:
# Fraction of zones in class 0 (reduced zonal load) per project, plus the pooled
# fraction over all projects in the "TOTAL" row.
# The pooled count is accumulated locally and assigned once, so re-running this
# cell gives the same result instead of compounding the stored value.
total = 0  # number of classified zones over all projects
n_reduced_total = 0
for project in PROJECTS_2022:
    zone_classes = dominant_zones[project]
    n_zones = len(zone_classes)
    n_reduced = len(zone_classes[zone_classes == 0].dropna())
    # a project without classified zones gets NaN rather than a ZeroDivisionError
    dominant_zones_all.loc[project, "CSP = 24.4C (2022)"] = (
        n_reduced / n_zones if n_zones else np.nan
    )
    n_reduced_total += n_reduced
    total += n_zones
dominant_zones_all.loc["TOTAL", "CSP = 24.4C (2022)"] = (
    n_reduced_total / total if total else np.nan
)

#### CSP = 76F, 2021

In [None]:
# Outdoor air temperature series for 2021 between the explicit 05-01-2021 and
# 10-01-2021 timestamps (business-hours flag on, weekends kept, SI units).
T = cleaning.clean_df(
    df=load_weather("2021")["temperature"].to_frame(),
    this_var="weather-oat",
    only_business_hours=True,
    no_weekends=False,
    start_date=pd.Timestamp("05-01-2021"),
    end_date=pd.Timestamp("10-01-2021"),
    SI_units=True,
)["temperature"]

# Zonal loads for the 2021 projects, cleaned over the same window at hourly
# resolution with the per-project weekend flags.
tloads = pull_from_dataset("2021", PROJECTS_2021, "zone-tloads")
tloads = cleaning.clean_dfs(
    dfs=tloads,
    this_var="zone-tloads",
    only_business_hours=True,
    no_weekends=NO_WEEKENDS,
    remove_FCUs=False,
    start_date=pd.Timestamp("05-01-2021"),
    end_date=pd.Timestamp("10-01-2021"),
    resample_rule="1h",
)

In [None]:
# Per-zone regression of zonal load against outdoor temperature for 2021.
# With year="2021" the helper's result is unpacked into three objects:
# (estimate, low, high) for the "76" case.
(
    deltas_76_tloads_2021,
    deltas_low_76_tloads_2021,
    deltas_high_76_tloads_2021,
) = run_equip_regressions(tloads, T, "Absolute Change", year="2021")

In [None]:
# Classify zones by their "76" zonal-load change from the 2021 regression,
# cutting at SLICE1 and SLICE2; `mapping` relabels the helper's output codes
# 2/1/0 to classes 0/1/3.
# NOTE: this overwrites the notebook-wide `dominant_zones`.
dominant_zones = clustering.run_1D_clustering_on_dict(
    deltas_76_tloads_2021, slices=[SLICE1, SLICE2], mapping={2: 0, 1: 1, 0: 3}
)

In [None]:
# Reload and clean the 2021 zonal loads with weekends dropped for every project.
# NOTE(review): this cell uses SUMMER_START_2021 / SUMMER_END_2021 while the
# regression input above uses explicit 05-01-2021 / 10-01-2021 timestamps -
# confirm the two windows are meant to match.
tloads = pull_from_dataset("2021", PROJECTS_2021, "zone-tloads")

tloads = cleaning.clean_dfs(
    dfs=tloads,
    this_var="zone-tloads",
    remove_FCUs=False,
    only_business_hours=True,
    no_weekends=True,
    start_date=SUMMER_START_2021,
    end_date=SUMMER_END_2021,
    resample_rule="1h",
)

# Restrict each project's zones to the hours selected by the zonal
# "CSP = 74F" filter.
# NOTE(review): the filter call is identical to the 2022 one and passes no
# year - confirm it resolves the 2021 schedule for 2021 data.
control_tloads = {}
for project in PROJECTS_2021:
    df = tloads[project]
    zones = list(df.columns)
    hourly_filters = cleaning.get_zonal_experiment_hourly_filter(
        project,
        zones,
        ["CSP = 74F"],
        no_weekends=False,
    )
    control_tloads[project] = cleaning.clean_by_column(
        df,
        hourly_filter=hourly_filters,
        no_weekends=False,
    )
    print(project)  # progress indicator

# Collapse each project's filtered loads with the "Mean" test; the name
# `control_tloads` now refers to these summaries instead of the time series.
control_tloads = base.run_passive_test_on_dfs(
    dfs=control_tloads,
    this_test="Mean",
    col_name="Average Zonal Load (%)<br>Control Days",
)

In [None]:
# Refine the zone classes of every 2021 project using the control-day mean
# zonal load:
#   class 1 zones whose control load is >= HIGH_THRESH become class 2
#     (small change in tload, but remained high);
#   any zone whose control load is <= LOW_THRESH becomes class 4 (in heating);
#   zones without a class are dropped.
for project in PROJECTS_2021:
    zone_classes = dominant_zones[project]
    first_class_col = zone_classes.iloc[:, 0]
    control_mean = control_tloads[project].iloc[:, 0]

    # small change in tload, but remained high
    small_change = first_class_col[first_class_col == 1].index
    remained_high = control_mean[control_mean >= HIGH_THRESH].index
    zone_classes.loc[small_change.intersection(remained_high), :] = 2

    # in heating; restricted to zones that were classified, so a zone that has
    # control data but no class cannot raise a KeyError in `.loc`
    in_heating = control_mean[control_mean <= LOW_THRESH].index
    zone_classes.loc[in_heating.intersection(zone_classes.index), :] = 4  # np.nan

    dominant_zones[project] = zone_classes.dropna()

In [None]:
# Fraction of zones in class 0 (reduced zonal load) per 2021 project, plus the
# pooled fraction over those projects in the "TOTAL" row.
# The pooled count is accumulated locally and assigned once, so re-running this
# cell gives the same result instead of compounding the stored value.
total = 0  # number of classified zones over all 2021 projects
n_reduced_total = 0
for project in PROJECTS_2021:
    zone_classes = dominant_zones[project]
    n_zones = len(zone_classes)
    n_reduced = len(zone_classes[zone_classes == 0].dropna())
    # a project without classified zones gets NaN rather than a ZeroDivisionError
    dominant_zones_all.loc[project, "CSP = 24.4C (2021)"] = (
        n_reduced / n_zones if n_zones else np.nan
    )
    n_reduced_total += n_reduced
    total += n_zones
dominant_zones_all.loc["TOTAL", "CSP = 24.4C (2021)"] = (
    n_reduced_total / total if total else np.nan
)

In [None]:
# Summary plot of the fraction of zones in class 0 per project (plus the TOTAL
# row) for the three setpoint/year columns. All series share one color and are
# told apart by opacity and marker shape; no error data is passed.
fig = viz.plot_experiment_summary(
    y_data=dominant_zones_all,
    marker_legend={
        "color": {
            "CSP = 24.4C (2021)": "ForestGreen",
            "CSP = 24.4C (2022)": "ForestGreen",
            "CSP = 25.5C (2022)": "ForestGreen",
        },
        "opacity": {
            "CSP = 24.4C (2021)": 0.5,
            "CSP = 24.4C (2022)": 1,
            "CSP = 25.5C (2022)": 1,
        },
        "shape": {
            "CSP = 24.4C (2021)": "x",
            "CSP = 24.4C (2022)": "x",
            "CSP = 25.5C (2022)": "circle",
        },
    },
    y_axis_title=f"Fraction of Zones<br>Reducing Zonal Load {SLICE1}% or More",
    point_start=(1 / 4),
    offset_delta=(1 / 4),
    # the "+ 1" terms make room for the extra TOTAL row
    tick_vals=[i + 0.5 for i in range(len(PROJECTS_2022) + 1)],
    width=1200,
    height=600,
    text_size=22,
    x_range=[-0.25, len(PROJECTS_2022) + 1],
    y_range=[0, 1],
    marker_size=10,
    error_thickness=2.5,
    whisker_len=8,
)
# Background boxes at the integer x positions 0..len(PROJECTS_2022) + 1.
fig = add_vertical_boxes(
    fig, list(range(len(PROJECTS_2022) + 1 + 1)), background_color="lightgray"
)
# Horizontal legend, centered below the plot.
fig = fig.update_layout(
    legend=dict(
        x=0.5,
        y=-0.1,
        xanchor="center",
        yanchor="top",
        orientation="h",
    )
)

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/dzs_over_time.png")

## Evaluating setpoints

### Building level, command

In [None]:
# Every 2022 project except LAB-3, as a fresh list (PROJECTS_2022 is untouched).
# NOTE(review): LAB-3 is presumably dropped because it lacks the command
# setpoint variable loaded below -- confirm.
these_projects = [name for name in PROJECTS_2022 if name != "LAB-3"]

In [None]:
# For every project, tabulate the share of zone-hours whose commanded cooling
# setpoint equals the scheduled value (74 / 76 / 78), split into included and
# excluded zones. All four shares use the same denominator (every zone-hour of
# the filtered frame), so they add up to 1 for each schedule row.
all_results = {}
for project in these_projects:
    results = pd.DataFrame(
        index=["Schedule 74F", "Schedule 76F", "Schedule 78F"],
        columns=[
            "Correct CSP Command (Included)",
            "Incorrect CSP Command (Included)",
            "Correct CSP Command (Excluded)",
            "Incorrect CSP Command (Excluded)",
        ],
    )

    # OFF-6 is read from "zone-zonesp" and shifted by +2 instead of using
    # "zone-deadband_top".
    # NOTE(review): the +2 presumably turns the zone setpoint into the top of
    # the deadband -- confirm.
    command = (
        load_zones("2022", "OFF-6", "zone-zonesp") + 2
        if project == "OFF-6"
        else load_zones("2022", project, "zone-deadband_top")
    )

    # Summer-2022 window at an hourly resample, rounded to whole numbers so the
    # equality comparison against the scheduled setpoint is exact.
    command = cleaning.clean_df(
        df=command,
        this_var="zone-deadband_top",
        start_date=SUMMER_START_2022,
        end_date=SUMMER_END_2022,
        resample_rule="1h",
    ).round(0)

    # Zones flagged with 1 in EXCLUDED_ZONES for this project.
    flagged = EXCLUDED_ZONES[project]
    flagged = set((flagged[flagged == 1]).dropna().index)

    for sp in [74, 76, 78]:
        schedule_hours = cleaning.get_experiment_hourly_filter(
            [project], "2022", [f"CSP = {sp}F"], no_weekends=NO_WEEKENDS[project]
        )[project]
        this_command = cleaning.clean_df(
            df=command,
            hourly_filter=schedule_hours,
            only_business_hours=True,
            no_weekends=NO_WEEKENDS[project],
            SI_units=False,
        )

        present = set(this_command.columns)
        zone_groups = {
            "Included": list(present - flagged),
            "Excluded": list(present & flagged),
        }
        zone_hours = len(this_command.index) * len(this_command.columns)
        row = f"Schedule {sp}F"

        for group, group_zones in zone_groups.items():
            subset = this_command[group_zones]
            # NaN cells compare unequal to `sp`, so they are counted as incorrect.
            results.loc[row, f"Correct CSP Command ({group})"] = (
                subset == sp
            ).sum().sum() / zone_hours
            results.loc[row, f"Incorrect CSP Command ({group})"] = (
                subset != sp
            ).sum().sum() / zone_hours

    all_results[project] = results
    print(project)

In [None]:
# Bar chart of all_results. Each result column is described once as
# (legend name, color, opacity, fill pattern) and pivoted into the
# per-attribute dictionaries that make_bar_plot receives.
csp_bar_styles = {
    "Correct CSP Command (Included)": ("Correct CSP Command", "Green", 1, ""),
    "Incorrect CSP Command (Included)": ("Incorrect CSP Command", "Red", 1, ""),
    "Correct CSP Command (Excluded)": (
        "Correct CSP Command (Excluded)",
        "Green",
        0.7,
        "/",
    ),
    "Incorrect CSP Command (Excluded)": (
        "Incorrect CSP Command (Excluded)",
        "Red",
        0.7,
        "/",
    ),
}
csp_bar_legend = {
    attribute: {column: style[position] for column, style in csp_bar_styles.items()}
    for position, attribute in enumerate(["name", "color", "opacity", "pattern"])
}
# The excluded-zone bars are listed in dont_add_to_legend; the hatch pattern
# gets a single "Excluded" entry through pattern_legend.
csp_bar_hidden = [
    column for column in csp_bar_styles if column.endswith("(Excluded)")
]

fig = viz.make_bar_plot(
    y_data=all_results,
    bar_legend=csp_bar_legend,
    dont_add_to_legend=csp_bar_hidden,
    pattern_legend={"Excluded": ("/", 0.7)},
    y_axis_title="Fraction of Zone Hours",
    vertical_spacing=0.1,
    title_size=32,
    text_size=24,
    legend_size=32,
    y_range=[0, 1],
)
# Horizontal legend centred underneath the plot area.
fig = fig.update_layout(
    legend={
        "x": 0.5,
        "y": -0.05,
        "xanchor": "center",
        "yanchor": "top",
        "orientation": "h",
    }
)

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/building_CSP_command.png")

### Building level, command (detailed)

In [None]:
# Independent copy of the 2022 project list with LAB-3 taken out; the command
# variable is only loaded for these projects in the next cell.
these_projects = list(PROJECTS_2022)
these_projects.remove("LAB-3")

In [None]:
# Load the "zone-deadband_top" variable from the 2022 dataset for every project
# in `these_projects`. The following cells treat it as the CSP command and pass
# it to cleaning.clean_dfs as `dfs` (presumably a dict keyed by project -- confirm).
command = pull_from_dataset("2022", these_projects, "zone-deadband_top")

In [None]:
# Command setpoints restricted to the hours in which each building-level
# schedule ("CSP = 74F" / "CSP = 76F" / "CSP = 78F") was active, within the
# summer-2022 window, business hours only, hourly resample, non-SI units.
#
# The three cleaning passes differ only in the schedule label, so they are
# produced by one comprehension. The schedule filter is requested for
# `these_projects` -- the same projects `command` was pulled for -- instead of
# for all of PROJECTS_2022, which would also build a filter for LAB-3 even
# though `command` has no data for it.
command_by_setpoint = {
    sp: cleaning.clean_dfs(
        dfs=command,
        this_var="zone-deadband_top",
        start_date=SUMMER_START_2022,
        end_date=SUMMER_END_2022,
        only_business_hours=True,
        no_weekends=NO_WEEKENDS,
        SI_units=False,
        resample_rule="1h",
        hourly_filter=cleaning.get_experiment_hourly_filter(
            projects=these_projects,
            experiment_year="2022",
            filter_columns=[f"CSP = {sp}F"],
            no_weekends=NO_WEEKENDS,
            use_raw=False,
        ),
    )
    for sp in (74, 76, 78)
}

# Keep the names the following cells rely on.
command_74 = command_by_setpoint[74]
command_76 = command_by_setpoint[76]
command_78 = command_by_setpoint[78]

In [None]:
# "Mean" and "Std" summaries of the command setpoint under each schedule.
# Every result column is named after its schedule so the three can be combined
# side by side in the next cell; the result index is treated as zone names later.
command_by_label = [
    (command_74, "CSP = 74F"),
    (command_76, "CSP = 76F"),
    (command_78, "CSP = 78F"),
]

command_74_mean, command_76_mean, command_78_mean = [
    base.run_passive_test_on_dfs(dfs=frames, this_test="Mean", col_name=label)
    for frames, label in command_by_label
]

command_74_std, command_76_std, command_78_std = [
    base.run_passive_test_on_dfs(dfs=frames, this_test="Std", col_name=label)
    for frames, label in command_by_label
]

In [None]:
# Combine the three per-schedule summaries into one structure per statistic
# (presumably one frame per project with a column per schedule -- later cells
# index it by project and sort by "CSP = 74F").
command_mean_parts = [command_74_mean, command_76_mean, command_78_mean]
command_std_parts = [command_74_std, command_76_std, command_78_std]
command_mean = base.combine_dicts(command_mean_parts)
command_std = base.combine_dicts(command_std_parts)

In [None]:
# Drop the zones flagged with 1 in EXCLUDED_ZONES from the mean/std tables.
#
# The surviving zones keep the row order they already have in command_mean.
# A plain `list(set(a) - set(b))` would return them in an order that changes
# from one kernel start to the next (string hashes are randomised per process),
# which makes the row order -- and tie-breaking in any later sort -- irreproducible.
for project in these_projects:
    these_ezs = list(
        (EXCLUDED_ZONES[project][EXCLUDED_ZONES[project] == 1]).dropna().index
    )
    excluded_lookup = set(these_ezs)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    these_zones = [
        zone
        for zone in dict.fromkeys(command_mean[project].index)
        if zone not in excluded_lookup
    ]
    command_mean[project] = command_mean[project].loc[these_zones, :]
    command_std[project] = command_std[project].loc[these_zones, :]

In [None]:
# Constant marker opacity (0.8) for every remaining zone of every project,
# stored as a one-column frame per project for the dot plot below.
opacity_data = {}
for project in these_projects:
    zone_labels = list(command_mean[project].index)
    opacity_data[project] = pd.Series(0.8, index=zone_labels).to_frame()

In [None]:
# Dot plot of the mean command setpoint per zone, with +/- one "Std" as the
# error bars, one color per schedule, zones sorted by their "CSP = 74F" value.
command_schedule_colors = dict(
    zip(
        ["CSP = 74F", "CSP = 76F", "CSP = 78F"],
        ["RoyalBlue", "DarkOrange", "Firebrick"],
    )
)
fig = viz.make_dot_plot(
    y_data=command_mean,
    y_error_up_data=command_std,
    y_error_down_data=command_std,
    normalize_x=True,
    sort_by="CSP = 74F",
    y_axis_title="CSP Command (F)",
    x_axis_title="Fraction of Included Zones",
    horizontal_spacing=0.1,
    vertical_spacing=0.125,
    color_legend={"color": command_schedule_colors},
    opacity_data=opacity_data,
    y_range=[70, 85],
    marker_size=10,
    title_size=38,
    text_size=30,
    legend_size=38,
)
# Horizontal legend centred underneath the plot area.
fig = fig.update_layout(
    legend={
        "x": 0.5,
        "y": -0.075,
        "xanchor": "center",
        "yanchor": "top",
        "orientation": "h",
    }
)

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/building_CSP_command_detailed.png")

### Building level, effective

In [None]:
# Load the "zone-coolsp" variable from the 2022 dataset for all 2022 projects.
# The cells below plot it as the "Effective CSP (F)".
effective = pull_from_dataset("2022", PROJECTS_2022, "zone-coolsp")

In [None]:
# Effective cooling setpoints restricted to the hours in which each
# building-level schedule ("CSP = 74F" / "CSP = 76F" / "CSP = 78F") was active:
# summer-2022 window, business hours only, hourly resample, non-SI units.
# The three passes differ only in the schedule label.
effective_by_setpoint = {
    sp: cleaning.clean_dfs(
        dfs=effective,
        this_var="zone-coolsp",
        start_date=SUMMER_START_2022,
        end_date=SUMMER_END_2022,
        only_business_hours=True,
        no_weekends=NO_WEEKENDS,
        SI_units=False,
        resample_rule="1h",
        hourly_filter=cleaning.get_experiment_hourly_filter(
            projects=PROJECTS_2022,
            experiment_year="2022",
            filter_columns=[f"CSP = {sp}F"],
            no_weekends=NO_WEEKENDS,
            use_raw=False,
        ),
    )
    for sp in (74, 76, 78)
}

# Names used by the following cells.
effective_74, effective_76, effective_78 = (
    effective_by_setpoint[sp] for sp in (74, 76, 78)
)

In [None]:
# "Mean" and "Std" summaries of the effective setpoint under each schedule;
# each result column carries the schedule label so they can be combined next.
effective_by_label = [
    (effective_74, "CSP = 74F"),
    (effective_76, "CSP = 76F"),
    (effective_78, "CSP = 78F"),
]

effective_74_mean, effective_76_mean, effective_78_mean = [
    base.run_passive_test_on_dfs(dfs=frames, this_test="Mean", col_name=label)
    for frames, label in effective_by_label
]

effective_74_std, effective_76_std, effective_78_std = [
    base.run_passive_test_on_dfs(dfs=frames, this_test="Std", col_name=label)
    for frames, label in effective_by_label
]

In [None]:
# Combine the per-schedule summaries into one structure per statistic
# (indexed by project in the cells that follow).
effective_mean_parts = [effective_74_mean, effective_76_mean, effective_78_mean]
effective_std_parts = [effective_74_std, effective_76_std, effective_78_std]
effective_mean = base.combine_dicts(effective_mean_parts)
effective_std = base.combine_dicts(effective_std_parts)

In [None]:
# Drop the zones flagged with 1 in EXCLUDED_ZONES from the mean/std tables.
#
# The surviving zones keep the row order they already have in effective_mean.
# A plain `list(set(a) - set(b))` would return them in an order that changes
# from one kernel start to the next (string hashes are randomised per process),
# which makes the row order -- and tie-breaking in any later sort -- irreproducible.
for project in PROJECTS_2022:
    these_ezs = list(
        (EXCLUDED_ZONES[project][EXCLUDED_ZONES[project] == 1]).dropna().index
    )
    excluded_lookup = set(these_ezs)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    these_zones = [
        zone
        for zone in dict.fromkeys(effective_mean[project].index)
        if zone not in excluded_lookup
    ]
    effective_mean[project] = effective_mean[project].loc[these_zones, :]
    effective_std[project] = effective_std[project].loc[these_zones, :]

In [None]:
# Constant marker opacity (0.8) for every remaining zone of every 2022 project,
# stored as a one-column frame per project for the dot plot below.
opacity_data = {}
for project in PROJECTS_2022:
    zone_labels = list(effective_mean[project].index)
    opacity_data[project] = pd.Series(0.8, index=zone_labels).to_frame()

In [None]:
# Dot plot of the mean effective setpoint per zone (building-level schedule
# filter), with +/- one "Std" as the error bars and one color per schedule.
effective_schedule_colors = dict(
    zip(
        ["CSP = 74F", "CSP = 76F", "CSP = 78F"],
        ["RoyalBlue", "DarkOrange", "Firebrick"],
    )
)
fig = viz.make_dot_plot(
    y_data=effective_mean,
    y_error_up_data=effective_std,
    y_error_down_data=effective_std,
    normalize_x=True,
    sort_by="CSP = 74F",
    y_axis_title="Effective CSP (F)",
    x_axis_title="Fraction of Included Zones",
    horizontal_spacing=0.1,
    vertical_spacing=0.1,
    color_legend={"color": effective_schedule_colors},
    opacity_data=opacity_data,
    y_range=[70, 85],
    marker_size=10,
    title_size=38,
    text_size=30,
    legend_size=38,
)
# Horizontal legend centred underneath the plot area.
fig = fig.update_layout(
    legend={
        "x": 0.5,
        "y": -0.05,
        "xanchor": "center",
        "yanchor": "top",
        "orientation": "h",
    }
)

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/building_CSP_effective.png")

### Zone level, effective

In [None]:
# Re-load the raw "zone-coolsp" data for all 2022 projects. The next cell
# rebinds `effective` to its cleaned version, so this fresh pull gives the
# zone-level analysis an uncleaned starting point.
effective = pull_from_dataset("2022", PROJECTS_2022, "zone-coolsp")

In [None]:
# Clean the effective setpoints once, without any schedule filter: per the
# argument names, this limits the data to the summer-2022 window and business
# hours, applies the per-project NO_WEEKENDS rule, keeps non-SI units and
# resamples hourly. The schedule filters are applied zone by zone further down.
# NOTE: this rebinds `effective` to the cleaned result, so re-running this cell
# without re-running the pull above feeds already-cleaned data back in.
effective = cleaning.clean_dfs(
    dfs=effective,
    this_var="zone-coolsp",
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    only_business_hours=True,
    no_weekends=NO_WEEKENDS,
    SI_units=False,
    resample_rule="1h",
)

In [None]:
# Zone-by-zone restriction of the cleaned effective setpoints to the hours
# flagged "CSP = 74F". Weekend handling is switched off here because the input
# frames were already cleaned with NO_WEEKENDS in the previous cell.
effective_74 = {}
for project in PROJECTS_2022:
    project_csp = effective[project]
    zone_filters = cleaning.get_zonal_experiment_hourly_filter(
        project,
        list(project_csp.columns),
        ["CSP = 74F"],
        no_weekends=False,
    )
    effective_74[project] = cleaning.clean_by_column(
        project_csp,
        hourly_filter=zone_filters,
        no_weekends=False,
    )
    print(project)

In [None]:
# Zone-by-zone restriction of the cleaned effective setpoints to the hours
# flagged "CSP = 76F". Weekend handling is switched off here because the input
# frames were already cleaned with NO_WEEKENDS earlier.
effective_76 = {}
for project in PROJECTS_2022:
    project_csp = effective[project]
    zone_filters = cleaning.get_zonal_experiment_hourly_filter(
        project,
        list(project_csp.columns),
        ["CSP = 76F"],
        no_weekends=False,
    )
    effective_76[project] = cleaning.clean_by_column(
        project_csp,
        hourly_filter=zone_filters,
        no_weekends=False,
    )
    print(project)

In [None]:
# Zone-by-zone restriction of the cleaned effective setpoints to the hours
# flagged "CSP = 78F". Weekend handling is switched off here because the input
# frames were already cleaned with NO_WEEKENDS earlier.
effective_78 = {}
for project in PROJECTS_2022:
    project_csp = effective[project]
    zone_filters = cleaning.get_zonal_experiment_hourly_filter(
        project,
        list(project_csp.columns),
        ["CSP = 78F"],
        no_weekends=False,
    )
    effective_78[project] = cleaning.clean_by_column(
        project_csp,
        hourly_filter=zone_filters,
        no_weekends=False,
    )
    print(project)

In [None]:
# "Mean" and "Std" summaries of the zone-filtered effective setpoint under each
# schedule; each result column carries the schedule label.
effective_zone_by_label = [
    (effective_74, "CSP = 74F"),
    (effective_76, "CSP = 76F"),
    (effective_78, "CSP = 78F"),
]

effective_74_mean, effective_76_mean, effective_78_mean = [
    base.run_passive_test_on_dfs(dfs=frames, this_test="Mean", col_name=label)
    for frames, label in effective_zone_by_label
]

effective_74_std, effective_76_std, effective_78_std = [
    base.run_passive_test_on_dfs(dfs=frames, this_test="Std", col_name=label)
    for frames, label in effective_zone_by_label
]

In [None]:
# Combine the per-schedule zone-level summaries into one structure per
# statistic (indexed by project in the cells that follow).
effective_zone_mean_parts = [effective_74_mean, effective_76_mean, effective_78_mean]
effective_zone_std_parts = [effective_74_std, effective_76_std, effective_78_std]
effective_mean = base.combine_dicts(effective_zone_mean_parts)
effective_std = base.combine_dicts(effective_zone_std_parts)

In [None]:
# Drop the zones flagged with 1 in EXCLUDED_ZONES from the mean/std tables.
#
# The surviving zones keep the row order they already have in effective_mean.
# A plain `list(set(a) - set(b))` would return them in an order that changes
# from one kernel start to the next (string hashes are randomised per process),
# which makes the row order -- and tie-breaking in any later sort -- irreproducible.
for project in PROJECTS_2022:
    these_ezs = list(
        (EXCLUDED_ZONES[project][EXCLUDED_ZONES[project] == 1]).dropna().index
    )
    excluded_lookup = set(these_ezs)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    these_zones = [
        zone
        for zone in dict.fromkeys(effective_mean[project].index)
        if zone not in excluded_lookup
    ]
    effective_mean[project] = effective_mean[project].loc[these_zones, :]
    effective_std[project] = effective_std[project].loc[these_zones, :]

In [None]:
# Constant marker opacity (0.8) for every remaining zone, one single-column
# frame per project, for the dot plot below.
#
# The loop runs over PROJECTS_2022 because effective_mean holds every 2022
# project in this section. Looping over `these_projects` (left over from the
# command sections, where LAB-3 is removed) would leave LAB-3 without an
# opacity entry even though it is plotted.
opacity_data = {}
for project in PROJECTS_2022:
    opacity_data[project] = pd.Series(
        0.8, index=list(effective_mean[project].index)
    ).to_frame()

In [None]:
# Dot plot of the mean effective setpoint per zone (zone-level schedule
# filter), with +/- one "Std" as the error bars and one color per schedule.
effective_zone_schedule_colors = dict(
    zip(
        ["CSP = 74F", "CSP = 76F", "CSP = 78F"],
        ["RoyalBlue", "DarkOrange", "Firebrick"],
    )
)
fig = viz.make_dot_plot(
    y_data=effective_mean,
    y_error_up_data=effective_std,
    y_error_down_data=effective_std,
    normalize_x=True,
    sort_by="CSP = 74F",
    y_axis_title="Effective CSP (F)",
    x_axis_title="Fraction of Included Zones",
    horizontal_spacing=0.1,
    vertical_spacing=0.1,
    color_legend={"color": effective_zone_schedule_colors},
    opacity_data=opacity_data,
    y_range=[70, 85],
    marker_size=10,
    title_size=38,
    text_size=30,
    legend_size=38,
)
# Horizontal legend centred underneath the plot area.
fig = fig.update_layout(
    legend={
        "x": 0.5,
        "y": -0.05,
        "xanchor": "center",
        "yanchor": "top",
        "orientation": "h",
    }
)

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/zone_CSP_effective.png")

### Zone level, command

In [None]:
# 2022 projects without LAB-3, built as a new list so PROJECTS_2022 itself is
# left unchanged; the command variable below is loaded for these projects only.
these_projects = [name for name in PROJECTS_2022 if name != "LAB-3"]

In [None]:
# Re-load the raw "zone-deadband_top" data for `these_projects`. The next cell
# rebinds `command` to its cleaned version, so this fresh pull gives the
# zone-level analysis an uncleaned starting point.
command = pull_from_dataset("2022", these_projects, "zone-deadband_top")

In [None]:
# Clean the command setpoints once, without any schedule filter: per the
# argument names, this limits the data to the summer-2022 window and business
# hours, applies the per-project NO_WEEKENDS rule, keeps non-SI units and
# resamples hourly. The schedule filters are applied zone by zone further down.
# NOTE: this rebinds `command` to the cleaned result, so re-running this cell
# without re-running the pull above feeds already-cleaned data back in.
command = cleaning.clean_dfs(
    dfs=command,
    this_var="zone-deadband_top",
    start_date=SUMMER_START_2022,
    end_date=SUMMER_END_2022,
    only_business_hours=True,
    no_weekends=NO_WEEKENDS,
    SI_units=False,
    resample_rule="1h",
)

In [None]:
# Zone-by-zone restriction of the cleaned command setpoints to the hours
# flagged "CSP = 74F". Weekend handling is switched off here because the input
# frames were already cleaned with NO_WEEKENDS in the previous cell.
command_74 = {}
for project in these_projects:
    project_command = command[project]
    zone_filters = cleaning.get_zonal_experiment_hourly_filter(
        project,
        list(project_command.columns),
        ["CSP = 74F"],
        no_weekends=False,
    )
    command_74[project] = cleaning.clean_by_column(
        project_command,
        hourly_filter=zone_filters,
        no_weekends=False,
    )
    print(project)

In [None]:
# Zone-by-zone restriction of the cleaned command setpoints to the hours
# flagged "CSP = 76F". Weekend handling is switched off here because the input
# frames were already cleaned with NO_WEEKENDS earlier.
command_76 = {}
for project in these_projects:
    project_command = command[project]
    zone_filters = cleaning.get_zonal_experiment_hourly_filter(
        project,
        list(project_command.columns),
        ["CSP = 76F"],
        no_weekends=False,
    )
    command_76[project] = cleaning.clean_by_column(
        project_command,
        hourly_filter=zone_filters,
        no_weekends=False,
    )
    print(project)

In [None]:
# Zone-by-zone restriction of the cleaned command setpoints to the hours
# flagged "CSP = 78F". Weekend handling is switched off here because the input
# frames were already cleaned with NO_WEEKENDS earlier.
command_78 = {}
for project in these_projects:
    project_command = command[project]
    zone_filters = cleaning.get_zonal_experiment_hourly_filter(
        project,
        list(project_command.columns),
        ["CSP = 78F"],
        no_weekends=False,
    )
    command_78[project] = cleaning.clean_by_column(
        project_command,
        hourly_filter=zone_filters,
        no_weekends=False,
    )
    print(project)

In [None]:
# "Mean" and "Std" summaries of the zone-filtered command setpoint under each
# schedule; each result column carries the schedule label.
command_zone_by_label = [
    (command_74, "CSP = 74F"),
    (command_76, "CSP = 76F"),
    (command_78, "CSP = 78F"),
]

command_74_mean, command_76_mean, command_78_mean = [
    base.run_passive_test_on_dfs(dfs=frames, this_test="Mean", col_name=label)
    for frames, label in command_zone_by_label
]

command_74_std, command_76_std, command_78_std = [
    base.run_passive_test_on_dfs(dfs=frames, this_test="Std", col_name=label)
    for frames, label in command_zone_by_label
]

In [None]:
# Combine the per-schedule zone-level summaries into one structure per
# statistic (indexed by project in the cells that follow).
command_zone_mean_parts = [command_74_mean, command_76_mean, command_78_mean]
command_zone_std_parts = [command_74_std, command_76_std, command_78_std]
command_mean = base.combine_dicts(command_zone_mean_parts)
command_std = base.combine_dicts(command_zone_std_parts)

In [None]:
# Drop the zones flagged with 1 in EXCLUDED_ZONES from the mean/std tables.
#
# The surviving zones keep the row order they already have in command_mean.
# A plain `list(set(a) - set(b))` would return them in an order that changes
# from one kernel start to the next (string hashes are randomised per process),
# which makes the row order -- and tie-breaking in any later sort -- irreproducible.
for project in these_projects:
    these_ezs = list(
        (EXCLUDED_ZONES[project][EXCLUDED_ZONES[project] == 1]).dropna().index
    )
    excluded_lookup = set(these_ezs)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    these_zones = [
        zone
        for zone in dict.fromkeys(command_mean[project].index)
        if zone not in excluded_lookup
    ]
    command_mean[project] = command_mean[project].loc[these_zones, :]
    command_std[project] = command_std[project].loc[these_zones, :]

In [None]:
# Constant marker opacity (0.8) for every remaining zone of every project,
# stored as a one-column frame per project for the dot plot below.
opacity_data = {}
for project in these_projects:
    zone_labels = list(command_mean[project].index)
    opacity_data[project] = pd.Series(0.8, index=zone_labels).to_frame()

In [None]:
# Dot plot of the mean command setpoint per zone (zone-level schedule filter),
# with +/- one "Std" as the error bars and one color per schedule.
command_zone_schedule_colors = dict(
    zip(
        ["CSP = 74F", "CSP = 76F", "CSP = 78F"],
        ["RoyalBlue", "DarkOrange", "Firebrick"],
    )
)
fig = viz.make_dot_plot(
    y_data=command_mean,
    y_error_up_data=command_std,
    y_error_down_data=command_std,
    normalize_x=True,
    sort_by="CSP = 74F",
    y_axis_title="CSP Command (F)",
    x_axis_title="Fraction of Included Zones",
    horizontal_spacing=0.1,
    vertical_spacing=0.125,
    color_legend={"color": command_zone_schedule_colors},
    opacity_data=opacity_data,
    y_range=[70, 85],
    marker_size=10,
    title_size=38,
    text_size=30,
    legend_size=38,
)
# Horizontal legend centred underneath the plot area.
fig = fig.update_layout(
    legend={
        "x": 0.5,
        "y": -0.075,
        "xanchor": "center",
        "yanchor": "top",
        "orientation": "h",
    }
)

In [None]:
# fig

In [None]:
# fig.write_image(f"{IMAGE_PATH}/zone_CSP_command.png")