# Energy use plot

In [1]:
# Set up: third-party/stdlib imports plus the project-local `config` module
import pandas as pd
import numpy as np
import sys
from pathlib import Path
# CODE_ROOT is the grandparent of the current working directory. It is appended
# to sys.path before `import config` so that import can resolve; this assumes
# the notebook is run from a folder two levels below the code root -- TODO confirm.
CODE_ROOT = Path.cwd().parents[1]
sys.path.append(str(CODE_ROOT))
import config
# NOTE(review): the openpyxl names below and `shutil` are not referenced by the
# cells visible in this notebook; confirm whether they are still needed.
from openpyxl import load_workbook
from openpyxl.formatting.rule import FormulaRule
from openpyxl.styles import Font, PatternFill
from openpyxl.utils import range_boundaries
import os
import shutil
import xlwings as xw
import warnings
# Silence UserWarning messages raised from the openpyxl module
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")
import matplotlib.pyplot as plt

In [2]:
# Read the lab sample table (one CSV under the raw baseline sample folder)
sample_csv = config.BL_RAW_SAMPLE / "final_sample_with_BL_file_status.csv"
labs = pd.read_csv(sample_csv)

In [3]:
# List of labs to process (only those with filled files)
filled_labs = labs[(labs["file_filled"] == True)]

# List to store results (one dict per lab group whose calculator was found)
results = []

# Launch a hidden Excel instance; it is always quit in the `finally` below so
# no invisible Excel process is left running if anything fails mid-loop.
app = xw.App(visible=False)
try:
    # For each labgroupid, check for calculator and extract results
    for labgroupid, _ in filled_labs.groupby("labgroupid"):

        # Calculator file path for this lab group
        calculator = os.path.join(
            config.DATA_ROOT,
            "13_Calculators",
            "2_Individualized",
            str(labgroupid),
            "Energy_Use_Report.xlsx"
        )

        if not os.path.exists(calculator):
            print(f"Missing calculator for {labgroupid}")
            continue

        # Open the workbook explicitly in the hidden instance created above:
        # xw.Book(path) would use whichever Excel app is currently active.
        calc_wb = app.books.open(calculator)
        try:
            # Read total energy use and carbon emissions
            calc_ws = calc_wb.sheets["Introduction"]
            total_energy_use = calc_ws.range("B12").value
            total_carbon_emissions = calc_ws.range("B13").value
        finally:
            # Close the workbook even if the sheet or cells could not be read
            calc_wb.close()

        # Save results
        results.append({
            "labgroupid": labgroupid,
            "total_energy_use": total_energy_use,
            "total_carbon_emissions": total_carbon_emissions
        })
finally:
    app.quit()

# Convert to DataFrame. Naming the columns explicitly keeps the schema stable
# (and the later merge on "labgroupid" valid) even when no calculator was found.
df_results = pd.DataFrame(
    results,
    columns=["labgroupid", "total_energy_use", "total_carbon_emissions"]
)

Missing calculator for 541


In [4]:
# Join the extracted calculator totals onto the filled labs (left join, so
# labs without a calculator result are kept), then derive numeric columns by
# stripping the unit suffix from the text values read from the workbook.
labs_plot = (
    filled_labs
    .merge(df_results, how="left", on="labgroupid")
    .assign(
        # Total energy use as a float, in kWh
        total_energy_use_num=lambda frame: (
            frame["total_energy_use"]
            .str.replace(" kWh", "", regex=False)
            .astype(float)
        ),
        # Same quantity expressed in MWh
        total_energy_use_mwh=lambda frame: frame["total_energy_use_num"] / 1000,
        # Total carbon emissions as a float, in tCO2e
        total_carbon_emissions_num=lambda frame: (
            frame["total_carbon_emissions"]
            .str.replace(" tCO2e", "", regex=False)
            .astype(float)
        ),
    )
)

In [5]:
# Plot histogram of total energy use (rows without a value are excluded)
data = labs_plot["total_energy_use_mwh"].dropna()
if data.empty:
    # Without this guard np.arange fails with an opaque "cannot compute length"
    raise ValueError("No total energy use values available to plot")

# Fixed-width bins starting at 0 and extending past the largest observation
bin_width = 10
bins = np.arange(
    0,
    data.max() + bin_width,
    bin_width
)

fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(
    data,
    bins=bins,
    # Each observation contributes 100/N so bar heights are percentages
    weights=np.ones_like(data) * 100 / len(data),
    edgecolor="black"
)

ax.set_xlabel("Total energy use (MWh)")
ax.set_ylabel("Percent")
ax.set_title("Distribution of total energy use (excl. fume cupboards)")
fig.tight_layout()

# Save plot to output folder (created on first use so savefig cannot fail on
# a missing directory)
output_folder = config.OUTPUT / "2_Histograms"
output_folder.mkdir(parents=True, exist_ok=True)
fig.savefig(output_folder / "total_energy_no_fume_cupboards.pdf")

plt.close(fig)

In [6]:
# Plot histogram of total carbon emissions (rows without a value are excluded)
data = labs_plot["total_carbon_emissions_num"].dropna()
if data.empty:
    # Without this guard np.arange fails with an opaque "cannot compute length"
    raise ValueError("No total carbon emissions values available to plot")

# Fixed-width bins starting at 0 and extending past the largest observation
bin_width = 1
bins = np.arange(
    0,
    data.max() + bin_width,
    bin_width
)

# Same figure size as the energy-use histogram so the two PDFs match
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(
    data,
    bins=bins,
    # Each observation contributes 100/N so bar heights are percentages
    weights=np.ones_like(data) * 100 / len(data),
    edgecolor="black"
)
# \u2082 is SUBSCRIPT TWO; written as an escape so the label survives
# file re-encoding (the previous literal had been corrupted to mojibake)
ax.set_xlabel("Total carbon emissions (tCO\u2082e)")
ax.set_ylabel("Percent")
ax.set_title("Distribution of total carbon emissions (excl. fume cupboards)")
fig.tight_layout()

# Save plot to output folder (created on first use so savefig cannot fail on
# a missing directory)
output_folder = config.OUTPUT / "2_Histograms"
output_folder.mkdir(parents=True, exist_ok=True)
fig.savefig(output_folder / "total_emissions_no_fume_cupboards.pdf")

plt.close(fig)