In [10]:
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

BA = 'PJM'
BA_2 = 'PJM'

df_path = f'../data/CEMS_processed/{BA}.csv'
df_gen_path = f'../data/processed/{BA_2}.csv'

df = pd.read_csv(df_path)
df_gen = pd.read_csv(df_gen_path)

# Preprocess the data
df['Date'] = pd.to_datetime(df['Date']).dt.date
df = df.groupby(['Date', 'Facility ID']).sum().reset_index()

columns = ['Local time', 'NG: SUN', 'NG: WND', 'D','NG: COL', 'NG: NG', 'NG: OIL', 'NG: WAT', 'TI', 'solar_ext_mw', 'wind_ext_mw', 'demand_ext_mw']
df_gen = df_gen[columns]

df_gen = df_gen.rename(columns={
    'Local time': 'datetime',
    'D': 'demand_mw',
    'NG: SUN': 'solar_generation_mw',
    'NG: WND': 'wind_generation_mw',
    'NG: COL': 'coal_generation_mw',
    'NG: NG': 'natural_gas_generation_mw',
    'NG: OIL': 'oil_generation_mw',
    'TI': 'imports_mw',
    'NG: WAT': 'hydro_generation_mw',
})

df_gen['Date'] = pd.to_datetime(df_gen['datetime']).dt.date
df_gen_D = df_gen.groupby('Date').sum().reset_index()
df_gen_D = df_gen_D.drop(columns=['datetime'])

# Filter between 2019 and 2023
df_gen_D = df_gen_D[(df_gen_D['Date'] >= pd.to_datetime('2019-01-01').date()) & (df_gen_D['Date'] <= pd.to_datetime('2023-12-31').date())]
df_merged = pd.merge(df, df_gen_D, left_on='Date', right_on='Date', how='left')

df_merged = df_merged.rename(columns={
    'Date': 'datetime',
    'Facility ID': 'id',
    'Operating Time': 'optime',
    'Gross Load (MW)': 'gross_load_mw',
    'CO2 Mass (short tons)': 'co2_mass_shorttons',
})

df_merged['imports_abs_mw'] = df_merged['imports_mw'].abs()
df_merged['netimports_mw'] = np.where(df_merged['imports_mw'] < 0, df_merged['imports_mw'] * -1, 0)
df_merged['netexports_mw'] = np.where(df_merged['imports_mw'] > 0, df_merged['imports_mw'], 0)
df_merged.dropna(inplace=True)

df_thermal = df_merged[['datetime','gross_load_mw']].groupby('datetime').sum().reset_index()
df_thermal = df_thermal.rename(columns={'gross_load_mw': 'thermal_generation_mw'})
df_merged = pd.merge(df_merged, df_thermal, left_on='datetime', right_on='datetime', how='left')

df_merged['month'] = pd.to_datetime(df_merged['datetime']).dt.month
df_merged['year'] = pd.to_datetime(df_merged['datetime']).dt.year

# Generate new emissions intensity column
df_merged['co2_emissions_intensity'] = df_merged['co2_mass_shorttons'] / df_merged['gross_load_mw']
df_merged['co2_emissions_intensity'] = df_merged['co2_emissions_intensity'].fillna(0).replace([np.inf, -np.inf], 0)

# change SO2 Mass (lbs) to so2_mass_kg
df_merged['so2_mass_kg'] = df_merged['SO2 Mass (lbs)'] * 0.453592
df_merged['nox_mass_kg'] = df_merged['NOx Mass (lbs)'] * 0.453592

# Generate new emissions intensity columns
df_merged['so2_emissions_intensity'] = df_merged['so2_mass_kg'] / df_merged['gross_load_mw']
df_merged['nox_emissions_intensity'] = df_merged['nox_mass_kg'] / df_merged['gross_load_mw']
df_merged['so2_emissions_intensity'] = df_merged['so2_emissions_intensity'].fillna(0).replace([np.inf, -np.inf], 0)
df_merged['nox_emissions_intensity'] = df_merged['nox_emissions_intensity'].fillna(0).replace([np.inf, -np.inf], 0)

df_wramp_mw = df_gen[['datetime','wind_generation_mw']].dropna().reset_index(drop=True)
df_wramp_mw['datetime'] = pd.to_datetime(df_wramp_mw['datetime'])
df_wramp_mw['datetime'] = df_wramp_mw['datetime'].dt.date
df_wramp_mw['datetime'] = pd.to_datetime(df_wramp_mw['datetime'])
df_wramp_mw.set_index('datetime', inplace=True)
df_wramp_mw['wind_ramp'] = df_wramp_mw['wind_generation_mw'].diff().abs()
df_wramp_mw['hour'] = df_wramp_mw.index.hour
df_wramp_mw.loc[df_wramp_mw['hour'] == 1, 'wind_ramp'] = None
daily_wind_ramp = df_wramp_mw.groupby('datetime')['wind_ramp'].sum()
df_merged['datetime'] = pd.to_datetime(df_merged['datetime'])
df_merged = pd.merge(df_merged, daily_wind_ramp, left_on='datetime', right_on='datetime', how='left')

df_sramp_mw = df_gen[['datetime','solar_generation_mw']].dropna().reset_index(drop=True)
df_sramp_mw['datetime'] = pd.to_datetime(df_sramp_mw['datetime'])
df_sramp_mw['datetime'] = df_sramp_mw['datetime'].dt.date
df_sramp_mw['datetime'] = pd.to_datetime(df_sramp_mw['datetime'])
df_sramp_mw.set_index('datetime', inplace=True)
df_sramp_mw['solar_ramp'] = df_sramp_mw['solar_generation_mw'].diff().abs()
df_sramp_mw['hour'] = df_sramp_mw.index.hour
df_sramp_mw.loc[df_sramp_mw['hour'] == 1, 'solar_ramp'] = None
daily_solar_ramp = df_sramp_mw.groupby('datetime')['solar_ramp'].sum()
df_merged = pd.merge(df_merged, daily_solar_ramp, left_on='datetime', right_on='datetime', how='left')

df_merged['solar_share'] = df_merged['solar_generation_mw'] / df_merged['demand_mw']
df_merged['wind_share'] = df_merged['wind_generation_mw'] / df_merged['demand_mw']

import pandas as pd
import numpy as np
import statsmodels.formula.api as smf

# Function to add stars based on p-value
def significance_stars(p_value):
    if p_value < 0.001:
        return '***'
    elif p_value < 0.01:
        return '**'
    elif p_value < 0.05:
        return '*'
    else:
        return ''

# Assume df_merged is already defined and includes 'residual_demand_mw'
df_merged['residual_demand_mw'] = df_merged['demand_mw'] - df_merged['hydro_generation_mw'] + df_merged['imports_mw']

dependent_vars = ['gross_load_mw', 'co2_mass_shorttons', 'co2_emissions_intensity', 'so2_mass_kg', 'so2_emissions_intensity', 'nox_mass_kg', 'nox_emissions_intensity']
ind_5 = ['solar_generation_mw', 'wind_generation_mw', 'residual_demand_mw', 'wind_ramp', 'solar_ramp']

# Initialize a dictionary to store results for all dependent variables
all_results = {}

for dependent_var in dependent_vars:
    # Initialize an empty dictionary to store results for this dependent variable
    summary_dict = {var: [] for var in ind_5}
    summary_dict['R-squared'] = []
    summary_dict['Num of Obs'] = []

    # if any row has zero, add 1
    #df_merged[ind_5] = df_merged[ind_5] + 1

    formula = f'{dependent_var} ~ {" + ".join(ind_5)} + C(id) + C(month) + C(year)'
    
    # Filter out rows where the dependent variable is not greater than zero
    df_filtered = df_merged[df_merged[dependent_var] > 0].copy()

    # Log transformation of dependent variable and independent variables
    df_filtered[dependent_var] = np.log(df_filtered[dependent_var])
    df_filtered[ind_5] = np.log(df_filtered[ind_5])

    # Change inf and -inf to nan and drop nan
    df_filtered = df_filtered.replace([np.inf, -np.inf], np.nan).dropna()

    # Fit the model
    model = smf.ols(formula, data=df_filtered).fit()

    # Store coefficients, standard errors, and p-values for the variables in ind_5
    for var in ind_5:
        coeff_value = model.params[var]
        std_err_value = model.bse[var]
        p_value = model.pvalues[var]

        # Add significance stars based on p-value
        stars = significance_stars(p_value)
        
        # Add coefficient, standard error, and significance stars to the dictionary
        result = f"{coeff_value:.4f} ({std_err_value:.4f}){stars}"
        summary_dict[var].append(result)

    # Store R-squared and number of observations
    summary_dict['R-squared'].append(f"{model.rsquared:.4f}")
    summary_dict['Num of Obs'].append(f"{int(model.nobs)}")

    # Convert the summary_dict into a pandas DataFrame
    summary_df = pd.DataFrame.from_dict(summary_dict, orient='index')

    # Rename the column to reflect the dependent variable
    summary_df.columns = [dependent_var]

    # Store the results for this dependent variable
    all_results[dependent_var] = summary_df

# Combine all results into a single DataFrame
final_summary_df = pd.concat(all_results.values(), axis=1)

final_summary_df.to_csv(f'../results/{BA}_regression_results.csv')

  result = func(self.values, **kwargs)
  result = func(self.values, **kwargs)
  result = func(self.values, **kwargs)
  result = func(self.values, **kwargs)
  result = func(self.values, **kwargs)
  result = func(self.values, **kwargs)
  result = func(self.values, **kwargs)


## Main Paper

In [None]:
import pandas as pd
import os
import re

isos = ['CAISO', 'PJM', 'ISONE', 'MISO', 'SWPP', 'ERCOT', 'NYISO']
variables = ['residual_demand_mw', 'solar_generation_mw', 'wind_generation_mw', 'solar_ramp', 'wind_ramp', 'R-squared', 'Num of Obs']
dependent_vars = ['gross_load_mw', 'co2_mass_shorttons', 'co2_emissions_intensity']

# A mapping of variable names to new labels
variable_labels = {
    'residual_demand_mw': 'Thermal Generation',
    'solar_generation_mw': 'Solar',
    'wind_generation_mw': 'Wind'
}

def format_coefficient(value, var):
    # For R-squared, limit to two decimal places
    if var == 'R-squared':
        return f"{float(value):.2f}"
    
    # For Num of Obs, format with commas
    if var == 'Num of Obs':
        return f"{int(value):,}"
    
    # Default formatting for coefficients and standard errors
    if isinstance(value, str):
        match = re.match(r'(-?\d+\.\d+)\s*\((\d+\.\d+)\)(\**)', value)
        if match:
            coef, se, stars = match.groups()
            coef_val = float(coef)
            se_val = float(se)
            if coef_val > 1000 or se_val > 1000:  # Cap values above 1000
                coef_val = se_val = 0
            return f"${coef_val:.3f}^{{\\footnotesize {stars}}}$&({se_val:.3f})"
    elif isinstance(value, (int, float)):
        if value > 1000:  # Cap value above 1000
            return "0"
        return f"{value:.4f}"
    return value

latex_code = r"""\begin{table}[htbp]
    \small
    \centering
    \caption{\textbf{Coefficient of the panel regression formulation with generation, $\text{CO}_2$ emissions, and $\text{CO}_2$ emissions intensity as the dependent variable}. Significance levels: {\footnotesize ***} $p < 0.01$, {\footnotesize **} $p < 0.05$, {\footnotesize *} $p < 0.1$.}
    \label{tab:panel-regression}
    """

for idx, dep_var in enumerate(dependent_vars):
    latex_code += r"""
    \begin{subtable}[t]{\textwidth}
        \centering
        \caption{""" + (f"Generation" if idx == 0 else f"$\\text{{CO}}_2$ {'Emissions' if idx == 1 else 'Emissions Intensity'}") + r"""}
        \small
        \begin{tabular}{l>{\raggedleft\arraybackslash}p{1cm}>{\raggedleft\arraybackslash}p{1.5cm}>{\raggedleft\arraybackslash}p{1.5cm}>{\raggedleft\arraybackslash}p{1.5cm}>{\raggedleft\arraybackslash}p{1.5cm}>{\raggedleft\arraybackslash}p{1.5cm}>{\raggedleft\arraybackslash}p{1.5cm}}
            \toprule
            & """ + " & ".join([f"\\textbf{{{iso}}}" for iso in isos]) + r""" \\
            \midrule"""

    for var in variables:
        # Use the custom labels for the first three variables
        var_label = variable_labels.get(var, var.replace('_', ' ').title())
            
        latex_code += f"\n\\textbf{{{var_label}}}"
        coefficients = []
        standard_errors = []
        for iso in isos:
            file_path = f"../results/{iso}_regression_results.csv"
            if os.path.exists(file_path):
                df = pd.read_csv(file_path, index_col=0)
                value = df.loc[var, dep_var] if var in df.index else "-"
                formatted = format_coefficient(value, var)
                if '&' in formatted:
                    coef, se = formatted.split('&')
                    coefficients.append(coef)
                    standard_errors.append(se)
                else:
                    coefficients.append(formatted)
                    standard_errors.append('')
            else:
                coefficients.append('-')
                standard_errors.append('')

        latex_code += " & " + " & ".join(coefficients) + r" \\"

        # Print standard errors for the current row, if available
        if any(standard_errors):
            latex_code += "\n            & " + " & ".join(standard_errors) + r" \\"

        # Add midrule after the standard error row for wind_ramp
        if var == 'wind_ramp':
            latex_code += r"\n            \midrule"

    latex_code += r"""
            \bottomrule
        \end{tabular}
    \end{subtable}
    
    \vspace{0.3cm}
    """

latex_code += r"\end{table}"

print(latex_code)

## Emissions

In [None]:
import pandas as pd
import os
import re

isos = ['CAISO', 'PJM', 'ISONE', 'MISO', 'SWPP', 'ERCOT', 'NYISO']
variables = ['residual_demand_mw', 'solar_generation_mw', 'wind_generation_mw', 'solar_ramp', 'wind_ramp', 'R-squared', 'Num of Obs']
dependent_vars = ['co2_mass_shorttons', 'so2_mass_kg', 'nox_mass_kg']

# A mapping of variable names to new labels
variable_labels = {
    'residual_demand_mw': 'Thermal Generation',
    'solar_generation_mw': 'Solar',
    'wind_generation_mw': 'Wind'
}

def format_coefficient(value, var):
    # For R-squared, limit to two decimal places
    if var == 'R-squared':
        return f"{float(value):.2f}"
    
    # For Num of Obs, format with commas
    if var == 'Num of Obs':
        return f"{int(value):,}"
    
    # Default formatting for coefficients and standard errors
    if isinstance(value, str):
        match = re.match(r'(-?\d+\.\d+)\s*\((\d+\.\d+)\)(\**)', value)
        if match:
            coef, se, stars = match.groups()
            coef_val = float(coef)
            se_val = float(se)
            if coef_val > 1000 or se_val > 1000:  # Cap values above 1000
                coef_val = se_val = 0
            return f"${coef_val:.3f}^{{\\footnotesize {stars}}}$&({se_val:.3f})"
    elif isinstance(value, (int, float)):
        if value > 1000:  # Cap value above 1000
            return "0"
        return f"{value:.4f}"
    return value

latex_code = r"""\begin{table}[htbp]
    \small
    \centering
    \caption{\textbf{Coefficient of the panel regression formulation with $\text{CO}_2$, $\text{SO}_2$, and $\text{NO}_x$ emissions as the dependent variable}. Significance levels: {\footnotesize ***} $p < 0.001$, {\footnotesize **} $p < 0.01$, {\footnotesize *} $p < 0.05$.}
    \label{tab:panel-regression}
    """

for idx, dep_var in enumerate(dependent_vars):
    # Set the appropriate caption for each dependent variable
    subtable_caption = ""
    if dep_var == 'co2_mass_shorttons':
        subtable_caption = "$\\text{CO}_2$ Emissions"
    elif dep_var == 'so2_mass_kg':
        subtable_caption = "$\\text{SO}_2$ Emissions"
    elif dep_var == 'nox_mass_kg':
        subtable_caption = "$\\text{NO}_x$ Emissions"

    latex_code += r"""
    \begin{subtable}[t]{\textwidth}
        \centering
        \caption{""" + subtable_caption + r"""}
        \small
        \begin{tabular}{l>{\raggedleft\arraybackslash}p{1cm}>{\raggedleft\arraybackslash}p{1.5cm}>{\raggedleft\arraybackslash}p{1.5cm}>{\raggedleft\arraybackslash}p{1.5cm}>{\raggedleft\arraybackslash}p{1.5cm}>{\raggedleft\arraybackslash}p{1.5cm}>{\raggedleft\arraybackslash}p{1.5cm}}
            \toprule
            & """ + " & ".join([f"\\textbf{{{iso}}}" for iso in isos]) + r""" \\
            \midrule"""

    for var in variables:
        # Use the custom labels for the first three variables
        var_label = variable_labels.get(var, var.replace('_', ' ').title())
            
        latex_code += f"\n\\textbf{{{var_label}}}"
        coefficients = []
        standard_errors = []
        for iso in isos:
            file_path = f"../results/{iso}_regression_results.csv"
            if os.path.exists(file_path):
                df = pd.read_csv(file_path, index_col=0)
                value = df.loc[var, dep_var] if var in df.index else "-"
                formatted = format_coefficient(value, var)
                if '&' in formatted:
                    coef, se = formatted.split('&')
                    coefficients.append(coef)
                    standard_errors.append(se)
                else:
                    coefficients.append(formatted)
                    standard_errors.append('')
            else:
                coefficients.append('-')
                standard_errors.append('')

        latex_code += " & " + " & ".join(coefficients) + r" \\"

        # Print standard errors for the current row, if available
        if any(standard_errors):
            latex_code += "\n            & " + " & ".join(standard_errors) + r" \\"

        # Add midrule after the standard error row for wind_ramp
        if var == 'wind_ramp':
            latex_code += r"\n            \midrule"

    latex_code += r"""
            \bottomrule
        \end{tabular}
    \end{subtable}
    
    \vspace{0.3cm}
    """

latex_code += r"\end{table}"

print(latex_code)

## Emissions Intensity

In [None]:
import pandas as pd
import os
import re

isos = ['CAISO', 'PJM', 'ISONE', 'MISO', 'SWPP', 'ERCOT', 'NYISO']
variables = ['residual_demand_mw', 'solar_generation_mw', 'wind_generation_mw', 'solar_ramp', 'wind_ramp', 'R-squared', 'Num of Obs']
dependent_vars = ['co2_emissions_intensity', 'so2_emissions_intensity', 'nox_emissions_intensity']

# A mapping of variable names to new labels
variable_labels = {
    'residual_demand_mw': 'Thermal Generation',
    'solar_generation_mw': 'Solar',
    'wind_generation_mw': 'Wind'
}

def format_coefficient(value, var):
    # For R-squared, limit to two decimal places
    if var == 'R-squared':
        return f"{float(value):.2f}"
    
    # For Num of Obs, format with commas
    if var == 'Num of Obs':
        return f"{int(value):,}"
    
    # Default formatting for coefficients and standard errors
    if isinstance(value, str):
        match = re.match(r'(-?\d+\.\d+)\s*\((\d+\.\d+)\)(\**)', value)
        if match:
            coef, se, stars = match.groups()
            coef_val = float(coef)
            se_val = float(se)
            if coef_val > 1000 or se_val > 1000:  # Cap values above 1000
                coef_val = se_val = 0
            return f"${coef_val:.3f}^{{\\footnotesize {stars}}}$&({se_val:.3f})"
    elif isinstance(value, (int, float)):
        if value > 1000:  # Cap value above 1000
            return "0"
        return f"{value:.4f}"
    return value

latex_code = r"""\begin{table}[htbp]
    \small
    \centering
    \caption{\textbf{Coefficient of the panel regression formulation with $\text{CO}_2$, $\text{SO}_2$, and $\text{NO}_x$ emissions intensity as the dependent variable}. Significance levels: {\footnotesize ***} $p < 0.001$, {\footnotesize **} $p < 0.01$, {\footnotesize *} $p < 0.05$.}
    \label{tab:intensity-regression}
    """

for idx, dep_var in enumerate(dependent_vars):
    # Set the appropriate caption for each dependent variable
    subtable_caption = ""
    if dep_var == 'co2_emissions_intensity':
        subtable_caption = "$\\text{CO}_2$ Emissions Intensity"
    elif dep_var == 'so2_emissions_intensity':
        subtable_caption = "$\\text{SO}_2$ Emissions Intensity"
    elif dep_var == 'nox_emissions_intensity':
        subtable_caption = "$\\text{NO}_x$ Emissions Intensity"

    latex_code += r"""
    \begin{subtable}[t]{\textwidth}
        \centering
        \caption{""" + subtable_caption + r"""}
        \small
        \begin{tabular}{l>{\raggedleft\arraybackslash}p{1cm}>{\raggedleft\arraybackslash}p{1.5cm}>{\raggedleft\arraybackslash}p{1.5cm}>{\raggedleft\arraybackslash}p{1.5cm}>{\raggedleft\arraybackslash}p{1.5cm}>{\raggedleft\arraybackslash}p{1.5cm}>{\raggedleft\arraybackslash}p{1.5cm}}
            \toprule
            & """ + " & ".join([f"\\textbf{{{iso}}}" for iso in isos]) + r""" \\
            \midrule"""

    for var in variables:
        # Use the custom labels for the first three variables
        var_label = variable_labels.get(var, var.replace('_', ' ').title())
            
        latex_code += f"\n\\textbf{{{var_label}}}"
        coefficients = []
        standard_errors = []
        for iso in isos:
            file_path = f"../results/{iso}_regression_results.csv"
            if os.path.exists(file_path):
                df = pd.read_csv(file_path, index_col=0)
                value = df.loc[var, dep_var] if var in df.index else "-"
                formatted = format_coefficient(value, var)
                if '&' in formatted:
                    coef, se = formatted.split('&')
                    coefficients.append(coef)
                    standard_errors.append(se)
                else:
                    coefficients.append(formatted)
                    standard_errors.append('')
            else:
                coefficients.append('-')
                standard_errors.append('')

        latex_code += " & " + " & ".join(coefficients) + r" \\"

        # Print standard errors for the current row, if available
        if any(standard_errors):
            latex_code += "\n            & " + " & ".join(standard_errors) + r" \\"

        # Add midrule after the standard error row for wind_ramp
        if var == 'wind_ramp':
            latex_code += r"\n            \midrule"

    latex_code += r"""
            \bottomrule
        \end{tabular}
    \end{subtable}
    
    \vspace{0.3cm}
    """

latex_code += r"\end{table}"

print(latex_code)

## Displacement

In [None]:
import re
import pandas as pd
import numpy as np

# Define function to extract coefficient from string
def extract_coefficient(value):
    match = re.match(r'(-?\d+\.\d+)', value)
    if match:
        return float(match.group(1))
    return np.nan

# Function to calculate displacement
def calculate_displacement(alpha_prime, alpha):
    """Calculate displacement fraction based on the formula:
    Displacement = alpha_prime / (alpha_prime - alpha)
    """
    if alpha_prime - alpha != 0:
        return alpha_prime / (alpha_prime - alpha)
    else:
        return np.nan  # Avoid division by zero

# Initialize dictionaries to store displacement values for solar and wind per ISO
solar_displacement = {iso: {'CO2': np.nan, 'SO2': np.nan, 'NOX': np.nan} for iso in isos}
wind_displacement = {iso: {'CO2': np.nan, 'SO2': np.nan, 'NOX': np.nan} for iso in isos}

# Loop through each ISO to calculate displacement for solar and wind
for iso in isos:
    file_path = f"../results/{iso}_regression_results.csv"
    
    if os.path.exists(file_path):
        # Load the data for the ISO
        df = pd.read_csv(file_path, index_col=0)

        # Calculate displacement for each pollutant (CO2, SO2, NOx)
        for pollutant in ['co2', 'so2', 'nox']:
            # Emissions and intensity variable names
            emissions_var = f'{pollutant}_mass_kg' if pollutant != 'co2' else f'{pollutant}_mass_shorttons'
            intensity_var = f'{pollutant}_emissions_intensity'

            # Get the solar and wind coefficients for both emissions and intensity
            solar_coef_emissions = extract_coefficient(df.loc['solar_generation_mw', emissions_var])
            solar_coef_intensity = extract_coefficient(df.loc['solar_generation_mw', intensity_var])

            wind_coef_emissions = extract_coefficient(df.loc['wind_generation_mw', emissions_var])
            wind_coef_intensity = extract_coefficient(df.loc['wind_generation_mw', intensity_var])

            # Calculate solar and wind displacement
            solar_displacement[iso][pollutant.upper()] = calculate_displacement(solar_coef_emissions, solar_coef_intensity)
            wind_displacement[iso][pollutant.upper()] = calculate_displacement(wind_coef_emissions, wind_coef_intensity)

# Convert displacement dictionaries to DataFrames for easier LaTeX table generation
solar_displacement_df = pd.DataFrame(solar_displacement).T
wind_displacement_df = pd.DataFrame(wind_displacement).T

solar_displacement_df = solar_displacement_df.applymap(lambda x: f"{x:.6f}" if x >= 0 else '0')
wind_displacement_df = wind_displacement_df.applymap(lambda x: f"{x:.6f}" if x >= 0 else '0')

In [None]:
import pandas as pd

# Sample data for solar and wind displacement DataFrame

# Function to generate LaTeX tables for solar and wind displacement
def format_displacement_subtables(solar_df, wind_df):
    latex_code = r"""\begin{table}[htbp]
    \small
    \centering
    \caption{\textbf{Displacement Effectiveness for Solar and Wind across CO\textsubscript{2}, SO\textsubscript{2}, and NO\textsubscript{x} Emissions for ISOs}.}
    \label{tab:displacement-effectiveness}
    """
    
    # Solar Displacement Subtable
    latex_code += r"""
    \begin{subtable}[t]{\textwidth}
        \centering
        \caption{Solar Displacement Effectiveness}
        \small
        \begin{tabular}{l""" + "r" * len(solar_df.index) + r"""}
            \toprule
            & """ + " & ".join([f"\\textbf{{{iso}}}" for iso in solar_df.index]) + r""" \\
            \midrule"""
    
    # Add the displacement rows (CO2, SO2, NOx) for solar
    for gas in solar_df.columns:
        gas_label = f"CO\\textsubscript{{2}}" if gas == 'CO2' else f"SO\\textsubscript{{2}}" if gas == 'SO2' else f"NO\\textsubscript{{x}}"
        latex_code += f"\n\\textbf{{{gas_label}}} & " + " & ".join([f"{float(value):.2f}" if isinstance(value, (int, float)) else value for value in solar_df[gas]]) + r" \\"
    
    latex_code += r"""
            \bottomrule
        \end{tabular}
    \end{subtable}
    
    \vspace{0.5cm}
    """
    
    # Wind Displacement Subtable
    latex_code += r"""
    \begin{subtable}[t]{\textwidth}
        \centering
        \caption{Wind Displacement Effectiveness}
        \small
        \begin{tabular}{l""" + "r" * len(wind_df.index) + r"""}
            \toprule
            & """ + " & ".join([f"\\textbf{{{iso}}}" for iso in wind_df.index]) + r""" \\
            \midrule"""
    
    # Add the displacement rows (CO2, SO2, NOx) for wind
    for gas in wind_df.columns:
        gas_label = f"CO\\textsubscript{{2}}" if gas == 'CO2' else f"SO\\textsubscript{{2}}" if gas == 'SO2' else f"NO\\textsubscript{{x}}"
        latex_code += f"\n\\textbf{{{gas_label}}} & " + " & ".join([f"{float(value):.2f}" if isinstance(value, (int, float)) else value for value in wind_df[gas]]) + r" \\"
    
    latex_code += r"""
            \bottomrule
        \end{tabular}
    \end{subtable}
    
    \vspace{0.3cm}
    \end{table}"""
    
    return latex_code

# Generate LaTeX code for the solar and wind displacement subtables
latex_code = format_displacement_subtables(solar_displacement_df, wind_displacement_df)

# Print the LaTeX table code
print(latex_code)