# Calculate thermal generator parameters

In [None]:
import os
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import seaborn as sns
import tqdm.notebook as tqdm

import nygrid.gen_params as gp

In [None]:
# Locate the data folder: it sits next to the working directory when the
# working directory path contains 'examples', and inside it otherwise
cwd = os.getcwd()
base_dir = os.path.dirname(cwd) if 'examples' in cwd else cwd
data_dir = os.path.join(base_dir, 'data')

grid_data_dir = os.path.join(data_dir, 'grid', '2018Baseline')
thermal_data_dir = os.path.join(data_dir, 'thermal')

# Stop early if either input folder is missing (grid folder is checked first)
for required_dir, missing_msg in (
        (grid_data_dir, 'Grid data directory not found.'),
        (thermal_data_dir, 'Thermal data directory not found.')):
    if not os.path.exists(required_dir):
        raise FileNotFoundError(missing_msg)

print(f'Grid data directory: {grid_data_dir}')
print(f'Thermal data directory: {thermal_data_dir}')

Grid data directory: /mnt/Bo_HDD/NYgrid-python/data/grid/2018Baseline
Thermal data directory: /mnt/Bo_HDD/NYgrid-python/data/thermal


# 1. 2018 data

## Read EPA CEMS historical generation and emissions data

In [3]:
cems_data_dir = os.path.join(thermal_data_dir, 'cems_2018')

# One CSV per calendar month, named 2018ny01.csv ... 2018ny12.csv
df_list = [
    pd.read_csv(os.path.join(cems_data_dir, f'2018ny{mo:02d}.csv'),
                low_memory=False)
    for mo in range(1, 13)
]

# Stack the monthly tables into a single frame with a fresh integer index
cems_df = pd.concat(df_list, axis=0, ignore_index=True)

# Plant and unit identifiers get the names used in the rest of the notebook
cems_df = cems_df.rename(columns={'ORISPL_CODE': 'Plant_ID',
                                  'UNITID': 'Unit_ID'})

# Timestamp of each record: OP_DATE plus OP_HOUR interpreted as hours
op_date = pd.to_datetime(cems_df['OP_DATE'])
op_hour = pd.to_timedelta(cems_df['OP_HOUR'], unit='h')
cems_df['Time'] = op_date + op_hour

# Columns that are not needed downstream
unused_cols = [
    'STATE', 'FAC_ID', 'UNIT_ID', 'OP_DATE', 'OP_HOUR',
    'SO2_MASS_MEASURE_FLG', 'SO2_RATE_MEASURE_FLG',
    'NOX_MASS_MEASURE_FLG', 'NOX_RATE_MEASURE_FLG',
    'CO2_MASS_MEASURE_FLG', 'CO2_RATE_MEASURE_FLG',
    'SLOAD (1000lb/hr)',
]
cems_df = cems_df.drop(columns=unused_cols)

## Read NYCA and CEMS generator matching table

In [4]:
# Matching table between NYCA generators and CEMS units (2018 version)
matching_table_path = os.path.join(thermal_data_dir, '2018_nyca_thermal.xlsx')
gen_combiner = pd.read_excel(matching_table_path,
                             sheet_name='matched_with_id_filtered')
gen_combiner = gen_combiner.rename(
    columns={'   Station        Unit': 'NYISO_Name'})

# Unit types GT and JE are both mapped to CT
unit_type_map = {'GT': 'CT', 'JE': 'CT'}
gen_combiner['Unit Type'] = gen_combiner['Unit Type'].replace(unit_type_map)

# Secondary fuel as text; the string 'nan' left by astype(str) becomes ''
secondary_fuel = gen_combiner['Fuel Type Secondary'].astype(str)
gen_combiner['Fuel Type Secondary'] = secondary_fuel.replace({'nan': ''})
gen_combiner

Unnamed: 0,NYISO_Name,Zone,PTID,Name Plate Rating (MW),Dual Fuel,Unit Type,Fuel Type Primary,Fuel Type Secondary,2017 Net Energy (GWh),Note,...,CAMD_Facility_Name,CAMD_Plant_ID,CAMD_Unit_ID,CAMD_Generator_ID,CAMD_Nameplate_Capacity,CAMD_Fuel_Type,EIA_Latitude,EIA_Longitude,ID,eco_min_ratio_obs
0,Danskammer 1,G,23586,72.0,YES,ST,NG,FO6,0.93040,,...,Danskammer Generating Station,2480,1,1,72.0,Pipeline Natural Gas,41.571247,-73.974981,"(2480,1)",0.24
1,Danskammer 2,G,23589,73.5,YES,ST,NG,FO6,0.90830,,...,Danskammer Generating Station,2480,2,2,73.5,Pipeline Natural Gas,41.571247,-73.974981,"(2480,2)",0.22
2,Danskammer 3,G,23590,147.1,,ST,NG,,2.06448,,...,Danskammer Generating Station,2480,3,3,147.1,Pipeline Natural Gas,41.571247,-73.974981,"(2480,3)",0.36
3,Danskammer 4,G,23591,239.4,,ST,NG,,5.05390,,...,Danskammer Generating Station,2480,4,4,239.4,Pipeline Natural Gas,41.571247,-73.974981,"(2480,4)",0.28
4,Arthur Kill ST 2,J,23512,376.2,,ST,NG,,563.62800,,...,Arthur Kill,2490,20,2,376.2,Pipeline Natural Gas,40.591564,-74.200035,"(2490,20)",0.26
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
217,Astoria CC 1,J,323568,288.0,YES,CC,NG,FO2,2242.28930,( G ),...,Poletti 500 MW CC,56196,CTG7A,CA01,358.0,Pipeline Natural Gas,40.788900,-73.906900,"(56196,CTG7A)",0.40
218,Astoria CC 2,J,323569,288.0,YES,CC,NG,FO2,,,...,Poletti 500 MW CC,56196,CTG7B,CA01,358.0,Pipeline Natural Gas,40.788900,-73.906900,"(56196,CTG7B)",0.40
219,Caithness_CC_1,K,323624,375.0,YES,CC,NG,FO2,2415.15818,,...,Caithness Long Island Energy Center,56234,0001,CT01,348.9,Pipeline Natural Gas,40.814200,-72.940300,"(56234,0001)",0.60
220,EMPIRE_CC_1,F,323656,335.0,YES,CC,NG,FO2,1214.94600,,...,"Empire Generating Co, LLC",56259,CT-1,CT11,333.0,Pipeline Natural Gas,42.625239,-73.750021,"(56259,CT-1)",0.48


## Calculate generator parameters based on data in 2018

In [None]:
# Output folders for the diagnostic figures written by this cell
for fig_kind in ('time_series', 'heat_rate', 'nox_rate', 'co2_rate', 'so2_rate'):
    os.makedirs(f'figures/{fig_kind}', exist_ok=True)

# Unit-specific data cleaning, keyed by unit ID. Every entry is a list of
# functions that take the unit's nonzero-hour frame and return the boolean
# mask of rows to KEEP. The filters of one unit are applied in list order,
# each on the output of the previous one. Thresholds are hand-picked per unit.
unit_filters = {
    '(50978,A)': [
        lambda d: d['GLOAD (MW)'] < 45,
    ],
    '(50292,GT1)': [
        lambda d: d['GLOAD (MW)'] < 50,
        lambda d: ~((d['GLOAD (MW)'] > 20) & (d['HEAT_INPUT (mmBtu)'] < 100)),
    ],
    '(50292,GT2)': [
        lambda d: ~((d['GLOAD (MW)'] > 20) & (d['HEAT_INPUT (mmBtu)'] < 100)),
    ],
    '(7146,UGT013)': [
        lambda d: ~((d['GLOAD (MW)'] < 45) & (d['HEAT_INPUT (mmBtu)'] > 600)),
    ],
    '(7146,UGT014)': [
        lambda d: ~((d['GLOAD (MW)'] < 18) & (d['HEAT_INPUT (mmBtu)'] > 250)),
    ],
    '(7869,UGT011)': [
        lambda d: ~((d['GLOAD (MW)'] > 10) & (d['HEAT_INPUT (mmBtu)'] < 150)),
    ],
    '(2500,CT0010)': [
        lambda d: ~((d['GLOAD (MW)'] > 20) & (d['HEAT_INPUT (mmBtu)'] < 200)),
        lambda d: ~((d['GLOAD (MW)'] < 20) & (d['HEAT_INPUT (mmBtu)'] > 190)),
    ],
}

# Emission fits, in the order their results are stored:
# (key / figure prefix, mass column, rate column)
emis_specs = (
    ('nox', 'NOX_MASS (lbs)', 'NOX_RATE (lbs/mmBtu)'),
    ('co2', 'CO2_MASS (tons)', 'CO2_RATE (tons/mmBtu)'),
    ('so2', 'SO2_MASS (lbs)', 'SO2_RATE (lbs/mmBtu)'),
)

# Columns shown in the time series figure, one panel each
ts_cols = ['OP_TIME', 'GLOAD (MW)', 'SO2_MASS (lbs)', 'NOX_MASS (lbs)',
           'CO2_MASS (tons)', 'HEAT_INPUT (mmBtu)']

# Maps unit ID -> dict of fitted parameters, or None when the unit has no
# hour with positive load and heat input
gen_params = dict()

for ii in tqdm.tqdm(range(gen_combiner.shape[0])):
    gen_info = gen_combiner.iloc[ii]
    unit_key = gen_info['ID']

    # A unit ID that was already handled is reported and skipped
    if unit_key in gen_params:
        print(f'Data for {unit_key} already processed.')
        continue

    params = dict()
    gen_params[unit_key] = params

    # Hourly CEMS records of this unit, sorted by timestamp
    unit_df = cems_df[(cems_df['Plant_ID'] == gen_info['CAMD_Plant_ID'])
                      & (cems_df['Unit_ID'] == gen_info['CAMD_Unit_ID'])]
    unit_df = unit_df.set_index('Time').sort_index()

    # Calculate hourly ramp (difference between consecutive records)
    unit_df['RAMP (MW/hour)'] = unit_df['GLOAD (MW)'].diff()

    # Keep only hours with positive heat input and positive gross load
    unit_df_nonzero = unit_df[(unit_df['HEAT_INPUT (mmBtu)'] > 0)
                              & (unit_df['GLOAD (MW)'] > 0)]
    unit_df_nonzero = unit_df_nonzero.drop(columns=['FACILITY_NAME',
                                                    'Plant_ID', 'Unit_ID'])

    # Built before the emptiness check so the "No data" message below always
    # names the unit being processed. str() is used instead of .astype(str)
    # so that a plain Python int PTID works as well as a NumPy scalar.
    fig_name_sfx = '_'.join([gen_info["ID"], gen_info["NYISO_Name"],
                             str(gen_info["PTID"]),
                             gen_info["Unit Type"], gen_info["Fuel Type Primary"],
                             gen_info["Fuel Type Secondary"]])

    if unit_df_nonzero.shape[0] == 0:
        print(f'No data for {fig_name_sfx}.')
        gen_params[unit_key] = None
        continue

    # Time series plot (uses all hours, including the zero ones)
    df_plot = unit_df[ts_cols]
    fig_ts, ax_ts = plt.subplots(len(ts_cols), 1, figsize=(8, 6), sharex=True,
                                 layout='constrained')
    for ax, col in zip(ax_ts, ts_cols):
        ax.plot(df_plot[col])
        ax.set_title(col)

    fig_ts.suptitle(fig_name_sfx)
    fig_ts.savefig(f'figures/time_series/time_series_{fig_name_sfx}.png')

    # Calculate maximum generation (before the unit-specific filters)
    max_gen = unit_df_nonzero['GLOAD (MW)'].max()

    # Ramp rates as a fraction of maximum generation, capped at 1.
    # A NaN ratio also becomes 1 because the comparison is False.
    raise_rr_ratio = (unit_df_nonzero['RAMP (MW/hour)'].max() / max_gen)
    lower_rr_ratio = np.abs(unit_df_nonzero['RAMP (MW/hour)'].min() / max_gen)
    raise_rr_ratio = raise_rr_ratio if raise_rr_ratio <= 1 else 1
    lower_rr_ratio = lower_rr_ratio if lower_rr_ratio <= 1 else 1

    params.update({
        'NYISO_Name': gen_info['NYISO_Name'],
        'PTID': gen_info['PTID'],
        'Unit_Type': gen_info['Unit Type'],
        'Fuel_Type_Primary': gen_info['Fuel Type Primary'],
        'Fuel_Type_Secondary': gen_info['Fuel Type Secondary'],
        'CAMD_Facility_Name': gen_info['CAMD_Facility_Name'],
        'CAMD_Plant_ID': gen_info['CAMD_Plant_ID'],
        'CAMD_Unit_ID': gen_info['CAMD_Unit_ID'],
        'CAMD_Nameplate_Capacity': gen_info['CAMD_Nameplate_Capacity'],
        'raise_rr_ratio': raise_rr_ratio,
        'lower_rr_ratio': lower_rr_ratio,
    })

    # Specific treatments for certain units
    for keep_rows in unit_filters.get(unit_key, []):
        unit_df_nonzero = unit_df_nonzero[keep_rows(unit_df_nonzero)]

    # Heat rate fit and plot (heat input against gross load)
    heat_1, heat_0, heat_r2, eco_min, gen_sum, heat_sum, fig_heat, ax_heat = \
        gp.calc_heat_rate(data=unit_df_nonzero,
                          gen_info=gen_info,
                          x_name='GLOAD (MW)',
                          y_name='HEAT_INPUT (mmBtu)',
                          calc_eco_min=False,
                          nonneg_intercept=True,
                          keep_in_range=False)
    params.update({
        'max_gen': max_gen,
        'eco_min': eco_min,
        'eco_min_ratio': eco_min / max_gen,
        'gen_sum': gen_sum,
        'heat_sum': heat_sum,
        'heat_1': heat_1,
        'heat_0': heat_0,
        'heat_r2': heat_r2,
    })
    if fig_heat is not None:
        fig_heat.savefig(f'figures/heat_rate/heat_rate_{fig_name_sfx}.png')

    # NOx, CO2 and SO2 emission fits and plots (mass against heat input)
    for pollutant, mass_col, rate_col in emis_specs:
        emis_1, emis_0, emis_r2, emis_avg_rate, emis_sum, fig_emis, ax_emis = \
            gp.calc_emis_rate(data=unit_df_nonzero,
                              gen_info=gen_info,
                              x_name='HEAT_INPUT (mmBtu)',
                              y_name=mass_col,
                              rate_name=rate_col)
        params[f'{pollutant}_1'] = emis_1
        params[f'{pollutant}_0'] = emis_0
        params[f'{pollutant}_r2'] = emis_r2
        params[f'{pollutant}_sum'] = emis_sum
        if fig_emis is not None:
            fig_emis.savefig(
                f'figures/{pollutant}_rate/{pollutant}_rate_{fig_name_sfx}.png')

    # Release every figure opened for this unit
    plt.close('all')

  0%|          | 0/222 [00:00<?, ?it/s]



In [6]:
# One row per unit ID, one column per parameter
gen_params_df = pd.DataFrame(gen_params).T
gen_params_df.index.name = 'ID'

# Ramp rate ratio: a value of exactly 0 is replaced with 1
for rr_col in ('raise_rr_ratio', 'lower_rr_ratio'):
    gen_params_df[rr_col] = gen_params_df[rr_col].replace({0: 1})

# Treatment for bad fit in Astoria GT units: heat_1 takes the default for the
# unit type and primary fuel; heat_0 and heat_r2 are set to 0
astoria_gt_rows = gen_params_df[gen_params_df['CAMD_Plant_ID'] == 55243]
for unit_id, unit_row in astoria_gt_rows.iterrows():
    unit_row['heat_1'] = gp.HEAT_RATE_DEFAULT[unit_row['Unit_Type']][unit_row['Fuel_Type_Primary']]
    unit_row['heat_0'] = 0
    unit_row['heat_r2'] = 0
    gen_params_df.loc[unit_id] = unit_row

# Treatment for bad fit in Bethpage CC units: same override as above
bethpage_cc_rows = gen_params_df[(gen_params_df['CAMD_Plant_ID'] == 50292)
                                 & (gen_params_df['Unit_Type'] == 'CC')]
for unit_id, unit_row in bethpage_cc_rows.iterrows():
    unit_row['heat_1'] = gp.HEAT_RATE_DEFAULT[unit_row['Unit_Type']][unit_row['Fuel_Type_Primary']]
    unit_row['heat_0'] = 0
    unit_row['heat_r2'] = 0
    gen_params_df.loc[unit_id] = unit_row

In [7]:
# Write the 2018 parameter table into the thermal data directory
gen_params_2018_path = os.path.join(thermal_data_dir, 'gen_params_2018.xlsx')
gen_params_df.to_excel(gen_params_2018_path)

# 2. 2024 data

## Process additional generator data

In [9]:
cems_data_dir_2 = os.path.join(thermal_data_dir, 'cems_additional_data')
file_list = [
    'Bayonne Energy Center_56964_2023.csv', # Unit 9 and 10 are not in 2018
    'Linden Cogeneration Facility_50006_2023.csv',
    'Valley Energy Center_56940_2023.csv', # Not in 2018
    'Cricket Valley Energy Center_57185_2023.csv' # Not in 2018
]

# Read one frame per facility file
df_list_2 = [
    pd.read_csv(os.path.join(cems_data_dir_2, csv_name), low_memory=False)
    for csv_name in file_list
]

# Concatenate all the dataframes
cems_df_2 = pd.concat(df_list_2, axis=0, ignore_index=True)

# Add the time column from Date and Hour, mirroring OP_DATE + OP_HOUR in the
# 2018 files. The previous version added 'Operating Time' instead, which is
# plotted later next to the load and emission series and is presumably the
# operated fraction of each hour (TODO confirm against the CAMPD data
# dictionary); that made the timestamps non-unique within a day and broke
# the ordering that the hourly ramp calculation relies on.
cems_df_2['Time'] = pd.to_datetime(
    cems_df_2['Date']) + pd.to_timedelta(cems_df_2['Hour'], unit='h')

# Drop columns that are not needed
cems_df_2 = cems_df_2.drop(columns=['State', 'Date', 'Hour',
                                    'SO2 Mass Measure Indicator', 'SO2 Rate Measure Indicator',
                                    'CO2 Mass Measure Indicator', 'CO2 Rate Measure Indicator',
                                    'NOx Mass Measure Indicator', 'NOx Rate Measure Indicator',
                                    'Heat Input Measure Indicator', 'Steam Load (1000 lb/hr)',
                                    'Primary Fuel Type', 'Associated Stacks',
                                    'Secondary Fuel Type', 'Unit Type', 'SO2 Controls', 'NOx Controls',
                                    'PM Controls', 'Hg Controls', 'Program Code'])

## Read NYCA CEMS matching table with additional generators

In [23]:
# Matching table between NYCA generators and CEMS units (2024 version)
matching_table_path_2 = os.path.join(thermal_data_dir, '2024_nyca_thermal.xlsx')
gen_combiner_2 = pd.read_excel(matching_table_path_2,
                               sheet_name='matched_with_id_filtered')
gen_combiner_2 = gen_combiner_2.rename(
    columns={'   Station        Unit': 'NYISO_Name'})

# Unit types GT and JE are both mapped to CT
unit_type_map_2 = {'GT': 'CT', 'JE': 'CT'}
gen_combiner_2['Unit Type'] = gen_combiner_2['Unit Type'].replace(unit_type_map_2)

# Secondary fuel as text; the string 'nan' left by astype(str) becomes ''
secondary_fuel_2 = gen_combiner_2['Fuel Type Secondary'].astype(str)
gen_combiner_2['Fuel Type Secondary'] = secondary_fuel_2.replace({'nan': ''})

# Keep only the rows whose plant ID is one of the plants of interest
plant_ids = [56964, 50006, 56940, 57185]
is_plant_of_interest = gen_combiner_2['CAMD_Plant_ID'].isin(plant_ids)
gen_combiner_2 = gen_combiner_2[is_plant_of_interest]
gen_combiner_2

Unnamed: 0,NYISO_Name,Zone,PTID,Name Plate Rating (MW),Dual Fuel,Unit Type,Fuel Type Primary,Fuel Type Secondary,2023 Net Energy (GWh),Notes,...,CAMD_Facility_Name,CAMD_Plant_ID,CAMD_Unit_ID,CAMD_Generator_ID,CAMD_Nameplate_Capacity,CAMD_Fuel_Type,EIA_Latitude,EIA_Longitude,ID,eco_min_ratio_obs
134,Linden Cogen,J,23786,800.0,YES,CC,NG,BUT,4390.7,NJ,...,Linden Cogeneration Facility,50006,4001,GTG6,314.8,Pipeline Natural Gas,40.6322,-74.2156,"(50006,4001)",0.46
135,Linden Cogen,J,23786,800.0,YES,CC,NG,BUT,4390.7,NJ,...,Linden Cogeneration Facility,50006,5001,GTG1,230.8,Pipeline Natural Gas,40.6322,-74.2156,"(50006,5001)",0.46
136,Linden Cogen,J,23786,800.0,YES,CC,NG,BUT,4390.7,NJ,...,Linden Cogeneration Facility,50006,6001,GTG2,230.8,Pipeline Natural Gas,40.6322,-74.2156,"(50006,6001)",0.46
137,Linden Cogen,J,23786,800.0,YES,CC,NG,BUT,4390.7,NJ,...,Linden Cogeneration Facility,50006,7001,GTG3,230.8,Pipeline Natural Gas,40.6322,-74.2156,"(50006,7001)",0.46
138,Linden Cogen,J,23786,800.0,YES,CC,NG,BUT,4390.7,NJ,...,Linden Cogeneration Facility,50006,8001,GTG4,230.8,Pipeline Natural Gas,40.6322,-74.2156,"(50006,8001)",0.46
139,Linden Cogen,J,23786,800.0,YES,CC,NG,BUT,4390.7,NJ,...,Linden Cogeneration Facility,50006,9001,GTG5,230.8,Pipeline Natural Gas,40.6322,-74.2156,"(50006,9001)",0.46
203,CPV Valley CC1,G,323721,385.0,YES,CC,NG,FO2,1322.7,,...,Valley Energy Center,56940,1,CTG1,534.0,Pipeline Natural Gas,41.4122,-74.4378,"(56940,1)",0.5
204,CPV Valley CC2,G,323722,385.0,YES,CC,NG,FO2,1312.2,,...,Valley Energy Center,56940,2,CTG2,534.0,Pipeline Natural Gas,41.4122,-74.4378,"(56940,2)",0.5
205,Bayonne EC CTG1,J,323682,64.0,YES,CT,NG,KER,101.8,NJ,...,Bayonne Energy Center,56964,GT1,GT1,64.0,Pipeline Natural Gas,40.652834,-74.09155,"(56964,GT1)",0.5
206,Bayonne EC CTG10,J,323750,64.0,YES,CT,NG,KER,106.6,NJ,...,Bayonne Energy Center,56964,GT10,GT10,64.0,Pipeline Natural Gas,40.652834,-74.09155,"(56964,GT10)",0.5


## Calculate additional generator parameters

In [24]:
# Output folders for the diagnostic figures written by this cell
for fig_kind in ('time_series_2', 'heat_rate_2', 'nox_rate_2', 'co2_rate_2',
                 'so2_rate_2'):
    os.makedirs(f'figures/{fig_kind}', exist_ok=True)

# Emission fits, in the order their results are stored:
# (key / figure prefix, mass column, rate column)
emis_specs_2 = (
    ('nox', 'NOx Mass (lbs)', 'NOx Rate (lbs/mmBtu)'),
    ('co2', 'CO2 Mass (short tons)', 'CO2 Rate (short tons/mmBtu)'),
    ('so2', 'SO2 Mass (lbs)', 'SO2 Rate (lbs/mmBtu)'),
)

# Columns shown in the time series figure, one panel each
ts_cols_2 = ['Operating Time', 'Gross Load (MW)', 'SO2 Mass (lbs)',
             'NOx Mass (lbs)', 'CO2 Mass (short tons)', 'Heat Input (mmBtu)']

# Maps unit ID -> dict of fitted parameters, or None when the unit has no
# hour with positive load and heat input
gen_params_2 = dict()

for ii in tqdm.tqdm(range(gen_combiner_2.shape[0])):
    gen_info = gen_combiner_2.iloc[ii]
    unit_key = gen_info['ID']

    # A unit ID that was already handled is reported and skipped
    if unit_key in gen_params_2:
        print(f'Data for {unit_key} already processed.')
        continue

    params = dict()
    gen_params_2[unit_key] = params

    # Hourly CEMS records of this unit, sorted by timestamp
    unit_df = cems_df_2[(cems_df_2['Facility ID'] == gen_info['CAMD_Plant_ID'])
                        & (cems_df_2['Unit ID'] == gen_info['CAMD_Unit_ID'])]
    unit_df = unit_df.set_index('Time').sort_index()

    # Calculate hourly ramp (difference between consecutive records)
    unit_df['RAMP (MW/hour)'] = unit_df['Gross Load (MW)'].diff()

    # Keep only hours with positive heat input and positive gross load
    unit_df_nonzero = unit_df[(unit_df['Heat Input (mmBtu)'] > 0)
                              & (unit_df['Gross Load (MW)'] > 0)]
    unit_df_nonzero = unit_df_nonzero.drop(columns=['Facility Name',
                                                    'Facility ID', 'Unit ID'])

    # Built before the emptiness check so the "No data" message below always
    # names the unit being processed. str() is used instead of .astype(str)
    # so that a plain Python int PTID works as well as a NumPy scalar.
    fig_name_sfx = '_'.join([gen_info["ID"], gen_info["NYISO_Name"],
                             str(gen_info["PTID"]),
                             gen_info["Unit Type"], gen_info["Fuel Type Primary"],
                             gen_info["Fuel Type Secondary"]])

    if unit_df_nonzero.shape[0] == 0:
        print(f'No data for {fig_name_sfx}.')
        gen_params_2[unit_key] = None
        continue

    # Time series plot (uses all hours, including the zero ones)
    df_plot = unit_df[ts_cols_2]
    fig_ts, ax_ts = plt.subplots(len(ts_cols_2), 1, figsize=(8, 6), sharex=True,
                                 layout='constrained')
    for ax, col in zip(ax_ts, ts_cols_2):
        ax.plot(df_plot[col])
        ax.set_title(col)

    # fig_name_sfx already ends with unit type and fuel types, so it is used
    # as the title directly instead of appending those fields a second time
    fig_ts.suptitle(fig_name_sfx)
    fig_ts.savefig(f'figures/time_series_2/time_series_{fig_name_sfx}.png')

    # Calculate maximum generation
    max_gen = unit_df_nonzero['Gross Load (MW)'].max()

    # Ramp rates as a fraction of maximum generation, capped at 1.
    # A NaN ratio also becomes 1 because the comparison is False.
    raise_rr_ratio = (unit_df_nonzero['RAMP (MW/hour)'].max() / max_gen)
    lower_rr_ratio = np.abs(unit_df_nonzero['RAMP (MW/hour)'].min() / max_gen)
    raise_rr_ratio = raise_rr_ratio if raise_rr_ratio <= 1 else 1
    lower_rr_ratio = lower_rr_ratio if lower_rr_ratio <= 1 else 1

    params.update({
        'NYISO_Name': gen_info['NYISO_Name'],
        'PTID': gen_info['PTID'],
        'Unit_Type': gen_info['Unit Type'],
        'Fuel_Type_Primary': gen_info['Fuel Type Primary'],
        'Fuel_Type_Secondary': gen_info['Fuel Type Secondary'],
        'CAMD_Facility_Name': gen_info['CAMD_Facility_Name'],
        'CAMD_Plant_ID': gen_info['CAMD_Plant_ID'],
        'CAMD_Unit_ID': gen_info['CAMD_Unit_ID'],
        'CAMD_Nameplate_Capacity': gen_info['CAMD_Nameplate_Capacity'],
        'raise_rr_ratio': raise_rr_ratio,
        'lower_rr_ratio': lower_rr_ratio,
    })

    # Heat rate fit and plot (heat input against gross load)
    heat_1, heat_0, heat_r2, eco_min, gen_sum, heat_sum, fig_heat, ax_heat = \
        gp.calc_heat_rate(data=unit_df_nonzero,
                          gen_info=gen_info,
                          x_name='Gross Load (MW)',
                          y_name='Heat Input (mmBtu)',
                          calc_eco_min=False,
                          nonneg_intercept=True,
                          keep_in_range=False)
    params.update({
        'max_gen': max_gen,
        'eco_min': eco_min,
        'eco_min_ratio': eco_min / max_gen,
        'gen_sum': gen_sum,
        'heat_sum': heat_sum,
        'heat_1': heat_1,
        'heat_0': heat_0,
        'heat_r2': heat_r2,
    })
    if fig_heat is not None:
        fig_heat.savefig(f'figures/heat_rate_2/heat_rate_{fig_name_sfx}.png')

    # NOx, CO2 and SO2 emission fits and plots (mass against heat input)
    for pollutant, mass_col, rate_col in emis_specs_2:
        emis_1, emis_0, emis_r2, emis_avg_rate, emis_sum, fig_emis, ax_emis = \
            gp.calc_emis_rate(data=unit_df_nonzero,
                              gen_info=gen_info,
                              x_name='Heat Input (mmBtu)',
                              y_name=mass_col,
                              rate_name=rate_col,
                              gload_name='Gross Load (MW)',
                              heat_input_name='Heat Input (mmBtu)')
        params[f'{pollutant}_1'] = emis_1
        params[f'{pollutant}_0'] = emis_0
        params[f'{pollutant}_r2'] = emis_r2
        params[f'{pollutant}_sum'] = emis_sum
        if fig_emis is not None:
            fig_emis.savefig(
                f'figures/{pollutant}_rate_2/{pollutant}_rate_{fig_name_sfx}.png')

    # Release every figure opened for this unit
    plt.close('all')

  0%|          | 0/21 [00:00<?, ?it/s]

In [25]:
# One row per unit ID, one column per parameter
gen_params_df_2 = pd.DataFrame(gen_params_2).T
gen_params_df_2.index.name = 'ID'

# Ramp rate ratio: a value of exactly 0 is replaced with 1
for rr_col in ('raise_rr_ratio', 'lower_rr_ratio'):
    gen_params_df_2[rr_col] = gen_params_df_2[rr_col].replace({0: 1})

# NOTE(review): plant IDs 55243 and 50292 below are not among the plants the
# 2024 matching table is filtered to, so these overrides presumably match no
# rows here — confirm whether they are still wanted in this section.

# Treatment for bad fit in Astoria GT units: heat_1 takes the default for the
# unit type and primary fuel; heat_0 and heat_r2 are set to 0
astoria_gt_rows_2 = gen_params_df_2[gen_params_df_2['CAMD_Plant_ID'] == 55243]
for unit_id, unit_row in astoria_gt_rows_2.iterrows():
    unit_row['heat_1'] = gp.HEAT_RATE_DEFAULT[unit_row['Unit_Type']][unit_row['Fuel_Type_Primary']]
    unit_row['heat_0'] = 0
    unit_row['heat_r2'] = 0
    gen_params_df_2.loc[unit_id] = unit_row

# Treatment for bad fit in Bethpage CC units: same override as above
bethpage_cc_rows_2 = gen_params_df_2[(gen_params_df_2['CAMD_Plant_ID'] == 50292)
                                     & (gen_params_df_2['Unit_Type'] == 'CC')]
for unit_id, unit_row in bethpage_cc_rows_2.iterrows():
    unit_row['heat_1'] = gp.HEAT_RATE_DEFAULT[unit_row['Unit_Type']][unit_row['Fuel_Type_Primary']]
    unit_row['heat_0'] = 0
    unit_row['heat_r2'] = 0
    gen_params_df_2.loc[unit_id] = unit_row

In [27]:
# Write the 2024 parameter table into the thermal data directory
gen_params_2024_path = os.path.join(thermal_data_dir, 'gen_params_2024.xlsx')
gen_params_df_2.to_excel(gen_params_2024_path)