In order to match files from our modeling work to those in the AMPD database we are working with several different names and IDs. This notebook goes through my consistency checks for these files.

In [2]:
import pandas as pd
import numpy as np

In [286]:
# Read in the lookup table where the RGGI to NYISO IDs are kept
lu_df = pd.read_csv('../ny_emis/ed_output/RGGI_to_NYISO.csv', header=1)
lu_df = lu_df.drop(columns=['Notes', 'Unnamed: 6'])
lu_df.head()

Unnamed: 0,RGGI Facility Name,ORISPL,Unit ID,NYISO Name,PTID
0,23rd & 3rd,7910,2301,Gowanus 5,24156
1,23rd & 3rd,7910,2302,Gowanus 6,24157
2,AG-Energy,10803,1,,999999999
3,AG-Energy,10803,2,,999999999
4,Allegany Station No. 133,10619,1,Allegany,23514


In [4]:
# Read in the generator info
ed_gen_info = pd.read_csv('../ny_emis/ed_output/genInfo.csv')
ed_gen_info.head()

Unnamed: 0,NYISOName,PTID,Zone,UnitType,FuelType,Latitude,Longitude,maxPower,minPower,maxRampAgc,maxRamp10,maxRamp30,maxRamp60,HeatRateLM_R2,HeatRateQM_R2,BusName,NCOST,cost_1,cost_0
0,Allegany,23514,B,Combined Cycle,Natural Gas,42.5083,-78.0661,66.0,11.0,0.733333,7.333333,22.0,44.0,0.862443,0.891407,62,1,22.682017,-40.234372
1,Arthur Kill ST 2,23512,J,Steam Turbine,Natural Gas,40.5915,-74.2027,349.0,96.0,2.85,28.5,85.5,171.0,0.995837,0.99586,82,1,25.306225,736.980064
2,Arthur Kill ST 3,23513,J,Steam Turbine,Natural Gas,40.5915,-74.2027,539.0,1.0,3.108333,31.083333,93.25,186.5,0.986235,0.99146,82,1,25.957282,993.894899
3,Astoria 2,24149,J,Steam Turbine,Natural Gas,40.7869,-73.9122,153.0,1.0,0.808333,8.083333,24.25,48.5,0.987563,0.98891,82,1,31.302863,430.832898
4,Astoria 3,23516,J,Steam Turbine,Fuel Oil 2,40.7869,-73.9122,756.0,40.0,6.033333,60.333333,181.0,362.0,0.996075,0.996269,82,1,66.084194,3680.943877


In [5]:
# Check to see if all the generators in the NYSN are in the lookup DataFrame
for name in ed_gen_info.NYISOName:
    exists = sum(lu_df['NYISO Name'] == name)
    if exists == 1:
        print(f'{name}:\t\t\tfound')
    elif exists == 0:
        print(f'{name}:\t\t\tmissing')
    elif exists > 1:
        print(f'{name}:\t\t\thas multiple entries')
    else:
        print(f'{name}:\t\t\t!problems!')

Allegany:			found
Arthur Kill ST 2:			found
Arthur Kill ST 3:			found
Astoria 2:			found
Astoria 3:			has multiple entries
Astoria 5:			has multiple entries
Astoria CC 1:			found
Astoria CC 2:			found
Astoria East Energy - CC1:			found
Astoria East Energy - CC2:			found
Astoria Energy 2 - CC3:			found
Astoria Energy 2 - CC4:			found
Astoria GT 2-1:			has multiple entries
Astoria GT 2-2:			has multiple entries
Astoria GT 2-3:			has multiple entries
Astoria GT 2-4:			has multiple entries
Astoria GT 3-1:			has multiple entries
Astoria GT 3-2:			has multiple entries
Astoria GT 3-3:			has multiple entries
Astoria GT 3-4:			has multiple entries
Astoria GT 4-1:			has multiple entries
Astoria GT 4-2:			has multiple entries
Astoria GT 4-3:			has multiple entries
Astoria GT 4-4:			has multiple entries
Athens 1:			found
Athens 2:			found
Athens 3:			found
Barrett 03:			found
Barrett 04:			found
Barrett 05:			found
Barrett 06:			found
Barrett 08:			found
Barrett GT 01:			found
Barrett GT 02:			fou

In [287]:
# Read in data from the NY Simple Net
ed_gen = pd.read_csv('../ny_emis/ed_output/thermal_with_renewable_20160805_20160815.csv', parse_dates=['TimeStamp'], infer_datetime_format=True)
ed_gen = ed_gen.set_index('TimeStamp')

In [288]:
# Drop unnecessary units from the lookup df
gen_df = lu_df[lu_df['NYISO Name'].isin(ed_gen.columns)].reset_index(drop=True)
# Format the final dataset
gen_df = pd.concat([gen_df, pd.DataFrame(index=gen_df.index, columns=ed_gen.index.values)], axis=1)

In [289]:
# Create a generation dataframe for each ORISPL, UNIT ID combination
for name in gen_df['NYISO Name']:
    # Get simple net generation 
    unit_gen = ed_gen[name]
    # Determine how many units are associated with this name
    unit_bool = gen_df['NYISO Name'] == name
    n_units = sum(unit_bool)
    # Edit the simple net generation based on this number of units
    unit_gen = unit_gen / n_units
    # Fill this unit gen profile into all matching units
    for idx in unit_bool[unit_bool].index.values:
        gen_df.loc[idx, unit_gen.index] = unit_gen

In [290]:
gen_df

Unnamed: 0,RGGI Facility Name,ORISPL,Unit ID,NYISO Name,PTID,2016-08-05 00:00:00,2016-08-05 01:00:00,2016-08-05 02:00:00,2016-08-05 03:00:00,2016-08-05 04:00:00,...,2016-08-15 14:00:00,2016-08-15 15:00:00,2016-08-15 16:00:00,2016-08-15 17:00:00,2016-08-15 18:00:00,2016-08-15 19:00:00,2016-08-15 20:00:00,2016-08-15 21:00:00,2016-08-15 22:00:00,2016-08-15 23:00:00
0,23rd & 3rd,7910,2301,Gowanus 5,24156,19.0,19.0,19.0,19.0,19.0,...,44.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0,19.0
1,23rd & 3rd,7910,2302,Gowanus 6,24157,6.0,6.0,6.0,6.0,6.0,...,43.0,43.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0
2,Allegany Station No. 133,10619,1,Allegany,23514,11.0,11.0,11.0,11.0,11.0,...,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0,66.0
3,Arthur Kill,2490,20,Arthur Kill ST 2,23512,96.0,96.0,96.0,96.0,96.0,...,349.0,349.0,349.0,349.0,349.0,349.0,349.0,349.0,349.0,349.0
4,Arthur Kill,2490,30,Arthur Kill ST 3,23513,1.0,1.0,1.0,1.0,1.0,...,539.0,539.0,539.0,539.0,539.0,539.0,539.0,1.0,539.0,539.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
216,Vernon Boulevard,7909,VB02,Vernon Blvd 3,24163,15.0,15.0,15.0,15.0,15.0,...,42.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0,15.0
217,Wading River Facility,7146,UGT007,Wading River 1,23522,9.0,9.0,9.0,9.0,9.0,...,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0
218,Wading River Facility,7146,UGT008,Wading River 2,23547,7.8,7.8,7.8,7.8,7.8,...,7.8,7.8,7.8,7.8,7.8,7.8,7.8,7.8,7.8,7.8
219,Wading River Facility,7146,UGT009,Wading River 3,23601,4.0,4.0,4.0,4.0,4.0,...,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0,4.0


In [159]:
for ii, name in enumerate(ed_gen.index):
    # Get the ORISPL from the Lookup df
    match = (lu_df['NYISO Name'] == name)
    orispl = lu_df['ORISPL'][match]
    if len(orispl.unique()) > 1:
        print(f'Warning: found {len(orispl.unique())} ORISPLs for {name}')
  
    # Get the Unit ID from the Lookup df & add additional rows if necessary
    unitid = lu_df['Unit ID'][match]
    if len(unitid.unique()) > 1:
        print(f'Warning: found {len(unitid.unique())} UNIT IDs for {name}')




In [6]:
# Read in the generator data previously preprocessed by ERTAC EGU tool
base_df = pd.read_csv('../CONUS2016/inputs/camd_hourly_base.csv', low_memory=False)

In [7]:
base_df.head()

Unnamed: 0,STATE,FACILITY_NAME,ORISPL_CODE,UNITID,OP_DATE,OP_HOUR,OP_TIME,GLOAD,SLOAD,SO2_MASS,...,NOX_RATE_MEASURE_FLG,NOX_MASS,NOX_MASS_MEASURE_FLG,CO2_MASS,CO2_MASS_MEASURE_FLG,CO2_RATE,CO2_RATE_MEASURE_FLG,HEAT_INPUT,FAC_ID,UNIT_ID
0,AL,Barry,3,1,01-01-2016,14.0,0.0,,,,...,,,,,,,,,1.0,1.0
1,AL,Barry,3,1,01-01-2016,15.0,0.0,,,,...,,,,,,,,,1.0,1.0
2,AL,Barry,3,1,01-01-2016,16.0,0.0,,,,...,,,,,,,,,1.0,1.0
3,AL,Barry,3,1,01-01-2016,17.0,0.0,,,,...,,,,,,,,,1.0,1.0
4,AL,Barry,3,1,01-01-2016,18.0,0.0,,,,...,,,,,,,,,1.0,1.0


In [8]:
base_df = base_df[base_df['STATE'] == 'NY']

In [9]:
base_df['FACILITY_NAME'].unique()

array(['Danskammer Generating Station', 'Arthur Kill', 'East River',
       'Gowanus Generating Station', 'Hudson Avenue',
       'Narrows Generating Station', 'Ravenswood Generating Station',
       'Ravenswood Steam Plant', '59th Street', '74th Street',
       'E F Barrett', 'East Hampton Facility', 'Glenwood', 'Northport',
       'Port Jefferson Energy Center', 'West Babylon Facility',
       'Greenidge Generation LLC', 'Cayuga Operating Company, LLC',
       'Bethlehem Energy Center (Albany)', 'Huntley Power',
       'NRG Dunkirk Power', 'Oswego Harbor Power',
       'Bowline Generating Station', 'Hillburn', 'Shoemaker',
       'S A Carlson', 'Somerset Operating Company  (Kintigh)',
       'Wading River Facility', 'Richard M Flynn (Holtsville)',
       'Glenwood Landing Energy Center', 'Roseton Generating LLC',
       'Holtsville Facility', 'Astoria Generating Station',
       'Astoria Gas Turbine Power',
       'RED-Rochester, LLC-Eastman Business Park', 'Castleton Power, LLC',
  

In [11]:
base_df[base_df['FACILITY_NAME'] == 'Ravenswood Steam Plant'][base_df['OP_DATE'] == '04-01-2016']['UNITID'].unique()

  base_df[base_df['FACILITY_NAME'] == 'Ravenswood Steam Plant'][base_df['OP_DATE'] == '04-01-2016']['UNITID'].unique()


array(['BLR001', 'BLR002', 'BLR003', 'BLR004'], dtype=object)