In [2]:
%load_ext autoreload
%autoreload 3

In [3]:
import pandas as pd
import pudl
import sqlalchemy as sa
from pathlib import Path
import zipfile

In [4]:
# Build a SQLAlchemy engine from the 'pudl_db' entry of the PUDL workspace
# defaults, then wrap it in the PudlTabl output object used by the cells below.
pudl_engine = sa.create_engine(pudl.workspace.setup.get_defaults()['pudl_db'])
pudl_out = pudl.output.pudltabl.PudlTabl(pudl_engine)

### Generate Plant Parts List

In [53]:
# if you have it, read in pickled dataframe
# NOTE(review): the file name suggests this is the already-distinct plant parts
# list -- confirm. Only unpickle files you produced yourself: loading a pickle
# can execute arbitrary code.
plant_parts_eia = pd.read_pickle("plant_parts_eia_distinct.pkl.gz")

In [21]:
# Make the plant parts list (ppl) distinct for Panda.
# Adapted from the RMI repo.
# Input: a non-distinct ppl (one that still includes non-true granularities).
def get_plant_parts_distinct(plant_parts_eia):
    """Return the EIA plant-parts records that have a unique granularity.

    Only "true granularity" records that are not ownership duplicates are
    kept, so the matching model is not offered several records carrying the
    same data.

    Args:
        plant_parts_eia: plant parts list with ``plant_id_report_year``,
            ``utility_id_pudl``, ``installation_year``, ``true_gran`` and
            ``ownership_dupe`` columns. It is not modified.

    Returns:
        The filtered frame, with an added ``plant_id_report_year_util_id``
        column and ``installation_year`` cast to float.
    """
    util_id_str = plant_parts_eia["utility_id_pudl"].map(str)
    with_util_key = plant_parts_eia.assign(
        plant_id_report_year_util_id=(
            plant_parts_eia["plant_id_report_year"] + "_" + util_id_str
        )
    )
    with_util_key = with_util_key.astype({"installation_year": "float"})
    is_distinct = with_util_key["true_gran"] & ~with_util_key["ownership_dupe"]
    return with_util_key[is_distinct]

In [None]:
# Alternative to loading the pickle: build the plant parts list from the PUDL
# output object. NOTE(review): presumably slow and memory-hungry -- confirm.
plant_parts_eia = pudl_out.plant_parts_eia()

In [None]:
# a little patch because there was one duplicate record due to a bug in ppl generation
plant_parts_eia = plant_parts_eia[~plant_parts_eia.index.duplicated(keep="first")]
# Keep only the true-granularity, non-duplicate-ownership records.
plant_parts_eia_distinct = get_plant_parts_distinct(plant_parts_eia)
# Every later cell reads `plant_parts_eia` and expects the distinct list there
# (that is also what the pickled file is loaded into), so bind the distinct
# result to that name as well instead of leaving it unused.
plant_parts_eia = plant_parts_eia_distinct

In [54]:
# it's not necessary to remove columns any more to save on memory
# but these columns seemed non essential for Panda matching
ppl_cols_to_remove = {
    'appro_part_label',
    'appro_record_id_eia',
    'operational_status',
    'operational_status_pudl',
    'ownership_dupe',
    'retirement_date',
    'planned_retirement_date',
    'true_gran',
    'ownership',
    'fraction_owned',
    'record_count'
}
# Walk the existing columns rather than round-tripping through a set, so the
# remaining columns keep their original order and the result is the same on
# every run. Columns in the removal set that are already absent are ignored.
ppl_cols_to_keep = [
    col for col in plant_parts_eia.columns if col not in ppl_cols_to_remove
]
plant_parts_eia = plant_parts_eia[ppl_cols_to_keep]

### Generate FERC side
- Currently this function is taken from the RMI repo: `connect_ferc1_to_eia.InputManager.get_all_ferc1`
- Could add `pudl_rmi` to environment and import this

In [55]:
def get_ferc_plants(pudl_out):
    """Assemble the FERC 1 plant table used as the FERC side of the matching.

    Left-merges the fuel-by-plant columns onto ``pudl_out.plants_all_ferc1()``,
    applies the PUDL "ferc1" dtype conversion, derives the ID and ratio columns
    used for comparison, renames columns, and indexes by ``record_id_ferc1``.

    Args:
        pudl_out: PUDL output object providing ``plants_all_ferc1()`` and
            ``fbp_ferc1()``.

    Returns:
        DataFrame indexed by ``record_id_ferc1``.
    """
    join_keys = [
        "report_year",
        "utility_id_ferc1",
        "utility_id_pudl",
        "plant_name_ferc1",
    ]
    fuel_cols = [
        "report_year",
        "utility_id_ferc1",
        "plant_name_ferc1",
        "utility_id_pudl",
        "fuel_cost",
        "fuel_mmbtu",
        "primary_fuel_by_mmbtu",
    ]
    new_names = {
        "record_id": "record_id_ferc1",
        "opex_plants": "opex_plant",
        "fuel_cost": "total_fuel_cost",
        "fuel_mmbtu": "total_mmbtu",
        "opex_fuel_per_mwh": "fuel_cost_per_mwh",
        "primary_fuel_by_mmbtu": "fuel_type_code_pudl",
    }

    all_plants = pudl_out.plants_all_ferc1()
    fuel_by_plant = pudl_out.fbp_ferc1()[fuel_cols]
    merged = all_plants.merge(fuel_by_plant, on=join_keys, how="left")
    typed = pudl.helpers.convert_cols_dtypes(merged, "ferc1")

    # plant/year key, reused to build the plant/year/utility key below
    plant_year = typed.plant_id_pudl.map(str) + "_" + typed.report_year.map(str)
    derived = typed.assign(
        # float is needed for the comparison vectors
        installation_year=typed.installation_year.astype("float"),
        plant_id_report_year=plant_year,
        plant_id_report_year_util_id=(
            plant_year + "_" + typed.utility_id_pudl.map(str)
        ),
        fuel_cost_per_mmbtu=typed.fuel_cost / typed.fuel_mmbtu,
        heat_rate_mmbtu_mwh=typed.fuel_mmbtu / typed.net_generation_mwh,
    )

    plants_ferc1_df = derived.rename(columns=new_names).set_index("record_id_ferc1")
    return plants_ferc1_df

In [56]:
# Build the FERC-side plant table (indexed by record_id_ferc1).
ferc_df = get_ferc_plants(pudl_out)

In [57]:
# Keep only the FERC columns that are useful for matching with EIA.
ferc_df = ferc_df.loc[
    :,
    [
        'report_year',
        'utility_id_pudl',
        'utility_name_ferc1',
        'plant_id_pudl',
        'plant_name_ferc1',
        'capacity_factor',
        'capacity_mw',
        'construction_type',
        'construction_year',
        'installation_year',
        'net_generation_mwh',
        'fuel_cost_per_mwh',
        'plant_capability_mw',
        'plant_type',
        'fuel_cost_per_mmbtu',
        'fuel_type',
        'plant_name_clean',
        'total_cost_of_plant',
        'total_fuel_cost',
        'total_mmbtu',
        'fuel_type_code_pudl',
        'plant_id_report_year',
        'plant_id_report_year_util_id',
        'heat_rate_mmbtu_mwh',
    ],
]

### Add on utility name to EIA side

If the latest version of the plant parts list is being used, then construction year and installation year should already be included, so only the utility name needs to be added here.

In [58]:
# currently df is intended to be the distinct plant parts list
def add_utility_name(df, pudl_engine):
    """Attach ``utility_name_eia`` to ``df`` by looking up ``utility_id_eia``.

    Args:
        df: frame with a ``utility_id_eia`` column (intended to be the distinct
            plant parts list). Its index must be unique, because the result is
            reindexed back onto it.
        pudl_engine: connection handed to ``pd.read_sql`` to read the
            ``utilities_eia`` table.

    Returns:
        A new frame with the rows of ``df`` in their original order plus a
        ``utility_name_eia`` column. Rows with a null ``utility_id_eia`` get a
        null name.
    """
    # join on utility_name_eia
    eia_util = pd.read_sql("utilities_eia", pudl_engine)
    eia_util = eia_util.set_index("utility_id_eia")["utility_name_eia"]

    # Null IDs are kept out of the merge and appended back afterwards.
    has_util_id = df.utility_id_eia.notnull()
    named = df[has_util_id].merge(
        eia_util,
        how="left",
        left_on="utility_id_eia",
        right_index=True,
        validate="m:1",
    )
    df_util = pd.concat([named, df[~has_util_id]])

    # The concat pushes the null-ID rows to the end; restore the caller's row
    # order. (The reindexed frame used to be computed and then discarded.)
    return df_util.reindex(df.index)

In [59]:
# Add the EIA utility name to the plant parts list.
plant_parts_eia = add_utility_name(plant_parts_eia, pudl_engine)

### Separate the plant parts list by year

Panda currently has a memory issue, so the inputs are broken out by report year.

In [51]:
def separate_ppl_by_year(ppl_distinct, output_dir):
    """Write one ``right.csv`` per report year of the plant parts list.

    For every distinct ``report_year`` value, the matching rows are written to
    ``<output_dir>/ferc_eia_<year>/right.csv``; directories are created as
    needed.

    Args:
        ppl_distinct: plant parts list with a ``report_year`` column.
        output_dir: root directory for the per-year folders.
    """
    root = Path(output_dir)
    report_years = ppl_distinct.report_year.unique()
    for yr in report_years:
        year_dir = root / f"ferc_eia_{yr}"
        year_dir.mkdir(parents=True, exist_ok=True)
        in_year = ppl_distinct.report_year == yr
        ppl_distinct[in_year].to_csv(f"{output_dir}/ferc_eia_{yr}/right.csv")

### Prep inputs for just one plant part

In [60]:
# Plant part to prepare inputs for; compared against the `plant_part` column below.
part = "plant"

In [61]:
# Keep only the plant parts list records for the selected plant part.
plant_part_df = plant_parts_eia[plant_parts_eia.plant_part == part]

In [62]:
# when breaking up the ppl into individual parts there are columns that are almost fully null
# depending on the part, let's drop these columns from the FERC and EIA side
def drop_null_cols(eia_df, ferc_df, threshold=.9):
    """Drop columns that are mostly null on the EIA side from both frames.

    The null fraction is measured on ``eia_df`` only. Any column whose fraction
    is at or above ``threshold`` is removed from ``eia_df`` and, where a column
    of the same name exists, from ``ferc_df``. The null fractions are printed
    as a diagnostic.

    Args:
        eia_df: EIA-side frame used to measure null fractions.
        ferc_df: FERC-side frame to prune with the same column names.
        threshold: null fraction (0-1) at or above which a column is dropped.

    Returns:
        Tuple ``(eia_df, ferc_df)`` of pruned frames. The remaining columns keep
        their original order.
    """
    percent_null = eia_df.isnull().sum() / len(eia_df)
    print(percent_null)
    cols_to_drop = set(percent_null[percent_null >= threshold].index)
    # Select with a boolean mask over the existing columns instead of building
    # the keep-list from a set, which returned the columns in arbitrary order.
    eia_keep = ~eia_df.columns.isin(cols_to_drop)
    ferc_keep = ~ferc_df.columns.isin(cols_to_drop)
    return eia_df.loc[:, eia_keep], ferc_df.loc[:, ferc_keep]

In [None]:
# Drop mostly-null columns from both sides; 0.8 overrides the function's 0.9 default.
small_part_df, small_ferc_df = drop_null_cols(plant_part_df, ferc_df, threshold=.8)

In [65]:
small_part_df = (
    small_part_df
    # the frame is already filtered to a single plant part, so this column is redundant
    .drop(columns=["plant_part"])
    # little patch: not sure why installation_year is a float right now
    .astype({"installation_year": "Int64"})
)

### Zip up FERC and EIA to be ready for Panda import

In [66]:
def zip_dfs_for_panda(ferc_df, eia_df, zip_name):
    """Write the FERC and EIA inputs into ``panda_inputs/<zip_name>.zip``.

    The archive contains ``left.csv`` (FERC side) and ``right.csv`` (EIA side).
    The ``panda_inputs`` directory is created if it does not exist yet; an
    existing archive of the same name is overwritten.

    Args:
        ferc_df: FERC-side data, written as ``left.csv``.
        eia_df: EIA-side data, written as ``right.csv``.
        zip_name: archive file name without the ``.zip`` extension.
    """
    # ZipFile does not create missing parent directories, so make sure the
    # output folder exists on a fresh checkout.
    Path("panda_inputs").mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(f"panda_inputs/{zip_name}.zip", "w") as csv_zip:
        csv_zip.writestr("left.csv", pd.DataFrame(ferc_df).to_csv())
        csv_zip.writestr("right.csv", pd.DataFrame(eia_df).to_csv())

In [67]:
# temp: put this here, need to take out a bunch of cols to get under memory limit
eia_drop_cols = [
    "capacity_eoy_mw",
    "energy_source_code_1",
    "ferc_acct_name",
    "generator_id",
    "operating_year",
    "plant_id_eia",
    "plant_name_new",
    "plant_part_id_eia",
    "report_date",
    "utility_id_eia"
]
ferc_drop_cols = [
    "construction_type",
    "plant_capability_mw",
    "total_cost_of_plant"
]
# errors="ignore": a listed column may already be gone, either because
# drop_null_cols removed it for this plant part or because this cell was run
# before (it overwrites its own inputs). Without it the drop raises KeyError.
small_part_df = small_part_df.drop(columns=eia_drop_cols, errors="ignore")
small_ferc_df = small_ferc_df.drop(columns=ferc_drop_cols, errors="ignore")

In [68]:
# Restrict both sides to a single report year (2020) to keep the inputs small.
smaller_part_df = small_part_df[small_part_df.report_year == 2020]
smaller_ferc_df = small_ferc_df[small_ferc_df.report_year == 2020]

In [71]:
# Write the 2020 inputs for the selected plant part; the "2020" in the archive
# name is hard-coded and must match the year filter used above.
zip_dfs_for_panda(smaller_ferc_df, smaller_part_df, f"2020_{part}")

### Look at full records for training data matches

In [13]:
# Load the FERC 1 <-> EIA training labels; the columns used below are
# record_id_eia, record_id_ferc1 and notes.
training_labels = pd.read_csv("train_ferc1_eia.csv")

In [14]:
# Attach the full EIA record and the full FERC record to every training label.
# Both joins look up the label's ID in the other frame's index; FERC columns
# that clash with EIA column names get a "_ferc" suffix.
full_records = (
    training_labels[["record_id_eia", "record_id_ferc1", "notes"]]
    .join(plant_parts_eia, on="record_id_eia")
    .join(ferc_df, on="record_id_ferc1", rsuffix="_ferc")
)

In [15]:
# Order the columns alphabetically so each EIA column sits next to its "_ferc" twin.
full_records = full_records.reindex(columns=sorted(full_records.columns))

In [16]:
# Display the joined training records (pandas truncates the rendered output).
full_records

Unnamed: 0,capacity_eoy_mw,capacity_factor,capacity_factor_ferc,capacity_mw,capacity_mw_ferc,construction_type,construction_year,construction_year_ferc,energy_source_code_1,ferc_acct_name,fuel_cost_per_mmbtu,fuel_cost_per_mmbtu_ferc,fuel_cost_per_mwh,fuel_cost_per_mwh_ferc,fuel_type,fuel_type_code_pudl,fuel_type_code_pudl_ferc,generator_id,heat_rate_mmbtu_mwh,heat_rate_mmbtu_mwh_ferc,installation_year,installation_year_ferc,net_generation_mwh,net_generation_mwh_ferc,notes,operating_year,plant_capability_mw,plant_id_eia,plant_id_pudl,plant_id_pudl_ferc,plant_id_report_year,plant_id_report_year_ferc,plant_id_report_year_util_id,plant_id_report_year_util_id_ferc,plant_name_clean,plant_name_eia,plant_name_ferc1,plant_name_new,plant_part,plant_part_id_eia,plant_type,prime_mover_code,record_id_eia,record_id_ferc1,report_date,report_year,report_year_ferc,technology_description,total_cost_of_plant,total_fuel_cost,total_fuel_cost_ferc,total_mmbtu,total_mmbtu_ferc,unit_id_pudl,utility_id_eia,utility_id_pudl,utility_id_pudl_ferc,utility_name_eia,utility_name_ferc1
0,24.60000,,0.410063,24.60000,24.60,conventional,1912,1912,WAT,Hydraulic,,,,,,hydro,,,,,1912.0,1912.0,,88367.0,,1912,,2707,60,60,60_2018,60_2018,60_2018_97,60_2018_97,,Blewett,blewett hydro,Blewett HY,plant_prime_mover,2707_HY_plant_prime_mover_total_3046,Storage,HY,2707_hy_2018_plant_prime_mover_total_3046,f1_hydro_2018_12_17_0_1,2018-01-01,2018.0,2018,Conventional Hydroelectric,,,,,,,3046,97,97,Duke Energy Progress - (NC),"Duke Energy Progress, Inc."
1,24.60000,,-0.001954,24.60000,24.60,conventional,1912,1912,WAT,Hydraulic,,,,,,hydro,,,,,1912.0,1912.0,,-421.0,,1912,,2707,60,60,60_2019,60_2019,60_2019_97,60_2019_97,,Blewett,blewett hydro,Blewett HY,plant_prime_mover,2707_HY_plant_prime_mover_total_3046,Storage,HY,2707_hy_2019_plant_prime_mover_total_3046,f1_hydro_2019_12_17_0_1,2019-01-01,2019.0,2019,Conventional Hydroelectric,,,,,,,3046,97,97,Duke Energy Progress - (NC),"Duke Energy Progress, Inc."
2,0.00000,,,28.00000,28.00,conventional,1909,1909,WAT,Hydraulic,,,,,,hydro,,,,,1909.0,1909.0,,,,1909,,3266,886,886,886_2019,886_2019,886_2019_90,886_2019_90,,Rocky Creek,rocky creek,Rocky Creek,plant,3266_plant_total_5416_retired,Run-of-River,HY,3266_2019_plant_total_5416_retired,f1_hydro_2019_12_45_2_1,2019-01-01,2019.0,2019,Conventional Hydroelectric,,,,,,,5416,90,90,"Duke Energy Carolinas, LLC","Duke Energy Carolinas, LLC"
3,3005.88395,0.604867,0.605239,3005.88395,3006.00,conventional,1975,1975,BIT,Steam,1.992445,2.040426,21.460065,21.451557,,coal,coal,,10.770721,10.513235,1982.0,1982.0,1.592707e+07,15937485.0,,,,6113,222,222,222_2018,222_2018,222_2018_92,222_2018_92,,Gibson,gibson,Gibson,plant,6113_plant_owned_15470,steam,ST,6113_2018_plant_owned_15470,f1_steam_2018_12_144_0_3,2018-01-01,2018.0,2018,Conventional Steam Coal,,3.416224e+08,3.418826e+08,1.714589e+08,1.675545e+08,,15470,92,92,"Duke Energy Indiana, LLC","Duke Energy Indiana, Inc."
4,112.50000,,-0.000159,112.50000,112.50,conventional,1993,1993,NG,Other,,10.233000,,-190.076433,,gas,gas,4,,-18.573248,1993.0,1993.0,,-157.0,,1993,,1001,97,97,97_2018,97_2018,97_2018_92,97_2018_92,,Cayuga,cayuga ct,Cayuga GT,plant_prime_mover,1001_GT_plant_prime_mover_total_15470,combustion_turbine,GT,1001_gt_2018_plant_prime_mover_total_15470,f1_steam_2018_12_144_0_4,2018-01-01,2018.0,2018,Natural Gas Fired Combustion Turbine,,,2.983943e+04,,2.916000e+03,,15470,92,92,"Duke Energy Indiana, LLC","Duke Energy Indiana, Inc."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4299,18.00000,,0.716839,18.00000,34.50,outdoor,1937,1937,WAT,Hydraulic,,,,,,hydro,,,,,1937.0,1947.0,,216643.0,upper salmon in FERC = upper salmon A + upper ...,1937,,822,1576,923,1576_2019,923_2019,1576_2019_140,923_2019_140,,Upper Salmon A,upper salmon,Upper Salmon A,plant,822_plant_total_9191,Run-of-River,HY,822_2019_plant_total_9191,f1_hydro_2019_12_70_2_1,2019-01-01,2019.0,2019,Conventional Hydroelectric,,,,,,,9191,140,140,Idaho Power Co,Idaho Power Company
4300,52.70000,,0.114766,52.70000,52.90,conventional,1935,1935,WAT,Hydraulic,,,,,,hydro,,,,,1995.0,1995.0,,53183.0,,,,821,924,924,924_2020,924_2020,924_2020_140,924_2020_140,,Twin Falls (ID),twin falls,Twin Falls (ID),plant,821_plant_total_9191,Run-of-River,HY,821_2020_plant_total_9191,f1_hydro_2020_12_70_1_5,2020-01-01,2020.0,2020,Conventional Hydroelectric,,,,,,,9191,140,140,Idaho Power Co,Idaho Power Company
4301,14.70000,,0.407092,14.70000,14.73,conventional,1921,1907,WAT,Hydraulic,,,,,,hydro,,,,,2020.0,1921.0,,52529.0,,,,818,920,920,920_2020,920_2020,920_2020_140,920_2020_140,,Shoshone Falls,shoshone falls,Shoshone Falls,plant,818_plant_total_9191,Run-of-River,HY,818_2020_plant_total_9191,f1_hydro_2020_12_70_2_2,2020-01-01,2020.0,2020,Conventional Hydroelectric,,,,,,,9191,140,140,Idaho Power Co,Idaho Power Company
4302,60.00000,,0.445449,60.00000,60.00,outdoor,1949,1949,WAT,Hydraulic,,,,,,hydro,,,,,1949.0,1949.0,,234128.0,,1949,,816,926,926,926_2020,926_2020,926_2020_140,926_2020_140,,Lower Salmon,lower salmon,Lower Salmon,plant,816_plant_total_9191,Run-of-River,HY,816_2020_plant_total_9191,f1_hydro_2020_12_70_2_4,2020-01-01,2020.0,2020,Conventional Hydroelectric,,,,,,,9191,140,140,Idaho Power Co,Idaho Power Company


In [19]:
# Compare EIA and FERC capacity for the generator-level training matches.
full_records.loc[
    full_records.plant_part == "plant_gen", ["capacity_mw", "capacity_mw_ferc"]
]

Unnamed: 0,capacity_mw,capacity_mw_ferc
21,185.3,697.85
25,103.5,748.0
39,66.8,845.4
45,185.3,697.85
48,103.5,748.0
98,54.0,108.0
158,1.4,5.3
447,408.3312,410.82
448,408.3312,410.82
634,19.6,19.64


In [33]:
# Training matches whose EIA record carries a generator_id.
full_records[full_records.generator_id.notnull()]

Unnamed: 0,capacity_eoy_mw,capacity_factor,capacity_factor_ferc,capacity_mw,capacity_mw_ferc,construction_type,construction_year,construction_year_ferc,energy_source_code_1,ferc_acct_name,fuel_cost_per_mmbtu,fuel_cost_per_mmbtu_ferc,fuel_cost_per_mwh,fuel_cost_per_mwh_ferc,fuel_type,fuel_type_code_pudl,fuel_type_code_pudl_ferc,generator_id,heat_rate_mmbtu_mwh,heat_rate_mmbtu_mwh_ferc,installation_year,installation_year_ferc,net_generation_mwh,net_generation_mwh_ferc,notes,operating_year,plant_capability_mw,plant_id_eia,plant_id_pudl,plant_id_pudl_ferc,plant_id_report_year,plant_id_report_year_ferc,plant_id_report_year_util_id,plant_id_report_year_util_id_ferc,plant_name_clean,plant_name_eia,plant_name_ferc1,plant_name_new,plant_part,plant_part_id_eia,plant_type,prime_mover_code,record_id_eia,record_id_ferc1,report_date,report_year,report_year_ferc,technology_description,total_cost_of_plant,total_fuel_cost,total_fuel_cost_ferc,total_mmbtu,total_mmbtu_ferc,unit_id_pudl,utility_id_eia,utility_id_pudl,utility_id_pudl_ferc,utility_name_eia,utility_name_ferc1
4,112.5,,-0.000159,112.50,112.50,conventional,1993,1993,NG,Other,,10.233000,,-190.076433,,gas,gas,4,,-18.573248,1993.0,1993.0,,-157.0,,1993,,1001,97,97,97_2018,97_2018,97_2018_92,97_2018_92,,Cayuga,cayuga ct,Cayuga GT,plant_prime_mover,1001_GT_plant_prime_mover_total_15470,combustion_turbine,GT,1001_gt_2018_plant_prime_mover_total_15470,f1_steam_2018_12_144_0_4,2018-01-01,2018.0,2018,Natural Gas Fired Combustion Turbine,,,2.983943e+04,,2.916000e+03,,15470,92,92,"Duke Energy Indiana, LLC","Duke Energy Indiana, Inc."
6,150.0,0.130575,0.108505,150.00,300.00,conventional,1958,1958,BIT,Steam,16.898469,3.344287,220.190047,44.574729,,coal,coal,2,13.030177,13.328623,1958.0,1961.0,171576.0,285152.0,FERC capacities and generation are from R Gall...,1958,,1008,208,208,208_2018,208_2018,208_2018_92,208_2018_92,,R Gallagher,gallagher,R Gallagher 2,plant_unit,1008_2_plant_unit_total_15470,steam,ST,1008_2_2018_plant_unit_total_15470,f1_steam_2018_12_144_1_3,2018-01-01,2018.0,2018,Conventional Steam Coal,,3.777933e+07,1.271058e+07,2.235666e+06,3.800684e+06,2,15470,92,92,"Duke Energy Indiana, LLC","Duke Energy Indiana, Inc."
10,772.0,0.412962,0.415113,772.00,768.00,conventional,1981,1981,BIT,Steam,1.924199,1.873845,20.959276,20.728736,,coal,coal,2,10.892466,11.062194,1981.0,1981.0,2792745.0,2792745.0,,1981,,6018,168,168,168_2018,168_2018,168_2018_93,168_2018_93,,East Bend,east bend,East Bend,plant,6018_plant_total_55729,steam,ST,6018_2018_plant_total_55729,f1_steam_2018_12_178_0_1,2018-01-01,2018.0,2018,Conventional Steam Coal,,5.853391e+07,5.789035e+07,3.041988e+07,3.089389e+07,1,55729,93,93,Duke Energy Kentucky Inc,"Duke Energy Kentucky, Inc."
20,327.3,0.780440,0.846216,327.30,697.85,conventional,2011,2011,NG,Other,4.031229,4.066990,28.683242,28.955118,,gas,gas,ST10,7.115260,7.115477,2011.0,2011.0,2237637.0,5173061.0,,2011,,2720,78,78,78_2018,78_2018,78_2018_90,78_2018_90,,Buck,buck,Buck CA,plant_prime_mover,2720_CA_plant_prime_mover_total_5416,combined_cycle,CA,2720_ca_2018_plant_prime_mover_total_5416,f1_steam_2018_12_45_3_5,2018-01-01,2018.0,2018,Natural Gas Fired Combined Cycle,,6.418268e+07,1.497010e+08,1.592137e+07,3.680879e+07,4,5416,90,90,"Duke Energy Carolinas, LLC","Duke Energy Carolinas, LLC"
21,185.3,0.883379,0.811733,185.30,697.85,conventional,2012,2012,NG,Other,4.106095,4.139512,29.427964,29.713511,,gas,gas,CT8,7.166899,7.174784,2012.0,2012.0,1433925.0,4962258.0,,2012,,2723,144,144,144_2018,144_2018,144_2018_90,144_2018_90,,Dan River,dan river,Dan River CT8,plant_gen,2723_CT8_plant_gen_total_5416,combined_cycle,CT,2723_ct8_2018_plant_gen_total_5416,f1_steam_2018_12_45_4_1,2018-01-01,2018.0,2018,Natural Gas Fired Combined Cycle,,4.219749e+07,1.473796e+08,1.027679e+07,3.560313e+07,4,5416,90,90,"Duke Energy Carolinas, LLC","Duke Energy Carolinas, LLC"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4283,172.8,,0.209997,172.80,172.80,conventional,2005,2005,NG,Other,,2.176241,,23.207916,,gas,gas,1,,10.665324,2005.0,2005.0,,317878.0,,2005,164.0,55733,48,48,48_2019,48_2019,48_2019_140,48_2019_140,,Mountain Home Energy Project,bennett mountain,Mountain Home Energy Project,plant,55733_plant_total_9191,combustion_turbine,GT,55733_2019_plant_total_9191,f1_steam_2019_12_70_0_5,2019-01-01,2019.0,2019,Natural Gas Fired Combustion Turbine,,,7.378050e+06,,3.390272e+06,,9191,140,140,Idaho Power Co,Idaho Power Company
4284,6.8,,,6.80,6.80,,1920,1912,WAT,Hydraulic,,,,,,hydro,,3,,,1920.0,,,56953.0,,1920,,820,7,7,7_2019,7_2019,7_2019_140,7_2019_140,thousand springs,Thousand Springs,thousand springs,Thousand Springs,plant,820_plant_total_9191,,HY,820_2019_plant_total_9191,f1_gnrt_plant_2019_12_70_0_3,2019-01-01,2019.0,2019,Conventional Hydroelectric,11663284.0,,,,,,9191,140,140,Idaho Power Co,Idaho Power Company
4286,2.5,,,2.50,2.50,,1937,1937,WAT,Hydraulic,,,,,,hydro,,1,,,1937.0,,,17272.0,,1937,,814,1574,8560,1574_2019,8560_2019,1574_2019_140,8560_2019_140,clear lakes,Clear Lake,clear lakes,Clear Lake,plant,814_plant_total_9191,,HY,814_2019_plant_total_9191,f1_gnrt_plant_2019_12_70_0_2,2019-01-01,2019.0,2019,Conventional Hydroelectric,3565864.0,,,,,,9191,140,140,Idaho Power Co,Idaho Power Company
4287,0.0,0.289362,0.246454,64.22,64.20,conventional,1980,1980,SUB,Steam,2.181651,2.253966,22.719599,26.449251,,coal,coal,1,10.413946,11.689697,1980.0,1980.0,163231.8,138604.0,,1980,,6106,66,66,66_2020,66_2020,66_2020_140,66_2020_140,,Boardman,boardman,Boardman,plant,6106_plant_owned_9191,steam,ST,6106_2020_plant_owned_9191,f1_steam_2020_12_70_0_2,2020-01-01,2020.0,2020,Conventional Steam Coal,,3.708561e+06,3.651963e+06,1.699887e+06,1.620239e+06,1,9191,140,140,Idaho Power Co,Idaho Power Company


In [111]:
# Absolute difference between EIA and FERC capacity for each training match.
cap_diff = (full_records["capacity_mw"] - full_records["capacity_mw_ferc"]).abs()

In [119]:
# Summary statistics of the capacity differences.
cap_diff.describe()

count    3490.000000
mean       20.332479
std       111.245822
min         0.000000
25%         0.000000
50%         0.040000
75%         0.600000
max      3148.500000
dtype: float64

In [128]:
# Same summary with differences of 1000 or more excluded.
cap_diff[cap_diff < 1000].describe()

count    3474.000000
mean       14.285823
std        57.818731
min         0.000000
25%         0.000000
50%         0.040000
75%         0.600000
max       799.410000
dtype: float64

In [161]:
# How often each (plant_type, technology_description) pair occurs in the training matches.
full_records[["plant_type", "technology_description"]].value_counts()

plant_type           technology_description                     
steam                Conventional Steam Coal                        700
Run-of-River         Conventional Hydroelectric                     498
Storage              Conventional Hydroelectric                     412
combustion_turbine   Natural Gas Fired Combustion Turbine           354
combined_cycle       Natural Gas Fired Combined Cycle               229
hydro                Conventional Hydroelectric                     225
wind                 Onshore Wind Turbine                           196
steam                Natural Gas Steam Turbine                      100
combustion_turbine   Petroleum Liquids                               78
nuclear              Nuclear                                         71
internal_combustion  Petroleum Liquids                               68
combustion_turbine   Natural Gas Fired Combined Cycle                50
Storage (Re-Reg)     Conventional Hydroelectric                      32

In [163]:
# How often each (plant_type, prime_mover_code) pair occurs in the training matches.
full_records[["plant_type", "prime_mover_code"]].value_counts()

plant_type           prime_mover_code
steam                ST                  840
Run-of-River         HY                  498
combustion_turbine   GT                  436
Storage              HY                  412
hydro                HY                  225
wind                 WT                  196
nuclear              ST                   71
internal_combustion  IC                   51
Storage (Re-Reg)     HY                   32
photovoltaic         PV                   30
combined_cycle       CT                   24
Run-of River         HY                   21
geothermal           ST                   15
steam                GT                   14
internal_combustion  GT                    8
combustion_turbine   CA                    7
                     CT                    7
waste_heat           ST                    7
solar_pv             PV                    7
combined_cycle       CA                    5
                     GT                    2
steam            

In [172]:
# How often each (plant_type, fuel_type_code_pudl) pair occurs in the training matches.
full_records[["plant_type", "fuel_type_code_pudl"]].value_counts()

plant_type           fuel_type_code_pudl
steam                coal                   702
Run-of-River         hydro                  498
Storage              hydro                  412
combustion_turbine   gas                    408
combined_cycle       gas                    235
hydro                hydro                  225
wind                 wind                   196
steam                gas                    117
combustion_turbine   oil                     78
nuclear              nuclear                 71
internal_combustion  oil                     68
Storage (Re-Reg)     hydro                   32
photovoltaic         solar                   30
Run-of River         hydro                   21
geothermal           other                   15
steam                oil                     12
solar_pv             solar                    7
waste_heat           gas                      5
steam                other                    4
internal_combustion  gas                      4