In [1]:
import sqlalchemy as sa
import pandas as pd

from dbcp.helpers import get_sql_engine

# Engine obtained from the project helper; every `engine.connect()` below uses it.
engine = get_sql_engine()



In [2]:
# Read every data_mart table into its own DataFrame over one shared connection.
with engine.connect() as con:
    co2_dashboard = pd.read_sql_table(
        table_name="co2_dashboard", con=con, schema="data_mart"
    )
    counties_long_format = pd.read_sql_table(
        table_name="counties_long_format", con=con, schema="data_mart"
    )
    counties_wide_format = pd.read_sql_table(
        table_name="counties_wide_format", con=con, schema="data_mart"
    )
    existing_plants = pd.read_sql_table(
        table_name="existing_plants", con=con, schema="data_mart"
    )
    fossil_infrastructure_projects = pd.read_sql_table(
        table_name="fossil_infrastructure_projects", con=con, schema="data_mart"
    )
    iso_projects_long_format = pd.read_sql_table(
        table_name="iso_projects_long_format", con=con, schema="data_mart"
    )
    iso_projects_wide_format = pd.read_sql_table(
        table_name="iso_projects_wide_format", con=con, schema="data_mart"
    )
    proposed_power_dash_local_opp = pd.read_sql_table(
        table_name="proposed_power_dash_local_opp", con=con, schema="data_mart"
    )
    proposed_power_dash_existing_plants = pd.read_sql_table(
        table_name="proposed_power_dash_existing_plants", con=con, schema="data_mart"
    )
    proposed_power_dash_proposed_plants = pd.read_sql_table(
        table_name="proposed_power_dash_proposed_plants", con=con, schema="data_mart"
    )

In [3]:
# Column names, non-null counts and dtypes of the wide-format county table.
counties_wide_format.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3236 entries, 0 to 3235
Data columns (total 79 columns):
 #   Column                                                           Non-Null Count  Dtype  
---  ------                                                           --------------  -----  
 0   state_id_fips                                                    3236 non-null   object 
 1   county_id_fips                                                   3236 non-null   object 
 2   state                                                            3235 non-null   object 
 3   county                                                           3236 non-null   object 
 4   has_ordinance                                                    3236 non-null   bool   
 5   state_permitting_type                                            3079 non-null   object 
 6   county_total_co2e_tonnes_per_year                                2241 non-null   float64
 7   fossil_existing_capacity_mw               

In [4]:
def dtypes_and_nulls(df):
    """Summarise each column's dtype and whether it has missing values.

    Returns a DataFrame indexed by the column names of ``df`` with two
    positionally named columns: ``0`` holds the column's dtype and ``1`` is
    True when the column contains at least one NA value.
    """
    column_dtypes = df.dtypes
    contains_na = df.isna().any()
    return pd.concat([column_dtypes, contains_na], axis=1)

def convert_to_schema(df):
    """Print one SQLAlchemy ``Column(...)`` declaration per DataFrame column.

    Each column's pandas dtype is translated to a SQLAlchemy type name
    (``Integer``, ``Float``, ``String``, ``DateTime`` or ``Boolean``), and
    ``nullable=True`` is appended when the column contains at least one
    missing value. The text is printed so it can be pasted into a table
    definition; nothing is returned.

    Dtypes are classified with ``pd.api.types`` instead of by exact dtype
    name, so the extension dtypes produced by ``DataFrame.convert_dtypes()``
    (``Int64``, ``Float64``, ``string``, ``boolean``), other bit widths such
    as ``int32`` and timezone-aware datetimes are accepted as well.

    NOTE(review): SQLAlchemy columns are nullable by default unless they are
    part of the primary key, so the lines printed without ``nullable=`` do
    not enforce NOT NULL on their own; add ``nullable=False`` by hand where
    that constraint is intended.

    Args:
        df: DataFrame whose columns should be described.

    Raises:
        KeyError: if a column's dtype has no SQLAlchemy equivalent here.
    """
    # Checked in order; bool comes first so it is never classified as numeric.
    dtype_checks = (
        (pd.api.types.is_bool_dtype, "Boolean"),
        (pd.api.types.is_integer_dtype, "Integer"),
        (pd.api.types.is_float_dtype, "Float"),
        (pd.api.types.is_datetime64_any_dtype, "DateTime"),
        (pd.api.types.is_object_dtype, "String"),
        (pd.api.types.is_string_dtype, "String"),
    )

    def sa_type_name(dtype):
        """Return the SQLAlchemy type name for a pandas dtype."""
        for matches, type_name in dtype_checks:
            if matches(dtype):
                return type_name
        raise KeyError(f"no SQLAlchemy type mapping for pandas dtype {str(dtype)!r}")

    lines = []
    for column_name, dtype, has_null in zip(df.columns, df.dtypes, df.isna().any()):
        nullable_suffix = ", nullable=True" if has_null else ""
        lines.append(f'Column("{column_name}", {sa_type_name(dtype)}{nullable_suffix}),\n')
    # print() adds one more newline after the last declaration.
    print("".join(lines))

In [5]:
# Draft Column(...) declarations for the long-format county table.
convert_to_schema(counties_long_format)

Column("state_id_fips", String),
Column("county_id_fips", String),
Column("state", String),
Column("county", String),
Column("facility_type", String),
Column("resource_or_sector", String),
Column("status", String),
Column("facility_count", Integer),
Column("capacity_mw", Float, nullable=True),
Column("co2e_tonnes_per_year", Float, nullable=True),
Column("pm2_5_tonnes_per_year", Float, nullable=True),
Column("nox_tonnes_per_year", Float, nullable=True),
Column("has_ordinance", Boolean),
Column("ordinance_jurisdiction_name", String, nullable=True),
Column("ordinance_jurisdiction_type", String, nullable=True),
Column("ordinance", String, nullable=True),
Column("ordinance_earliest_year_mentioned", Float, nullable=True),
Column("state_permitting_type", String, nullable=True),
Column("state_permitting_text", String),



## `proposed_power_dash_existing_plants` issues

In [6]:
# How many existing-plant rows are missing a county FIPS code?
proposed_power_dash_existing_plants.county_id_fips.isna().value_counts()

False    4041
True        9
Name: county_id_fips, dtype: int64

In [7]:
# Show the rows that have no county_id_fips.
proposed_power_dash_existing_plants[proposed_power_dash_existing_plants.county_id_fips.isna()]

Unnamed: 0,state,county,state_id_fips,county_id_fips,resource,capacity_mw,permitting_type,has_ordinance
4041,,,,,Battery Storage,128.8,,False
4042,,,,,Coal,9224.9,,False
4043,,,,,Natural Gas,22893.6,,False
4044,,,,,Hydro,3115.4,,False
4045,,,,,Nuclear,9487.3,,False
4046,,,,,Oil,1340.7,,False
4047,,,,,Other,1197.5,,False
4048,,,,,Solar,1274.0,,False
4049,,,,,Onshore Wind,5361.1,,False


In [8]:
# Check whether `resource` is ever missing in the existing-plants table.
proposed_power_dash_existing_plants.resource.isna().value_counts()

False    4050
Name: resource, dtype: int64

## `proposed_power_dash_proposed_plants` issues

In [9]:
# How many proposed-plant rows are missing a county FIPS code?
proposed_power_dash_proposed_plants.county_id_fips.isna().value_counts()

False    3098
True        9
Name: county_id_fips, dtype: int64

In [10]:
# How many proposed-plant rows are missing `resource`?
proposed_power_dash_proposed_plants.resource.isna().value_counts()

False    3098
True        9
Name: resource, dtype: int64

In [11]:
# Show the rows that have no `resource`.
proposed_power_dash_proposed_plants[proposed_power_dash_proposed_plants.resource.isna()]

Unnamed: 0,state,county,state_id_fips,county_id_fips,resource,capacity_mw,project_count,permitting_type,has_ordinance
556,Illinois,Livingston County,17.0,17105.0,,,1,Local,False
1920,Ohio,Madison County,39.0,39097.0,,,1,Hybrid,False
2155,Pennsylvania,Clearfield County,42.0,42033.0,,,1,Local,False
2239,Pennsylvania,Tioga County,42.0,42117.0,,,1,Local,False
2254,Pennsylvania,York County,42.0,42133.0,,,1,Local,False
2779,Virginia,Campbell County,51.0,51031.0,,,2,Hybrid,False
2996,West Virginia,Jefferson County,54.0,54037.0,,,1,State,True
3013,West Virginia,Preston County,54.0,54077.0,,,1,State,False
3106,,,,,,,3,,False


In [12]:
# Show the rows that have no county_id_fips.
proposed_power_dash_proposed_plants[proposed_power_dash_proposed_plants.county_id_fips.isna()]

Unnamed: 0,state,county,state_id_fips,county_id_fips,resource,capacity_mw,project_count,permitting_type,has_ordinance
3098,,,,,Battery Storage,6629.5,38,,False
3099,,,,,Hydro,53.0,2,,False
3100,,,,,Natural Gas,5529.0,9,,False
3101,,,,,Offshore Wind,16043.0,25,,False
3102,,,,,Onshore Wind,3859.0,11,,False
3103,,,,,Other,86.0,6,,False
3104,,,,,Solar,13565.13,133,,False
3105,,,,,Unknown,34.2,4,,False
3106,,,,,,,3,,False


## `co2_dashboard` issues

In [13]:
# Candidate key check: is the (id, county_id_fips) pair unique across rows?
co2_dashboard[["id", "county_id_fips"]].duplicated().value_counts()

False    3888
dtype: int64

In [14]:
# Is `id` unique on its own?
co2_dashboard[["id"]].duplicated().value_counts()

False    3784
True      104
dtype: int64

In [15]:
# List every row whose `id` occurs more than once (keep=False flags all
# members of a duplicate group), sorted so colliding ids sit next to each other.
co2_dashboard[co2_dashboard.id.duplicated(keep=False)].sort_values(by="id")

Unnamed: 0,state,county,state_id_fips,id,county_id_fips,co2e_tonnes_per_year,facility_type
0,California,Santa Clara County,06,7.0,06085,6.610619e+04,proposed_power
566,Alabama,Etowah County,01,7.0,01055,3.325334e+04,existing_power
1,California,Fresno County,06,130.0,06019,1.972065e+05,proposed_power
601,South Carolina,Berkeley County,45,130.0,45015,6.158802e+06,existing_power
4,Connecticut,Hartford County,09,624.0,09003,6.260524e+04,proposed_power
...,...,...,...,...,...,...,...
1819,Minnesota,Martin County,27,7925.0,27091,2.812435e+04,existing_power
282,,,,8013.0,,1.404757e+05,proposed_power
1865,Wisconsin,Sawyer County,55,8013.0,55113,6.656400e+00,existing_power
275,Tennessee,Shelby County,47,8056.0,47157,7.199602e+04,proposed_power


In [16]:
# Candidate key check: is the (id, facility_type) pair unique across rows?
co2_dashboard[["id", "facility_type"]].duplicated().value_counts()

False    3888
dtype: int64

In [17]:
# Tally the null / non-null combinations of id and facility_type.
co2_dashboard[["id", "facility_type"]].isna().value_counts()

id     facility_type
False  False            3887
True   False               1
dtype: int64

In [18]:
# Show the rows that have no id.
co2_dashboard[co2_dashboard.id.isna()]

Unnamed: 0,state,county,state_id_fips,id,county_id_fips,co2e_tonnes_per_year,facility_type
564,,,,,,14190.686722,proposed_infrastructure


In [19]:
# Row count per facility_type.
co2_dashboard.facility_type.value_counts()

existing_power             3323
proposed_power              284
proposed_infrastructure     281
Name: facility_type, dtype: int64

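`co2_dashboard` merges proposed and existing fossil plants and facilities (`facility_type` is one of `existing_power`, `proposed_power` or `proposed_infrastructure`). Their ids come from different source datasets, so the same `id` value can appear under more than one `facility_type`: that is why `id` alone has duplicates while the `(id, facility_type)` pair is unique. `co2_dashboard` is also missing one id, on a `proposed_infrastructure` row :(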

## `counties_long_format` issues

In [20]:
# Candidate key check: are these four columns unique together?
counties_long_format[["county_id_fips", "facility_type", "resource_or_sector", "status"]].duplicated().value_counts()

False    7087
dtype: int64

In [21]:
# Tally the null / non-null combinations of the same four candidate-key columns.
counties_long_format[["county_id_fips", "facility_type", "resource_or_sector", "status"]].isna().value_counts()

county_id_fips  facility_type  resource_or_sector  status
False           False          False               False     7087
dtype: int64

In [22]:
# Display the table itself to eyeball the rows.
counties_long_format

Unnamed: 0,state_id_fips,county_id_fips,state,county,facility_type,resource_or_sector,status,facility_count,capacity_mw,co2e_tonnes_per_year,pm2_5_tonnes_per_year,nox_tonnes_per_year,has_ordinance,ordinance_jurisdiction_name,ordinance_jurisdiction_type,ordinance,ordinance_earliest_year_mentioned,state_permitting_type,state_permitting_text
0,01,01001,Alabama,Autauga County,power plant,Battery Storage,proposed,1,80.0,,,,False,,,,,Local,According to the Wind Energy Technology Office...
1,01,01001,Alabama,Autauga County,power plant,Solar,proposed,1,80.0,,,,False,,,,,Local,According to the Wind Energy Technology Office...
2,01,01003,Alabama,Baldwin County,power plant,Battery Storage,proposed,3,160.0,,,,True,Baldwin County,county,"Large wind energy conversion systems (WECS), U...",,Local,According to the Wind Energy Technology Office...
3,01,01003,Alabama,Baldwin County,power plant,Solar,proposed,3,240.0,,,,True,Baldwin County,county,"Large wind energy conversion systems (WECS), U...",,Local,According to the Wind Energy Technology Office...
4,01,01011,Alabama,Bullock County,power plant,Solar,proposed,1,79.0,,,,False,,,,,Local,According to the Wind Energy Technology Office...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7082,54,54051,West Virginia,Marshall County,fossil infrastructure,Petrochemicals and Plastics,proposed,2,,1.364022e+06,70.702824,162.032766,False,,,,,State,The West Virginia Public Service Commission ha...
7083,54,54073,West Virginia,Pleasants County,fossil infrastructure,Petrochemicals and Plastics,proposed,1,,,13.617754,71.697552,False,,,,,State,The West Virginia Public Service Commission ha...
7084,54,54095,West Virginia,Tyler County,fossil infrastructure,Natural Gas,proposed,1,,1.342706e+05,7.919271,124.610932,False,,,,,State,The West Virginia Public Service Commission ha...
7085,54,54103,West Virginia,Wetzel County,fossil infrastructure,Natural Gas,proposed,1,,1.211047e+05,0.000000,41.701480,False,,,,,State,The West Virginia Public Service Commission ha...


## Sorting out dtype issues

In [23]:
from dbcp.models.data_mart import metadata

# Re-read the long-format county table with a raw SQL query; this rebinds
# `counties_long_format`, which was first loaded with read_sql_table above.
with engine.connect() as con:
    counties_long_format = pd.read_sql_query(
        sql="select * from data_mart.counties_long_format",
        con=con,
    )

In [24]:
# dtypes pandas assigned when reading the query result.
counties_long_format.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7087 entries, 0 to 7086
Data columns (total 19 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   state_id_fips                      7087 non-null   object 
 1   county_id_fips                     7087 non-null   object 
 2   state                              7087 non-null   object 
 3   county                             7087 non-null   object 
 4   facility_type                      7087 non-null   object 
 5   resource_or_sector                 7087 non-null   object 
 6   status                             7087 non-null   object 
 7   facility_count                     7087 non-null   int64  
 8   capacity_mw                        6909 non-null   float64
 9   co2e_tonnes_per_year               1887 non-null   float64
 10  pm2_5_tonnes_per_year              178 non-null    float64
 11  nox_tonnes_per_year                178 non-null    float

In [25]:
# Same frame after convert_dtypes(), for comparison with the dtypes in the previous cell.
counties_long_format.convert_dtypes().info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7087 entries, 0 to 7086
Data columns (total 19 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   state_id_fips                      7087 non-null   string 
 1   county_id_fips                     7087 non-null   string 
 2   state                              7087 non-null   string 
 3   county                             7087 non-null   string 
 4   facility_type                      7087 non-null   string 
 5   resource_or_sector                 7087 non-null   string 
 6   status                             7087 non-null   string 
 7   facility_count                     7087 non-null   Int64  
 8   capacity_mw                        6909 non-null   Float64
 9   co2e_tonnes_per_year               1887 non-null   Float64
 10  pm2_5_tonnes_per_year              178 non-null    Float64
 11  nox_tonnes_per_year                178 non-null    Float

In [26]:
# String form of the first column's SQLAlchemy type; this string is what the
# type-mapping dicts in the following cells are keyed on.
str(metadata.tables["data_mart.counties_long_format"].columns[0].type)

'VARCHAR'

In [27]:
# SQLAlchemy type name (as rendered by str(column.type)) -> pandas dtype name.
sa_to_pd_types = {"VARCHAR": "string", "INTEGER": "Int64", "FLOAT": "float", "BOOLEAN": "bool"}


def get_dtypes_from_schema(table_name, schema):
    """Map each column of a SQLAlchemy table to a pandas dtype name.

    The table is looked up in ``metadata.tables`` under the key
    ``"<schema>.<table_name>"``.

    Args:
        table_name: Table name without the schema prefix.
        schema: Name of the schema the table lives in.

    Returns:
        dict of column name -> pandas dtype name, in table column order.

    Raises:
        KeyError: if the table is not registered in ``metadata`` or a column
            type has no entry in ``sa_to_pd_types``.
    """
    qualified_name = f"{schema}.{table_name}"
    dtypes = {}
    for column in metadata.tables[qualified_name].columns:
        # Parameterized types render with their arguments, e.g. "VARCHAR(50)";
        # drop the argument list so they resolve like the bare type.
        base_type = str(column.type).split("(", 1)[0].strip()
        dtypes[column.name] = sa_to_pd_types[base_type]
    return dtypes


# Preview the pandas dtype mapping derived from the SQLAlchemy table definition.
get_dtypes_from_schema("counties_long_format", "data_mart")

{'state_id_fips': 'string',
 'county_id_fips': 'string',
 'state': 'string',
 'county': 'string',
 'facility_type': 'string',
 'resource_or_sector': 'string',
 'status': 'string',
 'facility_count': 'Int64',
 'capacity_mw': 'float',
 'co2e_tonnes_per_year': 'float',
 'pm2_5_tonnes_per_year': 'float',
 'nox_tonnes_per_year': 'float',
 'has_ordinance': 'bool',
 'ordinance_jurisdiction_name': 'string',
 'ordinance_jurisdiction_type': 'string',
 'ordinance': 'string',
 'ordinance_earliest_year_mentioned': 'Int64',
 'state_permitting_type': 'string',
 'state_permitting_text': 'string'}

In [29]:
# NOTE(review): disabled experiment. Bound query parameters stand in for values,
# not identifiers, so passing the table name through `params` presumably does
# not work here — confirm before re-enabling.
# table_name = "data_mart.counties_long_format"
# # table_name = "counties_long_format"

# with engine.connect() as con:
#     counties_long_format = pd.read_sql_query("select * from %s", con, params=[table_name], dtype=get_dtypes_from_schema("counties_long_format", "data_mart"))
# counties_long_format.info()

## SA to BQ schema

In [30]:
# `nullable` flag of the first column in the SQLAlchemy table definition.
metadata.tables["data_mart.counties_long_format"].columns[0].nullable

False

In [34]:
# SQLAlchemy type name (as rendered by str(column.type)) -> BigQuery field type.
sa_to_bq_types = {"VARCHAR": "STRING", "INTEGER": "INTEGER", "FLOAT": "FLOAT", "BOOLEAN": "BOOL"}
# SQLAlchemy `nullable` flag -> BigQuery field mode.
sa_to_bq_modes = {True: "NULLABLE", False: "REQUIRED"}


def get_dtypes_from_schema(table_name, schema):
    """Build a BigQuery-style schema from a SQLAlchemy table definition.

    The table is looked up in ``metadata.tables`` under the key
    ``"<schema>.<table_name>"``.

    NOTE: an earlier cell defines a function with this same name that returns
    pandas dtypes; once this cell has run, the name refers to this version.

    Args:
        table_name: Table name without the schema prefix.
        schema: Name of the schema the table lives in.

    Returns:
        list of ``{"name", "type", "mode"}`` dicts, one per column, in table
        column order.

    Raises:
        KeyError: if the table is not registered in ``metadata`` or a column
            type has no entry in ``sa_to_bq_types``.
    """
    qualified_name = f"{schema}.{table_name}"
    return [
        {
            "name": column.name,
            # Parameterized types render with their arguments, e.g.
            # "VARCHAR(50)"; drop them so they resolve like the bare type.
            "type": sa_to_bq_types[str(column.type).split("(", 1)[0].strip()],
            "mode": sa_to_bq_modes[column.nullable],
        }
        for column in metadata.tables[qualified_name].columns
    ]


# Preview the BigQuery schema derived from the SQLAlchemy table definition.
get_dtypes_from_schema("counties_long_format", "data_mart")

[{'name': 'state_id_fips', 'type': 'STRING', 'mode': 'REQUIRED'},
 {'name': 'county_id_fips', 'type': 'STRING', 'mode': 'REQUIRED'},
 {'name': 'state', 'type': 'STRING', 'mode': 'REQUIRED'},
 {'name': 'county', 'type': 'STRING', 'mode': 'REQUIRED'},
 {'name': 'facility_type', 'type': 'STRING', 'mode': 'REQUIRED'},
 {'name': 'resource_or_sector', 'type': 'STRING', 'mode': 'REQUIRED'},
 {'name': 'status', 'type': 'STRING', 'mode': 'REQUIRED'},
 {'name': 'facility_count', 'type': 'INTEGER', 'mode': 'REQUIRED'},
 {'name': 'capacity_mw', 'type': 'FLOAT', 'mode': 'NULLABLE'},
 {'name': 'co2e_tonnes_per_year', 'type': 'FLOAT', 'mode': 'NULLABLE'},
 {'name': 'pm2_5_tonnes_per_year', 'type': 'FLOAT', 'mode': 'NULLABLE'},
 {'name': 'nox_tonnes_per_year', 'type': 'FLOAT', 'mode': 'NULLABLE'},
 {'name': 'has_ordinance', 'type': 'BOOL', 'mode': 'REQUIRED'},
 {'name': 'ordinance_jurisdiction_name', 'type': 'STRING', 'mode': 'NULLABLE'},
 {'name': 'ordinance_jurisdiction_type', 'type': 'STRING', 'mod

In [40]:
from sqlalchemy import (
    Boolean,
    Column,
    DateTime,
    Float,
    Integer,
    MetaData,
    String,
    Table,
)

# Attach free-form `doc` and `info` metadata to a Column and read `doc` back.
c = Column("state_id_fips", String, nullable=False, doc="Description", info={"source": "NREL"})
c.doc

'Description'

In [42]:
# The `info` dict passed to Column(...) is available unchanged on the column.
c.info

{'source': 'NREL'}

## Data type issues after dropping pandera validation calls

In [73]:
import dbcp
# Fetch the PUDL tables through dbcp's ETL helper; the next cell looks up the
# "mcoe" table by key.
# NOTE(review): presumably a dict of DataFrames keyed by table name — confirm
# against dbcp.etl. Also consider moving this import to the top import cell.
pudl_tables = dbcp.etl.etl_pudl_tables()



In [74]:
mcoe = pudl_tables["mcoe"]
# Tally how many rows of the flag column compare equal to ``False`` and to
# ``0.0``; matching tallies mean the "false" rows are held as values that
# equal both sentinels.
for falsy_value in (False, 0.0):
    print((mcoe["fuel_cost_from_eiaapi"] == falsy_value).value_counts())
# Rows where the flag compares equal to False.
mcoe.loc[mcoe["fuel_cost_from_eiaapi"] == False]

False    27627
True      2426
Name: fuel_cost_from_eiaapi, dtype: int64
False    27627
True      2426
Name: fuel_cost_from_eiaapi, dtype: int64


Unnamed: 0,plant_id_eia,generator_id,report_date,unit_id_pudl,plant_id_pudl,plant_name_eia,utility_id_eia,utility_id_pudl,utility_name_eia,associated_combined_heat_power,balancing_authority_code_eia,balancing_authority_name_eia,bga_source,bypass_heat_recovery,capacity_factor,capacity_mw,carbon_capture,city,cofire_fuels,county,current_planned_operating_date,data_source,deliver_power_transgrid,distributed_generation,duct_burners,energy_source_1_transport_1,energy_source_1_transport_2,energy_source_1_transport_3,energy_source_2_transport_1,energy_source_2_transport_2,energy_source_2_transport_3,energy_source_code_1,energy_source_code_2,energy_source_code_3,energy_source_code_4,energy_source_code_5,energy_source_code_6,ferc_cogen_status,ferc_exempt_wholesale_generator,ferc_small_power_producer,fluidized_bed_tech,fuel_cost_from_eiaapi,fuel_cost_per_mmbtu,fuel_cost_per_mwh,fuel_type_code_pudl,fuel_type_count,grid_voltage_2_kv,grid_voltage_3_kv,grid_voltage_kv,heat_rate_mmbtu_mwh,iso_rto_code,latitude,longitude,minimum_load_mw,multiple_fuels,nameplate_power_factor,net_generation_mwh,operating_date,operating_switch,operational_status,operational_status_code,original_planned_operating_date,other_combustion_tech,other_modifications_date,other_planned_modifications,owned_by_non_utility,ownership_code,planned_derate_date,planned_energy_source_code_1,planned_modifications,planned_net_summer_capacity_derate_mw,planned_net_summer_capacity_uprate_mw,planned_net_winter_capacity_derate_mw,planned_net_winter_capacity_uprate_mw,planned_new_capacity_mw,planned_new_prime_mover_code,planned_repower_date,planned_retirement_date,planned_uprate_date,previously_canceled,primary_purpose_id_naics,prime_mover_code,pulverized_coal_tech,reactive_power_output_mvar,retirement_date,rto_iso_lmp_node_id,rto_iso_location_wholesale_reporting_id,sector_id_eia,sector_name_eia,solid_fuel_gasification,startup_source_code_1,startup_source_code_2,startup_source_code_3,startup_source_code_4,state,stoker_tech,st
reet_address,subcritical_tech,summer_capacity_estimate,summer_capacity_mw,summer_estimated_capability_mw,supercritical_tech,switch_oil_gas,syncronized_transmission_grid,technology_description,time_cold_shutdown_full_load_code,timezone,topping_bottoming_code,total_fuel_cost,total_mmbtu,turbines_inverters_hydrokinetics,turbines_num,ultrasupercritical_tech,uprate_derate_completed_date,uprate_derate_during_year,winter_capacity_estimate,winter_capacity_mw,winter_estimated_capability_mw,zip_code,state_id_fips,county_id_fips
7,3,1,2020-01-01,1,32,Barry,195,18,Alabama Power Co,False,SOCO,"Southern Company Services, Inc. - Trans",eia860_org,False,0.057793,153.1,,Bucks,False,Mobile,NaT,eia860,,,False,,,,,,,NG,,,,,,False,False,False,,0.0,2.365169,,gas,2,,,230.0,,,31.006900,-88.010300,55.0,False,0.85,7.772224e+04,1954-02-01,,existing,OP,NaT,,NaT,,,S,NaT,,,,,,,,,NaT,NaT,NaT,,22,ST,True,,NaT,,,1,Electric Utility,,NG,,,,AL,,North Highway 43,True,,80.0,,,,False,Natural Gas Steam Turbine,OVER,America/Chicago,X,,,,,,NaT,False,,80.0,,36512,01,01097
8,3,2,2020-01-01,2,32,Barry,195,18,Alabama Power Co,False,SOCO,"Southern Company Services, Inc. - Trans",eia860_org,False,0.057793,153.1,,Bucks,,Mobile,NaT,eia860,,,False,,,,,,,NG,,,,,,False,False,False,,0.0,2.365169,,gas,2,,,230.0,,,31.006900,-88.010300,55.0,False,0.85,7.772224e+04,1954-07-01,,existing,OP,NaT,,NaT,,,S,NaT,,,,,,,,,NaT,NaT,NaT,,22,ST,True,,NaT,,,1,Electric Utility,,NG,,,,AL,,North Highway 43,True,,80.0,,,,False,Natural Gas Steam Turbine,OVER,America/Chicago,X,,,,,,NaT,False,,80.0,,36512,01,01097
10,3,4,2020-01-01,4,32,Barry,195,18,Alabama Power Co,False,SOCO,"Southern Company Services, Inc. - Trans",eia860_org,False,0.250678,403.7,,Bucks,,Mobile,NaT,eia860,,,False,,,,,,,BIT,,,,,,False,False,False,,0.0,3.228131,,coal,2,,,230.0,,,31.006900,-88.010300,200.0,False,0.85,8.889303e+05,1969-12-01,,existing,OP,NaT,,NaT,,,S,NaT,,,,,,,,,NaT,NaT,NaT,,22,ST,True,,NaT,,,1,Electric Utility,,NG,,,,AL,,North Highway 43,True,,362.0,,,,False,Conventional Steam Coal,OVER,America/Chicago,X,,,,,,NaT,False,,362.0,,36512,01,01097
11,3,5,2020-01-01,5,32,Barry,195,18,Alabama Power Co,False,SOCO,"Southern Company Services, Inc. - Trans",eia860_org,False,0.250678,788.8,,Bucks,,Mobile,NaT,eia860,,,False,,,,,,,BIT,,,,,,False,False,False,,0.0,3.228131,,coal,2,,,230.0,,,31.006900,-88.010300,465.0,False,0.85,1.736904e+06,1971-10-01,,existing,OP,NaT,,NaT,,,S,NaT,,,,,,,,,NaT,NaT,NaT,,22,ST,True,,NaT,,,1,Electric Utility,,NG,,,,AL,,North Highway 43,,,756.5,,True,,False,Conventional Steam Coal,OVER,America/Chicago,X,,,,,,NaT,False,,756.5,,36512,01,01097
12,3,A1CT,2020-01-01,6,32,Barry,195,18,Alabama Power Co,False,SOCO,"Southern Company Services, Inc. - Trans",string_assn,False,0.850705,170.1,,Bucks,,Mobile,NaT,eia860,,,False,,,,,,,NG,,,,,,False,False,False,,0.0,2.365169,,gas,2,,,230.0,,,31.006900,-88.010300,104.0,False,0.85,1.271088e+06,2000-05-01,,existing,OP,NaT,,NaT,,,S,NaT,,,,,,,,,NaT,NaT,NaT,,22,CT,,,NaT,,,1,Electric Utility,,,,,,AL,,North Highway 43,,,175.6,,,,False,Natural Gas Fired Combined Cycle,12H,America/Chicago,X,,,,,,NaT,False,,184.5,,36512,01,01097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26681,62116,1B,2020-01-01,1,1388,AES Huntington Beach Energy Project,61670,6988,"AES Huntington Beach Energy, LLC",False,CISO,California Independent System Operator,string_assn,False,0.210276,231.0,False,Huntington Beach,False,Orange,NaT,eia860,,,False,,,,,,,NG,,,,,,False,False,False,,0.0,,,gas,1,,,230.0,,,33.645618,-117.979372,85.0,False,0.85,4.266725e+05,2020-02-01,,existing,OP,2020-03-01,,NaT,,,J,NaT,,,,,,,,,NaT,NaT,NaT,False,22,CT,,,NaT,hunt ctg 1b,hunt ctg 1b,2,IPP Non-CHP,False,,,,,CA,,21730 Newland Street,,,194.0,,,False,False,Natural Gas Fired Combined Cycle,12H,America/Los_Angeles,X,,,,,,NaT,False,,227.0,,92646,06,06059
26682,62116,1S,2020-01-01,1,1388,AES Huntington Beach Energy Project,61670,6988,"AES Huntington Beach Energy, LLC",False,CISO,California Independent System Operator,eia860_org,False,0.207252,237.2,False,Huntington Beach,False,Orange,NaT,eia860,,,False,,,,,,,NG,,,,,,False,False,False,,0.0,,,gas,1,,,230.0,,,33.645618,-117.979372,60.0,False,0.85,4.318220e+05,2020-02-01,,existing,OP,2020-03-01,,NaT,,,J,NaT,,,,,,,,,NaT,NaT,NaT,False,22,CA,,,NaT,hunt stg 1s,hunt stg 1s,2,IPP Non-CHP,False,,,,,CA,,21730 Newland Street,,,215.0,,,False,False,Natural Gas Fired Combined Cycle,12H,America/Los_Angeles,X,,,,,,NaT,False,,236.0,,92646,06,06059
28999,64020,CTG3,2020-01-01,1,14737,West Riverside Energy Center,20856,364,Wisconsin Power & Light Co,False,MISO,Midcontinent Independent Transmission System O...,string_assn,False,0.306569,232.9,False,Beloit,,Rock,NaT,eia860,,,False,,,,,,,NG,,,,,,False,False,False,,0.0,3.304203,,gas,2,138.0,,345.0,,,42.582051,-89.041214,115.0,False,0.85,6.271770e+05,2020-05-01,,existing,OP,NaT,,NaT,False,,J,NaT,,,,,,,,,NaT,NaT,NaT,,22,CT,,,NaT,alte.cc.wrivs1,alte.cc.wrivs1,1,Electric Utility,False,,,,,WI,,4201 S. Walters Rd,,,229.5,,,,False,Natural Gas Fired Combined Cycle,12H,America/Chicago,X,,,,,,NaT,False,,238.5,,53511,55,55105
29000,64020,CTG4,2020-01-01,1,14737,West Riverside Energy Center,20856,364,Wisconsin Power & Light Co,False,MISO,Midcontinent Independent Transmission System O...,string_assn,False,0.306569,232.9,False,Beloit,,Rock,NaT,eia860,,,False,,,,,,,NG,,,,,,False,False,False,,0.0,3.304203,,gas,2,138.0,,345.0,,,42.582051,-89.041214,115.0,False,0.85,6.271770e+05,2020-05-01,,existing,OP,NaT,,NaT,False,,J,NaT,,,,,,,,,NaT,NaT,NaT,,22,CT,,,NaT,alte.cc.wrivs2,alte.cc.wrivs2,1,Electric Utility,False,,,,,WI,,4201 S. Walters Rd,,,229.4,,,,False,Natural Gas Fired Combined Cycle,12H,America/Chicago,X,,,,,,NaT,False,,238.1,,53511,55,55105


In [75]:
# Let pandas pick its nullable extension dtypes, then repeat the
# "flag == False" filter on the converted frame for comparison.
dtyped_mcoe = mcoe.convert_dtypes()
dtyped_mcoe.loc[dtyped_mcoe["fuel_cost_from_eiaapi"] == False]

Unnamed: 0,plant_id_eia,generator_id,report_date,unit_id_pudl,plant_id_pudl,plant_name_eia,utility_id_eia,utility_id_pudl,utility_name_eia,associated_combined_heat_power,balancing_authority_code_eia,balancing_authority_name_eia,bga_source,bypass_heat_recovery,capacity_factor,capacity_mw,carbon_capture,city,cofire_fuels,county,current_planned_operating_date,data_source,deliver_power_transgrid,distributed_generation,duct_burners,energy_source_1_transport_1,energy_source_1_transport_2,energy_source_1_transport_3,energy_source_2_transport_1,energy_source_2_transport_2,energy_source_2_transport_3,energy_source_code_1,energy_source_code_2,energy_source_code_3,energy_source_code_4,energy_source_code_5,energy_source_code_6,ferc_cogen_status,ferc_exempt_wholesale_generator,ferc_small_power_producer,fluidized_bed_tech,fuel_cost_from_eiaapi,fuel_cost_per_mmbtu,fuel_cost_per_mwh,fuel_type_code_pudl,fuel_type_count,grid_voltage_2_kv,grid_voltage_3_kv,grid_voltage_kv,heat_rate_mmbtu_mwh,iso_rto_code,latitude,longitude,minimum_load_mw,multiple_fuels,nameplate_power_factor,net_generation_mwh,operating_date,operating_switch,operational_status,operational_status_code,original_planned_operating_date,other_combustion_tech,other_modifications_date,other_planned_modifications,owned_by_non_utility,ownership_code,planned_derate_date,planned_energy_source_code_1,planned_modifications,planned_net_summer_capacity_derate_mw,planned_net_summer_capacity_uprate_mw,planned_net_winter_capacity_derate_mw,planned_net_winter_capacity_uprate_mw,planned_new_capacity_mw,planned_new_prime_mover_code,planned_repower_date,planned_retirement_date,planned_uprate_date,previously_canceled,primary_purpose_id_naics,prime_mover_code,pulverized_coal_tech,reactive_power_output_mvar,retirement_date,rto_iso_lmp_node_id,rto_iso_location_wholesale_reporting_id,sector_id_eia,sector_name_eia,solid_fuel_gasification,startup_source_code_1,startup_source_code_2,startup_source_code_3,startup_source_code_4,state,stoker_tech,st
reet_address,subcritical_tech,summer_capacity_estimate,summer_capacity_mw,summer_estimated_capability_mw,supercritical_tech,switch_oil_gas,syncronized_transmission_grid,technology_description,time_cold_shutdown_full_load_code,timezone,topping_bottoming_code,total_fuel_cost,total_mmbtu,turbines_inverters_hydrokinetics,turbines_num,ultrasupercritical_tech,uprate_derate_completed_date,uprate_derate_during_year,winter_capacity_estimate,winter_capacity_mw,winter_estimated_capability_mw,zip_code,state_id_fips,county_id_fips
7,3,1,2020-01-01,1,32,Barry,195,18,Alabama Power Co,False,SOCO,"Southern Company Services, Inc. - Trans",eia860_org,False,0.057793,153.1,,Bucks,False,Mobile,NaT,eia860,,,False,,,,,,,NG,,,,,,False,False,False,,0,2.365169,,gas,2,,,230.0,,,31.0069,-88.0103,55.0,False,0.85,77722.235,1954-02-01,,existing,OP,NaT,,NaT,,,S,NaT,,,,,,,,,NaT,NaT,NaT,,22,ST,True,,NaT,,,1,Electric Utility,,NG,,,,AL,,North Highway 43,True,,80.0,,,,False,Natural Gas Steam Turbine,OVER,America/Chicago,X,,,,,,NaT,False,,80.0,,36512,01,01097
8,3,2,2020-01-01,2,32,Barry,195,18,Alabama Power Co,False,SOCO,"Southern Company Services, Inc. - Trans",eia860_org,False,0.057793,153.1,,Bucks,,Mobile,NaT,eia860,,,False,,,,,,,NG,,,,,,False,False,False,,0,2.365169,,gas,2,,,230.0,,,31.0069,-88.0103,55.0,False,0.85,77722.235,1954-07-01,,existing,OP,NaT,,NaT,,,S,NaT,,,,,,,,,NaT,NaT,NaT,,22,ST,True,,NaT,,,1,Electric Utility,,NG,,,,AL,,North Highway 43,True,,80.0,,,,False,Natural Gas Steam Turbine,OVER,America/Chicago,X,,,,,,NaT,False,,80.0,,36512,01,01097
10,3,4,2020-01-01,4,32,Barry,195,18,Alabama Power Co,False,SOCO,"Southern Company Services, Inc. - Trans",eia860_org,False,0.250678,403.7,,Bucks,,Mobile,NaT,eia860,,,False,,,,,,,BIT,,,,,,False,False,False,,0,3.228131,,coal,2,,,230.0,,,31.0069,-88.0103,200.0,False,0.85,888930.317989,1969-12-01,,existing,OP,NaT,,NaT,,,S,NaT,,,,,,,,,NaT,NaT,NaT,,22,ST,True,,NaT,,,1,Electric Utility,,NG,,,,AL,,North Highway 43,True,,362.0,,,,False,Conventional Steam Coal,OVER,America/Chicago,X,,,,,,NaT,False,,362.0,,36512,01,01097
11,3,5,2020-01-01,5,32,Barry,195,18,Alabama Power Co,False,SOCO,"Southern Company Services, Inc. - Trans",eia860_org,False,0.250678,788.8,,Bucks,,Mobile,NaT,eia860,,,False,,,,,,,BIT,,,,,,False,False,False,,0,3.228131,,coal,2,,,230.0,,,31.0069,-88.0103,465.0,False,0.85,1736904.223011,1971-10-01,,existing,OP,NaT,,NaT,,,S,NaT,,,,,,,,,NaT,NaT,NaT,,22,ST,True,,NaT,,,1,Electric Utility,,NG,,,,AL,,North Highway 43,,,756.5,,True,,False,Conventional Steam Coal,OVER,America/Chicago,X,,,,,,NaT,False,,756.5,,36512,01,01097
12,3,A1CT,2020-01-01,6,32,Barry,195,18,Alabama Power Co,False,SOCO,"Southern Company Services, Inc. - Trans",string_assn,False,0.850705,170.1,,Bucks,,Mobile,NaT,eia860,,,False,,,,,,,NG,,,,,,False,False,False,,0,2.365169,,gas,2,,,230.0,,,31.0069,-88.0103,104.0,False,0.85,1271087.5,2000-05-01,,existing,OP,NaT,,NaT,,,S,NaT,,,,,,,,,NaT,NaT,NaT,,22,CT,,,NaT,,,1,Electric Utility,,,,,,AL,,North Highway 43,,,175.6,,,,False,Natural Gas Fired Combined Cycle,12H,America/Chicago,X,,,,,,NaT,False,,184.5,,36512,01,01097
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26681,62116,1B,2020-01-01,1,1388,AES Huntington Beach Energy Project,61670,6988,"AES Huntington Beach Energy, LLC",False,CISO,California Independent System Operator,string_assn,False,0.210276,231.0,False,Huntington Beach,False,Orange,NaT,eia860,,,False,,,,,,,NG,,,,,,False,False,False,,0,,,gas,1,,,230.0,,,33.645618,-117.979372,85.0,False,0.85,426672.5,2020-02-01,,existing,OP,2020-03-01,,NaT,,,J,NaT,,,,,,,,,NaT,NaT,NaT,False,22,CT,,,NaT,hunt ctg 1b,hunt ctg 1b,2,IPP Non-CHP,False,,,,,CA,,21730 Newland Street,,,194.0,,,False,False,Natural Gas Fired Combined Cycle,12H,America/Los_Angeles,X,,,,,,NaT,False,,227.0,,92646,06,06059
26682,62116,1S,2020-01-01,1,1388,AES Huntington Beach Energy Project,61670,6988,"AES Huntington Beach Energy, LLC",False,CISO,California Independent System Operator,eia860_org,False,0.207252,237.2,False,Huntington Beach,False,Orange,NaT,eia860,,,False,,,,,,,NG,,,,,,False,False,False,,0,,,gas,1,,,230.0,,,33.645618,-117.979372,60.0,False,0.85,431822.0,2020-02-01,,existing,OP,2020-03-01,,NaT,,,J,NaT,,,,,,,,,NaT,NaT,NaT,False,22,CA,,,NaT,hunt stg 1s,hunt stg 1s,2,IPP Non-CHP,False,,,,,CA,,21730 Newland Street,,,215.0,,,False,False,Natural Gas Fired Combined Cycle,12H,America/Los_Angeles,X,,,,,,NaT,False,,236.0,,92646,06,06059
28999,64020,CTG3,2020-01-01,1,14737,West Riverside Energy Center,20856,364,Wisconsin Power & Light Co,False,MISO,Midcontinent Independent Transmission System O...,string_assn,False,0.306569,232.9,False,Beloit,,Rock,NaT,eia860,,,False,,,,,,,NG,,,,,,False,False,False,,0,3.304203,,gas,2,138.0,,345.0,,,42.582051,-89.041214,115.0,False,0.85,627177.0,2020-05-01,,existing,OP,NaT,,NaT,False,,J,NaT,,,,,,,,,NaT,NaT,NaT,,22,CT,,,NaT,alte.cc.wrivs1,alte.cc.wrivs1,1,Electric Utility,False,,,,,WI,,4201 S. Walters Rd,,,229.5,,,,False,Natural Gas Fired Combined Cycle,12H,America/Chicago,X,,,,,,NaT,False,,238.5,,53511,55,55105
29000,64020,CTG4,2020-01-01,1,14737,West Riverside Energy Center,20856,364,Wisconsin Power & Light Co,False,MISO,Midcontinent Independent Transmission System O...,string_assn,False,0.306569,232.9,False,Beloit,,Rock,NaT,eia860,,,False,,,,,,,NG,,,,,,False,False,False,,0,3.304203,,gas,2,138.0,,345.0,,,42.582051,-89.041214,115.0,False,0.85,627177.0,2020-05-01,,existing,OP,NaT,,NaT,False,,J,NaT,,,,,,,,,NaT,NaT,NaT,,22,CT,,,NaT,alte.cc.wrivs2,alte.cc.wrivs2,1,Electric Utility,False,,,,,WI,,4201 S. Walters Rd,,,229.4,,,,False,Natural Gas Fired Combined Cycle,12H,America/Chicago,X,,,,,,NaT,False,,238.1,,53511,55,55105


In [76]:
# Count the distinct values in one of the flag columns after dtype conversion.
dtyped_mcoe["fluidized_bed_tech"].value_counts()

True     134
False     96
Name: fluidized_bed_tech, dtype: Int64

In [77]:
# Raise the row display limit so the full per-column dtype listing is shown.
pd.options.display.max_rows = 200
dtyped_mcoe.dtypes

plant_id_eia                                        Int64
generator_id                                       string
report_date                                datetime64[ns]
unit_id_pudl                                        Int64
plant_id_pudl                                       Int64
plant_name_eia                                     string
utility_id_eia                                      Int64
utility_id_pudl                                     Int64
utility_name_eia                                   string
associated_combined_heat_power                    boolean
balancing_authority_code_eia                       string
balancing_authority_name_eia                       string
bga_source                                         string
bypass_heat_recovery                              boolean
capacity_factor                                   Float64
capacity_mw                                       Float64
carbon_capture                                    boolean
city          

In [78]:
# Keep only the columns that convert_dtypes() inferred as nullable booleans,
# then show them next to the same columns from the unconverted frame.
boolean_dtyped_mcoe = dtyped_mcoe.select_dtypes(include="boolean")
display(
    boolean_dtyped_mcoe,
    mcoe.loc[:, boolean_dtyped_mcoe.columns],
)

Unnamed: 0,associated_combined_heat_power,bypass_heat_recovery,carbon_capture,cofire_fuels,deliver_power_transgrid,distributed_generation,duct_burners,ferc_cogen_status,ferc_exempt_wholesale_generator,ferc_small_power_producer,fluidized_bed_tech,multiple_fuels,other_combustion_tech,other_planned_modifications,owned_by_non_utility,planned_modifications,previously_canceled,pulverized_coal_tech,solid_fuel_gasification,stoker_tech,subcritical_tech,summer_capacity_estimate,supercritical_tech,switch_oil_gas,syncronized_transmission_grid,ultrasupercritical_tech,uprate_derate_during_year,winter_capacity_estimate
0,False,False,,,,,False,False,False,False,,False,,,,,,,,,,,,,False,,False,
1,False,False,,,,,False,False,False,False,,False,,,,,,,,,,,,,False,,False,
2,False,False,,,,,False,False,False,False,,False,,,,,,,,,,,,,False,,False,
3,False,False,,,,,False,False,False,False,,False,,,,,,,,,,,,,False,,False,
4,False,False,,,,,False,False,False,False,,,,,,,,,,,,,,,False,,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30048,False,False,,,,,False,False,False,False,,,,,,,False,,,,,,,,False,,,
30049,False,False,,,,,False,False,False,False,,,,,,,False,,,,,,,,False,,,
30050,False,False,,,,,False,False,False,False,,,,,,,False,,,,,,,,False,,,
30051,False,False,,,,,False,False,False,False,,,,,,,False,,,,,,,,False,,,


Unnamed: 0,associated_combined_heat_power,bypass_heat_recovery,carbon_capture,cofire_fuels,deliver_power_transgrid,distributed_generation,duct_burners,ferc_cogen_status,ferc_exempt_wholesale_generator,ferc_small_power_producer,fluidized_bed_tech,multiple_fuels,other_combustion_tech,other_planned_modifications,owned_by_non_utility,planned_modifications,previously_canceled,pulverized_coal_tech,solid_fuel_gasification,stoker_tech,subcritical_tech,summer_capacity_estimate,supercritical_tech,switch_oil_gas,syncronized_transmission_grid,ultrasupercritical_tech,uprate_derate_during_year,winter_capacity_estimate
0,False,False,,,,,False,False,False,False,,False,,,,,,,,,,,,,False,,False,
1,False,False,,,,,False,False,False,False,,False,,,,,,,,,,,,,False,,False,
2,False,False,,,,,False,False,False,False,,False,,,,,,,,,,,,,False,,False,
3,False,False,,,,,False,False,False,False,,False,,,,,,,,,,,,,False,,False,
4,False,False,,,,,False,False,False,False,,,,,,,,,,,,,,,False,,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
30048,False,False,,,,,False,False,False,False,,,,,,,False,,,,,,,,False,,,
30049,False,False,,,,,False,False,False,False,,,,,,,False,,,,,,,,False,,,
30050,False,False,,,,,False,False,False,False,,,,,,,False,,,,,,,,False,,,
30051,False,False,,,,,False,False,False,False,,,,,,,False,,,,,,,,False,,,
