# EIA-923 Cleaning/Export Tool

This notebook can be used to export cleaned EIA-923 generation and emissions data.

The emissions data includes both adjusted and unadjusted values for:
* CO2
* CO2-eq
* NOx
* SO2

In [19]:
import sys
sys.path.append('../../hourly-egrid/')

%reload_ext autoreload
%autoreload 2

import os
import pandas as pd
import plotly.express as px
import numpy as np

import src.data_cleaning as dc
import src.load_data as ld

In [20]:
def path_to_output(rel=''):
    """Return the path of `rel` inside the notebook's `../output` directory.

    Args:
        rel: Path relative to the output directory. Defaults to the empty
            string, in which case the output directory itself is returned
            (with a trailing separator, as produced by `os.path.join`).

    Returns:
        The joined path as a string.
    """
    output_root = '../output'
    return os.path.join(output_root, rel)

In [21]:
# Clean a single year (2001) with the NOx, SO2 and CO2e options enabled.
# The call returns two objects, unpacked here as the emissions frame and the
# primary fuel table.
df_923_emissions, primary_fuel_table = dc.clean_eia923(
    2001, include_nox=True, include_so2=True, include_co2e=True)

Removing 16 plants that are not grid-connected
Removing 0 plants located in the following states: ['PR']


In [22]:
# Show the cleaned 2001 frame as the cell's output for a quick visual check.
df_923_emissions

Unnamed: 0,report_date,plant_id_eia,generator_id,net_generation_mwh,fuel_consumed_mmbtu,fuel_consumed_for_electricity_mmbtu,co2_mass_tons,co2_mass_tons_adjusted,nox_mass_lbs,nox_mass_lbs_adjusted,so2_mass_lbs,so2_mass_lbs_adjusted,co2e_mass_tons,co2e_mass_tons_adjusted
0,2001-01-01,2,1,18918.0,195479.69,195479.69,0.0,0.0,,,,,0.0,0.0
1,2001-01-01,3,1,,,,,,,,,,,
2,2001-01-01,3,2,,,,,,,,,,,
3,2001-01-01,3,3,,,,,,,,,,,
4,2001-01-01,3,4,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
192399,2001-12-01,55984,TG-2,,,,,,,,,,,
192400,2001-12-01,55988,1,,,,,,,,,,,
192401,2001-12-01,55988,2,,,,,,,,,,,
192402,2001-12-01,55988,3,,,,,,,,,,,


In [27]:
def export_cleaned_eia_923(year):
    """Clean one year of EIA-923 data and export the results as two CSV files.

    Calls `dc.clean_eia923` with the NOx, SO2 and CO2e options enabled and
    writes both returned tables, as returned (no plant-level aggregation is
    applied here), into the `923` subdirectory of the output folder:

    * `eia_923_generation_and_emissions_{year}.csv`
    * `eia_923_primary_fuel_table_{year}.csv`

    The destination directory is created if it does not exist yet, so the
    function can be called on its own without a prior setup cell.

    Args:
        year: Reporting year to clean. Passed through to `dc.clean_eia923`
            and embedded in the output file names.

    Returns:
        None. The results are written to disk and progress is printed.
    """
    print(f'[INFO] Cleaning EIA-923 data for {year}')
    df_923_generator_emissions, primary_fuel_table = dc.clean_eia923(
        year, include_nox=True, include_so2=True, include_co2e=True)
    print('[INFO] Done cleaning.')

    # Make sure the target directory exists; to_csv does not create it.
    os.makedirs(path_to_output('923'), exist_ok=True)

    # Export the data to output: (relative path, table) pairs, written in order.
    exports = (
        (f'923/eia_923_generation_and_emissions_{year}.csv',
         df_923_generator_emissions),
        (f'923/eia_923_primary_fuel_table_{year}.csv',
         primary_fuel_table),
    )
    for rel_path, table in exports:
        output_path = path_to_output(rel_path)
        table.to_csv(output_path)
        print('[INFO] Wrote to:', output_path)
    print('Done')

In [28]:
# Run the export for every year in the range below (2001 through 2020).
# NOTE: This is slow! Each year takes a few minutes to run.
years_to_export = range(2001, 2021)

# The per-year CSV files are written into the '923' output subdirectory.
os.makedirs(path_to_output('923'), exist_ok=True)

for year in years_to_export:
    export_cleaned_eia_923(year)

[INFO] Cleaning EIA-923 data for 2001
Removing 16 plants that are not grid-connected
Removing 0 plants located in the following states: ['PR']
[INFO] Done cleaning.
[INFO] Wrote to: ../output/923/eia_923_generation_and_emissions_2001.csv
[INFO] Wrote to: ../output/923/eia_923_primary_fuel_table_2001.csv
Done
[INFO] Cleaning EIA-923 data for 2002
Removing 17 plants that are not grid-connected
Removing 0 plants located in the following states: ['PR']
[INFO] Done cleaning.
[INFO] Wrote to: ../output/923/eia_923_generation_and_emissions_2002.csv
[INFO] Wrote to: ../output/923/eia_923_primary_fuel_table_2002.csv
Done
[INFO] Cleaning EIA-923 data for 2003
Removing 18 plants that are not grid-connected
Removing 0 plants located in the following states: ['PR']
[INFO] Done cleaning.
[INFO] Wrote to: ../output/923/eia_923_generation_and_emissions_2003.csv
[INFO] Wrote to: ../output/923/eia_923_primary_fuel_table_2003.csv
Done
[INFO] Cleaning EIA-923 data for 2004
Removing 17 plants that are not 