# Project 2: Visualizing Power Plant Emissions vs. Power Plant Generation

In this project, I explored the relationship between power plant electricity 
generation and greenhouse gas emissions using publicly available datasets 
from the U.S. Environmental Protection Agency (EPA) and the U.S. Energy 
Information Administration (EIA). I created a clean dataset linking each power 
plant's emissions with its net electricity generation, and then visualized 
how emissions scale with electricity production across different fuel types.




In [None]:
import pandas as pd 
import plotly.express as px


In [None]:

# Load the 2023 GHGRP emissions export. The first three lines of the file are
# skipped so that the fourth line is used as the header row.
ghg_path = (
    "/Users/ariakovalovich/Documents/SIPA MIA Coursework/aak2257"
    ".github.io/Project 2 Datasets/ghgp_data_2023.csv"
)
ghg = pd.read_csv(ghg_path, skiprows=3)

# Show the first dozen column names to confirm the header was parsed.
print("GHGRP columns:", list(ghg.columns[:12]))

In [None]:
# Keep only the identifying and emissions columns, and give the facility id an
# underscore name ("Facility_Id") for use as a join key.
ghg_small = ghg[
    ["Facility Id", "Facility Name", "State", "Total reported direct emissions"]
].rename(columns={"Facility Id": "Facility_Id"})


In [None]:
# Load the GHGRP-to-ORIS crosswalk that links GHGRP facility ids to ORIS codes.
crosswalk_path = (
    "/Users/ariakovalovich/Documents/SIPA MIA Coursework/aak2257.github.io"
    "/Project 2 Datasets/ghgrp_oris_power_plant_crosswalk_12_13_21.csv"
)
cross = pd.read_csv(crosswalk_path)

# List every column name to find the id columns needed for the joins.
print("Crosswalk columns:", list(cross.columns))


In [None]:
# Reduce the crosswalk to its two id columns and rename the GHGRP id to
# "Facility_Id" so it shares a name with the key in ghg_small.
cross_small = cross[["GHGRP Facility ID", "ORIS CODE"]].rename(
    columns={"GHGRP Facility ID": "Facility_Id"}
)


In [None]:
# Show the column names of the trimmed crosswalk to confirm the rename.
cross_small.columns

In [None]:
# Compare the join-key dtypes on both sides before merging. Both values are
# printed explicitly because a notebook cell only renders the value of its
# final expression, so a bare first expression would never be shown.
print("GHGRP Facility_Id dtype:", ghg_small.Facility_Id.dtype)
print("Crosswalk Facility_Id dtype:", cross_small.Facility_Id.dtype)

In [None]:
# Attach ORIS codes to the GHGRP emissions rows. The inner join keeps only
# facilities whose Facility_Id is present in both tables.
ghg_with_oris = ghg_small.merge(cross_small, how="inner", on="Facility_Id")

# Report (rows, columns) of the joined table.
print("Merged GHGRP:", ghg_with_oris.shape)


In [None]:
# Preview the first rows of the GHGRP table after ORIS codes were attached.
ghg_with_oris.head()

In [None]:
# Load the EIA-923 (2023, final revision) export. The first five lines of the
# file are skipped so that the sixth line is used as the header row.
eia_path = (
    "/Users/ariakovalovich/Documents/SIPA MIA Coursework/aak2257.github.io/"
    "Project 2 Datasets/EIA923_Schedules_2_3_4_5_M_12_2023_Final_Revision.csv"
)
eia = pd.read_csv(eia_path, skiprows=5)

# Display the parsed column names.
eia.columns


In [None]:
# Column labels in this export contain an embedded newline; name them once so
# the selection and the aggregation below cannot drift apart.
generation_col = "Net Generation\n(Megawatthours)"
fuel_code_col = "Reported\nFuel Type Code"

# Keep only the columns needed downstream and rename the plant identifier to
# "ORIS CODE", the key name used by the crosswalk table.
eia_small = eia[
    ["Plant Id", "Plant Name", "Plant State", generation_col, fuel_code_col]
].rename(columns={"Plant Id": "ORIS CODE"})

# Collapse to one row per plant: net generation is summed, while the name,
# state and fuel code are taken from the plant's first row.
# NOTE(review): "first" keeps whichever fuel code happens to come first; if a
# plant has rows for several fuels this may not be its main fuel -- confirm.
# NOTE(review): the sum assumes the generation column was parsed as numeric --
# verify its dtype.
eia_agg = eia_small.groupby("ORIS CODE", as_index=False).agg(
    {
        generation_col: "sum",
        "Plant Name": "first",
        "Plant State": "first",
        fuel_code_col: "first",
    }
)

# Preview the per-plant table.
eia_agg.head()


In [None]:
# Show the column names of the GHGRP table that carries the ORIS codes.
ghg_with_oris.columns



In [None]:
# Print the dtype before conversion. A bare expression that is not the last
# statement of a notebook cell is never displayed, so the check is made
# explicit here.
print("GHG ORIS dtype before conversion:", ghg_with_oris["ORIS CODE"].dtype)

# Coerce the ORIS codes on both sides of the upcoming merge to numeric values;
# entries that cannot be parsed as numbers become NaN (errors="coerce").
ghg_with_oris["ORIS CODE"] = pd.to_numeric(
    ghg_with_oris["ORIS CODE"], errors="coerce"
)
eia_agg["ORIS CODE"] = pd.to_numeric(eia_agg["ORIS CODE"], errors="coerce")


In [None]:
# Preview both tables that feed the final merge. A notebook cell only renders
# its final expression, so the first preview is printed explicitly.
print(ghg_with_oris.head())
eia_agg.head()

In [None]:
# Re-check the dtype of the merge key on each side before joining.
for label, frame in (("GHG", ghg_with_oris), ("EIA", eia_agg)):
    print(f"{label} ORIS dtype:", frame["ORIS CODE"].dtype)


In [None]:
# Discard plants without a usable ORIS code, then store the remaining codes
# as integers.
eia_agg = eia_agg.dropna(subset=["ORIS CODE"]).astype({"ORIS CODE": int})


In [None]:
# Join emissions (GHGRP) with generation (EIA) on the ORIS code. The inner
# join keeps only plants that appear in both tables.
final_merge = ghg_with_oris.merge(eia_agg, how="inner", on="ORIS CODE")

# Report (rows, columns) and preview the combined table.
print("Final merged shape:", final_merge.shape)
final_merge.head()


In [None]:
# Display the merged emissions/generation table.
final_merge

In [None]:
# Map each reported fuel type code to the broad category used to colour the
# scatter plot.
#
# "LFG" used to be listed twice (under Gas and under Biomass). In a dict
# literal the later entry silently wins, so the Gas entry was dead; it has been
# removed and "LFG" keeps the Biomass category it already resolved to.
#
# NOTE(review): "NUC" and "H2" are grouped under "Renewables" -- confirm that
# label is intended.
fuel_map = {
    # Coal
    "BIT": "Coal",
    "LIG": "Coal",
    "SUB": "Coal",
    "WC": "Coal",
    "RC": "Coal",
    # Oil
    "DFO": "Oil",
    "RFO": "Oil",
    "KER": "Oil",
    "JF": "Oil",
    "PC": "Oil",
    "WO": "Oil",
    # Gas
    "NG": "Gas",
    "BFG": "Gas",
    "COG": "Gas",
    "OG": "Gas",
    "PRG": "Gas",
    "SGC": "Gas",
    # Biomass
    "AB": "Biomass",
    "BLQ": "Biomass",
    "LFG": "Biomass",
    "MSW": "Biomass",
    "WDL": "Biomass",
    "WDS": "Biomass",
    # Renewables
    "GEO": "Renewables",
    "H2": "Renewables",
    "NUC": "Renewables",
    "SUN": "Renewables",
    "WND": "Renewables",
    # Other
    "OTH": "Other",
}

# Series.map yields NaN for any code that is not a key of fuel_map. Label those
# rows "Other" so every plant has a category; rows with a missing category may
# otherwise be left out of a colour-grouped plot.
final_merge["Fuel_Category"] = (
    final_merge["Reported\nFuel Type Code"].map(fuel_map).fillna("Other")
)


In [None]:
import plotly.express as px

# Scatter of emissions against generation, one point per merged plant row,
# coloured by the broad fuel category. The whole px.scatter(...) call must live
# in a single cell: a cell marker had ended up between the `x=` and `y=`
# arguments, which left the call unterminated and the code unparseable.
fig = px.scatter(
    final_merge,
    x="Net Generation\n(Megawatthours)",
    y="Total reported direct emissions",
    color="Fuel_Category",
    # Extra fields shown in the hover tooltip for each point.
    hover_data=[
        "Plant Name",
        "Facility Name",
        "Plant State",
        "State",
        "Reported\nFuel Type Code",
    ],
    # Human-readable axis and legend titles in place of the raw column names.
    labels={
        "Net Generation\n(Megawatthours)": "Net Generation (MWh)",
        "Total reported direct emissions": "Total Direct Emissions (Metric Tons)",
        "Fuel_Category": "Fuel Type Category",
    },
    title="Power Plant Emissions vs. Power Generation by Fuel Type Category",
)

# Both axes use a log scale.
# NOTE(review): zero or negative values cannot be placed on a log axis, so any
# such plants will not appear -- confirm this is acceptable.
fig.update_layout(xaxis_type="log", yaxis_type="log")
fig.show()
