In [None]:
%load_ext autoreload
%autoreload 3

In [None]:
import logging
import sys

import pandas as pd
import plotly.express as px
import sqlalchemy as sa

import energy_comms
import pudl

In [None]:
# Send root-logger output to stdout, formatted as the bare message text.
formatter = logging.Formatter("%(message)s")
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(formatter)

logger = logging.getLogger()  # no name given, so this is the root logger
logger.setLevel(logging.INFO)
# Assigning the list (rather than addHandler) discards any handlers already attached.
logger.handlers = [handler]

# Summary Stats
Note: This table is static; its values are not recomputed when the notebook is re-run.

|    | Mines | Gens | Combined |
|----| ----- | ---- | -------- |
num qualifying records | 5143 | 4470 | 9600 |
num lat, lon pairs| 4737  | 319 | 5056 |
num Census tracts with closures| 966 | 309 | 1236 |
num Census tracts total (with adjacent tracts) | 2999 | 2486 | 4992 |
num counties with closures | 322 | 275 | 533 | 
num counties total (with adjacent counties) | 840 | 1297 | 1631 |

In [None]:
def get_df_stats(df, census_geom):
    """Compute basic counts for a mines or generators dataframe.

    Args:
        df: Dataframe with ``latitude``, ``longitude``,
            ``{census_geom}_id_fips`` and ``adjacent_id_fips`` columns.
            ``adjacent_id_fips`` is exploded before counting, so list-like
            entries contribute one ID per element.
        census_geom: Prefix of the primary FIPS ID column, e.g. ``"tract"``
            or ``"county"``.

    Returns:
        A tuple ``(n_lat_lon, n_primary_geoms, n_all_geoms)``: the number of
        distinct (latitude, longitude) pairs, the number of distinct primary
        FIPS IDs, and the number of distinct IDs across the primary and
        adjacent columns together. Counts come from ``drop_duplicates``, so a
        missing value, if present, is counted as one distinct entry.
    """
    coordinate_pairs = df[["latitude", "longitude"]]
    n_lat_lon = len(coordinate_pairs.drop_duplicates())

    primary_ids = df[f"{census_geom}_id_fips"]
    n_primary_geoms = len(primary_ids.drop_duplicates())

    # One row per adjacent ID, stacked under the primary IDs before de-duplicating.
    adjacent_ids = df.adjacent_id_fips.explode()
    primary_and_adjacent = pd.concat([df[f"{census_geom}_id_fips"], adjacent_ids])
    n_all_geoms = len(primary_and_adjacent.drop_duplicates())

    return n_lat_lon, n_primary_geoms, n_all_geoms

# MSHA Mines Data

In [None]:
from energy_comms.extract.msha import extract as msha_extract
from energy_comms.transform.msha import transform as msha_transform

### Start by looking at qualifying Census tracts, as specified by the criteria

In [None]:
# Extract the raw MSHA mines data and run the MSHA transform on it. No
# census_geometry is passed here; the county variant further down passes
# census_geometry="county". The result is the tract-level closed-mines dataframe
# used by the stats and map cells that follow.
msha_df = msha_transform(msha_extract())

In [None]:
# Tract-level summary counts for the MSHA closed-mines dataframe.
n_lat_lon, n_primary_tracts, n_all_tracts = get_df_stats(msha_df, census_geom="tract")

# One print call; sep="\n" puts each message on its own line.
print(
    f"Number of qualifying mines: {len(msha_df)}",
    f"Number of unique lat, lon pairs: {n_lat_lon}",
    f"Number of Census tracts with closed mines: {n_primary_tracts}",
    f"Number of qualifying Census tracts (including adjacent tracts): {n_all_tracts}",
    sep="\n",
)

In [None]:
# Display msha_df with its last three columns moved up to sit right after the first two.
cols = msha_df.columns.to_list()
msha_df[[*cols[:2], *cols[-3:], *cols[2:-3]]]

### Visualize qualifying census tracts

Note: Everything runs very slowly if the full plot with all Census tracts is rendered in the notebook, so I don't render it here.


The `make_plotly_map` cell below lets you save the full map as an HTML file instead, which seems to work better.


For the sake of visualization in this notebook, I created a non-interactive matplotlib map.

In [None]:
from energy_comms.output.basic_viz import create_geometries_df, make_plotly_map, make_matplotlib_map

In [None]:
# Build the geometries dataframe consumed by the map helpers below.
# NOTE(review): no census_geometry is passed here, unlike the EIA tract cell,
# which passes census_geometry="tract" explicitly. Presumably the default is
# "tract"; confirm against create_geometries_df.
msha_tract_geoms = create_geometries_df(msha_df)

In [None]:
# Plotly map of the MSHA tract geometries. Per the note above, the second
# argument appears to be the HTML file the map is saved to; confirm against
# make_plotly_map.
make_plotly_map(msha_tract_geoms, "msha_tracts.html")

In [None]:
# Non-interactive matplotlib map of the MSHA tract geometries, rendered inline
# in place of the slow full plot mentioned above.
make_matplotlib_map(msha_tract_geoms)

### What about using county instead of tract?

In [None]:
# County-level version of the MSHA closed-mines dataframe.
# NOTE(review): this calls msha_extract() a second time. The EIA section
# extracts once into raw_gens and reuses it for both geometries; consider doing
# the same here if extraction is expensive.
msha_county_df = msha_transform(msha_extract(), census_geometry="county")

In [None]:
# County-level summary counts for the MSHA closed-mines dataframe.
n_lat_lon, n_primary_counties, n_all_counties = get_df_stats(msha_county_df, census_geom="county")

print(f"Number of qualifying mines: {len(msha_county_df)}")
print(f"Number of unique lat, lon pairs: {n_lat_lon}")
# These counts are counties, not tracts; the labels match the wording of the
# EIA county cell.
print(f"Number of Census counties with closed mines: {n_primary_counties}")
print(f"Number of qualifying Census counties (including adjacent counties): {n_all_counties}")

In [None]:
# Display msha_county_df with its last three columns moved up to sit right after the first two.
cols = msha_county_df.columns.to_list()
msha_county_df[[*cols[:2], *cols[-3:], *cols[2:-3]]]

### Visualize qualifying counties

In [None]:
# Build the county geometries dataframe for the MSHA county maps below.
msha_county_geoms = create_geometries_df(msha_county_df, census_geometry="county")

In [None]:
# Plotly map of the MSHA county geometries. The second argument appears to be
# the HTML file the map is saved to; confirm against make_plotly_map.
make_plotly_map(msha_county_geoms, "msha_counties.html")

In [None]:
# Non-interactive matplotlib map of the MSHA county geometries.
make_matplotlib_map(msha_county_geoms)

# EIA 860m Data

In [None]:
from energy_comms.extract.eia860 import extract as eia_extract
from energy_comms.transform.eia860 import transform as eia_transform

### Start by looking at the Census tract level, as specified by the criteria

In [None]:
# Extract the raw EIA generators data once; raw_gens is reused by both the
# tract and county transforms below.
raw_gens = eia_extract()

In [None]:
# Tract-level generators dataframe. No census_geometry is passed here; the
# county variant below passes census_geometry="county".
eia_df = eia_transform(raw_gens)

In [None]:
# Tract-level summary counts for the EIA generators dataframe.
n_lat_lon, n_primary_tracts, n_all_tracts = get_df_stats(eia_df, census_geom="tract")

# One print call; sep="\n" puts each message on its own line.
print(
    f"Number of qualifying generators: {len(eia_df)}",
    f"Number of unique lat, lon pairs: {n_lat_lon}",
    f"Number of Census tracts with closed generators: {n_primary_tracts}",
    f"Number of qualifying Census tracts (including adjacent tracts): {n_all_tracts}",
    sep="\n",
)

In [None]:
# Display eia_df with its last three columns moved up to sit right after the first two.
cols = eia_df.columns.to_list()
eia_df[[*cols[:2], *cols[-3:], *cols[2:-3]]]

In [None]:
# Build the tract geometries dataframe for the EIA generator maps below.
eia_tract_geoms = create_geometries_df(eia_df, census_geometry="tract")

In [None]:
# Non-interactive matplotlib map of the EIA tract geometries, called without
# the only_conus option used in the next map cell.
make_matplotlib_map(eia_tract_geoms)

There is a closed generator in Hawaii, but here is a CONUS-only visual.

In [None]:
# Same EIA tract map with only_conus=True, giving the CONUS-only view described above.
make_matplotlib_map(eia_tract_geoms, only_conus=True)

### What about using county instead of Census tract?

In [None]:
# County-level generators dataframe, built from the same raw_gens extract as
# the tract-level eia_df.
eia_county_df = eia_transform(raw_gens, census_geometry="county")

In [None]:
# County-level summary counts for the EIA generators dataframe.
n_lat_lon, n_primary_counties, n_all_counties = get_df_stats(eia_county_df, census_geom="county")

# One print call; sep="\n" puts each message on its own line.
print(
    f"Number of qualifying generators: {len(eia_county_df)}",
    f"Number of unique lat, lon pairs: {n_lat_lon}",
    f"Number of Census counties with closed generators: {n_primary_counties}",
    f"Number of qualifying Census counties (including adjacent counties): {n_all_counties}",
    sep="\n",
)

In [None]:
# Display eia_county_df with its last three columns moved up to sit right after the first two.
cols = eia_county_df.columns.to_list()
eia_county_df[[*cols[:2], *cols[-3:], *cols[2:-3]]]

In [None]:
# Build the county geometries dataframe for the EIA generator maps below.
eia_county_geoms = create_geometries_df(eia_county_df, census_geometry="county")

In [None]:
# Non-interactive matplotlib map of the EIA county geometries, called without
# the only_conus option used in the next map cell.
make_matplotlib_map(eia_county_geoms)

In [None]:
# Same EIA county map with only_conus=True, as in the tract-level CONUS-only map.
make_matplotlib_map(eia_county_geoms, only_conus=True)

# Combine coal mine and generator closure data to see all qualifying areas

In [None]:
# tract
# Stack the tract ID columns of the mine and generator closure dataframes.
full_tract_df = pd.concat(
    [
        closures[["tract_id_fips", "adjacent_id_fips"]]
        for closures in (msha_df, eia_df)
    ]
)
# Distinct tracts that themselves contain a closure.
n_primary_geoms = len(full_tract_df["tract_id_fips"].drop_duplicates())
# Distinct tracts across the closure tracts and every exploded adjacent_id_fips entry.
n_all_geoms = len(
    pd.concat(
        [full_tract_df["tract_id_fips"], full_tract_df.adjacent_id_fips.explode()]
    ).drop_duplicates()
)
print(
    f"Number of tracts with closed mines or generators: {n_primary_geoms}",
    f"Number of qualifying Census tracts (including adjacent tracts): {n_all_geoms}",
    sep="\n",
)

### Qualifying coal community Census tracts

In [None]:
# Build tract geometries from the combined mine + generator ID dataframe and
# map them with the non-interactive matplotlib helper.
make_matplotlib_map(create_geometries_df(full_tract_df, census_geometry="tract"))

In [None]:
# county
# Stack the county ID columns of the mine and generator closure dataframes.
full_county_df = pd.concat(
    [
        closures[["county_id_fips", "adjacent_id_fips"]]
        for closures in (msha_county_df, eia_county_df)
    ]
)
# Distinct counties that themselves contain a closure.
n_primary_geoms = len(full_county_df["county_id_fips"].drop_duplicates())
# Distinct counties across the closure counties and every exploded adjacent_id_fips entry.
n_all_geoms = len(
    pd.concat(
        [full_county_df["county_id_fips"], full_county_df.adjacent_id_fips.explode()]
    ).drop_duplicates()
)
print(
    f"Number of counties with closed mines or generators: {n_primary_geoms}",
    f"Number of qualifying Census counties (including adjacent counties): {n_all_geoms}",
    sep="\n",
)

### Qualifying coal community counties

In [None]:
# Build county geometries from the combined mine + generator ID dataframe and
# map them with the non-interactive matplotlib helper.
make_matplotlib_map(create_geometries_df(full_county_df, census_geometry="county"))

### Qualifying coal communities broken down by generator and mine closures

In [None]:
from energy_comms.output.basic_viz import combine_gen_and_mine_geoms

Start with tract

In [None]:
# Combine the MSHA (mine) and EIA (generator) tract geometries into a single
# dataframe for the breakdown maps below.
full_tract_geoms = combine_gen_and_mine_geoms(msha_tract_geoms, eia_tract_geoms)

In [None]:
# Non-interactive matplotlib map of the combined mine + generator tract geometries.
make_matplotlib_map(full_tract_geoms)

In [None]:
# Same combined tract map with only_conus=True, as in the EIA CONUS-only maps.
make_matplotlib_map(full_tract_geoms, only_conus=True)

By county

In [None]:
# Combine the MSHA (mine) and EIA (generator) county geometries into a single
# dataframe for the breakdown maps below.
full_county_geoms = combine_gen_and_mine_geoms(msha_county_geoms, eia_county_geoms)

In [None]:
# Non-interactive matplotlib map of the combined mine + generator county geometries.
make_matplotlib_map(full_county_geoms)

In [None]:
# Same combined county map with only_conus=True, as in the EIA CONUS-only maps.
make_matplotlib_map(full_county_geoms, only_conus=True)