In [1]:
from pathlib import Path

import numpy as np
import pandas as pd

from dsgrid.dataformat.datafile import Datafile
from dsgrid.dataformat.datatable import Datatable

from ntbkhelp import OptionPresenter, show_enum, show_elements_with_data

dsgrid_nrel_base_path = Path("//nrelnas01/PLEXOS/Projects/Load/dsgrid_v0.2.0/")
dsgrid_oedi_base_path = None

# USER INPUT ----------------------------------------
# Pick the base path to read from (one of the *_base_path values above)
dsgrid_base_path = dsgrid_nrel_base_path
# ---------------------------------------------------

# Dataset directories located underneath the chosen base path
dsgrid_county_dataset_path = dsgrid_base_path.joinpath("data")
dsgrid_state_dataset_path = dsgrid_base_path.joinpath("products", "state_hourly_residuals")

# USER INPUT ----------------------------------------
# Pick the dataset directory to browse
dsgrid_dataset_path = dsgrid_county_dataset_path
# Local directory that receives any files this notebook writes
output_dir = Path.home().joinpath("Documents", "dsgrid-legacy-efs")
# ---------------------------------------------------

# Offer to create the output directory; later cells write files into it.
# The prompt advertises "Y" as the default ([Y/n]), so an empty answer
# (just pressing Enter) is treated as "yes" instead of raising IndexError.
if not output_dir.exists():
    input_str = input(f"{output_dir!r} does not exist. Would you like to create it? [Y/n] ")
    answer = input_str.strip().lower()
    if not answer or answer.startswith("y"):
        # parents=True: an intermediate directory such as "Documents" may be
        # missing as well; exist_ok=True guards against a concurrent creation.
        output_dir.mkdir(parents=True, exist_ok=True)
        print(f"Created {output_dir!r}.")
    else:
        print(f"{output_dir!r} was not created; cells that write to it need it to exist.")

## Choose a file

In [2]:
# List the .dsg files in the chosen dataset directory. Path.glob yields
# entries in arbitrary, platform-dependent order, so the names are sorted to
# keep the option numbering stable between runs and machines.
files = sorted(p.name for p in dsgrid_dataset_path.glob("*.dsg"))
if not files:
    # Fail with a clear message instead of prompting with an empty menu
    # (e.g. when the base path is not reachable from this machine).
    raise FileNotFoundError(f"No .dsg files found in {dsgrid_dataset_path!r}")
options = OptionPresenter(files)
options.present_options()
input_str = input("Which file would you like to examine? ")
# Resolve the user's answer back to a file name and load that datafile.
datafile = Datafile.load(dsgrid_dataset_path / options.get_option(input_str))

  1: chp_dg.dsg
  2: commercial.dsg
  3: commercial_gaps.dsg
  4: distributedpv_sectoral.dsg
  5: eia_annual_energy_by_sector.dsg
  6: eia_annual_losses.dsg
  7: historical_hourly_load.dsg
  8: industrial.dsg
  9: industrial_gaps.dsg
 10: loss_model.dsg
 11: municipal_water.dsg
 12: outdoor_lighting.dsg
 13: residential.dsg
 14: residential_gaps.dsg
 15: trans_rail_hourly.dsg
Which file would you like to examine? 9


## Examine Basic Information

In [None]:
# Show the sector enumeration attached to the selected datafile.
show_enum(datafile.sector_enum)

In [None]:
# Show the sector enumeration elements that have data; the datafile's keys
# (the sector ids it can be indexed with) are passed as the populated ids.
show_elements_with_data(datafile.sector_enum, list(datafile.keys()))

In [None]:
# Show the geography enumeration attached to the selected datafile.
show_enum(datafile.geo_enum)

In [None]:
# WARNING: This cell may take a few minutes to run if the geography is counties
# A geography is kept as soon as one sector reports data for it; any() stops
# querying the remaining sectors at the first hit.
geo_ids = [
    geo_id
    for geo_id in datafile.geo_enum.ids
    if any(datafile[sector_id].has_data(geo_id) for sector_id in datafile)
]
show_elements_with_data(datafile.geo_enum, geo_ids)

In [None]:
# Show the enduse enumeration attached to the selected datafile.
show_enum(datafile.enduse_enum)

In [None]:
# Determine which enduses have data by inspecting the columns of the first
# dataset of the first sector.
# NOTE(review): this assumes every sector stores the same enduse columns - confirm.
first_sector_id = next(iter(datafile), None)
if first_sector_id is None:
    # Empty datafile: nothing has data. Handling this explicitly avoids a
    # NameError on `df` (or silently reusing a stale `df` from an earlier run).
    enduse_ids = []
else:
    df, _geo_ids, _scaling_factors = datafile[first_sector_id].get_data(0)
    enduse_ids = list(df.columns)
show_elements_with_data(datafile.enduse_enum, enduse_ids)

In [None]:
# Show the time enumeration attached to the selected datafile.
show_enum(datafile.time_enum)

## Get Total Energy per Sector and Enduse with Datatable

In [None]:
# WARNING: This cell may be slow and/or run out of memory for large files / county level data (e.g.,
# commercial.dsg, residential.dsg)
data_series = Datatable(datafile).data
# reset_index() moves the index levels into columns; the values are then
# addressed as column 0 (presumably because the series is unnamed - confirm).
# aggfunc is given as the string "sum": passing np.sum is deprecated since
# pandas 2.1 and emits a FutureWarning, while producing the same totals.
total_energy = data_series.reset_index().pivot_table(
    values=0, index="sector", columns="enduse", aggfunc="sum"
)
total_energy

## Get Total Energy per Sector and Enduse by Iterating

In [4]:
# Alternative to the Datatable approach: walk the file one stored dataset at
# a time and accumulate the per-enduse totals of every sector.
sector_rows = []
index = []
for sector_id in datafile:
    sector_dataset = datafile[sector_id]
    sector_energy = None
    for dataset_idx in range(sector_dataset.n_geos):
        df, geos, scaling_factors = sector_dataset.get_data(dataset_idx)
        # Column-wise sum of this dataset (the columns are the enduses).
        enduse_totals = df.sum()
        # The dataset is counted once per scaling factor.
        # NOTE(review): presumably one factor per geography sharing this
        # dataset - confirm against get_data().
        for factor in scaling_factors:
            scaled = enduse_totals * factor
            if sector_energy is None:
                sector_energy = scaled
            else:
                sector_energy += scaled
    if sector_energy is None:
        # Sector without any data: it contributes no row.
        continue
    sector_rows.append(sector_energy.to_frame().T)
    index.append(sector_id)
# One row per sector, one column per enduse.
total_energy = pd.concat(sector_rows)
total_energy.index = index
total_energy

Unnamed: 0,energy_consumption
11,36146952.0
21,86879200.0
23,63326216.0


In [5]:
# Grand total over all sectors and enduses, divided by 1e6.
# NOTE(review): for a file whose enduse units are MWh this yields TWh -
# confirm the units of the selected file before interpreting the number.
total_energy.sum().sum() / 1.0E6

186.352368

## Get Total Energy per Sector and Enduse by Aggregating and Converting Units

In [8]:
from dsgrid.dataformat.dimmap import FilterToSingleFuelMap, FullAggregationMap, UnitConversionMap
from dsgrid.dataformat.enumeration import MultiFuelEndUseEnumeration, allsectors, conus, allenduses, annual

# USER INPUT ----------------------------------------
# Applying a map requires making a new HDF5 file on disk
overwrite = True
energy_units = "TWh"
# ---------------------------------------------------

# Each entry is a (filename postfix, dimension map) pair. The maps are applied
# in list order, and every step writes a new file whose name gains the postfix.
mappings = [
    ('-annual', FullAggregationMap(datafile.time_enum, annual)),
    ('-conus', FullAggregationMap(datafile.geo_enum, conus)),
    ('-allsectors', FullAggregationMap(datafile.sector_enum, allsectors)),
]

# handle multi-fuel enduse enums
enduse_enum = datafile.enduse_enum
if isinstance(enduse_enum, MultiFuelEndUseEnumeration):
    # list.append accepts exactly one argument, so the pair must be a tuple
    # (passing the two values separately raises TypeError).
    mappings.append(('-electricity', FilterToSingleFuelMap(enduse_enum, 'Electricity')))
    # Continue with the enumeration the filter map produces.
    enduse_enum = mappings[-1][1].to_enum

# handle different electricity units
allenduses_units = allenduses.units(allenduses.ids[0])
current_units = enduse_enum.units(enduse_enum.ids[0])
if current_units != allenduses_units:
    # Convert to the units of `allenduses` before aggregating into it.
    mappings.append((
        f'-{allenduses_units}',
        UnitConversionMap(enduse_enum, [current_units], [allenduses_units]),
    ))
    enduse_enum = mappings[-1][1].to_enum

# Finally collapse all enduses into one and convert to the requested units.
mappings.extend([
    ('-allenduses', FullAggregationMap(enduse_enum, allenduses)),
    (f'-{energy_units}', UnitConversionMap(allenduses, [allenduses.units(allenduses.ids[0])], [energy_units]))
])

# Apply the maps one after another; each result is the input of the next step.
new_datafile = datafile
for postfix, mapping in mappings:
    new_path = output_dir / (new_datafile.filepath.stem + postfix + new_datafile.filepath.suffix)
    if new_path.exists() and overwrite:
        # Remove the stale result of an earlier run before writing the new one.
        new_path.unlink()
    print(f"Applying {mapping} and writing to {new_path}\n")
    new_datafile = new_datafile.map_dimension(new_path, mapping)

total_energy = Datatable(new_datafile).data.sum()
print(f"{datafile.filepath.name} describes {total_energy} {energy_units} of electricity")

Applying FullAggregationMap(TimeEnumeration(standard_2012_hourly, [2012-01-01 01:00:00-05:00, ...], [2012-01-01 01:00:00-05:00, ...]), TimeEnumeration(annual, [Annual], [Annual])) and writing to C:\Users\ehale\Documents\dsgrid-legacy-efs\industrial_gaps-annual.dsg

Applying FullAggregationMap(GeographyEnumeration(counties, [01001, ...], [Autauga County, AL, ...]), GeographyEnumeration(conus, [conus], [Continental United States])) and writing to C:\Users\ehale\Documents\dsgrid-legacy-efs\industrial_gaps-annual-conus.dsg

Applying FullAggregationMap(SectorEnumeration(industrial_gaps_sectors, [11, ...], [Agriculture, Forestry, Fishing and Hunting, ...]), SectorEnumeration(all_sectors, [All], [All Sectors])) and writing to C:\Users\ehale\Documents\dsgrid-legacy-efs\industrial_gaps-annual-conus-allsectors.dsg

Applying FullAggregationMap(SingleFuelEndUseEnumeration(industrial_gaps_enduses, [energy_consumption], [Energy Consumption], fuel = 'Electricity', units = 'MWh'), SingleFuelEndUseEnum