In [None]:
from pathlib import Path

import numpy as np
import pandas as pd

from dsgrid.dataformat.datafile import Datafile
from dsgrid.dataformat.datatable import Datatable

from ntbkhelp import OptionPresenter, show_enum, show_elements_with_data

## Choose data location

Review the choices in the first cell, select the path you want to use in the second cell, and run both cells.

In [None]:
# Candidate root locations of the dsgrid v0.2.0 data. Pick one of these in the
# next cell.
# Network share addressed with a UNC-style path (for Windows machines).
dsgrid_nrel_base_path_windows = Path("//nrelnas01/PLEXOS/Projects/Load/dsgrid_v0.2.0/")
# The PLEXOS volume as mounted under /Volumes (for macOS machines).
dsgrid_nrel_base_path_mac = Path("/Volumes/PLEXOS/Projects/Load/dsgrid_v0.2.0/")
# No OEDI location is configured here; set this to a Path before selecting it.
dsgrid_oedi_base_path = None

In [None]:
# Root data location used by the rest of the notebook; assign whichever of the
# candidate base paths applies to your machine.
dsgrid_base_path = dsgrid_nrel_base_path_windows

## Choose data set

Select whether you want to look at county-level or state-level data in the second cell. Run both cells.

In [None]:
# Dataset folders under the chosen base path: one for county-level data and
# one for state-level data.
dsgrid_county_dataset_path = dsgrid_base_path / "products" / "raw_complete"
dsgrid_state_dataset_path = dsgrid_base_path / "products" / "state_hourly_residuals"

In [None]:
# Dataset folder that the file chooser below searches for .dsg files.
dsgrid_dataset_path = dsgrid_county_dataset_path

## Specify a local directory for any outputs

Review and edit the specified path. (The default option should work for most users, but feel free to change it as you see fit.) Then run the cell.

In [None]:
# Local directory for any files this notebook writes; edit as needed.
output_dir = Path.home() / "Documents" / "dsgrid-legacy-efs"

if not output_dir.exists():
    input_str = input(f"{output_dir!r} does not exist. Would you like to create it? [Y/n] ")
    # Pressing Enter without typing anything selects the capitalized default
    # ("Y"); indexing input_str[0] directly would raise IndexError in that case.
    answer = input_str.strip().lower() or "y"
    if answer.startswith("y"):
        # parents=True so creation also succeeds when ~/Documents is missing;
        # exist_ok=True tolerates the directory appearing in the meantime.
        output_dir.mkdir(parents=True, exist_ok=True)
        print(f"Created {output_dir!r}.")

## Choose a file

In [None]:
# List the .dsg files in the selected dataset folder. Path.glob yields entries
# in arbitrary order, so sort to keep the option numbering stable between runs.
files = sorted(p.name for p in dsgrid_dataset_path.glob("*.dsg"))
if not files:
    # Fail early with a clear message instead of prompting with no options
    # (typically the network location is not mounted or the path is wrong).
    raise FileNotFoundError(f"No .dsg files found in {dsgrid_dataset_path}")
options = OptionPresenter(files)
options.present_options()
input_str = input("Which file would you like to examine (input number)? ")
# Translate the user's answer back into a file name and load that file.
datafile = Datafile.load(dsgrid_dataset_path / options.get_option(input_str))

## Examine Basic Information

In [None]:
# Display the datafile's sector enumeration using the ntbkhelp helper.
show_enum(datafile.sector_enum)

In [None]:
# Pass the sector enumeration together with the datafile's keys to the helper;
# presumably this shows which sector ids actually have data (confirm in ntbkhelp).
show_elements_with_data(datafile.sector_enum, list(datafile.keys()))

In [None]:
# Display the datafile's geography enumeration using the ntbkhelp helper.
show_enum(datafile.geo_enum)

⚠️ **WARNING** ⚠️ The next cell may take a few minutes to run if the geography is counties

In [None]:
# Keep every geography id for which at least one sector reports data. any()
# stops at the first sector that has data, so remaining sectors are not checked.
geo_ids = [
    geo_id
    for geo_id in datafile.geo_enum.ids
    if any(datafile[sector_id].has_data(geo_id) for sector_id in datafile)
]
show_elements_with_data(datafile.geo_enum, geo_ids)

In [None]:
# Display the datafile's enduse enumeration using the ntbkhelp helper.
show_enum(datafile.enduse_enum)

In [None]:
# Look at the first data block (index 0) of the first sector only, and use its
# column labels as the list of enduses that have data.
sector_id = next(iter(datafile), None)
if sector_id is None:
    # Without this guard an empty datafile would leave `df` undefined, or
    # silently reuse a stale `df` left over from a previously run cell.
    raise ValueError("datafile contains no sectors; there is no data to inspect")
df, _geo_ids, _scaling_factors = datafile[sector_id].get_data(0)
show_elements_with_data(datafile.enduse_enum, list(df.columns))

In [None]:
# Display the datafile's time enumeration using the ntbkhelp helper.
show_enum(datafile.time_enum)

## Get Total Energy per Sector and Enduse with Datatable

⚠️ **WARNING** ⚠️ This cell may be slow and/or run out of memory for large files / county level data (e.g., commercial.dsg, residential.dsg)

In [None]:
# Load the whole datafile through Datatable, then total the values into a
# table with one row per sector and one column per enduse.
data_series = Datatable(datafile).data
# After reset_index() the values sit in the column labelled 0 (assumes `data`
# is an unnamed Series -- confirm against Datatable). The string alias "sum"
# is used instead of np.sum: the result is the same, but passing the NumPy
# callable raises a FutureWarning in recent pandas releases.
total_energy = data_series.reset_index().pivot_table(values=0, index="sector", columns="enduse", aggfunc="sum")
total_energy

## Get Total Energy per Sector and Enduse by Iterating

In [None]:
# Build one row of per-enduse totals for every sector that has data, reading
# the sector's data one block at a time.
sector_rows = []
index = []
for sector_id in datafile:
    sector_dataset = datafile[sector_id]
    sector_energy = None
    for i in range(sector_dataset.n_geos):
        df, geos, scaling_factors = sector_dataset.get_data(i)
        enduse_sums = df.sum()
        # The block's column sums are added once per scaling factor, each time
        # multiplied by that factor.
        for factor in scaling_factors:
            scaled = enduse_sums * factor
            if sector_energy is None:
                sector_energy = scaled
            else:
                sector_energy += scaled
    if sector_energy is None:
        # Nothing was accumulated for this sector; leave it out of the table.
        continue
    sector_rows.append(sector_energy.to_frame().T)
    index.append(sector_id)

# Stack the single-row frames and label each row with its sector id.
total_energy = pd.concat(sector_rows)
total_energy.index = index
total_energy

In [None]:
# Grand total over all sectors and enduses, divided by 1e6.
# NOTE(review): the 1e6 divisor is presumably a unit conversion -- confirm the
# units of the stored data before interpreting this number.
total_energy.sum().sum() / 1.0E6

## Get Total Energy per Sector and Enduse by Aggregating and Converting Units

Review the user inputs in the first cell before running this section.

In [None]:
# Applying a map requires making a new HDF5 file on disk
overwrite = True      # Choices: True, False
energy_units = "TWh"  # Choices: kWh, MWh, GWh, TWh

In [None]:
from dsgrid.dataformat.dimmap import FilterToSingleFuelMap, FullAggregationMap, UnitConversionMap
from dsgrid.dataformat.enumeration import MultiFuelEndUseEnumeration, allsectors, conus, allenduses, annual

# Ordered list of (filename postfix, dimension mapping) steps. The steps are
# applied one after another below; each writes a new file in output_dir whose
# name is the previous file's stem plus the step's postfix.
mappings = [
    ('-annual', FullAggregationMap(datafile.time_enum, annual)),
    ('-conus', FullAggregationMap(datafile.geo_enum, conus)),
    ('-allsectors', FullAggregationMap(datafile.sector_enum, allsectors)),
]

# handle multi-fuel enduse enums
enduse_enum = datafile.enduse_enum
if isinstance(enduse_enum, MultiFuelEndUseEnumeration):
    # list.append accepts exactly one argument, so the (postfix, mapping) pair
    # must be passed as a single tuple.
    mappings.append(('-electricity', FilterToSingleFuelMap(enduse_enum, 'Electricity')))
    # Later steps must start from the enumeration this mapping produces.
    enduse_enum = mappings[-1][1].to_enum

# handle different electricity units
allenduses_units = allenduses.units(allenduses.ids[0])
enduse_units = enduse_enum.units(enduse_enum.ids[0])
if enduse_units != allenduses_units:
    # Convert to the units of `allenduses` before aggregating into it.
    mappings.append((
        f'-{allenduses_units}',
        UnitConversionMap(enduse_enum, [enduse_units], [allenduses_units]),
    ))
    enduse_enum = mappings[-1][1].to_enum

# Finally collapse to a single enduse and convert to the requested units.
mappings.extend([
    ('-allenduses', FullAggregationMap(enduse_enum, allenduses)),
    (f'-{energy_units}', UnitConversionMap(allenduses, [allenduses_units], [energy_units]))
])

new_datafile = datafile
for postfix, mapping in mappings:
    new_path = output_dir / (new_datafile.filepath.stem + postfix + new_datafile.filepath.suffix)
    if new_path.exists() and overwrite:
        new_path.unlink()
    # NOTE(review): when overwrite is False and new_path already exists, the
    # file is left in place -- confirm how map_dimension behaves in that case.
    print(f"Applying {mapping} and writing to {new_path}\n")
    new_datafile = new_datafile.map_dimension(new_path, mapping)

# After all mappings, the remaining values are summed into the reported total.
total_energy = Datatable(new_datafile).data.sum()
print(f"{datafile.filepath.name} describes {total_energy} {energy_units} of electricity")