In [1]:
import os
from pathlib import Path

import numpy as np
import pandas as pd

from dsgrid.dataformat.datafile import Datafile
from dsgrid.dataformat.datatable import Datatable

from ntbkhelp import OptionPresenter, show_enum, show_elements_with_data

# Directory that is searched for *.dsg files. The default is the location this
# notebook was originally written against; set the DSGRID_DATASET_PATH environment
# variable to point the notebook at a different copy of the data without editing code.
dsgrid_dataset_path = Path(
    os.environ.get("DSGRID_DATASET_PATH", "//nrelnas01/PLEXOS/Projects/Load/dsgrid_v0.2.0/data")
)

## Choose a file

In [2]:
# Collect the names of the .dsg files in the dataset directory. The names are sorted
# because directory listing order is filesystem-dependent, and the menu number typed
# below only identifies the same file on every run if the ordering is stable.
files = sorted(p.name for p in dsgrid_dataset_path.glob("*.dsg"))
options = OptionPresenter(files)
options.present_options()
# NOTE: input() blocks until someone answers, so this cell cannot run unattended.
input_str = input("Which file would you like to examine? ")
# The answer is mapped back to a file name by the presenter before loading.
datafile = Datafile.load(dsgrid_dataset_path / options.get_option(input_str))

  1: chp_dg.dsg
  2: commercial.dsg
  3: commercial_gaps.dsg
  4: distributedpv_sectoral.dsg
  5: eia_annual_energy_by_sector.dsg
  6: eia_annual_losses.dsg
  7: historical_hourly_load.dsg
  8: industrial.dsg
  9: industrial_gaps.dsg
 10: loss_model.dsg
 11: municipal_water.dsg
 12: outdoor_lighting.dsg
 13: residential.dsg
 14: residential_gaps.dsg
 15: trans_rail_hourly.dsg
Which file would you like to examine? 2


## Examine Basic Information

In [None]:
# Pass the loaded datafile's sector enumeration to the show_enum helper from ntbkhelp.
show_enum(datafile.sector_enum)

In [None]:
# Pass the sector enumeration together with the datafile's own keys (materialised as a
# list) to the ntbkhelp helper; presumably the keys are the sector ids that have data
# stored in this file -- TODO confirm against Datafile.
show_elements_with_data(datafile.sector_enum, list(datafile.keys()))

In [None]:
# Pass the loaded datafile's geography enumeration to the show_enum helper from ntbkhelp.
show_enum(datafile.geo_enum)

In [None]:
# WARNING: This cell may take a few minutes to run if the geography is counties
# Keep every geography id for which at least one sector dataset reports data.
# any() stops at the first sector whose has_data() is truthy, so the remaining
# sectors are not queried for that id.
geo_ids = [
    geo_id
    for geo_id in datafile.geo_enum.ids
    if any(datafile[sector_id].has_data(geo_id) for sector_id in datafile)
]
show_elements_with_data(datafile.geo_enum, geo_ids)

In [None]:
# Pass the loaded datafile's enduse enumeration to the show_enum helper from ntbkhelp.
show_enum(datafile.enduse_enum)

In [None]:
# Use the first sector dataset in the file as a sample and report its dataframe columns
# against the enduse enumeration.
# next(iter(...)) takes the first sector id directly. Unlike a for/break loop, it cannot
# fall through on an empty datafile and leave an undefined (or stale, from another cell)
# dataframe to be displayed.
first_sector_id = next(iter(datafile), None)
if first_sector_id is None:
    raise ValueError("datafile contains no sector datasets")
# get_data(0) is unpacked as (dataframe, geo ids, scaling factors); only the dataframe's
# column labels are needed here.
sample_df, _geo_ids, _scaling_factors = datafile[first_sector_id].get_data(0)
show_elements_with_data(datafile.enduse_enum, list(sample_df.columns))

In [None]:
# Pass the loaded datafile's time enumeration to the show_enum helper from ntbkhelp.
show_enum(datafile.time_enum)

## Get Total Energy per Sector and Enduse with Datatable

In [None]:
# WARNING: This cell may be slow and/or run out of memory for large files / county level data (e.g.,
# commercial.dsg, residential.dsg)
# Datatable(...).data is treated as an indexed series: reset_index() turns its index
# levels into columns, and the values end up in a column labelled 0 (presumably because
# the series is unnamed -- TODO confirm against Datatable).
data_series = Datatable(datafile).data
# aggfunc is given as the string "sum" rather than np.sum: pandas already maps np.sum to
# its own grouped sum, and passing the callable is deprecated in recent pandas releases.
total_energy = data_series.reset_index().pivot_table(values=0, index="sector", columns="enduse", aggfunc="sum")
total_energy

## Get Total Energy per Sector and Enduse by Iterating

In [3]:
# Build the same sector-by-enduse table without materialising everything at once:
# walk the stored dataframes of each sector dataset and accumulate scaled column sums.
sector_rows = []   # one single-row frame per sector that produced any data
sector_ids = []    # sector ids, kept parallel to sector_rows
for sector_id in datafile:
    sector_dataset = datafile[sector_id]
    sector_energy = None
    for i in range(sector_dataset.n_geos):
        df, _geos, scaling_factors = sector_dataset.get_data(i)
        column_sums = df.sum()
        # Each scaling factor contributes one scaled copy of the column sums
        # (presumably one factor per geography sharing this dataframe -- TODO confirm).
        for factor in scaling_factors:
            scaled = column_sums * factor
            sector_energy = scaled if sector_energy is None else sector_energy + scaled
    # Sectors that yielded nothing are left out of the table entirely.
    if sector_energy is not None:
        sector_rows.append(sector_energy.to_frame().T)
        sector_ids.append(sector_id)

if sector_rows:
    total_energy = pd.concat(sector_rows)
    total_energy.index = sector_ids
else:
    # pd.concat raises ValueError on an empty list; show an empty table instead.
    total_energy = pd.DataFrame()
total_energy

Unnamed: 0,fans,pumps,heating,cooling,interior_lights,exterior_lights,water_systems,interior_equipment,heat_rejection
com__FullServiceRestaurant,5592026.0,55930.92,913929.8,4633616.0,9906302.0,2767272.0,0.0,22215120.0,20246.23
com__Hospital,4455086.0,775179.7,7549.169,4685746.0,6702352.0,513313.3,0.0,8874651.0,359172.5
com__LargeHotel,8412271.0,463229.1,499394.2,7993656.0,9188369.0,2308821.0,0.0,14670520.0,164159.4
com__LargeOffice,61200550.0,7537129.0,1341884.0,53541560.0,79565750.0,16165270.0,0.0,92373180.0,2822176.0
com__MediumOffice,14368470.0,788579.3,740697.6,11388450.0,19489780.0,6934036.0,0.0,22236940.0,275012.8
com__Outpatient,1503822.0,169599.7,0.0,986539.2,3726250.0,1334083.0,0.0,5502770.0,75951.11
com__PrimarySchool,2240584.0,76699.77,420655.9,1870952.0,4758928.0,612290.8,0.0,3796403.0,36735.54
com__QuickServiceRestaurant,174088.7,1145.789,4317.019,125592.0,87804.55,51538.93,0.0,521078.1,861.2583
com__SmallHotel,146605.6,13870.05,87281.84,111860.8,1074873.0,715753.7,0.0,1508302.0,5497.838
com__SmallOffice,10893250.0,80494.86,1424234.0,8286468.0,15960780.0,6235034.0,0.0,21860990.0,49798.34


In [4]:
# Grand total of the table: sum each column, sum those column totals, then divide by one million.
column_totals = total_energy.sum()
column_totals.sum() / 1.0E6

1107.037184