In [1]:
from oemof import outputlib
from oemof import solph
import numpy as np
import pandas as pd
import xarray as xr

![](energysystem.png)


In [2]:
# Recreate the energy system from the dump file stored next to this notebook,
# then pick the 'main' entry of its stored results.
# NOTE(review): restore() presumably unpickles the dump - only load files
# from a trusted source; confirm against the oemof documentation.
energysystem = solph.EnergySystem()
energysystem.restore(
    dpath='./',
    filename='energysystem.dump',
)
results = energysystem.results['main']

In [3]:
# Display the restored results in full: a dict keyed by (from node, to node)
# pairs whose values hold a 'scalars' Series and a 'sequences' DataFrame.
results

{("<oemof.solph.network.Bus: 'b_heat_source'>",
  "<oemof.solph.network.Transformer: 'heat_pump'>"): {'scalars': Series([], Name: 2016-01-01 00:00:00, dtype: float64),
  'sequences': variable_name            flow
  2016-01-01 00:00:00  3.918540
  2016-01-01 01:00:00  4.001186
  2016-01-01 02:00:00  4.184690
  2016-01-01 03:00:00  4.641344
  2016-01-01 04:00:00  5.800365
  2016-01-01 05:00:00  6.666667
  2016-01-01 06:00:00  6.666667
  2016-01-01 07:00:00  6.666667
  2016-01-01 08:00:00  6.666667
  2016-01-01 09:00:00  6.666667
  2016-01-01 10:00:00  6.666667
  2016-01-01 11:00:00  6.666667
  2016-01-01 12:00:00  6.495739
  2016-01-01 13:00:00  6.499551
  2016-01-01 14:00:00  6.580753
  2016-01-01 15:00:00  6.666667
  2016-01-01 16:00:00  6.666667
  2016-01-01 17:00:00  6.666667
  2016-01-01 18:00:00  6.666667
  2016-01-01 19:00:00  6.666667
  2016-01-01 20:00:00  6.666667
  2016-01-01 21:00:00  5.825812
  2016-01-01 22:00:00  4.019274
  2016-01-01 23:00:00  3.844627
  2016-01-02 00:00:

# Option in outputlib.views.node: DataFrame or MultiIndex DataFrame

In [8]:
# Sequences of all flows attached to node 'pp_oil'; multiindex=True yields
# separate (from, to, type) column levels.
timeseries_multiindex = outputlib.views.node(
    results, 'pp_oil', multiindex=True
)['sequences']
timeseries_multiindex.head()

from,oil,pp_oil
to,pp_oil,bel
type,flow,flow
2016-01-01 00:00:00,287.14939,80.401829
2016-01-01 01:00:00,287.14939,80.401829
2016-01-01 02:00:00,287.14939,80.401829
2016-01-01 03:00:00,287.14939,80.401829
2016-01-01 04:00:00,287.14939,80.401829


In [9]:
# Same node view without the multiindex option: each column is labelled with
# a nested ((from, to), variable) tuple.
timeseries = outputlib.views.node(
    results,
    'pp_oil',
)['sequences']
timeseries.head()

Unnamed: 0,"((oil, pp_oil), flow)","((pp_oil, bel), flow)"
2016-01-01 00:00:00,287.14939,80.401829
2016-01-01 01:00:00,287.14939,80.401829
2016-01-01 02:00:00,287.14939,80.401829
2016-01-01 03:00:00,287.14939,80.401829
2016-01-01 04:00:00,287.14939,80.401829


# Why not have a function to output all data as one big dataframe?

## All sequences with tuple indices

In [10]:
# Convert the result keys to plain strings so that the (from, to) pairs can be
# used directly as column labels further down.
string_results = outputlib.processing.convert_keys_to_strings(results)

In [11]:
# Label every flow's 'flow' column with its (from, to) key and place all
# sequence frames side by side, aligned on their index.
all_sequences = pd.concat(
    [
        flow_result['sequences'].rename(columns={'flow': flow_key})
        for flow_key, flow_result in string_results.items()
    ],
    axis=1,
)
all_sequences.head()

variable_name,"(b_heat_source, heat_pump)","(bel, demand_el)","(bel, excess_el)","(bel, heat_pump)","(bth, demand_th)","(coal, pp_coal)","(gas, pp_chp)","(gas, pp_gas)","(heat_pump, bth)","(heat_source, b_heat_source)","(lignite, pp_lig)","(oil, pp_oil)","(pp_chp, bel)","(pp_chp, bth)","(pp_coal, bel)","(pp_gas, bel)","(pp_lig, bel)","(pp_oil, bel)","(pv, bel)","(wind, bel)"
2016-01-01 00:00:00,3.91854,52.169653,44.704306,1.95927,5.877809,0.0,-0.0,0.0,5.877809,3.91854,0.0,287.14939,0.0,0.0,0.0,0.0,0.0,80.401829,0.0,18.4314
2016-01-01 01:00:00,4.001186,52.169653,44.861883,2.000593,6.001778,0.0,-0.0,0.0,6.001779,4.001186,0.0,287.14939,0.0,0.0,0.0,0.0,0.0,80.401829,0.0,18.6303
2016-01-01 02:00:00,4.18469,47.392644,49.67974,2.092345,6.277035,0.0,-0.0,0.0,6.277035,4.18469,0.0,287.14939,0.0,0.0,0.0,0.0,0.0,80.401829,0.0,18.7629
2016-01-01 03:00:00,4.641344,45.197772,51.646285,2.320672,6.962015,0.0,-0.0,0.0,6.962015,4.641344,0.0,287.14939,0.0,0.0,0.0,0.0,0.0,80.401829,0.0,18.7629
2016-01-01 04:00:00,5.800365,44.189789,50.019457,2.900182,8.700547,0.0,-0.0,0.0,8.700547,5.800365,0.0,287.14939,0.0,0.0,0.0,0.0,0.0,80.401829,0.0,16.7076


## All sequences with multiindex

In [12]:
# Label every flow's 'flow' column with its (from, to) key and place all
# sequence frames side by side, aligned on their index.
all_sequences = pd.concat(
    [
        flow_result['sequences'].rename(columns={'flow': flow_key})
        for flow_key, flow_result in string_results.items()
    ],
    axis=1,
)

# Replace the flat tuple labels by a two-level (from, to) column index.
# NOTE(review): assumes every sequences frame contributes exactly one column;
# otherwise the number of labels will not match - confirm.
tuples = list(string_results.keys())
multi_index = pd.MultiIndex.from_tuples(tuples, names=['from', 'to'])
all_sequences.columns = multi_index
all_sequences.head()

from,b_heat_source,bel,bel,bel,bth,coal,gas,gas,heat_pump,heat_source,lignite,oil,pp_chp,pp_chp,pp_coal,pp_gas,pp_lig,pp_oil,pv,wind
to,heat_pump,demand_el,excess_el,heat_pump,demand_th,pp_coal,pp_chp,pp_gas,bth,b_heat_source,pp_lig,pp_oil,bel,bth,bel,bel,bel,bel,bel,bel
2016-01-01 00:00:00,3.91854,52.169653,44.704306,1.95927,5.877809,0.0,-0.0,0.0,5.877809,3.91854,0.0,287.14939,0.0,0.0,0.0,0.0,0.0,80.401829,0.0,18.4314
2016-01-01 01:00:00,4.001186,52.169653,44.861883,2.000593,6.001778,0.0,-0.0,0.0,6.001779,4.001186,0.0,287.14939,0.0,0.0,0.0,0.0,0.0,80.401829,0.0,18.6303
2016-01-01 02:00:00,4.18469,47.392644,49.67974,2.092345,6.277035,0.0,-0.0,0.0,6.277035,4.18469,0.0,287.14939,0.0,0.0,0.0,0.0,0.0,80.401829,0.0,18.7629
2016-01-01 03:00:00,4.641344,45.197772,51.646285,2.320672,6.962015,0.0,-0.0,0.0,6.962015,4.641344,0.0,287.14939,0.0,0.0,0.0,0.0,0.0,80.401829,0.0,18.7629
2016-01-01 04:00:00,5.800365,44.189789,50.019457,2.900182,8.700547,0.0,-0.0,0.0,8.700547,5.800365,0.0,287.14939,0.0,0.0,0.0,0.0,0.0,80.401829,0.0,16.7076


### Nice to filter:

In [13]:
# Select a single flow by its (from, to) column key.
all_sequences[('bel', 'demand_el')].head()

2016-01-01 00:00:00    52.169653
2016-01-01 01:00:00    52.169653
2016-01-01 02:00:00    47.392644
2016-01-01 03:00:00    45.197772
2016-01-01 04:00:00    44.189789
Freq: H, Name: (bel, demand_el), dtype: float64

## All scalars

In [14]:
# Display the string-keyed results in full; every entry holds a 'scalars'
# Series and a 'sequences' DataFrame.
string_results

{('b_heat_source',
  'heat_pump'): {'scalars': Series([], Name: 2016-01-01 00:00:00, dtype: float64), 'sequences': variable_name            flow
  2016-01-01 00:00:00  3.918540
  2016-01-01 01:00:00  4.001186
  2016-01-01 02:00:00  4.184690
  2016-01-01 03:00:00  4.641344
  2016-01-01 04:00:00  5.800365
  2016-01-01 05:00:00  6.666667
  2016-01-01 06:00:00  6.666667
  2016-01-01 07:00:00  6.666667
  2016-01-01 08:00:00  6.666667
  2016-01-01 09:00:00  6.666667
  2016-01-01 10:00:00  6.666667
  2016-01-01 11:00:00  6.666667
  2016-01-01 12:00:00  6.495739
  2016-01-01 13:00:00  6.499551
  2016-01-01 14:00:00  6.580753
  2016-01-01 15:00:00  6.666667
  2016-01-01 16:00:00  6.666667
  2016-01-01 17:00:00  6.666667
  2016-01-01 18:00:00  6.666667
  2016-01-01 19:00:00  6.666667
  2016-01-01 20:00:00  6.666667
  2016-01-01 21:00:00  5.825812
  2016-01-01 22:00:00  4.019274
  2016-01-01 23:00:00  3.844627
  2016-01-02 00:00:00  4.606934
  2016-01-02 01:00:00  4.731963
  2016-01-02 02:00:00  

In [15]:
# One row per flow built from its 'scalars' Series, then transposed so that
# the flows become the columns.
all_scalars = pd.DataFrame(
    [flow_result['scalars'] for flow_result in string_results.values()]
).T

# Label the columns with a two-level (from, to) index.
tuples = list(string_results.keys())
multi_index = pd.MultiIndex.from_tuples(tuples, names=['from', 'to'])
all_scalars.columns = multi_index
all_scalars

from,b_heat_source,bel,bel,bel,bth,coal,gas,gas,heat_pump,heat_source,lignite,oil,pp_chp,pp_chp,pp_coal,pp_gas,pp_lig,pp_oil,pv,wind
to,heat_pump,demand_el,excess_el,heat_pump,demand_th,pp_coal,pp_chp,pp_gas,bth,b_heat_source,pp_lig,pp_oil,bel,bth,bel,bel,bel,bel,bel,bel
invest,,,,,,,,,,,,,,,,,,80.401829,,


# All parameters

Open questions:
* What about node attributes, e.g. conversion_factors?
* Why does the label appear?


In [16]:
# Take the 'param' entry of the restored results and convert its keys to
# strings, mirroring what was done for the 'main' results.
param = energysystem.results['param']
string_param = outputlib.processing.convert_keys_to_strings(param)
# Display the string-keyed parameters in full; every entry holds 'scalars'
# and 'sequences'.
string_param

{('coal',
  'pp_coal'): {'scalars': fixed                                                                  False
  label                      EdgeLabel(input="<oemof.solph.network.Bus: 'co...
  max                                                                        1
  min                                                                        0
  negative_gradient_costs                                                    0
  positive_gradient_costs                                                    0
  variable_costs                                                             0
  dtype: object, 'sequences': Empty DataFrame
  Columns: []
  Index: []},
 ('gas',
  'pp_gas'): {'scalars': fixed                                                                  False
  label                      EdgeLabel(input="<oemof.solph.network.Bus: 'ga...
  max                                                                        1
  min                                                                 

In [17]:
# Keep only the entries that actually carry parameter sequences and label
# each frame's 'actual_value' column with its (from, to) key before placing
# the frames side by side.
all_sequences_param = pd.concat(
    [
        value['sequences'].rename(columns={'actual_value': key})
        for key, value in string_param.items()
        if not value['sequences'].empty
    ],
    axis=1,
)

# The column labels are the (from, to) keys set by the rename above; turn
# them into a two-level column index.
tuples = list(all_sequences_param.columns)
multi_index = pd.MultiIndex.from_tuples(tuples, names=['from', 'to'])
all_sequences_param.columns = multi_index
all_sequences_param.head()

from,bel,bth,wind,pv
to,demand_el,demand_th,bel,bel
0,0.613761,0.146945,0.278,0.0
1,0.613761,0.150044,0.281,0.0
2,0.557561,0.156926,0.283,0.0
3,0.531738,0.17405,0.283,0.0
4,0.51988,0.217514,0.252,0.0


TODO: what about the label?

In [18]:
# Gather the scalar parameters of every entry that also has parameter
# sequences; each 'scalars' Series becomes one row of the frame.
# No rename is applied here: a Series has no columns, so a
# `rename(columns=...)` call cannot relabel anything and is rejected by
# current pandas.
all_scalars_param = pd.DataFrame(
    [
        value['scalars']
        for value in string_param.values()
        if not value['sequences'].empty
    ]
)
# TODO: label the rows with their (from, to) key, e.g. via
# pd.MultiIndex.from_tuples(..., names=['from', 'to']).
all_scalars_param

Unnamed: 0,fixed,nominal_value,label,max,min,negative_gradient_costs,positive_gradient_costs,variable_costs
0,True,85.0,"EdgeLabel(input=""<oemof.solph.network.Bus: 'be...",1,0,0,0,0
1,True,40.0,"EdgeLabel(input=""<oemof.solph.network.Bus: 'bt...",1,0,0,0,0
2,True,66.3,"EdgeLabel(input=""<oemof.solph.network.Source: ...",1,0,0,0,0
3,True,65.3,"EdgeLabel(input=""<oemof.solph.network.Source: ...",1,0,0,0,0


# Xarray

## DataArray
http://xarray.pydata.org/en/stable/data-structures.html#dataarray


* data: a multi-dimensional array of values (e.g., a numpy ndarray, Series, DataFrame or Panel)
* coords: a list or dictionary of coordinates. If a list, it should be a list of tuples where the first element is the dimension name and the second element is the corresponding coordinate array_like object.
* dims: a list of dimension names. If omitted and coords is a list of tuples, dimension names are taken from coords.
* attrs: a dictionary of attributes to add to the instance
* name: a string that names the instance


In [19]:
# Small labelled example: random values on a (time, space) grid.
locs = ['IA', 'IL', 'IN']
times = pd.date_range('2000-01-01', periods=4)
data = np.random.rand(4, 3)
foo = xr.DataArray(
    data,
    name='example_dataarray',
    dims=['time', 'space'],
    coords=[times, locs],
    attrs={
        'purpose': 'demonstration',
        'occasion': 'oemof dev meeting',
    },
)
foo

<xarray.DataArray 'example_dataarray' (time: 4, space: 3)>
array([[0.074032, 0.094947, 0.570282],
       [0.366172, 0.626441, 0.799658],
       [0.362065, 0.999693, 0.98126 ],
       [0.935636, 0.818623, 0.474498]])
Coordinates:
  * time     (time) datetime64[ns] 2000-01-01 2000-01-02 2000-01-03 2000-01-04
  * space    (space) <U2 'IA' 'IL' 'IN'
Attributes:
    purpose:   demonstration
    occasion:  oemof dev meeting

## DataSet
http://xarray.pydata.org/en/stable/data-structures.html#dataset

To make a Dataset from scratch, supply dictionaries for any variables (data_vars), coordinates (coords) and attributes (attrs).

    data_vars should be a dictionary with each key as the name of the variable and each value as one of:
        A DataArray or Variable
        A tuple of the form (dims, data[, attrs]), which is converted into arguments for Variable
        A pandas object, which is converted into a DataArray
        A 1D array or list, which is interpreted as values for a one-dimensional coordinate variable along the same dimension as its name
    coords should be a dictionary of the same form as data_vars.
    attrs should be a dictionary.


In [33]:
# Random sample data on a 2 x 2 grid with 10 time steps.
temp = 15 + 8 * np.random.randn(2, 2, 10)
precip = 10 * np.random.rand(2, 2, 10)

# Two-dimensional longitude / latitude coordinates of the grid points.
lon = [[-99.83, -99.32], [-99.79, -99.23]]
lat = [[42.25, 42.21], [42.63, 42.59]]

# For real use cases it's good practice to supply array attributes such as
# units; they are left out here for the sake of brevity.
ds = xr.Dataset(
    data_vars={
        'temperature': (['x', 'y', 'time'], temp),
        'precipitation': (['x', 'y', 'time'], precip),
    },
    coords={
        'lon': (['x', 'y'], lon),
        'lat': (['x', 'y'], lat),
        'time': pd.date_range('2014-09-06', periods=10),
        'reference_time': pd.Timestamp('2014-09-05'),
    },
)

ds

<xarray.Dataset>
Dimensions:         (time: 10, x: 2, y: 2)
Coordinates:
    lon             (x, y) float64 -99.83 -99.32 -99.79 -99.23
    lat             (x, y) float64 42.25 42.21 42.63 42.59
  * time            (time) datetime64[ns] 2014-09-06 2014-09-07 ... 2014-09-15
    reference_time  datetime64[ns] 2014-09-05
Dimensions without coordinates: x, y
Data variables:
    temperature     (x, y, time) float64 8.507 15.58 16.93 ... 17.88 12.96 15.93
    precipitation   (x, y, time) float64 2.533 6.339 0.04154 ... 3.287 9.265

## Test: save results as xarray DataArrays / DataSets

In [25]:
# Move the (from, to) column levels onto the index, giving one long Series
# indexed by (from, to, time).
seq = all_sequences.unstack().rename_axis(['from', 'to', 'time'])

In [29]:
# Build a DataArray from the long Series; each index level of `seq` becomes a
# dimension. The result is a full (from, to, time) array, so (from, to)
# combinations without a flow appear as NaN in the output.
dataarray = xr.DataArray.from_series(seq)
dataarray

<xarray.DataArray (from: 16, to: 12, time: 240)>
array([[[        nan,         nan, ...,         nan,         nan],
        [        nan,         nan, ...,         nan,         nan],
        ...,
        [        nan,         nan, ...,         nan,         nan],
        [        nan,         nan, ...,         nan,         nan]],

       [[        nan,         nan, ...,         nan,         nan],
        [        nan,         nan, ...,         nan,         nan],
        ...,
        [        nan,         nan, ...,         nan,         nan],
        [        nan,         nan, ...,         nan,         nan]],

       ...,

       [[        nan,         nan, ...,         nan,         nan],
        [0.00000e+00, 0.00000e+00, ..., 0.00000e+00, 0.00000e+00],
        ...,
        [        nan,         nan, ...,         nan,         nan],
        [        nan,         nan, ...,         nan,         nan]],

       [[        nan,         nan, ...,         nan,         nan],
        [1.84314e+01, 

In [24]:
# Mock-up of a results Dataset with 'sequences' and 'scalars' variables on
# (from, to, time) dimensions, filled with random placeholder values.
# The placeholder arrays are created in this cell and sized to the time axis,
# so the cell does not depend on arrays left over from another cell whose
# time length differs from the 3-step time coordinate used below.
n_from, n_to, n_time = 2, 2, 3
sequences_dummy = 15 + 8 * np.random.randn(n_from, n_to, n_time)
scalars_dummy = 10 * np.random.rand(n_from, n_to, n_time)

lon = [[-99.83, -99.32], [-99.79, -99.23]]
lat = [[42.25, 42.21], [42.63, 42.59]]

# For real use cases it's good practice to supply array attributes such as
# units, but we won't bother here for the sake of brevity.
ds = xr.Dataset({'sequences': (['from', 'to', 'time'], sequences_dummy),
                 'scalars': (['from', 'to', 'time'], scalars_dummy)},
                coords={'lon': (['x', 'y'], lon),
                        'lat': (['x', 'y'], lat),
                        'time': pd.date_range('2014-09-06', periods=n_time),
                        'reference_time': pd.Timestamp('2014-09-05')})

ds

<xarray.Dataset>
Dimensions:         (from: 2, time: 3, to: 2, x: 2, y: 2)
Coordinates:
    lon             (x, y) float64 -99.83 -99.32 -99.79 -99.23
    lat             (x, y) float64 42.25 42.21 42.63 42.59
  * time            (time) datetime64[ns] 2014-09-06 2014-09-07 2014-09-08
    reference_time  datetime64[ns] 2014-09-05
Dimensions without coordinates: from, to, x, y
Data variables:
    sequences       (from, to, time) float64 12.32 18.74 3.195 ... 16.34 21.88
    scalars         (from, to, time) float64 0.3475 7.038 1.598 ... 5.112 2.741