In [68]:
from datetime import timedelta
from pathlib import Path

import numpy as np
import pandas as pd


# Directory holding the scenario data files, given as a relative path.
DATA_DIR = Path("data")

class TBMEnv:
    """
    Base environment bundling the logic shared by the scenarios.

    Children must implement `load_data` and `make_selectors`. They must
    also provide `obs_columns`, `action_columns` and `disturbance_columns`,
    as these are read by `get_step_return_for_ts`.
    """

    predicted_means = []

    def __init__(self):
        """
        Set up the env by loading the data and building the selectors.

        Generates Attributes:
        ---------------------
        data : pandas.DataFrame
            The dataset for the env.
            Is generated by calling `self.load_data`
        obs_selectors : dict
            Maps a timestamp to the row labels of `data` holding the
            obs for that timestamp.
            Is generated by calling `self.make_selectors`
        ac_dist_selectors : dict
            Maps a timestamp to the row labels of `data` holding the
            actions and disturbances for that timestamp.
            Is generated by calling `self.make_selectors`
        train_ts : list of pandas.Timestamp
            List of timestamps that belong to training dataset.
            Is generated by calling `self.make_selectors`
        eval_ts : list of pandas.Timestamp
            List of timestamps that belong to evaluation dataset.
            Is generated by calling `self.make_selectors`
        """
        self.load_data()
        self.make_selectors()

    def load_data(self):
        """
        Load the data for the scenario. Must be overloaded by the child.

        Raises:
        -------
        NotImplementedError
            Always, as the parent class has no data to load.
        """
        raise NotImplementedError("`load_data` must be overloaded by child.")

    def make_selectors(self):
        """
        Create selectors for obs and actions/disturbances.
        Must be overloaded by the child.

        A selector matches a timestamp (i.e. the state of the
        environment) to the timestamps of the data that `self.step`
        should return for that timestamp.

        Raises:
        -------
        NotImplementedError
            Always, as the parent class cannot know the selectors.
        """
        raise NotImplementedError("`make_selectors` must be overloaded by child.")

    def get_training_data(self):
        """
        Collect the training data intended for the candidate models.

        Returns:
        --------
        training_obs : list of pandas.DataFrame
            A list of obs items as would be returned by `self.step`.
        training_actions : list of pandas.DataFrame
            A list of action items as would be returned by `self.step`
        training_disturbances : list of pandas.DataFrame
            A list of disturbance items as would be returned by `self.step`
        """
        # One (obs, actions, disturbances) triple per training timestamp,
        # in the order given by `self.train_ts`.
        step_returns = [self.get_step_return_for_ts(ts) for ts in self.train_ts]

        training_obs = [obs for obs, _, _ in step_returns]
        training_actions = [actions for _, actions, _ in step_returns]
        training_disturbances = [dists for _, _, dists in step_returns]

        return training_obs, training_actions, training_disturbances

    def get_step_return_for_ts(self, ts):
        """
        Look up the data that `self.step` returns for one timestamp.

        Arguments:
        ----------
        ts : pandas.Timestamp
            A key of `self.obs_selectors` and `self.ac_dist_selectors`.

        Returns:
        --------
        obs : pandas.DataFrame
            The observed values corresponding to the state variables to
            predict that would have been generated since the last step.
        actions : pandas.DataFrame
            The actions that should be considered in the state prediction.
        disturbances : pandas.DataFrame
            The forecast of disturbances that should be considered in the
            state prediction.
        """
        obs_rows = self.obs_selectors[ts]
        # Actions and disturbances always share the same rows.
        ac_dist_rows = self.ac_dist_selectors[ts]

        return (
            self.data.loc[obs_rows, self.obs_columns],
            self.data.loc[ac_dist_rows, self.action_columns],
            self.data.loc[ac_dist_rows, self.disturbance_columns],
        )

    def reset(self):
        """
        Reset the environment to the initial state.

        Rewinds to the first evaluation timestamp and drops all
        predictions stored so far.

        Returns:
        --------
        obs, actions, disturbances : pandas.DataFrame
            The items of `self.get_step_return_for_ts` for the first
            entry of `self.eval_ts`.
        """
        self.i_eval = 0
        self.all_predicted_states = []

        return self.get_step_return_for_ts(self.eval_ts[0])

    def step(self, predicted_states):
        """
        Advance the environment by one step.

        Stores `predicted_states` so the performance measure can be
        computed later on, and returns the data required for the next
        prediction step.

        Arguments:
        ----------
        predicted_states : pandas.DataFrame
            A dataframe holding the candidate prediction of the state
            variables that should be predicted in the respective scenario.

        Returns:
        --------
        obs : pandas.DataFrame
            The observed values corresponding to the state variables to
            predict that would have been generated since the last step.
        actions : pandas.DataFrame
            The actions that should be considered in the state prediction.
        disturbances : pandas.DataFrame
            The forecast of disturbances that should be considered in the
            state prediction.
        done : bool
            If true the final state has been reached.

        Raises:
        -------
        RuntimeError
            If `reset` has not been called before.
        """
        if not hasattr(self, "i_eval"):
            raise RuntimeError("Reset env before calling step.")

        # The data for the current `i_eval` has already been handed to the
        # candidate model (by `reset` if this is the first call to `step`).
        # Once the last evaluation timestamp is reached, `i_eval` is frozen
        # so that fetching the timestamp below cannot raise an IndexError.
        done = self.i_eval >= len(self.eval_ts) - 1
        if not done:
            self.i_eval += 1

        next_ts = self.eval_ts[self.i_eval]
        obs, actions, disturbances = self.get_step_return_for_ts(next_ts)

        # TODO: Check that predicted_states has the same index and columns
        # as obs and doesn't contain NaN values.
        # This should prevent downstream errors while computing the
        # performance measure.
        self.all_predicted_states.append(predicted_states)

        return obs, actions, disturbances, done

    def compute_performance_measure(self):
        """
        Placeholder, not implemented yet. Currently returns None.

        TODO: Add some logic here how to compute the performance
              measure from the baseline. Likely want to not simulate
              everything using the baseline code every time.
              Just storing the predictions of the baseline should
              be sufficient? Or just the scores?
        """
    
class Scenario2(TBMEnv):
    """
    Environment for scenario 2.

    For every step the env hands out the `T_z` observations of one day
    together with the `T_zSP` actions and the disturbance columns of the
    following day in the data (see `make_selectors`).
    """

    obs_columns = ["T_z"]
    action_columns = ["T_zSP"]
    disturbance_columns = [
        "T_a",
        "T_s_fct_mean",
        "T_s_fct_lower",
        "T_s_fct_upper",
        "CO2_fct_mean",
        "CO2_fct_upper",
        "CO2_fct_lower",
    ]

    # Class level placeholders only. `make_selectors` assigns fresh
    # dicts to every instance, so these are never mutated.
    obs_selectors = {}
    ac_dist_selectors = {}

    def load_data(self):
        """
        Load the data for the scenario 2.

        Reads `scenario_2_data.csv.bz2` from `DATA_DIR` into `self.data`,
        using the first column as index with dates parsed, and adds the
        `T_zSP` setpoint column.
        """
        self.data = pd.read_csv(
            DATA_DIR / "scenario_2_data.csv.bz2",
            index_col=0,
            parse_dates=True,
        )
        # Add setpoints, these have not been recorded but have been
        # defined in the TropicalPrecooling env like this:
        # 27.0 by default and 23.5 for all rows from hour 7 onwards.
        self.data["T_zSP"] = 27.0
        self.data.loc[self.data.index.hour >= 7, "T_zSP"] = 23.5

    def make_selectors(self):
        """
        Create selectors for obs and actions/disturbances.

        Selectors match a timestamp (i.e. the state of the environment)
        to one or more timestamps that correspond to the data that
        should be returned by `self.step` for that timestamp.

        Generates Attributes:
        ---------------------
        obs_selectors : dict of pandas.Timestamp -> pandas.DatetimeIndex
            For each day start, all timestamps of the preceding day start's
            date.
        ac_dist_selectors : dict of pandas.Timestamp -> pandas.DatetimeIndex
            For each day start, all timestamps of its own date.
        train_ts : list of pandas.Timestamp
            Day starts with month >= 7.
        eval_ts : list of pandas.Timestamp
            Day starts with month < 7.
        """
        obs_selectors = {}
        ac_dist_selectors = {}
        train_ts = []
        eval_ts = []

        # Use the data of this instance. Reading it from a global `env`
        # fails with a NameError on first construction and would silently
        # pick up another instance's data afterwards.
        index = self.data.index

        # Loop invariant, hence computed only once.
        dates = index.date

        # A day start is a timestamp at 04:02.
        first_ts_of_day = index[(index.hour == 4) & (index.minute == 2)]

        # Pair each day start with its predecessor. The first day start has
        # no predecessor and thus gets no selectors.
        day_pairs = zip(first_ts_of_day[:-1], first_ts_of_day[1:])
        for ts_previous_day, ts_present_day in day_pairs:
            obs_selectors[ts_present_day] = index[dates == ts_previous_day.date()]
            ac_dist_selectors[ts_present_day] = index[dates == ts_present_day.date()]

            if ts_present_day.month >= 7:
                train_ts.append(ts_present_day)
            else:
                eval_ts.append(ts_present_day)

        self.obs_selectors = obs_selectors
        self.ac_dist_selectors = ac_dist_selectors
        self.train_ts = train_ts
        self.eval_ts = eval_ts
        
        
# Smoke test: build the scenario 2 env, reset it and advance one step
# without providing a prediction. As the last expression of the cell,
# the tuple returned by `step` is displayed below.
env = Scenario2()
env.reset()
env.step(None)

(                      T_z
 2014-01-06 04:02:30  31.0
 2014-01-06 04:07:30  31.0
 2014-01-06 04:12:30  31.0
 2014-01-06 04:17:30  31.0
 2014-01-06 04:22:30  31.0
 ...                   ...
 2014-01-06 16:37:30  24.7
 2014-01-06 16:42:30  24.7
 2014-01-06 16:47:30  24.7
 2014-01-06 16:52:30  24.8
 2014-01-06 16:57:30  24.8
 
 [156 rows x 1 columns],
                      T_zSP
 2014-01-07 04:02:30   27.0
 2014-01-07 04:07:30   27.0
 2014-01-07 04:12:30   27.0
 2014-01-07 04:17:30   27.0
 2014-01-07 04:22:30   27.0
 ...                    ...
 2014-01-07 16:37:30   23.5
 2014-01-07 16:42:30   23.5
 2014-01-07 16:47:30   23.5
 2014-01-07 16:52:30   23.5
 2014-01-07 16:57:30   23.5
 
 [156 rows x 1 columns],
                       T_a  T_s_fct_mean  T_s_fct_lower  T_s_fct_upper  \
 2014-01-07 04:02:30  26.9     13.070007      10.953454      15.186561   
 2014-01-07 04:07:30  26.9     13.074223      10.974605      15.173840   
 2014-01-07 04:12:30  26.8     13.081093      11.007275      15.

In [69]:
# Walk through the whole evaluation set, passing `None` instead of a
# prediction. `step` returns `done=True` once the last evaluation
# timestamp has been reached, which ends the loop.
done = False
env.reset()
while not done:
    obs, actions, disturbances, done = env.step(None)
    

In [71]:
# Number of predictions (here `None`) stored by `step` since the last `reset`.
len(env.all_predicted_states)

62