## Imports

In [1]:
import matplotlib.pyplot as plt
import numpy as np

import tensorflow as tf
from tf_agents.agents.dqn import dqn_agent
from tf_agents.drivers import dynamic_step_driver
from tf_agents.environments import tf_py_environment
from tf_agents.environments import utils
from tf_agents.agents.dqn.dqn_agent import DqnAgent

from tf_agents.environments import py_environment
from tf_agents.networks.q_network import QNetwork
from tf_agents.networks import q_network
from tf_agents.eval import metric_utils
from tf_agents.metrics import tf_metrics
from tf_agents.replay_buffers import tf_uniform_replay_buffer
from tf_agents.trajectories import trajectory
from tf_agents.trajectories import time_step as ts
from tf_agents.environments import wrappers
from tf_agents.specs import array_spec
from tf_agents.utils import common

### GPU configuration

In [2]:
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

Num GPUs Available:  2


2022-06-02 09:27:57.508557: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-02 09:27:57.509107: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-02 09:27:57.513050: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-02 09:27:57.513573: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-02 09:27:57.514086: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from S

# Classes

## Air conditioning

In [3]:
class Airconditioning():
    def __init__(self):
        self.state = 0
        
    def turnOff(self):
        self.state = 0
        return self.consumption()
         
    def smallChange(self):
        self.state = 1
        return self.consumption()
    
    def bigChange(self):
        self.state = 2
        return self.consumption()
        
    def render(self, mode="human"):
        print("Air conditioning -> State {}, Consumption {}".format(self.state, self.consumption()))
    
    def consumption(self):
        if self.state == 0:
            return -20.0
        else:

            return -round(20.0 + self.state*20.0, 2)

In [4]:
ac = Airconditioning()
ac.smallChange()
ac.render()
ac.bigChange()
ac.render()
ac.turnOff()
ac.render()

Air conditioning -> State 1, Consumption -40.0
Air conditioning -> State 2, Consumption -60.0
Air conditioning -> State 0, Consumption -20.0


## Charging Station EV

In [5]:
class ChargingStation():
    def __init__(self, power = 30.0):
        """
            Initilization method
            : param
                power - Power of the Charge Station
        """
        self.s1 = 0 # 0 - Available, 1 - Busy, 2 - Charging, 3 - Discharging
        self.s2 = 0 # 0 - Available, 1 - Busy, 2 - Charging, 3 - Discharging
        self.power = power # Default 30 Kw
        
    def carArrival(self, station):
        """
            Simulation of the arrival of a car at a charging station
            : param
                station - Station where the car is plugged 
        """
        if station == 1 and self.s1 == 0:
            self.s1 = 1
        elif station == 2 and self.s2 == 0:
            self.s2 = 1
        else:
            print("Station is busy")

    def carDeparture(self, station):
        """
            Simulation of the departure of a car at a charging station
            : param
                station - Station where the car is plugged 
        """
        if station == 1 and self.s1 != 0:
            self.s1 = 0
        elif station == 2 and self.s2 != 0:
            self.s2 = 0
        else:
            print("Station was Available yet")
            
    def charge(self, station):
        """
            The car starts charging from the station
            : param
                station - Station where the car is charging 
        """
        if station == 1 and self.s1 != 0:
            self.s1 = 2
        elif station == 2 and self.s2 != 0:
            self.s2 = 2
        return self.consumption()
            
    def discharge(self, station):
        """
            The car starts discharging from the station
            : param
                station - Station where the car is discharging 
        """
        if station == 1 and self.s1 != 0:
            self.s1 = 3
        elif station == 2 and self.s2 != 0:
            self.s2 = 3
        return self.consumption()
    
    def consumption(self):
        """
            Calculates the consumption of the charging station based on its occupancy and condition.
            : param
                station - Station where the car is charging 
            : return the consumption of the charging station
        """
        if self.s1 == 2 and self.s2 < 3:
            return -self.power
        elif self.s2 == 2 and self.s1 < 3:
            return -self.power
        if self.s1 == 3 and self.s2 == 3:
            return self.power
        if self.s1 == 3 and self.s2 != 2 or self.s1 != 2 and self.s2 == 3:
            return self.power
        else: 
            return 0.0
        
    def stop(self):
        """
            Stop charging.
        """
        
        self.s1 = 1
        self.s2 = 1
        
        
    def render(self, mode ="human"):
        """
            Print the ev charging station status
        """
        print ("EV Station -> Station 1: {}, Station 2 {}, Consumption: {}".format(self.s1, self.s2, self.consumption()))
    
    def reset(self):
        self.s1 = 0 
        self.s2 = 0 

In [6]:
cs = ChargingStation()
cs.render()
cs.carArrival(1)
cs.render()
cs.carArrival(2)
cs.render()
cs.charge(1)
cs.render()
cs.discharge(2)
cs.render()
cs.carDeparture(1)
cs.render()

EV Station -> Station 1: 0, Station 2 0, Consumption: 0.0
EV Station -> Station 1: 1, Station 2 0, Consumption: 0.0
EV Station -> Station 1: 1, Station 2 1, Consumption: 0.0
EV Station -> Station 1: 2, Station 2 1, Consumption: -30.0
EV Station -> Station 1: 2, Station 2 3, Consumption: 0.0
EV Station -> Station 1: 0, Station 2 3, Consumption: 30.0


## Storage Battery

In [7]:
class StorageBattery():
    def __init__(self, capacity = 120):
        """
            Initilization method
            : param
                capacity - Capacity of the Storage Battery
        """
        self.capacity = capacity
        self.soc = self.capacity * 0.75
        self.consumption = 0
        
    def charge12(self):
        """
            Simulation of 12Kwh charge (12kwh = 4kw per 15 minutes)
            : return consumption
        """
        c = min(self.capacity, self.soc + 4)
        self.consumption = self.soc - c 
        self.soc = c
        return self.consumption
    
    def charge24(self):
        """
            Simulation of 24Kwh charge (24kwh = 6kw per 15 minutes)
            : return consumption
        """
        c = min(self.capacity, self.soc + 6)
        self.consumption = self.soc - c 
        self.soc = c
        return self.consumption
    
    def charge36(self):
        """
            Simulation of 36Kwh charge (36kwh = 9kw per 15 minutes)
            
        """
        c = min(self.capacity, self.soc + 9)
        self.consumption = self.soc - c 
        self.soc = c
        return self.consumption
    
    def discharge12(self):
        """
            Simulation of 12Kwh discharge (12kwh = 4kw per 15 minutes)
        """
        self.consumption = min(4, self.soc - 4)
        self.soc += self.consumption
        return self.consumption
    
    def discharge24(self):
        """
            Simulation of 24Kwh discharge (24kwh = 6kw per 15 minutes)
            
        """
        self.consumption = min(6, self.soc - 6)
        self.soc += self.consumption
        return self.consumption
        
    
    def discharge36(self):
        """
            Simulation of 36Kwh discharge (36kwh = 9kw per 15 minutes)
            : return consumption
        """
        self.consumption = min(9, self.soc - 9)
        self.soc += self.consumption
        return self.consumption
    
    def stop(self):
        """
            Stop processing
        """
        self.consumption = 0
        
    def render(self, mode="human"):
        """
            Print the ev charging station status
        """
        
        print ("Ampere SB -> Capacity: {}, State of Charge: {}, Consumption: {}".format(self.capacity, self.soc, self.consumption))
    
    def reset(self):
        self.soc = self.capacity 
        self.consumption = 0

In [8]:
sb = StorageBattery()
sb.render()
sb.charge12()
sb.render()
sb.discharge12()
sb.render()
sb.charge24()
sb.render()
sb.discharge36()
sb.render()
sb.discharge24()
sb.render()
sb.charge24()
sb.render()

Ampere SB -> Capacity: 120, State of Charge: 90.0, Consumption: 0
Ampere SB -> Capacity: 120, State of Charge: 94.0, Consumption: -4.0
Ampere SB -> Capacity: 120, State of Charge: 98.0, Consumption: 4
Ampere SB -> Capacity: 120, State of Charge: 104.0, Consumption: -6.0
Ampere SB -> Capacity: 120, State of Charge: 113.0, Consumption: 9
Ampere SB -> Capacity: 120, State of Charge: 119.0, Consumption: 6
Ampere SB -> Capacity: 120, State of Charge: 120, Consumption: -1.0


### Aditional functions

In [9]:
def compute_avg_return(environment, policy, num_episodes=10):
    
    total_return = 0.0
    for _ in range(num_episodes):

        time_step = environment.reset()
        episode_return = 0.0

        while not time_step.is_last():
            action_step = policy.action(time_step)
            time_step = environment.step(action_step.action)
            episode_return += time_step.reward
            total_return += episode_return

    avg_return = total_return / num_episodes
    return avg_return.numpy()[0]

## Scenario

In [10]:
class RealScenario(py_environment.PyEnvironment):
    
    def __init__(self, objective_curve, flexibility = 15.0):
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=26, name='action')
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=(1,), dtype=np.int32, minimum=0, name='observation')
        #Current step
        self._state = 0
        self._episode_ended = False        

        #Set base_consumption
        self.objective_curve = objective_curve
        #Set flexibility 
        self.flexibility = flexibility        
        #Air conditioning of the room
        self.ac = Airconditioning()
        #Storage battery
        self.sb = StorageBattery()
        # EV Charging Station
        self.cs = ChargingStation()
        self.cs.carArrival(1)
        self.cs.carArrival(2)
        self.consumption = 0
        self.cumulative_consumption = 0
    
    def get_cumulative_consumption(self):
        return self.cumulative_consumption

    def get_consumption(self):
        return self.consumption

    def action_spec(self):
        return self._action_spec

    def observation_spec(self):
        return self._observation_spec
    def current_time_step(self):
        """Returns the current `TimeStep`."""
        return self._current_time_step()

    def _reset(self):
        self._state = 0
        self._episode_ended = False
        self.ac = Airconditioning()
        #Storage battery
        self.sb = StorageBattery()
        # EV Charging Station
        self.cs = ChargingStation()
        self.cs.carArrival(1)
        self.cs.carArrival(2)
        self.consumption = 0
        self.cumulative_consumption = 0
        return ts.restart(np.array([self.objective_curve[self._state]], dtype=np.int32))

    def _step(self, action):
        self.ac.turnOff()
        self.cs.stop()
        self.sb.stop()
        if self._episode_ended:
            self._reset()
         # Do nothing.
        if action == 0:

            c = self.ac.turnOff()
        #Small change temperature
        elif action == 1:
            c = self.ac.smallChange()
        #Big Change temperature 
        elif action == 2:
            c = self.ac.bigChange()
        #EV Charge
        elif action == 3:
            c = self.cs.charge(1) + self.ac.turnOff()
        #EV Discharge
        elif action == 4:
            c = self.cs.discharge(1) + self.ac.turnOff()
        #SB Charge12
        elif action == 5:
            c = self.sb.charge12() + self.ac.turnOff()
        #SB Charge24
        elif action == 6:
            c = self.sb.charge24() + self.ac.turnOff()
        #SB Charge36
        elif action == 7:
            c = self.sb.charge36() + self.ac.turnOff()
        #SB Discharge12
        elif action == 8:
            c = self.sb.discharge12() + self.ac.turnOff()
        #SB Discharge24
        elif action == 9:
            c = self.sb.discharge24() + self.ac.turnOff()
        #SB Discharge24
        elif action == 10:
            c = self.sb.discharge24() + self.ac.turnOff()
        #Small Change Temperature and EV Charge
        elif action == 11:
            c = self.ac.smallChange() + self.cs.charge(1)
        #Small Change Temperature and EV Discharge
        elif action == 12:
            c = self.ac.smallChange() + self.cs.discharge(1)
        #Small Change Temperature and SB Charge12
        elif action == 13:
            c = self.ac.smallChange() + self.sb.charge12()
        #Small Change Temperature and SB Charge24
        elif action == 14:
            c = self.ac.smallChange() + self.sb.charge24()
        #Small Change Temperature and SB Charge36
        elif action == 15:
            c = self.ac.smallChange() + self.sb.charge36()
        #Small Change Temperature and SB Discharge12
        elif action == 16:
            c = self.ac.smallChange() + self.sb.discharge12()
        #Small Change Temperature and SB Discharge24
        elif action == 17:
            c = self.ac.smallChange() + self.sb.discharge24()
        #Small Change Temperature and SB Discharge24
        elif action == 18:
            c = self.ac.smallChange() + self.sb.discharge24()
        #Big Change Temperature and EV Charge
        elif action == 19:
            c = self.ac.bigChange() + self.cs.charge(1)
        #Big Change Temperature and EV Discharge
        elif action == 20:
            c = self.ac.bigChange() + self.cs.discharge(1)
        #Big Change Temperature and SB Charge12
        elif action == 21:
            c = self.ac.bigChange() + self.sb.charge12()
        #Big Change Temperature and SB Charge24
        elif action == 22:
            c = self.ac.bigChange() + self.sb.charge24()
        #Big Change Temperature and SB Charge36
        elif action == 23:
            c = self.ac.bigChange() + self.sb.charge36()
        #Big Change Temperature and SB Discharge12
        elif action == 24:
            c = self.ac.bigChange() + self.sb.discharge12()
        #Big Change Temperature and SB Discharge24
        elif action == 25:
            c = self.ac.bigChange() + self.sb.discharge24()
        #Big Change Temperature and SB Discharge24
        elif action == 26:
            c = self.ac.bigChange() + self.sb.discharge24()
        #EV Charge and SB Charge12
        elif action == 27:
            c = self.cs.charge(1) + self.sb.charge12() + self.ac.turnOff()
        #EV Charge and SB Charge24
        elif action == 28:
            c = self.cs.charge(1) + self.sb.charge24() + self.ac.turnOff()
        #EV Charge and SB Charge36
        elif action == 29:
            c = self.cs.charge(1) + self.sb.charge36() + self.ac.turnOff()
        #EV Charge and SB Discharge12
        elif action == 30:
            c = self.cs.charge(1) + self.sb.discharge12() + self.ac.turnOff()
        #EV Charge and SB Discharge24
        elif action == 31:
            c = self.cs.charge(1) + self.sb.discharge24() + self.ac.turnOff()
        #EV Charge and SB Discharge24
        elif action == 32:
            c = self.cs.charge(1) + self.sb.discharge24() + self.ac.turnOff()
        #EV Discharge and SB Charge12
        elif action == 33:
            c = self.cs.discharge(1) + self.sb.charge12() + self.ac.turnOff()
        #EV Discharge and SB Charge24
        elif action == 34:
            c = self.cs.discharge(1) + self.sb.charge24() + self.ac.turnOff()
        #EV Discharge and SB Charge36
        elif action == 35:
            c = self.cs.discharge(1) + self.sb.charge36() + self.ac.turnOff()
        #EV Discharge and SB Discharge12
        elif action == 36:
            c = self.cs.discharge(1) + self.sb.discharge12() + self.ac.turnOff()
        #EV Discharge and SB Discharge24
        elif action == 37:
            c = self.cs.discharge(1) + self.sb.discharge24() + self.ac.turnOff()
        #EV Discharge and SB Discharge24
        elif action == 38:
            c = self.cs.discharge(1) + self.sb.discharge24() + self.ac.turnOff()
        #Small Change Temperature, EV Charge and SB Charge12
        elif action == 39:
            c = self.ac.smallChange() + self.cs.charge(1) + self.sb.charge12()
        #Small Change Temperature, EV Charge and SB Charge24
        elif action == 40:
            c = self.ac.smallChange() + self.cs.charge(1) + self.sb.charge24()
        #Small Change Temperature, EV Charge and SB Charge36
        elif action == 41:
            c = self.ac.smallChange() + self.cs.charge(1) + self.sb.charge36()
        #Small Change Temperature, EV Charge and SB Discharge12
        elif action == 42:
            c = self.ac.smallChange() + self.cs.charge(1) + self.sb.discharge12()
        #Small Change Temperature, EV Charge and SB Discharge24
        elif action == 43:
            c = self.ac.smallChange() + self.cs.charge(1) + self.sb.discharge24()
        #Small Change Temperature, EV Charge and SB Discharge24
        elif action == 44:
            c = self.ac.smallChange() + self.cs.charge(1) + self.sb.discharge24()
        #Big Change Temperature, EV Charge and SB Charge12
        elif action == 45:
            c = self.ac.bigChange() + self.cs.charge(1) + self.sb.charge12()
        #Big Change Temperature, EV Charge and SB Charge24
        elif action == 46:
            c = self.ac.bigChange() + self.cs.charge(1) + self.sb.charge24()
        #Big Change Temperature, EV Charge and SB Charge36
        elif action == 47:
            c = self.ac.bigChange() + self.cs.charge(1) + self.sb.charge36()
        #Big Change Temperature, EV Charge and SB Discharge12
        elif action == 48:
            c = self.ac.bigChange() + self.cs.charge(1) + self.sb.discharge12()
        #Big Change Temperature, EV Charge and SB Discharge24
        elif action == 49:
            c = self.ac.bigChange() + self.cs.charge(1) + self.sb.discharge24()
        #Big Change Temperature, EV Charge and SB Discharge24
        elif action == 50:
            c = self.ac.bigChange() + self.cs.charge(1) + self.sb.discharge24()
        val = self.objective_curve[self._state] + c
        self.consumption = c
        self.cumulative_consumption += val
            

        if abs(val) > self.objective_curve[self._state] * (self.flexibility)/100 or self._state == len(self.objective_curve)-1 :
            self._episode_ended = True 
            reward = 5*len(self.objective_curve) if self._state == len(self.objective_curve)-1 else -50*(len(self.objective_curve)-self._state)
            return ts.termination(np.array([self.objective_curve[self._state]], dtype=np.int32), reward)
        elif val == 0:
            reward = 1
        else:
            reward =  -abs(val)
        
        

        self._state += 1
        return ts.transition(np.array([self.objective_curve[self._state]], dtype=np.int32), reward=reward)

        
    def render(self, mode = 'human'):
        print("\n")
        print("Current_step {}, Objective curve {},  real consumption {}, ended: {}".format(self._state, self.objective_curve[self._state], self.consumption, self._episode_ended))
        self.sb.render()
        self.ac.render()
        self.cs.render()


### Global vars

In [11]:
num_iterations = 20000  

initial_collect_steps = 10000 
collect_steps_per_iteration = 1  
replay_buffer_capacity = 10000  

fc_layer_params = (100,)

batch_size = 128  
learning_rate = 1e-3  
log_interval = 1000  

num_eval_episodes = 100  
eval_interval = 1000 
objective_curve = [30.0,  45.0, 55.0, 32.0]

env = RealScenario(objective_curve, 20.0)
utils.validate_py_environment(env, episodes=55)
action_values = ["Do nothing", "Small change temperature", "Big Change temperature", "EV Charge", "EV Discharge", "SB Charge12", "SB Charge24" ,"SB Charge36", "SB Discharge12" ,"SB Discharge24", "SB Discharge36", "Small Change Temperature and EV Charge", "Small Change Temperature and EV Discharge", "Small Change Temperature and SB Charge12", "Small Change Temperature and SB Charge24", "Small Change Temperature and SB Charge36", "Small Change Temperature and SB Discharge12", "Small Change Temperature and SB Discharge24", "Small Change Temperature and SB Discharge36", "Big Change Temperature and EV Charge", "Big Change Temperature and EV Discharge", "Big Change Temperature and SB Charge12", "Big Change Temperature and SB Charge24", "#Big Change Temperature and SB Charge36", "Big Change Temperature and SB Discharge12", "Big Change Temperature and SB Discharge24", "Big Change Temperature and SB Discharge36", "EV Charge and SB Charge12", "EV Charge and SB Charge24", "EV Charge and SB Charge36", "EV Charge and SB Discharge12", "EV Charge and SB Discharge24", "EV Charge and SB Discharge36", "EV Discharge and SB Charge12", "EV Discharge and SB Charge24", "EV Discharge and SB Charge36", "EV Discharge and SB Discharge12", "EV Discharge and SB Discharge24", "EV Discharge and SB Discharge36", "Small Change Temperature, EV Charge and SB Charge12", "Small Change Temperature, EV Charge and SB Charge24", "Small Change Temperature, EV Charge and SB Charge36", "EV Charge and SB Discharge12", "Small Change Temperature, EV Charge and SB Discharge24", "Small Change Temperature, EV Charge and SB Discharge36", "Big Change Temperature, EV Charge and SB Charge12", "Big Change Temperature, EV Charge and SB Charge24", "Big Change Temperature, EV Charge and SB Charge36", "Big Change Temperature, EV Charge and SB Discharge12", "Big Change Temperature, EV Charge and SB Discharge24",  "Big Change Temperature, EV Charge and SB Discharge36"]


In [12]:
train_py_env = wrappers.TimeLimit(RealScenario(objective_curve, 20.0), duration=500)
eval_py_env = wrappers.TimeLimit(RealScenario(objective_curve, 20.0), duration=500)
train_env = tf_py_environment.TFPyEnvironment(train_py_env)
eval_env = tf_py_environment.TFPyEnvironment(eval_py_env)
optimizer = tf.compat.v1.train.AdamOptimizer(learning_rate=learning_rate)

train_step_counter = tf.compat.v2.Variable(0)

q_net = q_network.QNetwork(
    train_env.observation_spec(),
    train_env.action_spec(),
    fc_layer_params=fc_layer_params)

tf_agent = DqnAgent(
        train_env.time_step_spec(),
        train_env.action_spec(),
        q_network=q_net,
        optimizer=optimizer,
        td_errors_loss_fn = common.element_wise_squared_loss,
        train_step_counter=train_step_counter)



tf_agent.initialize()
eval_policy = tf_agent.policy
collect_policy = tf_agent.collect_policy

replay_buffer = tf_uniform_replay_buffer.TFUniformReplayBuffer(
        data_spec=tf_agent.collect_data_spec,
        batch_size=train_env.batch_size,
        max_length=replay_buffer_capacity)

replay_observer = [replay_buffer.add_batch]

train_metrics = [
            tf_metrics.EnvironmentSteps(),
            tf_metrics.AverageReturnMetric(),

]
dataset = replay_buffer.as_dataset(
        num_parallel_calls=3,
        sample_batch_size=batch_size, single_deterministic_pass=False,
num_steps=2).prefetch(3)
driver = dynamic_step_driver.DynamicStepDriver(
            train_env,
            collect_policy,
            observers=replay_observer + train_metrics,
    num_steps=1)
print(compute_avg_return(eval_env, tf_agent.policy, num_eval_episodes))

tf_agent.train = common.function(tf_agent.train)
tf_agent.train_step_counter.assign(0)
final_time_step, policy_state = driver.run()
iterator = iter(dataset)

2022-06-02 09:27:57.672772: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-06-02 09:27:57.798324: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-02 09:27:57.798832: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-06-02 09:27:57.799317: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:939] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zer

Instructions for updating:
Use `as_dataset(..., single_deterministic_pass=False) instead.


2022-06-02 09:27:58.747777: I tensorflow/stream_executor/cuda/cuda_blas.cc:1774] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.


-200.0


### Model training

In [13]:
for i in range(10000):
    final_time_step, _ = driver.run(final_time_step, policy_state)

episode_len = []
step_len = []
for i in range(num_iterations):
    final_time_step, _ = driver.run(final_time_step, policy_state)
    experience, _ = next(iterator)
    train_loss = tf_agent.train(experience=experience)
    step = tf_agent.train_step_counter.numpy()

    if step % log_interval == 0:
        print('step = {0}: loss = {1}'.format(step, train_loss.loss))
        step_len.append(step)

    if step % eval_interval == 0:
        avg_return = compute_avg_return(eval_env, tf_agent.policy, num_eval_episodes)
        print('step = {0}: AVG REWARDS = {1}'.format(step, avg_return))



Instructions for updating:
back_prop=False is deprecated. Consider using tf.stop_gradient instead.
Instead of:
results = tf.foldr(fn, elems, back_prop=False)
Use:
results = tf.nest.map_structure(tf.stop_gradient, tf.foldr(fn, elems))
step = 1000: loss = 222.59190368652344
step = 1000: AVG REWARDS = -136.0
step = 2000: loss = 1227.199462890625
step = 2000: AVG REWARDS = -136.0
step = 3000: loss = 1619.2086181640625
step = 3000: AVG REWARDS = -136.0
step = 4000: loss = 1410.5048828125
step = 4000: AVG REWARDS = -200.0
step = 5000: loss = 883.6013793945312
step = 5000: AVG REWARDS = -136.0
step = 6000: loss = 755.2884521484375
step = 6000: AVG REWARDS = -136.0
step = 7000: loss = 563.525390625
step = 7000: AVG REWARDS = -115.0
step = 8000: loss = 495.7237854003906
step = 8000: AVG REWARDS = -130.0
step = 9000: loss = 681.1240844726562
step = 9000: AVG REWARDS = -121.0
step = 10000: loss = 270.25518798828125
step = 10000: AVG REWARDS = -130.0
step = 11000: loss = 222.82810974121094
step = 

### Model evaluation

In [14]:
action_list = []
env.reset()
cont = 0


time_step = eval_env._reset()

while not time_step.is_last():
    action_step = tf_agent.policy.action(time_step)
    action_list.append(tf.get_static_value(action_step.action[0]))
    time_step = eval_env.step(action_step.action)
    env.step(action_step.action)
    print("Current consumption: {}".format(objective_curve[cont]))
    print("Consumption: {}".format(env.get_consumption()))
    print("Cumulative consumption: {}".format(env.get_cumulative_consumption()))
    print("Action done: {}".format(action_values[action_step.action[0]]))
    print("")
    cont+=1


Current consumption: 30.0
Consumption: -36.0
Cumulative consumption: -6.0
Action done: Small Change Temperature and SB Discharge12

Current consumption: 45.0
Consumption: -54.0
Cumulative consumption: -15.0
Action done: Big Change Temperature and SB Discharge36

Current consumption: 55.0
Consumption: -44.0
Cumulative consumption: -4.0
Action done: Small Change Temperature and SB Charge12

Current consumption: 32.0
Consumption: -36.0
Cumulative consumption: -8.0
Action done: Small Change Temperature and SB Discharge12

