In [1]:
import os
import numpy as np
import random as rn
import environment
import brain
import dqn
from ray import tune, air
from ray.air import session
from ray.tune.search.optuna import OptunaSearch
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune.search import ConcurrencyLimiter
from ray.tune import CLIReporter
# from tqdm import tqdm
os.environ["PYTHONHASHSEED"] = '0'
np.random.seed(42)
rn.seed(12345)

#Setting the parameters
# epsilon is exploration parameter
epsilon = 0.3
number_actions=5
direction_boundary = (number_actions -1)/2
number_epochs = 100
max_memory = 3000
batch_size = 512
temperature_step = 1.5


# choosing the model
train = True

2023-04-18 05:57:53.279303: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [2]:
def train_model(config):
    # building the environment by simply creating an object of the environment class
    env = environment.Environment(optimal_temperature = (18.0,24.0), initial_month = 0, initial_number_users = 20, initial_rate_data = 30)
    # building the brain by simpy creating an object of the brain class
    brain_model = brain.Brain(learning_rate = config['learning_rate'], weight_decay=config['weight_decay'] , layers=config['layers'],num_actions=5, dropout=config['dropout'], activation=config['activation'], optimizer = config['optimizer'])
    # building the DQN model by simpy creating an object of the DQN class
    dqn_model = dqn.DQN(max_memory = max_memory, discount = config['discount_factor'])
    env.train = train
    model = brain_model.model
    early_stopping = True
    patience = 10
    best_total_reward = -np.inf
    patience_count = 0
    if (env.train):
        # STARTING THE LOOP OVER ALL THE EPOCHS (1 Epoch = 5 Months)
        for epoch in range(1, number_epochs):
            # INITIALIAZING ALL THE VARIABLES OF BOTH THE ENVIRONMENT AND THE TRAINING LOOP
            total_reward = 0
            loss = 0.
            new_month = np.random.randint(0, 12)
            env.reset(new_month = new_month)
            game_over = False
            current_state, _, _ = env.observe()
            timestep = 0
            # STARTING THE LOOP OVER ALL THE TIMESTEPS (1 Timestep = 1 Minute) IN ONE EPOCH
            while ((not game_over) and timestep <= 5 * 30 * 24 * 60):
                # PLAYING THE NEXT ACTION BY EXPLORATION
                if np.random.rand() <= epsilon:
                    action = np.random.randint(0, number_actions)
                    if (action - direction_boundary < 0):
                        direction = -1
                    else:
                        direction = 1
                    energy_ai = abs(action - direction_boundary) * temperature_step
                # PLAYING THE NEXT ACTION BY INFERENCE
                else:
                    q_values = model.predict(current_state)
                    action = np.argmax(q_values[0])
                    if (action - direction_boundary < 0):
                        direction = -1
                    else:
                        direction = 1
                    energy_ai = abs(action - direction_boundary) * temperature_step
                # UPDATING THE ENVIRONMENT AND REACHING THE NEXT STATE
                next_state, reward, game_over = env.update_env(direction, energy_ai, int(timestep / (30 * 24 * 60)))
                total_reward += reward
                session.report({'total_reward':total_reward})
                # STORING THIS NEW TRANSITION INTO THE MEMORY
                dqn_model.remember([current_state, action, reward, next_state], game_over)
                # GATHERING IN TWO SEPARATE BATCHES THE INPUTS AND THE TARGETS
                inputs, targets = dqn_model.get_batch(model, batch_size = batch_size)
                # COMPUTING THE LOSS OVER THE TWO WHOLE BATCHES OF INPUTS AND TARGETS
                loss += model.train_on_batch(inputs, targets)
                timestep += 1
                current_state = next_state
            # PRINTING THE TRAINING RESULTS FOR EACH EPOCH
            print("\n")
            print("Epoch: {:03d}/{:03d}".format(epoch, number_epochs))
            print("Total Energy spent with an AI: {:.0f}".format(env.total_energy_ai))
            print("Total Energy spent with no AI: {:.0f}".format(env.total_energy_noai))
            
            # EARLY STOPPING
            if (early_stopping):
                if (total_reward <= best_total_reward):
                    patience_count += 1
                elif (total_reward > best_total_reward):
                    best_total_reward = total_reward
                    patience_count = 0
                if (patience_count >= patience):
                    print("Early Stopping")
                    break
    return {'total_reward':total_reward}
            

In [3]:
config = {
    "learning_rate": tune.loguniform(1e-4, 1e-1),
    "weight_decay": tune.loguniform(1e-4, 1e-1),
    "layers": tune.choice([[32, 20], [64, 32], [64, 64],[128, 32], [128, 64], [128, 128]]),
    "dropout": tune.uniform(0.1, 0.5),
    "activation": tune.choice(['relu', 'sigmoid']),
    'optimizer': tune.choice(['adam', 'sgd', 'rmsprop', 'adamw']),
    'discount_factor': tune.choice([0.7, 0.8, 0.9, 0.95, 0.99]),
}

algo = OptunaSearch()
algo = ConcurrencyLimiter(algo, max_concurrent=4)

resources_per_trial = {"cpu": 8, "gpu": 1}
scheduler = AsyncHyperBandScheduler(max_t=100, grace_period=3, reduction_factor=2)

reporter = CLIReporter(
    parameter_columns=list(config.keys()),
    metric_columns=["total_reward", "training_iteration"],
)
analysis = tune.run(
    train_model,
    resources_per_trial=resources_per_trial,
    metric="total_reward",
    mode="max",
    config=config,
    num_samples=100,
    search_alg=algo,
    scheduler=scheduler,
    name="reinforce_optuna",
    local_dir="~/ray_results",
    progress_reporter=reporter,
    verbose=1,
)

print('Best hyperparameters found were: ', analysis.best_config)


2023-04-18 05:36:53,249	INFO worker.py:1553 -- Started a local Ray instance.
[32m[I 2023-04-18 05:36:53,930][0m A new study created in memory with name: optuna[0m
[2m[36m(pid=975522)[0m 2023-04-18 05:36:54.976447: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


== Status ==
Current time: 2023-04-18 05:36:53 (running for 00:00:00.04)
Memory usage on this node: 3.9/31.0 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 96.000: None | Iter 48.000: None | Iter 24.000: None | Iter 12.000: None | Iter 6.000: None | Iter 3.000: None
Resources requested: 8.0/8 CPUs, 1.0/1 GPUs, 0.0/16.88 GiB heap, 0.0/8.44 GiB objects (0.0/1.0 accelerator_type:A10G)
Result logdir: /home/ubuntu/ray_results/reinforce_optuna
Number of trials: 1/100 (1 RUNNING)
+----------------------+----------+----------------------+-----------------+----------------+------------+-----------+--------------+-------------+-------------------+
| Trial name           | status   | loc                  |   learning_rate |   weight_decay | layers     |   dropout | activation   | optimizer   |   discount_factor |
|----------------------+----------+----------------------+-----------------+----------------+------------+-----------+--------------+-------------+-------------------|
| train_mo

[2m[36m(train_model pid=975522)[0m 2023-04-18 05:36:56.370245: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
[2m[36m(train_model pid=975522)[0m 2023-04-18 05:36:56.370888: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
[2m[36m(train_model pid=975522)[0m 2023-04-18 05:36:56.406738: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[2m[36m(train_model pid=975522)[0m 2023-04-18 05:36:56.406897: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
[2m[36m(train_model pid=975522)[0m pciBusID: 0000:00:1e.0 name: NVIDIA A10G computeCapability: 8.6
[2m[36m(train_model pid=975522)[0m coreClock: 1.71GHz coreCount: 80 deviceMemorySize: 22.20GiB deviceMemoryBandwidth: 558.88GiB/

== Status ==
Current time: 2023-04-18 05:37:01 (running for 00:00:07.43)
Memory usage on this node: 4.3/31.0 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 96.000: None | Iter 48.000: None | Iter 24.000: None | Iter 12.000: None | Iter 6.000: None | Iter 3.000: None
Resources requested: 8.0/8 CPUs, 1.0/1 GPUs, 0.0/16.88 GiB heap, 0.0/8.44 GiB objects (0.0/1.0 accelerator_type:A10G)
Result logdir: /home/ubuntu/ray_results/reinforce_optuna
Number of trials: 2/100 (1 PENDING, 1 RUNNING)
+----------------------+----------+----------------------+-----------------+----------------+------------+-----------+--------------+-------------+-------------------+
| Trial name           | status   | loc                  |   learning_rate |   weight_decay | layers     |   dropout | activation   | optimizer   |   discount_factor |
|----------------------+----------+----------------------+-----------------+----------------+------------+-----------+--------------+-------------+-------------------|

[2m[36m(train_model pid=975522)[0m 2023-04-18 05:40:45.330925: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1261] Device interconnect StreamExecutor with strength 1 edge matrix:
[2m[36m(train_model pid=975522)[0m 2023-04-18 05:40:45.330951: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1267]      0 
[2m[36m(train_model pid=975522)[0m 2023-04-18 05:40:45.330957: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1280] 0:   N 
[2m[36m(train_model pid=975522)[0m 2023-04-18 05:40:45.331209: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[2m[36m(train_model pid=975522)[0m 2023-04-18 05:40:45.331393: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[2m[36m(train_model pid=975522)[

== Status ==
Current time: 2023-04-18 05:40:50 (running for 00:03:56.62)
Memory usage on this node: 5.3/31.0 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 96.000: None | Iter 48.000: None | Iter 24.000: None | Iter 12.000: None | Iter 6.000: None | Iter 3.000: None
Resources requested: 8.0/8 CPUs, 1.0/1 GPUs, 0.0/16.88 GiB heap, 0.0/8.44 GiB objects (0.0/1.0 accelerator_type:A10G)
Current best trial: 2d8ff7df with total_reward=-0.003 and parameters={'learning_rate': 0.0003255299575050117, 'weight_decay': 0.026921804480005525, 'layers': [128, 128], 'dropout': 0.3244561792079558, 'activation': 'tanh', 'optimizer': 'sgd', 'discount_factor': 0.8}
Result logdir: /home/ubuntu/ray_results/reinforce_optuna
Number of trials: 2/100 (1 PENDING, 1 RUNNING)
+----------------------+----------+----------------------+-----------------+----------------+------------+-----------+--------------+-------------+-------------------+----------------+----------------------+
| Trial name           | sta

[2m[36m(train_model pid=975522)[0m 2023-04-18 05:42:40.168090: W tensorflow/core/framework/op_kernel.cc:1763] OP_REQUIRES failed at cwise_op_gpu_base.cc:89 : Internal: Failed to load in-memory CUBIN: CUDA_ERROR_NO_BINARY_FOR_GPU: no kernel image is available for execution on the device
2023-04-18 05:42:40,240	ERROR trial_runner.py:1062 -- Trial train_model_2d8ff7df: Error processing event.
ray.exceptions.RayTaskError(InternalError): [36mray::ImplicitFunc.train()[39m (pid=975522, ip=172.31.19.218, repr=train_model)
  File "/home/ubuntu/anaconda3/envs/tf2/lib/python3.9/site-packages/ray/tune/trainable/trainable.py", line 368, in train
    raise skipped from exception_cause(skipped)
  File "/home/ubuntu/anaconda3/envs/tf2/lib/python3.9/site-packages/ray/tune/trainable/function_trainable.py", line 337, in entrypoint
    return self._trainable_func(
  File "/home/ubuntu/anaconda3/envs/tf2/lib/python3.9/site-packages/ray/tune/trainable/function_trainable.py", line 654, in _trainable_fun

== Status ==
Current time: 2023-04-18 05:42:40 (running for 00:05:46.78)
Memory usage on this node: 3.7/31.0 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 96.000: None | Iter 48.000: None | Iter 24.000: None | Iter 12.000: None | Iter 6.000: None | Iter 3.000: None
Resources requested: 8.0/8 CPUs, 1.0/1 GPUs, 0.0/16.88 GiB heap, 0.0/8.44 GiB objects (0.0/1.0 accelerator_type:A10G)
Current best trial: 2d8ff7df with total_reward=-0.003 and parameters={'learning_rate': 0.0003255299575050117, 'weight_decay': 0.026921804480005525, 'layers': [128, 128], 'dropout': 0.3244561792079558, 'activation': 'tanh', 'optimizer': 'sgd', 'discount_factor': 0.8}
Result logdir: /home/ubuntu/ray_results/reinforce_optuna
Number of trials: 2/100 (1 ERROR, 1 RUNNING)
+----------------------+----------+----------------------+-----------------+----------------+------------+-----------+--------------+-------------+-------------------+----------------+----------------------+
| Trial name           | statu

[2m[36m(train_model pid=975890)[0m 2023-04-18 05:42:43.123015: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
[2m[36m(train_model pid=975890)[0m 2023-04-18 05:42:43.123904: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
[2m[36m(train_model pid=975890)[0m 2023-04-18 05:42:43.175232: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:941] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
[2m[36m(train_model pid=975890)[0m 2023-04-18 05:42:43.175387: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
[2m[36m(train_model pid=975890)[0m pciBusID: 0000:00:1e.0 name: NVIDIA A10G computeCapability: 8.6
[2m[36m(train_model pid=975890)[0m coreClock: 1.71GHz coreCount: 80 deviceMemorySize: 22.20GiB deviceMemoryBandwidth: 558.88GiB/

== Status ==
Current time: 2023-04-18 05:42:48 (running for 00:05:54.18)
Memory usage on this node: 4.2/31.0 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 96.000: None | Iter 48.000: None | Iter 24.000: None | Iter 12.000: None | Iter 6.000: None | Iter 3.000: None
Resources requested: 8.0/8 CPUs, 1.0/1 GPUs, 0.0/16.88 GiB heap, 0.0/8.44 GiB objects (0.0/1.0 accelerator_type:A10G)
Current best trial: 2d8ff7df with total_reward=-0.003 and parameters={'learning_rate': 0.0003255299575050117, 'weight_decay': 0.026921804480005525, 'layers': [128, 128], 'dropout': 0.3244561792079558, 'activation': 'tanh', 'optimizer': 'sgd', 'discount_factor': 0.8}
Result logdir: /home/ubuntu/ray_results/reinforce_optuna
Number of trials: 3/100 (1 ERROR, 1 PENDING, 1 RUNNING)
+----------------------+----------+----------------------+-----------------+----------------+------------+-----------+--------------+-------------+-------------------+----------------+----------------------+
| Trial name       



== Status ==
Current time: 2023-04-18 05:44:53 (running for 00:07:59.27)
Memory usage on this node: 4.8/31.0 GiB 
Using AsyncHyperBand: num_stopped=0
Bracket: Iter 96.000: None | Iter 48.000: None | Iter 24.000: None | Iter 12.000: None | Iter 6.000: None | Iter 3.000: None
Resources requested: 8.0/8 CPUs, 1.0/1 GPUs, 0.0/16.88 GiB heap, 0.0/8.44 GiB objects (0.0/1.0 accelerator_type:A10G)
Current best trial: 2d8ff7df with total_reward=-0.003 and parameters={'learning_rate': 0.0003255299575050117, 'weight_decay': 0.026921804480005525, 'layers': [128, 128], 'dropout': 0.3244561792079558, 'activation': 'tanh', 'optimizer': 'sgd', 'discount_factor': 0.8}
Result logdir: /home/ubuntu/ray_results/reinforce_optuna
Number of trials: 3/100 (1 ERROR, 1 PENDING, 1 RUNNING)
+----------------------+----------+----------------------+-----------------+----------------+------------+-----------+--------------+-------------+-------------------+----------------+----------------------+
| Trial name       