#

## Homeo:
#### Control System For Greenhouse Thermoregulation Using Water-Based Solar Thermal Storage

#####

Haixiao (Harry) Feng

#

Resources/References:
- [OpenAI Gym Documentation](https://www.gymlibrary.ml/)

#

#### Installations & Dependencies

In [None]:
# '''
# Install dependencies
# '''

# !pip install gym
# !pip install gym[classic_control]

# # !pip install tensorflow==2.3.0
# !pip install tensorflow
# !pip install keras
# !pip install keras-rl2

In [None]:
'''
Import dependencies
'''

import numpy as np
import random
import warnings
warnings.filterwarnings("ignore")

import gym
from gym import Env
from gym.spaces import Discrete, Box

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

#

#### Custom Environment _(OpenAI Gym)_

In [None]:
'''
Possible actions to take:
    - Increase water flow
    - Decrease water flow
    - Maintain water flow
'''

class ShowerEnv(Env):
    """Toy temperature-control environment built on the Gym ``Env`` interface.

    The state is a single integer temperature. On every step the agent moves
    it down by one, leaves it unchanged, or moves it up by one. The reward is
    +1 while the temperature lies in 37..39 (inclusive) and -1 otherwise.
    An episode ends after 60 steps.

    NOTE: ``step`` does not clip the state to the bounds declared in
    ``observation_space``, and the state is returned as a plain scalar.
    """

    def __init__(self):
        # Three discrete actions: 0 = down, 1 = stay, 2 = up
        self.action_space = Discrete(3)
        # Declared temperature range
        self.observation_space = Box(low=np.array([0]), high=np.array([100]))
        # Start temperature: 38 plus a random integer offset in [-3, 3]
        self.state = 38 + random.randint(-3, 3)
        # Remaining steps in the episode
        self.shower_length = 60

    def step(self, action):
        """Apply ``action`` and advance the episode by one step.

        Args:
            action: 0, 1 or 2; mapped to a temperature change of -1, 0 or +1.

        Returns:
            Tuple ``(state, reward, done, info)`` where ``info`` is an empty
            dict.
        """
        # Shift the action index so that 0/1/2 becomes -1/0/+1
        self.state += action - 1
        # One step of the episode has been used up
        self.shower_length -= 1

        # Reward staying inside the 37..39 band
        reward = 1 if 37 <= self.state <= 39 else -1

        # The episode is over once the step budget is exhausted
        done = self.shower_length <= 0

        # Apply temperature noise (currently disabled)
        #self.state += random.randint(-1,1)

        # Placeholder for extra diagnostic info
        info = {}

        return self.state, reward, done, info

    def render(self, mode='human'):
        """Visualisation hook; currently a no-op.

        ``mode`` is accepted (and ignored) so that callers which pass
        ``mode='human'``, as keras-rl does when ``visualize=True``, do not
        fail with a TypeError.
        """
        pass

    def reset(self):
        """Start a new episode and return the initial state."""
        # New start temperature: 38 plus a random integer offset in [-3, 3]
        self.state = 38 + random.randint(-3, 3)
        # Restore the full step budget
        self.shower_length = 60
        return self.state
    

In [None]:
# Instantiate the custom environment defined above
env = ShowerEnv()

In [None]:
# Draw one random sample from the declared observation space (sanity check)
env.observation_space.sample()

array([50.94729], dtype=float32)

In [None]:
# Random-policy baseline: play a few episodes with uniformly sampled actions
# and print the total reward collected in each one.
episodes = 10
for episode in range(1, episodes + 1):
    state = env.reset()
    score = 0
    done = False

    while True:
        # env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward
        # Leave the inner loop as soon as the environment reports the end
        if done:
            break
    print('Episode:{} Score:{}'.format(episode, score))

Episode:1 Score:-52
Episode:2 Score:-52
Episode:3 Score:-38
Episode:4 Score:-48
Episode:5 Score:-28
Episode:6 Score:-28
Episode:7 Score:-52
Episode:8 Score:-60
Episode:9 Score:-20
Episode:10 Score:-40


In [None]:
# Observation shape and number of discrete actions, used to size the network
states = env.observation_space.shape
actions = env.action_space.n

#

#### Neural Network Modeling

In [None]:
def build_model(states, actions):
    """Build the Q-network: two 24-unit ReLU layers and a linear output.

    Args:
        states: Input shape passed to the first ``Dense`` layer as
            ``input_shape``.
        actions: Number of output units (one per action).

    Returns:
        The uncompiled ``Sequential`` model.
    """
    layers = [
        Dense(24, activation='relu', input_shape=states),
        Dense(24, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    return Sequential(layers)

In [None]:
# Discard any model left over from an earlier run of this notebook.
# On a fresh kernel no model exists yet, so a missing name is not an error.
try:
    del model
except NameError:
    pass

NameError: name 'model' is not defined

In [None]:
# Build the network from the observation shape and action count
model = build_model(states, actions)

In [None]:
# Print the layer-by-layer summary of the network
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 24)                48        
                                                                 
 dense_1 (Dense)             (None, 24)                600       
                                                                 
 dense_2 (Dense)             (None, 3)                 75        
                                                                 
Total params: 723
Trainable params: 723
Non-trainable params: 0
_________________________________________________________________


#

#### Agent Build _(Keras-RL)_

In [None]:
def build_agent(model, actions):
    """Wrap ``model`` in a keras-rl ``DQNAgent``.

    The agent uses a Boltzmann Q policy and a sequential replay memory
    (limit 50000, window length 1), with 10 warm-up steps and a target model
    update value of 1e-2.

    Args:
        model: The Keras model the agent should use.
        actions: Number of discrete actions, passed as ``nb_actions``.

    Returns:
        The uncompiled ``DQNAgent``.
    """
    return DQNAgent(
        model=model,
        policy=BoltzmannQPolicy(),
        memory=SequentialMemory(limit=50000, window_length=1),
        nb_actions=actions,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

In [None]:
# Build the agent around the network and compile it with Adam.
# `learning_rate` is used instead of the deprecated `lr` alias, which newer
# Keras releases no longer accept.
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

In [None]:
# Train the agent on the environment for 50000 steps without rendering
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

Training for 50000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 35:22 - reward: -1.0000

  batch_idxs = np.random.random_integers(low, high - 1, size=size)


   21/10000 [..............................] - ETA: 16:57 - reward: -0.4286

  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)


   26/10000 [..............................] - ETA: 14:22 - reward: -0.153

  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)


   36/10000 [..............................] - ETA: 11:22 - reward: 0.0000e+00

  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)


166 episodes - episode_reward: -46.855 [-60.000, 6.000] - loss: 2.890 - mae: 10.864 - mean_q: -12.647

Interval 2 (10000 steps performed)
167 episodes - episode_reward: -39.473 [-60.000, 38.000] - loss: 2.875 - mae: 11.523 - mean_q: -16.503

Interval 3 (20000 steps performed)
167 episodes - episode_reward: -32.491 [-60.000, 44.000] - loss: 2.929 - mae: 12.390 - mean_q: -17.848

Interval 4 (30000 steps performed)
166 episodes - episode_reward: -28.663 [-60.000, 40.000] - loss: 3.453 - mae: 13.760 - mean_q: -19.942

Interval 5 (40000 steps performed)

In [None]:
# Run 100 test episodes without rendering and print the mean episode reward
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))

In [None]:
# Run 15 more test episodes with rendering enabled; the result is discarded
_ = dqn.test(env, nb_episodes=15, visualize=True)

In [None]:
# Save the agent's weights, overwriting any existing file of the same name
dqn.save_weights('dqn_weights.h5f', overwrite=True)

#

#### Test

In [None]:
# Remove the trained objects so the following cells rebuild them from scratch
del model
del dqn
del env

In [None]:
# Rebuild the environment, network and agent exactly as they were built for
# training, so the weights saved to 'dqn_weights.h5f' fit the new model.
# A CartPole environment has a different observation size and action count
# than ShowerEnv, so those weights cannot be loaded into a CartPole model.
env = ShowerEnv()
actions = env.action_space.n
# build_model expects the shape tuple, not its first element
states = env.observation_space.shape
model = build_model(states, actions)
dqn = build_agent(model, actions)
# `learning_rate` replaces the deprecated `lr` alias
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

In [None]:
# Load the weights saved earlier in this notebook into the rebuilt agent
dqn.load_weights('dqn_weights.h5f')

In [None]:
# Run 5 test episodes with rendering enabled using the reloaded weights
_ = dqn.test(env, nb_episodes=5, visualize=True)

#