<a href="https://colab.research.google.com/github/itsmepriyabrata/priyabrata_ai_python/blob/main/Reinforcement%20learning%20algorithms.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Soft Actor-Critic (SAC)

In [None]:
from collections import deque

import gym
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

class SACAgent:
    """Soft Actor-Critic style agent for environments with discrete actions.

    The agent owns a softmax policy network, two Q-networks and a slowly
    tracking target copy of each Q-network. Transitions go into a bounded
    replay buffer; once it holds ``batch_size`` transitions, every
    environment step triggers one minibatch update.
    """

    def __init__(self, env):
        """Read the space sizes from ``env`` and build networks and buffer.

        Args:
            env: Gym-style environment with a vector observation space
                (``observation_space.shape[0]``) and a discrete action
                space (``action_space.n``).
        """
        self.env = env
        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.n
        self.gamma = 0.99          # discount factor
        self.tau = 0.005           # soft target-update rate
        self.alpha = 0.2           # entropy temperature
        self.learning_rate = 0.001
        self.buffer_size = 100000
        self.batch_size = 64
        (self.policy_net, self.q1_net, self.q2_net,
         self.target_q1_net, self.target_q2_net) = self.build_networks()
        self.replay_buffer = deque(maxlen=self.buffer_size)

    def build_networks(self):
        """Create the policy, both critics and their target copies.

        Returns:
            Tuple ``(policy_net, q1_net, q2_net, target_q1_net,
            target_q2_net)``; each target starts with its critic's weights.
        """
        # The policy must emit a probability distribution, hence softmax
        # with a cross-entropy loss; the critics stay linear/MSE.
        policy_net = self.build_model(output_activation='softmax',
                                      loss='categorical_crossentropy')
        q1_net = self.build_model()
        q2_net = self.build_model()
        target_q1_net = self.build_model()
        target_q2_net = self.build_model()
        target_q1_net.set_weights(q1_net.get_weights())
        target_q2_net.set_weights(q2_net.get_weights())
        return policy_net, q1_net, q2_net, target_q1_net, target_q2_net

    def build_model(self, output_activation='linear', loss='mse'):
        """Build a 64-64 MLP with one output per action.

        Args:
            output_activation: Activation of the output layer.
            loss: Keras loss the model is compiled with.

        Returns:
            A compiled Keras ``Sequential`` model.
        """
        model = Sequential()
        model.add(Dense(64, input_dim=self.state_size, activation='relu'))
        model.add(Dense(64, activation='relu'))
        model.add(Dense(self.action_size, activation=output_activation))
        # `learning_rate` replaces the deprecated `lr` keyword.
        model.compile(loss=loss, optimizer=Adam(learning_rate=self.learning_rate))
        return model

    def act(self, state):
        """Sample an action from the policy.

        Args:
            state: Array of shape ``(1, state_size)``.

        Returns:
            The sampled action index as an ``int``.
        """
        probs = np.asarray(self.policy_net.predict(state, verbose=0)[0],
                           dtype=np.float64)
        # float32 softmax output can miss 1.0 by more than the tolerance
        # np.random.choice accepts, so renormalise in float64.
        probs = probs / probs.sum()
        return int(np.random.choice(self.action_size, p=probs))

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self, episodes):
        """Run ``episodes`` episodes, updating after every step when possible.

        Prints the summed reward of each finished episode.
        """
        for episode in range(episodes):
            state = self._reset_env()
            done = False
            total_reward = 0.0
            while not done:
                action = self.act(state[None, :])
                next_state, reward, done = self._step_env(action)
                self.remember(state, action, reward, next_state, done)
                state = next_state
                total_reward += reward
                if len(self.replay_buffer) >= self.batch_size:
                    self.update_policy()
            print(f"Episode: {episode}, Reward: {total_reward}")

    def update_policy(self):
        """Perform one critic update, one policy update and a soft target sync."""
        states, actions, rewards, next_states, dones = self._sample_batch()
        rows = np.arange(len(actions))

        # Soft state value of the next state under the current policy:
        # V(s') = sum_a pi(a|s') * (min_i Q_i(s', a) - alpha * log pi(a|s')).
        next_probs = np.asarray(self.policy_net.predict(next_states, verbose=0))
        next_q = np.minimum(self.target_q1_net.predict(next_states, verbose=0),
                            self.target_q2_net.predict(next_states, verbose=0))
        next_log_probs = np.log(np.clip(next_probs, 1e-8, 1.0))
        soft_value = np.sum(next_probs * (next_q - self.alpha * next_log_probs),
                            axis=1)
        td_target = rewards + self.gamma * (1.0 - dones) * soft_value

        # Only the taken action's Q-value is regressed toward the target;
        # the other outputs are fitted to their own current prediction.
        q1_target = np.array(self.q1_net.predict(states, verbose=0))
        q2_target = np.array(self.q2_net.predict(states, verbose=0))
        min_q = np.minimum(q1_target, q2_target)
        q1_target[rows, actions] = td_target
        q2_target[rows, actions] = td_target
        self.q1_net.fit(states, q1_target, epochs=1, verbose=0)
        self.q2_net.fit(states, q2_target, epochs=1, verbose=0)

        # For discrete actions the SAC policy objective is minimised by the
        # Boltzmann distribution softmax(min Q / alpha); fit toward it.
        logits = min_q / self.alpha
        logits = logits - logits.max(axis=1, keepdims=True)  # stability
        policy_target = np.exp(logits)
        policy_target /= policy_target.sum(axis=1, keepdims=True)
        self.policy_net.fit(states, policy_target, epochs=1, verbose=0)

        self.update_target_networks()

    def update_target_networks(self):
        """Move each target critic a fraction ``tau`` toward its online critic."""
        pairs = ((self.q1_net, self.target_q1_net),
                 (self.q2_net, self.target_q2_net))
        for online, target in pairs:
            blended = [self.tau * w + (1 - self.tau) * tw
                       for w, tw in zip(online.get_weights(),
                                        target.get_weights())]
            target.set_weights(blended)

    def _sample_batch(self):
        """Draw a uniform minibatch and return it as five aligned arrays."""
        size = min(self.batch_size, len(self.replay_buffer))
        picks = np.random.choice(len(self.replay_buffer), size=size,
                                 replace=False)
        states, actions, rewards, next_states, dones = zip(
            *(self.replay_buffer[i] for i in picks))
        # Flatten each observation so the batch is always (batch, state_size).
        states = np.asarray(states, dtype=np.float32).reshape(size, -1)
        next_states = np.asarray(next_states, dtype=np.float32).reshape(size, -1)
        return (states,
                np.asarray(actions, dtype=np.int64),
                np.asarray(rewards, dtype=np.float32),
                next_states,
                np.asarray(dones, dtype=np.float32))

    def _reset_env(self):
        """Reset the env; accepts both ``obs`` and ``(obs, info)`` returns."""
        result = self.env.reset()
        obs = result[0] if isinstance(result, tuple) else result
        return np.asarray(obs, dtype=np.float32)

    def _step_env(self, action):
        """Step the env; accepts both 4-tuple and 5-tuple step results.

        Returns:
            ``(next_state, reward, done)`` where ``done`` is true when the
            episode terminated or was truncated.
        """
        result = self.env.step(action)
        if len(result) == 5:
            next_state, reward, terminated, truncated, _ = result
            done = bool(terminated or truncated)
        else:
            next_state, reward, done, _ = result
            done = bool(done)
        return np.asarray(next_state, dtype=np.float32), reward, done

if __name__ == "__main__":
    # Train a SAC agent on CartPole for 1000 episodes.
    env = gym.make('CartPole-v1')
    try:
        agent = SACAgent(env)
        agent.train(episodes=1000)
    finally:
        # Release the environment even if training fails or is interrupted.
        env.close()

SARSA (State-Action-Reward-State-Action)

In [None]:
import gym
import numpy as np

class SARSAAgent:
    def __init__(self, env):
        self.env = env
        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.n
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = 0.1
        self.q_table = np.zeros((self.state_size, self.action_size))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return self.env.action_space.sample()
        else:
            return np.argmax(self.q_table[state])

    def train(self, episodes):
        for episode in range(episodes):
            state = self.env.reset()
            action = self.act(state)
            done = False
            while not done:
                next_state, reward, done, _ = self.env.step(action)
                next_action = self.act(next_state)
                self.q_table[state][action] += self.learning_rate * (reward + self.gamma * self.q_table[next_state][next_action] - self.q_table[state][action])
                state = next_state
                action = next_action
            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay
            print(f"Episode: {episode}, Reward: {reward}")

if __name__ == "__main__":
    # Train a SARSA agent on CartPole for 1000 episodes.
    env = gym.make('CartPole-v1')
    try:
        agent = SARSAAgent(env)
        agent.train(episodes=1000)
    finally:
        # Release the environment even if training fails or is interrupted.
        env.close()

Double DQN

In [None]:
import gym
import numpy as np
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

class DoubleDQNAgent:
    """Double DQN agent: the online net picks the next action, the target net scores it.

    Uses epsilon-greedy exploration, a bounded replay buffer and a target
    network that is hard-synchronised every ``target_update_freq`` updates.
    """

    def __init__(self, env):
        """Read the space sizes from ``env`` and build networks and buffer.

        Args:
            env: Gym-style environment with a vector observation space and
                a discrete action space.
        """
        self.env = env
        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.n
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = 0.001
        self.buffer_size = 100000
        self.batch_size = 64
        # Copying the weights on every update would make the target net
        # identical to the online net, so sync only periodically.
        self.target_update_freq = 100
        self._train_steps = 0
        self.model, self.target_model = self.build_model()
        self.update_target_model()  # start both networks from equal weights
        self.replay_buffer = deque(maxlen=self.buffer_size)

    def build_model(self):
        """Return ``(model, target_model)``, two identically shaped Q-networks."""
        return self._build_network(), self._build_network()

    def _build_network(self):
        """Build one compiled 64-64 MLP with a linear output per action."""
        network = Sequential()
        network.add(Dense(64, input_dim=self.state_size, activation='relu'))
        network.add(Dense(64, activation='relu'))
        network.add(Dense(self.action_size, activation='linear'))
        # `learning_rate` replaces the deprecated `lr` keyword.
        network.compile(loss='mse',
                        optimizer=Adam(learning_rate=self.learning_rate))
        return network

    def act(self, state):
        """Return an epsilon-greedy action for ``state`` of shape ``(1, state_size)``."""
        if np.random.rand() <= self.epsilon:
            return self.env.action_space.sample()
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self, episodes):
        """Run ``episodes`` episodes and print each episode's summed reward."""
        for episode in range(episodes):
            state = np.reshape(self._reset_env(), [1, self.state_size])
            done = False
            total_reward = 0.0
            while not done:
                action = self.act(state)
                next_state, reward, done = self._step_env(action)
                next_state = np.reshape(next_state, [1, self.state_size])
                self.remember(state, action, reward, next_state, done)
                state = next_state
                total_reward += reward
                if len(self.replay_buffer) >= self.batch_size:
                    self.update_policy()
            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay
            print(f"Episode: {episode}, Reward: {total_reward}")

    def update_policy(self):
        """Fit the online network on one minibatch of Double DQN targets."""
        states, actions, rewards, next_states, dones = self._sample_batch()
        rows = np.arange(len(actions))
        # Action selection by the online net, evaluation by the target net.
        best_next = np.argmax(self.model.predict(next_states, verbose=0), axis=1)
        next_target_q = self.target_model.predict(next_states, verbose=0)
        targets = rewards + self.gamma * (1.0 - dones) * next_target_q[rows, best_next]
        q_values = np.array(self.model.predict(states, verbose=0))
        q_values[rows, actions] = targets
        self.model.fit(states, q_values, epochs=1, verbose=0)
        self._train_steps += 1
        if self._train_steps % self.target_update_freq == 0:
            self.update_target_model()

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def _sample_batch(self):
        """Draw a uniform minibatch and return it as five aligned arrays."""
        size = min(self.batch_size, len(self.replay_buffer))
        picks = np.random.choice(len(self.replay_buffer), size=size,
                                 replace=False)
        states, actions, rewards, next_states, dones = zip(
            *(self.replay_buffer[i] for i in picks))
        # Stored observations are (1, state_size); flatten to (batch, state_size).
        states = np.asarray(states, dtype=np.float32).reshape(size, -1)
        next_states = np.asarray(next_states, dtype=np.float32).reshape(size, -1)
        return (states,
                np.asarray(actions, dtype=np.int64),
                np.asarray(rewards, dtype=np.float32),
                next_states,
                np.asarray(dones, dtype=np.float32))

    def _reset_env(self):
        """Reset the env; accepts both ``obs`` and ``(obs, info)`` returns."""
        result = self.env.reset()
        return result[0] if isinstance(result, tuple) else result

    def _step_env(self, action):
        """Step the env; accepts both 4-tuple and 5-tuple step results."""
        result = self.env.step(action)
        if len(result) == 5:
            next_state, reward, terminated, truncated, _ = result
            return next_state, reward, bool(terminated or truncated)
        next_state, reward, done, _ = result
        return next_state, reward, bool(done)

if __name__ == "__main__":
    # Train a Double DQN agent on CartPole for 1000 episodes.
    env = gym.make('CartPole-v1')
    try:
        agent = DoubleDQNAgent(env)
        agent.train(episodes=1000)
    finally:
        # Release the environment even if training fails or is interrupted.
        env.close()

Dueling DQN

In [None]:
import gym
import numpy as np
from collections import deque
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input, Add
from tensorflow.keras.optimizers import Adam

class DuelingDQNAgent:
    """DQN agent whose Q-network splits into state-value and advantage streams.

    Uses epsilon-greedy exploration, a bounded replay buffer and a separate
    target network that is hard-synchronised every ``target_update_freq``
    updates.
    """

    def __init__(self, env):
        """Read the space sizes from ``env`` and build networks and buffer.

        Args:
            env: Gym-style environment with a vector observation space and
                a discrete action space.
        """
        self.env = env
        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.n
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = 0.001
        self.buffer_size = 100000
        self.batch_size = 64
        # Sync the target periodically so it lags behind the online net.
        self.target_update_freq = 100
        self._train_steps = 0
        self.model, self.target_model = self.build_model()
        self.update_target_model()  # start both networks from equal weights
        self.replay_buffer = deque(maxlen=self.buffer_size)

    def build_model(self):
        """Return ``(model, target_model)`` as two independent dueling networks.

        Each network gets its own layers; building both models from one
        layer graph would make them share weights.
        """
        return self._build_network(), self._build_network()

    def _build_network(self):
        """Build one compiled dueling network: Q = V + (A - mean(A))."""
        import tensorflow as tf
        from tensorflow.keras.layers import Lambda

        state_input = Input((self.state_size,))
        hidden = Dense(64, activation='relu')(state_input)
        hidden = Dense(64, activation='relu')(hidden)
        value = Dense(1)(hidden)
        advantage = Dense(self.action_size)(hidden)
        # Centre the advantages so V and A are uniquely determined by Q.
        centred = Lambda(
            lambda adv: adv - tf.reduce_mean(adv, axis=1, keepdims=True)
        )(advantage)
        q_values = Add()([value, centred])
        network = Model(inputs=state_input, outputs=q_values)
        # `learning_rate` replaces the deprecated `lr` keyword.
        network.compile(loss='mse',
                        optimizer=Adam(learning_rate=self.learning_rate))
        return network

    def act(self, state):
        """Return an epsilon-greedy action for ``state`` of shape ``(1, state_size)``."""
        if np.random.rand() <= self.epsilon:
            return self.env.action_space.sample()
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self, episodes):
        """Run ``episodes`` episodes and print each episode's summed reward."""
        for episode in range(episodes):
            state = np.reshape(self._reset_env(), [1, self.state_size])
            done = False
            total_reward = 0.0
            while not done:
                action = self.act(state)
                next_state, reward, done = self._step_env(action)
                next_state = np.reshape(next_state, [1, self.state_size])
                self.remember(state, action, reward, next_state, done)
                state = next_state
                total_reward += reward
                if len(self.replay_buffer) >= self.batch_size:
                    self.update_policy()
            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay
            print(f"Episode: {episode}, Reward: {total_reward}")

    def update_policy(self):
        """Fit the online network on one minibatch of one-step Q-learning targets."""
        states, actions, rewards, next_states, dones = self._sample_batch()
        rows = np.arange(len(actions))
        next_q_values = self.target_model.predict(next_states, verbose=0)
        targets = rewards + self.gamma * (1.0 - dones) * np.max(next_q_values, axis=1)
        q_values = np.array(self.model.predict(states, verbose=0))
        q_values[rows, actions] = targets
        self.model.fit(states, q_values, epochs=1, verbose=0)
        self._train_steps += 1
        if self._train_steps % self.target_update_freq == 0:
            self.update_target_model()

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def _sample_batch(self):
        """Draw a uniform minibatch and return it as five aligned arrays."""
        size = min(self.batch_size, len(self.replay_buffer))
        picks = np.random.choice(len(self.replay_buffer), size=size,
                                 replace=False)
        states, actions, rewards, next_states, dones = zip(
            *(self.replay_buffer[i] for i in picks))
        # Stored observations are (1, state_size); flatten to (batch, state_size).
        states = np.asarray(states, dtype=np.float32).reshape(size, -1)
        next_states = np.asarray(next_states, dtype=np.float32).reshape(size, -1)
        return (states,
                np.asarray(actions, dtype=np.int64),
                np.asarray(rewards, dtype=np.float32),
                next_states,
                np.asarray(dones, dtype=np.float32))

    def _reset_env(self):
        """Reset the env; accepts both ``obs`` and ``(obs, info)`` returns."""
        result = self.env.reset()
        return result[0] if isinstance(result, tuple) else result

    def _step_env(self, action):
        """Step the env; accepts both 4-tuple and 5-tuple step results."""
        result = self.env.step(action)
        if len(result) == 5:
            next_state, reward, terminated, truncated, _ = result
            return next_state, reward, bool(terminated or truncated)
        next_state, reward, done, _ = result
        return next_state, reward, bool(done)

if __name__ == "__main__":
    # Train a Dueling DQN agent on CartPole for 1000 episodes.
    env = gym.make('CartPole-v1')
    try:
        agent = DuelingDQNAgent(env)
        agent.train(episodes=1000)
    finally:
        # Release the environment even if training fails or is interrupted.
        env.close()

Rainbow DQN

In [None]:
import gym
import numpy as np
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

class RainbowDQNAgent:
    """DQN agent with a replay buffer and a periodically synced target network.

    NOTE: despite the class name, only a plain DQN with a target network
    is implemented here; no further Rainbow components are present in
    this class.
    """

    def __init__(self, env):
        """Read the space sizes from ``env`` and build networks and buffer.

        Args:
            env: Gym-style environment with a vector observation space and
                a discrete action space.
        """
        self.env = env
        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.n
        self.gamma = 0.99
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = 0.001
        self.buffer_size = 100000
        self.batch_size = 64
        # Copying the weights on every update would make the target net
        # identical to the online net, so sync only periodically.
        self.target_update_freq = 100
        self._train_steps = 0
        self.model, self.target_model = self.build_model()
        self.update_target_model()  # start both networks from equal weights
        self.replay_buffer = deque(maxlen=self.buffer_size)

    def build_model(self):
        """Return ``(model, target_model)``, two identically shaped Q-networks."""
        return self._build_network(), self._build_network()

    def _build_network(self):
        """Build one compiled 64-64 MLP with a linear output per action."""
        network = Sequential()
        network.add(Dense(64, input_dim=self.state_size, activation='relu'))
        network.add(Dense(64, activation='relu'))
        network.add(Dense(self.action_size, activation='linear'))
        # `learning_rate` replaces the deprecated `lr` keyword.
        network.compile(loss='mse',
                        optimizer=Adam(learning_rate=self.learning_rate))
        return network

    def act(self, state):
        """Return an epsilon-greedy action for ``state`` of shape ``(1, state_size)``."""
        if np.random.rand() <= self.epsilon:
            return self.env.action_space.sample()
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self, episodes):
        """Run ``episodes`` episodes and print each episode's summed reward."""
        for episode in range(episodes):
            state = np.reshape(self._reset_env(), [1, self.state_size])
            done = False
            total_reward = 0.0
            while not done:
                action = self.act(state)
                next_state, reward, done = self._step_env(action)
                next_state = np.reshape(next_state, [1, self.state_size])
                self.remember(state, action, reward, next_state, done)
                state = next_state
                total_reward += reward
                if len(self.replay_buffer) >= self.batch_size:
                    self.update_policy()
            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay
            print(f"Episode: {episode}, Reward: {total_reward}")

    def update_policy(self):
        """Fit the online network on one minibatch of one-step Q-learning targets."""
        states, actions, rewards, next_states, dones = self._sample_batch()
        rows = np.arange(len(actions))
        next_q_values = self.target_model.predict(next_states, verbose=0)
        targets = rewards + self.gamma * (1.0 - dones) * np.max(next_q_values, axis=1)
        q_values = np.array(self.model.predict(states, verbose=0))
        q_values[rows, actions] = targets
        self.model.fit(states, q_values, epochs=1, verbose=0)
        self._train_steps += 1
        if self._train_steps % self.target_update_freq == 0:
            self.update_target_model()

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def _sample_batch(self):
        """Draw a uniform minibatch and return it as five aligned arrays."""
        size = min(self.batch_size, len(self.replay_buffer))
        picks = np.random.choice(len(self.replay_buffer), size=size,
                                 replace=False)
        states, actions, rewards, next_states, dones = zip(
            *(self.replay_buffer[i] for i in picks))
        # Stored observations are (1, state_size); flatten to (batch, state_size).
        states = np.asarray(states, dtype=np.float32).reshape(size, -1)
        next_states = np.asarray(next_states, dtype=np.float32).reshape(size, -1)
        return (states,
                np.asarray(actions, dtype=np.int64),
                np.asarray(rewards, dtype=np.float32),
                next_states,
                np.asarray(dones, dtype=np.float32))

    def _reset_env(self):
        """Reset the env; accepts both ``obs`` and ``(obs, info)`` returns."""
        result = self.env.reset()
        return result[0] if isinstance(result, tuple) else result

    def _step_env(self, action):
        """Step the env; accepts both 4-tuple and 5-tuple step results."""
        result = self.env.step(action)
        if len(result) == 5:
            next_state, reward, terminated, truncated, _ = result
            return next_state, reward, bool(terminated or truncated)
        next_state, reward, done, _ = result
        return next_state, reward, bool(done)

if __name__ == "__main__":
    # Train the agent on CartPole for 1000 episodes.
    env = gym.make('CartPole-v1')
    try:
        agent = RainbowDQNAgent(env)
        agent.train(episodes=1000)
    finally:
        # Release the environment even if training fails or is interrupted.
        env.close()

TD-Lambda

In [None]:
import gym
import numpy as np
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

class TDLambdaAgent:
    """Q-network agent trained on lambda-returns computed per episode.

    During an episode the transitions are collected in ``replay_buffer``.
    When the episode ends, a lambda-return is computed for every step by a
    backward pass over the trajectory (bootstrapping from the target
    network's greedy value) and the online network is fitted to those
    returns. ``lam = 0`` gives one-step TD targets; ``lam = 1`` gives
    Monte-Carlo returns.
    """

    def __init__(self, env):
        """Read the space sizes from ``env`` and build networks and buffer.

        Args:
            env: Gym-style environment with a vector observation space and
                a discrete action space.
        """
        self.env = env
        self.state_size = env.observation_space.shape[0]
        self.action_size = env.action_space.n
        self.gamma = 0.99
        self.lam = 0.9             # trace-decay parameter (lambda)
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = 0.001
        self.buffer_size = 100000
        self.batch_size = 64
        self.model, self.target_model = self.build_model()
        self.update_target_model()  # start both networks from equal weights
        # Holds the transitions of the episode currently being played.
        self.replay_buffer = deque(maxlen=self.buffer_size)

    def build_model(self):
        """Return ``(model, target_model)``, two identically shaped Q-networks."""
        return self._build_network(), self._build_network()

    def _build_network(self):
        """Build one compiled 64-64 MLP with a linear output per action."""
        network = Sequential()
        network.add(Dense(64, input_dim=self.state_size, activation='relu'))
        network.add(Dense(64, activation='relu'))
        network.add(Dense(self.action_size, activation='linear'))
        network.compile(loss='mse',
                        optimizer=Adam(learning_rate=self.learning_rate))
        return network

    def act(self, state):
        """Return an epsilon-greedy action for ``state`` of shape ``(1, state_size)``."""
        if np.random.rand() <= self.epsilon:
            return self.env.action_space.sample()
        return int(np.argmax(self.model.predict(state, verbose=0)[0]))

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the current episode's buffer."""
        self.replay_buffer.append((state, action, reward, next_state, done))

    def train(self, episodes):
        """Play ``episodes`` episodes, updating once at the end of each.

        Prints the summed reward of each finished episode.
        """
        for episode in range(episodes):
            state = np.reshape(self._reset_env(), [1, self.state_size])
            self.replay_buffer.clear()  # lambda-returns need one trajectory
            done = False
            total_reward = 0.0
            while not done:
                action = self.act(state)
                next_state, reward, done = self._step_env(action)
                next_state = np.reshape(next_state, [1, self.state_size])
                self.remember(state, action, reward, next_state, done)
                state = next_state
                total_reward += reward
            self.update_policy()
            if self.epsilon > self.epsilon_min:
                self.epsilon *= self.epsilon_decay
            print(f"Episode: {episode}, Reward: {total_reward}")

    def update_policy(self):
        """Fit the online network to the buffered episode's lambda-returns."""
        if not self.replay_buffer:
            return
        states, actions, rewards, next_states, dones = zip(*self.replay_buffer)
        steps = len(actions)
        # Stored observations are (1, state_size); flatten to (steps, state_size).
        states = np.asarray(states, dtype=np.float32).reshape(steps, -1)
        next_states = np.asarray(next_states, dtype=np.float32).reshape(steps, -1)
        actions = np.asarray(actions, dtype=np.int64)

        next_values = np.max(self.target_model.predict(next_states, verbose=0),
                             axis=1)
        returns = self._lambda_returns(rewards, next_values, dones)

        q_values = np.array(self.model.predict(states, verbose=0))
        q_values[np.arange(steps), actions] = returns
        self.model.fit(states, q_values, batch_size=self.batch_size,
                       epochs=1, verbose=0)
        self.update_target_model()

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.set_weights(self.model.get_weights())

    def _lambda_returns(self, rewards, next_values, dones):
        """Compute the lambda-return of every step of one trajectory.

        Uses the backward recursion
        ``G_t = r_t + gamma * ((1 - lam) * V(s_{t+1}) + lam * G_{t+1})``,
        with ``V`` of a terminal next state taken as zero and the last step
        bootstrapping from ``V(s_T)`` alone.

        Args:
            rewards: Per-step rewards, in time order.
            next_values: Value estimate of each step's next state.
            dones: Per-step flags, true where the next state is terminal.

        Returns:
            ``numpy`` array of lambda-returns, one per step.
        """
        rewards = np.asarray(rewards, dtype=np.float64)
        ended = np.asarray(dones, dtype=bool)
        bootstrap = np.asarray(next_values, dtype=np.float64) * (~ended)
        returns = np.zeros_like(rewards)
        last = len(rewards) - 1
        carry = 0.0
        for t in range(last, -1, -1):
            if t == last or ended[t]:
                # No later return to blend in: fall back to the bootstrap.
                carry = bootstrap[t]
            carry = rewards[t] + self.gamma * (
                (1.0 - self.lam) * bootstrap[t] + self.lam * carry)
            returns[t] = carry
        return returns

    def _reset_env(self):
        """Reset the env; accepts both ``obs`` and ``(obs, info)`` returns."""
        result = self.env.reset()
        return result[0] if isinstance(result, tuple) else result

    def _step_env(self, action):
        """Step the env; accepts both 4-tuple and 5-tuple step results."""
        result = self.env.step(action)
        if len(result) == 5:
            next_state, reward, terminated, truncated, _ = result
            return next_state, reward, bool(terminated or truncated)
        next_state, reward, done, _ = result
        return next_state, reward, bool(done)