In [2]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [534]:
!pip install imageio

Collecting imageio
  Downloading imageio-2.9.0-py3-none-any.whl (3.3 MB)
[K     |################################| 3.3 MB 1.3 MB/s eta 0:00:01
Installing collected packages: imageio
Successfully installed imageio-2.9.0
You should consider upgrading via the '/usr/bin/python3 -m pip install --upgrade pip' command.[0m


In [535]:
import matplotlib.pyplot as plt
import cv2
import os
import glob
import imageio
from IPython.display import HTML, Image

## Environment

In [5]:
UP = (-1, 0)
DOWN = (1, 0)
LEFT = (0, -1)
RIGHT = (0, 1)
STAY = (0, 0)
ACTIONS = [UP, DOWN, LEFT, RIGHT, STAY]

In [6]:
def num2act(num):
    return ACTIONS[num]

In [582]:
class Pangea:
    def __init__(self, field_size):
        self.field_size = field_size
        self.field = np.random.rand(field_size+4, field_size+4)
        self.reset_field()
    
    def perception(self):
        px = self.agent_pos[0]
        py = self.agent_pos[1]
        percep = self.field[px-2:px+3, py-2:py+3].copy()
        return percep
        
    def move_and_consume(self, action):
        self.agent_pos[0] += action[0]
        self.agent_pos[1] += action[1]
        food = self.field[self.agent_pos[0], self.agent_pos[1]]
        self.field[self.agent_pos[0], self.agent_pos[1]] = 0
        return self.perception(), food
        
    def act(self, action):
        y = self.agent_pos[0]
        x = self.agent_pos[1]
        y += action[0]
        x += action[1]
        if self.field[y, x] == -1:
            return self.perception(), 0
        return self.move_and_consume(action)
    
    def show_field(self):
        field = self.field.copy()
        field[self.agent_pos[0], self.agent_pos[1]] = -3
        plt.imshow(field)
    
    def get_field(self):
        field = self.field.copy()
        field[self.agent_pos[0], self.agent_pos[1]] = -3
        return field
        
    def reset_field(self):
        fs = self.field_size
        self.field = np.random.rand(fs+4, fs+4)
        self.field[self.field < 0.8] = 0
        self.field[0:2,:] = -1
        self.field[fs+2:,:] = -1
        self.field[:,0:2] = -1
        self.field[:,fs+2:] = -1
        self.agent_pos = [15, 15]
        return self.perception()

## Agent 

In [259]:
class Zoe:
    def __init__(self):
        input_size = (5, 5)
        num_actions = 4 # up, down, left, right, stay
        num_hidden = 256
        inputs = layers.Input(shape=(input_size))
        x = layers.Flatten()(inputs)
        x = layers.Dense(num_hidden, activation="relu")(x)
        common = layers.Dense(num_hidden, activation="relu")(x)
        action = layers.Dense(num_actions, activation="softmax")(common)
        critic = layers.Dense(1)(common)
        self.model = keras.Model(inputs=inputs, outputs=[action, critic])
        self.optimizer = keras.optimizers.Adam(learning_rate=0.0005)
        self.huber_loss = keras.losses.Huber()
        self.action_probs_history = []
        self.critic_value_history = []
        self.rewards_history = []
        self.digested = 0
    
    def decision(self, percep):
        return self.model(np.expand_dims(percep, 0))

## Train

In [604]:
gamma = 0.99
steps_per_day = 500
eps = np.finfo(np.float32).eps.item()  # Smallest number such that 1.0 + eps != 1.0

In [605]:
num_days = 1000
pan = Pangea(32)
zoe = Zoe()

In [591]:
zoe.model = tf.keras.models.load_model("model")



In [606]:
running_reward = 0
for d in range(num_days):
    percep = pan.reset_field()
    day_reward = 0
    with tf.GradientTape() as tape:
        zoe.digested = 10
        for timestep in range(1, steps_per_day):
            if percep.shape != (5,5):
                print(percep.shape)
                print(pan.agent_pos)
            action_probs, critic_value = zoe.decision(percep)
            zoe.critic_value_history.append(critic_value[0, 0])
            action = np.random.choice(4, p=np.squeeze(action_probs))
            zoe.action_probs_history.append(tf.math.log(action_probs[0, action]))
            percep, food = pan.act(num2act(action))
            zoe.digested += food
            zoe.digested -= 0.3
            reward = zoe.digested
            zoe.rewards_history.append(reward)
            day_reward += reward
            
            if reward < 0:
                break
        running_reward = 0.05 * day_reward + (1 - 0.05) * running_reward
        returns = []
        discounted_sum = 0
        for r in zoe.rewards_history[::-1]:
            discounted_sum = r + gamma * discounted_sum
            returns.insert(0, discounted_sum)

        # Normalize
        returns = np.array(returns)
        returns = (returns - np.mean(returns)) / (np.std(returns) + eps)
        returns = returns.tolist()
        
        history = zip(zoe.action_probs_history, zoe.critic_value_history, returns)
        actor_losses = []
        critic_losses = []
        for log_prob, value, ret in history:
            diff = ret - value
            actor_losses.append(-log_prob * diff)  # actor loss
            # The critic must be updated so that it predicts a better estimate of
            # the future rewards.
            critic_losses.append(
                zoe.huber_loss(tf.expand_dims(value, 0), tf.expand_dims(ret, 0))
            )
        # Backpropagation
        loss_value = sum(actor_losses) + sum(critic_losses)
        grads = tape.gradient(loss_value, zoe.model.trainable_variables)
        zoe.optimizer.apply_gradients(zip(grads, zoe.model.trainable_variables))

        # Clear the loss and reward history
        zoe.action_probs_history.clear()
        zoe.critic_value_history.clear()
        zoe.rewards_history.clear()
        
    if d%10 ==0:
        template = "running reward: {:.2f} at episode {}"
        print(template.format(running_reward, d))
        print(f"Zoe digested: {zoe.digested}")
        zoe.digested = 0
        zoe.model.save('model')

running reward: 9.83 at episode 0
Zoe digested: -0.01651618529356791
INFO:tensorflow:Assets written to: model/assets
running reward: 102.59 at episode 10
Zoe digested: -0.1970384935136174
INFO:tensorflow:Assets written to: model/assets
running reward: 168.02 at episode 20
Zoe digested: -0.07931325897293362
INFO:tensorflow:Assets written to: model/assets
running reward: 196.39 at episode 30
Zoe digested: -0.17201967668729884
INFO:tensorflow:Assets written to: model/assets
running reward: 224.66 at episode 40
Zoe digested: -0.14073071051461722
INFO:tensorflow:Assets written to: model/assets
running reward: 227.16 at episode 50
Zoe digested: -0.17157272385379657
INFO:tensorflow:Assets written to: model/assets
running reward: 243.95 at episode 60
Zoe digested: -0.21302631633675234
INFO:tensorflow:Assets written to: model/assets
running reward: 261.76 at episode 70
Zoe digested: -0.1357667036861397
INFO:tensorflow:Assets written to: model/assets
running reward: 267.65 at episode 80
Zoe dige

In [607]:
percep = pan.reset_field()

In [524]:
def gallery(images, captionsList=None, row_height='auto'):
    """Shows a set of images in a gallery that flexes with the width of the notebook.
    
    Parameters
    ----------
    images: list of str or bytes
        URLs or bytes of images to display
        
    captions: list of str
        Additional captions to be showed below the image name

    row_height: str
        CSS height value to assign to all images. Set to 'auto' by default to show images
        with their native dimensions. Set to a value like '250px' to make all rows
        in the gallery equal height.
    """
    figures = []
    for i, image in enumerate(images):
        if captionsList == None:
            captions = []
        else:
            captions = captionsList[i]
        if isinstance(image, bytes):
            src = _src_from_data(image)
            caption = ''
        else:
            src = image
            caption = f'<figcaption style="font-size: 0.6em">{image}'
            for c in captions:
                caption += f'<br>{c}'
            caption += f'</figcaption>'
        figures.append(f'''
            <figure style="margin: 5px !important;">
              <img src="{src}" style="height: {row_height}">
              {caption}
            </figure>
        ''')
    return HTML(data=f'''
        <div style="display: flex; flex-flow: row wrap; text-align: center;">
        {''.join(figures)}
        </div>
    ''')

In [564]:
plt.ioff()

In [574]:
!rm "frames/*"

rm: cannot remove 'frames/*': No such file or directory


In [609]:
def create_gif(num_days, pan, zoe, event_name):
    percep = pan.perception()
    zoe.digested = 10
    for d in range(num_days):
        frame = pan.get_field()
        fig = plt.figure()
        plt.imshow(frame)
        plt.title(f'{d:04d}-{zoe.digested:2.3f}')
        fig.savefig(f'frames/frame{d:04d}.jpg')
        plt.close(fig)
        action_probs, critic_value = zoe.decision(percep)
        action = np.random.choice(4, p=np.squeeze(action_probs))
        percep, reward = pan.act(num2act(action))
        zoe.digested += reward - 0.3
    anim_file = os.path.join('gifs', event_name + '.gif')
    with imageio.get_writer(anim_file, mode='I') as writer:
        filenames = glob.glob('frames'+'/*.jpg')
        filenames = sorted(filenames)
        for filename in filenames:
            img = imageio.imread(filename)
            writer.append_data(img)
            os.remove(filename)
    return anim_file
    

In [610]:
pan = Pangea(32)

In [611]:
gif = create_gif(200, pan, zoe, "test")

In [612]:
gallery([gif])