In [1]:
import numpy as np
from numpy import random as rnd
from matplotlib import pyplot as plt
import seaborn as sns
import os,datetime

import pandas as pd
import tensorflow as tf

from sklearn import preprocessing as pp

from keras.models import Model,Sequential
from keras.layers import Input,Dense,Dropout,LSTM,Conv2D,advanced_activations,concatenate

import gym
import pygame
from pygame.locals import *

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


pygame 1.9.4
Hello from the pygame community. https://www.pygame.org/contribute.html


In [2]:
class Environment():
    """Pygame window populated with randomly placed rectangular obstacles.

    Attributes:
        screen: display surface returned by ``pygame.display.set_mode((1200,600))``.
        clock: ``pygame.time.Clock`` the caller can use to cap the frame rate.
        obstacles: list of ``pygame.rect.Rect`` objects, one per obstacle.
    """
    
    def __init__(self,num_obstacles,num_greenbeans=1):
        """Open the window and scatter ``num_obstacles`` random rectangles.

        Args:
            num_obstacles: number of obstacle rectangles to generate.
            num_greenbeans: accepted for interface compatibility; not used
                anywhere in this class.
        """
        self.screen = pygame.display.set_mode((1200,600))
        self.clock = pygame.time.Clock()
        
        self.obstacles = []
        for _ in range(num_obstacles):
            # Draw plain Python ints.  ``randint(lo,hi,1)`` would return
            # 1-element arrays, which only work as Rect coordinates through
            # implicit array->int conversion.
            left = int(rnd.randint(50,1100))
            top = int(rnd.randint(50,500))
            width = int(rnd.randint(20,100))
            height = int(rnd.randint(20,100))
            self.obstacles.append(pygame.rect.Rect(left,top,width,height))
        
    def draw_env(self):
        """Clear the screen to colour 0 and draw every obstacle in green."""
        self.screen.fill(0)
        for obstacle in self.obstacles:
            pygame.draw.rect(self.screen,(0,255,0),obstacle)
        
    def draw_player(self,agent):
        """Draw the agent body in blue and its four edge sensors in white.

        Args:
            agent: object exposing ``rect``, ``rect_t``, ``rect_r``,
                ``rect_l`` and ``rect_b`` rectangles.
        """
        brightness = 255
        pygame.draw.rect(self.screen,(0,0,brightness),agent.rect)
        # Same draw order as the sensors are listed: top, right, left, bottom.
        for sensor in (agent.rect_t,agent.rect_r,agent.rect_l,agent.rect_b):
            pygame.draw.rect(self.screen,(255,255,255),sensor)

In [3]:
# Quality network: scores a candidate (action, pain, direction) triple.
# Inputs are batched row vectors: 2 action components, 4 pain sensor values
# and 2 current-direction components.
action_in = tf.placeholder(dtype=tf.float32,shape=(None,2))
pain_in = tf.placeholder(dtype=tf.float32,shape=(None,4))
dir_in = tf.placeholder(dtype=tf.float32,shape=(None,2))

# Training target for the network output.  ``shape=(None)`` is just ``None``,
# i.e. the shape is unconstrained, so the target is reshaped below before use.
qual_checked = tf.placeholder(dtype=tf.float32,shape=(None))

concat_vec = tf.concat([action_in,pain_in,dir_in],axis=1)
nn_in = tf.contrib.layers.flatten(concat_vec)

# One hidden ReLU layer, then a single tanh output (quality in [-1, 1]).
layer0 = tf.contrib.layers.fully_connected(nn_in,num_outputs=62,activation_fn=tf.nn.relu)
nn_out = tf.contrib.layers.fully_connected(layer0,num_outputs=1,activation_fn=tf.nn.tanh)

# Force the target into a column so it lines up element-wise with ``nn_out``
# (batch, 1).  Without this a 1-D target of shape (batch,) would broadcast to
# (batch, batch) and the mean-squared error would be computed over all pairs.
qual_target = tf.reshape(qual_checked,(-1,1))
loss = tf.reduce_mean(tf.square(tf.subtract(nn_out,qual_target)))
opt = tf.train.AdamOptimizer(learning_rate=0.1).minimize(loss)

In [4]:
class Agent():
    """Rectangular player with four edge "pain" sensors, steered by a network.

    The agent keeps a velocity (``dir``) that is changed by one unit step per
    frame.  The step is chosen by scoring five candidate actions with the
    module-level TensorFlow graph (``nn_out`` fed through ``action_in``,
    ``pain_in`` and ``dir_in``) using the session passed to the constructor.

    Attributes:
        actionstore / painstore / dirstore: per-step history of the chosen
            action (1x2), the pain vector (1x4) and the direction (1x2) that
            were fed to the network.
        pain: 4x1 array of sensor pain levels, ordered top, right, left, bottom.
        dir: two-element list ``[dx, dy]`` holding the current velocity.
        stamina: remaining energy; decreases on sensor contact and overspeed.
        rect: body rectangle; ``rect_t``/``rect_r``/``rect_l``/``rect_b`` are
            the one-pixel-thick sensor strips on its edges.
    """
    
    def __init__(self,sess,pos_init=(200,100),maxstep=5,size=(20,20),spots_offset=5,num_obstacles=5,stamina=100):
        """Create the agent at ``pos_init`` with empty history.

        Args:
            sess: session object whose ``run`` evaluates ``nn_out``.
            pos_init: (x, y) of the body's top-left corner.
            maxstep: velocity magnitude above which stamina is drained.
            size: (width, height) of the body.
            spots_offset: inset of each sensor strip from the body corners.
            num_obstacles: accepted for interface compatibility; not used.
            stamina: initial stamina.
        """
        
        self.actionstore = []
        self.painstore = []
        self.dirstore = []
        
        self.sess = sess 
        
        self.pos = pos_init
        self.maxstep = maxstep
        self.size = size
        self.spots_offset = spots_offset
        self.stamina = stamina
        
        self.pain = np.zeros([4,1])   # t,r,l,b
        self.dir = [0,0]
        
        self.rect = pygame.rect.Rect((self.pos[0],self.pos[1],self.size[0],self.size[1]))
        self._update_sensors()
        
    def _update_sensors(self):
        """Rebuild the four sensor strips from the current ``pos`` and ``size``."""
        x,y = self.pos[0],self.pos[1]
        w,h = self.size[0],self.size[1]
        off = self.spots_offset
        self.rect_t = pygame.rect.Rect((x+off,y,w-2*off,1))
        self.rect_r = pygame.rect.Rect((x+w-1,y+off,1,h-2*off))
        self.rect_l = pygame.rect.Rect((x,y+off,1,h-2*off))
        self.rect_b = pygame.rect.Rect((x+off,y+h-1,w-2*off,1))
        
    def get_pos(self):
        """Return the (x, y) of the body rectangle's top-left corner."""
        return (self.rect[0],self.rect[1])
        
    def move(self,env):
        """Sense contacts, pick an action, and advance one frame.

        Sensors are evaluated at the position reached in the previous frame;
        the agent then updates its velocity, moves, is clamped to the screen
        and has its sensor strips rebuilt.

        Args:
            env: object exposing ``screen.get_rect()`` and ``obstacles``.
        """
        screen_rect = env.screen.get_rect()
        
        # A sensor also hurts when it touches the matching screen border.
        # Borders are read from the screen rect instead of fixed 1200x600.
        touching_border = (
            self.rect_t[1] <= screen_rect[1],
            self.rect_r[0]+1 >= screen_rect[0]+screen_rect[2],
            self.rect_l[0] <= screen_rect[0],
            self.rect_b[1]+1 >= screen_rect[1]+screen_rect[3],
        )
        sensors = (self.rect_t,self.rect_r,self.rect_l,self.rect_b)
        for k,sensor in enumerate(sensors):
            if touching_border[k] or any(sensor.colliderect(o) for o in env.obstacles):
                self.pain[k] += 1
                self.stamina -= 1
            elif self.pain[k]>0:
                # Pain decays by one per contact-free frame, never below zero.
                self.pain[k] -= 1
        
        ddir = self.decide_action(env,self.pain,self.dir)
        # Element-wise add.  ``list += ndarray`` would *extend* the list with
        # the array's elements instead of adding them.
        self.dir = [int(self.dir[0]+ddir[0]),int(self.dir[1]+ddir[1])]
        # Overspeed costs stamina in either direction along either axis.
        if abs(self.dir[0])>self.maxstep or abs(self.dir[1])>self.maxstep:
            self.stamina -= 1
        
        self.rect.move_ip(self.dir[0],self.dir[1])
        self.rect.clamp_ip(screen_rect)
        
        self.pos = (self.rect[0],self.rect[1]) 
        self._update_sensors()

    def decide_action(self,env,pain,cdir):
        """Score every candidate step with the network and return the best.

        The network inputs for the chosen step are appended to
        ``actionstore``, ``painstore`` and ``dirstore``.

        Args:
            env: unused; kept for interface compatibility.
            pain: array with 4 elements (any shape reshapeable to 1x4).
            cdir: current direction, 2 elements.

        Returns:
            The chosen action as a length-2 integer array, one of
            ``[1,0]``, ``[0,1]``, ``[-1,0]``, ``[0,-1]``, ``[0,0]``.
        """
        
        possible_actions = np.array([[1,0],[0,1],[-1,0],[0,-1],[0,0]])
        quals = np.zeros(shape=[len(possible_actions)])
        
        # Snapshot the inputs once.  ``np.array`` copies, so the stored history
        # does not alias ``self.pain``, which is mutated in place every frame.
        pain_vec = np.array(pain).reshape(1,4)
        dir_vec = np.array(cdir).reshape(1,2)
        
        for k in range(len(possible_actions)):
            
            action = possible_actions[k,:]
            a_qual = self.sess.run([nn_out],feed_dict={
                action_in:action.reshape(1,2),
                pain_in:pain_vec,
                dir_in:dir_vec
            })
            quals[k] = a_qual[0].flatten()[0]
            
        chosen_action = possible_actions[np.argmax(quals,axis=0)]
        
        # Record the action actually taken, not the last candidate evaluated.
        self.actionstore.append(np.array(chosen_action).reshape(1,2))
        self.painstore.append(pain_vec)
        self.dirstore.append(dir_vec)
        
        return chosen_action

In [6]:
# Run five episodes of the agent in a fresh random environment, sharing one
# TensorFlow session (and therefore one set of network weights) across them.
num_obstacles = 5

with tf.Session() as sess: 

    init = tf.global_variables_initializer()
    sess.run(init)

    epoch_ctr = 0
    while epoch_ctr<5:
    
        try:
            pygame.init()
            env = Environment(num_obstacles)
            player = Agent(sess)
            
            running = True
            while running:

                env.draw_env()
                env.draw_player(player)
                player.move(env)
                # Stamina can drop by more than one in a single step, so an
                # equality test against 0 could be skipped and never end.
                if player.stamina<=0:running=False
                pygame.display.flip()

                # Any key press, or closing the window, ends the episode.
                if any([e.type in (pygame.KEYDOWN,pygame.QUIT) for e in pygame.event.get()]):running=False
                env.clock.tick(50)
                # THIS IS THE SNAPSHOT
                img = np.array(pygame.PixelArray(env.screen))
                # USE IT FOR REIN-L

        except SystemExit:
            # A SystemExit ends the episode but not the training run.
            pass
        finally:
            # Always release the pygame window, including on unexpected errors.
            pygame.quit()
        
        # Update 
        # The stores are plain list attributes, not methods.
        actions = player.actionstore
        pains = player.painstore
        dirs = player.dirstore
        
        # Select last couple of steps, make value vector beginning at -1
        # Run optimizer
        
        # End update
        
        epoch_ctr += 1