## Libraries

In [1]:
import pdb;
import scipy.misc as scimisc

In [2]:

from tkinter import *
from PIL import Image
from PIL import ImageTk

In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import matplotlib.animation as animation
from PIL import Image

In [4]:
import MalmoPython
import os
import sys
import time
import random
import json
import numpy as np
import time
from IPython.display import clear_output,display
import logging
import math
# Catalogue of command strings grouped by command family, then by a short
# variant name. Continuous families (strafe/move/pitch/turn) carry a signed
# magnitude; the on/off families carry 1 or 0.
# NOTE(review): nothing else in the visible notebook reads `actions`; the agent
# is driven by `simple_actions`/`ractions` instead — confirm before relying on it.
actions = {
    'strafe':{
        'left': 'strafe -1',
        'right': 'strafe 1'
    },
    'move':{
        'back':'move -1',
        'forward':'move 1'
    },
    # pitch uses a much smaller magnitude (0.03) than the other signed families
    'pitch':{
        'up':'pitch -0.03',
        'down':'pitch 0.03'
    },
    'turn':{
        'anti':'turn -1',
        'clk':'turn 1'
    },
    'jump':{
        'on':'jump 1',
        'off':'jump 0'
    },
    'attack':{
        'on': 'attack 1',
        'off': 'attack 0'
    },
    'use':{
        'on': 'use 1',
        'off': 'use 0'
    },
    'crouch':{
        'on':'crouch 1',
        'off':'crouch 0'
    }
}
# Create default Malmo objects:

In [76]:
# Reduced action set offered to the agent: half-magnitude sideways and
# forward/backward movement only.
simple_actions = {
    'strafe': {'left': 'strafe -0.5', 'right': 'strafe 0.5'},
    'move': {'back': 'move -0.5', 'forward': 'move 0.5'},
}

# Flatten the nested mapping into a plain list of command strings. The list
# index of each command is the integer action id used by the agent.
ractions = [
    command
    for variants in simple_actions.values()
    for command in variants.values()
]

## Brain

In [7]:
from keras.models import Sequential,model_from_json
from keras.layers import Dense, Activation,GRU,Input,LSTM,Conv2D,Flatten
from keras.optimizers import RMSprop

Using TensorFlow backend.


In [93]:
class Brain:
    """Convolutional Q-network: maps one state tensor to one value per action.

    Parameters
    ----------
    stateCnt : tuple of int
        Shape of a single state as fed to the first convolution, e.g.
        ``(height, width, channels)``. ``predictOne`` reshapes its input to
        ``(1, *stateCnt)``, so the network input shape is taken from here too.
    actionCnt : int
        Number of discrete actions, i.e. the width of the output layer.
    """

    def __init__(self, stateCnt, actionCnt):
        self.stateCnt = stateCnt
        self.actionCnt = actionCnt

        self.model = self._createModel()

    def save(self):
        """Write the architecture to ``model.json`` and the weights to ``CNNmodel.h5``."""
        model_json = self.model.to_json()
        with open('model.json','w') as json_file:
            json_file.write(model_json)

        self.model.save_weights('CNNmodel.h5')

    def load(self):
        """Load weights from ``CNNmodel.h5`` into the already-built model."""
        self.model.load_weights('CNNmodel.h5')

    def _createModel(self):
        """Build and compile the three-convolution network.

        The output layer is linear: the targets built during replay are
        unbounded discounted returns, which a softmax output (confined to
        [0, 1] and summing to 1) cannot regress under an MSE loss.
        """
        model = Sequential()
        # Only the first layer needs input_shape; later layers infer theirs.
        model.add(Conv2D(32,(8,8),input_shape=tuple(self.stateCnt),activation='relu'))
        model.add(Conv2D(64,(4,4),activation='relu'))
        model.add(Conv2D(64,(3,3),activation='relu'))
        model.add(Flatten())
        model.add(Dense(units=self.actionCnt,activation='linear'))

        opt = RMSprop(lr=0.00025)
        model.compile(loss='mse', optimizer=opt)

        return model

    def train(self, x, y, epoch=1, verbose=0):
        """Fit the model on inputs ``x`` and targets ``y`` for ``epoch`` epochs (batch size 64)."""
        self.model.fit(x, y, batch_size=64, epochs=epoch, verbose=verbose)

    def predict(self, s):
        """Return the model output for a batch of states ``s`` as a NumPy array."""
        return np.array(self.model.predict(s))

    def predictOne(self, s):
        """Return the flat output vector for a single state ``s``."""
        return self.predict(s.reshape(1, *self.stateCnt)).flatten()


## Memory

In [9]:
class Memory:
    """Bounded first-in-first-out store of experience tuples ``(s, a, r, s_)``."""

    def __init__(self, capacity):
        self.capacity = capacity
        self.samples = []

    def add(self, sample):
        """Append ``sample``; drop the oldest entry once ``capacity`` is exceeded."""
        self.samples.append(sample)
        overflow = len(self.samples) > self.capacity
        if overflow:
            del self.samples[0]

    def sample(self, n):
        """Return up to ``n`` distinct stored entries chosen at random."""
        count = min(n, len(self.samples))
        return random.sample(self.samples, count)

## Agent

In [85]:
MEMORY_CAPACITY = 100000   # maximum number of transitions kept for replay
BATCH_SIZE = 64            # transitions drawn per replay step

GAMMA = 0.99               # discount factor applied to the next state's best value

MAX_EPSILON = 1
MIN_EPSILON = 0.01
LAMBDA = 0.0001      # speed of decay


class Agent:
    """Epsilon-greedy agent with experience replay.

    Parameters
    ----------
    stateCnt : tuple of int
        Shape of a single state.
    actionCnt : int
        Number of discrete actions.
    actions : sequence
        Commands indexed by integer action id; ``act`` returns the entry
        together with its index.
    """

    steps = 0
    epsilon = MAX_EPSILON

    def __init__(self, stateCnt, actionCnt,actions):
        self.stateCnt = stateCnt
        self.actionCnt = actionCnt
        self.actions = actions

        self.brain = Brain(stateCnt, actionCnt)
        self.memory = Memory(MEMORY_CAPACITY)

    def act(self, s):
        """Pick an action for state ``s``.

        With probability ``epsilon`` the action is uniformly random; otherwise
        it is the arg-max of the brain's prediction.

        Returns
        -------
        tuple
            ``(command, action_index)``.
        """
        if random.random() < self.epsilon:
            act_int = random.randint(0, self.actionCnt-1)
        else:
            # Plain int so the index stored in memory is not a NumPy scalar.
            act_int = int(np.argmax(self.brain.predictOne(s)))
        return self.actions[act_int],act_int

    def observe(self, sample):  # in (s, a, r, s_) format
        """Store one transition and decay ``epsilon`` by one step."""
        self.memory.add(sample)

        # slowly decrease epsilon based on our experience
        self.steps += 1
        self.epsilon = MIN_EPSILON + (MAX_EPSILON - MIN_EPSILON) * math.exp(-LAMBDA * self.steps)

    def replay(self):
        """Sample a batch from memory and run one training call on it.

        For each transition the target for the taken action is ``r`` when the
        next state is ``None`` (terminal), else ``r + GAMMA * max(Q(s_))``.
        The other action targets keep the network's current predictions.
        """
        batch = self.memory.sample(BATCH_SIZE)
        batchLen = len(batch)
        if batchLen == 0:
            # Nothing stored yet; there is nothing to predict or fit.
            return

        # Placeholder fed to the network for terminal transitions; its
        # prediction is never used because the terminal branch ignores p_.
        no_state = np.zeros(self.stateCnt)

        states = np.array([ o[0] for o in batch ])
        states_ = np.array([ (no_state if o[3] is None else o[3]) for o in batch ])

        # Use this agent's own network, not whatever global `agent` exists.
        p = self.brain.predict(states)
        p_ = self.brain.predict(states_)

        x = np.zeros((batchLen, *self.stateCnt))
        y = np.zeros((batchLen, self.actionCnt))

        for i, (s, a, r, s_) in enumerate(batch):
            t = p[i]
            if s_ is None:
                t[a] = r
            else:
                t[a] = r + GAMMA * np.amax(p_[i])
            x[i] = s
            y[i] = t

        # Train once, after the whole batch has been assembled.
        self.brain.train(x, y)

## Environment

In [None]:
class Env:
    """Wrapper around a Malmo ``AgentHost`` exposing start / step / observe.

    Parameters
    ----------
    actions : sequence of str
        Command strings indexed by integer action id; ``step`` sends
        ``actions[action]`` to the host.
    obs_shape : tuple of int
        Shape the raw video-frame pixel buffer is reshaped to in ``observe``,
        i.e. ``(height, width, channels)``. ``observe`` reshapes with this
        value and reads channels ``[:3]``, so the integer default cannot be
        used for frame processing; callers in this notebook always pass a tuple.
    """

    def __init__(self,actions,obs_shape = 3):
        self.world_state = None
        self.my_mission_record = MalmoPython.MissionRecordSpec()
        self.data = None
        self.obs_shape = obs_shape
        self.actions = actions

        self.host = MalmoPython.AgentHost()
        try:
            self.host.parse( sys.argv )
        except RuntimeError as e:
            # Report and carry on: the host is still usable after a rejected
            # command line (the notebook output shows missions starting after
            # this error is printed).
            print ('ERROR:',e)
            print (self.host.getUsage())
            if self.host.receivedArgument("help"):
                print (self.host.getUsage())
                # sys.exit raises SystemExit; the bare `exit` helper would
                # shut the whole Jupyter kernel down.
                sys.exit(0)

    def _dist(self,x,y):
        """Euclidean distance between array-likes ``x`` and ``y``."""
        return np.sqrt(np.sum((x-y)**2))

    def observe(self):
        """Block until a new frame and a non-empty observation arrive, then report.

        Returns
        -------
        tuple
            ``(reward, frame, data, world_state)`` where ``frame`` is the last
            video frame converted to greyscale, shrunk to 1/12 of its size and
            given a trailing channel axis, or ``None`` when the world state
            holds no video frame (mission ended before one arrived). ``data``
            is the most recently parsed observation JSON.
        """
        frames_seen = self.world_state.number_of_video_frames_since_last_state
        # Wait for the frame counter to move (or the mission to stop).
        while self.world_state.is_mission_running and self.world_state.number_of_video_frames_since_last_state == frames_seen:
            self.world_state = self.host.peekWorldState()

        # Wait for at least one observation that is not the empty JSON object.
        while self.world_state.is_mission_running and all(e.text=='{}' for e in self.world_state.observations):
            self.world_state = self.host.peekWorldState()

        state = self.world_state
        if state.number_of_observations_since_last_state > 0 and len(state.observations) > 0:
            self.data = json.loads(state.observations[-1].text)

        # Default reward is 1 unless the world state carries a reward.
        # NOTE(review): an earlier comment here said the default was -1 —
        # confirm which value is intended.
        reward = 1
        if state.number_of_rewards_since_last_state > 0:
            reward = state.rewards[0].getValue()

        # The former distance-to-target bonus was removed: it read `vec` and
        # `self.observation_space`, both of which only existed in
        # commented-out code, so every call raised NameError.

        smaller = None
        if len(state.video_frames) > 0:
            frame = np.array(state.video_frames[-1].pixels).reshape(self.obs_shape)
            # Standard luma weights collapse the first three channels to one.
            gray_frame = np.dot(frame[...,:3],[0.299,0.587,0.114])
            # NOTE(review): scipy.misc.imresize was removed in SciPy 1.3; this
            # call needs an older SciPy or a replacement resize.
            smaller = scimisc.imresize(gray_frame,1/12,mode='L')
            smaller = np.expand_dims(smaller,2)

        return(reward,smaller,self.data,state) # return r,s,data,extra_info

    def startworld(self,world_file):
        """Start the mission described by ``world_file`` and return the first ``observe()``.

        Retries ``startMission`` up to three times, two seconds apart, and
        raises ``SystemExit(1)`` if the last attempt still fails.
        """
        with open(world_file,'r') as f:
            my_mission = MalmoPython.MissionSpec(f.read(), True)
        my_mission_record = MalmoPython.MissionRecordSpec()
        # Attempt to start a mission:
        max_retries = 3
        for retry in range(max_retries):
            try:
                self.host.startMission( my_mission, my_mission_record )
                sys.stdout.write("Mission Started")
                break
            except RuntimeError as e:
                if retry == max_retries - 1:
                    print ("Error starting mission:{}".format(e))
                    sys.exit(1)
                else:
                    time.sleep(2)
        # Loop until mission starts:
        self.world_state = self.host.getWorldState()
        while (not self.world_state.has_mission_begun):
            sys.stdout.write(".")
            time.sleep(0.1)
            self.world_state = self.host.getWorldState()
            for error in self.world_state.errors:
                print ("Error:",error.text)

        # Wait until a valid observation is available.
        while self.world_state.is_mission_running and all(e.text=='{}' for e in self.world_state.observations):
            self.world_state = self.host.peekWorldState()
        # Populate empty fields for init; skipped when the mission stopped
        # before any observation arrived.
        if len(self.world_state.observations) > 0:
            self.data = json.loads(self.world_state.observations[-1].text)

        return self.observe()

    def quit(self):
        """Send the ``quit`` command to the host."""
        self.host.sendCommand('quit')

    def step(self,action):
        """Send the command for integer ``action`` and return the next ``observe()``."""
        self.host.sendCommand(self.actions[action])
        return self.observe()

In [None]:
class RobustEnv:
    """Blocking helpers that wait for Malmo to deliver an observation and a frame.

    Parameters
    ----------
    host : object, optional
        Agent host providing ``peekWorldState()`` and ``getWorldState()``.
        It may also be assigned later through ``host`` or ``agent_host``;
        both names refer to the same object.

    Attributes
    ----------
    last_reward
        Reward recorded by the most recent ``waitForNextState`` call
        (``None`` before the first call).
    """

    def __init__(self, host=None):
        self.host = host
        self.last_reward = None

    @property
    def agent_host(self):
        """Alias of ``host``, kept because both names were in use."""
        return self.host

    @agent_host.setter
    def agent_host(self, value):
        self.host = value

    def waitForInitialState( self ):
        '''Before a command has been sent we wait for an observation of the world and a frame.'''
        # wait for a valid observation
        world_state = self.host.peekWorldState()
        while world_state.is_mission_running and all(e.text=='{}' for e in world_state.observations):
            world_state = self.host.peekWorldState()
        # wait for a frame to arrive after that
        num_frames_seen = world_state.number_of_video_frames_since_last_state
        while world_state.is_mission_running and world_state.number_of_video_frames_since_last_state == num_frames_seen:
            world_state = self.host.peekWorldState()
        world_state = self.host.getWorldState()

        if world_state.is_mission_running:
            assert len(world_state.video_frames) > 0, 'No video frames!?'

        return world_state

    def waitForNextState( self ):
        '''After each command has been sent we wait for a non-empty observation and a frame.

        Returns the world state fetched with getWorldState(); the reward seen
        in that state (1 when none was reported) is stored in `last_reward`.
        '''
        # wait for a non-empty observation
        print('Waiting for observation...', end=' ')
        while True:
            world_state = self.host.peekWorldState()
            if not world_state.is_mission_running:
                print('mission ended.')
                break
            if not all(e.text=='{}' for e in world_state.observations):
                print('received.')
                break

        # wait for at least one video frame
        print('Waiting for render...', end=' ')
        while True:
            world_state = self.host.peekWorldState()
            if not world_state.is_mission_running:
                print('mission ended.')
                break
            if len(world_state.video_frames) > 0:
                print('received.')
                break

        num_frames_before_get = len(world_state.video_frames)
        world_state = self.host.getWorldState()

        if world_state.is_mission_running:
            assert len(world_state.video_frames) > 0, 'No video frames!?'
            num_frames_after_get = len(world_state.video_frames)
            assert num_frames_after_get >= num_frames_before_get, 'Fewer frames after getWorldState!?'

        # Read the reward from the state just fetched (the previous code
        # referenced the undefined names `world` and `state`).
        reward = 1
        if world_state.number_of_rewards_since_last_state > 0:
            reward = world_state.rewards[0].getValue()
        self.last_reward = reward

        return world_state

## Test for manual code execution

In [None]:

# Stand-alone AgentHost for driving Malmo by hand, independent of the Env class.
agent_host = MalmoPython.AgentHost()
try:
    agent_host.parse( sys.argv )
except RuntimeError as e:
    # Under Jupyter sys.argv carries kernel options (the recorded output shows
    # "unrecognised option '-f'"), so this branch is the normal case here.
    # Report and continue, as Env.__init__ does, instead of calling exit(),
    # which would shut the kernel down.
    print ('ERROR:',e)
    print (agent_host.getUsage())
if agent_host.receivedArgument("help"):
    print (agent_host.getUsage())
    # SystemExit stops the cell/script without tearing down a notebook kernel.
    sys.exit(0)



In [34]:
# Manual smoke test: start the mission and stream the down-sampled greyscale
# frames returned by Env.observe() into a small Tk canvas until the mission ends.
env = Env(ractions,(300,420,3))
r,s,data,ws = env.startworld('CliffWalking.xml')
done = not ws.is_mission_running
root = Tk()
root_frame = Frame(root)
canvas = Canvas(root_frame, borderwidth=0, highlightthickness=0, width=200, height=130, bg="black" )
root_frame.pack()
canvas.pack()

# Size of the down-sampled frame: 420/12 columns by 300/12 rows.
frame_width = 35
frame_height = 25

count = 0
try:
    while not done:
        r,s,data,ws = env.observe()
        done = not ws.is_mission_running
        if s is None:
            # No frame to draw for this state.
            continue
        count += 1

        # Image mode 'L' needs a 2-D unsigned 8-bit array, so drop the
        # trailing channel axis that observe() adds.
        image = Image.fromarray(np.squeeze(s).astype('uint8'),mode='L')
        photo = ImageTk.PhotoImage(image)

        # Keep a reference on the root so the image is not garbage collected.
        root.one = photo
        canvas.delete("all")
        canvas.create_image(frame_width/2,frame_height/2, image=photo)
        root.update()

        if count % 100 == 0:
            print(s[None].shape)
except TclError:
    # The preview window was closed; there is nothing left to draw on.
    pass
except Exception:
    # Close the window, then surface the real error instead of looping on it.
    root.destroy()
    raise
    

ERROR: unrecognised option '-f'
Malmo version: 0.31.0

Allowed options:
  -h [ --help ]         show description of allowed options
  --test                run this as an integration test


Mission Started.......(1, 75, 105)
(1, 75, 105)
(1, 75, 105)
(1, 75, 105)
(1, 75, 105)
(1, 75, 105)
(1, 75, 105)
(1, 75, 105)
(1, 75, 105)
(1, 75, 105)
(1, 75, 105)
(1, 75, 105)
(1, 75, 105)
(1, 75, 105)
(1, 75, 105)
(1, 75, 105)


In [27]:
# Close the Tk preview window created by the manual test cell.
root.destroy()

## Plots

In [103]:
# Live reward plot: an initially empty line that `update` appends points to.
# NOTE(review): `figure` and `show` are not imported anywhere in the visible
# notebook — presumably they come from bokeh (bokeh.plotting / bokeh.io);
# confirm and add the import to the imports cell so a fresh kernel can run this.
fig1 = figure(plot_width=400, plot_height=400,title="rewards",
                      x_axis_label="x",
                      y_axis_label="y")
rplot = fig1.line([],[],color="firebrick",line_width=2)
# Keep the handle returned by show() so later cells can push updates to this plot.
handle1 = show(fig1, notebook_handle=True)

In [104]:
def update(x,y,handle,plot):
    """Append the point ``(x, y)`` to ``plot``'s data source and push it to ``handle``."""
    series = plot.data_source.data
    for key, value in (('x', x), ('y', y)):
        # Augmented assignment re-stores the column on the data mapping.
        series[key] += [value]
    push_notebook(handle=handle)

## Run Code

In [96]:
# Send the 'quit' command through the existing environment before rebuilding it.
# NOTE(review): relies on `env` left over from an earlier cell.
env.quit()

In [101]:
# State shape (25, 35, 1) is the 300x420 frame shrunk by 12 with one grey channel.
# The action count is derived from the action list so the two cannot drift apart.
agent = Agent((25,35,1),len(ractions),ractions)
env = Env(ractions,(300,420,3))

ERROR: unrecognised option '-f'
Malmo version: 0.31.0

Allowed options:
  -h [ --help ]         show description of allowed options
  --test                run this as an integration test






In [105]:
# Train for 100 episodes. R collects one total reward per finished episode.
R = []
for t in range(100):
    r,s,data,ws = env.startworld('CliffWalking.xml')
    done = not ws.is_mission_running
    episode_reward = 0.0
    while not done:
        send_a, a = agent.act(s)
        r,s_,data,ws = env.step(a)
        done = not ws.is_mission_running
        update(t,episode_reward,handle1,rplot)
        # Store the terminal transition with s_ = None so Agent.replay uses
        # the plain reward as its target instead of bootstrapping.
        agent.observe((s,a,r,None if done else s_))
        agent.replay()

        s = s_
        episode_reward += r

    R.append(episode_reward)

    print('done play through {}'.format(t))


fig2 = figure(plot_width=400, plot_height=400,title="Rewards per episode",
                      x_axis_label="x",
                      y_axis_label="y")
rplot2 = fig2.line(list(range(len(R))),R,color="teal",line_width=2)
handle2 = show(fig2, notebook_handle=True)

Mission Started.......



done play through 0
Mission Started.......



done play through 1
Mission Started.......



done play through 2
Mission Started.......



done play through 3
Mission Started.......



done play through 4
Mission Started.......



done play through 5
Mission Started.......



done play through 6
Mission Started.......



done play through 7
Mission Started.......



done play through 8
Mission Started.......



done play through 9
Mission Started........



done play through 10
Mission Started.......



done play through 11
Mission Started.......



done play through 12
Mission Started.......



done play through 13
Mission Started.......



done play through 14
Mission Started.......



done play through 15
Mission Started.......



done play through 16
Mission Started.......



done play through 17
Mission Started.......



done play through 18
Mission Started.......



done play through 19
Mission Started.......



done play through 20
Mission Started.......



done play through 21
Mission Started.......



done play through 22
Mission Started.......



done play through 23
Mission Started.......



done play through 24
Mission Started.......



done play through 25
Mission Started.......



done play through 26
Mission Started.......



done play through 27
Mission Started.......



done play through 28
Mission Started........



done play through 29
Mission Started.......



done play through 30
Mission Started.......



done play through 31
Mission Started.......



done play through 32
Mission Started.......



done play through 33
Mission Started.......



done play through 34
Mission Started.......



done play through 35
Mission Started.......



done play through 36
Mission Started........



done play through 37
Mission Started.......



done play through 38
Mission Started.......



done play through 39
Mission Started........



done play through 40
Mission Started.......



done play through 41
Mission Started.......



done play through 42
Mission Started.......



done play through 43
Mission Started.......



done play through 44
Mission Started.......



done play through 45
Mission Started.......



done play through 46
Mission Started.......



done play through 47
Mission Started.......



done play through 48
Mission Started........



done play through 49
Mission Started........



done play through 50
Mission Started.......



done play through 51
Mission Started.......



done play through 52
Mission Started.......



done play through 53
Mission Started.......



done play through 54
Mission Started.......



done play through 55
Mission Started.......



done play through 56
Mission Started........



done play through 57
Mission Started.......



done play through 58
Mission Started........



done play through 59
Mission Started.......



done play through 60
Mission Started.......



done play through 61
Mission Started.......



done play through 62
Mission Started........



done play through 63
Mission Started.......



done play through 64
Mission Started.......



done play through 65
Mission Started........



done play through 66
Mission Started.......



done play through 67
Mission Started.......



done play through 68
Mission Started.......



done play through 69
Mission Started.......



done play through 70
Mission Started........



done play through 71
Mission Started.......



done play through 72
Mission Started.......



done play through 73
Mission Started.......



done play through 74
Mission Started.......



done play through 75
Mission Started......



done play through 76
Mission Started.......



done play through 77
Mission Started.......



done play through 78
Mission Started.......



done play through 79
Mission Started.......



done play through 80
Mission Started........



done play through 81
Mission Started........



done play through 82
Mission Started.......



done play through 83
Mission Started.......



done play through 84
Mission Started........



done play through 85
Mission Started........



done play through 86
Mission Started.......



done play through 87
Mission Started.......



done play through 88
Mission Started.......



done play through 89
Mission Started.......



done play through 90
Mission Started.......



done play through 91
Mission Started.......



done play through 92
Mission Started.......



done play through 93
Mission Started.......



done play through 94
Mission Started.......



done play through 95
Mission Started........



done play through 96
Mission Started.......



done play through 97
Mission Started.......



done play through 98
Mission Started........



done play through 99


In [87]:
# Scratch: a 3x3x3 cube holding the integers 0..26, used to try out padding.
a = np.arange(27).reshape((3, 3, 3))
a.shape

(3, 3, 3)

In [None]:
# Scratch: append three zero-filled slices at the end of axis 0; axes 1 and 2 are left unpadded.
np.pad(a,[(0,3),(0,0),(0,0)], 'constant')

In [None]:
# Scratch: add a leading batch axis. -1 asks NumPy to infer that dimension;
# None is not a valid dimension for reshape and raises TypeError.
b = np.array(range(49)).reshape(-1,1,49)

In [None]:
# Scratch: display the reshaped array.
b

In [None]:
# Scratch: inspect the `channels` attribute of the first video frame in the last world state.
ws.video_frames[0].channels