# 0. Install Dependencies

In [1]:
!pip install tensorflow
!pip install gym
!pip install keras
!pip install keras-rl



# 1. Test Random Environment with OpenAI Gym

In [2]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random

In [3]:
import sys
from pathlib import Path
import random
import importlib
import json
sys.path.append(str(Path("GameManager.py").parent))

from Game.test import *
from Game.playerActions import defense_actions, attack_actions, projectile_actions, nullDef, nullAtk, nullProj
from Game.gameSettings import *
from Game.Skills import *
from Game.projectiles import *
from Game.turnUpdates import *
from Game.PlayerConfigs import Player_Controller

# Manually choose bot files to test
SUBMISSIONPATH = "Submissions"
PATH1 = "Bot1"
PATH2 = "Bot2"

# Get scripts from bot files and return as script objects
def getPlayerFiles(path1, path2, subpath):
    submission_files = Path(subpath)
    p1module = submission_files / (path1 + ".py")
    p2module = submission_files / (path2 + ".py")
    if p1module.is_file() and p2module.is_file():
        # Ensures path works on mac and windows
        subpath = subpath.replace('\\', '.')
        subpath = subpath.replace('/', '.')
        p1 = importlib.import_module(subpath + "." + path1)
        p2 = importlib.import_module(subpath+ "." + path2)
        return p1, p2
    else:
        raise Exception("A file does not exist in " + subpath)


# Checks for players moving into each other
def checkCollision(player1, player2, knock1, knock2, check_midair = False):
    if (correct_dir_pos(player1, player2, knock1, knock2)):
        # If an overlap occured, then a collision has occured, so set
        # horizontal midair velocity to 0
        player1._velocity = 0
        player2._velocity = 0
    elif check_midair:
        # Check for midair players moving towards each other
        # If they end up face-to-face midair, set horizontal velocity to 0
        if ((player1._yCoord == player2._yCoord) and 
            (abs(player1._xCoord - player2._xCoord) == 1)
            and (player1._direction != player2._direction)):
            player1._velocity = 0
            player2._velocity = 0
                
# Plays out a single turn, doesn't check deaths
def executeOneTurn(action,player1, player2, p1_script, p2_script, p1_json_dict, p2_json_dict, projectiles):
    # Initializing knockbacks: knock1 = knockback INFLICTED by player1 on player 2
    knock1 = knock2 = 0
    stun1 = stun2 = 0
    # If midair, start falling/rising and check if a collision occurs
    updateMidair(player1)
    checkCollision(player1, player2, knock1, knock2)
    updateMidair(player2)
    checkCollision(player1, player2, knock1, knock2)


    # Check for existing projectiles belonging to each player
    p1_projectiles = [proj["projectile"] for proj in projectiles if proj["projectile"]._player._id == 1]
    p2_projectiles = [proj["projectile"] for proj in projectiles if proj["projectile"]._player._id == 2]
    
    # Pass relevant information to player scripts, and get a move from them
    p1_move = action;
    p2_move = p2_script.get_move(player2, player1, p2_projectiles, p1_projectiles)
  
    # In case the scripts return None
    if not p1_move:
        p1_move = ("NoMove",)
    if not p2_move:
        p2_move = ("NoMove",)
        
    # Add their move to their list of inputs
    player1._inputs.append(p1_move)
    player2._inputs.append(p2_move)
    
    # Get move from input list
    act1 = player1._action()
    act2 = player2._action()
    
    # Get game information from the result of the players performing their inputs
    knock1, stun1, knock2, stun2, projectiles = performActions(player1, player2, 
                                        act1, act2, stun1, stun2, 
                                        projectiles)
    # JSONFILL always True now...
    # Writes to json files the current actions, positions, hp etc...
    if JSONFILL:
        playerToJson(player1, p1_json_dict, not JSONFILL)
        playerToJson(player2,p2_json_dict, not JSONFILL)
        
    # Check if players move into each other, correct it if they do
    checkCollision(player1, player2, knock1, knock2)
    
    # Make any currently existing projectiles move, and record them in json files
    projectiles, knock1, stun1, knock2, stun2 = projectile_move(projectiles, 
                            knock1, stun1, knock2, stun2, player1, player2,
                            p1_json_dict, p2_json_dict)


    # Only determine knockback and stun after attacks hit
    if (knock1 or stun1) and not player2._superarmor:
        player2._xCoord += knock1
        if not player2._stun:
            player2._stun = stun1
    if (knock2 or stun2) and not player1._superarmor:
        player1._xCoord += knock2
        if not player1._stun:
            player1._stun = stun2
        
    # Final position correction, if any, due to projectiles      
    checkCollision(player1, player2, knock1, knock2, True)
        
    updateCooldown(player1)
    updateCooldown(player2)
    
    updateBuffs(player1)
    updateBuffs(player2)
    
    p1_dead = checkDeath(player1)
    p2_dead = checkDeath(player2)

    # Second write to json files, for any movement due to projectiles, and to 
    # check if a player got hurt
    playerToJson(player1, p1_json_dict, fill=JSONFILL, checkHurt = JSONFILL)
    playerToJson(player2,p2_json_dict, fill=JSONFILL, checkHurt = JSONFILL)

    return projectiles, p1_dead, p2_dead

def setupGame(p1_script, p2_script, leftstart=LEFTSTART, rightstart=RIGHTSTART):
    
    # Initializes player scripts as player controller objects
    player1 = Player_Controller(leftstart,0,HP,GORIGHT, *p1_script.init_player_skills(), 1)
    player2 = Player_Controller(rightstart,0,HP,GOLEFT, *p2_script.init_player_skills(), 2)
    # Ensure that valid primary and secondary skills are set
    assert(check_valid_skills(*p1_script.init_player_skills()))
    assert(check_valid_skills(*p2_script.init_player_skills()))
    return player1,player2
    
# Resets player shield strength
def resetBlock(player):
    player._block._regenShield()
    player._blocking = False
    
# Carries out player actions, return any resulting after effects to main loop  
def performActions(player1, player2, act1, act2, stun1, stun2, projectiles):
    knock1 = knock2 = 0

    # Empty move if player is currently stunned or doing recovery ticks
    if player1._stun or player1._recovery:
        act1 = ("NoMove", "NoMove")
        updateStun(player1)
    if player2._stun or player2._recovery:
        act2 = ("NoMove", "NoMove")
        updateStun(player2)
    
    # Checks if player does something to cancel a skill
    if player1._mid_startup or player1._skill_state:
        if player1._inputs[-1][0] in ("move", "block"):
            player1._skill_state = False
            player1._mid_startup = False
        else:
            act1 = player1._moves[-1]
            
    if player2._mid_startup or player2._skill_state:
        if player2._inputs[-1][0] in ("move", "block"):
            player2._skill_state = False
            player2._mid_startup = False
        else:
            act2 = player2._moves[-1]
        
    # Check if no valid move is input, or if the player is recovering 
    # If so, set act to None to prevent further checks
    if act1[0] not in (attack_actions.keys() | defense_actions.keys() | projectile_actions.keys()):
        if player1._recovery:
            player1._moves.append(("recover", None))
            updateRecovery(player1)
        else:
            player1._moves.append(("NoMove", "NoMove"))
        resetBlock(player1)
        act1 = None
    if act2[0] not in (attack_actions.keys() | defense_actions.keys() | projectile_actions.keys()):
        if player2._recovery:
            player2._moves.append(("recover", None))
            updateRecovery(player2)
        else:
            player2._moves.append(("NoMove", "NoMove"))
        resetBlock(player2)
        act2 = None

    # nullDef, nullAtk, nullProj = default functions that return (0,0) or None
    # actions can only occur if the player is not stunned
    # if a defensive action is taken, it has priority over damage moves/skills
    # defensive = any skill that does not deal damage
    
    # Movements are cached, and then carried out based on position 
    # If there are movements, set act to None to prevent going into attack check
    cached_move_1 = cached_move_2 = None
    if act1:
        if act1[0] != "block":
            resetBlock(player1)
        cached_move_1 = defense_actions.get(act1[0], nullDef)(player1, player2, act1)
        if cached_move_1:
            act1 = None
    if act2:
        if act2[0] != "block":
            resetBlock(player2)
        cached_move_2 = defense_actions.get(act2[0], nullDef)(player2, player1, act2)
        if cached_move_2:
            act2 = None
    # Prevent players that are directly facing each other from moving into each other
    if isinstance(cached_move_1, list) and isinstance(cached_move_2, list):
        if (check_move_collision(player1, player2, cached_move_1, cached_move_2) 
            and cached_move_1[1] == cached_move_2[1] and 
            abs(player1._xCoord - player2._xCoord) == 1):
            cached_move_1 = cached_move_2 = None
            player1._moves[-1] = ("NoMove", None)
            player2._moves[-1] = ("NoMove", None) 
    
    # Further checks for valid movement
    # Prevent horizontal movement if it would result in moving into a still player
    # Diagonal movements are allowed, since midair collision checks occur after
    if isinstance(cached_move_1, list):
        if player1._xCoord + cached_move_1[0] == player2._xCoord and cached_move_2 in ([0,0], None) and not cached_move_1[1]:
            cached_move_1[0] = 0
        player1._xCoord += cached_move_1[0]
        player1._yCoord += cached_move_1[1]
        player1._moves[-1] = ("move", (cached_move_1[0]*player1._direction, cached_move_1[1]))
    if isinstance(cached_move_2, list):
        if player2._xCoord + cached_move_2[0] == player1._xCoord and cached_move_1 in ([0,0], None) and not cached_move_2[1]:
            cached_move_2[0] = 0
        player2._xCoord += cached_move_2[0]
        player2._yCoord += cached_move_2[1]
        player2._moves[-1] = ("move", (cached_move_2[0]*player2._direction, cached_move_2[1]))
        
    # Prevent from going offscreen
    correctPos(player1)
    correctPos(player2)

    # Now check for damage dealing actions
    # Get any knockback and stun values if an attack lands
    # Summon projectiles if any projectile skills were casted
    if act1:
        knock1, stun1 = attack_actions.get(act1[0], nullAtk)(player1, player2, act1)
        proj_obj = projectile_actions.get(act1[0], nullProj)(player1, player2, act1)
        if proj_obj:
            projectiles.append(proj_obj)
        resetBlock(player1)
    if act2:
        knock2, stun2 = attack_actions.get(act2[0], nullAtk)(player2, player1, act2)
        proj_obj = projectile_actions.get(act2[0], nullProj)(player2, player1, act2)
        if proj_obj:
            projectiles.append(proj_obj)
        resetBlock(player2)

    # Correct positioning again just in case
    correctPos(player1)
    correctPos(player2)
    
    # Move to next move in player input list
    player1._move_num += 1
    player2._move_num += 1
    
    return knock1, stun1, knock2, stun2, projectiles

# Initializes json object 
def get_empty_json():
    return {
        'hp': [],
        'xCoord': [],
        'yCoord': [],
        'state': [],
        'actionType': [],
        'stun': [],
        'midair': [],
        'falling':[],
        'direction':[],
        'ProjectileType': None,
        'projXCoord':[],
        'projYCoord':[]
    }
                              
# Main game loop            
    
        
    # Write into json files
    
# Allows to run directly from GameManager to simulate single rounds
# if __name__ == "__main__":

In [32]:
class ShowerEnv(Env):
    def __init__(self):
        PRIMARY_SKILL = TeleportSkill
        SECONDARY_SKILL = Hadoken
        JUMP = ("move", (0,1))
        FORWARD = ("move", (1,0))
        BACK = ("move", (-1,0))
        JUMP_FORWARD = ("move", (1,1))
        JUMP_BACKWARD = ("move", (-1, 1))
        LIGHT = ("light",)
        HEAVY = ("heavy",)
        BLOCK = ("block",)
        PRIMARY = get_skill(PRIMARY_SKILL)
        SECONDARY = get_skill(SECONDARY_SKILL)
        CANCEL = ("skill_cancel", )
        NOMOVE = "NoMove"
        self.Actions=[PRIMARY,SECONDARY,JUMP,FORWARD,BACK,JUMP_FORWARD,JUMP_BACKWARD,LIGHT,HEAVY,BLOCK,CANCEL,NOMOVE]
        self.path1=PATH1;
        self.path2=PATH2;
        self.projectiles=[];
        self.p1_dead=False;
        self.p2_dead=False;
        self.submissionpath=SUBMISSIONPATH
        self.roundNum=1;
        self.p1, self.p2 = getPlayerFiles(self.path1, self.path2, self.submissionpath)
        self.p1_script = self.p1.Script()
        self.p2_script = self.p2.Script()
        self.player1, self.player2 = setupGame(self.p1_script, self.p2_script)
        # Check if file exists if so delete it 
        self.player_json = Path("jsonfiles/")
        # create new battle file with player jsons
        self.new_battle = self.player_json / f"Round_{self.roundNum}"
        self.player1_json = self.new_battle / "p1.json"
        self.player2_json = self.new_battle / "p2.json"
        # create round result file
        self.round_results_json = self.new_battle / "round.json"
        # get list of battles 
        files = self.player_json.glob("*")
        battles = [x for x in files if x.is_dir()]   
        # check if this battle has not happened before
        if f"Round {self.roundNum}" not in battles:
            self.player1_json.parent.mkdir(parents=True, exist_ok=True)
            self.player2_json.parent.mkdir(parents=True, exist_ok=True)
            self.round_results_json.parent.mkdir(parents=True, exist_ok=True)
            
        self.player1_json.open("w")
        self.player2_json.open("w")
        self.round_results_json.open("w")
        # structure the dict, no need to structure round result dict until the end
        self.p1_json_dict = get_empty_json()
        self.p2_json_dict = get_empty_json()
        
        # Initialize variables
        self.projectiles = []
        self.tick = 0
        self.max_tick = TIME_LIMIT * MOVES_PER_SECOND
        
        # Buffer turn : for smoothness
        for _ in range(BUFFERTURNS * 2): # 2 since fill ticks
            playerToJson(self.player1, self.p1_json_dict, fill=True, start=True)
            playerToJson(self.player2, self.p2_json_dict, fill=True, start=True)
            projectileToJson(None, self.p1_json_dict, False, fill=True)
            projectileToJson(None, self.p2_json_dict, False, fill=True)
            self.tick += 1
            self.max_tick += 1
            
        # Loops through turns
        
        # Write into json files
            # choose random player to win if tie

        

        # DO NOTHING
        # JUMP = ("move", (0,1))
        # FORWARD = ("move", (1,0))
        # BACK = ("move", (-1,0))
        # JUMP_FORWARD = ("move", (1,1))
        # JUMP_BACKWARD = ("move", (-1, 1))
        # LIGHT = ("light",)
        # HEAVY = ("heavy",)
        # BLOCK = ("block",)
        # Primary
        # Secondary
        
        self.action_space = Discrete(12)
        # Temperature array

        #Player x
        #Player y
        #Enemy x
        #Enemy y
        #Player HP
        #Enemy HP

        
        self.state =0
        # Set shower length
        
    def step(self, action):
        realAction=self.Actions[action];
        info = {}
        reward=0;
        self.projectiles, self.p1_dead, self.p2_dead = executeOneTurn(realAction,self.player1, 
            self.player2, self.p1_script, self.p2_script, self.p1_json_dict, self.p2_json_dict, 
            self.projectiles)
        self.tick+=1;
        
        done = not (not(self.p1_dead or self.p2_dead) and (self.tick < self.max_tick))
        
        #fdji
        if (done):
            self.player1_json.write_text(json.dumps(self.p1_json_dict))
            self.player2_json.write_text(json.dumps(self.p2_json_dict))
            
            # Test json output
            print_results = False
            if print_results:
                for key in self.p1_json_dict.keys():
                    print(key)
                    print(self.p1_json_dict[key])
                for key in self.p2_json_dict.keys():
                    print(key)
                    print(self.p2_json_dict[key])

                for json_key in self.p1_json_dict:
                    if json_key != "ProjectileType":
                        print(f"{json_key} : {len(self.p1_json_dict[json_key])}")
                        
                for json_key in self.p2_json_dict:
                    if json_key != "ProjectileType":
                        print(f"{json_key} : {len(self.p2_json_dict[json_key])}")
                        
                print(f"START BUFFERS: {BUFFERTURNS}, ACTUAL TURNS: {len(self.player1._inputs)}")
                print(f"jsonfill is {JSONFILL}")
                print(f"{self.path1} HP: {self.player1._hp} --  {self.path2} HP: {self.player2._hp}")
            
            winner = None
            
            if self.player1._hp > self.player2._hp:
                print(f"{self.path1} won in {self.tick} turns!")
                winner = self.path1
            elif self.player1._hp < self.player2._hp:
                print(f"{self.path2} won in {self.tick} turns!")
                winner = self.path2
            else:
                print('Tie!')
            
            round_info = {'p1': self.path1, 'p2':self.path2, 'winner':winner, 'roundNum':self.roundNum}
            self.round_results_json.write_text(json.dumps(round_info))
            
        return self.state, reward, done, info

    def render(self):
        pass
    
    
    def reset(self):
        PRIMARY_SKILL = TeleportSkill
        SECONDARY_SKILL = Hadoken
        JUMP = ("move", (0,1))
        FORWARD = ("move", (1,0))
        BACK = ("move", (-1,0))
        JUMP_FORWARD = ("move", (1,1))
        JUMP_BACKWARD = ("move", (-1, 1))
        LIGHT = ("light",)
        HEAVY = ("heavy",)
        BLOCK = ("block",)
        PRIMARY = get_skill(PRIMARY_SKILL)
        SECONDARY = get_skill(SECONDARY_SKILL)
        CANCEL = ("skill_cancel", )
        NOMOVE = "NoMove"
        self.Actions=[PRIMARY,SECONDARY,JUMP,FORWARD,BACK,JUMP_FORWARD,JUMP_BACKWARD,LIGHT,HEAVY,BLOCK,CANCEL,NOMOVE]
        self.path1=PATH1;
        self.path2=PATH2;
        self.projectiles=[];
        self.p1_dead=False;
        self.p2_dead=False;
        self.submissionpath=SUBMISSIONPATH
        self.roundNum=1;
        self.p1, self.p2 = getPlayerFiles(self.path1, self.path2, self.submissionpath)
        self.p1_script = self.p1.Script()
        self.p2_script = self.p2.Script()
        self.player1, self.player2 = setupGame(self.p1_script,self.p2_script)
        # Check if file exists if so delete it 
        self.player_json = Path("jsonfiles/")
        # create new battle file with player jsons
        self.new_battle = self.player_json / f"Round_{self.roundNum}"
        self.player1_json = self.new_battle / "p1.json"
        self.player2_json = self.new_battle / "p2.json"
        # create round result file
        self.round_results_json = self.new_battle / "round.json"
        # get list of battles 
        files = self.player_json.glob("*")
        battles = [x for x in files if x.is_dir()]   
        # check if this battle has not happened before
        if f"Round {self.roundNum}" not in battles:
            self.player1_json.parent.mkdir(parents=True, exist_ok=True)
            self.player2_json.parent.mkdir(parents=True, exist_ok=True)
            self.round_results_json.parent.mkdir(parents=True, exist_ok=True)
            
        self.player1_json.open("w")
        self.player2_json.open("w")
        self.round_results_json.open("w")
        # structure the dict, no need to structure round result dict until the end
        self.p1_json_dict = get_empty_json()
        self.p2_json_dict = get_empty_json()
        
        # Initialize variables
        self.projectiles = []
        self.tick = 0
        self.max_tick = TIME_LIMIT * MOVES_PER_SECOND
        
        # Buffer turn : for smoothness
        for _ in range(BUFFERTURNS * 2): # 2 since fill ticks
            playerToJson(self.player1, self.p1_json_dict, fill=True, start=True)
            playerToJson(self.player2, self.p2_json_dict, fill=True, start=True)
            projectileToJson(None, self.p1_json_dict, False, fill=True)
            projectileToJson(None, self.p2_json_dict, False, fill=True)
            self.tick += 1
            self.max_tick += 1
            

        return self.state
    

In [30]:
env = ShowerEnv()

True
True
<class 'Game.Skills.TeleportSkill'> <class 'Game.projectiles.Hadoken'>
True
True
<class 'Game.Skills.TeleportSkill'> <class 'Game.projectiles.Hadoken'>


In [19]:
env.observation_space.sample()

AttributeError: 'ShowerEnv' object has no attribute 'observation_space'

In [33]:
episodes = 1
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    
    while not done:
        #env.render()
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))

True
True
<class 'Game.Skills.TeleportSkill'> <class 'Game.projectiles.Hadoken'>
True
True
<class 'Game.Skills.TeleportSkill'> <class 'Game.projectiles.Hadoken'>
Bot2 won in 108 turns!
Episode:1 Score:0


# 2. Create a Deep Learning Model with Keras

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [None]:
states = env.observation_space.shape
actions = env.action_space.n

In [None]:
actions

3

In [None]:
def build_model(states, actions):
    model = Sequential()    
    model.add(Dense(24, activation='relu', input_shape=states))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    return model

In [None]:
del model 

In [None]:
model = build_model(states, actions)

In [None]:
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_9 (Dense)              (None, 24)                48        
_________________________________________________________________
dense_10 (Dense)             (None, 24)                600       
_________________________________________________________________
dense_11 (Dense)             (None, 3)                 75        
Total params: 723
Trainable params: 723
Non-trainable params: 0
_________________________________________________________________


# 3. Build Agent with Keras-RL

In [None]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [None]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=50000, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

In [None]:
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=50000, visualize=False, verbose=1)

Training for 50000 steps ...
Interval 1 (0 steps performed)
166 episodes - episode_reward: -38.000 [-60.000, 32.000] - loss: 1.235 - mae: 6.439 - mean_q: -8.204

Interval 2 (10000 steps performed)
167 episodes - episode_reward: -30.263 [-60.000, 36.000] - loss: 2.347 - mae: 11.012 - mean_q: -15.812

Interval 3 (20000 steps performed)
167 episodes - episode_reward: -27.964 [-60.000, 36.000] - loss: 2.621 - mae: 11.725 - mean_q: -16.873

Interval 4 (30000 steps performed)
166 episodes - episode_reward: -28.916 [-60.000, 42.000] - loss: 2.326 - mae: 10.960 - mean_q: -15.735

Interval 5 (40000 steps performed)

In [None]:
scores = dqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))

Testing for 100 episodes ...
Episode 1: reward: -56.000, steps: 60
Episode 2: reward: -60.000, steps: 60
Episode 3: reward: -50.000, steps: 60
Episode 4: reward: -60.000, steps: 60
Episode 5: reward: -56.000, steps: 60
Episode 6: reward: -52.000, steps: 60
Episode 7: reward: -60.000, steps: 60
Episode 8: reward: -50.000, steps: 60
Episode 9: reward: -52.000, steps: 60
Episode 10: reward: -56.000, steps: 60
Episode 11: reward: -60.000, steps: 60
Episode 12: reward: -60.000, steps: 60
Episode 13: reward: -52.000, steps: 60
Episode 14: reward: -52.000, steps: 60
Episode 15: reward: -58.000, steps: 60
Episode 16: reward: -50.000, steps: 60
Episode 17: reward: -54.000, steps: 60
Episode 18: reward: -58.000, steps: 60
Episode 19: reward: -60.000, steps: 60
Episode 20: reward: -56.000, steps: 60
Episode 21: reward: -56.000, steps: 60
Episode 22: reward: -52.000, steps: 60
Episode 23: reward: -60.000, steps: 60
Episode 24: reward: -56.000, steps: 60
Episode 25: reward: -58.000, steps: 60
Episo

In [None]:
_ = dqn.test(env, nb_episodes=15, visualize=True)

Testing for 15 episodes ...
Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
Episode 6: reward: 200.000, steps: 200
Episode 7: reward: 200.000, steps: 200
Episode 8: reward: 200.000, steps: 200
Episode 9: reward: 200.000, steps: 200
Episode 10: reward: 200.000, steps: 200
Episode 11: reward: 200.000, steps: 200
Episode 12: reward: 200.000, steps: 200
Episode 13: reward: 200.000, steps: 200
Episode 14: reward: 200.000, steps: 200
Episode 15: reward: 200.000, steps: 200


# 4. Reloading Agent from Memory

In [None]:
dqn.save_weights('dqn_weights.h5f', overwrite=True)

In [None]:
del model
del dqn
del env

In [None]:
env = gym.make('CartPole-v0')
actions = env.action_space.n
states = env.observation_space.shape[0]
model = build_model(states, actions)
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

In [None]:
dqn.load_weights('dqn_weights.h5f')

In [None]:
_ = dqn.test(env, nb_episodes=5, visualize=True)

Testing for 5 episodes ...
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
