# STEP 4 - Making DRL PySC2 Agent

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys; sys.path.append('..')
# Directory containing the PyTorch network model definitions (written separately)
# !unzip  skdrl.zip -d ~

## 0. Running 'Agent code' on jupyter notebook

In [3]:
# Unfortunately, PySC2 uses Abseil, which treats Python code as if it is run as a command-line app.
# This does not play well with Jupyter notebooks,
# so we need to monkeypatch sys.argv.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
#sys.argv = ["python", "--map", "AbyssalReef"]
sys.argv = ["python", "--map", "Simple64"]

# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Run an agent."""



import importlib
import threading

from absl import app
from absl import flags
from future.builtins import range  # pylint: disable=redefined-builtin

from pysc2 import maps
from pysc2.env import available_actions_printer
from pysc2.env import run_loop
from pysc2.env import sc2_env
from pysc2.lib import point_flag
from pysc2.lib import stopwatch
from pysc2.lib import actions

FLAGS = flags.FLAGS

# because of Abseil's horrible design for running code underneath Colabs
# We have to pull out this ugly hack from the hat
# print(globals())

if "flags_defined" not in globals():
    # Define the flags only once per kernel session: absl rejects a second
    # definition of an already-registered flag name, which is what re-running
    # this cell would otherwise attempt.
    print("Flag set")

    # --- Observation / action interface ---
    flags.DEFINE_bool("render", False, "Whether to render with pygame.")
    point_flag.DEFINE_point("feature_screen_size", "84",
                            "Resolution for screen feature layers.")
    point_flag.DEFINE_point("feature_minimap_size", "64",
                            "Resolution for minimap feature layers.")
    point_flag.DEFINE_point("rgb_screen_size", None,
                            "Resolution for rendered screen.")
    point_flag.DEFINE_point("rgb_minimap_size", None,
                            "Resolution for rendered minimap.")
    flags.DEFINE_enum("action_space", "RAW", sc2_env.ActionSpace._member_names_,  # pylint: disable=protected-access
                      "Which action space to use. Needed if you take both feature "
                      "and rgb observations.")
    flags.DEFINE_bool("use_feature_units", False,
                      "Whether to include feature units.")
    flags.DEFINE_bool("use_raw_units", True,
                      "Whether to include raw units.")
    flags.DEFINE_integer("raw_resolution", 64, "Raw Resolution.")
    flags.DEFINE_bool("disable_fog", True, "Whether to disable Fog of War.")

    # --- Episode length / stepping ---
    flags.DEFINE_integer("max_agent_steps", 0, "Total agent steps.")
    flags.DEFINE_integer("game_steps_per_episode", None, "Game steps per episode.")
    flags.DEFINE_integer("max_episodes", 0, "Total episodes.")
    flags.DEFINE_integer("step_mul", 8, "Game steps per agent step.")
    flags.DEFINE_float("fps", 22.4, "Frames per second to run the game.")

    # --- Players ---
    # Previous (Zerg) defaults, kept for reference:
    #flags.DEFINE_string("agent", "sc2.agent.BasicAgent.ZergBasicAgent",
    #                    "Which agent to run, as a python path to an Agent class.")
    #flags.DEFINE_enum("agent_race", "zerg", sc2_env.Race._member_names_,  # pylint: disable=protected-access
    #                  "Agent 1's race.")
    flags.DEFINE_string("agent", "TerranRLAgentWithRawActsAndRawObs",
                        "Which agent to run, as a python path to an Agent class.")
    flags.DEFINE_enum("agent_race", "terran", sc2_env.Race._member_names_,  # pylint: disable=protected-access
                      "Agent 1's race.")

    flags.DEFINE_string("agent2", "Bot", "Second agent, either Bot or agent class.")
    flags.DEFINE_enum("agent2_race", "zerg", sc2_env.Race._member_names_,  # pylint: disable=protected-access
                      "Agent 2's race.")
    # Built-in bot strength. The possible levels are very_easy, easy, medium,
    # medium_hard, hard, harder, very_hard, cheat_vision, cheat_money and
    # cheat_insane. Note that medium_hard is what the game itself calls "hard".
    # (The default used to be "very_easy".)
    flags.DEFINE_enum("difficulty", "medium", sc2_env.Difficulty._member_names_,  # pylint: disable=protected-access
                      "If agent2 is a built-in Bot, it's strength.")

    # --- Diagnostics / parallelism ---
    flags.DEFINE_bool("profile", False, "Whether to turn on code profiling.")
    flags.DEFINE_bool("trace", False, "Whether to trace the code execution.")
    flags.DEFINE_integer("parallel", 1, "How many instances to run in parallel.")

    flags.DEFINE_bool("save_replay", True, "Whether to save a replay at the end.")

    # The map has no default and must be supplied (here via the patched sys.argv).
    flags.DEFINE_string("map", None, "Name of a map to use.")
    flags.mark_flag_as_required("map")

# Marker read by the guard above on subsequent executions of this cell.
flags_defined = True

# print(type(sc2_env.Race[FLAGS.agent2_race]))
# print(type(sc2_env.Difficulty[FLAGS.difficulty]))

def run_thread(agent_classes, players, map_name, visualize):
  """Run one thread worth of the environment with agents.

  Creates an SC2 environment using the raw action/observation interface,
  instantiates one agent per entry in `agent_classes`, runs the PySC2 run
  loop, and optionally saves a replay named after the first agent class.

  Args:
    agent_classes: Agent classes to instantiate (called with no arguments).
    players: Player descriptors (`sc2_env.Agent` / `sc2_env.Bot`) passed to
      the environment.
    map_name: Name of the map to load.
    visualize: Whether the environment should render.
  """
  # Imported locally so this function does not rely on a later notebook cell
  # having already put `features` into the shared namespace.
  from pysc2.lib import features  # pylint: disable=g-import-not-at-top

  # Honor the interface flags instead of hard-coding them; the flag defaults
  # (RAW, True, 64) are the values that used to be hard-coded here.
  interface_format = features.AgentInterfaceFormat(
      action_space=actions.ActionSpace[FLAGS.action_space],
      use_raw_units=FLAGS.use_raw_units,
      raw_resolution=FLAGS.raw_resolution,
  )

  with sc2_env.SC2Env(
      map_name=map_name,
      players=players,
      agent_interface_format=interface_format,
      step_mul=FLAGS.step_mul,
      game_steps_per_episode=FLAGS.game_steps_per_episode,
      disable_fog=FLAGS.disable_fog,
      visualize=visualize) as env:
    agents = [agent_cls() for agent_cls in agent_classes]
    run_loop.run_loop(agents, env, FLAGS.max_agent_steps, FLAGS.max_episodes)
    if FLAGS.save_replay:
      env.save_replay(agent_classes[0].__name__)

def main(unused_argv):
  """Run an agent.

  Player 1 is always `TerranRLAgentWithRawActsAndRawObs` (the `--agent` flag
  is not consulted). On maps with two or more players the opponent is either
  the built-in bot (`--agent2 Bot`) or a `TerranRandomAgent`.
  `--parallel - 1` extra environments are run in background threads without
  rendering; the calling thread runs one more, rendered according to
  `--render`.

  Args:
    unused_argv: Positional command-line arguments left over by absl; ignored.
  """
  map_inst = maps.get(FLAGS.map)

  agent_classes = [TerranRLAgentWithRawActsAndRawObs]
  players = [sc2_env.Agent(sc2_env.Race[FLAGS.agent_race])]

  if map_inst.players >= 2:
    if FLAGS.agent2 == "Bot":
      print(FLAGS.agent2)
      # NOTE(review): the bot race stays hard-coded to terran and ignores
      # --agent2_race (default "zerg"). This looks deliberate, since the RL
      # agent's state features only count Terran enemy units -- confirm.
      # The difficulty now follows --difficulty (default "medium", i.e. the
      # previously hard-coded value).
      players.append(sc2_env.Bot(sc2_env.Race["terran"],
                                 sc2_env.Difficulty[FLAGS.difficulty]))
    else:
      print("Not Bot")
      # NOTE(review): TerranRandomAgent is Terran-specific, but its race is
      # taken from --agent2_race, whose default is "zerg" -- verify callers
      # pass a matching race.
      agent_classes.append(TerranRandomAgent)
      players.append(sc2_env.Agent(sc2_env.Race[FLAGS.agent2_race]))

  print(players)

  # Extra environments run in background threads and never render.
  threads = []
  for _ in range(FLAGS.parallel - 1):
    t = threading.Thread(target=run_thread,
                         args=(agent_classes, players, FLAGS.map, False))
    threads.append(t)
    t.start()

  run_thread(agent_classes, players, FLAGS.map, FLAGS.render)

  for t in threads:
    t.join()

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html
Flag set


## 3. Applying Vanilla DQN to a PySC2 Agent

구현된 기능

- Implementing 'Experience Replay' : 
    - 'Maximization Bias' 문제를 발생시키는 원인 중 하나인 'Sample간의 시간적 연관성'을 해결하기 위한 방법
    - Online Learning 에서 Batch Learning 으로 학습방법 바뀜 : Online update 는 Batch update 보다 일반적으로 Validation loss 가 더 높게 나타남.
    - Long-Ji Lin, "Reinforcement Learning for Robots Using Neural Networks" (January 6, 1993) 논문에서 최초로 연구됨 http://isl.anthropomatik.kit.edu/pdf/Lin1993.pdf

- Implementing 'Fixed Q-Target' : 
    - 'Moving Q-Target' 문제 해결하기 위한 방법
    - 2015년 Nature 버전 DQN 논문에서 처음 제안됨. https://deepmind.com/research/publications/human-level-control-through-deep-reinforcement-learning 


구현되지 않은 기능

- Implementing 'Sensory Input Feature-Extraction' :
    - 게임의 Raw Image 를 Neural Net에 넣기 위한 Preprocessing(전처리) 과정
    - Raw Image 의 Sequence중 '최근 4개의 이미지'(과거 정보)를 하나의 새로운 State로 정의하여 non-MDP를 MDP 문제로 바꾸는 Preprocessing 과정 
    - CNN(합성곱 신경망)을 활용한 '차원의 저주' 극복

In [19]:
import random
import time
import math
import os.path

import numpy as np
import pandas as pd
from collections import deque
import pickle

from pysc2.agents import base_agent
from pysc2.env import sc2_env
from pysc2.lib import actions, features, units
from absl import app

import torch
from torch.utils.tensorboard import SummaryWriter

from skdrl.pytorch.model.mlp import NaiveMultiLayerPerceptron
from skdrl.common.memory.memory import ExperienceReplayMemory

# The online Q-network and the target network are separate models, so their
# weights are stored under two separate file names ('.pt' is appended on use).

DATA_FILE_QNET = '30_rlagent_with_vanilla_dqn_qnet'
DATA_FILE_QNET_TARGET = '30_rlagent_with_vanilla_dqn_qnet_target'
SCORE_FILE = '30_rlagent_with_vanilla_dqn_score'

scores = []                        # list containing scores from each episode
scores_window = deque(maxlen=100)  # last 100 scores

# Use the first CUDA device when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# TensorBoard event files are written to this fixed directory.
writer = SummaryWriter('/home/jupyter/tensorboard_log')
print(device)



import torch
import torch.nn as nn
import numpy as np
import random

class DQN(nn.Module):
    """Vanilla DQN learner with a separate, fixed target network.

    The online network ``qnet`` is trained with Adam on a smooth-L1 (Huber)
    loss against one-step Q-learning targets computed from ``qnet_target``.
    This class never updates the target network after construction; copying
    new weights into it is left to the caller.
    """

    def __init__(self,
                 state_dim: int,
                 action_dim: int,
                 qnet: nn.Module,
                 qnet_target: nn.Module,
                 lr: float,
                 gamma: float,
                 epsilon: float):
        """
        :param state_dim: input state dimension
        :param action_dim: action dimension
        :param qnet: main q network
        :param qnet_target: target q network
        :param lr: learning rate
        :param gamma: discount factor of MDP
        :param epsilon: E-greedy factor
        """

        super(DQN, self).__init__()
        self.state_dim = state_dim
        self.action_dim = action_dim
        self.qnet = qnet
        self.lr = lr
        self.gamma = gamma  # discount applied to future rewards
        # Only the online network is optimized.
        self.opt = torch.optim.Adam(params=self.qnet.parameters(), lr=lr)
        # Stored as a buffer so it follows the module across devices.
        self.register_buffer('epsilon', torch.ones(1) * epsilon)

        # target network related: start from the online network's weights
        qnet_target.load_state_dict(qnet.state_dict())
        self.qnet_target = qnet_target
        self.criteria = nn.SmoothL1Loss()

    def choose_action(self, state):
        """Pick an action index with an epsilon-greedy policy.

        :param state: input tensor for ``qnet``; the greedy branch converts
            the argmax to ``int``, so it must describe a single state
        :return: action index as a Python ``int``
        """
        if random.random() <= self.epsilon:  # explore
            return int(np.random.choice(self.action_dim))
        # Exploit. No autograd graph is needed for action selection, and the
        # forward pass is skipped entirely on the exploration branch above.
        with torch.no_grad():
            return int(self.qnet(state).argmax(dim=-1))

    def learn(self, state, action, reward, next_state, done):
        """Run one gradient step on a batch of transitions.

        :param state: batch of states fed to ``qnet``
        :param action: integer tensor of taken action indices, used as the
            index for ``gather`` along dim 1
        :param reward: rewards, broadcastable against the per-row max Q-value
        :param next_state: batch of next states fed to ``qnet_target``
        :param done: 1 where the transition ended the episode, else 0;
            masks out the bootstrap term
        :return: the scalar loss of this step as a Python ``float``
        """
        # compute Q-Learning target with 'target network'
        with torch.no_grad():
            q_max, _ = self.qnet_target(next_state).max(dim=-1, keepdim=True)
            q_target = reward + self.gamma * q_max * (1 - done)

        q_val = self.qnet(state).gather(1, action)
        loss = self.criteria(q_val, q_target)

        self.opt.zero_grad()
        loss.backward()
        self.opt.step()

        # Returned so callers can log/accumulate it; previously discarded.
        return loss.item()


def prepare_training_inputs(sampled_exps, device='cpu'):
    """Collate sampled experiences into batched tensors.

    Each experience is indexed as (state, action, reward, next_state, done),
    every element being a tensor that can be concatenated along dim 0.

    :param sampled_exps: sequence of experiences
    :param device: device the batched tensors are moved to
    :return: (states, actions, rewards, next_states, dones); all but the
        actions are cast to float, the actions keep their dtype
    """

    def _collate(field, to_float=True):
        # Concatenate one field across all experiences along the batch axis.
        batch = torch.cat([exp[field] for exp in sampled_exps], dim=0)
        if to_float:
            batch = batch.float()
        return batch.to(device)

    state_batch = _collate(0)
    action_batch = _collate(1, to_float=False)
    reward_batch = _collate(2)
    next_state_batch = _collate(3)
    done_batch = _collate(4)
    return state_batch, action_batch, reward_batch, next_state_batch, done_batch


class TerranAgentWithRawActsAndRawObs(base_agent.BaseAgent):
    """Scripted Terran building blocks on top of PySC2 raw units/raw actions.

    Provides unit-query helpers plus one method per entry of ``actions``.
    Every action method takes the current observation and returns a raw
    function call, falling back to ``no_op`` when its preconditions fail.
    """

    # Names of the action methods; subclasses pick one of these per step.
    actions = ("do_nothing",
               "harvest_minerals",
               "build_supply_depot",
               "build_barracks",
               "train_marine",
               "attack")

    # Neutral unit types that count as harvestable mineral fields.
    _MINERAL_FIELD_TYPES = (
        units.Neutral.BattleStationMineralField,
        units.Neutral.BattleStationMineralField750,
        units.Neutral.LabMineralField,
        units.Neutral.LabMineralField750,
        units.Neutral.MineralField,
        units.Neutral.MineralField750,
        units.Neutral.PurifierMineralField,
        units.Neutral.PurifierMineralField750,
        units.Neutral.PurifierRichMineralField,
        units.Neutral.PurifierRichMineralField750,
        units.Neutral.RichMineralField,
        units.Neutral.RichMineralField750,
    )

    def get_my_units_by_type(self, obs, unit_type):
        """Return own raw units of ``unit_type`` (finished or not)."""
        return [unit for unit in obs.observation.raw_units
                if unit.unit_type == unit_type
                and unit.alliance == features.PlayerRelative.SELF]

    def get_enemy_units_by_type(self, obs, unit_type):
        """Return enemy raw units of ``unit_type`` (finished or not)."""
        return [unit for unit in obs.observation.raw_units
                if unit.unit_type == unit_type
                and unit.alliance == features.PlayerRelative.ENEMY]

    def get_my_completed_units_by_type(self, obs, unit_type):
        """Return own raw units of ``unit_type`` whose build_progress is 100."""
        return [unit for unit in obs.observation.raw_units
                if unit.unit_type == unit_type
                and unit.build_progress == 100
                and unit.alliance == features.PlayerRelative.SELF]

    def get_enemy_completed_units_by_type(self, obs, unit_type):
        """Return enemy raw units of ``unit_type`` whose build_progress is 100."""
        return [unit for unit in obs.observation.raw_units
                if unit.unit_type == unit_type
                and unit.build_progress == 100
                and unit.alliance == features.PlayerRelative.ENEMY]

    def get_distances(self, obs, units, xy):
        """Return the Euclidean distance from each unit to ``xy``.

        An empty ``units`` list yields an empty array instead of raising.
        """
        # reshape(-1, 2) keeps the array 2-D even when `units` is empty.
        units_xy = np.array([(unit.x, unit.y) for unit in units],
                            dtype=float).reshape(-1, 2)
        return np.linalg.norm(units_xy - np.array(xy), axis=1)

    def getMeanLocation(self, unitList):
        """Return ``[mean_x, mean_y]`` of the given units.

        Raises ZeroDivisionError when ``unitList`` is empty.
        """
        count = len(unitList)
        mean_x = sum(unit.x for unit in unitList) / count
        mean_y = sum(unit.y for unit in unitList) / count
        return [mean_x, mean_y]

    def transformDistance(self, x, x_distance, y, y_distance):
        """Offset (x, y) by the distances, mirrored when the base is not top-left."""
        if not self.base_top_left:
            return [x - x_distance, y - y_distance]
        return [x + x_distance, y + y_distance]

    def transformLocation(self, x, y):
        """Mirror (x, y) on a 64x64 grid when the base is not top-left."""
        if not self.base_top_left:
            return [64 - x, 64 - y]
        return [x, y]

    @staticmethod
    def _random_point_near(center_xy, x_spread, y_spread):
        """Return a random integer point within the given spread of a center.

        The center is rounded first: ``random.randint`` requires integer
        bounds (float bounds raise TypeError on Python 3.12+), while
        ``getMeanLocation`` returns floats.
        """
        center_x, center_y = (int(round(value)) for value in center_xy)
        x = random.randint(center_x - x_spread, center_x + x_spread)
        y = random.randint(center_y - y_spread, center_y + y_spread)
        return (x, y)

    def step(self, obs):
        """Advance the base agent; on the first frame record the base side."""
        super(TerranAgentWithRawActsAndRawObs, self).step(obs)
        if obs.first():
            command_center = self.get_my_units_by_type(
                obs, units.Terran.CommandCenter)[0]
            # Left half of the map is treated as the "top-left" start.
            self.base_top_left = (command_center.x < 32)

    def do_nothing(self, obs):
        """Return a no-op."""
        return actions.RAW_FUNCTIONS.no_op()

    def harvest_minerals(self, obs):
        """Send one random idle SCV to its nearest mineral patch."""
        scvs = self.get_my_units_by_type(obs, units.Terran.SCV)
        idle_scvs = [scv for scv in scvs if scv.order_length == 0]
        if not idle_scvs:
            return actions.RAW_FUNCTIONS.no_op()

        mineral_patches = [unit for unit in obs.observation.raw_units
                           if unit.unit_type in self._MINERAL_FIELD_TYPES]
        if not mineral_patches:
            # Nothing to harvest; argmin over an empty array would raise.
            return actions.RAW_FUNCTIONS.no_op()

        scv = random.choice(idle_scvs)
        distances = self.get_distances(obs, mineral_patches, (scv.x, scv.y))
        mineral_patch = mineral_patches[np.argmin(distances)]
        return actions.RAW_FUNCTIONS.Harvest_Gather_unit(
            "now", scv.tag, mineral_patch.tag)

    def build_supply_depot(self, obs):
        """Build a supply depot at a random point near the command centers.

        Requires at least 100 minerals, fewer than 25 depots, and an expected
        free supply (15 + 8 per depot, minus food used) of at most 20.
        """
        player = obs.observation.player
        supply_depots = self.get_my_units_by_type(obs, units.Terran.SupplyDepot)
        scvs = self.get_my_units_by_type(obs, units.Terran.SCV)

        if player.minerals < 100 or len(supply_depots) >= 25:
            return actions.RAW_FUNCTIONS.no_op()

        # Counts depots still under construction, unlike food_cap.
        exp_free_supply = (15 + len(supply_depots) * 8) - player.food_used
        if exp_free_supply > 20:
            return actions.RAW_FUNCTIONS.no_op()

        ccs = self.get_my_units_by_type(obs, units.Terran.CommandCenter)
        if not ccs or not scvs:
            return actions.RAW_FUNCTIONS.no_op()

        scv = random.choice(scvs)
        target_xy = self._random_point_near(self.getMeanLocation(ccs), 15, 10)
        return actions.RAW_FUNCTIONS.Build_SupplyDepot_pt(
            "now", scv.tag, target_xy)

    def build_barracks(self, obs):
        """Build a barracks at a random point near the command centers.

        Requires at least 150 minerals, fewer than 15 barracks, at least one
        supply depot, and no more barracks than supply depots.
        """
        supply_depots = self.get_my_units_by_type(obs, units.Terran.SupplyDepot)
        barrackses = self.get_my_units_by_type(obs, units.Terran.Barracks)
        scvs = self.get_my_units_by_type(obs, units.Terran.SCV)

        if obs.observation.player.minerals < 150 or len(barrackses) >= 15:
            return actions.RAW_FUNCTIONS.no_op()
        if not supply_depots or len(supply_depots) < len(barrackses):
            return actions.RAW_FUNCTIONS.no_op()

        ccs = self.get_my_units_by_type(obs, units.Terran.CommandCenter)
        if not ccs or not scvs:
            return actions.RAW_FUNCTIONS.no_op()

        scv = random.choice(scvs)
        target_xy = self._random_point_near(self.getMeanLocation(ccs), 15, 12)
        return actions.RAW_FUNCTIONS.Build_Barracks_pt(
            "now", scv.tag, target_xy)

    def train_marine(self, obs):
        """Queue a marine at the first completed barracks.

        Requires a completed barracks with fewer than 5 queued orders, at
        least 100 minerals and free supply.
        """
        player = obs.observation.player
        completed_barrackses = self.get_my_completed_units_by_type(
            obs, units.Terran.Barracks)
        free_supply = player.food_cap - player.food_used

        if completed_barrackses and player.minerals >= 100 and free_supply > 0:
            # Use a *completed* barracks: the first barracks overall may
            # still be under construction and cannot train units.
            barracks = completed_barrackses[0]
            if barracks.order_length < 5:
                return actions.RAW_FUNCTIONS.Train_Marine_quick(
                    "now", barracks.tag)
        return actions.RAW_FUNCTIONS.no_op()

    def attack(self, obs):
        """Send all marines to a jittered point once more than 3 exist.

        The target is a fixed coordinate chosen by ``base_top_left``.
        """
        marines = self.get_my_units_by_type(obs, units.Terran.Marine)
        if len(marines) <= 3:
            return actions.RAW_FUNCTIONS.no_op()

        attack_xy = (38, 44) if self.base_top_left else (19, 23)
        # Random jitter so the marines do not always hit the same point.
        x_offset = random.randint(-12, 12)
        y_offset = random.randint(-7, 7)
        return actions.RAW_FUNCTIONS.Attack_pt(
            "now", [unit.tag for unit in marines],
            (attack_xy[0] + x_offset, attack_xy[1] + y_offset))


class TerranRandomAgent(TerranAgentWithRawActsAndRawObs):
    """Agent that executes one uniformly random scripted action per step."""

    def step(self, obs):
        """Run the parent bookkeeping, then dispatch to a random action method."""
        super(TerranRandomAgent, self).step(obs)
        chosen_name = random.choice(self.actions)
        handler = getattr(self, chosen_name)
        return handler(obs)

cuda:0


### Hyperparameter

하이퍼파라미터는 심층강화학습 알고리즘에서 성능에 매우 큰 영향을 미칩니다.
이 실험에 쓰인 하이퍼파라미터는 https://github.com/chucnorrisful/dqn 실험에서 제안된 값들을 참고하였습니다.
- self.epsilon = max(self.eps_min, self.eps_max - self.eps_min * (self.episode_count / 50))

In [None]:
class TerranRLAgentWithRawActsAndRawObs(TerranAgentWithRawActsAndRawObs):
    def __init__(self):
        """Set hyperparameters, build both Q-networks and the DQN learner.

        Existing checkpoints (``<name>.pt``) are loaded when present. The
        target network is then overwritten with the online network's weights,
        so a loaded target checkpoint does not survive construction.
        """
        super(TerranRLAgentWithRawActsAndRawObs, self).__init__()

        # Problem size: state features in, discrete actions out.
        self.s_dim = 12
        self.a_dim = 6

        # Learning hyperparameters.
        self.lr = 1e-4 * 1
        self.batch_size = 32
        self.gamma = 0.99
        self.memory_size = 200000
        self.eps_max = 0.7
        self.eps_min = 0.02
        self.epsilon = 0.7
        self.init_sampling = 4000
        self.target_update_interval = 10

        # Checkpoint / score file name stems.
        self.data_file_qnet = DATA_FILE_QNET
        self.data_file_qnet_target = DATA_FILE_QNET_TARGET
        self.score_file = SCORE_FILE

        def build_q_network():
            # Online and target networks share one architecture.
            net = NaiveMultiLayerPerceptron(input_dim=self.s_dim,
                                            output_dim=self.a_dim,
                                            num_neurons=[256, 128, 64],
                                            hidden_act_func='ReLU',
                                            out_act_func='Identity')
            return net.to(device)

        self.qnetwork = build_q_network()
        self.qnetwork_target = build_q_network()

        # Restore saved weights where a checkpoint file exists.
        checkpoints = ((self.qnetwork, self.data_file_qnet),
                       (self.qnetwork_target, self.data_file_qnet_target))
        for network, file_stem in checkpoints:
            checkpoint_path = file_stem + '.pt'
            if os.path.isfile(checkpoint_path):
                saved_weights = torch.load(checkpoint_path,
                                           map_location=torch.device('cpu'))
                network.load_state_dict(saved_weights)

        # initialize target network same as the main network.
        self.qnetwork_target.load_state_dict(self.qnetwork.state_dict())

        learner = DQN(state_dim=self.s_dim,
                      action_dim=self.a_dim,
                      qnet=self.qnetwork,
                      qnet_target=self.qnetwork_target,
                      lr=self.lr,
                      gamma=self.gamma,
                      epsilon=self.epsilon)
        self.dqn = learner.to(device)

        self.memory = ExperienceReplayMemory(self.memory_size)

        # Logging / bookkeeping counters.
        self.print_every = 1
        self.cum_reward = 0
        self.cum_loss = 0
        self.episode_count = 0

        self.new_game()

    def reset(self):
        """Delegate to the parent class's reset, then clear per-episode state via ``new_game``."""
        super(TerranRLAgentWithRawActsAndRawObs, self).reset()
        self.new_game()

    def new_game(self):
        """Clear per-episode state and refresh the exploration rate.

        Epsilon decays linearly with ``episode_count`` by ``eps_min / 50`` per
        episode, starting from ``eps_max`` and never dropping below
        ``eps_min``. The new value is also pushed into the DQN learner.
        """
        # Per-episode bookkeeping.
        self.cum_loss = 0
        self.cum_reward = 0
        self.previous_action = None
        self.previous_state = None
        self.base_top_left = None

        # epsilon scheduling: slowly decaying epsilon
        decay = self.eps_min * (self.episode_count / 50)
        self.epsilon = max(self.eps_min, self.eps_max - decay)
        self.dqn.epsilon = torch.tensor(self.epsilon).to(device)

    def get_state(self, obs):
        """Build the 12-feature state tuple fed to the Q-network.

        Features, in order:
            0. number of own supply depots
            1. number of own barracks
            2. number of own marines
            3. current minerals / 10, rounded
            4. cumulative spent minerals / 10, rounded
            5. cumulative idle production time
            6. cumulative killed unit value
            7. cumulative killed structure value
            8. number of visible enemy command centers
            9. number of visible enemy SCVs
            10. number of visible enemy supply depots
            11. number of visible enemy Terran army units (land + air)

        Only Terran enemy unit types are counted.

        :param obs: PySC2 timestep providing raw units, player info and
            cumulative score
        :return: tuple of 12 numbers (its length must match ``self.s_dim``)
        """
        player = obs.observation.player
        score = obs.observation.score_cumulative

        supply_depots = self.get_my_units_by_type(obs, units.Terran.SupplyDepot)
        barrackses = self.get_my_units_by_type(obs, units.Terran.Barracks)
        marines = self.get_my_units_by_type(obs, units.Terran.Marine)

        enemy_command_centers = self.get_enemy_units_by_type(
            obs, units.Terran.CommandCenter)
        enemy_scvs = self.get_enemy_units_by_type(obs, units.Terran.SCV)
        enemy_supply_depots = self.get_enemy_units_by_type(
            obs, units.Terran.SupplyDepot)

        # Every army unit type is counted exactly once. The previous version
        # added the Medivac count twice, inflating this feature by one per
        # visible Medivac.
        # NOTE(review): VikingAssault is the only Viking type counted;
        # VikingFighter is not included -- confirm whether that is intended.
        enemy_army_types = (
            # land
            units.Terran.Marine,
            units.Terran.Marauder,
            units.Terran.Reaper,
            units.Terran.Hellion,
            units.Terran.Hellbat,
            units.Terran.SiegeTank,
            units.Terran.Cyclone,
            units.Terran.WidowMine,
            units.Terran.Thor,
            # air
            units.Terran.VikingAssault,
            units.Terran.Medivac,
            units.Terran.Liberator,
            units.Terran.Raven,
            units.Terran.Battlecruiser,
        )
        enemy_total_count = sum(
            len(self.get_enemy_units_by_type(obs, unit_type))
            for unit_type in enemy_army_types)

        return (
            len(supply_depots),
            len(barrackses),
            len(marines),
            round(player.minerals / 10, 0),
            round(score.spent_minerals / 10, 0),
            score.idle_production_time,
            score.killed_value_units,
            score.killed_value_structures,
            len(enemy_command_centers),
            len(enemy_scvs),
            len(enemy_supply_depots),
            enemy_total_count
        )

    #         return (
    #                 len(scvs),
    # #                 len(idle_scvs),
    #                 len(supply_depots),
    #                 len(barrackses),
    #                 len(marines),
    # #                 queued_marines,
    #                 free_supply,
    # #                 can_afford_marine,
    # #                 too_much_minerals,
    # #                 collected_minerals,
    #                 spent_minerals,
    # #                 idle_production_time,
    #                 killed_unit_count,
    #                 killed_building_count,
    #                 idle_worker_time,
    #                 idle_production_time,
    #                 len(enemy_command_centers),
    #                 len(enemy_scvs),
    #                 len(enemy_supply_depots),
    #                 len(enemy_barrackses),
    #                 len(enemy_marines),
    # #                 len(enemy_Marauder),
    # #                 len(enemy_Reaper),
    #                 len(enemy_Factory),
    # #                 len(enemy_Hellion) + len(enemy_Hellbat),
    # #                 len(enemy_SiegeTank),
    # #                 len(enemy_Cyclone),
    # #                 len(enemy_WidowMine),
    # #                 len(enemy_Thor),
    #                 len(enemy_Starport),
    # #                 len(enemy_Viking),
    # #                 len(enemy_Medivac),
    # #                 len(enemy_Liberator),
    # #                 len(enemy_Raven),
    # #                 len(enemy_Battlecruiser),
    #                 enemy_land_count,
    #                 enemy_total_count
    # #                 enemy_air_count,
    #                 )

    # return (
    # len(command_centers),
    #         len(scvs),
    #         len(idle_scvs),
    #         len(supply_depots),
    #         # len(completed_supply_depots),
    #         len(barrackses),
    #         # len(completed_barrackses),
    #         len(marines),
    #         queued_marines,
    #         free_supply,
    #         # can_afford_supply_depot,
    #         # can_afford_barracks,
    #         can_afford_marine,
    #         too_much_minerals,
    #         minerals_size,
    #         len(enemy_command_centers),
    #         len(enemy_scvs),
    #         # len(enemy_idle_scvs),
    #         len(enemy_supply_depots),
    #         # len(enemy_completed_supply_depots),
    #         len(enemy_barrackses),
    #         # len(enemy_completed_barrackses),
    #         len(enemy_marines),
    #         killed_unit_count,
    #         killed_building_count
    #         )

    def step(self, obs):
        """Choose the next action, record the transition, and train at episode end.

        Every call:
          * encodes the observation with ``self.get_state`` into a
            ``(1, self.s_dim)`` float tensor on ``device``;
          * asks ``self.dqn`` for an action index and looks up the matching
            method name in ``self.actions``;
          * if an earlier decision exists, pushes the transition
            (previous state, previous action, reward, state, done) to
            ``self.memory``.

        When ``obs.last()`` is true it additionally prints unit counts, bumps
        the episode counter, runs one learning step if the replay memory
        holds at least ``self.init_sampling`` items, periodically syncs the
        target network, saves both networks, and appends win/tie/loss rates
        to the module-level ``scores`` list, which is then pickled to disk.

        Args:
            obs: the environment timestep passed in by the run loop.

        Returns:
            Whatever the selected action method returns for ``obs``.
        """
        super(TerranRLAgentWithRawActsAndRawObs, self).step(obs)

        # Observation -> network input.
        raw_state = self.get_state(obs)
        state = torch.tensor(raw_state).float().view(1, self.s_dim).to(device)

        action_idx = self.dqn.choose_action(state)
        action = self.actions[action_idx]
        done = bool(obs.last())

        # The very first decision has no predecessor, so nothing is stored.
        if self.previous_action is not None:
            prev_state = self.previous_state.to(device)
            prev_action = torch.tensor(self.previous_action).view(1, 1).to(device)
            reward = torch.tensor(obs.reward).view(1, 1).to(device)
            next_state = state.to(device)
            terminal = torch.tensor(done).view(1, 1).to(device)
            self.memory.push((prev_state, prev_action, reward, next_state, terminal))

        # State vector layout as listed by the original author
        # (NOTE(review): verify against get_state):
        #    0 command centers      1 SCVs                2 idle SCVs
        #    3 supply depots        4 barracks            5 marines
        #    6 queued marines       7 free supply         8 can afford marine
        #    9 too much minerals   10 minerals size      11 enemy command centers
        #   12 enemy SCVs          13 enemy supply depots 14 enemy barracks
        #   15 enemy marines       16 killed unit count  17 killed building count

        self.previous_state = state
        self.previous_action = action_idx
        # NOTE(review): despite the name, this keeps only the latest step
        # reward; it is not summed over the episode here.
        self.cum_reward = obs.reward

        # Mid-episode steps are finished at this point.
        if not obs.last():
            return getattr(self, action)(obs)

        # ---- end-of-episode bookkeeping ----
        depot_units = self.get_my_units_by_type(obs, units.Terran.SupplyDepot)
        marine_units = self.get_my_units_by_type(obs, units.Terran.Marine)
        barracks_units = self.get_my_units_by_type(obs, units.Terran.Barracks)
        print("barracks : ", len(barracks_units), " supply : ", len(depot_units))
        print("marines : ", len(marine_units))
        self.episode_count += 1

        # One gradient update per episode once enough experience is stored.
        if len(self.memory) >= self.init_sampling:
            batch = prepare_training_inputs(self.memory.sample(self.batch_size), device)
            self.dqn.learn(*batch)

        # Copy the online network weights into the target network.
        if self.episode_count % self.target_update_interval == 0:
            self.dqn.qnet_target.load_state_dict(self.dqn.qnet.state_dict())

        if self.episode_count % self.print_every == 0:
            print("Episode : {:4.0f} | Cumulative Reward : {:4.0f} | Epsilon : {:.3f}".format(
                self.episode_count, self.cum_reward, self.epsilon))

        # Checkpoint both networks after every episode.
        torch.save(self.dqn.qnet.state_dict(), self.data_file_qnet + '.pt')
        torch.save(self.dqn.qnet_target.state_dict(), self.data_file_qnet_target + '.pt')

        # Track the outcome and store [win %, tie %, loss %] over the window.
        scores_window.append(obs.reward)
        games = len(scores_window)
        rates = [scores_window.count(outcome) / games * 100 for outcome in (1, 0, -1)]
        scores.append(rates)

        # The file carries a '.txt' suffix but its content is a binary pickle.
        with open(self.score_file + '.txt', "wb") as score_fp:
            pickle.dump(scores, score_fp)

        # Disabled logging hooks, kept for reference:
        # writer.add_scalar("Loss/train", self.cum_loss/obs.observation.game_loop, self.episode_count)
        # writer.add_scalar("Score", self.cum_reward, self.episode_count)

        return getattr(self, action)(obs)
    
if __name__ == "__main__":
    # Script entry point: let absl run `main` (sys.argv was patched near the
    # top of the notebook so absl sees the intended command-line flags).
    app.run(main)

I0924 00:05:48.254783 140688196453760 sc_process.py:135] Launching SC2: /home/jupyter/StarCraftII/Versions/Base59877/SC2_x64 -listen 127.0.0.1 -port 24377 -dataDir /home/jupyter/StarCraftII/ -tempDir /tmp/sc-3x6i_o00/
I0924 00:05:48.341655 140688196453760 remote_controller.py:167] Connecting to: ws://127.0.0.1:24377/sc2api, attempt: 0, running: True


Bot
[Agent(race=[<Race.terran: 1>], name='<unknown>'), Bot(race=[<Race.terran: 1>], difficulty=<Difficulty.medium: 3>, build=[<BotBuild.random: 1>])]


I0924 00:05:49.345455 140688196453760 remote_controller.py:167] Connecting to: ws://127.0.0.1:24377/sc2api, attempt: 1, running: True
I0924 00:05:50.348520 140688196453760 remote_controller.py:167] Connecting to: ws://127.0.0.1:24377/sc2api, attempt: 2, running: True
I0924 00:05:56.286703 140688196453760 sc2_env.py:314] Environment is ready
I0924 00:05:56.297224 140688196453760 sc2_env.py:507] Starting episode 1: [terran, terran] on Simple64
I0924 00:07:49.534119 140688196453760 sc2_env.py:725] Episode 1 finished after 16080 game steps. Outcome: [-1], reward: [-1], score: [65]


barracks :  0  supply :  0
marines :  0
Episode :    1 | Cumulative Reward :   -1 | Epsilon : 0.900


I0924 00:07:52.878456 140688196453760 sc2_env.py:507] Starting episode 2: [terran, terran] on Simple64
I0924 00:09:55.672523 140688196453760 sc2_env.py:725] Episode 2 finished after 17880 game steps. Outcome: [-1], reward: [-1], score: [260]


barracks :  0  supply :  0
marines :  0
Episode :    2 | Cumulative Reward :   -1 | Epsilon : 0.899


I0924 00:09:59.043756 140688196453760 sc2_env.py:507] Starting episode 3: [terran, terran] on Simple64
I0924 00:11:37.552018 140688196453760 sc2_env.py:725] Episode 3 finished after 14560 game steps. Outcome: [-1], reward: [-1], score: [290]


barracks :  0  supply :  0
marines :  0
Episode :    3 | Cumulative Reward :   -1 | Epsilon : 0.899


I0924 00:11:40.897291 140688196453760 sc2_env.py:507] Starting episode 4: [terran, terran] on Simple64
I0924 00:14:06.955365 140688196453760 sc2_env.py:725] Episode 4 finished after 20080 game steps. Outcome: [-1], reward: [-1], score: [665]


barracks :  0  supply :  0
marines :  0
Episode :    4 | Cumulative Reward :   -1 | Epsilon : 0.898


I0924 00:14:10.306294 140688196453760 sc2_env.py:507] Starting episode 5: [terran, terran] on Simple64
I0924 00:16:57.809493 140688196453760 sc2_env.py:725] Episode 5 finished after 20872 game steps. Outcome: [-1], reward: [-1], score: [245]


barracks :  0  supply :  0
marines :  0
Episode :    5 | Cumulative Reward :   -1 | Epsilon : 0.898


I0924 00:17:01.138025 140688196453760 sc2_env.py:507] Starting episode 6: [terran, terran] on Simple64
I0924 00:19:40.189448 140688196453760 sc2_env.py:725] Episode 6 finished after 19960 game steps. Outcome: [-1], reward: [-1], score: [160]


barracks :  0  supply :  0
marines :  0
Episode :    6 | Cumulative Reward :   -1 | Epsilon : 0.897


I0924 00:19:43.590170 140688196453760 sc2_env.py:507] Starting episode 7: [terran, terran] on Simple64
I0924 00:21:39.152258 140688196453760 sc2_env.py:725] Episode 7 finished after 16376 game steps. Outcome: [-1], reward: [-1], score: [635]


barracks :  0  supply :  0
marines :  0
Episode :    7 | Cumulative Reward :   -1 | Epsilon : 0.896


I0924 00:21:42.525231 140688196453760 sc2_env.py:507] Starting episode 8: [terran, terran] on Simple64
I0924 00:21:53.282989 140688196453760 sc2_env.py:752] Environment Close
I0924 00:21:53.285248 140688196453760 sc_process.py:232] Shutdown gracefully.
I0924 00:21:53.285917 140688196453760 sc_process.py:210] Shutdown with return code: -2
I0924 00:23:43.155565 140688196453760 sc2_env.py:725] Episode 8 finished after 16672 game steps. Outcome: [-1], reward: [-1], score: [145]


barracks :  0  supply :  0
marines :  0
Episode :    8 | Cumulative Reward :   -1 | Epsilon : 0.896


I0924 00:23:46.504496 140688196453760 sc2_env.py:507] Starting episode 9: [terran, terran] on Simple64
I0924 00:28:01.751894 140688196453760 sc2_env.py:725] Episode 9 finished after 28800 game steps. Outcome: [0], reward: [0], score: [10555]


barracks :  6  supply :  19
marines :  133
Episode :    9 | Cumulative Reward :    0 | Epsilon : 0.895


I0924 00:28:05.152161 140688196453760 sc2_env.py:507] Starting episode 10: [terran, terran] on Simple64
I0924 00:30:13.046583 140688196453760 sc2_env.py:725] Episode 10 finished after 17528 game steps. Outcome: [-1], reward: [-1], score: [270]


barracks :  0  supply :  0
marines :  0
Episode :   10 | Cumulative Reward :   -1 | Epsilon : 0.895


I0924 00:30:16.380347 140688196453760 sc2_env.py:507] Starting episode 11: [terran, terran] on Simple64
I0924 00:30:57.002555 140688196453760 sc2_env.py:725] Episode 11 finished after 7712 game steps. Outcome: [1], reward: [1], score: [3710]


barracks :  5  supply :  6
marines :  23
Episode :   11 | Cumulative Reward :    1 | Epsilon : 0.894


I0924 00:31:00.350357 140688196453760 sc2_env.py:507] Starting episode 12: [terran, terran] on Simple64
I0924 00:32:11.089972 140688196453760 sc2_env.py:725] Episode 12 finished after 11512 game steps. Outcome: [-1], reward: [-1], score: [680]


barracks :  0  supply :  0
marines :  0
Episode :   12 | Cumulative Reward :   -1 | Epsilon : 0.893


I0924 00:32:14.449375 140688196453760 sc2_env.py:507] Starting episode 13: [terran, terran] on Simple64
I0924 00:33:06.898912 140688196453760 sc2_env.py:725] Episode 13 finished after 9432 game steps. Outcome: [1], reward: [1], score: [4345]


barracks :  5  supply :  7
marines :  32
Episode :   13 | Cumulative Reward :    1 | Epsilon : 0.893


I0924 00:33:10.236108 140688196453760 sc2_env.py:507] Starting episode 14: [terran, terran] on Simple64
I0924 00:34:49.106558 140688196453760 sc2_env.py:725] Episode 14 finished after 14456 game steps. Outcome: [-1], reward: [-1], score: [860]


barracks :  0  supply :  0
marines :  0
Episode :   14 | Cumulative Reward :   -1 | Epsilon : 0.892


I0924 00:34:52.481085 140688196453760 sc2_env.py:507] Starting episode 15: [terran, terran] on Simple64
I0924 00:36:37.413783 140688196453760 sc2_env.py:725] Episode 15 finished after 15128 game steps. Outcome: [-1], reward: [-1], score: [140]


barracks :  0  supply :  0
marines :  0
Episode :   15 | Cumulative Reward :   -1 | Epsilon : 0.892


I0924 00:36:40.768701 140688196453760 sc2_env.py:507] Starting episode 16: [terran, terran] on Simple64
I0924 00:38:21.476430 140688196453760 sc2_env.py:725] Episode 16 finished after 14712 game steps. Outcome: [-1], reward: [-1], score: [220]


barracks :  0  supply :  0
marines :  0
Episode :   16 | Cumulative Reward :   -1 | Epsilon : 0.891


I0924 00:38:24.906723 140688196453760 sc2_env.py:507] Starting episode 17: [terran, terran] on Simple64
I0924 00:39:13.240166 140688196453760 sc2_env.py:725] Episode 17 finished after 8680 game steps. Outcome: [1], reward: [1], score: [4010]


barracks :  5  supply :  6
marines :  29
Episode :   17 | Cumulative Reward :    1 | Epsilon : 0.890


I0924 00:39:16.578634 140688196453760 sc2_env.py:507] Starting episode 18: [terran, terran] on Simple64
I0924 00:40:02.744765 140688196453760 sc2_env.py:725] Episode 18 finished after 8328 game steps. Outcome: [1], reward: [1], score: [4355]


barracks :  4  supply :  7
marines :  37
Episode :   18 | Cumulative Reward :    1 | Epsilon : 0.890


I0924 00:40:06.110075 140688196453760 sc2_env.py:507] Starting episode 19: [terran, terran] on Simple64
I0924 00:41:41.569361 140688196453760 sc2_env.py:725] Episode 19 finished after 14176 game steps. Outcome: [-1], reward: [-1], score: [905]


barracks :  0  supply :  0
marines :  0
Episode :   19 | Cumulative Reward :   -1 | Epsilon : 0.889


I0924 00:41:44.901389 140688196453760 sc2_env.py:507] Starting episode 20: [terran, terran] on Simple64
I0924 00:43:46.139378 140688196453760 sc2_env.py:725] Episode 20 finished after 17488 game steps. Outcome: [-1], reward: [-1], score: [50]


barracks :  0  supply :  0
marines :  0
Episode :   20 | Cumulative Reward :   -1 | Epsilon : 0.889


I0924 00:43:49.469394 140688196453760 sc2_env.py:507] Starting episode 21: [terran, terran] on Simple64
I0924 00:45:01.227105 140688196453760 sc2_env.py:725] Episode 21 finished after 12488 game steps. Outcome: [1], reward: [1], score: [4370]


barracks :  6  supply :  7
marines :  32
Episode :   21 | Cumulative Reward :    1 | Epsilon : 0.888


I0924 00:45:04.549297 140688196453760 sc2_env.py:507] Starting episode 22: [terran, terran] on Simple64
I0924 00:47:07.380659 140688196453760 sc2_env.py:725] Episode 22 finished after 17064 game steps. Outcome: [-1], reward: [-1], score: [855]


barracks :  0  supply :  0
marines :  0
Episode :   22 | Cumulative Reward :   -1 | Epsilon : 0.887


I0924 00:47:10.722633 140688196453760 sc2_env.py:507] Starting episode 23: [terran, terran] on Simple64
I0924 00:48:46.691484 140688196453760 sc2_env.py:725] Episode 23 finished after 14464 game steps. Outcome: [-1], reward: [-1], score: [610]


barracks :  0  supply :  0
marines :  0
Episode :   23 | Cumulative Reward :   -1 | Epsilon : 0.887


I0924 00:48:50.024165 140688196453760 sc2_env.py:507] Starting episode 24: [terran, terran] on Simple64
I0924 00:52:26.216886 140688196453760 sc2_env.py:725] Episode 25 finished after 15240 game steps. Outcome: [-1], reward: [-1], score: [240]


barracks :  0  supply :  0
marines :  0
Episode :   25 | Cumulative Reward :   -1 | Epsilon : 0.886


I0924 00:52:29.541810 140688196453760 sc2_env.py:507] Starting episode 26: [terran, terran] on Simple64
I0924 00:53:29.506673 140688196453760 sc2_env.py:725] Episode 26 finished after 10496 game steps. Outcome: [1], reward: [1], score: [4265]


barracks :  6  supply :  7
marines :  30
Episode :   26 | Cumulative Reward :    1 | Epsilon : 0.885


I0924 00:53:32.859525 140688196453760 sc2_env.py:507] Starting episode 27: [terran, terran] on Simple64
I0924 00:55:06.388327 140688196453760 sc2_env.py:725] Episode 27 finished after 13768 game steps. Outcome: [-1], reward: [-1], score: [465]


barracks :  0  supply :  0
marines :  0
Episode :   27 | Cumulative Reward :   -1 | Epsilon : 0.884


I0924 00:55:09.720254 140688196453760 sc2_env.py:507] Starting episode 28: [terran, terran] on Simple64
I0924 00:59:07.193279 140688196453760 sc2_env.py:725] Episode 28 finished after 28800 game steps. Outcome: [0], reward: [0], score: [10585]


barracks :  8  supply :  19
marines :  128
Episode :   28 | Cumulative Reward :    0 | Epsilon : 0.884


I0924 00:59:10.558569 140688196453760 sc2_env.py:507] Starting episode 29: [terran, terran] on Simple64
I0924 01:00:59.193709 140688196453760 sc2_env.py:725] Episode 29 finished after 15712 game steps. Outcome: [-1], reward: [-1], score: [795]


barracks :  0  supply :  0
marines :  0
Episode :   29 | Cumulative Reward :   -1 | Epsilon : 0.883


I0924 01:01:02.565574 140688196453760 sc2_env.py:507] Starting episode 30: [terran, terran] on Simple64
I0924 01:02:15.670055 140688196453760 sc2_env.py:725] Episode 30 finished after 12280 game steps. Outcome: [-1], reward: [-1], score: [405]


barracks :  0  supply :  0
marines :  0
Episode :   30 | Cumulative Reward :   -1 | Epsilon : 0.883


I0924 01:02:19.019183 140688196453760 sc2_env.py:507] Starting episode 31: [terran, terran] on Simple64
I0924 01:03:08.369893 140688196453760 sc2_env.py:725] Episode 31 finished after 9024 game steps. Outcome: [1], reward: [1], score: [3900]


barracks :  4  supply :  6
marines :  29
Episode :   31 | Cumulative Reward :    1 | Epsilon : 0.882


I0924 01:03:11.718670 140688196453760 sc2_env.py:507] Starting episode 32: [terran, terran] on Simple64
I0924 01:05:02.703867 140688196453760 sc2_env.py:725] Episode 32 finished after 15480 game steps. Outcome: [-1], reward: [-1], score: [465]


barracks :  0  supply :  0
marines :  0
Episode :   32 | Cumulative Reward :   -1 | Epsilon : 0.881


I0924 01:05:06.038969 140688196453760 sc2_env.py:507] Starting episode 33: [terran, terran] on Simple64
I0924 01:05:49.240981 140688196453760 sc2_env.py:725] Episode 33 finished after 8120 game steps. Outcome: [1], reward: [1], score: [3835]


barracks :  4  supply :  6
marines :  27
Episode :   33 | Cumulative Reward :    1 | Epsilon : 0.881


I0924 01:05:52.607098 140688196453760 sc2_env.py:507] Starting episode 34: [terran, terran] on Simple64
I0924 01:08:03.446643 140688196453760 sc2_env.py:725] Episode 34 finished after 17616 game steps. Outcome: [-1], reward: [-1], score: [530]


barracks :  0  supply :  0
marines :  0
Episode :   34 | Cumulative Reward :   -1 | Epsilon : 0.880


I0924 01:08:06.801508 140688196453760 sc2_env.py:507] Starting episode 35: [terran, terran] on Simple64
I0924 01:09:53.004486 140688196453760 sc2_env.py:725] Episode 35 finished after 15184 game steps. Outcome: [-1], reward: [-1], score: [675]


barracks :  0  supply :  0
marines :  0
Episode :   35 | Cumulative Reward :   -1 | Epsilon : 0.880


I0924 01:09:56.342802 140688196453760 sc2_env.py:507] Starting episode 36: [terran, terran] on Simple64
I0924 01:11:54.405695 140688196453760 sc2_env.py:725] Episode 36 finished after 16960 game steps. Outcome: [-1], reward: [-1], score: [460]


barracks :  0  supply :  0
marines :  0
Episode :   36 | Cumulative Reward :   -1 | Epsilon : 0.879


I0924 01:11:57.762572 140688196453760 sc2_env.py:507] Starting episode 37: [terran, terran] on Simple64
I0924 01:13:47.491535 140688196453760 sc2_env.py:725] Episode 37 finished after 15704 game steps. Outcome: [-1], reward: [-1], score: [250]


barracks :  0  supply :  0
marines :  0
Episode :   37 | Cumulative Reward :   -1 | Epsilon : 0.878


I0924 01:13:50.858415 140688196453760 sc2_env.py:507] Starting episode 38: [terran, terran] on Simple64
I0924 01:15:51.314871 140688196453760 sc2_env.py:725] Episode 38 finished after 17568 game steps. Outcome: [-1], reward: [-1], score: [885]


barracks :  0  supply :  0
marines :  0
Episode :   38 | Cumulative Reward :   -1 | Epsilon : 0.878


I0924 01:15:54.669905 140688196453760 sc2_env.py:507] Starting episode 39: [terran, terran] on Simple64
I0924 01:16:36.587032 140688196453760 sc2_env.py:725] Episode 39 finished after 7904 game steps. Outcome: [1], reward: [1], score: [3645]


barracks :  3  supply :  6
marines :  27
Episode :   39 | Cumulative Reward :    1 | Epsilon : 0.877


I0924 01:16:39.923399 140688196453760 sc2_env.py:507] Starting episode 40: [terran, terran] on Simple64
I0924 01:18:24.871108 140688196453760 sc2_env.py:725] Episode 40 finished after 15624 game steps. Outcome: [-1], reward: [-1], score: [65]


barracks :  0  supply :  0
marines :  0
Episode :   40 | Cumulative Reward :   -1 | Epsilon : 0.877


I0924 01:18:28.256132 140688196453760 sc2_env.py:507] Starting episode 41: [terran, terran] on Simple64
I0924 01:19:47.423812 140688196453760 sc2_env.py:725] Episode 41 finished after 12520 game steps. Outcome: [-1], reward: [-1], score: [430]


barracks :  0  supply :  0
marines :  0
Episode :   41 | Cumulative Reward :   -1 | Epsilon : 0.876


I0924 01:19:50.802155 140688196453760 sc2_env.py:507] Starting episode 42: [terran, terran] on Simple64
I0924 01:23:57.462161 140688196453760 sc2_env.py:725] Episode 42 finished after 28800 game steps. Outcome: [0], reward: [0], score: [11345]


barracks :  5  supply :  20
marines :  150
Episode :   42 | Cumulative Reward :    0 | Epsilon : 0.875


I0924 01:24:00.861050 140688196453760 sc2_env.py:507] Starting episode 43: [terran, terran] on Simple64
I0924 01:25:30.920643 140688196453760 sc2_env.py:725] Episode 43 finished after 14200 game steps. Outcome: [1], reward: [1], score: [4685]


barracks :  6  supply :  7
marines :  38
Episode :   43 | Cumulative Reward :    1 | Epsilon : 0.875


I0924 01:25:34.258073 140688196453760 sc2_env.py:507] Starting episode 44: [terran, terran] on Simple64
I0924 01:27:17.090780 140688196453760 sc2_env.py:725] Episode 44 finished after 14792 game steps. Outcome: [-1], reward: [-1], score: [600]


barracks :  0  supply :  0
marines :  0
Episode :   44 | Cumulative Reward :   -1 | Epsilon : 0.874


I0924 01:27:20.425814 140688196453760 sc2_env.py:507] Starting episode 45: [terran, terran] on Simple64
I0924 01:28:48.635400 140688196453760 sc2_env.py:725] Episode 45 finished after 13704 game steps. Outcome: [-1], reward: [-1], score: [65]


barracks :  0  supply :  0
marines :  0
Episode :   45 | Cumulative Reward :   -1 | Epsilon : 0.874


I0924 01:28:51.956959 140688196453760 sc2_env.py:507] Starting episode 46: [terran, terran] on Simple64
I0924 01:30:33.885133 140688196453760 sc2_env.py:725] Episode 46 finished after 14872 game steps. Outcome: [-1], reward: [-1], score: [400]


barracks :  0  supply :  0
marines :  0
Episode :   46 | Cumulative Reward :   -1 | Epsilon : 0.873


I0924 01:30:37.215812 140688196453760 sc2_env.py:507] Starting episode 47: [terran, terran] on Simple64
I0924 01:32:44.154440 140688196453760 sc2_env.py:725] Episode 47 finished after 18384 game steps. Outcome: [-1], reward: [-1], score: [310]


barracks :  0  supply :  0
marines :  0
Episode :   47 | Cumulative Reward :   -1 | Epsilon : 0.872


I0924 01:32:47.498523 140688196453760 sc2_env.py:507] Starting episode 48: [terran, terran] on Simple64
I0924 01:34:27.072413 140688196453760 sc2_env.py:725] Episode 48 finished after 15040 game steps. Outcome: [-1], reward: [-1], score: [330]


barracks :  0  supply :  0
marines :  0
Episode :   48 | Cumulative Reward :   -1 | Epsilon : 0.872


I0924 01:34:30.409826 140688196453760 sc2_env.py:507] Starting episode 49: [terran, terran] on Simple64
I0924 01:36:15.091205 140688196453760 sc2_env.py:725] Episode 49 finished after 15160 game steps. Outcome: [-1], reward: [-1], score: [140]


barracks :  0  supply :  0
marines :  0
Episode :   49 | Cumulative Reward :   -1 | Epsilon : 0.871


I0924 01:36:18.422502 140688196453760 sc2_env.py:507] Starting episode 50: [terran, terran] on Simple64
I0924 01:37:02.988756 140688196453760 sc2_env.py:725] Episode 50 finished after 8280 game steps. Outcome: [1], reward: [1], score: [3885]


barracks :  5  supply :  6
marines :  26
Episode :   50 | Cumulative Reward :    1 | Epsilon : 0.871


I0924 01:37:06.317813 140688196453760 sc2_env.py:507] Starting episode 51: [terran, terran] on Simple64
I0924 01:38:21.091768 140688196453760 sc2_env.py:725] Episode 51 finished after 12360 game steps. Outcome: [-1], reward: [-1], score: [260]


barracks :  0  supply :  0
marines :  0
Episode :   51 | Cumulative Reward :   -1 | Epsilon : 0.870


I0924 01:38:24.412832 140688196453760 sc2_env.py:507] Starting episode 52: [terran, terran] on Simple64
I0924 01:42:40.919470 140688196453760 sc2_env.py:725] Episode 52 finished after 28800 game steps. Outcome: [0], reward: [0], score: [11505]


barracks :  5  supply :  21
marines :  150
Episode :   52 | Cumulative Reward :    0 | Epsilon : 0.869


I0924 01:42:44.299155 140688196453760 sc2_env.py:507] Starting episode 53: [terran, terran] on Simple64
I0924 01:43:58.762134 140688196453760 sc2_env.py:725] Episode 53 finished after 12920 game steps. Outcome: [1], reward: [1], score: [5285]


barracks :  5  supply :  9
marines :  48
Episode :   53 | Cumulative Reward :    1 | Epsilon : 0.869


I0924 01:44:02.083584 140688196453760 sc2_env.py:507] Starting episode 54: [terran, terran] on Simple64
I0924 01:45:33.359652 140688196453760 sc2_env.py:725] Episode 54 finished after 13496 game steps. Outcome: [-1], reward: [-1], score: [525]


barracks :  0  supply :  0
marines :  0
Episode :   54 | Cumulative Reward :   -1 | Epsilon : 0.868


I0924 01:45:36.678530 140688196453760 sc2_env.py:507] Starting episode 55: [terran, terran] on Simple64
I0924 01:49:24.900091 140688196453760 sc2_env.py:725] Episode 55 finished after 28800 game steps. Outcome: [0], reward: [0], score: [9270]


barracks :  5  supply :  17
marines :  113
Episode :   55 | Cumulative Reward :    0 | Epsilon : 0.868


I0924 01:49:28.278054 140688196453760 sc2_env.py:507] Starting episode 56: [terran, terran] on Simple64
I0924 01:51:19.623864 140688196453760 sc2_env.py:725] Episode 56 finished after 15784 game steps. Outcome: [-1], reward: [-1], score: [550]


barracks :  0  supply :  0
marines :  0
Episode :   56 | Cumulative Reward :   -1 | Epsilon : 0.867


I0924 01:51:22.988094 140688196453760 sc2_env.py:507] Starting episode 57: [terran, terran] on Simple64
I0924 01:52:07.262476 140688196453760 sc2_env.py:725] Episode 57 finished after 8032 game steps. Outcome: [1], reward: [1], score: [3870]


barracks :  3  supply :  7
marines :  31
Episode :   57 | Cumulative Reward :    1 | Epsilon : 0.866


I0924 01:52:10.591387 140688196453760 sc2_env.py:507] Starting episode 58: [terran, terran] on Simple64
I0924 01:56:22.125677 140688196453760 sc2_env.py:725] Episode 58 finished after 28800 game steps. Outcome: [0], reward: [0], score: [11170]


barracks :  9  supply :  19
marines :  137
Episode :   58 | Cumulative Reward :    0 | Epsilon : 0.866


I0924 01:56:25.505261 140688196453760 sc2_env.py:507] Starting episode 59: [terran, terran] on Simple64
I0924 01:58:16.413684 140688196453760 sc2_env.py:725] Episode 59 finished after 15720 game steps. Outcome: [-1], reward: [-1], score: [280]


barracks :  0  supply :  0
marines :  0
Episode :   59 | Cumulative Reward :   -1 | Epsilon : 0.865


I0924 01:58:19.782868 140688196453760 sc2_env.py:507] Starting episode 60: [terran, terran] on Simple64
I0924 02:00:03.028636 140688196453760 sc2_env.py:725] Episode 60 finished after 14624 game steps. Outcome: [-1], reward: [-1], score: [415]


barracks :  0  supply :  0
marines :  0
Episode :   60 | Cumulative Reward :   -1 | Epsilon : 0.865


I0924 02:00:06.376898 140688196453760 sc2_env.py:507] Starting episode 61: [terran, terran] on Simple64
I0924 02:02:01.531120 140688196453760 sc2_env.py:725] Episode 61 finished after 15984 game steps. Outcome: [-1], reward: [-1], score: [280]


barracks :  0  supply :  0
marines :  0
Episode :   61 | Cumulative Reward :   -1 | Epsilon : 0.864


I0924 02:02:04.888613 140688196453760 sc2_env.py:507] Starting episode 62: [terran, terran] on Simple64
I0924 02:04:09.161062 140688196453760 sc2_env.py:725] Episode 62 finished after 16872 game steps. Outcome: [-1], reward: [-1], score: [630]


barracks :  0  supply :  0
marines :  0
Episode :   62 | Cumulative Reward :   -1 | Epsilon : 0.863


I0924 02:04:12.485310 140688196453760 sc2_env.py:507] Starting episode 63: [terran, terran] on Simple64
I0924 02:05:09.475910 140688196453760 sc2_env.py:725] Episode 63 finished after 10008 game steps. Outcome: [1], reward: [1], score: [4745]


barracks :  5  supply :  8
marines :  39
Episode :   63 | Cumulative Reward :    1 | Epsilon : 0.863


I0924 02:05:12.821643 140688196453760 sc2_env.py:507] Starting episode 64: [terran, terran] on Simple64
I0924 02:06:44.753335 140688196453760 sc2_env.py:725] Episode 64 finished after 14296 game steps. Outcome: [-1], reward: [-1], score: [220]


barracks :  0  supply :  0
marines :  0
Episode :   64 | Cumulative Reward :   -1 | Epsilon : 0.862


I0924 02:06:48.094813 140688196453760 sc2_env.py:507] Starting episode 65: [terran, terran] on Simple64
I0924 02:07:40.437304 140688196453760 sc2_env.py:725] Episode 65 finished after 9480 game steps. Outcome: [1], reward: [1], score: [4260]


barracks :  4  supply :  7
marines :  34
Episode :   65 | Cumulative Reward :    1 | Epsilon : 0.862


I0924 02:07:43.794289 140688196453760 sc2_env.py:507] Starting episode 66: [terran, terran] on Simple64
I0924 02:09:38.540277 140688196453760 sc2_env.py:725] Episode 66 finished after 16520 game steps. Outcome: [-1], reward: [-1], score: [1070]


barracks :  0  supply :  0
marines :  0
Episode :   66 | Cumulative Reward :   -1 | Epsilon : 0.861


I0924 02:09:41.902228 140688196453760 sc2_env.py:507] Starting episode 67: [terran, terran] on Simple64
I0924 02:12:34.130064 140688196453760 sc2_env.py:725] Episode 67 finished after 23576 game steps. Outcome: [-1], reward: [-1], score: [475]


barracks :  0  supply :  0
marines :  0
Episode :   67 | Cumulative Reward :   -1 | Epsilon : 0.860


I0924 02:12:37.465913 140688196453760 sc2_env.py:507] Starting episode 68: [terran, terran] on Simple64
I0924 02:14:34.880785 140688196453760 sc2_env.py:725] Episode 68 finished after 17128 game steps. Outcome: [-1], reward: [-1], score: [410]


barracks :  0  supply :  0
marines :  0
Episode :   68 | Cumulative Reward :   -1 | Epsilon : 0.860


I0924 02:14:38.207283 140688196453760 sc2_env.py:507] Starting episode 69: [terran, terran] on Simple64
I0924 02:18:28.387218 140688196453760 sc2_env.py:725] Episode 69 finished after 28800 game steps. Outcome: [0], reward: [0], score: [10325]


barracks :  7  supply :  18
marines :  126
Episode :   69 | Cumulative Reward :    0 | Epsilon : 0.859


I0924 02:18:31.767992 140688196453760 sc2_env.py:507] Starting episode 70: [terran, terran] on Simple64
I0924 02:19:19.409889 140688196453760 sc2_env.py:725] Episode 70 finished after 8568 game steps. Outcome: [1], reward: [1], score: [4250]


barracks :  4  supply :  7
marines :  35
Episode :   70 | Cumulative Reward :    1 | Epsilon : 0.859


I0924 02:19:22.755157 140688196453760 sc2_env.py:507] Starting episode 71: [terran, terran] on Simple64
I0924 02:21:04.438218 140688196453760 sc2_env.py:725] Episode 71 finished after 14920 game steps. Outcome: [-1], reward: [-1], score: [340]


barracks :  0  supply :  0
marines :  0
Episode :   71 | Cumulative Reward :   -1 | Epsilon : 0.858


I0924 02:21:07.786274 140688196453760 sc2_env.py:507] Starting episode 72: [terran, terran] on Simple64
I0924 02:21:58.024603 140688196453760 sc2_env.py:725] Episode 72 finished after 9376 game steps. Outcome: [1], reward: [1], score: [4005]


barracks :  5  supply :  6
marines :  27
Episode :   72 | Cumulative Reward :    1 | Epsilon : 0.857


I0924 02:22:01.316688 140688196453760 sc2_env.py:507] Starting episode 73: [terran, terran] on Simple64
I0924 02:23:37.601741 140688196453760 sc2_env.py:725] Episode 73 finished after 14640 game steps. Outcome: [-1], reward: [-1], score: [75]


barracks :  0  supply :  0
marines :  0
Episode :   73 | Cumulative Reward :   -1 | Epsilon : 0.857


I0924 02:23:40.918682 140688196453760 sc2_env.py:507] Starting episode 74: [terran, terran] on Simple64
I0924 02:24:48.006433 140688196453760 sc2_env.py:725] Episode 74 finished after 10992 game steps. Outcome: [-1], reward: [-1], score: [615]


barracks :  0  supply :  0
marines :  0
Episode :   74 | Cumulative Reward :   -1 | Epsilon : 0.856


I0924 02:24:51.366171 140688196453760 sc2_env.py:507] Starting episode 75: [terran, terran] on Simple64
I0924 02:26:31.980833 140688196453760 sc2_env.py:725] Episode 75 finished after 14840 game steps. Outcome: [-1], reward: [-1], score: [950]


barracks :  0  supply :  0
marines :  0
Episode :   75 | Cumulative Reward :   -1 | Epsilon : 0.856


I0924 02:26:35.294697 140688196453760 sc2_env.py:507] Starting episode 76: [terran, terran] on Simple64
I0924 02:28:52.210974 140688196453760 sc2_env.py:725] Episode 76 finished after 19416 game steps. Outcome: [-1], reward: [-1], score: [205]


barracks :  0  supply :  0
marines :  0
Episode :   76 | Cumulative Reward :   -1 | Epsilon : 0.855


I0924 02:28:55.520056 140688196453760 sc2_env.py:507] Starting episode 77: [terran, terran] on Simple64
I0924 02:30:48.617672 140688196453760 sc2_env.py:725] Episode 77 finished after 14872 game steps. Outcome: [-1], reward: [-1], score: [490]


barracks :  0  supply :  0
marines :  0
Episode :   77 | Cumulative Reward :   -1 | Epsilon : 0.854


I0924 02:30:52.024155 140688196453760 sc2_env.py:507] Starting episode 78: [terran, terran] on Simple64
I0924 02:31:42.872309 140688196453760 sc2_env.py:725] Episode 78 finished after 9120 game steps. Outcome: [1], reward: [1], score: [4230]


barracks :  4  supply :  7
marines :  34
Episode :   78 | Cumulative Reward :    1 | Epsilon : 0.854


I0924 02:31:46.183109 140688196453760 sc2_env.py:507] Starting episode 79: [terran, terran] on Simple64
I0924 02:32:47.391216 140688196453760 sc2_env.py:725] Episode 79 finished after 10960 game steps. Outcome: [1], reward: [1], score: [4210]


barracks :  4  supply :  7
marines :  33
Episode :   79 | Cumulative Reward :    1 | Epsilon : 0.853


I0924 02:32:50.714770 140688196453760 sc2_env.py:507] Starting episode 80: [terran, terran] on Simple64
I0924 02:34:38.034695 140688196453760 sc2_env.py:725] Episode 80 finished after 15552 game steps. Outcome: [-1], reward: [-1], score: [460]


barracks :  0  supply :  0
marines :  0
Episode :   80 | Cumulative Reward :   -1 | Epsilon : 0.853


I0924 02:34:41.330433 140688196453760 sc2_env.py:507] Starting episode 81: [terran, terran] on Simple64
I0924 02:35:20.880143 140688196453760 sc2_env.py:725] Episode 81 finished after 7592 game steps. Outcome: [1], reward: [1], score: [3540]


barracks :  4  supply :  5
marines :  25
Episode :   81 | Cumulative Reward :    1 | Epsilon : 0.852


I0924 02:35:24.185128 140688196453760 sc2_env.py:507] Starting episode 82: [terran, terran] on Simple64
I0924 02:37:00.504823 140688196453760 sc2_env.py:725] Episode 82 finished after 14672 game steps. Outcome: [-1], reward: [-1], score: [55]


barracks :  0  supply :  0
marines :  0
Episode :   82 | Cumulative Reward :   -1 | Epsilon : 0.851


I0924 02:37:03.844034 140688196453760 sc2_env.py:507] Starting episode 83: [terran, terran] on Simple64
I0924 02:38:40.643601 140688196453760 sc2_env.py:725] Episode 83 finished after 14352 game steps. Outcome: [-1], reward: [-1], score: [245]


barracks :  0  supply :  0
marines :  0
Episode :   83 | Cumulative Reward :   -1 | Epsilon : 0.851


I0924 02:38:43.983992 140688196453760 sc2_env.py:507] Starting episode 84: [terran, terran] on Simple64
I0924 02:40:40.437494 140688196453760 sc2_env.py:725] Episode 84 finished after 15328 game steps. Outcome: [-1], reward: [-1], score: [155]


barracks :  0  supply :  0
marines :  0
Episode :   84 | Cumulative Reward :   -1 | Epsilon : 0.850


I0924 02:40:43.795887 140688196453760 sc2_env.py:507] Starting episode 85: [terran, terran] on Simple64
I0924 02:45:07.981968 140688196453760 sc2_env.py:725] Episode 85 finished after 28800 game steps. Outcome: [0], reward: [0], score: [11090]


barracks :  7  supply :  21
marines :  137
Episode :   85 | Cumulative Reward :    0 | Epsilon : 0.850


I0924 02:45:11.293813 140688196453760 sc2_env.py:507] Starting episode 86: [terran, terran] on Simple64
I0924 02:47:10.899235 140688196453760 sc2_env.py:725] Episode 86 finished after 17048 game steps. Outcome: [-1], reward: [-1], score: [750]


barracks :  0  supply :  0
marines :  0
Episode :   86 | Cumulative Reward :   -1 | Epsilon : 0.849


I0924 02:47:14.183344 140688196453760 sc2_env.py:507] Starting episode 87: [terran, terran] on Simple64
I0924 02:48:08.421528 140688196453760 sc2_env.py:725] Episode 87 finished after 9784 game steps. Outcome: [1], reward: [1], score: [4595]


barracks :  4  supply :  7
marines :  42
Episode :   87 | Cumulative Reward :    1 | Epsilon : 0.848


I0924 02:48:11.722691 140688196453760 sc2_env.py:507] Starting episode 88: [terran, terran] on Simple64
I0924 02:49:43.596871 140688196453760 sc2_env.py:725] Episode 88 finished after 13848 game steps. Outcome: [-1], reward: [-1], score: [785]


barracks :  0  supply :  0
marines :  0
Episode :   88 | Cumulative Reward :   -1 | Epsilon : 0.848


I0924 02:49:46.922973 140688196453760 sc2_env.py:507] Starting episode 89: [terran, terran] on Simple64
I0924 02:51:40.550343 140688196453760 sc2_env.py:725] Episode 89 finished after 16672 game steps. Outcome: [-1], reward: [-1], score: [220]


barracks :  0  supply :  0
marines :  0
Episode :   89 | Cumulative Reward :   -1 | Epsilon : 0.847


I0924 02:51:43.852622 140688196453760 sc2_env.py:507] Starting episode 90: [terran, terran] on Simple64
I0924 02:52:25.644192 140688196453760 sc2_env.py:725] Episode 90 finished after 7976 game steps. Outcome: [1], reward: [1], score: [4150]


barracks :  4  supply :  7
marines :  33
Episode :   90 | Cumulative Reward :    1 | Epsilon : 0.847


I0924 02:52:28.914668 140688196453760 sc2_env.py:507] Starting episode 91: [terran, terran] on Simple64
I0924 02:54:13.123290 140688196453760 sc2_env.py:725] Episode 91 finished after 15272 game steps. Outcome: [-1], reward: [-1], score: [465]


barracks :  0  supply :  0
marines :  0
Episode :   91 | Cumulative Reward :   -1 | Epsilon : 0.846


I0924 02:54:16.436139 140688196453760 sc2_env.py:507] Starting episode 92: [terran, terran] on Simple64
I0924 02:58:21.172040 140688196453760 sc2_env.py:725] Episode 92 finished after 28800 game steps. Outcome: [0], reward: [0], score: [11310]


barracks :  6  supply :  21
marines :  146
Episode :   92 | Cumulative Reward :    0 | Epsilon : 0.845


I0924 02:58:24.507640 140688196453760 sc2_env.py:507] Starting episode 93: [terran, terran] on Simple64
I0924 02:59:07.537445 140688196453760 sc2_env.py:725] Episode 93 finished after 8240 game steps. Outcome: [1], reward: [1], score: [3930]


barracks :  4  supply :  6
marines :  29
Episode :   93 | Cumulative Reward :    1 | Epsilon : 0.845


I0924 02:59:10.826925 140688196453760 sc2_env.py:507] Starting episode 94: [terran, terran] on Simple64
I0924 03:00:05.637501 140688196453760 sc2_env.py:725] Episode 94 finished after 10120 game steps. Outcome: [1], reward: [1], score: [4640]


barracks :  4  supply :  8
marines :  39
Episode :   94 | Cumulative Reward :    1 | Epsilon : 0.844


I0924 03:00:08.946664 140688196453760 sc2_env.py:507] Starting episode 95: [terran, terran] on Simple64
I0924 03:01:21.426710 140688196453760 sc2_env.py:725] Episode 95 finished after 11648 game steps. Outcome: [-1], reward: [-1], score: [95]


barracks :  0  supply :  0
marines :  0
Episode :   95 | Cumulative Reward :   -1 | Epsilon : 0.844


I0924 03:01:24.725493 140688196453760 sc2_env.py:507] Starting episode 96: [terran, terran] on Simple64
I0924 03:02:34.700749 140688196453760 sc2_env.py:725] Episode 96 finished after 12184 game steps. Outcome: [1], reward: [1], score: [5170]


barracks :  7  supply :  8
marines :  41
Episode :   96 | Cumulative Reward :    1 | Epsilon : 0.843


I0924 03:02:38.030495 140688196453760 sc2_env.py:507] Starting episode 97: [terran, terran] on Simple64
I0924 03:04:16.935108 140688196453760 sc2_env.py:725] Episode 97 finished after 14592 game steps. Outcome: [-1], reward: [-1], score: [795]


barracks :  0  supply :  0
marines :  0
Episode :   97 | Cumulative Reward :   -1 | Epsilon : 0.842


I0924 03:04:20.292958 140688196453760 sc2_env.py:507] Starting episode 98: [terran, terran] on Simple64
I0924 03:06:05.397239 140688196453760 sc2_env.py:725] Episode 98 finished after 15040 game steps. Outcome: [-1], reward: [-1], score: [550]


barracks :  0  supply :  0
marines :  0
Episode :   98 | Cumulative Reward :   -1 | Epsilon : 0.842


I0924 03:06:08.699182 140688196453760 sc2_env.py:507] Starting episode 99: [terran, terran] on Simple64
I0924 03:08:02.924302 140688196453760 sc2_env.py:725] Episode 99 finished after 15976 game steps. Outcome: [-1], reward: [-1], score: [810]


barracks :  0  supply :  0
marines :  0
Episode :   99 | Cumulative Reward :   -1 | Epsilon : 0.841


I0924 03:08:06.251214 140688196453760 sc2_env.py:507] Starting episode 100: [terran, terran] on Simple64
I0924 03:09:03.206344 140688196453760 sc2_env.py:725] Episode 100 finished after 10304 game steps. Outcome: [1], reward: [1], score: [4220]


barracks :  5  supply :  6
marines :  34
Episode :  100 | Cumulative Reward :    1 | Epsilon : 0.841


I0924 03:09:06.583736 140688196453760 sc2_env.py:507] Starting episode 101: [terran, terran] on Simple64
I0924 03:11:07.678082 140688196453760 sc2_env.py:725] Episode 101 finished after 17704 game steps. Outcome: [-1], reward: [-1], score: [350]


barracks :  0  supply :  0
marines :  0
Episode :  101 | Cumulative Reward :   -1 | Epsilon : 0.840


I0924 03:11:11.181165 140688196453760 sc2_env.py:507] Starting episode 102: [terran, terran] on Simple64
I0924 03:12:40.767338 140688196453760 sc2_env.py:725] Episode 102 finished after 13720 game steps. Outcome: [-1], reward: [-1], score: [50]


barracks :  0  supply :  0
marines :  0
Episode :  102 | Cumulative Reward :   -1 | Epsilon : 0.839


I0924 03:12:44.124070 140688196453760 sc2_env.py:507] Starting episode 103: [terran, terran] on Simple64
I0924 03:13:24.798406 140688196453760 sc2_env.py:725] Episode 103 finished after 7896 game steps. Outcome: [1], reward: [1], score: [3655]


barracks :  4  supply :  5
marines :  27
Episode :  103 | Cumulative Reward :    1 | Epsilon : 0.839


I0924 03:13:28.143296 140688196453760 sc2_env.py:507] Starting episode 104: [terran, terran] on Simple64
I0924 03:14:15.742690 140688196453760 sc2_env.py:725] Episode 104 finished after 8848 game steps. Outcome: [1], reward: [1], score: [3985]


barracks :  5  supply :  6
marines :  29
Episode :  104 | Cumulative Reward :    1 | Epsilon : 0.838


I0924 03:14:19.092960 140688196453760 sc2_env.py:507] Starting episode 105: [terran, terran] on Simple64
I0924 03:18:11.003206 140688196453760 sc2_env.py:725] Episode 105 finished after 28800 game steps. Outcome: [0], reward: [0], score: [10570]


barracks :  6  supply :  19
marines :  134
Episode :  105 | Cumulative Reward :    0 | Epsilon : 0.838


I0924 03:18:14.345532 140688196453760 sc2_env.py:507] Starting episode 106: [terran, terran] on Simple64
I0924 03:18:57.509944 140688196453760 sc2_env.py:725] Episode 106 finished after 8112 game steps. Outcome: [1], reward: [1], score: [3700]


barracks :  4  supply :  6
marines :  26
Episode :  106 | Cumulative Reward :    1 | Epsilon : 0.837


I0924 03:19:00.800414 140688196453760 sc2_env.py:507] Starting episode 107: [terran, terran] on Simple64
I0924 03:20:45.649810 140688196453760 sc2_env.py:725] Episode 107 finished after 15176 game steps. Outcome: [-1], reward: [-1], score: [675]


barracks :  0  supply :  0
marines :  0
Episode :  107 | Cumulative Reward :   -1 | Epsilon : 0.836


I0924 03:20:49.015803 140688196453760 sc2_env.py:507] Starting episode 108: [terran, terran] on Simple64
I0924 03:21:48.877261 140688196453760 sc2_env.py:725] Episode 108 finished after 10440 game steps. Outcome: [1], reward: [1], score: [4490]


barracks :  7  supply :  6
marines :  35
Episode :  108 | Cumulative Reward :    1 | Epsilon : 0.836


I0924 03:21:52.235280 140688196453760 sc2_env.py:507] Starting episode 109: [terran, terran] on Simple64
I0924 03:23:28.182198 140688196453760 sc2_env.py:725] Episode 109 finished after 14488 game steps. Outcome: [-1], reward: [-1], score: [380]


barracks :  0  supply :  0
marines :  0
Episode :  109 | Cumulative Reward :   -1 | Epsilon : 0.835


I0924 03:23:31.557979 140688196453760 sc2_env.py:507] Starting episode 110: [terran, terran] on Simple64
I0924 03:24:58.542155 140688196453760 sc2_env.py:725] Episode 110 finished after 13624 game steps. Outcome: [-1], reward: [-1], score: [85]


barracks :  0  supply :  0
marines :  0
Episode :  110 | Cumulative Reward :   -1 | Epsilon : 0.835


I0924 03:25:01.885009 140688196453760 sc2_env.py:507] Starting episode 111: [terran, terran] on Simple64
I0924 03:26:41.327616 140688196453760 sc2_env.py:725] Episode 111 finished after 14560 game steps. Outcome: [-1], reward: [-1], score: [875]


barracks :  0  supply :  0
marines :  0
Episode :  111 | Cumulative Reward :   -1 | Epsilon : 0.834


I0924 03:26:44.742012 140688196453760 sc2_env.py:507] Starting episode 112: [terran, terran] on Simple64
I0924 03:27:28.425541 140688196453760 sc2_env.py:725] Episode 112 finished after 8120 game steps. Outcome: [1], reward: [1], score: [4020]


barracks :  4  supply :  6
marines :  32
Episode :  112 | Cumulative Reward :    1 | Epsilon : 0.833


I0924 03:27:31.777240 140688196453760 sc2_env.py:507] Starting episode 113: [terran, terran] on Simple64
I0924 03:29:27.087865 140688196453760 sc2_env.py:725] Episode 113 finished after 16384 game steps. Outcome: [-1], reward: [-1], score: [140]


barracks :  0  supply :  0
marines :  0
Episode :  113 | Cumulative Reward :   -1 | Epsilon : 0.833


I0924 03:29:30.414478 140688196453760 sc2_env.py:507] Starting episode 114: [terran, terran] on Simple64
I0924 03:31:17.375992 140688196453760 sc2_env.py:725] Episode 114 finished after 15408 game steps. Outcome: [-1], reward: [-1], score: [285]


barracks :  0  supply :  0
marines :  0
Episode :  114 | Cumulative Reward :   -1 | Epsilon : 0.832


I0924 03:31:20.710361 140688196453760 sc2_env.py:507] Starting episode 115: [terran, terran] on Simple64
I0924 03:33:02.723966 140688196453760 sc2_env.py:725] Episode 115 finished after 15216 game steps. Outcome: [-1], reward: [-1], score: [205]


barracks :  0  supply :  0
marines :  0
Episode :  115 | Cumulative Reward :   -1 | Epsilon : 0.832


I0924 03:33:06.064846 140688196453760 sc2_env.py:507] Starting episode 116: [terran, terran] on Simple64
I0924 03:34:12.537184 140688196453760 sc2_env.py:725] Episode 116 finished after 11664 game steps. Outcome: [1], reward: [1], score: [4495]


barracks :  6  supply :  7
marines :  33
Episode :  116 | Cumulative Reward :    1 | Epsilon : 0.831


I0924 03:34:15.852596 140688196453760 sc2_env.py:507] Starting episode 117: [terran, terran] on Simple64
I0924 03:35:01.412628 140688196453760 sc2_env.py:725] Episode 117 finished after 8288 game steps. Outcome: [1], reward: [1], score: [3635]


barracks :  6  supply :  5
marines :  21
Episode :  117 | Cumulative Reward :    1 | Epsilon : 0.830


I0924 03:35:04.742276 140688196453760 sc2_env.py:507] Starting episode 118: [terran, terran] on Simple64
I0924 03:36:47.804324 140688196453760 sc2_env.py:725] Episode 118 finished after 15136 game steps. Outcome: [-1], reward: [-1], score: [410]


barracks :  0  supply :  0
marines :  0
Episode :  118 | Cumulative Reward :   -1 | Epsilon : 0.830


I0924 03:36:51.142196 140688196453760 sc2_env.py:507] Starting episode 119: [terran, terran] on Simple64
I0924 03:38:34.524229 140688196453760 sc2_env.py:725] Episode 119 finished after 15040 game steps. Outcome: [-1], reward: [-1], score: [1045]


barracks :  0  supply :  0
marines :  0
Episode :  119 | Cumulative Reward :   -1 | Epsilon : 0.829


I0924 03:38:37.867689 140688196453760 sc2_env.py:507] Starting episode 120: [terran, terran] on Simple64
I0924 03:40:26.448931 140688196453760 sc2_env.py:725] Episode 120 finished after 15608 game steps. Outcome: [-1], reward: [-1], score: [280]


barracks :  0  supply :  0
marines :  0
Episode :  120 | Cumulative Reward :   -1 | Epsilon : 0.829


I0924 03:40:29.805027 140688196453760 sc2_env.py:507] Starting episode 121: [terran, terran] on Simple64
I0924 03:41:12.919721 140688196453760 sc2_env.py:725] Episode 121 finished after 7912 game steps. Outcome: [1], reward: [1], score: [4065]


barracks :  5  supply :  6
marines :  28
Episode :  121 | Cumulative Reward :    1 | Epsilon : 0.828


I0924 03:41:16.270238 140688196453760 sc2_env.py:507] Starting episode 122: [terran, terran] on Simple64
I0924 03:43:14.281277 140688196453760 sc2_env.py:725] Episode 122 finished after 16224 game steps. Outcome: [-1], reward: [-1], score: [475]


barracks :  0  supply :  0
marines :  0
Episode :  122 | Cumulative Reward :   -1 | Epsilon : 0.827


I0924 03:43:17.651392 140688196453760 sc2_env.py:507] Starting episode 123: [terran, terran] on Simple64
I0924 03:47:08.733099 140688196453760 sc2_env.py:725] Episode 123 finished after 28800 game steps. Outcome: [0], reward: [0], score: [10985]


barracks :  6  supply :  20
marines :  140
Episode :  123 | Cumulative Reward :    0 | Epsilon : 0.827


I0924 03:47:12.126469 140688196453760 sc2_env.py:507] Starting episode 124: [terran, terran] on Simple64
I0924 03:51:08.672590 140688196453760 sc2_env.py:725] Episode 124 finished after 28800 game steps. Outcome: [0], reward: [0], score: [10625]


barracks :  5  supply :  20
marines :  135
Episode :  124 | Cumulative Reward :    0 | Epsilon : 0.826


I0924 03:51:12.041347 140688196453760 sc2_env.py:507] Starting episode 125: [terran, terran] on Simple64
I0924 03:52:51.390224 140688196453760 sc2_env.py:725] Episode 125 finished after 14696 game steps. Outcome: [-1], reward: [-1], score: [470]


barracks :  0  supply :  0
marines :  0
Episode :  125 | Cumulative Reward :   -1 | Epsilon : 0.826


I0924 03:52:54.703331 140688196453760 sc2_env.py:507] Starting episode 126: [terran, terran] on Simple64
I0924 03:53:37.792730 140688196453760 sc2_env.py:725] Episode 126 finished after 8248 game steps. Outcome: [1], reward: [1], score: [3870]


barracks :  4  supply :  5
marines :  29
Episode :  126 | Cumulative Reward :    1 | Epsilon : 0.825


I0924 03:53:41.086209 140688196453760 sc2_env.py:507] Starting episode 127: [terran, terran] on Simple64
I0924 03:55:05.735818 140688196453760 sc2_env.py:725] Episode 127 finished after 13408 game steps. Outcome: [-1], reward: [-1], score: [475]


barracks :  0  supply :  0
marines :  0
Episode :  127 | Cumulative Reward :   -1 | Epsilon : 0.824


I0924 03:55:09.088200 140688196453760 sc2_env.py:507] Starting episode 128: [terran, terran] on Simple64
I0924 03:57:00.879825 140688196453760 sc2_env.py:725] Episode 128 finished after 16872 game steps. Outcome: [-1], reward: [-1], score: [425]


barracks :  0  supply :  0
marines :  0
Episode :  128 | Cumulative Reward :   -1 | Epsilon : 0.824


I0924 03:57:04.225602 140688196453760 sc2_env.py:507] Starting episode 129: [terran, terran] on Simple64
I0924 03:58:24.612501 140688196453760 sc2_env.py:725] Episode 129 finished after 12720 game steps. Outcome: [-1], reward: [-1], score: [80]


barracks :  0  supply :  0
marines :  0
Episode :  129 | Cumulative Reward :   -1 | Epsilon : 0.823


I0924 03:58:27.963028 140688196453760 sc2_env.py:507] Starting episode 130: [terran, terran] on Simple64
I0924 04:00:12.979879 140688196453760 sc2_env.py:725] Episode 130 finished after 15240 game steps. Outcome: [-1], reward: [-1], score: [710]


barracks :  0  supply :  0
marines :  0
Episode :  130 | Cumulative Reward :   -1 | Epsilon : 0.823


I0924 04:00:16.336466 140688196453760 sc2_env.py:507] Starting episode 131: [terran, terran] on Simple64
I0924 04:01:50.657391 140688196453760 sc2_env.py:725] Episode 131 finished after 14448 game steps. Outcome: [-1], reward: [-1], score: [70]


barracks :  0  supply :  0
marines :  0
Episode :  131 | Cumulative Reward :   -1 | Epsilon : 0.822


I0924 04:01:53.992568 140688196453760 sc2_env.py:507] Starting episode 132: [terran, terran] on Simple64
I0924 04:02:40.547684 140688196453760 sc2_env.py:725] Episode 132 finished after 8504 game steps. Outcome: [1], reward: [1], score: [3945]


barracks :  5  supply :  6
marines :  27
Episode :  132 | Cumulative Reward :    1 | Epsilon : 0.821


I0924 04:02:43.883727 140688196453760 sc2_env.py:507] Starting episode 133: [terran, terran] on Simple64
I0924 04:06:10.337698 140688196453760 sc2_env.py:725] Episode 133 finished after 28800 game steps. Outcome: [0], reward: [0], score: [8930]


barracks :  6  supply :  16
marines :  105
Episode :  133 | Cumulative Reward :    0 | Epsilon : 0.821


I0924 04:06:13.702378 140688196453760 sc2_env.py:507] Starting episode 134: [terran, terran] on Simple64
I0924 04:08:13.997447 140688196453760 sc2_env.py:725] Episode 134 finished after 16896 game steps. Outcome: [-1], reward: [-1], score: [60]


barracks :  0  supply :  0
marines :  0
Episode :  134 | Cumulative Reward :   -1 | Epsilon : 0.820


I0924 04:08:17.369215 140688196453760 sc2_env.py:507] Starting episode 135: [terran, terran] on Simple64
I0924 04:09:50.248527 140688196453760 sc2_env.py:725] Episode 135 finished after 14792 game steps. Outcome: [-1], reward: [-1], score: [55]


barracks :  0  supply :  0
marines :  0
Episode :  135 | Cumulative Reward :   -1 | Epsilon : 0.820


I0924 04:09:53.583384 140688196453760 sc2_env.py:507] Starting episode 136: [terran, terran] on Simple64
I0924 04:11:44.387510 140688196453760 sc2_env.py:725] Episode 136 finished after 16416 game steps. Outcome: [-1], reward: [-1], score: [560]


barracks :  0  supply :  0
marines :  0
Episode :  136 | Cumulative Reward :   -1 | Epsilon : 0.819


I0924 04:11:47.733623 140688196453760 sc2_env.py:507] Starting episode 137: [terran, terran] on Simple64
I0924 04:13:33.762863 140688196453760 sc2_env.py:725] Episode 137 finished after 15816 game steps. Outcome: [-1], reward: [-1], score: [165]


barracks :  0  supply :  0
marines :  0
Episode :  137 | Cumulative Reward :   -1 | Epsilon : 0.818


I0924 04:13:37.112507 140688196453760 sc2_env.py:507] Starting episode 138: [terran, terran] on Simple64
I0924 04:14:26.419910 140688196453760 sc2_env.py:725] Episode 138 finished after 8912 game steps. Outcome: [1], reward: [1], score: [4150]


barracks :  6  supply :  6
marines :  29
Episode :  138 | Cumulative Reward :    1 | Epsilon : 0.818


I0924 04:14:29.761777 140688196453760 sc2_env.py:507] Starting episode 139: [terran, terran] on Simple64
I0924 04:16:11.931142 140688196453760 sc2_env.py:725] Episode 139 finished after 14800 game steps. Outcome: [-1], reward: [-1], score: [425]


barracks :  0  supply :  0
marines :  0
Episode :  139 | Cumulative Reward :   -1 | Epsilon : 0.817


I0924 04:16:15.274732 140688196453760 sc2_env.py:507] Starting episode 140: [terran, terran] on Simple64
I0924 04:18:48.978054 140688196453760 sc2_env.py:725] Episode 140 finished after 21152 game steps. Outcome: [-1], reward: [-1], score: [85]


barracks :  0  supply :  0
marines :  0
Episode :  140 | Cumulative Reward :   -1 | Epsilon : 0.817


I0924 04:18:52.299330 140688196453760 sc2_env.py:507] Starting episode 141: [terran, terran] on Simple64
I0924 04:20:34.440270 140688196453760 sc2_env.py:725] Episode 141 finished after 15216 game steps. Outcome: [-1], reward: [-1], score: [300]


barracks :  0  supply :  0
marines :  0
Episode :  141 | Cumulative Reward :   -1 | Epsilon : 0.816


I0924 04:20:37.772223 140688196453760 sc2_env.py:507] Starting episode 142: [terran, terran] on Simple64
I0924 04:21:21.595354 140688196453760 sc2_env.py:725] Episode 142 finished after 8160 game steps. Outcome: [1], reward: [1], score: [4195]


barracks :  5  supply :  7
marines :  30
Episode :  142 | Cumulative Reward :    1 | Epsilon : 0.815


I0924 04:21:24.907436 140688196453760 sc2_env.py:507] Starting episode 143: [terran, terran] on Simple64
I0924 04:22:05.906261 140688196453760 sc2_env.py:725] Episode 143 finished after 7984 game steps. Outcome: [1], reward: [1], score: [3670]


barracks :  5  supply :  6
marines :  24
Episode :  143 | Cumulative Reward :    1 | Epsilon : 0.815


I0924 04:22:09.211409 140688196453760 sc2_env.py:507] Starting episode 144: [terran, terran] on Simple64
I0924 04:23:56.897464 140688196453760 sc2_env.py:725] Episode 144 finished after 15552 game steps. Outcome: [-1], reward: [-1], score: [255]


barracks :  0  supply :  0
marines :  0
Episode :  144 | Cumulative Reward :   -1 | Epsilon : 0.814


I0924 04:24:00.240330 140688196453760 sc2_env.py:507] Starting episode 145: [terran, terran] on Simple64
I0924 04:26:10.293883 140688196453760 sc2_env.py:725] Episode 145 finished after 18360 game steps. Outcome: [-1], reward: [-1], score: [200]


barracks :  0  supply :  0
marines :  0
Episode :  145 | Cumulative Reward :   -1 | Epsilon : 0.814


I0924 04:26:13.622821 140688196453760 sc2_env.py:507] Starting episode 146: [terran, terran] on Simple64
I0924 04:27:54.395874 140688196453760 sc2_env.py:725] Episode 146 finished after 14912 game steps. Outcome: [-1], reward: [-1], score: [435]


barracks :  0  supply :  0
marines :  0
Episode :  146 | Cumulative Reward :   -1 | Epsilon : 0.813


I0924 04:27:57.738999 140688196453760 sc2_env.py:507] Starting episode 147: [terran, terran] on Simple64
I0924 04:29:43.525758 140688196453760 sc2_env.py:725] Episode 147 finished after 15304 game steps. Outcome: [-1], reward: [-1], score: [540]


barracks :  0  supply :  0
marines :  0
Episode :  147 | Cumulative Reward :   -1 | Epsilon : 0.812


I0924 04:29:46.869612 140688196453760 sc2_env.py:507] Starting episode 148: [terran, terran] on Simple64
I0924 04:31:27.312863 140688196453760 sc2_env.py:725] Episode 148 finished after 14888 game steps. Outcome: [-1], reward: [-1], score: [410]


barracks :  0  supply :  0
marines :  0
Episode :  148 | Cumulative Reward :   -1 | Epsilon : 0.812


I0924 04:31:30.680106 140688196453760 sc2_env.py:507] Starting episode 149: [terran, terran] on Simple64
I0924 04:32:12.725861 140688196453760 sc2_env.py:725] Episode 149 finished after 7936 game steps. Outcome: [1], reward: [1], score: [3835]


barracks :  4  supply :  6
marines :  28
Episode :  149 | Cumulative Reward :    1 | Epsilon : 0.811


I0924 04:32:16.059784 140688196453760 sc2_env.py:507] Starting episode 150: [terran, terran] on Simple64
I0924 04:34:06.468902 140688196453760 sc2_env.py:725] Episode 150 finished after 15848 game steps. Outcome: [-1], reward: [-1], score: [250]


barracks :  0  supply :  0
marines :  0
Episode :  150 | Cumulative Reward :   -1 | Epsilon : 0.811


I0924 04:34:09.836013 140688196453760 sc2_env.py:507] Starting episode 151: [terran, terran] on Simple64
I0924 04:36:06.048065 140688196453760 sc2_env.py:725] Episode 151 finished after 17056 game steps. Outcome: [-1], reward: [-1], score: [190]


barracks :  0  supply :  0
marines :  0
Episode :  151 | Cumulative Reward :   -1 | Epsilon : 0.810


I0924 04:36:09.378244 140688196453760 sc2_env.py:507] Starting episode 152: [terran, terran] on Simple64
I0924 04:37:51.603571 140688196453760 sc2_env.py:725] Episode 152 finished after 15128 game steps. Outcome: [-1], reward: [-1], score: [390]


barracks :  0  supply :  0
marines :  0
Episode :  152 | Cumulative Reward :   -1 | Epsilon : 0.809


I0924 04:37:54.968282 140688196453760 sc2_env.py:507] Starting episode 153: [terran, terran] on Simple64
I0924 04:39:37.165592 140688196453760 sc2_env.py:725] Episode 153 finished after 16464 game steps. Outcome: [-1], reward: [-1], score: [650]


barracks :  0  supply :  0
marines :  0
Episode :  153 | Cumulative Reward :   -1 | Epsilon : 0.809


I0924 04:39:40.520416 140688196453760 sc2_env.py:507] Starting episode 154: [terran, terran] on Simple64
I0924 04:41:28.926231 140688196453760 sc2_env.py:725] Episode 154 finished after 15528 game steps. Outcome: [-1], reward: [-1], score: [700]


barracks :  0  supply :  0
marines :  0
Episode :  154 | Cumulative Reward :   -1 | Epsilon : 0.808


I0924 04:41:32.247420 140688196453760 sc2_env.py:507] Starting episode 155: [terran, terran] on Simple64
I0924 04:42:37.284969 140688196453760 sc2_env.py:725] Episode 155 finished after 11488 game steps. Outcome: [1], reward: [1], score: [4595]


barracks :  4  supply :  7
marines :  40
Episode :  155 | Cumulative Reward :    1 | Epsilon : 0.808


I0924 04:42:40.616104 140688196453760 sc2_env.py:507] Starting episode 156: [terran, terran] on Simple64
I0924 04:44:31.174249 140688196453760 sc2_env.py:725] Episode 156 finished after 15976 game steps. Outcome: [-1], reward: [-1], score: [1015]


barracks :  0  supply :  0
marines :  0
Episode :  156 | Cumulative Reward :   -1 | Epsilon : 0.807


I0924 04:44:34.533772 140688196453760 sc2_env.py:507] Starting episode 157: [terran, terran] on Simple64
I0924 04:46:18.432197 140688196453760 sc2_env.py:725] Episode 157 finished after 15064 game steps. Outcome: [-1], reward: [-1], score: [270]


barracks :  0  supply :  0
marines :  0
Episode :  157 | Cumulative Reward :   -1 | Epsilon : 0.806


I0924 04:46:21.789010 140688196453760 sc2_env.py:507] Starting episode 158: [terran, terran] on Simple64
I0924 04:50:36.082527 140688196453760 sc2_env.py:725] Episode 158 finished after 28800 game steps. Outcome: [0], reward: [0], score: [11660]


barracks :  6  supply :  21
marines :  149
Episode :  158 | Cumulative Reward :    0 | Epsilon : 0.806


I0924 04:50:39.463011 140688196453760 sc2_env.py:507] Starting episode 159: [terran, terran] on Simple64
I0924 04:52:21.218118 140688196453760 sc2_env.py:725] Episode 159 finished after 14832 game steps. Outcome: [-1], reward: [-1], score: [155]


barracks :  0  supply :  0
marines :  0
Episode :  159 | Cumulative Reward :   -1 | Epsilon : 0.805


I0924 04:52:24.514138 140688196453760 sc2_env.py:507] Starting episode 160: [terran, terran] on Simple64
I0924 04:54:44.548166 140688196453760 sc2_env.py:725] Episode 160 finished after 18712 game steps. Outcome: [-1], reward: [-1], score: [555]


barracks :  0  supply :  0
marines :  0
Episode :  160 | Cumulative Reward :   -1 | Epsilon : 0.805


I0924 04:54:47.900673 140688196453760 sc2_env.py:507] Starting episode 161: [terran, terran] on Simple64
I0924 04:55:55.092380 140688196453760 sc2_env.py:725] Episode 161 finished after 11128 game steps. Outcome: [-1], reward: [-1], score: [170]


barracks :  0  supply :  0
marines :  0
Episode :  161 | Cumulative Reward :   -1 | Epsilon : 0.804


I0924 04:55:58.414150 140688196453760 sc2_env.py:507] Starting episode 162: [terran, terran] on Simple64
I0924 05:02:57.716341 140688196453760 sc2_env.py:725] Episode 164 finished after 28800 game steps. Outcome: [0], reward: [0], score: [11775]


barracks :  7  supply :  21
marines :  150
Episode :  164 | Cumulative Reward :    0 | Epsilon : 0.802


I0924 05:03:01.095626 140688196453760 sc2_env.py:507] Starting episode 165: [terran, terran] on Simple64
I0924 05:04:02.244759 140688196453760 sc2_env.py:725] Episode 165 finished after 10736 game steps. Outcome: [1], reward: [1], score: [4570]


barracks :  5  supply :  7
marines :  38
Episode :  165 | Cumulative Reward :    1 | Epsilon : 0.802


I0924 05:04:05.590346 140688196453760 sc2_env.py:507] Starting episode 166: [terran, terran] on Simple64
I0924 05:06:17.166558 140688196453760 sc2_env.py:725] Episode 166 finished after 18312 game steps. Outcome: [-1], reward: [-1], score: [470]


barracks :  0  supply :  0
marines :  0
Episode :  166 | Cumulative Reward :   -1 | Epsilon : 0.801


I0924 05:06:20.507662 140688196453760 sc2_env.py:507] Starting episode 167: [terran, terran] on Simple64
I0924 05:07:52.808553 140688196453760 sc2_env.py:725] Episode 167 finished after 14328 game steps. Outcome: [-1], reward: [-1], score: [60]


barracks :  0  supply :  0
marines :  0
Episode :  167 | Cumulative Reward :   -1 | Epsilon : 0.800


I0924 05:07:56.172769 140688196453760 sc2_env.py:507] Starting episode 168: [terran, terran] on Simple64
I0924 05:09:37.665703 140688196453760 sc2_env.py:725] Episode 168 finished after 15296 game steps. Outcome: [-1], reward: [-1], score: [120]


### [Winning rate graph]

In [None]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# SCORE_FILE = 'rlagent_with_vanilla_dqn_score'

In [None]:
# Restore the pickled `scores` object from the file named by SCORE_FILE.
# The file carries a '.txt' suffix but is read in binary mode as a pickle.
score_path = SCORE_FILE + '.txt'
with open(score_path, mode="rb") as score_file:
    scores = pickle.load(score_file)

In [None]:
# Convert the loaded `scores` object into a NumPy array so it can be
# transposed and indexed when plotting.
np_scores = np.array(scores)
# Bare expression as the last line of the cell so the notebook displays it.
np_scores

In [None]:
# Plot rows 0, 1 and 2 of the transposed score array as the win / tie / lose
# curves on a single axes (assumes np_scores is 2-D with at least three
# columns -- TODO confirm against the cell that writes the score file).
fig = plt.figure()
ax = fig.add_subplot(111)
episode_idx = np.arange(len(np_scores))
curves = (
    (0, 'r', 'win rate'),
    (1, 'g', 'tie rate'),
    (2, 'b', 'lose rate'),
)
for column, line_color, line_label in curves:
    ax.plot(episode_idx, np_scores.T[column], color=line_color, label=line_label)
ax.set_ylabel('Score %')
ax.set_xlabel('Episode #')
ax.legend(loc='best')
plt.show()

In [None]:
# Display the value of `device` (defined in an earlier cell of the notebook).
# print must be called as a function: the notebook enables
# `from __future__ import print_function`, and the Python 2 statement form
# `print device` is a SyntaxError there and under Python 3.
print(device)