Skip to content

Commit

Permalink
Arena task, self-play, eval, model saving
Browse files Browse the repository at this point in the history
  • Loading branch information
cswinter committed Oct 13, 2019
1 parent 21011a1 commit 7f5d65a
Show file tree
Hide file tree
Showing 6 changed files with 238 additions and 55 deletions.
25 changes: 16 additions & 9 deletions codecraft.py
Expand Up @@ -8,17 +8,24 @@
RETRIES = 100


def create_game(game_length: int = None, action_delay: int = 0, self_play: bool = False, custom_map=None) -> int:
    """Start a new game on the local CodeCraft server and return its id.

    The request is retried once per second for as long as the server refuses
    the connection. Retries reuse exactly the same arguments, including
    `custom_map`.

    Args:
        game_length: Sent as the `maxTicks` query parameter. When falsy
            (None or 0) the request carries only `actionDelay`.
        action_delay: Sent as the `actionDelay` query parameter.
        self_play: When True the request sets `scriptedOpponent=false`,
            otherwise `scriptedOpponent=true`.
        custom_map: Payload posted as the JSON request body. None is
            replaced by the empty string.

    Returns:
        The `id` field of the server's JSON response, converted to int.
    """
    if custom_map is None:
        custom_map = ''
    scripted_opponent = 'false' if self_play else 'true'

    # Loop instead of recursing: a long server outage must not exhaust the
    # recursion limit, and every attempt must see the same custom_map.
    while True:
        try:
            if game_length:
                response = requests.post(f'http://localhost:9000/start-game'
                                         f'?maxTicks={game_length}'
                                         f'&actionDelay={action_delay}'
                                         f'&scriptedOpponent={scripted_opponent}',
                                         json=custom_map).json()
            else:
                # NOTE(review): this branch sends neither scriptedOpponent nor
                # custom_map, so self_play/custom_map are ignored when no
                # game_length is given - confirm this is intended.
                response = requests.post(f'http://localhost:9000/start-game?actionDelay={action_delay}').json()
            return int(response['id'])
        except requests.exceptions.ConnectionError:
            logging.info("Connection error on create_game, retrying")
            time.sleep(1)


def act(game_id: int, action):
Expand All @@ -38,15 +45,15 @@ def act(game_id: int, action):

def act_batch(actions, disable_harvest: bool = False):
payload = {}
for (game_id, move, turn, buildSpec, harvest) in actions:
for (game_id, player_id, move, turn, buildSpec, harvest) in actions:
action = {
"buildDrone": buildSpec,
"move": move,
"harvest": not disable_harvest,#harvest,
"transfer": False,
"turn": turn,
}
payload[game_id] = action
payload[f'{game_id}.{player_id}'] = action

retries = 100
while retries > 0:
Expand All @@ -62,13 +69,13 @@ def act_batch(actions, disable_harvest: bool = False):
time.sleep(1)


def observe(game_id: int, player_id: int = 0):
    """Fetch the observation for one player of one game from the local server.

    The request is retried once per second for as long as the server refuses
    the connection.

    Args:
        game_id: Sent as the `gameID` query parameter.
        player_id: Sent as the `playerID` query parameter; defaults to 0.

    Returns:
        The decoded JSON body of the server's response.
    """
    url = f'http://localhost:9000/observation?gameID={game_id}&playerID={player_id}'
    # Loop instead of recursing so a long outage cannot hit the recursion limit.
    while True:
        try:
            return requests.get(url).json()
        except requests.exceptions.ConnectionError:
            logging.info(f"Connection error on observe({game_id}.{player_id}), retrying")
            time.sleep(1)


def observe_batch(game_ids):
Expand Down
102 changes: 82 additions & 20 deletions gym_codecraft/envs/codecraft_vec_env.py
Expand Up @@ -8,10 +8,49 @@
import codecraft


def map_arena_tiny():
    """Return a freshly randomised 1000x1000 map with one drone per player.

    Player 1 gets a drone with one storage module and one constructor;
    player 2 gets a drone with one missile battery and three shield
    generators. Each drone's x and y position is drawn independently with
    `np.random.randint(-450, 450)`.
    """
    def spawn(**modules):
        # Draw x before y so the random stream is consumed in a fixed order.
        drone = {
            'xPos': np.random.randint(-450, 450),
            'yPos': np.random.randint(-450, 450),
            'resources': 0,
        }
        drone.update(modules)
        return drone

    arena = {'mapWidth': 1000, 'mapHeight': 1000}
    # Player 1 is created before player 2 to keep the draw order stable.
    arena['player1Drones'] = [
        spawn(storageModules=1, missileBatteries=0, constructors=1, engines=0, shieldGenerators=0),
    ]
    arena['player2Drones'] = [
        spawn(storageModules=0, missileBatteries=1, constructors=0, engines=0, shieldGenerators=3),
    ]
    return arena


class CodeCraftVecEnv(VecEnv):
def __init__(self, num_envs, game_length, objective, action_delay):
def __init__(self, num_envs, num_self_play, objective, action_delay, stagger=True):
assert(num_envs >= 2 * num_self_play)
self.objective = objective
self.action_delay = action_delay
self.num_self_play = num_self_play
self.stagger = stagger
self.game_length = 3 * 60 * 60
self.custom_map = lambda: None
if objective == Objective.ARENA_TINY:
self.game_length = 1 * 60 * 60
self.custom_map = map_arena_tiny

observations_low = []
observations_high = []
Expand Down Expand Up @@ -47,25 +86,35 @@ def __init__(self, num_envs, game_length, objective, action_delay):
self.eplen = []
self.eprew = []
self.score = []
self.game_length = game_length

def reset(self):
    """Start a fresh set of games and return the first observation result.

    Creates `num_envs - num_self_play` games. The first `num_self_play` of
    them are self-play games and contribute two (game_id, player_id) slots
    each (players 0 and 1); the rest contribute a single slot for player 0.
    All per-slot bookkeeping lists are rebuilt from scratch.

    Returns:
        The first element of `self.observe()`.
    """
    self.games = []
    self.eplen = []
    # Clear eprew together with the other per-slot lists; otherwise a second
    # reset() keeps appending and leaves stale returns at the low indices.
    self.eprew = []
    self.score = []

    num_games = self.num_envs - self.num_self_play
    for i in range(num_games):
        self_play = i < self.num_self_play
        if self.stagger:
            # Spread out initial game lengths to stagger start times.
            game_length = self.game_length * (i + 1) // num_games
        else:
            game_length = self.game_length
        game_id = codecraft.create_game(
            game_length,
            self.action_delay,
            self_play,
            self.custom_map())

        # One slot per controlled player: both sides in self-play, else player 0.
        player_ids = (0, 1) if self_play else (0,)
        for player_id in player_ids:
            self.games.append((game_id, player_id))
            self.eplen.append(1)
            self.eprew.append(0)
            self.score.append(None)
    return self.observe()[0]

def step_async(self, actions):
game_actions = []
for (game_id, action) in zip(self.games, actions):
for ((game_id, player_id), action) in zip(self.games, actions):
# 0-5: turn/movement (4 is no turn, no movement)
# 6: build [0,1,0,0,0] drone (if minerals > 5)
# 7: harvest
Expand All @@ -83,7 +132,7 @@ def step_async(self, actions):
build = [[0, 1, 0, 0, 0]]
if action == 7:
harvest = True
game_actions.append((game_id, move, turn, build, harvest))
game_actions.append((game_id, player_id, move, turn, build, harvest))

codecraft.act_batch(game_actions, disable_harvest=self.objective == Objective.DISTANCE_TO_CRYSTAL)

Expand All @@ -96,14 +145,18 @@ def observe(self):
infos = []
obs = codecraft.observe_batch_raw(self.games)
global_features = 1
nonobs_features = 2
dstride = 7
nonobs_features = 3
dstride = 13
mstride = 4
stride = global_features + dstride + 10 * mstride
stride = global_features + dstride + 10 * mstride + 10 * dstride
for i in range(self.num_envs):
x = obs[stride * i + global_features + 0]
y = obs[stride * i + global_features + 1]
if self.objective == Objective.ALLIED_WEALTH:
if self.objective == Objective.ARENA_TINY:
allied_score = obs[stride * self.num_envs + i * nonobs_features + 1]
enemy_score = obs[stride * self.num_envs + i * nonobs_features + 2]
score = 2 * allied_score / (allied_score + enemy_score + 1e-8)
elif self.objective == Objective.ALLIED_WEALTH:
score = obs[stride * self.num_envs + i * nonobs_features + 1] * 0.1
elif self.objective == Objective.DISTANCE_TO_ORIGIN:
score = -dist(x, y, 0.0, 0.0)
Expand Down Expand Up @@ -131,16 +184,24 @@ def observe(self):
self.score[i] = score

if obs[stride * self.num_envs + i * nonobs_features] > 0:
game_id = codecraft.create_game(self.game_length, self.action_delay)
self.games[i] = game_id
observation = codecraft.observe(game_id)
(game_id, pid) = self.games[i]
if pid == 0:
self_play = i // 2 < self.num_self_play
game_id = codecraft.create_game(self.game_length,
self.action_delay,
self_play,
self.custom_map())
self.games[i] = (game_id, 0)
if self_play:
self.games[i + 1] = (game_id, 1)
observation = codecraft.observe(game_id, pid)
# TODO
# obs[stride * i:stride * (i + 1)] = codecraft.observation_to_np(observation)

dones.append(1.0)
infos.append({'episode': {'r': self.eprew[i], 'l': self.eplen[i]}})
infos.append({'episode': {'r': self.eprew[i], 'l': self.eplen[i], 'index': i}})
self.eplen[i] = 1
self.eprew[i] = reward
self.eprew[i] = 0
self.score[i] = None
else:
self.eplen[i] += 1
Expand All @@ -161,13 +222,13 @@ def close(self):
while running > 0:
game_actions = []
active_games = []
for game_id in self.games:
for (game_id, player_id) in self.games:
if not done[game_id]:
active_games.append(game_id)
game_actions.append((game_id, False, 0, [], False))
active_games.append((game_id, player_id))
game_actions.append((game_id, player_id, False, 0, [], False))
codecraft.act_batch(game_actions)
obs = codecraft.observe_batch(active_games)
for o, game_id in zip(obs, active_games):
for o, (game_id, _) in zip(obs, active_games):
if o['winner']:
done[game_id] = True
running -= 1
Expand All @@ -178,6 +239,7 @@ class Objective(Enum):
DISTANCE_TO_CRYSTAL = 'DISTANCE_TO_CRYSTAL'
DISTANCE_TO_ORIGIN = 'DISTANCE_TO_ORIGIN'
DISTANCE_TO_1000_500 = 'DISTANCE_TO_1000_500'
ARENA_TINY = 'ARENA_TINY'


def dist2(x1, y1, x2, y2):
Expand Down
13 changes: 10 additions & 3 deletions hyper_params.py
Expand Up @@ -23,10 +23,16 @@ def __init__(self):
self.zero_init_vf = True # Set all initial weights for value function head to zero
self.small_init_pi = False # Set initial weights for policy head to small values and biases to zero

# Eval
self.eval_envs = 64
self.eval_timesteps = 360
self.eval_frequency = 1e5

# RL
self.steps = 10e6 # Total number of timesteps
self.num_envs = 64 # Number of environments
self.num_self_play = 32 # Number of self-play environments (each provides two environments)
self.seq_rosteps = 256 # Number of sequential steps per rollout
self.rosteps = 256 * 64 # Number of total rollout steps
self.gamma = 0.99 # Discount factor
self.lamb = 0.95 # Generalized advantage estimation parameter lambda
self.norm_advs = True # Normalize advantage values
Expand All @@ -35,9 +41,10 @@ def __init__(self):
self.cliprange = 0.2 # PPO cliprange
self.clip_vf = False # Use clipped value function objective

self.rosteps = self.num_envs * self.seq_rosteps

# Task
self.objective = envs.Objective.ALLIED_WEALTH
self.game_length = 3 * 60 * 60
self.objective = envs.Objective.ARENA_TINY
self.action_delay = 0

def args_parser(self) -> argparse.ArgumentParser:
Expand Down

0 comments on commit 7f5d65a

Please sign in to comment.