Game rule randomization
cswinter committed May 7, 2020
1 parent bbcc61f · commit 327e30a
Showing 4 changed files with 69 additions and 10 deletions.
23 changes: 19 additions & 4 deletions codecraft.py
@@ -4,11 +4,18 @@
 
 import numpy as np
 
+from gym_codecraft.envs.codecraft_vec_env import Rules
+
 
 RETRIES = 100
 
 
-def create_game(game_length: int = None, action_delay: int = 0, self_play: bool = False, custom_map=None, strong_scripted_opponent=False) -> int:
+def create_game(game_length: int = None,
+                action_delay: int = 0,
+                self_play: bool = False,
+                custom_map=None,
+                strong_scripted_opponent=False,
+                rules=Rules()) -> int:
     if custom_map is None:
         custom_map = ''
     try:
@@ -19,7 +26,8 @@ def create_game(game_length: int = None, action_delay: int = 0, self_play: bool
             f'?maxTicks={game_length}'
             f'&actionDelay={action_delay}'
             f'&scriptedOpponent={scripted_opponent}'
-            f'&idleOpponent={idle_opponent}',
+            f'&idleOpponent={idle_opponent}'
+            f'&mothershipDamageMultiplier={rules.mothership_damage_multiplier}',
             json=custom_map).json()
         else:
             response = requests.post(f'http://localhost:9000/start-game?actionDelay={action_delay}').json()
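The multiplier now rides along on the game-creation request. A minimal usage sketch (hypothetical values; assumes a CodeCraft server on localhost:9000, as in the code above):

    import codecraft
    from gym_codecraft.envs.codecraft_vec_env import Rules

    # Motherships take double damage in this game (hypothetical setting).
    game_id = codecraft.create_game(game_length=3600,
                                    rules=Rules(mothership_damage_multiplier=2.0))
    # The request URL now ends with ...&mothershipDamageMultiplier=2.0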
@@ -93,6 +101,10 @@ def observe_batch(game_ids)
             time.sleep(10)
 
 
+def scalabool(b: bool) -> str:
+    return 'true' if b else 'false'
+
+
 def observe_batch_raw(game_ids,
                       allies,
                       drones,
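The new helper (presumably named for the Scala backend's lowercase boolean literals) keeps query-string booleans consistent:

    assert scalabool(True) == 'true' and scalabool(False) == 'false'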
@@ -105,7 +117,8 @@ def observe_batch_raw(game_ids,
                       map_size=False,
                       last_seen=False,
                       is_visible=False,
-                      abstime=False):
+                      abstime=False,
+                      rule_msdm=False):
     retries = RETRIES
     ebcstr = ''
     if len(extra_build_costs) > 0:
@@ -122,12 +135,14 @@ def observe_batch_raw(game_ids,
          f'isVisible={"true" if is_visible else "false"}&' \
          f'abstime={"true" if abstime else "false"}&' \
          f'mapSize={"true" if map_size else "false"}&' \
-          f'v2={"true" if v2 else "false"}' + ebcstr
+          f'v2={"true" if v2 else "false"}&' \
+          f'ruleMsdm={scalabool(rule_msdm)}' + ebcstr
     while retries > 0:
         try:
             response = requests.get(url,
                                     json=game_ids,
                                     stream=True)
+            response.raise_for_status()
             response_bytes = response.content
             return np.frombuffer(response_bytes, dtype=np.float32)
         except requests.exceptions.ConnectionError as e:
41 changes: 36 additions & 5 deletions gym_codecraft/envs/codecraft_vec_env.py
@@ -21,13 +21,16 @@ class ObsConfig:
     feat_is_visible: bool = False
     feat_abstime: bool = False
     v2: bool = False
+    feat_rule_msdm: bool = False
 
     def global_features(self):
         gf = 2
         if self.feat_map_size:
             gf += 2
         if self.feat_abstime:
             gf += 2
+        if self.feat_rule_msdm:
+            gf += 1
         return gf
 
     def dstride(self):
@@ -81,6 +84,11 @@ def endallenemies(self):
 DEFAULT_OBS_CONFIG = ObsConfig(allies=2, drones=4, minerals=2, tiles=0, global_drones=4)
 
 
+@dataclass(frozen=True)
+class Rules:
+    mothership_damage_multiplier: float = 1.0
+
+
 def drone_dict(x, y,
                storage_modules=0,
                missile_batteries=0,
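Since Rules is frozen, a sampled ruleset is immutable (and hashable), so it can be shared between games safely:

    rules = Rules(mothership_damage_multiplier=4.0)
    rules.mothership_damage_multiplier = 1.0  # raises dataclasses.FrozenInstanceError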
@@ -109,6 +117,12 @@ def random_drone():
     return drone
 
 
+def random_rules(randomness: float):
+    return Rules(
+        mothership_damage_multiplier=np.random.uniform(1.0, randomness * 10.0),
+    )
+
+
 def map_arena_tiny(randomize: bool, hardness: int):
     storage_modules = 1
     constructors = 1
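At randomness = 1.0 the multiplier is drawn uniformly from [1.0, 10.0); note that np.random.uniform does not reorder its bounds, so values below 0.1 quietly invert the interval. For example:

    np.random.seed(0)  # for a reproducible draw
    random_rules(1.0)  # e.g. Rules(mothership_damage_multiplier=5.94), from [1.0, 10.0)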
@@ -529,7 +543,9 @@ def __init__(self,
                  attac=0.0,
                  protec=0.0,
                  max_army_size_score=999999,
-                 max_enemy_army_size_score=999999):
+                 max_enemy_army_size_score=999999,
+                 rule_rng_fraction=0.0,
+                 rule_rng_amount=0.0):
         assert(num_envs >= 2 * num_self_play)
         self.num_envs = num_envs
         self.objective = objective
@@ -553,6 +569,8 @@ def __init__(self,
         self.protec = protec
         self.max_army_size_score = max_army_size_score
         self.max_enemy_army_size_score = max_enemy_army_size_score
+        self.rule_rng_fraction = rule_rng_fraction
+        self.rule_rng_amount = rule_rng_amount
         if objective == Objective.ARENA_TINY:
             self.game_length = 1 * 60 * 60
             self.custom_map = map_arena_tiny
@@ -608,6 +626,12 @@ def __init__(self,
         self.score = []
         self.performed_builds = []
 
+    def rules(self) -> Rules:
+        if np.random.uniform(0, 1) < self.rule_rng_fraction:
+            return random_rules(self.rule_rng_amount)
+        else:
+            return Rules()
+
     def reset(self, partitioned_obs_config=None):
         if partitioned_obs_config:
             return list(self._reset(partitioned_obs_config))
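rules() is consulted once per game creation (see _reset and observe below), so each new game independently either keeps the defaults or draws a randomized ruleset. A sketch of the expected mix, with a hypothetical fraction of 0.25:

    env.rule_rng_fraction = 0.25
    randomized = sum(env.rules() != Rules() for _ in range(1000))
    # randomized is ~250: about a quarter of new games get random rules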
@@ -628,7 +652,8 @@ def _reset(self, partitioned_obs_config=None):
                 self.action_delay,
                 self_play,
                 self.next_map(),
-                self.strong_scripted_opponent)
+                self.strong_scripted_opponent,
+                self.rules())
             self.game_count += 1
 
             self.games.append((game_id, 0))
@@ -716,11 +741,13 @@ def observe(self, env_subset=None, obs_config=None):
             map_size=obs_config.feat_map_size,
             last_seen=obs_config.feat_last_seen,
             is_visible=obs_config.feat_is_visible,
-            abstime=obs_config.feat_abstime)
+            abstime=obs_config.feat_abstime,
+            rule_msdm=obs_config.feat_rule_msdm)
         stride = obs_config.stride()
         for i in range(num_envs):
             game = env_subset[i] if env_subset else i
             winner = obs[stride * num_envs + i * obs_config.nonobs_features()]
+            elimination_win = 0  # set to 1 below when the enemy is wiped out
             if self.objective.vs():
                 allied_score = obs[stride * num_envs + i * obs_config.nonobs_features() + 1]
                 allied_score = min(allied_score, self.max_army_size_score)
@@ -731,6 +758,7 @@ def observe(self, env_subset=None, obs_config=None):
                 score = 2 * allied_score / (allied_score + enemy_score + 1e-8) - 1
                 if winner > 0 and enemy_score == 0:
                     score += self.win_bonus
+                    elimination_win = 1  # outright elimination, not a win on points
                 if self.attac > 0:
                     score -= self.attac * min_enemy_ms_health
                 if self.protec > 0:
@@ -772,14 +800,16 @@ def observe(self, env_subset=None, obs_config=None):
                         self.action_delay,
                         self_play,
                         m,
-                        self.strong_scripted_opponent)
+                        self.strong_scripted_opponent,
+                        self.rules())
                     self.mp_game_count += 1
                 else:
                     game_id = codecraft.create_game(self.game_length,
                                                     self.action_delay,
                                                     self_play,
                                                     self.next_map(),
-                                                    self.strong_scripted_opponent)
+                                                    self.strong_scripted_opponent,
+                                                    self.rules())
                     self.game_count += 1
             else:
                 game_id = self.games[game - 1][0]
@@ -797,6 +827,7 @@ def observe(self, env_subset=None, obs_config=None):
                     'l': self.eplen[game],
                     'index': game,
                     'score': self.score[game],
+                    'elimination': elimination_win,
                 }})
                 self.eplen[game] = 1
                 self.eprew[game] = 0
2 changes: 2 additions & 0 deletions hyper_params.py
@@ -101,6 +101,8 @@ def __init__(self):
         self.task_randomize = True
         self.symmetric_map = False
         self.mix_mp = 0.0  # Fraction of maps that use MICRO_PRACTICE instead of the main objective
+        self.rule_rng_fraction = 0.0  # Fraction of maps that use a randomized ruleset
+        self.rule_rng_amount = 1.0  # Amount of rule randomization
 
 
     @staticmethod
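A sketch of how the two knobs combine (hypothetical values):

    hps = HyperParams()
    hps.rule_rng_fraction = 0.5  # half of new games use a randomized ruleset
    hps.rule_rng_amount = 1.0    # multiplier sampled from [1.0, 10.0)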
13 changes: 12 additions & 1 deletion main.py
@@ -72,6 +72,7 @@ def train(hps: HyperParams, out_dir: str) -> None:
         feat_map_size=hps.feat_map_size,
         feat_abstime=hps.feat_abstime,
         v2=True,
+        feat_rule_msdm=hps.rule_rng_fraction > 0,  # only expose the feature when rules can vary
     )
     if torch.cuda.is_available():
         device = torch.device("cuda:0")
@@ -120,6 +121,7 @@ def train(hps: HyperParams, out_dir: str) -> None:
     epoch = 0
     eprewmean = 0
     eplenmean = 0
+    eliminationmean = 0
     completed_episodes = 0
     env = None
     num_self_play_schedule = hps.get_num_self_play_schedule()
@@ -161,7 +163,9 @@ def train(hps: HyperParams, out_dir: str) -> None:
                 attac=hps.attac,
                 protec=hps.protec,
                 max_army_size_score=hps.max_army_size_score,
-                max_enemy_army_size_score=hps.max_enemy_army_size_score)
+                max_enemy_army_size_score=hps.max_enemy_army_size_score,
+                rule_rng_fraction=hps.rule_rng_fraction,
+                rule_rng_amount=hps.rule_rng_amount)
         obs, action_masks, privileged_obs = env.reset()
 
         if total_steps >= next_eval and hps.eval_envs > 0:
@@ -221,6 +225,7 @@ def train(hps: HyperParams, out_dir: str) -> None:
                 ema = min(95, completed_episodes * 10) / 100.0
                 eprewmean = eprewmean * ema + (1 - ema) * info['episode']['r']
                 eplenmean = eplenmean * ema + (1 - ema) * info['episode']['l']
+                eliminationmean = eliminationmean * ema + (1 - ema) * info['episode']['elimination']
                 completed_episodes += 1
 
         obs_tensor = torch.tensor(obs).to(device)
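The elimination rate reuses the ramping EMA already applied to reward and episode length: the first episode fully initializes the statistic (ema = 0), and after ten episodes it settles into a fixed 0.95 decay:

    # ema = min(95, completed_episodes * 10) / 100.0 ramps the smoothing factor:
    # completed_episodes:  0     1     2    ...   9     10+
    # ema:                 0.00  0.10  0.20 ...   0.90  0.95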
@@ -332,6 +337,7 @@ def train(hps: HyperParams, out_dir: str) -> None:
             'throughput': throughput,
             'eprewmean': eprewmean,
             'eplenmean': eplenmean,
+            'eliminationmean': eliminationmean,
             'entropy': sum(entropies) / len(entropies) / np.log(2),
             'explained variance': explained_var,
             'gradnorm': gradnorm * hps.bs / hps.rosteps,
@@ -435,6 +441,7 @@ def eval(policy,
                             strong_scripted_opponent=True)
 
     scores = []
+    eliminations = []
     scores_by_opp = defaultdict(list)
     lengths = []
     evens = list([2 * i for i in range(num_envs // 2)])
@@ -489,6 +496,7 @@ def eval(policy,
                 index = info['episode']['index']
                 score = info['episode']['score']
                 length = info['episode']['l']
+                elimination = info['episode']['elimination']
                 scores.append(score)
                 lengths.append(length)
                 for name, opp in opponents.items():
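As committed, elimination is read from the episode info but never appended to eliminations, so the eval_elimination_rate logged below averages an empty array (NaN). Presumably an append alongside the existing ones was intended, e.g.:

    scores.append(score)
    lengths.append(length)
    eliminations.append(elimination)  # missing from this commit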
@@ -500,12 +508,15 @@ def eval(policy,
     print(f'Eval: {np.array(scores).mean()}')
 
     scores = np.array(scores)
+    eliminations = np.array(eliminations)
 
     if curr_step is not None:
         wandb.log({
             'eval_mean_score': scores.mean(),
             'eval_max_score': scores.max(),
             'eval_min_score': scores.min(),
+            'eval_games': len(scores),
+            'eval_elimination_rate': eliminations.mean(),
         }, step=curr_step)
     for opp_name, scores in scores_by_opp.items():
         scores = np.array(scores)
