Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Automatically adjust resource costs
  • Loading branch information
cswinter committed May 13, 2020
1 parent cdbde3c commit 91c4e97
Show file tree
Hide file tree
Showing 5 changed files with 165 additions and 40 deletions.
104 changes: 104 additions & 0 deletions adr.py
@@ -0,0 +1,104 @@
from collections import defaultdict
from gym_codecraft.envs.codecraft_vec_env import Rules


class ADR:
    """Adjusts resource cost modifiers so that builds approach target fractions.

    The instance owns a ``Rules`` object (``self.ruleset``). Every call to
    ``step`` compares the observed share of each build against
    ``self.target_fractions`` and shifts the cost modifiers of the modules
    and of the drone size involved in that build.

    Build keys are strings made of two-character tokens: a single digit
    count followed by a module code (``m`` missiles, ``s`` storage,
    ``p`` shields, ``c`` constructor, ``e`` engines), e.g. ``'2m1e1p'``.
    """

    # Module code -> name of the matching cost modifier attribute on the ruleset.
    _MODULE_ATTRS = {
        'm': 'cost_modifier_missiles',
        's': 'cost_modifier_storage',
        'p': 'cost_modifier_shields',
        'c': 'cost_modifier_constructor',
        'e': 'cost_modifier_engines',
    }

    # Gradient key -> index into ``ruleset.cost_modifier_size``.
    _SIZE_INDEX = {'size1': 0, 'size2': 1, 'size3': 2, 'size4': 3}

    def __init__(self, stepsize=0.05):
        """Create the starting ruleset and the target build distribution.

        Args:
            stepsize: Factor applied to every cost adjustment made by ``step``.
        """
        self.ruleset = Rules()
        self.ruleset.cost_modifier_storage = 0.7
        self.ruleset.cost_modifier_constructor = 0.5
        self.ruleset.cost_modifier_engines = 0.5
        self.ruleset.cost_modifier_size[2] = 0.8
        self.ruleset.cost_modifier_size[3] = 0.6
        # Relative weights; ``normalize`` turns them into fractions.
        self.target_fractions = normalize({
            '1m': 10.0,
            '1s': 5.0,
            '1m1p': 4.0,
            '2m': 2.0,
            '1s1c': 2.0,
            '2m1e1p': 2.0,
            '3m1p': 2.0,
            '2m2p': 2.0,
            '2s2c': 2.0,
            '2s1c1e': 2.0,
            '2s1m1c': 2.0,
        })
        self.stepsize = stepsize

    def step(self, counts):
        """Update the cost modifiers of ``self.ruleset`` in place.

        Args:
            counts: Mapping from build key to an observed count or weight.
                Only the relative sizes matter, the values are normalized.

        Builds that have no entry in ``target_fractions`` are treated as
        having a target fraction of zero instead of raising ``KeyError``.
        Builds that are listed as targets but absent from ``counts``
        contribute nothing to the update.
        """
        gradient = defaultdict(float)
        for build, bfraction in normalize(counts).items():
            # Positive loss: the build is rarer than desired, so the costs
            # below are lowered (the gradient is subtracted).
            loss = self.target_fractions.get(build, 0.0) - bfraction
            for module, mfraction in module_norm(build).items():
                gradient[module] += mfraction * loss * self.stepsize
            gradient[f'size{size(build)}'] += loss * self.stepsize

        ruleset = self.ruleset
        for key, grad in gradient.items():
            attr = self._MODULE_ATTRS.get(key)
            if attr is not None:
                setattr(ruleset, attr, getattr(ruleset, attr) - grad)
            elif key in self._SIZE_INDEX:
                ruleset.cost_modifier_size[self._SIZE_INDEX[key]] -= grad
            # Any other key (unknown module code, other sizes) is ignored.

        module_attrs = tuple(self._MODULE_ATTRS.values())
        high = max(
            *(getattr(ruleset, attr) for attr in module_attrs),
            *ruleset.cost_modifier_size,
        )

        def clip(val):
            # Rescale relative to the largest modifier, never below 0.5.
            return max(0.5, val / high)

        for attr in module_attrs:
            setattr(ruleset, attr, clip(getattr(ruleset, attr)))
        # Assign by index so the list object held by the ruleset is kept.
        for index, value in enumerate(ruleset.cost_modifier_size):
            ruleset.cost_modifier_size[index] = clip(value)

    def metrics(self):
        """Return the current cost modifiers as a flat dict for logging.

        Returns:
            Dict with one ``adr_*_cost`` entry per module type and per
            drone size (sizes 1 to 4).
        """
        ruleset = self.ruleset
        return {
            'adr_missile_cost': ruleset.cost_modifier_missiles,
            'adr_storage_cost': ruleset.cost_modifier_storage,
            'adr_constructor_cost': ruleset.cost_modifier_constructor,
            'adr_engine_cost': ruleset.cost_modifier_engines,
            'adr_shield_cost': ruleset.cost_modifier_shields,
            'adr_size1_cost': ruleset.cost_modifier_size[0],
            'adr_size2_cost': ruleset.cost_modifier_size[1],
            'adr_size3_cost': ruleset.cost_modifier_size[2],
            'adr_size4_cost': ruleset.cost_modifier_size[3],
        }


def size(build):
    """Return the total number of modules described by ``build``.

    ``build`` is read in two-character steps and the first character of
    each step is taken as an integer count, e.g. ``'2m1e1p'`` gives 4.
    """
    return sum(int(build[pos:pos + 1]) for pos in range(0, len(build), 2))


def module_norm(build):
    """Return each module code's share of the modules in ``build``.

    ``build`` is split into two-character tokens; the first character of a
    token is the count and the remainder is the module code. The counts are
    passed through ``normalize``. If a module code occurs in more than one
    token, the last token's count is the one that is used.
    """
    tokens = (build[pos:pos + 2] for pos in range(0, len(build), 2))
    weights = {token[1:]: float(token[:1]) for token in tokens}
    return normalize(weights)


def normalize(weights):
    """Scale the values of ``weights`` so that they sum to one.

    Args:
        weights: Mapping from key to a numeric weight.

    Returns:
        A new plain dict with the same keys and each value divided by the
        sum of all values. When the values sum to zero there is nothing to
        scale by, so every key maps to ``0.0`` instead of raising
        ``ZeroDivisionError``. An empty mapping gives an empty dict.
    """
    total = sum(weights.values())
    if total == 0:
        # E.g. a running build average that has only seen zero counts so far.
        return {key: 0.0 for key in weights}
    return {key: weight / total for key, weight in weights.items()}

30 changes: 15 additions & 15 deletions codecraft.py
Expand Up @@ -114,21 +114,21 @@ def scalabool(b: bool) -> str:
return 'true' if b else 'false'


def observe_batch_raw(game_ids,
allies,
drones,
minerals,
global_drones,
tiles,
relative_positions,
v2,
extra_build_costs = [],
map_size=False,
last_seen=False,
is_visible=False,
abstime=False,
rule_msdm=False,
rule_costs=False):
def observe_batch_raw(game_ids: object,
allies: object,
drones: object,
minerals: object,
global_drones: object,
tiles: object,
relative_positions: object,
v2: object,
extra_build_costs: object = [],
map_size: object = False,
last_seen: object = False,
is_visible: object = False,
abstime: object = False,
rule_msdm: object = False,
rule_costs: object = False) -> object:
retries = RETRIES
ebcstr = ''
if len(extra_build_costs) > 0:
Expand Down
52 changes: 32 additions & 20 deletions gym_codecraft/envs/codecraft_vec_env.py
Expand Up @@ -2,8 +2,8 @@
import math

from enum import Enum
from dataclasses import dataclass
from typing import Tuple
from dataclasses import dataclass, field
from typing import List
import numpy as np

import codecraft
Expand Down Expand Up @@ -88,10 +88,10 @@ def endallenemies(self):
DEFAULT_OBS_CONFIG = ObsConfig(allies=2, drones=4, minerals=2, tiles=0, global_drones=4)


@dataclass(frozen=True)
@dataclass
class Rules:
mothership_damage_multiplier: float = 1.0
cost_modifier_size: Tuple[float, float, float, float] = (1.0, 1.0, 1.0, 1.0)
cost_modifier_size: List[float] = field(default_factory=lambda: [1.0, 1.0, 1.0, 1.0])
cost_modifier_missiles: float = 1.0
cost_modifier_shields: float = 1.0
cost_modifier_storage: float = 1.0
Expand Down Expand Up @@ -127,21 +127,32 @@ def random_drone():
return drone


def random_rules(rnd_msdm: float, rnd_cost: float, targets: Rules = None) -> Rules:
    """Sample a randomized ``Rules`` instance.

    Args:
        rnd_msdm: Scales the exponent range of the mothership damage
            multiplier, which is drawn as ``2 ** uniform(0, 4 * rnd_msdm)``.
        rnd_cost: Scales how far below 1.0 each cost modifier may be drawn.
            Only used when ``targets`` is ``None``.
        targets: Optional ruleset whose cost modifiers act as lower bounds.
            When given, every cost modifier is drawn uniformly between the
            corresponding value in ``targets`` and 1.0. Defaults to ``None``
            so that two-argument calls keep working.

    Returns:
        A newly constructed ``Rules`` object.
    """
    # Drawn first in both branches, so hoisting keeps the order of RNG calls.
    msdm = 2 ** np.random.uniform(0.0, 4.0 * rnd_msdm)

    if targets is not None:
        return Rules(
            mothership_damage_multiplier=msdm,
            cost_modifier_size=[np.random.uniform(low, 1.0) for low in targets.cost_modifier_size],
            cost_modifier_constructor=np.random.uniform(targets.cost_modifier_constructor, 1.0),
            cost_modifier_missiles=np.random.uniform(targets.cost_modifier_missiles, 1.0),
            cost_modifier_shields=np.random.uniform(targets.cost_modifier_shields, 1.0),
            cost_modifier_engines=np.random.uniform(targets.cost_modifier_engines, 1.0),
            cost_modifier_storage=np.random.uniform(targets.cost_modifier_storage, 1.0),
        )

    return Rules(
        mothership_damage_multiplier=msdm,
        # The first size modifier is left at 1.0.
        cost_modifier_size=[
            1.0,
            1.0 - np.random.uniform(0.0, 0.15 * rnd_cost),
            1.0 - np.random.uniform(0.0, 0.15 * rnd_cost),
            1.0 - np.random.uniform(0.0, 0.4 * rnd_cost),
        ],
        cost_modifier_constructor=1.0 - np.random.uniform(0.0, 0.4 * rnd_cost),
        cost_modifier_missiles=1.0 - np.random.uniform(0.0, 0.1 * rnd_cost),
        cost_modifier_shields=1.0 - np.random.uniform(0.0, 0.1 * rnd_cost),
        cost_modifier_storage=1.0 - np.random.uniform(0.0, 0.4 * rnd_cost),
        cost_modifier_engines=1.0 - np.random.uniform(0.0, 0.3 * rnd_cost),
    )


def map_arena_tiny(randomize: bool, hardness: int):
Expand Down Expand Up @@ -633,6 +644,7 @@ def __init__(self,
self.rule_rng_fraction = rule_rng_fraction
self.rule_rng_amount = rule_rng_amount
self.rule_cost_rng = rule_cost_rng
self.rng_ruleset = None
if objective == Objective.ARENA_TINY:
self.game_length = 1 * 60 * 60
self.custom_map = map_arena_tiny
Expand Down Expand Up @@ -705,7 +717,7 @@ def __init__(self,

def rules(self) -> Rules:
    """Choose the ruleset for a game.

    With probability ``self.rule_rng_fraction`` a randomized ruleset is
    sampled via ``random_rules``, passing ``self.rng_ruleset`` as the
    targets argument; otherwise the default ``Rules()`` is returned.
    """
    if np.random.uniform(0, 1) < self.rule_rng_fraction:
        # Single call that forwards rng_ruleset; the older two-argument call
        # that preceded it made this one unreachable.
        return random_rules(self.rule_rng_amount, self.rule_cost_rng, self.rng_ruleset)
    else:
        return Rules()

Expand Down
1 change: 1 addition & 0 deletions hyper_params.py
Expand Up @@ -104,6 +104,7 @@ def __init__(self):
self.rule_rng_fraction = 0.0 # Fraction of maps that use randomize ruleset
self.rule_rng_amount = 1.0 # Amount of rule randomization
self.rule_cost_rng = 0.0
self.adr = False # Automatically adjust environment rules


@staticmethod
Expand Down
18 changes: 13 additions & 5 deletions main.py
Expand Up @@ -11,6 +11,7 @@

import wandb

from adr import ADR
from gym_codecraft import envs
from gym_codecraft.envs.codecraft_vec_env import ObsConfig
from hyper_params import HyperParams
Expand Down Expand Up @@ -72,8 +73,8 @@ def train(hps: HyperParams, out_dir: str) -> None:
feat_map_size=hps.feat_map_size,
feat_abstime=hps.feat_abstime,
v2=True,
feat_rule_msdm=hps.rule_rng_fraction > 0,
feat_rule_costs=hps.rule_cost_rng > 0,
feat_rule_msdm=hps.rule_rng_fraction > 0 or hps.adr,
feat_rule_costs=hps.rule_cost_rng > 0 or hps.adr,
)
if torch.cuda.is_available():
device = torch.device("cuda:0")
Expand Down Expand Up @@ -129,7 +130,7 @@ def train(hps: HyperParams, out_dir: str) -> None:
num_self_play_schedule = hps.get_num_self_play_schedule()
batches_per_update_schedule = hps.get_batches_per_update_schedule()
entropy_bonus_schedule = hps.get_entropy_bonus_schedule()

adr = ADR()
rewmean = 0.0
rewstd = 1.0
while total_steps < hps.steps + resume_steps:
Expand Down Expand Up @@ -198,6 +199,8 @@ def train(hps: HyperParams, out_dir: str) -> None:
all_privileged_obs = []

policy.eval()
if hps.adr:
env.rng_ruleset = adr.ruleset
with torch.no_grad():
# Rollout
for step in range(hps.seq_rosteps):
Expand Down Expand Up @@ -229,10 +232,14 @@ def train(hps: HyperParams, out_dir: str) -> None:
eprewmean = eprewmean * ema + (1 - ema) * info['episode']['r']
eplenmean = eplenmean * ema + (1 - ema) * info['episode']['l']
eliminationmean = eliminationmean * ema + (1 - ema) * info['episode']['elimination']
for action, count in info['episode']['builds'].items():
buildmean[action] = buildmean[action] * ema + (1 - ema) * count
builds = info['episode']['builds']
for build in set().union(builds.keys(), buildmean.keys()):
count = builds[build]
buildmean[build] = buildmean[build] * ema + (1 - ema) * count
completed_episodes += 1

adr.step(buildmean)

obs_tensor = torch.tensor(obs).to(device)
action_masks_tensor = torch.tensor(action_masks).to(device)
privileged_obs_tensor = torch.tensor(privileged_obs).to(device)
Expand Down Expand Up @@ -362,6 +369,7 @@ def train(hps: HyperParams, out_dir: str) -> None:
}
for action, count in buildmean.items():
metrics[f'build_{action}'] = count
metrics.update(adr.metrics())
total_norm = 0.0
count = 0
for name, param in policy.named_parameters():
Expand Down

0 comments on commit 91c4e97

Please sign in to comment.