Init
--------------------------

In [None]:
from datatable import make_table_macro
from dataprocess import squad_dict2list_batch
from model import NetDNN, ODNet, FunctionConcat, BattleNet
import trainer

In [None]:
import pandas as pd
import numpy as np

In [None]:
from tqdm.notebook import tqdm

In [None]:
import matplotlib.pyplot as plt

In [None]:
import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

Variables
--------------------------

In [None]:
# Unit-type ids that are passed to squad_dict2list_batch as the column index.
# Protoss ids (they match the ids listed in the Protoss table under "Unit statistics").
p_unit_columns = [4, 73, 74, 75, 76, 77, 83, 141, 311]
# Zerg ids (they match the ids listed in the Zerg table under "Unit statistics").
z_unit_columns = [9, 105, 107, 109, 110, 126, 688]
# Protoss ids followed by Zerg ids; this order is what Cpz / HPDPSpz rely on.
pz_unit_columns = p_unit_columns + z_unit_columns

In [None]:
# Battlefield names; a sample's battlefield id is one-hot encoded with len(maps) classes.
maps = ["Plain", "PlainSlow", "BushOne", "BushTwo", "Corridor3", "Ramp3"]
# Match-up codes, one group of len(maps) tables per code (see table_index).
# NOTE(review): presumably "pp" = Protoss vs Protoss, "zz" = Zerg vs Zerg, "zp" = Zerg vs Protoss.
unittypes = ["pp", "zz", "zp"]
# Default feature columns: every Protoss and Zerg unit id.
columns = pz_unit_columns

In [None]:
# Both players use the same unit columns. These globals are read by the network
# factories (dnn1gen / dnnws1gen) and are reassigned in the "Experiment settings" cells.
column_index_p1 = column_index_p2 = columns

In [None]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing at the first tensor allocation.
target_device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Every tensor and network in this notebook is created with this dtype.
target_dtype = torch.float32
# Forwarded as DataLoader(pin_memory=...). The datasets built by ssty_to_dataset
# already live on target_device, so pinning stays disabled.
pinning = False


Dataset
-----------------------------------------

In [None]:
# Tables for every map and every match-up; indexed with table_index(map, unittype).
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
oo = make_table_macro(r"E:/output0421/output", maps, unittypes, pickleprefix="pcooa_")

In [None]:
# Same loader restricted to the "zz" match-up (used by the Zerg experiment settings).
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
ooz = make_table_macro(r"E:/output0421/output", maps, ["zz"], pickleprefix="pcooa_")

In [None]:
# Same loader restricted to the "pp" match-up (used by the Protoss experiment settings).
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
oop = make_table_macro(r"E:/output0421/output", maps, ["pp"], pickleprefix="pcooa_")

In [None]:
# Position of a table inside `oo`: tables are grouped by match-up, and each group
# holds one table per map in `maps` order.
table_index = lambda map_index, unittype_index: unittype_index*len(maps)+map_index
# Display one table as a sanity check: maps[3] with unittypes[2].
oo[table_index(3,2)] # BushTwo, zerg vs protoss

Preprocessing
--------------------------

In [None]:
class MinimalDS(Dataset):
    """Map-style dataset that pairs inputs and targets by position.

    `x` and `y` are any equally long indexable containers; sample `i` is
    the tuple `(x[i], y[i])`.
    """

    def __init__(self, x, y):
        n_inputs, n_targets = len(x), len(y)
        # inputs and targets must line up one-to-one
        assert n_inputs == n_targets
        self.x, self.y = x, y
        self.len = n_inputs

    def __len__(self):
        """Number of samples, fixed at construction time."""
        return self.len

    def __getitem__(self, i):
        """Return the `(input, target)` pair stored at index `i`."""
        sample, target = self.x[i], self.y[i]
        return sample, target

In [None]:
def table_to_ssty(tablelet, column_index_p1, column_index_p2):
    """Split a result table into squad features, battlefield one-hot and targets.

    Args:
        tablelet: DataFrame with the columns ("setup", "squad_p1"),
            ("setup", "squad_p2"), ("setup", "battlefield") and
            ("statistics", "winrates").
        column_index_p1: column index handed to squad_dict2list_batch for player 1.
        column_index_p2: column index handed to squad_dict2list_batch for player 2.

    Returns:
        Tuple `(squads_p1, squads_p2, battlefield_onehot, winrates)`.
    """
    raw_p1 = tablelet[("setup", "squad_p1")].values
    raw_p2 = tablelet[("setup", "squad_p2")].values
    battlefield_ids = tablelet[("setup", "battlefield")].values
    winrates = tablelet[("statistics", "winrates")].values

    squads_p1 = squad_dict2list_batch(raw_p1, column_index_p1)
    squads_p2 = squad_dict2list_batch(raw_p2, column_index_p2)
    # Always use len(maps) classes, so the encoding width does not depend on
    # which battlefields happen to be present in this table.
    battlefield_onehot = np.eye(len(maps))[battlefield_ids]
    return squads_p1, squads_p2, battlefield_onehot, winrates

In [None]:
def ssty_to_dataset(set_p1, set_p2, set_battlefield_onehot, set_y):
    """Build a MinimalDS on `target_device` from the pieces of table_to_ssty.

    The model input is the column-wise concatenation
    `[squad p1 | squad p2 | battlefield one-hot]`; the target is `set_y`.
    """
    tensor_opts = dict(device=target_device, dtype=target_dtype)
    features = np.concatenate((set_p1, set_p2, set_battlefield_onehot), axis=1)
    inputs = torch.tensor(features, **tensor_opts)
    targets = torch.tensor(set_y, **tensor_opts)
    return MinimalDS(inputs, targets)

def table_to_dataset(tablelet, column_index_p1, column_index_p2):
    """Convert a result table straight into a MinimalDS (table_to_ssty then ssty_to_dataset)."""
    squads_p1, squads_p2, battlefield_onehot, winrates = table_to_ssty(
        tablelet, column_index_p1, column_index_p2
    )
    return ssty_to_dataset(squads_p1, squads_p2, battlefield_onehot, winrates)

Training & Testing
-------------------------------

In [None]:
def dnn1gen():
    """Create a fresh plain DNN (net 0, "DNN") on the target device.

    The input width is read from the global column indices and `maps` at call time.
    """
    input_width = len(column_index_p1) + len(column_index_p2) + len(maps)
    return NetDNN(input_width, 48, 48, 48, 24, 1).to(target_device, target_dtype)


def dnnws1gen():
    """Create a fresh BattleNet (net 1, "DNN+WS") on the target device.

    The squad sub-network and the layer sizes are read from the global column
    index and `maps` at call time.
    """
    squad_net = NetDNN(len(column_index_p1), 24, 24)
    battle_net = BattleNet(squad_net, len(column_index_p1), len(maps), 48 + len(maps), 24, 24, 1)
    return battle_net.to(target_device, target_dtype)

In [None]:
def train(net, criterion, optimizer, traindl, testdl, valdl=None, *, iterations=None):
    """Train `net` and return the weights of the best validation epoch.

    Args:
        net: network to train (updated in place).
        criterion: loss passed to the trainer helpers.
        optimizer: optimizer bound to `net`'s parameters.
        traindl: DataLoader used for the training epochs.
        testdl: unused here; kept so existing call sites keep working.
        valdl: optional DataLoader used for model selection.
        iterations: number of epochs; defaults to 50 when None.

    Returns:
        A state_dict holding an independent copy of the weights from the
        epoch with the lowest validation loss. When no epoch was selected
        (no `valdl`, zero iterations, or no finite improvement) the final
        weights are returned, so the result can always be passed to
        `net.load_state_dict`.
    """
    import copy  # local import: only needed to snapshot the weights

    print(net)
    bestloss = np.inf
    bestnet = None
    bestepoch = 0
    if iterations is None:
        iterations = 50
    for i in tqdm(range(iterations), desc="epoch"):
        trainer.train_1epoch(traindl, net, criterion, optimizer)
        if valdl is not None:
            loss = trainer.test_1epoch(valdl, net, criterion)
            if bestloss > loss:
                bestloss = loss
                # state_dict() returns references to the live parameter
                # tensors; without a deep copy later epochs would overwrite
                # this "best" snapshot in place.
                bestnet = copy.deepcopy(net.state_dict())
                bestepoch = i
    if valdl is not None:
        print("best epoch is {} with loss {}".format(bestepoch, bestloss))
    if bestnet is None:
        # No validation-based selection happened: fall back to the final weights.
        bestnet = copy.deepcopy(net.state_dict())
    return bestnet

In [None]:
def test(dl, net):
    """Evaluate `net` on `dl` and return the pair `(accuracy, loss)`.

    Accuracy comes from trainer.test_1epoch_2args with
    trainer.count_correct_predictions_2args; the loss is measured with a
    fresh nn.BCEWithLogitsLoss via trainer.test_1epoch.
    """
    accuracy = trainer.test_1epoch_2args(
        dl, net, trainer.count_correct_predictions_2args
    )
    bce = nn.BCEWithLogitsLoss()
    return accuracy, trainer.test_1epoch(dl, net, bce)

In [None]:
def ring_indexing(table, i, j, offset=0, size=2000):
    """Slice rows `[i, j)` of `table` on a ring of `size` rows, rotated by `offset`.

    Both bounds are shifted by `offset` and wrapped modulo `size`. When the
    shifted window wraps around the end of the ring, the head part
    (`table[:stop]`) is concatenated before the tail part (`table[start:]`).

    Args:
        table: DataFrame (or Series) to slice.
        i: start of the window before rotation.
        j: end of the window (exclusive) before rotation.
        offset: rotation applied to both bounds.
        size: ring length; defaults to 2000, the table length used in this notebook.

    Returns:
        The selected rows as a pandas object.
    """
    start, stop = (i + offset) % size, (j + offset) % size
    if start < stop:
        return table[start:stop]
    # The window wraps (or ends exactly at the ring boundary, where stop == 0).
    return pd.concat((table[:stop], table[start:]), axis=0)

In [None]:
def big_loop(cv_i, netgens):
    """Train on all battlefields for one cross-validation fold and evaluate.

    Args:
        cv_i: fold index; every split window is rotated by `cv_i * 200` rows.
        netgens: sequence of zero-argument callables, each returning a fresh network.

    Returns:
        `(outputacc, outputloss)`: two lists with one tuple per (net, data size),
        laid out as `(net index, size index, train, val, test, <one value per map>)`.
    """
    offset = cv_i * 200
    # Training sets of growing size: 25, 50, 100, ..., 1600 rows taken from every table in oo.
    traintables = [pd.concat([ring_indexing(table,0,25 * (2**i), offset) for table in oo], axis=0) for i in range(7)]
    # Validation and test windows: 200 rows each from every table.
    valtable = pd.concat([ring_indexing(table,1600,1800, offset) for table in oo], axis=0)
    testtable = pd.concat([ring_indexing(table,1800,2000, offset) for table in oo], axis=0)
    # Local column indices used for the datasets built here.
    # NOTE(review): the factories in netgens size their networks from the *global*
    # column_index_p1/p2 - confirm the globals equal `columns` when this runs.
    column_index_p1 = column_index_p2 = columns

    traindss = [table_to_dataset(traintables[i], column_index_p1, column_index_p2) for i in range(7)]
    valds = table_to_dataset(valtable, column_index_p1, column_index_p2)
    testds = table_to_dataset(testtable, column_index_p1, column_index_p2)
    
    # The training batch size grows with the per-table training size.
    traindls = [DataLoader(traindss[i], batch_size=25 * (2**i), shuffle=True, pin_memory=pinning) for i in range(7)]
    valdl = DataLoader(valds, batch_size=200, pin_memory=pinning)
    testdl = DataLoader(testds, batch_size=200, pin_memory=pinning)
    
    # generating dataset for each battlefields
    # (for map j, the test windows of every match-up table are concatenated)
    testtables_battlefield = [pd.concat([ring_indexing(oo[i*len(maps)+j], 1800,2000, offset) for i in range(len(unittypes))], axis=0) for j in range(0, len(maps))]
    testds_battlefield = [table_to_dataset(testtable_, column_index_p1, column_index_p2) for testtable_ in testtables_battlefield]
    testdl_battlefield = [DataLoader(ds, batch_size=200) for ds in testds_battlefield]
    
    outputacc = []
    outputloss = []
    for i, netgen in enumerate(tqdm(netgens, desc="net")):
        for j in tqdm(range(7), desc="#data"):
            # A fresh network, loss and optimizer for every (net, data size) pair.
            net = netgen()
            accs = []
            losses = []
            criterion = nn.BCEWithLogitsLoss()
            optimizer = optim.Adam(net.parameters(), lr=0.001)
            traindl = traindls[j]
            bestweight = train(net, criterion, optimizer, traindl, testdl, valdl, iterations=100)
            # Evaluate with the weights selected on the validation set.
            net.load_state_dict(bestweight)
            # Evaluation order defines the result columns: train, val, test, then one per map.
            for dl in [traindl, valdl, testdl, *testdl_battlefield]:
                acc, loss = test(dl, net)
                accs.append(acc)
                losses.append(loss)
            outputacc.append((i, j, *accs))
            outputloss.append((i, j, *losses))
    return outputacc, outputloss

In [None]:
def big_loop_plain(cv_i, netgens):
    """Train on the Plain map only for one cross-validation fold, evaluate on all maps.

    Args:
        cv_i: fold index; every split window is rotated by `cv_i * 200` rows.
        netgens: sequence of zero-argument callables, each returning a fresh network.

    Returns:
        `(outputacc, outputloss)`: two lists with one tuple per (net, data size),
        laid out as `(net index, size index, train, val, test, <one value per map>)`.
    """
    # train only in plain, test in all battlefields.
    offset = cv_i * 200
    # Training sets of growing size: 25, 50, 100, ..., 1600 rows per match-up table.
    # pick only plain. plain is idx 0
    traintables = [pd.concat([ring_indexing(oo[u*len(maps) + 0],0,25 * (2**i), offset) for u in range(len(unittypes))], axis=0) for i in range(7)]
    # Validation and test windows (200 rows each) also come from the Plain tables only.
    valtable = pd.concat([ring_indexing(oo[u*len(maps) + 0],1600,1800, offset) for u in range(len(unittypes))], axis=0)
    testtable = pd.concat([ring_indexing(oo[u*len(maps) + 0],1800,2000, offset) for u in range(len(unittypes))], axis=0)
    # Local column indices used for the datasets built here.
    # NOTE(review): the factories in netgens size their networks from the *global*
    # column_index_p1/p2 - confirm the globals equal `columns` when this runs.
    column_index_p1 = column_index_p2 = columns

    traindss = [table_to_dataset(traintables[i], column_index_p1, column_index_p2) for i in range(7)]
    valds = table_to_dataset(valtable, column_index_p1, column_index_p2)
    testds = table_to_dataset(testtable, column_index_p1, column_index_p2)
    
    # The training batch size grows with the per-table training size.
    traindls = [DataLoader(traindss[i], batch_size=25 * (2**i), shuffle=True, pin_memory=pinning) for i in range(7)]
    valdl = DataLoader(valds, batch_size=200, pin_memory=pinning)
    testdl = DataLoader(testds, batch_size=200, pin_memory=pinning)
    
    # generating dataset for each battlefields
    # (for map j, the test windows of every match-up table are concatenated)
    testtables_battlefield = [pd.concat([ring_indexing(oo[i*len(maps)+j], 1800,2000, offset) for i in range(len(unittypes))], axis=0) for j in range(0, len(maps))]
    testds_battlefield = [table_to_dataset(testtable_, column_index_p1, column_index_p2) for testtable_ in testtables_battlefield]
    testdl_battlefield = [DataLoader(ds, batch_size=200) for ds in testds_battlefield]
    
    outputacc = []
    outputloss = []
    for i, netgen in enumerate(tqdm(netgens, desc="net")):
        for j in tqdm(range(7), desc="#data"):
            # A fresh network, loss and optimizer for every (net, data size) pair.
            net = netgen()
            accs = []
            losses = []
            criterion = nn.BCEWithLogitsLoss()
            optimizer = optim.Adam(net.parameters(), lr=0.001)
            traindl = traindls[j]
            bestweight = train(net, criterion, optimizer, traindl, testdl, valdl, iterations=100)
            # Evaluate with the weights selected on the validation set.
            net.load_state_dict(bestweight)
            # Evaluation order defines the result columns: train, val, test, then one per map.
            for dl in [traindl, valdl, testdl, *testdl_battlefield]:
                acc, loss = test(dl, net)
                accs.append(acc)
                losses.append(loss)
            outputacc.append((i, j, *accs))
            outputloss.append((i, j, *losses))
    return outputacc, outputloss

In [None]:
# NOTE(review): presumably silences per-epoch logging inside the trainer module - confirm.
trainer.verbose = False

A. Trained with combats on a Plain
---------------------------

In [None]:
# Experiment A: 10 repetitions x 10 cross-validation folds, trained on Plain only.
# llo1[k][cv_i] holds the accuracy rows and llo2[k][cv_i] the loss rows of big_loop_plain.
llo1, llo2 = [], []
for k in tqdm(range(0,10), desc="iteration"):
    lo1, lo2 = [], []
    for cv_i in tqdm(range(0,10), desc="cross validation"):
        # net 0 = dnn1gen (DNN), net 1 = dnnws1gen (DNN+WS)
        o1, o2 = big_loop_plain(cv_i, [dnn1gen, dnnws1gen])
        lo1.append(o1)
        lo2.append(o2)
    llo1.append(lo1)
    llo2.append(lo2)

### Print

In the tables below, `ndata` encodes the training-set size: each training table (one per match-up, Plain map only) contributes $25 \times 2^{\text{ndata}}$ samples.

The value from the column net indicates: 

|Value of Net|Name|
|---|--------|
| 0 |   DNN  |
| 1 | DNN+WS | 

In [None]:
# Avg. Accuracy
# Mean over repetitions (axis 0) and cross-validation folds (axis 1); the
# "net" and "ndata" columns are constant across both axes, so they survive unchanged.
o1 = np.average(np.array(llo1), axis=(0,1))
pd.DataFrame(o1, columns=["net", "ndata", "train", "val", "test", *maps])

In [None]:
# Avg. Loss
# Mean over repetitions (axis 0) and cross-validation folds (axis 1).
o2 = np.average(np.array(llo2), axis=(0,1))
pd.DataFrame(o2, columns=["net", "ndata", "train", "val", "test", *maps])

### Data Save & Load

In [None]:
# save
# One CSV per (repetition i, fold j).
# NOTE(review): the "csvout" directory must already exist, and the 10 x 10
# ranges must match the experiment loop that filled llo1 / llo2.
for i in range(10):
    for j in range(10):
        pd.DataFrame(llo1[i][j], columns=["net", "ndata", "train", "val", "test", *maps]).to_csv("csvout/acc_plain_b{}_cv{}.csv".format(i,j))
        pd.DataFrame(llo2[i][j], columns=["net", "ndata", "train", "val", "test", *maps]).to_csv("csvout/loss_plain_b{}_cv{}.csv".format(i,j))

In [None]:
# load
# Rebuild llo1 / llo2 from the CSVs written by the save cell; this overwrites
# whatever results are currently held in memory.
llo1 = [[0]*10 for _ in range(10)]
llo2 = [[0]*10 for _ in range(10)]
for i in range(10):
    for j in range(10):
        # index_col=0 drops the row index that to_csv wrote as the first column
        llo1[i][j] = pd.read_csv("csvout/acc_plain_b{}_cv{}.csv".format(i,j), index_col=0).values
        llo2[i][j] = pd.read_csv("csvout/loss_plain_b{}_cv{}.csv".format(i,j), index_col=0).values

## B. Trained with combats on all battlefields

In [None]:
# Experiment B: 10 repetitions x 10 cross-validation folds, trained on all battlefields.
# NOTE: this reuses the names llo1 / llo2 and so replaces the results of experiment A.
llo1, llo2 = [], []
for k in tqdm(range(0,10), desc="iteration"):
    lo1, lo2 = [], []
    for cv_i in tqdm(range(0,10), desc="cross validation"):
        # net 0 = dnn1gen (DNN), net 1 = dnnws1gen (DNN+WS)
        o1, o2 = big_loop(cv_i, [dnn1gen, dnnws1gen])
        lo1.append(o1)
        lo2.append(o2)
    llo1.append(lo1)
    llo2.append(lo2)

### Print

In the tables below, `ndata` encodes the training-set size: each training table (one per battlefield and match-up) contributes $25 \times 2^{\text{ndata}}$ samples.

The value from the column net indicates: 

|Value of Net|Name|
|---|--------|
| 0 |   DNN  |
| 1 | DNN+WS | 

In [None]:
# Avg. Accuracy
# Mean over repetitions (axis 0) and cross-validation folds (axis 1); the
# "net" and "ndata" columns are constant across both axes, so they survive unchanged.
o1 = np.average(np.array(llo1), axis=(0,1))
pd.DataFrame(o1, columns=["net", "ndata", "train", "val", "test", *maps])

In [None]:
# Avg. Loss
# Mean over repetitions (axis 0) and cross-validation folds (axis 1).
o2 = np.average(np.array(llo2), axis=(0,1))
pd.DataFrame(o2, columns=["net", "ndata", "train", "val", "test", *maps])

### Data Save & Load

In [None]:
# save
# One CSV per (repetition i, fold j).
# NOTE(review): the "csvout" directory must already exist, and the 10 x 10
# ranges must match the experiment loop that filled llo1 / llo2.
for i in range(10):
    for j in range(10):
        pd.DataFrame(llo1[i][j], columns=["net", "ndata", "train", "val", "test", *maps]).to_csv("csvout/acc_all_b{}_cv{}.csv".format(i,j))
        pd.DataFrame(llo2[i][j], columns=["net", "ndata", "train", "val", "test", *maps]).to_csv("csvout/loss_all_b{}_cv{}.csv".format(i,j))

In [None]:
# load
# Rebuild llo1 / llo2 from the CSVs written by the save cell; this overwrites
# whatever results are currently held in memory.
llo1 = [[0]*10 for _ in range(10)]
llo2 = [[0]*10 for _ in range(10)]
for i in range(10):
    for j in range(10):
        # index_col=0 drops the row index that to_csv wrote as the first column
        llo1[i][j] = pd.read_csv("csvout/acc_all_b{}_cv{}.csv".format(i,j), index_col=0).values
        llo2[i][j] = pd.read_csv("csvout/loss_all_b{}_cv{}.csv".format(i,j), index_col=0).values

### Unit statistics

##### Protoss

|name|column id|minerals|gas|food|
|:---:|:---|---:|---:|---:|
|Probe|84|50|0|1|
|Colossus|4|300|200|6|
|Zealot|73|100|0|2|
|Stalker|74|125|50|2|
|HighTemplar|75|50|150|2|
|DarkTemplar|76|125|125|2|
|Sentry|77|50|100|2|
|Immortal|83|250|100|4|
|Archon|141|250|250|4|
|Adept|311|100|25|2|

In [None]:
# Cost matrix for the Protoss units, one row per id in p_unit_columns (same order).
# Columns follow the last three columns of the Protoss table above and line up
# with the three entries of the resource `bound` used by phi_tensor.
Cp = np.array([
 [300,200,6], # 4 colossus
 [100,0,2],   # 73 zealot
 [125,50,2],  # 74 stalker
 [50,150,2],  # 75 ht (was 50/100/2, a copy of the sentry row; the table lists 50/150/2)
 [125,125,2], # 76 dt
 [50,100,2],  # 77 sentry
 [250,100,4], # 83 immortal
 [250,250,4], # 141 archon
 [100,25,2]   # 311 adept
])

In [None]:
# Per-unit statistics for the Protoss units, one row per id in p_unit_columns (same order).
# NOTE(review): judging by the name, column 0 is hit points and column 1 is damage
# per second; g_ltd multiplies the two columns - confirm the data source.
HPDPSp = np.array([
 [350,18.7], # 4 colossus
 [150,18.6], # 73 zealot
 [160,9.7],  # 74 stalker
 [80,3.2],   # 75 ht
 [120,37.2], # 76 dt
 [80,8.5],   # 77 sentry
 [300,19.2], # 83 immortal
 [360,20.0], # 141 archon
 [140,6.2]   # 311 adept
])

##### Zerg

|name|column id|minerals|gas|food|
|:---:|:---|---:|---:|---:|
|Zergling|9|25|0|0.5|
|Drone|104|50|0|1|
|Queen|105|150|0|2|
|Hydralisk|107|100|50|2|
|Ultralisk|109|300|200|6|
|Roach|110|75|25|2|
|Baneling|126|50|25|0.5|
|Ravager|688|100|100|3|
|Lurker|503|50|100|3|

In [None]:
# Cost matrix for the Zerg units, one row per id in z_unit_columns (same order).
# Columns follow the last three columns of the Zerg table above and line up
# with the three entries of the resource `bound` used by phi_tensor.
Cz = np.array([
 [25,0,0.5],  # 9 zergling
 [150,0,2],   # 105 queen
 [100,50,2],  # 107 hydra
 [300,200,6], # 109 ultra
 [75,25,2],   # 110 roach
 [50,25,0.5], # 126 baneling
 [100,100,3]  # 688 ravager
])

In [None]:
# Per-unit statistics for the Zerg units, one row per id in z_unit_columns (same order).
# NOTE(review): judging by the name, column 0 is hit points and column 1 is damage
# per second; g_ltd multiplies the two columns - confirm the data source.
HPDPSz = np.array([
 [35,10.0],   # 9 zergling
 [175,11.3],  # 105 queen
 [90,22.2],   # 107 hydra
 [500,57.4],  # 109 ultra
 [145,11.2],  # 110 roach
 [30,20.0],   # 126 baneling
 [120,14.0]   # 688 ravager
])

##### Concatenation

In [None]:
# Protoss rows followed by Zerg rows - the same order as pz_unit_columns.
Cpz = np.concatenate((Cp, Cz))

In [None]:
# Protoss rows followed by Zerg rows - the same order as pz_unit_columns.
# g_ltd / g_ltd2 index this array, so their squad vectors must use that column order.
HPDPSpz = np.concatenate((HPDPSp, HPDPSz))

### LTD1, LTD2

In [None]:
def sigmoid(x):
    """Numerically stable logistic function `1 / (1 + exp(-x))`, element-wise.

    `np.where` evaluates both of its branches, so selecting between
    `exp(x) / (1 + exp(x))` and `1 / (1 + exp(-x))` still overflows for
    large |x| (emitting RuntimeWarnings and producing NaN in the discarded
    branch). Using `exp(-|x|)`, which always lies in (0, 1], gives the same
    values without any overflow.

    Args:
        x: array-like of real values.

    Returns:
        ndarray with the same shape as `x`, values in [0, 1].
    """
    x = np.asarray(x)
    z = np.exp(-np.abs(x))
    return np.where(x < 0, z / (1 + z), 1 / (1 + z))

def f(s1, s2, g):
    """Squash the score gap `g(s1) - g(s2)` through `sigmoid`.

    `g` maps a batch of squads to one score per squad; the result is the
    predicted outcome for `s1` against `s2`, in [0, 1].
    """
    score_gap = g(s1) - g(s2)
    return sigmoid(score_gap)

def g_ltd(s):
    """LTD score per squad: sum over units of count * HPDPSpz[:, 0] * HPDPSpz[:, 1].

    `s` is a 2-D array with one row per squad and one column per row of HPDPSpz.
    """
    stat0, stat1 = HPDPSpz[:, 0], HPDPSpz[:, 1]
    weighted = s * stat0 * stat1
    return np.sum(weighted, axis=1)

def f_ltd(s1, s2):
    """Outcome estimate for `s1` versus `s2` using the LTD score (`g_ltd`)."""
    return f(s1, s2, g=g_ltd)

def g_ltd2(s):
    """LTD2 score per squad: sum over units of count * sqrt(HPDPSpz[:, 0]) * HPDPSpz[:, 1].

    `s` is a 2-D array with one row per squad and one column per row of HPDPSpz.
    """
    root_stat0 = np.sqrt(HPDPSpz[:, 0])
    stat1 = HPDPSpz[:, 1]
    weighted = s * root_stat0 * stat1
    return np.sum(weighted, axis=1)

def f_ltd2(s1, s2):
    """Outcome estimate for `s1` versus `s2` using the LTD2 score (`g_ltd2`)."""
    return f(s1, s2, g=g_ltd2)

In [None]:
# Baseline data: table_index(0, 0), i.e. maps[0] with unittypes[0].
testtable = oo[0]
# NOTE(review): g_ltd / g_ltd2 multiply by HPDPSpz, so column_index_p1/p2 must still be
# the combined pz columns here - they are reassigned further down in the notebook.
testp1, testp2, testt, testy = table_to_ssty(testtable, column_index_p1, column_index_p2)

In [None]:
testp1.shape

In [None]:
# Baseline predictions in [0, 1] for player 1 against player 2.
o_ltd = f_ltd(testp1, testp2)
o_ltd2 = f_ltd2(testp1, testp2)

In [None]:
def benchmark_np(outy, testy):
    """Fraction of decided samples whose predicted side matches the target side.

    A sample is "decided" when its target is strictly below or strictly
    above 0.5; targets equal to 0.5 are ignored. A prediction is correct
    when it lies on the same side of 0.5 as the target (a prediction of
    exactly 0.5 is never correct).

    Args:
        outy: array-like of predictions.
        testy: array-like of targets, same shape as `outy`.

    Returns:
        correct / decided as a float, or NaN when no sample is decided
        (instead of dividing by zero).
    """
    outy = np.asarray(outy)
    testy = np.asarray(testy)
    target_low, target_high = testy < 0.5, testy > 0.5
    agree = ((outy < 0.5) & target_low) | ((outy > 0.5) & target_high)
    valid = np.count_nonzero(target_low | target_high)
    if valid == 0:
        return float("nan")
    return np.count_nonzero(agree) / valid

In [None]:
# ltd1
benchmark_np(o_ltd, testy)

In [None]:
# ltd2
benchmark_np(o_ltd2, testy)

## C. Optimizing Unit-Combination

In [None]:
def opt_logit_1step(rt, s0t, terrain_onehot, boundt, Ct, net, optimizer):
    """Take one gradient step on the logits `rt` to raise the network output.

    The logits are mapped to a resource allocation with a softmax, turned
    into a squad by `phi_tensor`, concatenated with the fixed opponent
    squad and terrain encoding, and fed through `net`. The loss is the
    negated network output, so the step increases the prediction.

    Args:
        rt: 1-D trainable logits (the parameter held by `optimizer`).
        s0t: 1-D opponent squad tensor.
        terrain_onehot: 1-D terrain encoding.
        boundt: resource bound tensor handed to `phi_tensor`.
        Ct: cost matrix tensor handed to `phi_tensor`.
        net: model producing a single output for one input row.
        optimizer: optimizer that updates `rt`.

    Returns:
        The loss tensor (negated network output) from before the update.
    """
    optimizer.zero_grad()
    # dim is explicit: calling softmax without it is deprecated. rt is 1-D,
    # so dim=0 is exactly what the implicit choice resolved to.
    at = F.softmax(rt, dim=0)
    st = phi_tensor(at, boundt, Ct)
    inp = torch.cat([st, s0t, terrain_onehot])
    inp = inp.view(1,-1)
    predict = net(inp)
    loss = -predict
    loss.backward()
    optimizer.step()
    return loss

In [None]:
def optimize_sgdlogit(a, a0, terrain, bound, C, net, lr):
    """Optimise an allocation with SGD on its logits.

    The allocation is parameterised as `softmax(r)` with `r` initialised to
    `log(a)`, so every iterate stays on the probability simplex.

    Args:
        a: initial allocation (1-D, positive entries).
        a0: opponent allocation (1-D).
        terrain: index of the battlefield in `maps`.
        bound: resource bound (array-like) handed to `phi_tensor`.
        C: cost matrix (array-like) handed to `phi_tensor`.
        net: model whose output is maximised.
        lr: SGD learning rate.

    Returns:
        `(final_a, ats)`: the final allocation as a NumPy array, and the list
        of every iterate (detached tensors on the target device), starting
        with the initial allocation.
    """
    boundt = torch.tensor(bound).to(target_device, target_dtype)
    Ct = torch.tensor(C).to(target_device, target_dtype)
    r = np.log(a)
    rt = nn.Parameter(torch.tensor(r, requires_grad=True, device=target_device, dtype=target_dtype))
    a0t = torch.tensor(a0).to(target_device, target_dtype)
    s0t = phi_tensor(a0t, boundt, Ct).floor()  # the opponent squad uses whole units
    terrain_onehot = torch.tensor(np.eye(len(maps))[terrain]).to(target_device, target_dtype)
    optimizer = optim.SGD([rt], lr=lr)

    # The iterates are only monitored and recorded, so they are computed
    # without autograd; this avoids keeping one graph alive per stored step.
    with torch.no_grad():
        at = F.softmax(rt, dim=0)
        st = phi_tensor(at, boundt, Ct)
    ats = [at]
    for i in range(500):
        st_ = st
        opt_logit_1step(rt, s0t, terrain_onehot, boundt, Ct, net, optimizer)
        with torch.no_grad():
            at = F.softmax(rt, dim=0)
            st = phi_tensor(at, boundt, Ct)
        ats.append(at)
        # After a 50-step warm-up, stop once the squad barely changes.
        if (i >= 50) and torch.norm(st_ - st) < 0.1:
            break
    # copy() keeps the returned array independent of the tensors stored in ats.
    return at.cpu().numpy().copy(), ats

In [None]:
def optimize_adamlogit(a, a0, terrain, bound, C, net, lr):
    """Optimise an allocation with Adam on its logits.

    The allocation is parameterised as `softmax(r)` with `r` initialised to
    `log(a)`, so every iterate stays on the probability simplex.

    Args:
        a: initial allocation (1-D, positive entries).
        a0: opponent allocation (1-D).
        terrain: index of the battlefield in `maps`.
        bound: resource bound (array-like) handed to `phi_tensor`.
        C: cost matrix (array-like) handed to `phi_tensor`.
        net: model whose output is maximised.
        lr: Adam learning rate.

    Returns:
        `(final_a, ats)`: the final allocation as a NumPy array, and the list
        of every iterate (detached tensors on the target device), starting
        with the initial allocation.
    """
    boundt = torch.tensor(bound).to(target_device, target_dtype)
    Ct = torch.tensor(C).to(target_device, target_dtype)
    r = np.log(a)
    rt = nn.Parameter(torch.tensor(r, requires_grad=True, device=target_device, dtype=target_dtype))
    a0t = torch.tensor(a0).to(target_device, target_dtype)
    s0t = phi_tensor(a0t, boundt, Ct).floor()  # the opponent squad uses whole units
    terrain_onehot = torch.tensor(np.eye(len(maps))[terrain]).to(target_device, target_dtype)
    optimizer = optim.Adam([rt], lr=lr)

    # The iterates are only monitored and recorded, so they are computed
    # without autograd; this avoids keeping one graph alive per stored step.
    with torch.no_grad():
        at = F.softmax(rt, dim=0)
        st = phi_tensor(at, boundt, Ct)
    ats = [at]
    for i in range(500):
        st_ = st
        opt_logit_1step(rt, s0t, terrain_onehot, boundt, Ct, net, optimizer)
        with torch.no_grad():
            at = F.softmax(rt, dim=0)
            st = phi_tensor(at, boundt, Ct)
        ats.append(at)
        # After a 50-step warm-up, stop once the squad barely changes.
        if (i >= 50) and torch.norm(st_ - st) < 0.1:
            break
    # copy() keeps the returned array independent of the tensors stored in ats.
    return at.cpu().numpy().copy(), ats

In [None]:
def eval_winning(a, a0, terrain, bound, C, net):
    """Score allocation `a` against `a0` on `terrain` with `net`.

    Both allocations are converted to squads with `phi_tensor` and floored
    to whole units before being fed to the network.

    Args:
        a: own allocation; a NumPy array or a tensor (e.g. an iterate
            returned by the optimisers).
        a0: opponent allocation (array-like).
        terrain: index of the battlefield in `maps`.
        bound: resource bound (array-like).
        C: cost matrix (array-like).
        net: model to evaluate.

    Returns:
        The network output for the single input row.
    """
    num = len(maps)
    terrain_onehot = torch.tensor(np.eye(num)[terrain]).to(target_device, target_dtype)
    boundt = torch.tensor(bound).to(target_device, target_dtype)
    Ct = torch.tensor(C).to(target_device, target_dtype)
    # `a` may already be a tensor; torch.tensor(tensor) emits a copy-construct
    # warning, so convert with as_tensor and cut any autograd history instead.
    at = torch.as_tensor(a).detach().to(target_device, target_dtype)
    a0t = torch.as_tensor(a0).detach().to(target_device, target_dtype)
    s0t = phi_tensor(a0t, boundt, Ct).floor()
    st = phi_tensor(at, boundt, Ct).floor()
    inp = torch.cat([st, s0t, terrain_onehot])
    inp = inp.view(1,-1)
    return net(inp)

In [None]:
# Added to every cost so that a zero cost never causes a division by zero.
eps = 1e-10
def phi_tensor(a, b, C):
    """Convert an allocation into per-unit counts under a resource bound.

    Entry (u, r) of the intermediate matrix is `a[u] * b[r] / (C[u, r] + eps)`;
    the result keeps, for each unit `u`, the minimum over resources `r`.

    Args:
        a: allocation tensor, flattened to one entry per unit.
        b: resource bound tensor, flattened to one entry per resource.
        C: cost tensor of shape (units, resources).

    Returns:
        1-D tensor with one count per unit.
    """
    budget = a.reshape((-1, 1)) @ b.reshape((1, -1))
    affordable = budget / (C + eps)
    return affordable.min(dim=1).values

In [None]:
def get_random_a(dim):
    """Draw a random allocation of length `dim` from a flat Dirichlet distribution.

    The entries are non-negative and sum to one; the draw uses NumPy's
    global random state.
    """
    flat_alpha = np.ones(dim)
    return np.random.dirichlet(flat_alpha)

In [None]:
def initialize_armies(n_iterations):
    """Draw `n_iterations` random (enemy, ally) allocation pairs.

    Each allocation has `unitdim` entries (global). Returns
    `(enemy_as, ally_as)` - note that the enemy list comes first.
    """
    enemy_as, ally_as = [], []
    for _ in tqdm(range(n_iterations)):
        # enemy first, then ally: this keeps the order of the random draws
        enemy_as.append(get_random_a(unitdim))
        ally_as.append(get_random_a(unitdim))
    return enemy_as, ally_as

In [None]:
def random_armies(ally_as, enemy_as, terrain_list, bound, C, objective_net):
    """Monte-Carlo baseline: score 500 candidate allocations per scenario.

    For every scenario, 500 random allocations are drawn, the first one is
    replaced by the given ally allocation, and each candidate is scored
    with `eval_winning` against the scenario's enemy and terrain.

    Returns:
        `(optimized_as, valss, iterations)`: the best candidate per scenario,
        the list of candidate scores per scenario, and the number of
        candidates evaluated per scenario.
    """
    n_candidates = 500
    assert len(enemy_as) == len(ally_as)
    assert len(terrain_list) == len(ally_as)

    optimized_as, valss, iterations = [], [], []
    for idx in tqdm(range(len(enemy_as))):
        candidates = [get_random_a(unitdim) for _ in range(n_candidates)]
        # the given starting allocation takes the place of the first draw
        candidates[0] = ally_as[idx]
        opponent, terrain = enemy_as[idx], terrain_list[idx]
        scores = [
            eval_winning(cand, opponent, terrain, bound, C, objective_net).item()
            for cand in candidates
        ]
        optimized_as.append(candidates[np.argmax(scores)])
        valss.append(scores)
        iterations.append(len(candidates))
    return optimized_as, valss, iterations

In [None]:
def optimize_armies(optimize_fn, ally_as, enemy_as, terrain_list, bound, C, objective_net, lr):
    """Run `optimize_fn` on every scenario and score each intermediate iterate.

    `optimize_fn` is called as
    `optimize_fn(ally_a, enemy_a, terrain, bound, C, objective_net, lr)` and
    must return `(optimized_a, iterates)`.

    Returns:
        `(optimized_as, valss, iterations)`: the optimised allocation per
        scenario, the `eval_winning` score of every iterate per scenario, and
        the number of iterates per scenario.
    """
    assert len(enemy_as) == len(ally_as)
    assert len(terrain_list) == len(ally_as)

    optimized_as, valss, iterations = [], [], []
    for idx in tqdm(range(len(enemy_as))):
        opponent, start, terrain = enemy_as[idx], ally_as[idx], terrain_list[idx]
        best_a, trajectory = optimize_fn(start, opponent, terrain, bound, C, objective_net, lr)
        scores = [
            eval_winning(step_a, opponent, terrain, bound, C, objective_net).item()
            for step_a in trajectory
        ]
        optimized_as.append(best_a)
        valss.append(scores)
        iterations.append(len(trajectory))
    return optimized_as, valss, iterations

### Base predictor

##### Experiment settings for Protoss

In [None]:
# Protoss experiment: run EITHER this cell OR the Zerg settings cell, not both.
myC = Cp
unitdim = 9  # number of Protoss unit columns (len(p_unit_columns))
# Rebinds the global column indices that the network constructors read.
column_index_p1 = column_index_p2 = p_unit_columns
target_table = oop

##### Experiment settings for Zerg

In [None]:
# Zerg experiment: run EITHER this cell OR the Protoss settings cell, not both.
# NOTE(review): with "Run All" this cell overrides the Protoss settings, while the
# save cell further down writes files whose names end in "_pp".
myC = Cz
unitdim = 7  # number of Zerg unit columns (len(z_unit_columns))
# Rebinds the global column indices that the network constructors read.
column_index_p1 = column_index_p2 = z_unit_columns
target_table = ooz

##### training

In [None]:
# Fixed split per table: rows 0-1599 train, 1600-1799 validation, 1800-1999 test.
traintable = pd.concat([table[0:1600] for table in target_table], axis=0)
valtable = pd.concat([table[1600:1800] for table in target_table], axis=0)
testtable = pd.concat([table[1800:2000] for table in target_table], axis=0)
trainds = table_to_dataset(traintable, column_index_p1, column_index_p2)
valds = table_to_dataset(valtable, column_index_p1, column_index_p2)
testds = table_to_dataset(testtable, column_index_p1, column_index_p2)
traindl = DataLoader(trainds, batch_size=200, shuffle=True,  pin_memory=pinning)
valdl = DataLoader(valds, batch_size=200, pin_memory=pinning)
testdl = DataLoader(testds, batch_size=200, pin_memory=pinning)

trainer.verbose = False
# Same architecture as dnnws1gen, sized for the single-race column index chosen above.
dnnws = BattleNet(NetDNN(len(column_index_p1), 24, 24), len(column_index_p1), len(maps), 48 + len(maps), 24, 24, 1).to(target_device, target_dtype)
net = dnnws
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(net.parameters(), lr=0.001)
# No `iterations` argument, so train() falls back to its default of 50 epochs.
bestweight = train(net, criterion, optimizer, traindl, testdl, valdl)
net.load_state_dict(bestweight)
# NOTE: test() returns the pair (accuracy, loss), so `loss` holds both values.
loss = test(testdl, net)
loss

In [None]:
# resource bound
# One entry per cost column of myC (three resources); consumed by phi_tensor.
bound = np.array([10000, 5000, 150])

In [None]:
# Tensor versions of the bound and cost matrix on the target device.
# NOTE(review): the optimisation helpers rebuild these tensors from `bound` / `myC`
# themselves; these globals do not appear to be used in the cells shown here.
boundt = torch.tensor(bound).to(target_device, target_dtype)
Ct = torch.tensor(myC).to(target_device, target_dtype)

##### squad and terrain initialization

In [None]:
# 1200 scenarios cycling through the six battlefield indices (200 per battlefield).
terrain_list = np.array([0,1,2,3,4,5] * 200)

In [None]:
# One random (enemy, ally) allocation pair per entry of terrain_list.
# NOTE(review): no random seed is set, so the scenarios differ between runs.
enemy_as, ally_as = initialize_armies(1200)

##### experiments

In [None]:
# mc
_, randvalss, _ = random_armies(ally_as, enemy_as, terrain_list, bound, myC, net)

In [None]:
# gradient
sgd1_optimized_as, sgd1_valss, sgd1_iterations = optimize_armies(optimize_sgdlogit, ally_as, enemy_as, terrain_list, bound, myC, net, 1)
sgd01_optimized_as, sgd01_valss, sgd01_iterations = optimize_armies(optimize_sgdlogit, ally_as, enemy_as, terrain_list, bound, myC, net, 0.1)
sgd001_optimized_as, sgd001_valss, sgd001_iterations = optimize_armies(optimize_sgdlogit, ally_as, enemy_as, terrain_list, bound, myC, net, 0.01)
adam1_optimized_as, adam1_valss, adam1_iterations = optimize_armies(optimize_adamlogit, ally_as, enemy_as, terrain_list, bound, myC, net, 1)
adam01_optimized_as, adam01_valss, adam01_iterations = optimize_armies(optimize_adamlogit, ally_as, enemy_as, terrain_list, bound, myC, net, 0.1)
adam001_optimized_as, adam001_valss, adam001_iterations = optimize_armies(optimize_adamlogit, ally_as, enemy_as, terrain_list, bound, myC, net, 0.01)

In [None]:
# save
np.save("save/sgd1_all_pp",sgd1_valss)
np.save("save/sgd01_all_pp",sgd01_valss)
np.save("save/sgd001_all_pp",sgd001_valss)
np.save("save/adam1_all_pp",adam1_valss)
np.save("save/adam01_all_pp",adam01_valss)
np.save("save/adam001_all_pp",adam001_valss)
np.save("save/mc_all_pp",randvalss)

##### result plots

In [None]:
def printavgminmax(l0):
    """Print the average, minimum and maximum of `l0` on one line."""
    print(np.average(l0), np.min(l0), np.max(l0))


def printavgminmaxll(ll):
    """Print the average, minimum and maximum of `ll` taken along axis 0."""
    print(np.average(ll, axis=0), np.min(ll, axis=0), np.max(ll, axis=0))

def getmaxsofar(li, match_length=500):
    """Running maximum of `li`, padded or truncated to `match_length` entries.

    Entry `k` of the result is the largest value among `li[:k + 1]`. When
    `li` is shorter than `match_length`, the final running maximum is
    repeated; an empty `li` yields `-inf` everywhere.
    """
    best = -np.inf
    running = []
    for value in li:
        best = value if best < value else best
        running.append(best)
    shortfall = match_length - len(running)
    if shortfall > 0:
        # pad with the overall maximum
        running += [best] * shortfall
    return running[:match_length]

def fillmax(li, match_length=500):
    """Pad `li` with its last value, or truncate it, to `match_length` entries.

    Used for gradient runs that stopped before `match_length` steps: the
    value reached at the last step is carried forward. The input list is
    not modified.
    """
    padded = li[:]
    missing = match_length - len(padded)
    if missing > 0:
        # carry the last value forward
        padded += [li[-1]] * missing
    return padded[:match_length]

# Number of recorded iterates per scenario (average, min, max) for two of the runs.
printavgminmax(sgd001_iterations)
printavgminmax(adam1_iterations)
#printavgminmax(sgd001_optimized_as)
# Bring every gradient trajectory to 500 entries by repeating its last value.
sgd001_valss_max = [fillmax(sgd001_vals) for sgd001_vals in sgd001_valss]
sgd01_valss_max = [fillmax(sgd01_vals) for sgd01_vals in sgd01_valss]
sgd1_valss_max = [fillmax(sgd1_vals) for sgd1_vals in sgd1_valss]
adam001_valss_max = [fillmax(adam001_vals) for adam001_vals in adam001_valss]
adam01_valss_max = [fillmax(adam01_vals) for adam01_vals in adam01_valss]
adam1_valss_max = [fillmax(adam1_vals) for adam1_vals in adam1_valss]
#printavgminmaxll(gdvalss_max)
#printavgminmax(max_as)
# Monte-Carlo: best score found so far after each sample.
randvalss_max = [getmaxsofar(randvals) for randvals in randvalss]
#printavgminmaxll(randvalss_max)
# Average the raw network outputs (logits) over all scenarios at each step.
plt.plot(range(500), np.average(sgd001_valss_max, axis=0), label="sgd 0.01")
plt.plot(range(500), np.average(sgd01_valss_max, axis=0), label="sgd 0.1")
plt.plot(range(500), np.average(sgd1_valss_max, axis=0), label="sgd 1")
plt.plot(range(500), np.average(adam001_valss_max, axis=0), label="adam 0.01")
plt.plot(range(500), np.average(adam01_valss_max, axis=0), label="adam 0.1")
plt.plot(range(500), np.average(adam1_valss_max, axis=0), label="adam 1")
plt.plot(range(500), np.average(randvalss_max, axis=0), label="mc")
plt.ylabel("average logit(y)")
plt.xlabel("steps")
plt.legend()

In [None]:
# 2 x 2 grid at twice the default figure size: top row shows logits, bottom row shows
# sigmoid(logit); the right-hand column is the same plot zoomed to the first 100 steps.
fig, axs = plt.subplots(2,2, figsize=(plt.rcParams["figure.figsize"][0] *2, plt.rcParams["figure.figsize"][1]*2))
# Bottom row: outputs mapped through the sigmoid before averaging over scenarios.
for ax in axs[1]:
    #ax.plot(range(500), np.average(sigmoid(np.array(sgd001_valss_max)), axis=0), label="sgd 0.01")
    #ax.plot(range(500), np.average(sigmoid(np.array(sgd01_valss_max)), axis=0), label="sgd 0.1")
    ax.plot(range(500), np.average(sigmoid(np.array(sgd1_valss_max)), axis=0), label="sgd 1")
    ax.plot(range(500), np.average(sigmoid(np.array(adam001_valss_max)), axis=0), label="adam 0.01")
    ax.plot(range(500), np.average(sigmoid(np.array(adam01_valss_max)), axis=0), label="adam 0.1")
    ax.plot(range(500), np.average(sigmoid(np.array(adam1_valss_max)), axis=0), label="adam 1")
    ax.plot(range(500), np.average(sigmoid(np.array(randvalss_max)), axis=0), label="mc")
    ax.set_ylabel("y")
    ax.set_xlabel("steps")
    ax.legend()
# Top row: raw network outputs (logits) averaged over scenarios.
for ax in axs[0]:
    #ax.plot(range(500), np.average(sgd001_valss_max, axis=0), label="sgd 0.01")
    #ax.plot(range(500), np.average(sgd01_valss_max, axis=0), label="sgd 0.1")
    ax.plot(range(500), np.average(sgd1_valss_max, axis=0), label="sgd 1")
    ax.plot(range(500), np.average(adam001_valss_max, axis=0), label="adam 0.01")
    ax.plot(range(500), np.average(adam01_valss_max, axis=0), label="adam 0.1")
    ax.plot(range(500), np.average(adam1_valss_max, axis=0), label="adam 1")
    ax.plot(range(500), np.average(randvalss_max, axis=0), label="mc")
    ax.set_ylabel("logit(y)")
    ax.set_xlabel("steps")
    ax.legend()
# Zoom the right-hand column to the first 100 steps.
axs[0][1].set_xlim(0,100)
axs[1][1].set_xlim(0,100)