In [None]:
!pip install kaggle-environments -U > /dev/null 2>&1
!pip install keras-adversarial
!cp -r ../input/lux-ai-2021/* .

In [None]:
import numpy as np
import json
from pathlib import Path
import os
import random
from tqdm.notebook import tqdm
import torch
from torch import nn,Tensor
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from sklearn.model_selection import train_test_split
import argparse
import math
import torchvision.transforms as transforms
from torchvision.utils import save_image
from torchvision import datasets
from torch.autograd import Variable

In [None]:
def seed_everything(seed_value):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch
    (CPU and, when CUDA is available, every CUDA device), and exports
    ``PYTHONHASHSEED``.

    Args:
        seed_value: Integer seed applied to all generators.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    # Read by Python interpreters at start-up, so this only influences
    # processes launched after this point.
    os.environ['PYTHONHASHSEED'] = str(seed_value)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        # Benchmark mode auto-tunes the convolution algorithm and may pick a
        # different one from run to run, which defeats `deterministic` above.
        torch.backends.cudnn.benchmark = False

# Seed all generators once, before any data is loaded or any model is built.
seed = 42
seed_everything(seed)

In [None]:
def to_label(action):
    """Translate one space-separated action string into ``(unit_id, label)``.

    The first token is the command and the second token is returned as the
    unit id. Move commands (``'m'``) map their third token to
    n=0, s=1, w=2, e=3, c=5; ``'bcity'`` maps to 4; every other command
    yields ``None`` as the label.
    """
    tokens = action.split(' ')
    command, unit_id = tokens[0], tokens[1]

    if command == 'bcity':
        return unit_id, 4
    if command == 'm':
        direction_labels = {'n': 0, 's': 1, 'w': 2, 'e': 3, 'c': 5}
        return unit_id, direction_labels[tokens[2]]
    return unit_id, None


def depleted_resources(obs):
    """Return True when no line of ``obs['updates']`` has ``'r'`` as its first token."""
    return not any(line.split(' ')[0] == 'r' for line in obs['updates'])


def create_dataset_from_json(episode_dir, team_name='Toad Brigade'):
    """Collect (observation, unit, label) samples from episode replay files.

    Every ``*.json`` file directly under ``episode_dir`` whose name does not
    contain ``'output'`` is read. An episode is used only when the player with
    the highest reward plays for ``team_name``. For each step in which that
    player is ``'ACTIVE'``, the actions recorded in the following step are
    paired with the current observation; scanning of an episode stops at the
    first such step whose observation has no resource update left.

    Args:
        episode_dir: Directory containing the replay JSON files.
        team_name: Team whose winning episodes are kept.

    Returns:
        ``(obses, samples)`` where ``obses`` maps ``'<EpisodeId>_<step index>'``
        to a reduced observation dict (only the keys step, updates, player,
        width and height are kept) and ``samples`` is a list of
        ``(obs_id, unit_id, label)`` tuples for actions with a non-None label.
    """
    kept_keys = ('step', 'updates', 'player', 'width', 'height')
    obses, samples = {}, []

    replay_paths = [p for p in Path(episode_dir).glob('*.json') if 'output' not in p.name]
    for replay_path in tqdm(replay_paths):
        with open(replay_path) as handle:
            episode = json.load(handle)

        episode_id = episode['info']['EpisodeId']
        # Index of the highest reward; a missing (falsy) reward counts as 0.
        winner = np.argmax([reward or 0 for reward in episode['rewards']])
        if episode['info']['TeamNames'][winner] != team_name:
            continue

        steps = episode['steps']
        for turn, (current, following) in enumerate(zip(steps, steps[1:])):
            if current[winner]['status'] != 'ACTIVE':
                continue

            next_actions = following[winner]['action']
            # The observation is read from player 0's entry of the step.
            observation = current[0]['observation']
            if depleted_resources(observation):
                break

            # Record whose point of view the sample is taken from, then keep
            # only the fields listed in kept_keys.
            observation['player'] = winner
            obs_id = f'{episode_id}_{turn}'
            obses[obs_id] = {
                key: value for key, value in observation.items() if key in kept_keys
            }

            samples.extend(
                (obs_id, unit_id, label)
                for unit_id, label in map(to_label, next_actions)
                if label is not None
            )

    return obses, samples

In [None]:
# Parse the replay JSON files under the input directory into reduced
# observations and (obs_id, unit_id, label) samples, then report their counts.
episode_dir = '../input/lux-ai-episodes'
obses, samples = create_dataset_from_json(episode_dir)
print('obses:', len(obses), 'samples:', len(samples))

In [None]:
# Label distribution of the collected samples.
# Entry i of `actions` names label i as produced by to_label(); label 5 is the
# "move centre" action ('m <unit> c'), so it needs a name as well - without it
# the lookup below raises IndexError as soon as one such sample exists.
labels = [sample[-1] for sample in samples]
actions = ['north', 'south', 'west', 'east', 'bcity', 'center']
for value, count in zip(*np.unique(labels, return_counts=True)):
    print(f'{actions[value]:^5}: {count:>3}')

In [None]:
# Input for Neural Network
def make_input(obs, unit_id, action=None):
    """Encode an observation as a stack of 32x32 feature planes for one unit.

    The map is centred inside the 32x32 grid. Planes (first axis):

        0      1 at the position of the unit ``unit_id``
        1      cargo of that unit, (wood + coal + uranium) / 100
        2-4    other units on ``obs['player']``'s team: presence, cooldown / 6, cargo / 100
        5-7    the same three planes for the other team's units
        8-9    city tiles of ``obs['player']``: presence, min(fuel / upkeep, 10) / 10
        10-11  the same two planes for the other team's city tiles
        12-14  wood, coal, uranium amount / 800
        15-16  research points (own, other), min(rp, 200) / 200, constant over the plane
        17     obs['step'] % 40 / 40, constant over the plane
        18     obs['step'] / 360, constant over the plane
        19     1 inside the map area, 0 in the padding
        20     only when ``action`` is given: the action value, constant over the plane

    Args:
        obs: Dict with the keys 'width', 'height', 'step', 'player' and
            'updates' (a list of space-separated update strings).
        unit_id: Id of the unit the planes 0-1 are centred on.
        action: Optional action label; when not None a 21st plane is appended.

    Returns:
        ``np.float32`` array of shape (20, 32, 32), or (21, 32, 32) when
        ``action`` is given.

    Raises:
        KeyError: if a 'ct' update refers to a city that has not appeared in
            an earlier 'c' update.
    """
    width, height = obs['width'], obs['height']
    x_shift = (32 - width) // 2
    y_shift = (32 - height) // 2
    cities = {}

    # Identity test: label 0 is a valid action and must still add the plane.
    has_action = action is not None
    b = np.zeros((21 if has_action else 20, 32, 32), dtype=np.float32)

    for update in obs['updates']:
        strs = update.split(' ')
        input_identifier = strs[0]

        if input_identifier == 'u':
            x = int(strs[4]) + x_shift
            y = int(strs[5]) + y_shift

            wood = int(strs[7])
            coal = int(strs[8])
            uranium = int(strs[9])
            if unit_id == strs[3]:
                # Position and cargo of the unit being encoded
                b[:2, x, y] = (
                    1,
                    (wood + coal + uranium) / 100
                )
            else:
                # Every other unit: planes 2-4 for the same team, 5-7 otherwise
                team = int(strs[2])
                cooldown = float(strs[6])
                idx = 2 + (team - obs['player']) % 2 * 3
                b[idx:idx + 3, x, y] = (
                    1,
                    cooldown / 6,
                    (wood + coal + uranium) / 100
                )
        elif input_identifier == 'ct':
            # City tiles: planes 8-9 for the same team, 10-11 otherwise
            team = int(strs[1])
            city_id = strs[2]
            x = int(strs[3]) + x_shift
            y = int(strs[4]) + y_shift

            idx = 8 + (team - obs['player']) % 2 * 2
            b[idx:idx + 2, x, y] = (
                1,
                cities[city_id]
            )
        elif input_identifier == 'r':
            # Resources
            r_type = strs[1]
            x = int(strs[2]) + x_shift
            y = int(strs[3]) + y_shift

            amt = int(float(strs[4]))
            b[{'wood': 12, 'coal': 13, 'uranium': 14}[r_type], x, y] = amt / 800
        elif input_identifier == 'rp':
            # Research points
            team = int(strs[1])
            rp = int(strs[2])
            b[15 + (team - obs['player']) % 2, :] = min(rp, 200) / 200
        elif input_identifier == 'c':
            # Cities: remember the fuel/upkeep ratio for the tiles that follow
            city_id = strs[2]
            fuel = float(strs[3])
            lightupkeep = float(strs[4])
            cities[city_id] = min(fuel / lightupkeep, 10) / 10

    # Day/night cycle
    b[17, :] = obs['step'] % 40 / 40
    # Turns
    b[18, :] = obs['step'] / 360
    # Map area: exactly width x height cells starting at the shift. The
    # previous `32 - shift` upper bound was one cell too wide for odd sizes.
    b[19, x_shift:x_shift + width, y_shift:y_shift + height] = 1
    # Action plane
    if has_action:
        b[20, :] = action

    return b

In [None]:
class parser:
    """Plain container for the GAN training hyper-parameters.

    All values are fixed defaults set in ``__init__``. ``__repr__`` lists them
    so that ``print(opt)`` shows the active configuration instead of the
    default object address.
    """
    def __init__(self):
        self.n_epochs = 2                 # number of passes over the data loader
        self.batch_size = 64              # DataLoader batch size
        self.lr = 0.0002                  # Adam learning rate
        self.b1 = 0.5                     # first Adam beta
        self.b2 = 0.999                   # second Adam beta
        self.n_cpu = 8                    # not referenced in the visible cells
        self.latent_dim = 21 * 32 * 32    # input width of the generator MLP
        self.channels = 21                # planes per generated/real sample
        self.img_size = 32                # side length of each plane
        self.sample_interval = 400        # not referenced in the visible cells

    def __repr__(self):
        fields = ', '.join(f'{name}={value!r}' for name, value in vars(self).items())
        return f'{type(self).__name__}({fields})'
# Instantiate the hyper-parameters and derive the globals the models read.
opt = parser()
print(opt)
# (channels, height, width) of one sample
img_shape = (opt.channels, opt.img_size, opt.img_size)
cuda = torch.cuda.is_available()
#hidden_units_1 = 500
#hidden_units_2 = 500
#input_dim = 3
#output_units = 20 * 32 * 32

In [None]:
# Neural Network for Lux AI
class BasicConv2d(nn.Module):
    """2-D convolution padded by half the kernel size, optionally followed by batch norm.

    Args:
        input_dim: Number of input channels.
        output_dim: Number of output channels.
        kernel_size: ``(height, width)`` tuple; each axis is padded by
            ``kernel // 2``, so odd kernel sizes keep the spatial size.
        bn: When true, ``nn.BatchNorm2d(output_dim)`` is applied after the
            convolution.
    """
    def __init__(self, input_dim, output_dim, kernel_size, bn):
        super().__init__()
        self.conv = nn.Conv2d(
            input_dim, output_dim,
            kernel_size=kernel_size,
            padding=(kernel_size[0] // 2, kernel_size[1] // 2)
        )
        self.bn = nn.BatchNorm2d(output_dim) if bn else None

    def forward(self, x):
        """Return ``bn(conv(x))`` (or ``conv(x)`` when batch norm is disabled).

        No device transfer happens here: the result stays on the device of
        the input and the parameters. Forcing activations onto CUDA whenever
        it is available breaks a module that was left on the CPU.
        """
        h = self.conv(x)
        return self.bn(h) if self.bn is not None else h


class LuxNet(nn.Module):
    """Residual convolutional policy network over 20 input planes.

    One ``BasicConv2d`` stem is followed by 12 residual ``BasicConv2d``
    blocks (32 filters, 3x3 kernels, batch norm) and a bias-free linear head
    producing 5 scores per sample.
    """
    def __init__(self):
        super().__init__()
        layers, filters = 12, 32
        self.conv0 = BasicConv2d(20, filters, (3, 3), True)
        self.blocks = nn.ModuleList([BasicConv2d(filters, filters, (3, 3), True) for _ in range(layers)])
        self.head_p = nn.Linear(filters, 5, bias=False)

    def forward(self, x):
        """Return the ``(batch, 5)`` score matrix for input ``x`` of shape ``(batch, 20, H, W)``.

        Everything runs on the device of ``x`` and the parameters; the former
        explicit CPU/CUDA transfers were either no-ops or a host round trip.
        """
        h = F.relu_(self.conv0(x))
        for block in self.blocks:
            h = F.relu_(h + block(h))
        # Weight the feature map by the first input plane and sum over space,
        # i.e. pool the features at the cells where that plane is non-zero.
        h_head = (h * x[:, :1]).view(h.size(0), h.size(1), -1).sum(-1)
        return self.head_p(h_head)

In [None]:
class Generator(nn.Module):
    """Generator: predicts an action plane with ``LuxNet`` and maps the result through an MLP.

    ``forward`` takes the 20-plane observation batch, appends a 21st plane
    filled with the index of the highest ``LuxNet`` score of each sample,
    flattens the result to ``opt.latent_dim`` values and feeds it to the MLP,
    whose output is reshaped to ``img_shape``.

    Attributes:
        model: The MLP (Linear / BatchNorm1d / LeakyReLU stack ending in Tanh).
        pre_process: The ``LuxNet`` used to score the actions. It is created
            once here, so it keeps the same weights between calls, follows
            ``.cuda()`` / ``.eval()`` and is saved together with the
            generator. Building a new, randomly initialised network on every
            ``forward`` call made the scores change from call to call.
    """
    def __init__(self):
        super().__init__()

        def block(in_feat, out_feat, normalize=True):
            """Linear layer, optional batch norm, LeakyReLU(0.2)."""
            layers = [nn.Linear(in_feat, out_feat)]
            if normalize:
                # NOTE(review): the second positional argument of BatchNorm1d
                # is `eps`, not the momentum. The value is kept and only made
                # explicit here - confirm whether 0.8 was meant as momentum.
                layers.append(nn.BatchNorm1d(out_feat, eps=0.8))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
            return layers

        self.model = nn.Sequential(
            *block(opt.latent_dim, 128, normalize=False),
            *block(128, 256),
            *block(256, 512),
            *block(512, 1024),
            nn.Linear(1024, int(np.prod(img_shape))),
            nn.Tanh()
        )
        self.pre_process = LuxNet()

    def forward(self, z):
        """Generate a batch of samples.

        Args:
            z: Tensor of shape ``(batch, 20, 32, 32)``.

        Returns:
            ``(img, preds_mat)``: ``img`` has shape ``(batch, *img_shape)``
            and ``preds_mat`` is the ``(batch, 5)`` score matrix of
            ``pre_process``. The batch size follows ``z``; it is no longer
            fixed to 64, so partial batches and single samples get neither
            random filler rows nor a mismatching output size.
        """
        preds_mat = self.pre_process(z)
        _, preds = torch.max(preds_mat, 1)

        # One constant plane per sample holding the selected action index.
        # The index is not differentiable, so no gradient reaches
        # `pre_process` through the generated image.
        action_plane = preds.to(z.dtype).view(-1, 1, 1, 1).expand(-1, 1, z.size(2), z.size(3))
        pre_imgs = torch.cat([z[:, :20], action_plane], dim=1)

        in_img = pre_imgs.reshape(pre_imgs.size(0), opt.latent_dim)
        img = self.model(in_img)
        img = img.view(img.size(0), *img_shape)
        return img, preds_mat

In [None]:
class Discriminator(nn.Module):
    """MLP that maps a flattened sample of shape ``img_shape`` to one sigmoid score."""

    def __init__(self):
        super().__init__()

        n_inputs = int(np.prod(img_shape))
        stack = [
            nn.Linear(n_inputs, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(512, 256),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Linear(256, 1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*stack)

    def forward(self, img):
        """Flatten every sample of ``img`` and return a ``(batch, 1)`` score tensor."""
        return self.model(img.view(img.size(0), -1))

In [None]:
# Binary cross-entropy loss plus the two networks; all three are moved to the
# GPU when the `cuda` flag is set.
adversarial_loss = torch.nn.BCELoss()
generator = Generator()
discriminator = Discriminator()
if cuda:
    for module in (generator, discriminator, adversarial_loss):
        module.cuda()

In [None]:
class LuxDataset(Dataset):
    """Dataset over ``(obs_id, unit_id, action)`` samples.

    Each item is ``(state, action)`` where ``state`` is a dict holding the
    encoded observation twice: key 0 with the action plane appended and key 1
    without it.
    """

    def __init__(self, obses, samples):
        self.obses, self.samples = obses, samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        obs_id, unit_id, action = self.samples[idx]
        observation = self.obses[obs_id]
        planes = {
            0: make_input(observation, unit_id, action),  # with the action plane
            1: make_input(observation, unit_id),          # observation only
        }
        return planes, action
    
    

In [None]:
# Batches of opt.batch_size samples, served in sample order (shuffle=False)
# by two worker processes.
# NOTE(review): samples are appended episode by episode, so unshuffled batches
# mostly come from a single episode - confirm that shuffle=False is intended
# for training.
dataloader = DataLoader(
    LuxDataset(obses, samples), 
    batch_size=opt.batch_size, 
    shuffle=False, 
    num_workers=2
)

In [None]:
# One Adam optimizer per network, both using the learning rate and betas from `opt`.
optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))

In [None]:
# GAN training loop.
# Every batch carries, under key 0, the 21-plane "real" samples (observation
# plus recorded action plane) and, under key 1, the 20-plane observations that
# are fed to the generator.
MAX_BATCHES_PER_EPOCH = 1700  # only the first 1700 batches of an epoch are trained on
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
best_rate = 0

for epoch in range(opt.n_epochs):
    for i, (imgs_total, action) in enumerate(dataloader):
        if i >= MAX_BATCHES_PER_EPOCH:
            # Later batches were never used for training; stop here instead of
            # loading and discarding the rest of the dataset.
            break

        real_imgs = imgs_total[0].float().to(device)
        z = imgs_total[1].float().to(device)
        n_samples = real_imgs.size(0)

        # Adversarial ground truths
        valid = torch.ones(n_samples, 1, device=device)
        fake = torch.zeros(n_samples, 1, device=device)

        # -----------------
        #  Train Generator
        # -----------------
        optimizer_G.zero_grad()

        # The second return value (the action scores) is not part of the loss.
        gen_imgs, _ = generator(z)

        # Loss measures the generator's ability to fool the discriminator
        g_loss = adversarial_loss(discriminator(gen_imgs), valid)
        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------
        optimizer_D.zero_grad()

        # Measure the discriminator's ability to tell real from generated samples
        real_loss = adversarial_loss(discriminator(real_imgs), valid)
        fake_loss = adversarial_loss(discriminator(gen_imgs.detach()), fake)
        d_loss = (real_loss + fake_loss) / 2

        d_loss.backward()
        optimizer_D.step()

        print ("[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]" % (epoch, opt.n_epochs, i, len(dataloader),d_loss.item(), g_loss.item()))

# NOTE(review): this pickles the whole module object, so loading the file
# requires the Generator class and everything it references to be importable
# in the loading process - confirm that the consumer provides them.
torch.save(generator, 'model.pth')
        

In [None]:
%%writefile agent.py
import os
import numpy as np
import torch
from lux.game import Game


path = '/kaggle_simulations/agent' if os.path.exists('/kaggle_simulations') else '.'
# map_location lets a checkpoint that was written on a GPU machine load on a
# CPU-only host, and puts the model on the device the agent sends its inputs to.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# NOTE(review): model.pth holds a fully pickled module, so torch.load needs the
# model classes to be importable here; this file does not define them - confirm
# how they are provided (or switch to a TorchScript export with torch.jit.load).
model = torch.load(f'{path}/model.pth', map_location=device)
model.eval()


# Input for Neural Network
def make_input(obs, unit_id, action=None):
    """Encode an observation as a stack of 32x32 feature planes for one unit.

    The map is centred inside the 32x32 grid. Planes (first axis):

        0      1 at the position of the unit ``unit_id``
        1      cargo of that unit, (wood + coal + uranium) / 100
        2-4    other units on ``obs['player']``'s team: presence, cooldown / 6, cargo / 100
        5-7    the same three planes for the other team's units
        8-9    city tiles of ``obs['player']``: presence, min(fuel / upkeep, 10) / 10
        10-11  the same two planes for the other team's city tiles
        12-14  wood, coal, uranium amount / 800
        15-16  research points (own, other), min(rp, 200) / 200, constant over the plane
        17     obs['step'] % 40 / 40, constant over the plane
        18     obs['step'] / 360, constant over the plane
        19     1 inside the map area, 0 in the padding
        20     only when ``action`` is given: the action value, constant over the plane

    Args:
        obs: Mapping with the keys 'width', 'height', 'step', 'player' and
            'updates' (a list of space-separated update strings).
        unit_id: Id of the unit the planes 0-1 are centred on.
        action: Optional action label; when not None a 21st plane is appended.

    Returns:
        ``np.float32`` array of shape (20, 32, 32), or (21, 32, 32) when
        ``action`` is given.

    Raises:
        KeyError: if a 'ct' update refers to a city that has not appeared in
            an earlier 'c' update.
    """
    width, height = obs['width'], obs['height']
    x_shift = (32 - width) // 2
    y_shift = (32 - height) // 2
    cities = {}

    # Identity test: label 0 is a valid action and must still add the plane.
    has_action = action is not None
    b = np.zeros((21 if has_action else 20, 32, 32), dtype=np.float32)

    for update in obs['updates']:
        strs = update.split(' ')
        input_identifier = strs[0]

        if input_identifier == 'u':
            x = int(strs[4]) + x_shift
            y = int(strs[5]) + y_shift
            wood = int(strs[7])
            coal = int(strs[8])
            uranium = int(strs[9])
            if unit_id == strs[3]:
                # Position and cargo of the unit being encoded
                b[:2, x, y] = (
                    1,
                    (wood + coal + uranium) / 100
                )
            else:
                # Every other unit: planes 2-4 for the same team, 5-7 otherwise
                team = int(strs[2])
                cooldown = float(strs[6])
                idx = 2 + (team - obs['player']) % 2 * 3
                b[idx:idx + 3, x, y] = (
                    1,
                    cooldown / 6,
                    (wood + coal + uranium) / 100
                )
        elif input_identifier == 'ct':
            # City tiles: planes 8-9 for the same team, 10-11 otherwise
            team = int(strs[1])
            city_id = strs[2]
            x = int(strs[3]) + x_shift
            y = int(strs[4]) + y_shift
            idx = 8 + (team - obs['player']) % 2 * 2
            b[idx:idx + 2, x, y] = (
                1,
                cities[city_id]
            )
        elif input_identifier == 'r':
            # Resources
            r_type = strs[1]
            x = int(strs[2]) + x_shift
            y = int(strs[3]) + y_shift
            amt = int(float(strs[4]))
            b[{'wood': 12, 'coal': 13, 'uranium': 14}[r_type], x, y] = amt / 800
        elif input_identifier == 'rp':
            # Research points
            team = int(strs[1])
            rp = int(strs[2])
            b[15 + (team - obs['player']) % 2, :] = min(rp, 200) / 200
        elif input_identifier == 'c':
            # Cities: remember the fuel/upkeep ratio for the tiles that follow
            city_id = strs[2]
            fuel = float(strs[3])
            lightupkeep = float(strs[4])
            cities[city_id] = min(fuel / lightupkeep, 10) / 10

    # Day/night cycle
    b[17, :] = obs['step'] % 40 / 40
    # Turns
    b[18, :] = obs['step'] / 360
    # Map area: exactly width x height cells starting at the shift. The
    # previous `32 - shift` upper bound was one cell too wide for odd sizes.
    b[19, x_shift:x_shift + width, y_shift:y_shift + height] = 1
    # Action plane
    if has_action:
        b[20, :] = action

    return b


game_state = None
def get_game_state(observation):
    """Create (on step 0) or update the module-level ``game_state`` and return it.

    On step 0 a new ``Game`` is initialised from the full update list and then
    updated with everything after the first two entries; its ``id`` is set to
    the observing player. On later steps the existing state is updated with
    the step's updates.
    """
    global game_state

    is_first_step = observation["step"] == 0
    updates = observation["updates"]

    if not is_first_step:
        game_state._update(updates)
        return game_state

    game_state = Game()
    game_state._initialize(updates)
    game_state._update(updates[2:])
    game_state.id = observation["player"]
    return game_state


def in_city(pos):
    """Return True when the cell at ``pos`` holds a city tile of the current player.

    The lookup is best-effort: any error raised while resolving the cell
    (presumably a position outside the map - confirm) is reported as
    "not in a city".
    """
    try:
        city = game_state.map.get_cell_by_pos(pos).citytile
        return city is not None and city.team == game_state.id
    except Exception:
        # `Exception` rather than a bare `except`, so KeyboardInterrupt and
        # SystemExit are not swallowed.
        return False


def call_func(obj, method, args=()):
    """Call ``obj.<method>(*args)`` and return its result.

    Args:
        obj: Object owning the method.
        method: Name of the method to look up with ``getattr``.
        args: Iterable of positional arguments; defaults to an immutable
            empty tuple instead of a shared mutable list.
    """
    return getattr(obj, method)(*args)


unit_actions = [
    ('move', 'n'),
    ('move', 's'),
    ('move', 'w'),
    ('move', 'e'),
    ('build_city',),
    ('move', 'c'),
]
def get_action(policy, unit, dest):
    """Pick the best-scoring action whose target cell is still free.

    Labels are tried from the highest to the lowest score in ``policy``. The
    target is the unit's position translated by the action's last element,
    falling back to the unit's own position when ``translate`` returns a falsy
    value. A target already listed in ``dest`` is skipped unless it is one of
    the player's city tiles. When every candidate is skipped the unit gets
    ``move('c')``.

    Returns:
        ``(action, position)``: the value returned by the unit's method and
        the cell the unit is expected to occupy.
    """
    ranked_labels = np.argsort(policy)[::-1]
    for label in ranked_labels:
        candidate = unit_actions[label]
        target = unit.pos.translate(candidate[-1], 1) or unit.pos
        taken = target in dest and not in_city(target)
        if taken:
            continue
        return call_func(unit, *candidate), target

    return unit.move('c'), unit.pos


def agent(observation, configuration):
    """Entry point called once per turn; returns the list of actions to issue.

    City tiles that can act build a worker while the running unit count is
    below the player's city-tile count, and otherwise research until uranium
    is researched. Every unit that can act is given the action selected by
    ``get_action`` from the model's policy output, except that during the last
    10 turns of each 40-turn cycle units standing on an own city tile are left
    alone.
    """
    global game_state

    game_state = get_game_state(observation)
    me = game_state.players[observation.player]
    commands = []

    # City actions
    n_units = len(me.units)
    for city in me.cities.values():
        for tile in city.citytiles:
            if not tile.can_act():
                continue
            if n_units < me.city_tile_count:
                commands.append(tile.build_worker())
                n_units += 1
            elif not me.researched_uranium():
                commands.append(tile.research())
                me.research_points += 1

    # Worker actions
    claimed = []  # target cells already assigned this turn
    for unit in me.units:
        if not unit.can_act():
            continue
        if game_state.turn % 40 >= 30 and in_city(unit.pos):
            continue

        planes = make_input(observation, unit.id)
        target_device = 'cuda' if torch.cuda.is_available() else 'cpu'
        with torch.no_grad():
            batch = torch.from_numpy(planes).to(target_device).unsqueeze(0)
            # The model returns (generated image, action scores); only the
            # scores are used here.
            img, p = model(batch)
        policy = p.squeeze(0).to('cpu').numpy()

        action, pos = get_action(policy, unit, claimed)
        commands.append(action)
        claimed.append(pos)

    return commands

In [None]:
from kaggle_environments import make

# Self-play smoke test: run the written agent.py against itself on a 24x24
# map and render the resulting episode inline.
if __name__ == "__main__":
    env = make("lux_ai_2021", configuration={"width": 24, "height": 24, "loglevel": 2, "annotations": True}, debug=True)
    steps = env.run(['agent.py', 'agent.py'])
    env.render(mode="ipython", width=1200, height=800)