In [1]:
# !pip install kaggle-environments -U > /dev/null 2>&1
# !cp -r ../input/lux-ai-2021/* .

In [1]:
import numpy as np
import json
from pathlib import Path
import os
import random
from tqdm.notebook import tqdm
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from sklearn.model_selection import train_test_split
import optuna
from optuna.trial import TrialState

In [2]:
def seed_everything(seed_value):
    """Seed every random number generator used in this notebook.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and, when available,
    all CUDA devices), exports ``PYTHONHASHSEED`` and switches cuDNN to
    deterministic, non-autotuned kernels.

    Args:
        seed_value: Integer seed applied to all generators.
    """
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    # PYTHONHASHSEED is read at interpreter start-up, so this only influences
    # child processes launched afterwards, not the running kernel.
    os.environ['PYTHONHASHSEED'] = str(seed_value)

    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)

    # benchmark=True lets cuDNN auto-tune and pick different kernels from run
    # to run, which would undo deterministic=True; keep it off. These are
    # plain flags, so setting them without a GPU is harmless.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False


seed_everything(42)

# Preprocessing

Actions:
- m - move: m, unit who moves, direction
- bcity - build city: bcity, unit who builds
- bw - build worker: bw, x-coord of the building city tile, y-coord of the building city tile
- r - research: r, x-coord of the researching city tile, y-coord of the researching city tile
- t - transfer: t, from unit_1, to unit_2, resource, quantity

Updates:
- rp - research point: player, number of rp
- r - resources: r, type of resource, x-coord, y-coord, quantity
- u - unit: u, worker/cart, player, unit id, x-coord, y-coord, cooldown, wood, coal, uranium
- c - city: c, player, city id, fuel, light upkeep (fuel consumed per night turn)
- ct - city tile: ct, player, city id, x-coord, y-coord, cooldown
- ccd - level of road: ccd, x-coord, y-coord, level value

In [3]:
!cd episodes && rm *_info.json && cd ..

In [4]:
from random import shuffle

def unit_label(action):
    """Translate a unit action string into ``(unit_id, class_label)``.

    Labels: move center/north/south/west/east -> 0..4, build city -> 5.
    Any other command yields ``None`` as the label; the second token is
    still returned in the first slot.
    """
    tokens = action.split(' ')
    command, unit_id = tokens[0], tokens[1]

    if command == 'bcity':
        return unit_id, 5
    if command == 'm':
        direction_codes = {'c': 0, 'n': 1, 's': 2, 'w': 3, 'e': 4}
        return unit_id, direction_codes[tokens[2]]
    return unit_id, None

def city_label(action):
    """Translate a city-tile action string into ``(tile_coord, class_label)``.

    Labels: ``'bw x y'`` (build worker) -> 1, ``'r x y'`` (research) -> 2.
    Every other command gets label ``None``.

    Args:
        action: Space-separated action string.

    Returns:
        ``((x, y), label)`` with the coordinates kept as strings. When the
        action has fewer than three tokens (e.g. ``'bcity u_1'``) there are
        no coordinates to read and ``(None, None)`` is returned instead of
        raising ``IndexError``.
    """
    strs = action.split(' ')
    if len(strs) < 3:
        # Two-token unit commands carry no tile coordinates.
        return None, None

    ctile_coord = (strs[1], strs[2])
    label = {'bw': 1, 'r': 2}.get(strs[0])
    return ctile_coord, label

def research_points(update, index, res_points):
    """Return the research points carried by ``update`` for player ``index``.

    ``update`` is one observation line; ``index`` is the player id as a
    string. If the line is not an ``rp`` record for that player, the
    incoming ``res_points`` is passed through unchanged.
    """
    fields = update.split(' ')
    if fields[0] != 'rp' or fields[1] != index:
        return res_points
    return int(fields[2])

def units_count(update, index):
    """Increment the module-level ``units_number`` for a unit of player ``index``.

    ``update`` is one observation line; only ``u`` records whose player
    field equals ``index`` (a string) are counted.
    """
    global units_number

    fields = update.split(' ')
    is_own_unit = fields[0] == 'u' and fields[2] == index
    if is_own_unit:
        units_number += 1

def city_tile_coord(update, index):
    """Return ``(x, y)`` of a city tile of player ``index`` whose cooldown is '0'.

    ``update`` is one observation line. Coordinates are returned as strings.
    Any line that is not a matching ``ct`` record gives ``None``.
    """
    fields = update.split(' ')
    if fields[0] != 'ct':
        return None
    if fields[1] != index:
        return None
    if fields[5] != '0':
        # Tile is still cooling down.
        return None
    return fields[3], fields[4]


def depleted_resources(obs):
    """Return True when ``obs['updates']`` contains no resource (``r``) record."""
    return not any(line.split(' ')[0] == 'r' for line in obs['updates'])


def create_dataset_from_json(episode_dir, team_name='Toad Brigade'): 
    """Build imitation-learning samples from the replay JSON files in a directory.

    Every ``*.json`` file in ``episode_dir`` whose name does not contain
    ``'output'`` is read, in sorted path order so the result (and the random
    numbers consumed by ``shuffle``) is reproducible. An episode is used only
    when the player with the highest reward plays for ``team_name``. For each
    step where that player is ``'ACTIVE'``, the observation of step ``i`` is
    paired with the actions recorded at step ``i + 1``. Processing of an
    episode stops at the first step with no resource records left.

    Args:
        episode_dir: Directory containing the replay JSON files.
        team_name: Team whose winning episodes are kept.

    Returns:
        A tuple ``(obses, unit_samples, city_samples)``:
          * ``obses`` maps ``'<EpisodeId>_<step>'`` to the observation dict
            reduced to step/updates/player/width/height.
          * ``unit_samples`` is a list of ``(obs_id, unit_id, label)`` with
            labels as produced by ``unit_label``.
          * ``city_samples`` is a list of ``(obs_id, (x, y), label)`` where
            label 0 means "no action" and 1/2 come from ``city_label``.
    """
    # units_count() increments this module-level counter.
    global units_number
    
    obses = {}
    unit_samples = []
    city_samples = []
    
    # glob() yields files in filesystem order; sort for a reproducible dataset.
    episodes = sorted(
        path for path in Path(episode_dir).glob('*.json') if 'output' not in path.name
    )
    for filepath in tqdm(episodes): 
        with open(filepath) as f:
            json_load = json.load(f)

        ep_id = json_load['info']['EpisodeId']
        # Winner = player with the highest reward (missing rewards count as 0).
        index = np.argmax([r or 0 for r in json_load['rewards']])
        if json_load['info']['TeamNames'][index] != team_name:
            continue

        for i in range(len(json_load['steps'])-1):
            # Collect all the player's ready city tiles and default their label
            # to 0 ("no action"); tiles that act at this step get overwritten.
            
            city_tiles = {}
            units_number = 0
            city_action_number = 0
            res_points = 0
            
            if json_load['steps'][i][index]['status'] == 'ACTIVE':
                actions = json_load['steps'][i+1][index]['action']
                # The observation is read from player slot 0 for both players.
                obs = json_load['steps'][i][0]['observation']
                updates = obs['updates']
                
                for u in updates:
                    res_points = research_points(u, str(index), res_points)
                    units_count(u, str(index))
                    ctile_coord = city_tile_coord(u, str(index))
                    if ctile_coord:
                        city_tiles[ctile_coord] = 0
                
                if depleted_resources(obs):
                    break
                
                obs['player'] = index
                kept_keys = ['step', 'updates', 'player', 'width', 'height']
                obs = {k: v for k, v in obs.items() if k in kept_keys}
                obs_id = f'{ep_id}_{i}'
                obses[obs_id] = obs
                                
                for action in actions:
                    unit_id, label = unit_label(action)
                    if label is not None:
                        unit_samples.append((obs_id, unit_id, label))
                        continue
                    ctile_coord, label = city_label(action)
                    if label is not None:
                        # count number of actions for city tiles
                        city_action_number += 1 
                        # a built worker counts towards the unit total
                        if label == 1:
                            units_number += 1
                        city_tiles[ctile_coord] = label
                    
                # shuffle city tiles so the negative samples are picked at random
                city_tiles_items = list(city_tiles.items())
                shuffle(city_tiles_items)
                for k, v in city_tiles_items:
                    if v > 0:
                        city_samples.append((obs_id, k, v))
                    # If no city tile acted although one could have (room for
                    # another unit, or research below 200), record "no action".
                    # Each negative adds 0.5 to the counter, so at most two are
                    # kept per step to limit class imbalance.
                    elif v == 0 and units_number < len(city_tiles) and city_action_number < 1:
                        city_samples.append((obs_id, k, v))
                        city_action_number += 0.5
                    elif v == 0 and res_points < 200 and city_action_number < 1:
                        city_samples.append((obs_id, k, v))
                        city_action_number += 0.5
                    
    return obses, unit_samples, city_samples

In [5]:
# Parse every replay under ./episodes into observations plus worker/city samples.
episode_dir = 'episodes'
obses, samples, city_samples = create_dataset_from_json(episode_dir)
dataset_sizes = (
    'observations:', len(obses),
    'worker samples:', len(samples),
    'city samples:', len(city_samples),
)
print(*dataset_sizes)

  0%|          | 0/284 [00:00<?, ?it/s]

observations: 68673 worker samples: 244837 city samples: 98127


In [6]:
# Class balance of the worker action labels (last element of every sample).
actions = ['center', 'north', 'south', 'west', 'east', 'bcity']
labels = [sample[-1] for sample in samples]
label_values, label_counts = np.unique(labels, return_counts=True)
for value, count in zip(label_values, label_counts):
    print(f'{actions[value]}: {count}')

north: 55868
south: 52308
west: 57224
east: 55439
bcity: 23998


In [7]:
# Class balance of the city-tile action labels (last element of every sample).
actions_city = ['None', 'build_worker', 'research']
labels_city = [sample[-1] for sample in city_samples]
city_values, city_counts = np.unique(labels_city, return_counts=True)
for value, count in zip(city_values, city_counts):
    print(f'{actions_city[value]}: {count}')

None: 40178
build_worker: 16987
research: 40962


In [153]:
# episode_dir = 'episodes'
# json_load = create_dataset_from_json(episode_dir)
# json_load

In [154]:
# obses['27426858_181']

In [155]:
# samples

In [156]:
# city_samples[200000]

# Training

b is the float32 input tensor for the worker network; its shape is 20x32x32.

- b[0] - position of the current unit
- b[1] - cargo sum / 100 of the current unit
- b[2, 3, 4] - position, cooldown / 6, and cargo sum / 100 for the other units of the same team
- b[5, 6, 7] - position, cooldown / 6, and cargo sum / 100 for units of the other team
- b[8, 9] - position and min(city fuel / city light upkeep, 10) / 10 for city tiles of the same team
- b[10, 11] - position and min(city fuel / city light upkeep, 10) / 10 for city tiles of the other team
- b[12] - amount of wood / 800
- b[13] - amount of coal / 800
- b[14] - amount of uranium / 800
- b[15] - min(research points, 200) / 200 of the unit's team
- b[16] - min(research points, 200) / 200 of the other team
- b[17] - position within the 40-turn day/night cycle (from 0 to 1, step 1/40)
- b[18] - step of the game (from 0 to 1, step 1/360)
- b[19] - map size: 1 inside the map area, 0 in the padding

Add number of cities - ? 

Add day/night - ?

In [8]:
# Input for Neural Network for workers
def make_input(obs, unit_id):
    """Encode an observation as a (20, 32, 32) float32 array for one worker.

    The map is centred inside the 32x32 grid; a map coordinate ``x`` indexes
    the second array axis and ``y`` the third. Planes:
    0-1 selected unit (position, cargo/100); 2-4 / 5-7 other units of the own /
    other team (position, cooldown/6, cargo/100); 8-9 / 10-11 city tiles of the
    own / other team (position, capped fuel ratio); 12-14 wood/coal/uranium
    amount / 800; 15-16 own / other research points capped at 200, / 200;
    17 step % 40 / 40; 18 step / 360; 19 map area mask.

    Args:
        obs: Dict with 'width', 'height', 'player', 'step' and 'updates'.
        unit_id: Id string of the unit the input is built for.

    Returns:
        The filled ``numpy`` array.
    """
    x_shift = (32 - obs['width']) // 2
    y_shift = (32 - obs['height']) // 2
    resource_plane = {'wood': 12, 'coal': 13, 'uranium': 14}
    # city id -> min(fuel / light upkeep, 10) / 10. Filled from 'c' records,
    # which therefore have to precede the 'ct' records that look them up.
    fuel_ratio = {}

    b = np.zeros((20, 32, 32), dtype=np.float32)

    for line in obs['updates']:
        tok = line.split(' ')
        kind = tok[0]

        if kind == 'c':
            city_id = tok[2]
            fuel = float(tok[3])
            upkeep = float(tok[4])
            fuel_ratio[city_id] = min(fuel / upkeep, 10) / 10
        elif kind == 'rp':
            team = int(tok[1])
            points = int(tok[2])
            # Plane 15 for the observing player, 16 for the opponent.
            b[15 + (team - obs['player']) % 2, :] = min(points, 200) / 200
        elif kind == 'r':
            r_type = tok[1]
            px = int(tok[2]) + x_shift
            py = int(tok[3]) + y_shift
            amount = int(float(tok[4]))
            b[resource_plane[r_type], px, py] = amount / 800
        elif kind == 'ct':
            team = int(tok[1])
            city_id = tok[2]
            px = int(tok[3]) + x_shift
            py = int(tok[4]) + y_shift
            base = 8 + (team - obs['player']) % 2 * 2
            b[base:base + 2, px, py] = (1, fuel_ratio[city_id])
        elif kind == 'u':
            px = int(tok[4]) + x_shift
            py = int(tok[5]) + y_shift
            cargo = (int(tok[7]) + int(tok[8]) + int(tok[9])) / 100
            if tok[3] == unit_id:
                # The unit we are predicting for.
                b[:2, px, py] = (1, cargo)
            else:
                team = int(tok[2])
                cooldown = float(tok[6])
                base = 2 + (team - obs['player']) % 2 * 3
                b[base:base + 3, px, py] = (1, cooldown / 6, cargo)

    step = obs['step']
    b[17, :] = step % 40 / 40
    b[18, :] = step / 360
    b[19, x_shift:32 - x_shift, y_shift:32 - y_shift] = 1

    return b

Input planes for the city network (same 20x32x32 float32 layout):
- b[0] - position of the current city tile
- b[1] - min(city fuel / city light upkeep, 10) / 10 of the current city tile's city
- b[2, 3, 4] - position, cooldown / 10, and min(city fuel / city light upkeep, 10) / 10 for the other city tiles of the same team
- b[5, 6, 7] - position, cooldown / 10, and min(city fuel / city light upkeep, 10) / 10 for city tiles of the other team
- b[8, 9] - position and cargo sum / 100 for units of the same team
- b[10, 11] - position and cargo sum / 100 for units of the other team
- b[12] - amount of wood / 800
- b[13] - amount of coal / 800
- b[14] - amount of uranium / 800
- b[15] - min(research points, 200) / 200 of the city's team
- b[16] - min(research points, 200) / 200 of the other team
- b[17] - position within the 40-turn day/night cycle (from 0 to 1, step 1/40)
- b[18] - step of the game (from 0 to 1, step 1/360)
- b[19] - map size: 1 inside the map area, 0 in the padding

In [9]:
# Input for Neural Network for cities
def make_city_input(obs, city_coord):
    """Encode an observation as a (20, 32, 32) float32 array for one city tile.

    The map is centred inside the 32x32 grid; a map coordinate ``x`` indexes
    the second array axis and ``y`` the third. Planes:
    0-1 selected city tile (position, capped fuel ratio); 2-4 / 5-7 other city
    tiles of the own / other team (position, cooldown/10, capped fuel ratio);
    8-9 / 10-11 units of the own / other team (position, cargo/100);
    12-14 wood/coal/uranium amount / 800; 15-16 own / other research points
    capped at 200, / 200; 17 step % 40 / 40; 18 step / 360; 19 map area mask.

    Args:
        obs: Dict with 'width', 'height', 'player', 'step' and 'updates'.
        city_coord: ``(x, y)`` of the selected city tile; values are passed
            through ``int()`` before comparison.

    Returns:
        The filled ``numpy`` array.
    """
    x_shift = (32 - obs['width']) // 2
    y_shift = (32 - obs['height']) // 2
    resource_plane = {'wood': 12, 'coal': 13, 'uranium': 14}
    # city id -> min(fuel / light upkeep, 10) / 10. Filled from 'c' records,
    # which therefore have to precede the 'ct' records that look them up.
    fuel_ratio = {}

    b = np.zeros((20, 32, 32), dtype=np.float32)

    for line in obs['updates']:
        tok = line.split(' ')
        kind = tok[0]

        if kind == 'c':
            city_id = tok[2]
            fuel = float(tok[3])
            upkeep = float(tok[4])
            fuel_ratio[city_id] = min(fuel / upkeep, 10) / 10
        elif kind == 'rp':
            team = int(tok[1])
            points = int(tok[2])
            # Plane 15 for the observing player, 16 for the opponent.
            b[15 + (team - obs['player']) % 2, :] = min(points, 200) / 200
        elif kind == 'r':
            r_type = tok[1]
            px = int(tok[2]) + x_shift
            py = int(tok[3]) + y_shift
            amount = int(float(tok[4]))
            b[resource_plane[r_type], px, py] = amount / 800
        elif kind == 'u':
            team = int(tok[2])
            px = int(tok[4]) + x_shift
            py = int(tok[5]) + y_shift
            cargo = (int(tok[7]) + int(tok[8]) + int(tok[9])) / 100
            base = 8 + (team - obs['player']) % 2 * 2
            b[base:base + 2, px, py] = (1, cargo)
        elif kind == 'ct':
            city_id = tok[2]
            tile_x = int(tok[3])
            tile_y = int(tok[4])
            cooldown = float(tok[5])
            px, py = tile_x + x_shift, tile_y + y_shift
            is_selected = tile_x == int(city_coord[0]) and tile_y == int(city_coord[1])
            if is_selected:
                b[:2, px, py] = (1, fuel_ratio[city_id])
            else:
                team = int(tok[1])
                base = 2 + (team - obs['player']) % 2 * 3
                b[base:base + 3, px, py] = (1, cooldown / 10, fuel_ratio[city_id])

    step = obs['step']
    b[17, :] = step % 40 / 40
    b[18, :] = step / 360
    b[19, x_shift:32 - x_shift, y_shift:32 - y_shift] = 1

    return b

### Set modules for NN training

In [10]:
class LuxDataset(Dataset):
    """Dataset of worker samples: each item is ``(input planes, action label)``.

    ``obses`` maps observation ids to observation dicts; ``samples`` is a
    sequence of ``(obs_id, unit_id, action)`` tuples. The input planes are
    built on access by ``make_input``.
    """

    def __init__(self, obses, samples):
        self.obses, self.samples = obses, samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        obs_id, unit_id, action = self.samples[idx]
        return make_input(self.obses[obs_id], unit_id), action
    
class LuxCityDataset(Dataset):
    """Dataset of city-tile samples: each item is ``(input planes, action label)``.

    ``obses`` maps observation ids to observation dicts; ``city_samples`` is a
    sequence of ``(obs_id, city_coord, action)`` tuples. The input planes are
    built on access by ``make_city_input``.
    """

    def __init__(self, obses, city_samples):
        self.obses, self.samples = obses, city_samples

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        obs_id, city_coord, action = self.samples[idx]
        return make_city_input(self.obses[obs_id], city_coord), action

# Neural Network for Lux AI
class BasicConv2d(nn.Module):
    """2-D convolution with half-kernel padding and optional batch norm.

    Args:
        input_dim: Number of input channels.
        output_dim: Number of output channels.
        kernel_size: ``(height, width)`` of the kernel; padding is half of
            each, which keeps the spatial size for odd kernels.
        bn: When truthy, a ``BatchNorm2d`` is applied after the convolution.
    """

    def __init__(self, input_dim, output_dim, kernel_size, bn):
        super().__init__()
        pad_h, pad_w = kernel_size[0] // 2, kernel_size[1] // 2
        self.conv = nn.Conv2d(
            input_dim,
            output_dim,
            kernel_size=kernel_size,
            padding=(pad_h, pad_w),
        )
        if bn:
            self.bn = nn.BatchNorm2d(output_dim)
        else:
            self.bn = None

    def forward(self, x):
        out = self.conv(x)
        if self.bn is None:
            return out
        return self.bn(out)


class LuxNet(nn.Module):
    """Residual CNN that scores the 6 worker actions for the selected unit.

    A stem convolution is followed by 12 residual conv blocks with 32 filters
    each. The first input plane (``x[:, :1]``) acts as a spatial mask: the
    features are multiplied by it, summed over all positions and fed to a
    bias-free linear head that outputs 6 logits.
    """

    def __init__(self):
        super().__init__()
        layers, filters = 12, 32
        self.conv0 = BasicConv2d(20, filters, (3, 3), True)
        residual_blocks = [
            BasicConv2d(filters, filters, (3, 3), True) for _ in range(layers)
        ]
        self.blocks = nn.ModuleList(residual_blocks)
        self.head_p = nn.Linear(filters, 6, bias=False)

    def forward(self, x):
        feat = F.relu_(self.conv0(x))
        for layer in self.blocks:
            feat = F.relu_(feat + layer(feat))
        # Keep only the features at the positions marked in plane 0.
        selected = feat * x[:, :1]
        pooled = selected.view(feat.size(0), feat.size(1), -1).sum(-1)
        return self.head_p(pooled)
    
    
class LuxCityNet(nn.Module):
    """Residual CNN that scores the 3 city-tile actions for the selected tile.

    A stem convolution is followed by 12 residual conv blocks with 32 filters
    each. The first input plane (``x[:, :1]``) acts as a spatial mask: the
    features are multiplied by it, summed over all positions and fed to a
    bias-free linear head that outputs 3 logits.
    """

    def __init__(self):
        super().__init__()
        layers, filters = 12, 32
        self.conv0 = BasicConv2d(20, filters, (3, 3), True)
        residual_blocks = [
            BasicConv2d(filters, filters, (3, 3), True) for _ in range(layers)
        ]
        self.blocks = nn.ModuleList(residual_blocks)
        self.head_p = nn.Linear(filters, 3, bias=False)

    def forward(self, x):
        feat = F.relu_(self.conv0(x))
        for layer in self.blocks:
            feat = F.relu_(feat + layer(feat))
        # Keep only the features at the positions marked in plane 0.
        selected = feat * x[:, :1]
        pooled = selected.view(feat.size(0), feat.size(1), -1).sum(-1)
        return self.head_p(pooled)

### Optimize NN parameters with Optuna

In [41]:
# def objective(trial):

#     num_epochs = 10
    
#     # model for unit actions
#     model = LuxNet()
#     train, val = train_test_split(samples, test_size=0.1, random_state=42, stratify=labels)
#     batch_size = 64

#     train_loader = DataLoader(
#         LuxDataset(obses, train), 
#         batch_size=batch_size, 
#         shuffle=True, 
#         num_workers=2
#     )
#     val_loader = DataLoader(
#         LuxDataset(obses, val), 
#         batch_size=batch_size, 
#         shuffle=False, 
#         num_workers=2
#     )
#     dataloaders_dict = {"train": train_loader, "val": val_loader}

#     # Generate the optimizers.
#     criterion = nn.CrossEntropyLoss()
#     optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "AdamW", "RMSprop", "SGD"])
#     lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
#     optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

#     for epoch in range(num_epochs):
#         model.cuda()
        
#         for phase in ['train', 'val']:
#             if phase == 'train':
#                 model.train()
#             else:
#                 model.eval()
                
#             epoch_loss = 0.0
#             epoch_acc = 0
            
#             dataloader = dataloaders_dict[phase]
#             for item in dataloader:
#                 states = item[0].cuda().float()
#                 actions = item[1].cuda().long()

#                 optimizer.zero_grad()
                
#                 with torch.set_grad_enabled(phase == 'train'):
#                     policy = model(states)
#                     loss = criterion(policy, actions)
#                     _, preds = torch.max(policy, 1)

#                     if phase == 'train':
#                         loss.backward()
#                         optimizer.step()

#                     epoch_loss += loss.item() * len(policy)
#                     epoch_acc += torch.sum(preds == actions.data)

#             data_size = len(dataloader.dataset)
#             epoch_loss = epoch_loss / data_size
#             epoch_acc = epoch_acc.double() / data_size

#         trial.report(epoch_acc, epoch)

#         # Handle pruning based on the intermediate value.
#         if trial.should_prune():
#             raise optuna.exceptions.TrialPruned()

#     return epoch_acc


# study = optuna.create_study(direction="maximize")
# study.optimize(objective, n_trials=500, timeout=10*3600)

# pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
# complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

# print("Study statistics: ")
# print("  Number of finished trials: ", len(study.trials))
# print("  Number of pruned trials: ", len(pruned_trials))
# print("  Number of complete trials: ", len(complete_trials))

# print("Best trial:")
# trial = study.best_trial

# print("  Value: ", trial.value)

# print("  Params: ")
# for key, value in trial.params.items():
#     print("    {}: {}".format(key, value))

### Function for NN training

In [11]:
import matplotlib.pyplot as plt
# from torch.utils.tensorboard import SummaryWriter


def train_model(model, dataloaders_dict, criterion, optimizer, num_epochs, city=False):
    """Train ``model`` and save a TorchScript trace of the best validation epoch.

    Each epoch runs a 'train' pass followed by a 'val' pass over the loaders
    in ``dataloaders_dict`` and prints loss and accuracy for both. Whenever
    the validation accuracy exceeds the best value seen so far, the model is
    traced on CPU with a random (1, 20, 32, 32) input and written to
    'agent/model_city.pth' (``city=True``) or 'agent/model.pth'.

    Training runs on CUDA when it is available and on CPU otherwise.

    Args:
        model: Network to train; it must accept a (N, 20, 32, 32) float tensor.
        dataloaders_dict: Dict with 'train' and 'val' loaders yielding
            ``(states, actions)`` batches; each loader needs a ``.dataset``
            with a length.
        criterion: Loss called as ``criterion(policy, actions)``.
        optimizer: Optimizer over ``model``'s parameters.
        num_epochs: Number of epochs to run.
        city: Selects the output file name (see above).
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    save_path = 'agent/model_city.pth' if city else 'agent/model.pth'
    # traced.save() does not create missing directories.
    os.makedirs(os.path.dirname(save_path), exist_ok=True)

    best_acc = 0.0

    for epoch in range(num_epochs):
        # Tracing below moves the model to CPU, so move it back every epoch.
        model.to(device)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            epoch_loss = 0.0
            epoch_correct = 0

            dataloader = dataloaders_dict[phase]
            for item in tqdm(dataloader, leave=False):
                states = item[0].to(device).float()
                actions = item[1].to(device).long()

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    policy = model(states)
                    loss = criterion(policy, actions)
                    _, preds = torch.max(policy, 1)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                    # loss is a batch mean; weight it by the batch size.
                    epoch_loss += loss.item() * len(policy)
                    epoch_correct += (preds == actions).sum().item()

            data_size = len(dataloader.dataset)
            epoch_loss = epoch_loss / data_size
            epoch_acc = epoch_correct / data_size

            print(f'Epoch {epoch + 1}/{num_epochs} | {phase:^5} | Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.4f}')

        # 'val' ran last, so epoch_acc is the validation accuracy and the model
        # is in eval mode while it is traced.
        if epoch_acc > best_acc:
            traced = torch.jit.trace(model.cpu(), torch.rand(1, 20, 32, 32))
            traced.save(save_path)
            best_acc = epoch_acc

In [12]:
# model for unit actions
# model for unit actions
model = LuxNet()
# Hold out 10% of the worker samples, stratified by action label.
train, val = train_test_split(samples, test_size=0.1, random_state=42, stratify=labels)
batch_size = 64

train_loader = DataLoader(
    LuxDataset(obses, train), 
    batch_size=batch_size, 
    shuffle=True, 
    num_workers=2
)
val_loader = DataLoader(
    LuxDataset(obses, val), 
    batch_size=batch_size, 
    shuffle=False, 
    num_workers=2
)
dataloaders_dict = {"train": train_loader, "val": val_loader}

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3) #27e-4

# model for city actions
model_city = LuxCityNet()
# Hold out 10% of the city samples, stratified by action label.
train_city, val_city = train_test_split(city_samples, test_size=0.1, random_state=42, stratify=labels_city)
batch_size_city = 64

# The city loaders use their own batch size, so it can be tuned independently
# of the worker model.
train_city_loader = DataLoader(
    LuxCityDataset(obses, train_city), 
    batch_size=batch_size_city, 
    shuffle=True, 
    num_workers=2
)
val_city_loader = DataLoader(
    LuxCityDataset(obses, val_city), 
    batch_size=batch_size_city, 
    shuffle=False, 
    num_workers=2
)
dataloaders_city_dict = {"train": train_city_loader, "val": val_city_loader}

criterion_city = nn.CrossEntropyLoss()
optimizer_city = torch.optim.AdamW(model_city.parameters(), lr=1e-3)

In [13]:
# Train the worker policy; the best validation epoch is traced to agent/model.pth.
num_epochs = 20

train_model(model, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 1/20 | train | Loss: 0.8466 | Acc: 0.6533


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 1/20 |  val  | Loss: 0.7211 | Acc: 0.7067


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 2/20 | train | Loss: 0.6495 | Acc: 0.7373


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 2/20 |  val  | Loss: 0.6463 | Acc: 0.7382


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 3/20 | train | Loss: 0.5929 | Acc: 0.7614


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 3/20 |  val  | Loss: 0.6027 | Acc: 0.7565


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 4/20 | train | Loss: 0.5525 | Acc: 0.7781


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 4/20 |  val  | Loss: 0.5767 | Acc: 0.7661


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 5/20 | train | Loss: 0.5256 | Acc: 0.7892


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 5/20 |  val  | Loss: 0.5431 | Acc: 0.7817


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 6/20 | train | Loss: 0.5006 | Acc: 0.8002


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 6/20 |  val  | Loss: 0.5647 | Acc: 0.7734


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 7/20 | train | Loss: 0.4822 | Acc: 0.8071


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 7/20 |  val  | Loss: 0.5359 | Acc: 0.7860


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 8/20 | train | Loss: 0.4659 | Acc: 0.8154


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 8/20 |  val  | Loss: 0.5038 | Acc: 0.7990


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 9/20 | train | Loss: 0.4517 | Acc: 0.8205


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 9/20 |  val  | Loss: 0.5098 | Acc: 0.7980


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 10/20 | train | Loss: 0.4400 | Acc: 0.8247


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 10/20 |  val  | Loss: 0.5130 | Acc: 0.7953


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 11/20 | train | Loss: 0.4293 | Acc: 0.8305


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 11/20 |  val  | Loss: 0.4950 | Acc: 0.8027


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 12/20 | train | Loss: 0.4183 | Acc: 0.8343


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 12/20 |  val  | Loss: 0.5112 | Acc: 0.7996


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 13/20 | train | Loss: 0.4118 | Acc: 0.8367


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 13/20 |  val  | Loss: 0.5080 | Acc: 0.7996


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 14/20 | train | Loss: 0.4029 | Acc: 0.8409


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 14/20 |  val  | Loss: 0.5122 | Acc: 0.7994


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 15/20 | train | Loss: 0.3937 | Acc: 0.8449


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 15/20 |  val  | Loss: 0.5020 | Acc: 0.8058


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 16/20 | train | Loss: 0.3854 | Acc: 0.8479


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 16/20 |  val  | Loss: 0.5110 | Acc: 0.8010


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 17/20 | train | Loss: 0.3807 | Acc: 0.8496


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 17/20 |  val  | Loss: 0.5091 | Acc: 0.8035


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 18/20 | train | Loss: 0.3726 | Acc: 0.8531


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 18/20 |  val  | Loss: 0.5030 | Acc: 0.8075


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 19/20 | train | Loss: 0.3669 | Acc: 0.8549


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 19/20 |  val  | Loss: 0.5174 | Acc: 0.8014


  0%|          | 0/3444 [00:00<?, ?it/s]

Epoch 20/20 | train | Loss: 0.3608 | Acc: 0.8571


  0%|          | 0/383 [00:00<?, ?it/s]

Epoch 20/20 |  val  | Loss: 0.4892 | Acc: 0.8075


In [14]:
# Train the city policy; the best validation epoch is traced to agent/model_city.pth.
# Note: this rebinds the notebook-level num_epochs set by the worker training cell.
num_epochs = 15

train_model(model_city, dataloaders_city_dict, criterion_city, optimizer_city, num_epochs=num_epochs, city=True)

  0%|          | 0/1380 [00:00<?, ?it/s]

Epoch 1/15 | train | Loss: 0.5661 | Acc: 0.7623


  0%|          | 0/154 [00:00<?, ?it/s]

Epoch 1/15 |  val  | Loss: 0.4804 | Acc: 0.8003


  0%|          | 0/1380 [00:00<?, ?it/s]

Epoch 2/15 | train | Loss: 0.4752 | Acc: 0.8033


  0%|          | 0/154 [00:00<?, ?it/s]

Epoch 2/15 |  val  | Loss: 0.4849 | Acc: 0.8017


  0%|          | 0/1380 [00:00<?, ?it/s]

Epoch 3/15 | train | Loss: 0.4414 | Acc: 0.8178


  0%|          | 0/154 [00:00<?, ?it/s]

Epoch 3/15 |  val  | Loss: 0.4334 | Acc: 0.8227


  0%|          | 0/1380 [00:00<?, ?it/s]

Epoch 4/15 | train | Loss: 0.4211 | Acc: 0.8266


  0%|          | 0/154 [00:00<?, ?it/s]

Epoch 4/15 |  val  | Loss: 0.4534 | Acc: 0.8145


  0%|          | 0/1380 [00:00<?, ?it/s]

Epoch 5/15 | train | Loss: 0.4057 | Acc: 0.8323


  0%|          | 0/154 [00:00<?, ?it/s]

Epoch 5/15 |  val  | Loss: 0.4220 | Acc: 0.8260


  0%|          | 0/1380 [00:00<?, ?it/s]

Epoch 6/15 | train | Loss: 0.3905 | Acc: 0.8406


  0%|          | 0/154 [00:00<?, ?it/s]

Epoch 6/15 |  val  | Loss: 0.4104 | Acc: 0.8318


  0%|          | 0/1380 [00:00<?, ?it/s]

Epoch 7/15 | train | Loss: 0.3770 | Acc: 0.8457


  0%|          | 0/154 [00:00<?, ?it/s]

Epoch 7/15 |  val  | Loss: 0.4052 | Acc: 0.8331


  0%|          | 0/1380 [00:00<?, ?it/s]

Epoch 8/15 | train | Loss: 0.3667 | Acc: 0.8500


  0%|          | 0/154 [00:00<?, ?it/s]

Epoch 8/15 |  val  | Loss: 0.4043 | Acc: 0.8360


  0%|          | 0/1380 [00:00<?, ?it/s]

Epoch 9/15 | train | Loss: 0.3533 | Acc: 0.8560


  0%|          | 0/154 [00:00<?, ?it/s]

Epoch 9/15 |  val  | Loss: 0.4002 | Acc: 0.8328


  0%|          | 0/1380 [00:00<?, ?it/s]

Epoch 10/15 | train | Loss: 0.3432 | Acc: 0.8606


  0%|          | 0/154 [00:00<?, ?it/s]

Epoch 10/15 |  val  | Loss: 0.3910 | Acc: 0.8429


  0%|          | 0/1380 [00:00<?, ?it/s]

Epoch 11/15 | train | Loss: 0.3324 | Acc: 0.8657


  0%|          | 0/154 [00:00<?, ?it/s]

Epoch 11/15 |  val  | Loss: 0.3810 | Acc: 0.8496


  0%|          | 0/1380 [00:00<?, ?it/s]

Epoch 12/15 | train | Loss: 0.3238 | Acc: 0.8683


  0%|          | 0/154 [00:00<?, ?it/s]

Epoch 12/15 |  val  | Loss: 0.4028 | Acc: 0.8382


  0%|          | 0/1380 [00:00<?, ?it/s]

Epoch 13/15 | train | Loss: 0.3139 | Acc: 0.8726


  0%|          | 0/154 [00:00<?, ?it/s]

Epoch 13/15 |  val  | Loss: 0.3998 | Acc: 0.8412


  0%|          | 0/1380 [00:00<?, ?it/s]

Epoch 14/15 | train | Loss: 0.3054 | Acc: 0.8763


  0%|          | 0/154 [00:00<?, ?it/s]

Epoch 14/15 |  val  | Loss: 0.3935 | Acc: 0.8477


  0%|          | 0/1380 [00:00<?, ?it/s]

Epoch 15/15 | train | Loss: 0.2954 | Acc: 0.8811


  0%|          | 0/154 [00:00<?, ?it/s]

Epoch 15/15 |  val  | Loss: 0.4042 | Acc: 0.8433


In [19]:
# !tensorboard --logdir runs

# Submission

In [15]:
%%writefile agent/agent.py
import os
import numpy as np
import torch
from lux.game import Game

# Directory holding the traced models: the Kaggle simulation path when it
# exists, otherwise the local 'agent' folder (no manual edit needed for tests).
path = '/kaggle_simulations/agent' if os.path.exists('/kaggle_simulations') else 'agent' # falls back to 'agent' for local tests
# TorchScript policy for units, switched to inference mode.
model = torch.jit.load(f'{path}/model.pth')
model.eval()
# TorchScript policy for city tiles, switched to inference mode.
model_city = torch.jit.load(f'{path}/model_city.pth')
model_city.eval()

# Input for Neural Network for units
def make_input(obs, unit_id):
    """Encode an observation as a (20, 32, 32) float32 array for one unit.

    The map is centred inside the 32x32 grid; a map coordinate ``x`` indexes
    the second array axis and ``y`` the third. Planes:
    0-1 selected unit (position, cargo/100); 2-4 / 5-7 other units of the own /
    other team (position, cooldown/6, cargo/100); 8-9 / 10-11 city tiles of the
    own / other team (position, capped fuel ratio); 12-14 wood/coal/uranium
    amount / 800; 15-16 own / other research points capped at 200, / 200;
    17 step % 40 / 40; 18 step / 360; 19 map area mask.

    Args:
        obs: Dict with 'width', 'height', 'player', 'step' and 'updates'.
        unit_id: Id string of the unit the input is built for.

    Returns:
        The filled ``numpy`` array.
    """
    x_shift = (32 - obs['width']) // 2
    y_shift = (32 - obs['height']) // 2
    resource_plane = {'wood': 12, 'coal': 13, 'uranium': 14}
    # city id -> min(fuel / light upkeep, 10) / 10. Filled from 'c' records,
    # which therefore have to precede the 'ct' records that look them up.
    fuel_ratio = {}

    b = np.zeros((20, 32, 32), dtype=np.float32)

    for line in obs['updates']:
        tok = line.split(' ')
        kind = tok[0]

        if kind == 'c':
            city_id = tok[2]
            fuel = float(tok[3])
            upkeep = float(tok[4])
            fuel_ratio[city_id] = min(fuel / upkeep, 10) / 10
        elif kind == 'rp':
            team = int(tok[1])
            points = int(tok[2])
            # Plane 15 for the observing player, 16 for the opponent.
            b[15 + (team - obs['player']) % 2, :] = min(points, 200) / 200
        elif kind == 'r':
            r_type = tok[1]
            px = int(tok[2]) + x_shift
            py = int(tok[3]) + y_shift
            amount = int(float(tok[4]))
            b[resource_plane[r_type], px, py] = amount / 800
        elif kind == 'ct':
            team = int(tok[1])
            city_id = tok[2]
            px = int(tok[3]) + x_shift
            py = int(tok[4]) + y_shift
            base = 8 + (team - obs['player']) % 2 * 2
            b[base:base + 2, px, py] = (1, fuel_ratio[city_id])
        elif kind == 'u':
            px = int(tok[4]) + x_shift
            py = int(tok[5]) + y_shift
            cargo = (int(tok[7]) + int(tok[8]) + int(tok[9])) / 100
            if tok[3] == unit_id:
                # The unit we are predicting for.
                b[:2, px, py] = (1, cargo)
            else:
                team = int(tok[2])
                cooldown = float(tok[6])
                base = 2 + (team - obs['player']) % 2 * 3
                b[base:base + 3, px, py] = (1, cooldown / 6, cargo)

    step = obs['step']
    b[17, :] = step % 40 / 40
    b[18, :] = step / 360
    b[19, x_shift:32 - x_shift, y_shift:32 - y_shift] = 1

    return b


# Input for Neural Network for cities
def make_city_input(obs, city_coord):
    """Build the 20-plane, 32x32 feature map fed to the city network.

    The real map is centred inside a fixed 32x32 canvas, so every coordinate
    read from the updates is shifted by half of the unused margin. "Own" below
    means a team id with the same parity as ``obs['player']``.

    Planes:
        0-1    city tile located at ``city_coord``: presence, fuel ratio
        2-4    other own city tiles: presence, cooldown / 10, fuel ratio
        5-7    opposing city tiles: same three features
        8-9    own units: presence, total cargo / 100
        10-11  opposing units: same two features
        12-14  wood / coal / uranium amount / 800
        15-16  own / opposing research points, capped at 200, scaled to [0, 1]
        17     position inside the 40-step cycle (step % 40 / 40)
        18     game progress (step / 360)
        19     1 inside the real map, 0 in the padding

    The fuel ratio of a city is ``min(fuel / light_upkeep, 10) / 10``.

    Args:
        obs: mapping with 'width', 'height', 'player', 'step' and 'updates'
            (a sequence of space-separated update strings).
        city_coord: (x, y) of the city tile being decided for, in map
            coordinates; each item is passed through ``int``.

    Returns:
        np.ndarray of shape (20, 32, 32) and dtype float32.

    Raises:
        KeyError: if a 'ct' update names a city that had no earlier 'c' update.
    """
    canvas = 32
    map_w, map_h = obs['width'], obs['height']
    x_shift = (canvas - map_w) // 2
    y_shift = (canvas - map_h) // 2

    resource_plane = {'wood': 12, 'coal': 13, 'uranium': 14}
    fuel_ratio = {}  # city id -> scaled fuel / upkeep, filled by 'c' updates

    planes = np.zeros((20, canvas, canvas), dtype=np.float32)

    for line in obs['updates']:
        tok = line.split(' ')
        kind = tok[0]

        if kind == 'c':
            # Cities: remember how well each city is fuelled for its tiles
            name = tok[2]
            fuel = float(tok[3])
            upkeep = float(tok[4])
            fuel_ratio[name] = min(fuel / upkeep, 10) / 10
            continue

        if kind == 'ct':
            # CityTiles
            name = tok[2]
            col = int(tok[3])
            row = int(tok[4])
            cooldown = float(tok[5])
            px, py = col + x_shift, row + y_shift
            if col == int(city_coord[0]) and row == int(city_coord[1]):
                # The tile this input is built for
                planes[:2, px, py] = (1, fuel_ratio[name])
            else:
                side = int(tok[1])
                first = 2 + (side - obs['player']) % 2 * 3
                planes[first:first + 3, px, py] = (
                    1,
                    cooldown / 10,
                    fuel_ratio[name],
                )
            continue

        if kind == 'u':
            # Units
            side = int(tok[2])
            px = int(tok[4]) + x_shift
            py = int(tok[5]) + y_shift
            cargo = int(tok[7]) + int(tok[8]) + int(tok[9])
            first = 8 + (side - obs['player']) % 2 * 2
            planes[first:first + 2, px, py] = (1, cargo / 100)
            continue

        if kind == 'r':
            # Resources
            r_type = tok[1]
            px = int(tok[2]) + x_shift
            py = int(tok[3]) + y_shift
            amount = int(float(tok[4]))
            planes[resource_plane[r_type], px, py] = amount / 800
            continue

        if kind == 'rp':
            # Research Points, broadcast over the whole plane
            side = int(tok[1])
            points = int(tok[2])
            planes[15 + (side - obs['player']) % 2, :] = min(points, 200) / 200

    # Day/Night Cycle
    planes[17, :] = obs['step'] % 40 / 40
    # Turns
    planes[18, :] = obs['step'] / 360
    # Map Size
    planes[19, x_shift:canvas - x_shift, y_shift:canvas - y_shift] = 1

    return planes

# Module-level state shared by the helpers below; agent() rebinds both on every call.
game_state, player = None, None


def get_game_state(observation):
    """Return the module-level game state, brought up to date with ``observation``.

    On step 0 a fresh ``Game`` is created: it is initialised from the full
    update list, then updated with everything after the first two entries, and
    tagged with the observing player's id. On any later step the existing game
    is simply updated with the new messages.

    Args:
        observation: mapping with 'step', 'updates' and (on step 0) 'player'.

    Returns:
        The (re)bound module-level ``game_state``.
    """
    global game_state

    if observation["step"] != 0:
        # Later turns: feed the new messages into the existing game
        game_state._update(observation["updates"])
        return game_state

    # First turn: rebuild the game from scratch
    game_state = Game()
    first_messages = observation["updates"]
    game_state._initialize(first_messages)
    game_state._update(first_messages[2:])
    game_state.id = observation["player"]
    return game_state


def in_city(pos):
    """Tell whether ``pos`` holds a city tile owned by the current player.

    Reads the module-level ``game_state``. This is a best-effort check: any
    ordinary error while looking the cell up (for example a position the map
    cannot resolve, or no game state yet) is treated as "not in a city".

    Args:
        pos: position handed to ``game_state.map.get_cell_by_pos``.

    Returns:
        True if the cell has a city tile whose team equals ``game_state.id``,
        otherwise False.
    """
    try:
        city = game_state.map.get_cell_by_pos(pos).citytile
        return city is not None and city.team == game_state.id
    except Exception:
        # Deliberately not a bare except: KeyboardInterrupt / SystemExit must
        # still propagate instead of being reported as "not in a city".
        return False
    
# check if unit has enough time and space to build a city
def build_city_is_possible(unit, pos):
    """Decide whether building a city tile at ``pos`` is acceptable right now.

    During the first 30 turns of each 40-turn cycle the answer is always True.
    During the remaining 10 turns it is True only when one of the four
    orthogonal neighbours of ``pos`` is a tile of one of the player's own
    cities and that city's fuel exceeds ``(light_upkeep + 18) * 10``.

    Reads the module-level ``game_state`` and ``player``; produces no output.

    Args:
        unit: the unit that would build (currently unused, kept for callers).
        pos: candidate build position, exposing ``x`` and ``y``.

    Returns:
        bool
    """
    if game_state.turn % 40 < 30:
        return True

    # NOTE(review): 18 and 10 look like the upkeep added by one attached tile
    # and the number of night turns - confirm against the game rules.
    x, y = pos.x, pos.y
    for i, j in ((x - 1, y), (x + 1, y), (x, y - 1), (x, y + 1)):
        try:
            city_id = game_state.map.get_cell(i, j).citytile.cityid
        except Exception:
            # Neighbour the map cannot resolve, or a cell without a city tile
            continue
        if city_id not in player.cities:
            # Someone else's city does not help us survive the night
            continue
        city = player.cities[city_id]
        if city.fuel > (city.get_light_upkeep() + 18) * 10:
            return True
    return False


def call_func(obj, method, args=[]):
    """Call the method named ``method`` on ``obj`` and return its result.

    Args:
        obj: object owning the method.
        method: attribute name, looked up with ``getattr``.
        args: iterable unpacked into positional arguments (never mutated here).
            A one-character string therefore acts as a single argument.
    """
    bound = getattr(obj, method)
    return bound(*args)


# translate unit policy to action
unit_actions = [('move', 'c'), ('move', 'n'), ('move', 's'), ('move', 'w'), ('move', 'e'), ('build_city',)]
def get_unit_action(policy, unit, dest):
    """Turn the unit network's scores into the best action that is allowed.

    Labels are tried from highest to lowest score. ``build_city`` is only
    accepted when ``build_city_is_possible`` agrees; any action is rejected if
    its target cell is already claimed in ``dest``, unless that cell is one of
    the player's own city tiles. If nothing is acceptable the unit stays put.

    The build check is keyed on the action name rather than a label number, so
    it stays attached to ``build_city`` however ``unit_actions`` is ordered.

    Args:
        policy: 1-D array-like with one score per entry of ``unit_actions``.
        unit: object exposing ``pos`` (with ``translate``), ``move`` and
            ``build_city``.
        dest: positions already claimed by previously processed units.

    Returns:
        (action, pos): the value returned by the chosen unit method and the
        cell the unit will occupy afterwards.
    """
    for label in np.argsort(policy)[::-1]:
        act = unit_actions[label]
        building = act[0] == 'build_city'

        if building:
            # Building happens on the spot
            pos = unit.pos
            if not build_city_is_possible(unit, pos):
                continue
        else:
            pos = unit.pos.translate(act[1], 1) or unit.pos

        if pos not in dest or in_city(pos):
            # Call the unit method by name with its explicit argument list
            return getattr(unit, act[0])(*act[1:]), pos

    return unit.move('c'), unit.pos

# translate city policy to action
city_actions = [(None,), ('build_worker',), ('research', )]
def get_city_action(policy, city_tile, unit_count):
    """Turn the city network's scores into the best action that is allowed.

    Labels are tried from highest to lowest score:
      * "do nothing" is always accepted;
      * ``build_worker`` needs ``unit_count`` below the player's city tile count;
      * ``research`` needs uranium not to be researched yet.
    An action that is not allowed is skipped and the next-ranked one is tried,
    so the ranking of the remaining labels matters.

    Reads the module-level ``player``; accepting ``research`` increments
    ``player.research_points`` so later tiles see the pending point.

    Args:
        policy: 1-D array-like with one score per entry of ``city_actions``.
        city_tile: object exposing ``build_worker()`` and ``research()``.
        unit_count: units the player has, including those queued this turn.

    Returns:
        (action, unit_count): the value returned by the chosen city tile method
        (None for "do nothing") and the updated unit count.
    """
    for label in np.argsort(policy)[::-1]:
        name = city_actions[label][0]

        if name is None:
            # Explicit "do nothing"
            return None, unit_count

        if name == 'build_worker':
            if unit_count < player.city_tile_count:
                return city_tile.build_worker(), unit_count + 1
        elif name == 'research':
            if not player.researched_uranium():
                player.research_points += 1
                return city_tile.research(), unit_count
        # Not allowed right now: fall through to the next-ranked label

    # Only reachable with an empty policy
    return None, unit_count

# agent for making actions
def agent(observation, configuration):
    """Choose this turn's actions for the player's units and city tiles.

    Units are driven by the unit network (``model``). City tiles follow a
    hybrid scheme: for the first N turns they build a worker whenever the unit
    count is below the city tile count and research otherwise; from turn N on
    the city network (``model_city``) decides. N is looked up per map height
    and defaults to 60 for heights missing from the table.

    Rebinds the module-level ``game_state`` and ``player``. Writes nothing to
    stdout.

    Args:
        observation: per-turn observation; ``observation.player`` selects the
            player, and the object is passed on to ``get_game_state``,
            ``make_input`` and ``make_city_input``.
        configuration: unused.

    Returns:
        list of the actions returned by the unit and city tile methods.
    """
    global game_state
    global player

    game_state = get_game_state(observation)
    player = game_state.players[observation.player]
    actions = []

    # Unit Actions
    dest = []  # cells claimed so far this turn, so later units can avoid them
    for unit in player.units:
        # Outside the first 30 turns of a cycle, units inside own cities stay put
        if unit.can_act() and (game_state.turn % 40 < 30 or not in_city(unit.pos)):
            state = make_input(observation, unit.id)
            with torch.no_grad():
                p = model(torch.from_numpy(state).unsqueeze(0))

            policy = p.squeeze(0).numpy()

            action, pos = get_unit_action(policy, unit, dest)
            actions.append(action)
            dest.append(pos)

    # Length of the rule-based opening (N) per map height
    default_opening_turns = 60
    map_size_dict = {12: 60, 16: 60, 24: 60, 32: 60}
    opening_turns = map_size_dict.get(game_state.map.height, default_opening_turns)

    # City Actions
    unit_count = len(player.units)
    for city in player.cities.values():
        for city_tile in city.citytiles:
            if not city_tile.can_act():
                continue

            if game_state.turn < opening_turns:
                # at first game stages try to produce maximum amount of agents and research point
                if unit_count < player.city_tile_count:
                    actions.append(city_tile.build_worker())
                    unit_count += 1
                else:
                    actions.append(city_tile.research())
                    player.research_points += 1
            else:
                # then follow NN strategy
                state = make_city_input(observation, [city_tile.pos.x, city_tile.pos.y])
                with torch.no_grad():
                    p = model_city(torch.from_numpy(state).unsqueeze(0))

                policy = p.squeeze(0).numpy()

                action, unit_count = get_city_action(policy, city_tile, unit_count)
                if action:
                    actions.append(action)

    return actions

Overwriting agent/agent.py


Package the agent for submission

In [46]:
# Bundle the lux directory, both agent scripts and the two saved .pth files into agent/submission.tar.gz
!cd agent && tar -czf submission.tar.gz lux agent.py main.py model.pth model_city.pth

Test agents on 12x12 field

In [16]:
from kaggle_environments import make

# 12x12 match: the packaged agent against ./agent.py
env = make(
    "lux_ai_2021",
    configuration={"width": 12, "height": 12, "loglevel": 2, "annotations": True},
    debug=False,
)

steps = env.run([
    'agent/agent.py',  # first agent is yellow
    'agent.py',        # second agent is blue
])

# Replay the finished match inline
env.render(
    mode="ipython",
    width=1200,
    height=800,
)

Loading environment football failed: No module named 'gfootball'


Test agents on 16x16 field

In [17]:
# 16x16 match: the packaged agent against ./agent.py
env = make(
    "lux_ai_2021",
    configuration={"width": 16, "height": 16, "loglevel": 2, "annotations": True},
    debug=False,
)

steps = env.run([
    'agent/agent.py',  # first agent is yellow
    'agent.py',        # second agent is blue
])

# Replay the finished match inline
env.render(
    mode="ipython",
    width=1200,
    height=800,
)

Test agents on 24x24 field

In [None]:
# 24x24 match: the packaged agent against ./agent.py
env = make(
    "lux_ai_2021",
    configuration={"width": 24, "height": 24, "loglevel": 2, "annotations": True},
    debug=False,
)

steps = env.run([
    'agent/agent.py',  # first agent is yellow
    'agent.py',        # second agent is blue
])

# Replay the finished match inline
env.render(
    mode="ipython",
    width=1200,
    height=800,
)

Test agents on 32x32 field

In [None]:
# 32x32 match: the packaged agent against ./agent.py
env = make(
    "lux_ai_2021",
    configuration={"width": 32, "height": 32, "loglevel": 2, "annotations": True},
    debug=False,
)

steps = env.run([
    'agent/agent.py',  # first agent is yellow
    'agent.py',        # second agent is blue
])

# Replay the finished match inline
env.render(
    mode="ipython",
    width=1200,
    height=800,
)

# Further ideas

- consider varying N (the number of opening turns in which city tiles follow the rule-based strategy before the NN takes over) with map size
- increase the number of negative samples for the city NN
- add negative samples for the unit NN
- consider other NN architectures