For Kaggle

In [1]:
# !pip install kaggle-environments -U > /dev/null 2>&1
# !cp -r ../input/lux-ai-2021/* .

For agents validation

In [None]:
# timeout 1h lux-ai-2021 --tournament --rankSystem wins --storeReplay false --storeLogs false --maxConcurrentMatches 1 agent/main.py agent_simple/main.py submission_v4/main.py

In [1]:
import numpy as np
import json
from pathlib import Path
import os
import random
from tqdm.notebook import tqdm
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
from sklearn.model_selection import train_test_split
import optuna
from optuna.trial import TrialState

In [2]:
def seed_everything(seed_value):
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    os.environ['PYTHONHASHSEED'] = str(seed_value)
    
    if torch.cuda.is_available(): 
        torch.cuda.manual_seed(seed_value)
        torch.cuda.manual_seed_all(seed_value)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = True

seed_everything(42)

# Preprocessing

Actions:
- m - move: m, unit who moves, direction
- bcity - build city: bcity, unit who builds
- bw - build worker, x-coord of building city, y-coord of building city
- r - research
- t - transfer: transfer, from user_1, to user_2, resourse, quantity

Updates:
- rp - research point: player, number of rp
- r - resources: r, type of resource, x-coord, y-coord, quantity
- u - user: u, worker/cart, player, user id, x-coord, y-coord, cooldown, wood, coal, uranium
- c - city: c, player, city id, number of resources, amount of consuming light at night
- ct - city tile: ct, player, city id, x-coord, y-coord, cooldown
- ccd - level of road: ccd, x-coord, y-coord, level value

In [3]:
!cd episodes && rm *_info.json && cd ..

rm: cannot remove '*_info.json': No such file or directory


In [4]:
def unit_label(action):
    strs = action.split(' ')
    unit_id = strs[1]
    if strs[0] == 'm':
        label = {'c': None, 'n': 0, 's': 1, 'w': 2, 'e': 3}[strs[2]]
    elif strs[0] == 'bcity':
        label = 4
    else:
        label = None
    return unit_id, label

def city_label(action):
    strs = action.split(' ')
    ctile_coord = (strs[1], strs[2])
    if strs[0] == 'bw':
        label = 0
    elif strs[0] == 'r':
        label = 1
    else:
        label = None
    return ctile_coord, label

def city_tile_coord(update, index):
    strs = update.split(' ')
    if strs[0] == 'ct' and strs[1] == index:
        ctile_coord = (strs[3], strs[4])
        return ctile_coord
    return None

def depleted_resources(obs):
    for u in obs['updates']:
        if u.split(' ')[0] == 'r':
            return False
    return True


def create_dataset_from_json(episode_dir, team_name='Toad Brigade'): 
    obses = {}
    unit_samples = []
    city_samples = []
    
    episodes = [path for path in Path(episode_dir).glob('*.json') if 'output' not in path.name]
    for filepath in tqdm(episodes): 
        with open(filepath) as f:
            json_load = json.load(f)

        ep_id = json_load['info']['EpisodeId']
        index = np.argmax([r or 0 for r in json_load['rewards']])
        if json_load['info']['TeamNames'][index] != team_name:
            continue

        for i in range(len(json_load['steps'])-1):
            # get there, collect all the cities and set action label to 0 for those of them 
            # who didn't act during the episode
            
            city_tiles = {}
            
            if json_load['steps'][i][index]['status'] == 'ACTIVE':
                actions = json_load['steps'][i+1][index]['action']
                obs = json_load['steps'][i][0]['observation']
                updates = obs['updates']
                
                for u in updates:
                    ctile_coord = city_tile_coord(u, str(index))
                    if ctile_coord:
                        city_tiles[ctile_coord] = 0
                
                if depleted_resources(obs):
                    break
                
                obs['player'] = index
                obs = dict([
                    (k,v) for k,v in obs.items() 
                    if k in ['step', 'updates', 'player', 'width', 'height']
                ])
                obs_id = f'{ep_id}_{i}'
                obses[obs_id] = obs
                                
                for action in actions:
                    unit_id, label = unit_label(action)
                    if label is not None:
                        unit_samples.append((obs_id, unit_id, label))
                        continue
                    ctile_coord, label = city_label(action)
                    if label is not None:
                        city_samples.append((obs_id, ctile_coord, label))
                    
                    
    return obses, unit_samples, city_samples

In [5]:
episode_dir = 'episodes'
obses, samples, city_samples = create_dataset_from_json(episode_dir)
print('observations:', len(obses), 'worker samples:', len(samples), 'city samples:', len(city_samples))

  0%|          | 0/408 [00:00<?, ?it/s]

observations: 104676 worker samples: 454557 city samples: 81995


In [6]:
labels = [sample[-1] for sample in samples]
actions = ['north', 'south', 'west', 'east', 'bcity']
for value, count in zip(*np.unique(labels, return_counts=True)):
    print(f'{actions[value]}: {count}')

north: 102204
south: 98315
west: 106276
east: 108876
bcity: 38886


In [7]:
labels_city = [sample[-1] for sample in city_samples]
actions_city = ['build_worker', 'research']
for value, count in zip(*np.unique(labels_city, return_counts=True)):
    print(f'{actions_city[value]}: {count}')

build_worker: 25237
research: 56758


In [9]:
# episode_dir = 'episodes'
# json_load = create_dataset_from_json(episode_dir)
# json_load

In [8]:
# obses['26762301_31']

In [11]:
# samples

In [12]:
# city_samples[200000]

# Training

b - training tensor of float32. b dimensions is 20x32x32

- b[0] - position of current unit
- b[1] - cargo sum/100 of current unit
- b[2, 3, 4] - position, cooldown/6, and cargo sum/100 for units from the same team
- b[5, 6, 7] - position, cooldown/6, and cargo sum/100 for units from another team
- b[8, 9] - position,  min(city fuel/city energy consumption, 10)/10
- b[10, 11] - position, cooldown/6, and cargo sum/100 for units from another team
- b[12] - amount of wood / 800
- b[13] - amount of coal / 800
- b[14] - amount of uranium / 800
- b[15] - research points / 200 of unit's team
- b[16] - research piints / 200 of another team
- b[17] - time of the day (from 0 to 1, step 0.05)
- b[18] - step of the game (from 0 to 1, step 1/360)
- b[19] - map size

Add number of cities - ? 

Add day/night - ?

In [34]:
from math import inf

def manhattan_distance(x1, y1, x2, y2):
    return (abs(x2-x1) + abs(y2-y1))

def find_user_coords(obs, unit_id, x_shift, y_shift):
    for update in obs['updates']:
        strs = update.split(' ')
        input_identifier = strs[0]
        
        if input_identifier == 'u':
            x = int(strs[4]) + x_shift
            y = int(strs[5]) + y_shift
            if strs[3] == unit_id:
                return x, y
    return None, None

def find_closest(x, y, coord_list, width, height):
    min_dist = inf
    min_coords = [None, None]
    for i, j in coord_list:
        dist = manhattan_distance(x, y, i, j, width, height)
        if dist < min_dist:
            min_dist = dist
            min_coords = [i, j]
    return min_coords
        

# Input for Neural Network for workers
def make_input(obs, unit_id):
    width, height = obs['width'], obs['height']
    x_shift = (32 - width) // 2
    y_shift = (32 - height) // 2
    cities = {}
    
    b = np.zeros((25, 32, 32), dtype=np.float32)
    
    x_c, y_c = find_user_coords(obs, unit_id, x_shift, y_shift)
    
    for update in obs['updates']:
        strs = update.split(' ')
        input_identifier = strs[0]
        my_rp = 0
        
        city_tiles = []
        resources = []
        
        if input_identifier == 'u':
            x = int(strs[4]) + x_shift
            y = int(strs[5]) + y_shift
            wood = int(strs[7])
            coal = int(strs[8])
            uranium = int(strs[9])
            if strs[3] == unit_id: # 0:1
                # Position and Cargo
                b[:2, x, y] = (
                    1,
                    (wood + coal + uranium) / 100
                )
            else:                  # 2:9
                # Units
                team = int(strs[2])
                cooldown = float(strs[6])
                idx = 2 + (team - obs['player']) % 2 * 4
                m_dist = manhattan_distance(x_c, y_c, x, y)
#                 print(f'm_dist - {m_dist}, x_u - {x_c}, y_u - {y_c}, x - {x}, y - {y}, width - {width}, height - {height}')
                b[idx:idx + 4, x, y] = (
                    1,
                    cooldown / 6,
                    (wood + coal + uranium) / 100,
                    m_dist/((width-1) + (height-1))
                )
        elif input_identifier == 'ct':  # 10:15
            # CityTiles
            team = int(strs[1])
            city_id = strs[2]
            x = int(strs[3]) + x_shift
            y = int(strs[4]) + y_shift
            idx = 9 + (team - obs['player']) % 2 * 3
            m_dist = manhattan_distance(x_c, y_c, x, y)
#             print(f'm_dist - {m_dist}, x_u - {x_c}, y_u - {y_c}, x - {x}, y - {y}')
            b[idx:idx + 3, x, y] = (
                1,
                cities[city_id],
                m_dist/((width-1) + (height-1))
            )
            city_tiles.append([x, y])
        elif input_identifier == 'r':  # 16:19
            # Resources
            r_type = strs[1]
            x = int(strs[2]) + x_shift
            y = int(strs[3]) + y_shift
            amt = int(float(strs[4]))
            access_level = {'wood': 0, 'coal': 50, 'uranium': 200}[r_type]
            access = 0 if my_rp < access_level else 1
#             print(f'access_level - {access_level}, access - {access}, my_rp - {my_rp}')
            b[{'wood': 16, 'coal': 17, 'uranium': 18}[r_type], x, y] = amt / 800
            b[19, x, y] = access
            resources.append([x, y])
        elif input_identifier == 'rp':
            # Research Points
            team = int(strs[1])
            rp = int(strs[2])
            my_rp = rp if team == obs['player'] else my_rp
            b[20 + (team - obs['player']) % 2, :] = min(rp, 200) / 200
        elif input_identifier == 'c':
            # Cities
            city_id = strs[2]
            fuel = float(strs[3])
            lightupkeep = float(strs[4])
            cities[city_id] = min(fuel / lightupkeep, 10) / 10
    
    # Day/Night Cycle
    b[22, :] = obs['step'] % 40 / 40
    # Turns
    b[23, :] = obs['step'] / 360
    # Map Size
    b[24, x_shift:32 - x_shift, y_shift:32 - y_shift] = 1

    return b

Data for the cities:
- b[0] - position of current city
- b[1] - min(city fuel/city energy consumption, 10)/10 of current city
- b[2, 3, 4] - position, cooldown/10, and min(city fuel/city energy consumption, 10)/10 for cities from the same team
- b[5, 6, 7] - position, cooldown/10, and min(city fuel/city energy consumption, 10)/10 for cities from another team
- b[8, 9] - position and cargo sum/100 for units from the same team 
- b[10, 11] - position and cargo sum/100 for units from from another team
- b[12] - amount of wood / 800
- b[13] - amount of coal / 800
- b[14] - amount of uranium / 800
- b[15] - research points / 200 of unit's team
- b[16] - research piints / 200 of another team
- b[17] - time of the day (from 0 to 1, step 0.05)
- b[18] - step of the game (from 0 to 1, step 1/360)
- b[19] - map size

In [10]:
# Input for Neural Network for cities
def make_city_input(obs, city_coord):
    width, height = obs['width'], obs['height']
    x_shift = (32 - width) // 2
    y_shift = (32 - height) // 2
    cities = {}
    
    b = np.zeros((20, 32, 32), dtype=np.float32)
    
    for update in obs['updates']:
        strs = update.split(' ')
        input_identifier = strs[0]
        
        if input_identifier == 'ct':
            # CityTiles
            city_id = strs[2]
            x = int(strs[3]) 
            y = int(strs[4])
            cooldown = float(strs[5])
            if x == int(city_coord[0]) and y == int(city_coord[1]):
                b[:2, x + x_shift, y + y_shift] = (
                    1,
                    cities[city_id]
                )
            else:
                team = int(strs[1])
                idx = 2 + (team - obs['player']) % 2 * 3
                b[idx:idx + 3, x + x_shift, y + y_shift] = (
                    1,
                    cooldown / 10,
                    cities[city_id]
                )
        elif input_identifier == 'u':
            team = int(strs[2])
            x = int(strs[4]) + x_shift
            y = int(strs[5]) + y_shift
            wood = int(strs[7])
            coal = int(strs[8])
            uranium = int(strs[9])
            idx = 8 + (team - obs['player']) % 2 * 2
            b[idx:idx + 2, x, y] = (
                1,
                (wood + coal + uranium) / 100
            )
        elif input_identifier == 'r':
            # Resources
            r_type = strs[1]
            x = int(strs[2]) + x_shift
            y = int(strs[3]) + y_shift
            amt = int(float(strs[4]))
            b[{'wood': 12, 'coal': 13, 'uranium': 14}[r_type], x, y] = amt / 800
        elif input_identifier == 'rp':
            # Research Points
            team = int(strs[1])
            rp = int(strs[2])
            b[15 + (team - obs['player']) % 2, :] = min(rp, 200) / 200
        elif input_identifier == 'c':
            # Cities
            city_id = strs[2]
            fuel = float(strs[3])
            lightupkeep = float(strs[4])
            cities[city_id] = min(fuel / lightupkeep, 10) / 10
    
    # Day/Night Cycle
    b[17, :] = obs['step'] % 40 / 40
    # Turns
    b[18, :] = obs['step'] / 360
    # Map Size
    b[19, x_shift:32 - x_shift, y_shift:32 - y_shift] = 1

    return b

### Set modules for NN training

In [47]:
class LuxDataset(Dataset):
    def __init__(self, obses, samples):
        self.obses = obses
        self.samples = samples
        
    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        obs_id, unit_id, action = self.samples[idx]
        obs = self.obses[obs_id]
        state = make_input(obs, unit_id)
        
        return state, action
    
class LuxCityDataset(Dataset):
    def __init__(self, obses, city_samples):
        self.obses = obses
        self.samples = city_samples
        
    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        obs_id, city_coord, action = self.samples[idx]
        obs = self.obses[obs_id]
        state = make_city_input(obs, city_coord)
        
        return state, action

# Neural Network for Lux AI
class BasicConv2d(nn.Module):
    def __init__(self, input_dim, output_dim, kernel_size, bn):
        super().__init__()
        self.conv = nn.Conv2d(
            input_dim, output_dim, 
            kernel_size=kernel_size, 
            padding=(kernel_size[0] // 2, kernel_size[1] // 2)
        )
        self.bn = nn.BatchNorm2d(output_dim) if bn else None

    def forward(self, x):
        h = self.conv(x)
        h = self.bn(h) if self.bn is not None else h
        return h


class LuxNet(nn.Module):
    def __init__(self):
        super().__init__()
        layers, filters = 12, 32
        self.conv0 = BasicConv2d(25, filters, (3, 3), True)
        self.blocks = nn.ModuleList([BasicConv2d(filters, filters, (3, 3), True) for _ in range(layers)])
        self.head_p = nn.Linear(filters, 5, bias=False)

    def forward(self, x):
        h = F.relu_(self.conv0(x))
        for block in self.blocks:
            h = F.relu_(h + block(h))
        h_head = (h * x[:,:1]).view(h.size(0), h.size(1), -1).sum(-1)
        p = self.head_p(h_head)
        return p
    
    
class LuxCityNet(nn.Module):
    def __init__(self):
        super().__init__()
        layers, filters = 12, 32
        self.conv0 = BasicConv2d(20, filters, (3, 3), True)
        self.blocks = nn.ModuleList([BasicConv2d(filters, filters, (3, 3), True) for _ in range(layers)])
        self.head_p = nn.Linear(filters, 2, bias=False)

    def forward(self, x):
        h = F.relu_(self.conv0(x))
        for block in self.blocks:
            h = F.relu_(h + block(h))
        h_head = (h * x[:,:1]).view(h.size(0), h.size(1), -1).sum(-1)
        p = self.head_p(h_head)
        return p

### Make convolutional network with filters 5x5 -> 3x3

In [138]:
# class LuxDataset(Dataset):
#     def __init__(self, obses, samples):
#         self.obses = obses
#         self.samples = samples
        
#     def __len__(self):
#         return len(self.samples)

#     def __getitem__(self, idx):
#         obs_id, unit_id, action = self.samples[idx]
#         obs = self.obses[obs_id]
#         state = make_input(obs, unit_id)
        
#         return state, action

# # Neural Network for Lux AI
# class BasicConv2d(nn.Module):
#     def __init__(self, input_dim, output_dim, kernel_size, bn):
#         super().__init__()
#         self.conv = nn.Conv2d(
#             input_dim, output_dim, 
#             kernel_size=kernel_size, 
#             padding=(kernel_size[0] // 2, kernel_size[1] // 2)
#         )
#         self.bn = nn.BatchNorm2d(output_dim) if bn else None

#     def forward(self, x):
#         h = self.conv(x)
#         h = self.bn(h) if self.bn is not None else h
#         return h


# class LuxNet(nn.Module):
#     def __init__(self):
#         super().__init__()
#         layers, filters = 6, 32
#         self.conv0 = BasicConv2d(20, filters, (5, 5), True)
#         self.blocks1 = nn.ModuleList([BasicConv2d(filters, filters, (5, 5), True) for _ in range(layers)])
#         self.blocks2 = nn.ModuleList([BasicConv2d(filters, filters, (3, 3), True) for _ in range(layers)])
#         self.head_p = nn.Linear(filters, 5, bias=False)

#     def forward(self, x):
#         h = F.relu_(self.conv0(x))
#         for block in self.blocks1:
#             h = F.relu_(h + block(h))
#         for block in self.blocks2:
#             h = F.relu_(h + block(h))
#         h_head = (h * x[:,:1]).view(h.size(0), h.size(1), -1).sum(-1)
#         p = self.head_p(h_head)
#         return p

In [13]:
# Epoch 1/20 | train | Loss: 0.8373 | Acc: 0.6564
# Epoch 1/20 |  val  | Loss: 0.7038 | Acc: 0.7146
# Epoch 2/20 | train | Loss: 0.6665 | Acc: 0.7308
# Epoch 2/20 |  val  | Loss: 0.6567 | Acc: 0.7342
# Epoch 3/20 | train | Loss: 0.6125 | Acc: 0.7526
# Epoch 3/20 |  val  | Loss: 0.6198 | Acc: 0.7515
# Epoch 4/20 | train | Loss: 0.5781 | Acc: 0.7673
# Epoch 4/20 |  val  | Loss: 0.5982 | Acc: 0.7592
# Epoch 5/20 | train | Loss: 0.5523 | Acc: 0.7785
# Epoch 5/20 |  val  | Loss: 0.5879 | Acc: 0.7634
# Epoch 6/20 | train | Loss: 0.5327 | Acc: 0.7867
# Epoch 6/20 |  val  | Loss: 0.5836 | Acc: 0.7670
# Epoch 7/20 | train | Loss: 0.5148 | Acc: 0.7937
# Epoch 7/20 |  val  | Loss: 0.5748 | Acc: 0.7713
# Epoch 8/20 | train | Loss: 0.5007 | Acc: 0.7991
# Epoch 8/20 |  val  | Loss: 0.5571 | Acc: 0.7774
# Epoch 9/20 | train | Loss: 0.4880 | Acc: 0.8049
# Epoch 9/20 |  val  | Loss: 0.5596 | Acc: 0.7772
# Epoch 10/20 | train | Loss: 0.4761 | Acc: 0.8099
# Epoch 10/20 |  val  | Loss: 0.5556 | Acc: 0.7784
# Epoch 11/20 | train | Loss: 0.4650 | Acc: 0.8146
# Epoch 11/20 |  val  | Loss: 0.5472 | Acc: 0.7835
# Epoch 12/20 | train | Loss: 0.4557 | Acc: 0.8184
# Epoch 12/20 |  val  | Loss: 0.5597 | Acc: 0.7803
# Epoch 13/20 | train | Loss: 0.4473 | Acc: 0.8218
# Epoch 13/20 |  val  | Loss: 0.5527 | Acc: 0.7813
# Epoch 14/20 | train | Loss: 0.4393 | Acc: 0.8255
# Epoch 14/20 |  val  | Loss: 0.5506 | Acc: 0.7813
# Epoch 15/20 | train | Loss: 0.4318 | Acc: 0.8279
# Epoch 15/20 |  val  | Loss: 0.5568 | Acc: 0.7823
# Epoch 16/20 | train | Loss: 0.4257 | Acc: 0.8305
# Epoch 16/20 |  val  | Loss: 0.5601 | Acc: 0.7817
# Epoch 17/20 | train | Loss: 0.4193 | Acc: 0.8332
# Epoch 17/20 |  val  | Loss: 0.5586 | Acc: 0.7825
# Epoch 18/20 | train | Loss: 0.4135 | Acc: 0.8360
# Epoch 18/20 |  val  | Loss: 0.5550 | Acc: 0.7828
# Epoch 19/20 | train | Loss: 0.4084 | Acc: 0.8385
# Epoch 19/20 |  val  | Loss: 0.5461 | Acc: 0.7876
# Epoch 20/20 | train | Loss: 0.4035 | Acc: 0.8403
# Epoch 20/20 |  val  | Loss: 0.5622 | Acc: 0.7844

### Make convolutional network with filters 7x7 -> 5x5 -> 3x3

In [12]:
# class LuxDataset(Dataset):
#     def __init__(self, obses, samples):
#         self.obses = obses
#         self.samples = samples
        
#     def __len__(self):
#         return len(self.samples)

#     def __getitem__(self, idx):
#         obs_id, unit_id, action = self.samples[idx]
#         obs = self.obses[obs_id]
#         state = make_input(obs, unit_id)
        
#         return state, action

# # Neural Network for Lux AI
# class BasicConv2d(nn.Module):
#     def __init__(self, input_dim, output_dim, kernel_size, bn):
#         super().__init__()
#         self.conv = nn.Conv2d(
#             input_dim, output_dim, 
#             kernel_size=kernel_size, 
#             padding=(kernel_size[0] // 2, kernel_size[1] // 2)
#         )
#         self.bn = nn.BatchNorm2d(output_dim) if bn else None

#     def forward(self, x):
#         h = self.conv(x)
#         h = self.bn(h) if self.bn is not None else h
#         return h


# class LuxNet(nn.Module):
#     def __init__(self):
#         super().__init__()
#         layers, filters = 6, 32
#         self.conv0 = BasicConv2d(20, filters, (5, 5), True)
#         self.blocks1 = nn.ModuleList([BasicConv2d(filters, filters, (5, 5), True) for _ in range(layers)])
#         self.blocks2 = nn.ModuleList([BasicConv2d(filters, filters, (5, 5), True) for _ in range(layers)])
#         self.head_p = nn.Linear(filters, 5, bias=False)

#     def forward(self, x):
#         h = F.relu_(self.conv0(x))
#         for block in self.blocks1:
#             h = F.relu_(h + block(h))
#         for block in self.blocks2:
#             h = F.relu_(h + block(h))
#         h_head = (h * x[:,:1]).view(h.size(0), h.size(1), -1).sum(-1)
#         p = self.head_p(h_head)
#         return p

### Optimize NN parameters with Optuna

In [16]:
# def objective(trial):

#     num_epochs = 10
    
#     # model for unit actions
#     model = LuxNet()
#     train, val = train_test_split(samples, test_size=0.1, random_state=42, stratify=labels)
#     batch_size = 64

#     train_loader = DataLoader(
#         LuxDataset(obses, train), 
#         batch_size=batch_size, 
#         shuffle=True, 
#         num_workers=2
#     )
#     val_loader = DataLoader(
#         LuxDataset(obses, val), 
#         batch_size=batch_size, 
#         shuffle=False, 
#         num_workers=2
#     )
#     dataloaders_dict = {"train": train_loader, "val": val_loader}

#     # Generate the optimizers.
#     criterion = nn.CrossEntropyLoss()
#     optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "AdamW", "RMSprop", "SGD"])
#     lr = trial.suggest_float("lr", 1e-5, 1e-2, log=True)
#     optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)

#     for epoch in range(num_epochs):
#         model.cuda()
        
#         for phase in ['train', 'val']:
#             if phase == 'train':
#                 model.train()
#             else:
#                 model.eval()
                
#             epoch_loss = 0.0
#             epoch_acc = 0
            
#             dataloader = dataloaders_dict[phase]
#             for item in dataloader:
#                 states = item[0].cuda().float()
#                 actions = item[1].cuda().long()

#                 optimizer.zero_grad()
                
#                 with torch.set_grad_enabled(phase == 'train'):
#                     policy = model(states)
#                     loss = criterion(policy, actions)
#                     _, preds = torch.max(policy, 1)

#                     if phase == 'train':
#                         loss.backward()
#                         optimizer.step()

#                     epoch_loss += loss.item() * len(policy)
#                     epoch_acc += torch.sum(preds == actions.data)

#             data_size = len(dataloader.dataset)
#             epoch_loss = epoch_loss / data_size
#             epoch_acc = epoch_acc.double() / data_size

#         trial.report(epoch_acc, epoch)

#         # Handle pruning based on the intermediate value.
#         if trial.should_prune():
#             raise optuna.exceptions.TrialPruned()

#     return epoch_acc


# study = optuna.create_study(direction="maximize")
# study.optimize(objective, n_trials=500, timeout=10*3600)

# pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
# complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

# print("Study statistics: ")
# print("  Number of finished trials: ", len(study.trials))
# print("  Number of pruned trials: ", len(pruned_trials))
# print("  Number of complete trials: ", len(complete_trials))

# print("Best trial:")
# trial = study.best_trial

# print("  Value: ", trial.value)

# print("  Params: ")
# for key, value in trial.params.items():
#     print("    {}: {}".format(key, value))

### Function for NN training

In [57]:
import matplotlib.pyplot as plt
# from torch.utils.tensorboard import SummaryWriter


def train_model(model, dataloaders_dict, criterion, optimizer, num_epochs, city=False):
#     tb = SummaryWriter()
    
    best_acc = 0.0
    
    for epoch in range(num_epochs):
        model.cuda()
        
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
                
            epoch_loss = 0.0
            epoch_acc = 0
            
            dataloader = dataloaders_dict[phase]
            for item in tqdm(dataloader, leave=False):
                states = item[0].cuda().float()
                actions = item[1].cuda().long()

                optimizer.zero_grad()
                
                with torch.set_grad_enabled(phase == 'train'):
                    policy = model(states)
                    loss = criterion(policy, actions)
                    _, preds = torch.max(policy, 1)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                    epoch_loss += loss.item() * len(policy)
                    epoch_acc += torch.sum(preds == actions.data)

            data_size = len(dataloader.dataset)
            epoch_loss = epoch_loss / data_size
            epoch_acc = epoch_acc.double() / data_size

#             if phase == 'train':
#                 tb.add_scalar("Train Loss", epoch_loss, epoch)
#                 tb.add_scalar("Train Accuracy", epoch_acc, epoch)
#             else:
#                 tb.add_scalar("Val Loss", epoch_loss, epoch)
#                 tb.add_scalar("Val Accuracy", epoch_acc, epoch)
            
            print(f'Epoch {epoch + 1}/{num_epochs} | {phase:^5} | Loss: {epoch_loss:.4f} | Acc: {epoch_acc:.4f}')
        
        if epoch_acc > best_acc:
            if city:
                traced = torch.jit.trace(model.cpu(), torch.rand(1, 20, 32, 32))
                traced.save('agent/model_city.pth')
            else:
                traced = torch.jit.trace(model.cpu(), torch.rand(1, 25, 32, 32))
                traced.save('agent/model.pth')
            best_acc = epoch_acc
            
#     tb.close()

In [58]:
# model for unit actions
model = LuxNet()
train, val = train_test_split(samples, test_size=0.1, random_state=42, stratify=labels)
batch_size = 128

train_loader = DataLoader(
    LuxDataset(obses, train), 
    batch_size=batch_size, 
    shuffle=True, 
    num_workers=2
)
val_loader = DataLoader(
    LuxDataset(obses, val), 
    batch_size=batch_size, 
    shuffle=False, 
    num_workers=2
)
dataloaders_dict = {"train": train_loader, "val": val_loader}

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3) #27e-4

# model for city actions
model_city = LuxCityNet()
train_city, val_city = train_test_split(city_samples, test_size=0.1, random_state=42, stratify=labels_city)
batch_size_city = 128

train_city_loader = DataLoader(
    LuxCityDataset(obses, train_city), 
    batch_size=batch_size, 
    shuffle=True, 
    num_workers=2
)
val_city_loader = DataLoader(
    LuxCityDataset(obses, val_city), 
    batch_size=batch_size, 
    shuffle=False, 
    num_workers=2
)
dataloaders_city_dict = {"train": train_city_loader, "val": val_city_loader}

criterion_city = nn.CrossEntropyLoss()
optimizer_city = torch.optim.AdamW(model_city.parameters(), lr=1e-3)

In [52]:
num_epochs = 5

train_model(model, dataloaders_dict, criterion, optimizer, num_epochs=num_epochs)

  0%|          | 0/3197 [00:00<?, ?it/s]

Epoch 1/5 | train | Loss: 0.4348 | Acc: 0.8262


  0%|          | 0/356 [00:00<?, ?it/s]

Epoch 1/5 |  val  | Loss: 0.5376 | Acc: 0.7895


  0%|          | 0/3197 [00:00<?, ?it/s]

Epoch 2/5 | train | Loss: 0.4309 | Acc: 0.8282


  0%|          | 0/356 [00:00<?, ?it/s]

Epoch 2/5 |  val  | Loss: 0.5395 | Acc: 0.7873


  0%|          | 0/3197 [00:00<?, ?it/s]

Epoch 3/5 | train | Loss: 0.4268 | Acc: 0.8296


  0%|          | 0/356 [00:00<?, ?it/s]

Epoch 3/5 |  val  | Loss: 0.5367 | Acc: 0.7894


  0%|          | 0/3197 [00:00<?, ?it/s]

Epoch 4/5 | train | Loss: 0.4234 | Acc: 0.8309


  0%|          | 0/356 [00:00<?, ?it/s]

Epoch 4/5 |  val  | Loss: 0.5428 | Acc: 0.7859


  0%|          | 0/3197 [00:00<?, ?it/s]

Epoch 5/5 | train | Loss: 0.4200 | Acc: 0.8328


  0%|          | 0/356 [00:00<?, ?it/s]

Epoch 5/5 |  val  | Loss: 0.5443 | Acc: 0.7852


In [108]:
# torch.Size([64, 32, 32, 32]) torch.Size([64, 1, 32, 32])

In [92]:
# torch.Size([64, 8, 32, 32]) torch.Size([64, 1, 32, 32])

In [59]:
num_epochs = 10

train_model(model_city, dataloaders_city_dict, criterion_city, optimizer_city, num_epochs=num_epochs, city=True)

  0%|          | 0/577 [00:00<?, ?it/s]

Epoch 1/10 | train | Loss: 0.3175 | Acc: 0.8647


  0%|          | 0/65 [00:00<?, ?it/s]

Epoch 1/10 |  val  | Loss: 0.2654 | Acc: 0.8854


  0%|          | 0/577 [00:00<?, ?it/s]

Epoch 2/10 | train | Loss: 0.2448 | Acc: 0.8982


  0%|          | 0/65 [00:00<?, ?it/s]

Epoch 2/10 |  val  | Loss: 0.2469 | Acc: 0.8974


  0%|          | 0/577 [00:00<?, ?it/s]

Epoch 3/10 | train | Loss: 0.2269 | Acc: 0.9062


  0%|          | 0/65 [00:00<?, ?it/s]

Epoch 3/10 |  val  | Loss: 0.2323 | Acc: 0.9028


  0%|          | 0/577 [00:00<?, ?it/s]

Epoch 4/10 | train | Loss: 0.2132 | Acc: 0.9118


  0%|          | 0/65 [00:00<?, ?it/s]

Epoch 4/10 |  val  | Loss: 0.2390 | Acc: 0.9012


  0%|          | 0/577 [00:00<?, ?it/s]

Epoch 5/10 | train | Loss: 0.2036 | Acc: 0.9148


  0%|          | 0/65 [00:00<?, ?it/s]

Epoch 5/10 |  val  | Loss: 0.2303 | Acc: 0.9035


  0%|          | 0/577 [00:00<?, ?it/s]

Epoch 6/10 | train | Loss: 0.1973 | Acc: 0.9179


  0%|          | 0/65 [00:00<?, ?it/s]

Epoch 6/10 |  val  | Loss: 0.2389 | Acc: 0.9024


  0%|          | 0/577 [00:00<?, ?it/s]

Epoch 7/10 | train | Loss: 0.1905 | Acc: 0.9209


  0%|          | 0/65 [00:00<?, ?it/s]

Epoch 7/10 |  val  | Loss: 0.2430 | Acc: 0.8978


  0%|          | 0/577 [00:00<?, ?it/s]

Epoch 8/10 | train | Loss: 0.1804 | Acc: 0.9256


  0%|          | 0/65 [00:00<?, ?it/s]

Epoch 8/10 |  val  | Loss: 0.2172 | Acc: 0.9094


  0%|          | 0/577 [00:00<?, ?it/s]

Epoch 9/10 | train | Loss: 0.1739 | Acc: 0.9282


  0%|          | 0/65 [00:00<?, ?it/s]

Epoch 9/10 |  val  | Loss: 0.2466 | Acc: 0.8943


  0%|          | 0/577 [00:00<?, ?it/s]

Epoch 10/10 | train | Loss: 0.1658 | Acc: 0.9317


  0%|          | 0/65 [00:00<?, ?it/s]

Epoch 10/10 |  val  | Loss: 0.2368 | Acc: 0.8991


In [None]:
# !tensorboard --logdir runs

# Submission

In [69]:
%%writefile agent/agent.py
import os
import numpy as np
import torch
from math import inf
from lux.game import Game

path = '/kaggle_simulations/agent' if os.path.exists('/kaggle_simulations') else 'agent' # change to 'agent' for tests
model = torch.jit.load(f'{path}/model.pth')
model.eval()
model_city = torch.jit.load(f'{path}/model_city.pth')
model_city.eval()

def manhattan_distance(x1, y1, x2, y2):
    return (abs(x2-x1) + abs(y2-y1))

def find_user_coords(obs, unit_id, x_shift, y_shift):
    for update in obs['updates']:
        strs = update.split(' ')
        input_identifier = strs[0]
        
        if input_identifier == 'u':
            x = int(strs[4]) + x_shift
            y = int(strs[5]) + y_shift
            if strs[3] == unit_id:
                return x, y
    return None, None

def find_closest(x, y, coord_list, width, height):
    min_dist = inf
    min_coords = [None, None]
    for i, j in coord_list:
        dist = manhattan_distance(x, y, i, j, width, height)
        if dist < min_dist:
            min_dist = dist
            min_coords = [i, j]
    return min_coords
        

# Input for Neural Network for workers
def make_input(obs, unit_id):
    width, height = obs['width'], obs['height']
    x_shift = (32 - width) // 2
    y_shift = (32 - height) // 2
    cities = {}
    
    b = np.zeros((25, 32, 32), dtype=np.float32)
    
    x_c, y_c = find_user_coords(obs, unit_id, x_shift, y_shift)
    
    for update in obs['updates']:
        strs = update.split(' ')
        input_identifier = strs[0]
        my_rp = 0
        
        city_tiles = []
        resources = []
        
        if input_identifier == 'u':
            x = int(strs[4]) + x_shift
            y = int(strs[5]) + y_shift
            wood = int(strs[7])
            coal = int(strs[8])
            uranium = int(strs[9])
            if strs[3] == unit_id: # 0:1
                # Position and Cargo
                b[:2, x, y] = (
                    1,
                    (wood + coal + uranium) / 100
                )
            else:                  # 2:9
                # Units
                team = int(strs[2])
                cooldown = float(strs[6])
                idx = 2 + (team - obs['player']) % 2 * 4
                m_dist = manhattan_distance(x_c, y_c, x, y)
#                 print(f'm_dist - {m_dist}, x_u - {x_c}, y_u - {y_c}, x - {x}, y - {y}, width - {width}, height - {height}')
                b[idx:idx + 4, x, y] = (
                    1,
                    cooldown / 6,
                    (wood + coal + uranium) / 100,
                    m_dist/((width-1) + (height-1))
                )
        elif input_identifier == 'ct':  # 10:15
            # CityTiles
            team = int(strs[1])
            city_id = strs[2]
            x = int(strs[3]) + x_shift
            y = int(strs[4]) + y_shift
            idx = 9 + (team - obs['player']) % 2 * 3
            m_dist = manhattan_distance(x_c, y_c, x, y)
#             print(f'm_dist - {m_dist}, x_u - {x_c}, y_u - {y_c}, x - {x}, y - {y}')
            b[idx:idx + 3, x, y] = (
                1,
                cities[city_id],
                m_dist/((width-1) + (height-1))
            )
            city_tiles.append([x, y])
        elif input_identifier == 'r':  # 16:19
            # Resources
            r_type = strs[1]
            x = int(strs[2]) + x_shift
            y = int(strs[3]) + y_shift
            amt = int(float(strs[4]))
            access_level = {'wood': 0, 'coal': 50, 'uranium': 200}[r_type]
            access = 0 if my_rp < access_level else 1
#             print(f'access_level - {access_level}, access - {access}, my_rp - {my_rp}')
            b[{'wood': 16, 'coal': 17, 'uranium': 18}[r_type], x, y] = amt / 800
            b[19, x, y] = access
            resources.append([x, y])
        elif input_identifier == 'rp':
            # Research Points
            team = int(strs[1])
            rp = int(strs[2])
            my_rp = rp if team == obs['player'] else my_rp
            b[20 + (team - obs['player']) % 2, :] = min(rp, 200) / 200
        elif input_identifier == 'c':
            # Cities
            city_id = strs[2]
            fuel = float(strs[3])
            lightupkeep = float(strs[4])
            cities[city_id] = min(fuel / lightupkeep, 10) / 10
    
    # Day/Night Cycle
    b[22, :] = obs['step'] % 40 / 40
    # Turns
    b[23, :] = obs['step'] / 360
    # Map Size
    b[24, x_shift:32 - x_shift, y_shift:32 - y_shift] = 1

    return b


# Input for Neural Network for cities
def make_city_input(obs, city_coord):
    width, height = obs['width'], obs['height']
    x_shift = (32 - width) // 2
    y_shift = (32 - height) // 2
    cities = {}
    
    b = np.zeros((20, 32, 32), dtype=np.float32)
    
    for update in obs['updates']:
        strs = update.split(' ')
        input_identifier = strs[0]
        
        if input_identifier == 'ct':
            # CityTiles
            city_id = strs[2]
            x = int(strs[3]) 
            y = int(strs[4])
            cooldown = float(strs[5])
            if x == int(city_coord[0]) and y == int(city_coord[1]):
                b[:2, x + x_shift, y + y_shift] = (
                    1,
                    cities[city_id]
                )
            else:
                team = int(strs[1])
                idx = 2 + (team - obs['player']) % 2 * 3
                b[idx:idx + 3, x + x_shift, y + y_shift] = (
                    1,
                    cooldown / 10,
                    cities[city_id]
                )
        elif input_identifier == 'u':
            team = int(strs[2])
            x = int(strs[4]) + x_shift
            y = int(strs[5]) + y_shift
            wood = int(strs[7])
            coal = int(strs[8])
            uranium = int(strs[9])
            idx = 8 + (team - obs['player']) % 2 * 2
            b[idx:idx + 2, x, y] = (
                1,
                (wood + coal + uranium) / 100
            )
        elif input_identifier == 'r':
            # Resources
            r_type = strs[1]
            x = int(strs[2]) + x_shift
            y = int(strs[3]) + y_shift
            amt = int(float(strs[4]))
            b[{'wood': 12, 'coal': 13, 'uranium': 14}[r_type], x, y] = amt / 800
        elif input_identifier == 'rp':
            # Research Points
            team = int(strs[1])
            rp = int(strs[2])
            b[15 + (team - obs['player']) % 2, :] = min(rp, 200) / 200
        elif input_identifier == 'c':
            # Cities
            city_id = strs[2]
            fuel = float(strs[3])
            lightupkeep = float(strs[4])
            cities[city_id] = min(fuel / lightupkeep, 10) / 10
    
    # Day/Night Cycle
    b[17, :] = obs['step'] % 40 / 40
    # Turns
    b[18, :] = obs['step'] / 360
    # Map Size
    b[19, x_shift:32 - x_shift, y_shift:32 - y_shift] = 1

    return b

game_state = None
player = None


def get_game_state(observation):
    global game_state
    
    if observation["step"] == 0:
        game_state = Game()
        game_state._initialize(observation["updates"])
        game_state._update(observation["updates"][2:])
        game_state.id = observation["player"]
    else:
        game_state._update(observation["updates"])
    return game_state


# check if unit is in city or not
def in_city(pos):    
    try:
        city = game_state.map.get_cell_by_pos(pos).citytile
        return city is not None and city.team == game_state.id
    except:
        return False
    
# check if unit has enough time and space to build a city
def build_city_is_possible(unit, pos):    
    global game_state
    global player

    if game_state.turn % 40 < 30:
        return True
    x, y = pos.x, pos.y
    for i, j in ((x-1, y), (x+1, y), (x, y-1), (x, y+1)):
        try:
            city_id = game_state.map.get_cell(i, j).citytile.cityid
        except:
            continue
        print(f'City ID - {city_id}')
        if city_id in player.cities:
            city = player.cities[city_id]
            print(f'City fuel - {city.fuel}, City light - {city.get_light_upkeep()}')
            if city.fuel > (city.get_light_upkeep() + 18) * 10:
                print('True')
                return True
    return False


def call_func(obj, method, args=[]):
    return getattr(obj, method)(*args)


# translate unit policy to action
unit_actions = [('move', 'n'), ('move', 's'), ('move', 'w'), ('move', 'e'), ('build_city',)]
def get_unit_action(policy, unit, dest):
    for label in np.argsort(policy)[::-1]:
        act = unit_actions[label]
        pos = unit.pos.translate(act[-1], 1) or unit.pos
        if label == 4 and not build_city_is_possible(unit, pos):
            return unit.move('c'), unit.pos
        if pos not in dest or in_city(pos):
            return call_func(unit, *act), pos 
            
    return unit.move('c'), unit.pos

# translate city policy to action
city_actions = [('build_worker',), ('research', )]
def get_city_action(policy, city_tile, unit_count):
    global player
    
    for label in np.argsort(policy)[::-1]:
        act = city_actions[label]
        if label == 0 and unit_count < player.city_tile_count:
            unit_count += 1
            res = call_func(city_tile, *act)
        elif label == 1 and not player.researched_uranium():
            player.research_points += 1
            res = call_func(city_tile, *act)
        else:
            res = None
        return res, unit_count

# agent for making actions
def agent(observation, configuration):
    global game_state
    global player
    
    game_state = get_game_state(observation)    
    player = game_state.players[observation.player]
    actions = []        
    
    # Unit Actions
    dest = []
    for unit in player.units:
        if unit.can_act() and (game_state.turn % 40 < 30 or not in_city(unit.pos)):
            state = make_input(observation, unit.id)
            with torch.no_grad():
                p = model(torch.from_numpy(state).unsqueeze(0))

            policy = p.squeeze(0).numpy()

            action, pos = get_unit_action(policy, unit, dest)
            actions.append(action)
            dest.append(pos)
    
    # City Actions
    unit_count = len(player.units)
    for city in player.cities.values():
        for city_tile in city.citytiles:
            if city_tile.can_act():
                state = make_city_input(observation, [city_tile.pos.x, city_tile.pos.y])
                with torch.no_grad():
                    p = model_city(torch.from_numpy(state).unsqueeze(0))

                policy = p.squeeze(0).numpy()

                action, unit_count = get_city_action(policy, city_tile, unit_count)
                if action:
                    actions.append(action)
    
    return actions

Overwriting agent/agent.py


Submit predictions

In [67]:
!cd agent && tar -czf submission.tar.gz lux agent.py main.py model.pth model_city.pth

Test agents on 12x12 field

In [76]:
# from kaggle_environments import make

# env = make("lux_ai_2021", configuration={"width": 12, "height": 12, "loglevel": 2, "annotations": True}, debug=False)

# # first agent is yellow
# # second agent is blue
# steps = env.run(['agent/agent.py', 'agent.py'])

# env.render(mode="ipython", width=1200, height=800)

Test agent on 16x16 field

In [82]:
# env = make("lux_ai_2021", configuration={"width": 16, "height": 16, "loglevel": 2, "annotations": True}, debug=False)

# # first agent is yellow
# # second agent is blue
# steps = env.run(['agent/agent.py', 'agent.py'])

# env.render(mode="ipython", width=1200, height=800)

Test agent on 24x24 field

In [81]:
# env = make("lux_ai_2021", configuration={"width": 24, "height": 24, "loglevel": 2, "annotations": True}, debug=False)

# # first agent is yellow
# # second agent is blue
# steps = env.run(['agent/agent.py', 'agent.py'])

# env.render(mode="ipython", width=1200, height=800)

Test agents on 32x32 field

In [80]:
# env = make("lux_ai_2021", configuration={"width": 32, "height": 32, "loglevel": 2, "annotations": True}, debug=False)

# # first agent is yellow
# # second agent is blue
# steps = env.run(['agent/agent.py', 'agent.py'])

# env.render(mode="ipython", width=1200, height=800)

# NNs ensemble

In [1]:
import os
import numpy as np
import torch
from lux.game import Game

path = '/kaggle_simulations/agent' if os.path.exists('/kaggle_simulations') else '.' # change to 'agent' for tests
# unit NNs
model_v2 = torch.jit.load(f'{path}/model_v2.pth')
model_v2.eval()
model_v4 = torch.jit.load(f'{path}/model_v4.pth')
model_v4.eval()
model_v5 = torch.jit.load(f'{path}/model_v5.pth')
model_v5.eval()
model_v11 = torch.jit.load(f'{path}/model_v11.pth')
model_v11.eval()
# city NNs
model_city_v2 = torch.jit.load(f'{path}/model_city_v2.pth')
model_city_v2.eval()
model_city_v4 = torch.jit.load(f'{path}/model_city_v4.pth')
model_city_v4.eval()
model_city_v5 = torch.jit.load(f'{path}/model_city_v5.pth')
model_city_v5.eval()
model_city_v11 = torch.jit.load(f'{path}/model_city_v11.pth')
model_city_v11.eval()

# Input for Neural Network for units
def make_input(obs, unit_id):
    width, height = obs['width'], obs['height']
    x_shift = (32 - width) // 2
    y_shift = (32 - height) // 2
    cities = {}
    
    b = np.zeros((20, 32, 32), dtype=np.float32)
    
    for update in obs['updates']:
        strs = update.split(' ')
        input_identifier = strs[0]
        
        if input_identifier == 'u':
            x = int(strs[4]) + x_shift
            y = int(strs[5]) + y_shift
            wood = int(strs[7])
            coal = int(strs[8])
            uranium = int(strs[9])
            if unit_id == strs[3]:
                # Position and Cargo
                b[:2, x, y] = (
                    1,
                    (wood + coal + uranium) / 100
                )
            else:
                # Units
                team = int(strs[2])
                cooldown = float(strs[6])
                idx = 2 + (team - obs['player']) % 2 * 3
                b[idx:idx + 3, x, y] = (
                    1,
                    cooldown / 6,
                    (wood + coal + uranium) / 100
                )
        elif input_identifier == 'ct':
            # CityTiles
            team = int(strs[1])
            city_id = strs[2]
            x = int(strs[3]) + x_shift
            y = int(strs[4]) + y_shift
            idx = 8 + (team - obs['player']) % 2 * 2
            b[idx:idx + 2, x, y] = (
                1,
                cities[city_id]
            )
        elif input_identifier == 'r':
            # Resources
            r_type = strs[1]
            x = int(strs[2]) + x_shift
            y = int(strs[3]) + y_shift
            amt = int(float(strs[4]))
            b[{'wood': 12, 'coal': 13, 'uranium': 14}[r_type], x, y] = amt / 800
        elif input_identifier == 'rp':
            # Research Points
            team = int(strs[1])
            rp = int(strs[2])
            b[15 + (team - obs['player']) % 2, :] = min(rp, 200) / 200
        elif input_identifier == 'c':
            # Cities
            city_id = strs[2]
            fuel = float(strs[3])
            lightupkeep = float(strs[4])
            cities[city_id] = min(fuel / lightupkeep, 10) / 10
    
    # Day/Night Cycle
    b[17, :] = obs['step'] % 40 / 40
    # Turns
    b[18, :] = obs['step'] / 360
    # Map Size
    b[19, x_shift:32 - x_shift, y_shift:32 - y_shift] = 1

    return b


# Input for Neural Network for cities
def make_city_input(obs, city_coord):
    width, height = obs['width'], obs['height']
    x_shift = (32 - width) // 2
    y_shift = (32 - height) // 2
    cities = {}
    
    b = np.zeros((20, 32, 32), dtype=np.float32)
    
    for update in obs['updates']:
        strs = update.split(' ')
        input_identifier = strs[0]
        
        if input_identifier == 'ct':
            # CityTiles
            city_id = strs[2]
            x = int(strs[3]) 
            y = int(strs[4])
            cooldown = float(strs[5])
            if x == int(city_coord[0]) and y == int(city_coord[1]):
                b[:2, x + x_shift, y + y_shift] = (
                    1,
                    cities[city_id]
                )
            else:
                team = int(strs[1])
                idx = 2 + (team - obs['player']) % 2 * 3
                b[idx:idx + 3, x + x_shift, y + y_shift] = (
                    1,
                    cooldown / 10,
                    cities[city_id]
                )
        elif input_identifier == 'u':
            team = int(strs[2])
            x = int(strs[4]) + x_shift
            y = int(strs[5]) + y_shift
            wood = int(strs[7])
            coal = int(strs[8])
            uranium = int(strs[9])
            idx = 8 + (team - obs['player']) % 2 * 2
            b[idx:idx + 2, x, y] = (
                1,
                (wood + coal + uranium) / 100
            )
        elif input_identifier == 'r':
            # Resources
            r_type = strs[1]
            x = int(strs[2]) + x_shift
            y = int(strs[3]) + y_shift
            amt = int(float(strs[4]))
            b[{'wood': 12, 'coal': 13, 'uranium': 14}[r_type], x, y] = amt / 800
        elif input_identifier == 'rp':
            # Research Points
            team = int(strs[1])
            rp = int(strs[2])
            b[15 + (team - obs['player']) % 2, :] = min(rp, 200) / 200
        elif input_identifier == 'c':
            # Cities
            city_id = strs[2]
            fuel = float(strs[3])
            lightupkeep = float(strs[4])
            cities[city_id] = min(fuel / lightupkeep, 10) / 10
    
    # Day/Night Cycle
    b[17, :] = obs['step'] % 40 / 40
    # Turns
    b[18, :] = obs['step'] / 360
    # Map Size
    b[19, x_shift:32 - x_shift, y_shift:32 - y_shift] = 1

    return b

game_state = None
player = None


def get_game_state(observation):
    global game_state
    
    if observation["step"] == 0:
        game_state = Game()
        game_state._initialize(observation["updates"])
        game_state._update(observation["updates"][2:])
        game_state.id = observation["player"]
    else:
        game_state._update(observation["updates"])
    return game_state


def in_city(pos):    
    try:
        city = game_state.map.get_cell_by_pos(pos).citytile
        return city is not None and city.team == game_state.id
    except:
        return False
    
# check if unit has enough time and space to build a city
def build_city_is_possible(unit, pos):    
    global game_state
    global player

    if game_state.turn % 40 < 30:
        return True
    x, y = pos.x, pos.y
    for i, j in ((x-1, y), (x+1, y), (x, y-1), (x, y+1)):
        try:
            city_id = game_state.map.get_cell(i, j).citytile.cityid
        except:
            continue
        if city_id in player.cities:
            city = player.cities[city_id]
            print(f'City fuel - {city.fuel}, City light - {city.get_light_upkeep()}')
            if city.fuel > (city.get_light_upkeep() + 18) * 10:
                return True
    return False


def call_func(obj, method, args=[]):
    return getattr(obj, method)(*args)


# translate unit policy to action
unit_actions = [('move', 'n'), ('move', 's'), ('move', 'w'), ('move', 'e'), ('build_city',)]
def get_unit_action(policy, unit, dest):
    for label in np.argsort(policy)[::-1]:
        act = unit_actions[label]
        pos = unit.pos.translate(act[-1], 1) or unit.pos
        if label == 4 and not build_city_is_possible(unit, pos):
            return unit.move('c'), unit.pos
        if pos not in dest or in_city(pos):
            return call_func(unit, *act), pos 
            
    return unit.move('c'), unit.pos

# translate city policy to action
city_actions = [('build_worker',), ('research', )]
def get_city_action(policy, city_tile, unit_count):
    global player
    
    for label in np.argsort(policy)[::-1]:
        act = city_actions[label]
        if label == 0 and unit_count < player.city_tile_count:
            unit_count += 1
            res = call_func(city_tile, *act)
        elif label == 1 and not player.researched_uranium():
            player.research_points += 1
            res = call_func(city_tile, *act)
        else:
            res = None
        return res, unit_count

# agent for making actions
def agent(observation, configuration):
    global game_state
    global player
    
    game_state = get_game_state(observation)    
    player = game_state.players[observation.player]
    actions = []        

    # Unit Actions
    dest = []
    for unit in player.units:
        if unit.can_act() and (game_state.turn % 40 < 30 or not in_city(unit.pos)):
            state = make_input(observation, unit.id)
            with torch.no_grad():
                p_2 = model_v2(torch.from_numpy(state).unsqueeze(0))
                p_4 = model_v4(torch.from_numpy(state).unsqueeze(0))
                #p_5 = model_v5(torch.from_numpy(state).unsqueeze(0))
                p_11 = model_v11(torch.from_numpy(state).unsqueeze(0))

            policy_2 = p_2.squeeze(0).numpy()
            policy_4 = p_4.squeeze(0).numpy()
            #policy_5 = p_5.squeeze(0).numpy()
            policy_11 = p_11.squeeze(0).numpy()

            policy = [sum(x) for x in zip(*[policy_2, policy_4, policy_11])]

            action, pos = get_unit_action(policy, unit, dest)
            actions.append(action)
            dest.append(pos)

    # City Actions
    unit_count = len(player.units)
    for city in player.cities.values():
        for city_tile in city.citytiles:
            if city_tile.can_act():
                # at first game stages try to produce maximum amount of agents and research point
                if game_state.turn < 60:
                    if unit_count < player.city_tile_count: 
                        actions.append(city_tile.build_worker())
                        unit_count += 1
                    elif not player.researched_uranium():
                        actions.append(city_tile.research())
                        player.research_points += 1
                # then follow NN strategy
                else:
                    state = make_city_input(observation, [city_tile.pos.x, city_tile.pos.y])
                    with torch.no_grad():
                        p_2 = model_city_v2(torch.from_numpy(state).unsqueeze(0))
                        p_4 = model_city_v4(torch.from_numpy(state).unsqueeze(0))
                        #p_5 = model_city_v5(torch.from_numpy(state).unsqueeze(0))
                        p_11 = model_city_v11(torch.from_numpy(state).unsqueeze(0))

                    policy_2 = p_2.squeeze(0).numpy()
                    policy_4 = p_4.squeeze(0).numpy()
                    #policy_5 = p_5.squeeze(0).numpy()
                    policy_11 = p_11.squeeze(0).numpy()

                    policy = [sum(x) for x in zip(*[policy_2, policy_4, policy_11])]

                    action, unit_count = get_city_action(policy, city_tile, unit_count)
                    if action:
                        actions.append(action)
    
    return actions

Overwriting agent/agent.py


# Futher Ideas

- add previous positions of units
- add places where units cant move (adversarial cities and other units)


- train unit NN not to do anything
- train city NN not to do anything


- make ensemble of 3 best NNs that makes decision by voting or randomly selects action from presented


- increase filter number and decrease layers number and vice versa
- regularization?
- dropout?