# Init and imports

In [1]:
import pickle

def save_obj(obj, name ):
    with open(name + '.pkl', 'wb') as f:
        pickle.dump(obj, f, pickle.HIGHEST_PROTOCOL)

def load_obj(name ):
    with open(name + '.pkl', 'rb') as f:
        return pickle.load(f)

In [2]:
#root = "/media/bigdata/juan/bw/RL/models/v6"
root = "."
dir_episodes = root+"/../../dataset/episodes"

In [3]:
import os

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from PIL import Image

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter


'\n# For use only CPU\n#os.environ["CUDA_VISIBLE_DEVICES"]="-1"    \n\nimport tensorflow as tf\n#physical_devices = tf.config.list_physical_devices(\'GPU\')\n#tf.config.experimental.set_memory_growth(physical_devices[0], enable=True)\n\nfrom tensorflow.keras.models import Sequential, Model\nfrom tensorflow.keras.layers import Dense, LSTM, InputLayer, Conv2D, Conv1D, MaxPooling2D, Flatten, Concatenate, TimeDistributed, Dropout, Conv2DTranspose, Reshape, Layer, Lambda\nfrom tensorflow.keras.optimizers import RMSprop\nimport tensorflow.keras.backend as K\nfrom tensorflow.keras.models import Model\nfrom tensorflow.keras.layers import Input, LSTM, Dense\n'

In [4]:
torch.cuda.is_available()

True

In [5]:
torch.cuda.current_device()

0

In [6]:
torch.cuda.device(0)

<torch.cuda.device at 0x7f0892ba9400>

In [7]:
torch.cuda.device_count()

1

In [8]:
torch.cuda.get_device_name(0)

'NVIDIA GeForce GTX 1650'

## Load and make Dataset

In [9]:
MS_WHOLE_GAME = (32*60*1000)
SIDE_MAP = 8000

SIDE_MINIMAP = 256
CANT_MINIMAPS = 9
SIDE_LAYERS = 128
CANT_LAYERS_PLAYERS = 16
CANT_LAYERS_MAPS = 2

CANT_ACTIONS = 9
CANT_UNITS = 8

NORMALIZE_POSITIONS_ACTION = False

In [10]:
### Episode manager.

import os
import json
from os import listdir
from os.path import isdir, isfile, join

# List of state-action.
# State: 
#        Scalar: timestamp, mineral, oil
#        Layers x2 (first and second) view:
#              Layers x3 (current, aliados, enemigos):
#                    Layers 128x128: Base, Barraca, Torreta, Recolector, SoldadoRaso, SoldadoEntrenado, 
#                                    Tanque, TanquePesado, Life, Creada, estaConstruyendoUnidad, Moviendose,
#                                    Recolectando, ConstruyendoBuild, Atacando, YendoAtacar.
#              Layer Map 128x128: AmountMineral, BasesZones
#        Minimap 256x256: MinimapR, MinimapG, MinimapB, FuturoCercano, AreaLocal, 
#                         First, Second, ActualCerrando, PixelsTimeUpdated
#
# Action: 
#         For each action (noaction, surrender, update, move, recollect, buildBuilding, buildUnit, attack, cancelAction):
#               Probability action (1)
#               Unit Type (8): Base, Barraca, Torreta, Recolector, SoldadoRaso, SoldadoEntrenado, Tanque, TanquePesado
#         For each action (noaction, surrender, update, move, recollect, buildBuilding, buildUnit, attack, cancelAction):
#               Local select first and second ( 2 x 128x128 )
#         For each action (noaction, update, move, recollect, buildBuilding, buildUnit, attack, cancelAction):
#               Global select first and second ( 2 x 256x256 )
# Important: The global output is the position for the next action, it's for the next action recive the correct state.


# 'state_scalar', 'state_first_layers', 'state_second_layers', 'state_minimaps'
# 'action_name', 'params'(dict of pair param-value)

pos_actions_by_name = {'noAction':0, 'surrender':1, 'update':2, 'move':3, 'recollect':4, 'buildBuilding':5, 'buildUnit':6, 'attack':7, 'cancelAction':8}
name_by_pos_actions = {0:'noAction', 1:'surrender', 2:'update', 3:'move', 4:'recollect', 5:'buildBuilding', 6:'buildUnit', 7:'attack', 8:'cancelAction'}
pos_units_by_name = {"base":0, "barraca":1, "torreta":2, "recolector":3, "soldadoRaso":4, "soldadoEntrenado":5, "tanque":6, "tanquePesado":7}
name_by_pos_units = {0:"base", 1:"barraca", 2:"torreta", 3:"recolector", 4:"soldadoRaso", 5:"soldadoEntrenado", 6:"tanque", 7:"tanquePesado"}
pos_local_by_name = {"x1Local":0, "y1Local":1, "x2Local":2, "y2Local":3}
pos_global_by_name = {"x1NextGlobal":0, "y1NextGlobal":1, "x2NextGlobal":2, "y2NextGlobal":3}

def index_localfirst(action_name):
    if action_name == 'update':
        return 0
    elif action_name == 'move':
        return 1
    elif action_name == 'recollect':
        return 3
    elif action_name == 'buildUnit':
        return 5
    elif action_name == 'buildBuilding':
        return 6
    elif action_name == 'attack':
        return 8
    elif action_name == 'cancelAction':
        return 10
    else:
        return -1

def index_localsecond(action_name):
    if action_name == 'move':
        return 2
    elif action_name == 'recollect':
        return 4
    elif action_name == 'buildBuilding':
        return 7
    elif action_name == 'attack':
        return 9
    else:
        return -1
        
def index_global(action_name):
    if action_name == 'noAction':
        return 0
    elif action_name == 'update':
        return 1
    elif action_name == 'move':
        return 2
    elif action_name == 'recollect':
        return 3
    elif action_name == 'buildUnit':
        return 4
    elif action_name == 'buildBuilding':
        return 5
    elif action_name == 'attack':
        return 6
    elif action_name == 'cancelAction':
        return 7
    else:
        return -1
    
def episodes_from_disk_():
    list_episodes = [f for f in listdir(dir_episodes) if (isdir(join(dir_episodes, f)) and f != "." and f != "..")]
    return len(list_episodes)

def get_steps_episode_(num_episode):
    mypath = dir_episodes +"/"+ str(num_episode) +"/"
    onlydirs = [f for f in listdir(mypath) if isdir(join(mypath, f))]
    return len(onlydirs)

def get_img_and_resize_(num_episode, num_step, name, new_size):
    data_path = dir_episodes +"/"+ str(num_episode) +"/"+ str(num_step) +"/"+ name + ".png"
    img = Image.open(data_path)
    img = img.resize(new_size, Image.NEAREST) 
    img_array = np.asarray(img)
    img_array = np.array(img_array).astype('float32')/255.
    return img_array

def get_img_(num_episode, num_step, name):
    data_path = dir_episodes +"/"+ str(num_episode) +"/"+ str(num_step) +"/"+ name + ".png"
    img = Image.open(data_path)
    img_array = np.asarray(img)
    img_array = np.array(img_array).astype('float32')/255.
    return img_array

def create_step_struct_():
    step = {}
    step['state_layers_np'] = np.empty([1, (CANT_LAYERS_PLAYERS*3 +CANT_LAYERS_MAPS)*2, SIDE_LAYERS, SIDE_LAYERS])
    step['state_minimaps_np'] = np.empty([1, CANT_MINIMAPS,SIDE_MINIMAP, SIDE_MINIMAP])
    return step

def get_step_(num_episode, num_step, step):
    f = open(dir_episodes +"/"+ str(num_episode) +"/"+ str(num_step) +"/scalars.json")
    scalars =json.load(f)
    f.close()
        
    # State
    step['state_scalar'] = [scalars['timestamp'], scalars['local']['minerals'], scalars['local']['oils']]

    # I use threads
    index_layer = 0
    for a in ['current', 'aliades', 'enemies']:
        for b in ["base", "barraca", "torreta", "recolector", "soldadoraso", "soldadoentrenado", "tanque", "tanquepesado", "life", "creada", "buildunit", "buildbuilding", "moving", "recollecting", "attacking", "gotoattack"]:
            img = get_img_(num_episode, num_step, 'state_'+a+'_first_'+b)
            step['state_layers_np'][0,index_layer,:,:] = img[:,:]
            index_layer += 1
    img = get_img_(num_episode, num_step, 'state_objectmap_first_mineral')
    step['state_layers_np'][0,index_layer,:,:] = img[:,:]
    index_layer += 1
    img = get_img_(num_episode, num_step, 'state_objectmap_first_baseszones')
    step['state_layers_np'][0,index_layer,:,:] = img[:,:]
    index_layer += 1
    
    for a in ['current', 'aliades', 'enemies']:
        for b in ["base", "barraca", "torreta", "recolector", "soldadoraso", "soldadoentrenado", "tanque", "tanquepesado", "life", "creada", "buildunit", "buildbuilding", "moving", "recollecting", "attacking", "gotoattack"]:
            img = get_img_(num_episode, num_step, 'state_'+a+'_second_'+b)
            step['state_layers_np'][0,index_layer,:,:] = img[:,:]
            index_layer += 1
    img = get_img_(num_episode, num_step, 'state_objectmap_second_mineral')
    step['state_layers_np'][0,index_layer,:,:] = img[:,:]
    index_layer += 1
    img = get_img_(num_episode, num_step, 'state_objectmap_second_baseszones')
    step['state_layers_np'][0,index_layer,:,:] = img[:,:]

    index_layer = 0
    for a in ["minimapR", "minimapG", "minimapB", "update", "first", "second", "currentclosed", "futureclosed", "pixelstimeupdated"]:
        img = get_img_(num_episode, num_step, 'state_global_'+a)
        step['state_minimaps_np'][0,index_layer,:,:] = img[:,:]
        index_layer += 1

    step['state_scalar'] = torch.FloatTensor([step['state_scalar']])
    step['state_layers'] = torch.FloatTensor(step['state_layers_np'])
    step['state_minimaps'] = torch.FloatTensor(step['state_minimaps_np'])

    # Action
    step['action_name'] = scalars['action']['name']  
    step['params'] = {}
    for n in scalars['action']:
        if n != 'name' and (n in pos_units_by_name or n in pos_local_by_name):
            step['params'][n] = scalars['action'][n]
    for n in scalars:
        if n in pos_global_by_name:
            step['params'][n] = scalars[n]
                
    if NORMALIZE_POSITIONS_ACTION:
        for n in pos_local_by_name:
            step['params'][n] /= SIDE_LAYERS
            step['params'][n] -= 0.5
        for n in pos_global_by_name:
            step['params'][n] /= SIDE_MAP
            step['params'][n] -= 0.5

    return step

# 1 reward when the team win, -1 if lose.
# 1 reward when a enemy died.
def get_rewards_and_return_(num_episode, gamma, N):
    steps = get_steps_episode_(num_episode)
    if N == -1:
        N = steps
    rewards = []
    for step in reversed(range(steps)):
        f = open(dir_episodes +"/"+ str(num_episode) +"/"+ str(step) +"/scalars.json")
        scalars = json.load(f)
        f.close()
        rew = 0
        if step == steps-1: # Last step (end of the episode)
            if scalars['endgame']['win'] == 1:
                rew = 1
            else:
                rew = -1
        rew += scalars['enemies_deads']
        rewards.insert(0, rew)
    returns = []
    for step in range(steps):
        ret = 0
        for i in reversed(range(step, step+N)):
            if i < steps:
                ret = rewards[i] + gamma*ret
        returns.append(ret)
    return rewards, returns

def get_pos_of_local_and_global_(local_output, global_output):
    ret = {}
    for ind, a in enumerate(pos_actions_by_name):
        ret[a] = {}
        
        ret[a]['local'] = {}
        index_max = np.argmax(local_output[:,:,2*ind])
        x_first = index_max % SIDE_LAYERS
        y_first = index_max // SIDE_LAYERS
        y_first = SIDE_LAYERS -y_first -1
        index_max = np.argmax(local_output[:,:,2*ind+1])
        x_second = index_max % SIDE_LAYERS
        y_second = index_max // SIDE_LAYERS
        y_second = SIDE_LAYERS -y_second -1
        ret[a]['local']['first'] = (x_first, y_first)
        ret[a]['local']['second'] = (x_second, y_second)
        
        ret[a]['global'] = {}
        index_max = np.argmax(global_output[:,:,2*ind])
        x_first = index_max % SIDE_MINIMAP
        y_first = index_max // SIDE_MINIMAP
        y_first = SIDE_MINIMAP -y_first -1
        index_max = np.argmax(global_output[:,:,2*ind+1])
        x_second = index_max % SIDE_MINIMAP
        y_second = index_max // SIDE_MINIMAP
        y_second = SIDE_MINIMAP -y_second -1
        ret[a]['global']['first'] = (x_first, y_first)
        ret[a]['global']['second'] = (x_second, y_second)
        
    return ret
    
def save_image_(data, filename):
    im = Image.fromarray(data)
    im = im.convert('RGB')
    im.save(filename)
    
def save_image_of_local_and_global_(local_output, global_output, name):
    for a in pos_actions_by_name:
        ind = index_localfirst(a)
        if ind >= 0:
            values = local_output[ind,:,:]
            values = values.detach().numpy()
            save_image_(values * 255, root+"/saves/images/local_"+name+"_"+a+"_first.png")
        ind = index_localsecond(a)
        if ind >= 0:
            values = local_output[ind,:,:]
            values = values.detach().numpy()
            save_image_(values * 255, root+"/saves/images/local_"+name+"_"+a+"_second.png")
            
        ind = index_global(a)
        if ind >= 0:
            values = global_output[2*ind,:,:]
            values = values.detach().numpy()
            save_image_(values * 255, root+"/saves/images/global_"+name+"_"+a+"_first.png")
            values = global_output[2*ind+1,:,:]
            values = values.detach().numpy()
            save_image_(values * 255, root+"/saves/images/global_"+name+"_"+a+"_second.png")

def get_action_from_output_(actions, units, local, global_sel):

    index_action = np.argmax(actions)
    
    index_unit = -1
    if pos_actions_by_name['buildBuilding'] == index_action:
        index_unit = np.argmax(units[:3])
    elif pos_actions_by_name['buildUnit'] == index_action:
        index_unit = np.argmax(units[3:])+3
        
    action = ['noaction', 'surrender', 'update', 'move', 'recollect', 'buildbuilding', 'buildunit', 'attack', 'cancelaction'][index_action]
    unit_type = ['base', 'barraca', 'torreta', 'recolector', 'soldadoraso', 'soldadoentrenado', 'tanque', 'tanquepesado'][index_unit]
    
    ind = index_localfirst(name_by_pos_actions[index_action])
    x1 = -1
    y1 = -1
    if ind >= 0:
        index_max = np.argmax(local[ind,:,:])
        x1 = index_max % SIDE_LAYERS
        y1 = index_max // SIDE_LAYERS
    x2 = -1
    y2 = -1
    ind = index_localsecond(name_by_pos_actions[index_action])
    if ind >= 0:
        index_max = np.argmax(local[ind,:,:])
        x2 = index_max % SIDE_LAYERS
        y2 = index_max // SIDE_LAYERS
    
    ind = index_global(name_by_pos_actions[index_action])
    x1_g = -1
    y1_g = -1
    x2_g = -1
    y2_g = -1
    if ind >= 0:
        index_max = np.argmax(global_sel[2*ind,:,:])
        x1_g = index_max % SIDE_MINIMAP
        y1_g = index_max // SIDE_MINIMAP
        index_max = np.argmax(global_sel[2*ind+1,:,:])
        x2_g = index_max % SIDE_MINIMAP
        y2_g = index_max // SIDE_MINIMAP
    
    y1 = SIDE_LAYERS -y1 -1
    y2 = SIDE_LAYERS -y2 -1
    y1_g = SIDE_MINIMAP -y1_g -1
    y2_g = SIDE_MINIMAP -y2_g -1
    
    for x in pos_actions_by_name:
        print(x+": "+str(actions[pos_actions_by_name[x]]))

    unit_type = "None"
    if index_unit >= 0:
        unit_type = name_by_pos_units[index_unit]
        
    print(name_by_pos_actions[index_action]+" "+unit_type+", localfirst("+str(x1)+","+str(y1)+"), localsecond("+str(x2)+","+str(y2)+"), globalfirst("+str(x1_g)+","+str(y1_g)+"), globalsecond("+str(x2_g)+","+str(y2_g)+")")



In [11]:
# Read and send data from/to c++ 

import os
import struct
import array
import json

fd_read = None
fd_write = None
try:
    fd_read = open("/tmp/fifo_bw_topython", "rb")
    fd_write = open("/tmp/fifo_bw_tocpp", "wb")
except:
    print("Error to try open episodes experience fifo files")
    fd_read = None
    fd_write = None

def episodes_from_net():
    fd_write.write(b'episodes\n')
    fd_write.flush()
    
    buf = ""
    while True:
        byte = fd_read.read(1)
        c = byte.decode("utf-8")
        if c == '\n':
            byte = fd_read.read(1) # Read the end of the line '\0'
            break
        else:
            buf += c
            
    return int(str(buf))

def set_epochs_net(epochs):
    fd_write.write(b'epochs,'+bytes(str(epochs), encoding='utf8')+b'\n')
    fd_write.flush()

def get_step_from_net():
    buf = ""
    
    index = None
    isLast = None
    jsonScalar = None
    localSize = None
    globalSize = None
    
    while True:
        byte = fd_read.read(1)
        c = byte.decode("utf-8")
        if c == '\n':
            byte = fd_read.read(1) # Read the end of the line '\0'
            c = byte.decode("utf-8")
            if index is None:
                index = int(buf)
            elif isLast is None:
                isLast = (int(buf) == 1)
            elif jsonScalar is None:
                jsonScalar = json.loads(str(buf))
            elif localSize is None:
                localSize = int(buf)
            else:
                globalSize = int(buf)
                break
            buf = ""
        else:
            buf += c

    bytes_local = fd_read.read(localSize)
    bytes_global = fd_read.read(globalSize)

    local_np = np.frombuffer(bytes_local, dtype='float32')
    global_np = np.frombuffer(bytes_global, dtype='float32')
    
    return index, isLast, jsonScalar, local_np, global_np

episode_prefetched = []
STEPS_IN_MEMORY = 600

def prefetch_next_episode_net():
    global episode_prefetched
    
    # Delete on disk all 
    os.system("rm "+root+"/saves/tmp/*")
    
    episode_prefetched = []
    isLast = False
    
    fd_write.write(b'start_steps\n')
    fd_write.flush()
    
    num_step = 0
    while not isLast:
        index, isLast, jsonScalar, local_np, global_np = get_step_from_net()
        if num_step < STEPS_IN_MEMORY:
            episode_prefetched.append({'scalar':jsonScalar, 'local':local_np, 'global':global_np, 'where':'memory'})
        else:
            episode_prefetched.append({'scalar':jsonScalar, 'local':None, 'global':None, 'where':'disk'})
            np.save(root+"/saves/tmp/local_"+str(num_step)+".npy", local_np)
            np.save(root+"/saves/tmp/global_"+str(num_step)+".npy", global_np)
        num_step += 1

# Handle steps in memory and disk
def handle_disk_episode(current_step):
    global episode_prefetched
    if episode_prefetched[current_step]['where'] != 'memory':
        print("load "+str(current_step))
        # Remove all past steps.
        for s in range(0,current_step):
            if episode_prefetched[s]['where'] == 'memory':
                episode_prefetched[s]['local'] = None
                episode_prefetched[s]['global'] = None
                episode_prefetched[s]['where'] = 'deleted'
        # Get next STEPS_IN_MEMORY steps to memory
        for s in range(current_step,min(current_step+STEPS_IN_MEMORY,len(episode_prefetched))):
            episode_prefetched[s]['local'] = np.load(root+"/saves/tmp/local_"+str(s)+".npy")
            episode_prefetched[s]['global'] = np.load(root+"/saves/tmp/global_"+str(s)+".npy")
            #os.system("rm "+root+"/saves/tmp/local_"+str(s)+".npy")
            #os.system("rm "+root+"/saves/tmp/global_"+str(s)+".npy")
            episode_prefetched[s]['where'] = 'memory'
    
def get_rewards_and_return_net(gamma, N):
    global episode_prefetched
    
    steps = len(episode_prefetched)
    if N == -1:
        N = steps
    rewards = []
    for step in reversed(range(steps)):
        rew = 0
        if step == steps-1: # Last step (end of the episode)
            if episode_prefetched[step]['scalar']['endgame']['win'] == 1:
                rew = 1
            else:
                rew = -1
        rew += episode_prefetched[step]['scalar']['enemies_deads']
        rewards.insert(0, rew)
    returns = []
    for step in range(steps):
        ret = 0
        for i in reversed(range(step, step+N)):
            if i < steps:
                ret = rewards[i] + gamma*ret
        returns.append(ret)
    return rewards, returns

def get_step_net(num_step, step):
    global episode_prefetched
    
    handle_disk_episode(num_step)
    
    scalars = episode_prefetched[num_step]['scalar']
    
    # State, local and global
    step['state_scalar'] = [scalars['timestamp'], scalars['local']['minerals'], scalars['local']['oils']]

    step['state_scalar'] = torch.FloatTensor([step['state_scalar']])
    
    
    #step['state_layers'] = torch.FloatTensor(episode_prefetched[num_step]['local']) # TODO reshape?
    #step['state_minimaps'] = torch.FloatTensor(episode_prefetched[num_step]['global']) # TODO reshape?
    
    local_np = episode_prefetched[num_step]['local'].reshape(( 1, ((3*CANT_LAYERS_PLAYERS+CANT_LAYERS_MAPS)*2), SIDE_LAYERS, SIDE_LAYERS))
    global_np = episode_prefetched[num_step]['global'].reshape(( 1, CANT_MINIMAPS, SIDE_MINIMAP, SIDE_MINIMAP))
    
    step['state_layers_np'][:,:,:,:] = local_np[:,:,:,:]
    step['state_minimaps_np'][:,:,:,:] = global_np[:,:,:,:]

    step['state_layers'] = torch.FloatTensor(step['state_layers_np'])
    step['state_minimaps'] = torch.FloatTensor(step['state_minimaps_np'])
    
    

    # Action
    step['action_name'] = scalars['action']['name']  
    step['params'] = {}
    for n in scalars['action']:
        if n != 'name' and (n in pos_units_by_name or n in pos_local_by_name):
            step['params'][n] = scalars['action'][n]
    for n in scalars:
        if n in pos_global_by_name:
            step['params'][n] = scalars[n]
                
    if NORMALIZE_POSITIONS_ACTION:
        for n in pos_local_by_name:
            step['params'][n] /= SIDE_LAYERS
            step['params'][n] -= 0.5
        for n in pos_global_by_name:
            step['params'][n] /= SIDE_MAP
            step['params'][n] -= 0.5

    return step


Error to try open episodes experience fifo files


In [12]:
# API to read the episodes

def episodes_from_disk():
    return episodes_from_disk_()

def get_steps_episode(num_episode):
    return get_steps_episode_(num_episode)

def create_step_struct():
    return create_step_struct_()
    
def get_step(num_episode, num_step, step):
    return get_step_(num_episode, num_step, step)

# N is the count of rewards to future to calculate return, -1 is all
def get_rewards_and_return(num_episode, gamma, N):
    return get_rewards_and_return_(num_episode, gamma, N)


'''
def episodes_from_exp():
    return episodes_from_disk_()

def set_epochs_exp(epochs):
    pass

index_current_episode = -1
def prefetch_next_episode_exp():
    global index_current_episode
    index_current_episode += 1
    
def get_steps_episode_exp():
    global index_current_episode
    return get_steps_episode_(index_current_episode)

def get_step_exp(num_step, step):
    global index_current_episode
    return get_step_(index_current_episode, num_step, step)

# N is the count of rewards to future to calculate return, -1 is all
def get_rewards_and_return_exp(gamma, N):
    global index_current_episode
    return get_rewards_and_return_(index_current_episode, gamma, N)
'''
def set_epochs_exp(epochs):
    return set_epochs_net(epochs)
    
def episodes_from_exp():
    return episodes_from_net()

def prefetch_next_episode_exp():
    prefetch_next_episode_net()
    
def get_steps_episode_exp():
    return len(episode_prefetched)

def get_step_exp(num_step, step):
    return get_step_net(num_step, step)

# N is the count of rewards to future to calculate return, -1 is all
def get_rewards_and_return_exp(gamma, N):
    return get_rewards_and_return_net(gamma, N)



def get_pos_of_local_and_global(local_output, global_output):
    return get_pos_of_local_and_global_(local_output, global_output)
    
def save_image_of_local_and_global(local_output, global_output, name):
    return save_image_of_local_and_global_(local_output, global_output, name)

def get_action_from_output(actions, units, local, global_sel):
    return get_action_from_output_(actions, units, local, global_sel)

## Make Model

In [13]:

start_neurons = 16

padding_mode = 'replicate'
#padding_mode = 'zeros'

# OJO: la deconv2d tiene stride 2 y no tiene activation, me vino asi del otro
# OJO: Conv2d local y global tiene kernel de 1x1
# OJO: mejorar esta red dejando una red llamada base hasta el final de LSTM y un cabezal para cada salida,
#      el forwarding primero lo hago en la base y luego incovoco cada head
# Se tiene que hacer lo anterior de poner primero y una sola vez la base porque el estado de LSTM
# se debe ajustar una vez.

class Net(nn.Module):

    def __init__(self, dropout_amount):
        super(Net, self).__init__()
        # Layers input:   (SIDE_LAYERS, SIDE_LAYERS, (CANT_LAYERS_PLAYERS*3 +CANT_LAYERS_MAPS)*2)
        # Minimaps input: (SIDE_MINIMAP, SIDE_MINIMAP, CANT_MINIMAPS)
        # Scalars input: (3)
        #
        # Actions output: (CANT_ACTIONS)
        # Units output:   (CANT_UNITS)
        # Local output:   (11, SIDE_LAYERS, SIDE_LAYERS) -> (11, 1, 1)
        # Global output:  ((CANT_ACTIONS-1)*2, SIDE_MINIMAP, SIDE_MINIMAP) -> ((CANT_ACTIONS-1)*2, 1, 1)

        self.dropout_amount = dropout_amount
        
        self.policy_return = True
        self.value_return = True
        
        # Make LSTM hidden state
        # The axes semantics are (num_layers, minibatch_size, hidden_dim)
        #self.hidden_state = (torch.zeros(2, 1, 64), torch.zeros(2, 1, 64))
        self.register_buffer('cell_state', torch.zeros(2, 1, 64))
        self.register_buffer('hidden_state', torch.zeros(2, 1, 64))
            
        self.make_first_part()
        self.make_middle_part()
        self.make_last_part()
        
        self.make_value_part()
    
    def set_policy_return(self, enable):
        self.policy_return = enable
    def set_value_return(self, enable):
        self.value_return = enable
        
    def make_first_part(self):
        # Dense scalar, scalar input
        self.dense_x = nn.Sequential(
            nn.Linear(3, 16),
            nn.Sigmoid()
        )
        
        # First Conv2D, minimaps input
        self.conv1 = nn.Sequential(
            nn.Conv2d(CANT_MINIMAPS, start_neurons*1, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU(),
            nn.Conv2d(start_neurons*1, start_neurons*1, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU()
        )
        self.pool1 = nn.Sequential(
            nn.MaxPool2d(2),
            nn.Dropout(self.dropout_amount)
        )
        
        # Conv2D, layers input
        self.conv1_layers = nn.Sequential(
            nn.Conv2d((CANT_LAYERS_PLAYERS*3 +CANT_LAYERS_MAPS)*2, start_neurons*1, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU()
        )
        
        # Concatenate minimaps and layers Conv2D
        # Concatenate: pool1 and conv1_layers, return pool1
        
        # Second Conv2D
        self.conv2 = nn.Sequential(
            nn.Conv2d(start_neurons*2, start_neurons*2, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU(),
            nn.Conv2d(start_neurons*2, start_neurons*2, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU()
        )
        self.pool2 = nn.Sequential(
            nn.MaxPool2d(2),
            nn.Dropout(self.dropout_amount)
        )
        
        # Third Conv2D
        self.conv3 = nn.Sequential(
            nn.Conv2d(start_neurons*2, start_neurons*4, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU(),
            nn.Conv2d(start_neurons*4, start_neurons*4, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU()
        )
        self.pool3 = nn.Sequential(
            nn.MaxPool2d(2),
            nn.Dropout(self.dropout_amount)
        )
        
        # Fourth Conv2D
        self.conv4 = nn.Sequential(
            nn.Conv2d(start_neurons*4, start_neurons*8, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU(),
            nn.Conv2d(start_neurons*8, start_neurons*8, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU()
        )
        self.pool4 = nn.Sequential(
            nn.MaxPool2d(2),
            nn.Dropout(self.dropout_amount)
        )
        
    def make_middle_part(self):
        self.conv_middle = nn.Sequential(
            nn.Conv2d(start_neurons*8, start_neurons*16, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU()
        )
        
        # Flatten
        # To (minibatch, -1), return conv_middle_flatten
        
        # Concatenate scalar dense_x and flatten
        # Concatenate: dense_x and conv_middle_flatten, return x_concat
        
        # LSTM
        self.lstm_layer = nn.LSTM(input_size=start_neurons*16 * 16*16 + 16, 
                                  hidden_size=64, 
                                  num_layers=2, 
                                  dropout=self.dropout_amount)
        
        # Dense
        self.out_dense_preaction = nn.Sequential(
            nn.Linear(64, 64),
            nn.Sigmoid(),
            nn.Dropout(self.dropout_amount)
        )
        
        # Dense, output action
        self.output_actions = nn.Sequential(
            nn.Linear(64, CANT_ACTIONS)
        )

        # Dense, output units
        self.output_units = nn.Sequential(
            nn.Linear(64, CANT_UNITS)
        )
        
        # Dense
        self.dense_middle_output = nn.Sequential(
            nn.Linear(64, start_neurons*16 * 16*16),
            nn.Sigmoid()
        )

        # Reshape
        # To: (minibatch, start_neurons*16, 16, 16), return convm
        
    def make_last_part(self):
        # First DeConv2D
        self.deconv4 = nn.ConvTranspose2d(start_neurons*16, 
                                          start_neurons*8, 
                                          kernel_size=3, 
                                          padding=1, 
                                          output_padding=1, 
                                          stride=2)
        # Concatenate: deconv4 and conv4, return uconv4
        self.uconv4 = nn.Sequential(
            nn.Dropout(self.dropout_amount),
            nn.Conv2d(start_neurons*16, start_neurons*8, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU(),
            nn.Conv2d(start_neurons*8, start_neurons*8, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU()
        )
        
        # Second DeConv2D
        self.deconv3 = nn.ConvTranspose2d(start_neurons*8, 
                                          start_neurons*4, 
                                          kernel_size=3, 
                                          padding=1, 
                                          output_padding=1, 
                                          stride=2)
        # Concatenate: deconv3 and conv3, return uconv3
        self.uconv3 = nn.Sequential(
            nn.Dropout(self.dropout_amount),
            nn.Conv2d(start_neurons*8, start_neurons*4, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU(),
            nn.Conv2d(start_neurons*4, start_neurons*4, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU()
        )
        
        # Third DeConv2D
        self.deconv2 = nn.ConvTranspose2d(start_neurons*4, 
                                          start_neurons*2, 
                                          kernel_size=3, 
                                          padding=1, 
                                          output_padding=1, 
                                          stride=2)
        # Concatenate: deconv2 and conv2, return uconv2
        self.uconv2 = nn.Sequential(
            nn.Dropout(self.dropout_amount),
            nn.Conv2d(start_neurons*4, start_neurons*2, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU(),
            nn.Conv2d(start_neurons*2, start_neurons*2, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU()
        )
        
        # Fourth DeConv2D
        self.deconv1 = nn.ConvTranspose2d(start_neurons*2, 
                                          start_neurons*1, 
                                          kernel_size=3, 
                                          padding=1, 
                                          output_padding=1, 
                                          stride=2)
        # Concatenate: deconv1 and conv1, return uconv1
        self.uconv1 = nn.Sequential(
            nn.Dropout(self.dropout_amount),
            nn.Conv2d(start_neurons*2, start_neurons*1, kernel_size=3, padding=1, padding_mode=padding_mode),
            nn.ReLU(),
            nn.Conv2d(start_neurons*1, start_neurons*1, kernel_size=3, padding=1, padding_mode=padding_mode)
        )

        # Conv2D, output local
        self.output_local = nn.Sequential(
            nn.Conv2d(start_neurons*2, 11, kernel_size=1, padding=0),
            nn.Conv2d(11, 11*2, kernel_size=SIDE_LAYERS, padding=0, groups=11)
        )
        
        # Conv2D, output global
        self.output_global = nn.Sequential(
            nn.Conv2d(start_neurons*1, (CANT_ACTIONS-1)*2, kernel_size=1, padding=0),
            nn.Conv2d( (CANT_ACTIONS-1)*2, (CANT_ACTIONS-1)*2*2, kernel_size=SIDE_MINIMAP, padding=0, groups=(CANT_ACTIONS-1)*2)
        )
    
    def make_value_part(self):
        self.value = nn.Sequential(
            nn.Linear(start_neurons*16 * 16*16 + 16, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1)
        )
        
    def reset_states(self):
        nn.init.zeros_(self.cell_state)
        nn.init.zeros_(self.hidden_state)
    
    def swap_hidden_state(self):
        self.cell_state = self.cell_state_out.clone().detach()
        self.hidden_state = self.hidden_state_out.clone().detach()
        #self.hidden_state[0][:] = self.hidden_state_out[0][:]
        #self.hidden_state[1][:] = self.hidden_state_out[1][:]
    
    def get_hidden_state(self):
        return (self.cell_state.clone().detach(), self.hidden_state.clone().detach())
    
    def set_hidden_state(self, cell_state, hidden_state):
        self.cell_state[:] = cell_state[:]
        self.hidden_state[:] = hidden_state[:]
        
    def forward(self, scalar_input, layers_input, global_input):
        output_actions_t = None
        output_units_t = None
        output_local_t = None
        output_global_t = None
        value_t = None
        
        # First part
        dense_x_t = self.dense_x(scalar_input)
        conv1_t = self.conv1(global_input)
        pool1_t = self.pool1(conv1_t)
        conv1_layers_t = self.conv1_layers(layers_input)
        pool1_t = torch.cat( (pool1_t, conv1_layers_t), dim=1)
        conv2_t = self.conv2(pool1_t)
        pool2_t = self.pool2(conv2_t)
        conv3_t = self.conv3(pool2_t)
        pool3_t = self.pool3(conv3_t)
        conv4_t = self.conv4(pool3_t)
        pool4_t = self.pool4(conv4_t)
        
        # Middle part
        conv_middle_t = self.conv_middle(pool4_t)
        conv_middle_flatten_t = conv_middle_t.view(1, -1) # First arg is minibatch
        x_concat_t = torch.cat( (dense_x_t, conv_middle_flatten_t), dim=1)
        x_concat_t = x_concat_t.view(1, 1, -1) # Add sequent dimension, second arg is minibatch
        if self.policy_return:
            lstm_layer_t, hidden_state_out = self.lstm_layer(x_concat_t, (self.cell_state, self.hidden_state) )
            self.cell_state_out = hidden_state_out[0]
            self.hidden_state_out = hidden_state_out[1]
            lstm_layer_t = lstm_layer_t.view(1, -1) # Delete sequent dimension, first arg is minibatch
            out_dense_preaction_t = self.out_dense_preaction(lstm_layer_t)
            dense_middle_output_t = self.dense_middle_output(out_dense_preaction_t)
            convm_t = dense_middle_output_t.view(1, start_neurons*16, 16, 16) # First arg is minibatch

            # Last part
            deconv4_t = self.deconv4(convm_t)
            uconv4_t = torch.cat( (deconv4_t, conv4_t), dim=1)
            uconv4_t = self.uconv4(uconv4_t)
            deconv3_t = self.deconv3(uconv4_t)
            uconv3_t = torch.cat( (deconv3_t, conv3_t), dim=1)
            uconv3_t = self.uconv3(uconv3_t)
            deconv2_t = self.deconv2(uconv3_t)
            uconv2_t = torch.cat( (deconv2_t, conv2_t), dim=1)
            uconv2_t = self.uconv2(uconv2_t)
            deconv1_t = self.deconv1(uconv2_t)
            uconv1_t = torch.cat( (deconv1_t, conv1_t), dim=1)
            uconv1_t = self.uconv1(uconv1_t)

            # Outputs
            output_actions_t = self.output_actions(out_dense_preaction_t)
            output_units_t = self.output_units(out_dense_preaction_t)
            output_local_t = self.output_local(uconv2_t)
            output_global_t = self.output_global(uconv1_t)
            
        # Value
        if self.value_return:
            value_t = self.value(x_concat_t)
        
        # Only policy
        if self.policy_return and not self.value_return:
            return output_actions_t, output_units_t, output_local_t, output_global_t
        # Only value
        elif not self.policy_return and self.value_return:
            return value_t
        # both
        else:
            return output_actions_t, output_units_t, output_local_t, output_global_t, value_t

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


## Training

In [14]:
# Function that return input and output for the network

## Used in pre-train

def get_actions_output(state, output):
    #ret = torch.zeros(1,CANT_ACTIONS)
    ret = torch.zeros(1, dtype=torch.long)
    index_action = pos_actions_by_name[state['action_name']]
    #ret[0,index_action] = 1.0
    ret[0] = index_action
    return ret, output

def get_units_output(state, output):
    ret = None
    if state['action_name'] == 'buildUnit':
        output = output[:, 3:]
        #ret = torch.zeros(1,5)
        ret = torch.zeros(1, dtype=torch.long)
        for x in pos_units_by_name:
            if x in step['params'] and step['params'][x] > 0.5:
                index = pos_units_by_name[x]
                if index >= 3:
                    #ret[0,index-3] = 1.0
                    ret[0] = index-3
                break
    elif state['action_name'] == 'buildBuilding':
        output = output[:, :3]
        #ret = torch.zeros(1,3)
        ret = torch.zeros(1, dtype=torch.long)
        for x in pos_units_by_name:
            if x in step['params'] and step['params'][x] > 0.5:
                index = pos_units_by_name[x]
                if index < 3:
                    #ret[0,index] = 1.0
                    ret[0] = index
            break
    return ret, output

def get_local_output(state, output):
    ret = None
    
    index_first_action = index_localfirst(state['action_name'])
    index_second_action = index_localsecond(state['action_name'])
    
    # Just first
    if index_first_action >= 0 and index_second_action < 0:
        output = output[:,[index_first_action*2, index_first_action*2+1],:,:]
        ret = torch.zeros(1, 2, 1, 1)
        ret[0,0,0,0] = state['params']['x1Local']
        ret[0,1,0,0] = state['params']['y1Local']
    # Just second
    if index_second_action >= 0 and index_first_action < 0:
        output = output[:,[index_second_action*2, index_second_action*2+1],:,:]
        ret = torch.zeros(1, 2, 1, 1)
        ret[0,0,0,0] = state['params']['x2Local']
        ret[0,1,0,0] = state['params']['y2Local']
    # Both
    if index_first_action >= 0 and index_second_action >= 0:
        output = output[:, [index_first_action*2, index_first_action*2+1, index_second_action*2, index_second_action*2+1] , :, :]
        ret = torch.zeros(1, 4, 1, 1)
        ret[0,0,0,0] = state['params']['x1Local']
        ret[0,1,0,0] = state['params']['y1Local']
        ret[0,2,0,0] = state['params']['x2Local']
        ret[0,3,0,0] = state['params']['y2Local']
        
    return ret, output

def get_global_output(state, output):
    index_action = index_global(state['action_name'])
    
    output = output[:, [index_action*2*2, index_action*2*2+1, index_action*2*2+2, index_action*2*2+3], :, :]
    ret = torch.zeros(1, 4, 1, 1)
    ret[0,0,0,0] = state['params']['x1NextGlobal']
    ret[0,1,0,0] = state['params']['y1NextGlobal']
    ret[0,2,0,0] = state['params']['x2NextGlobal']
    ret[0,3,0,0] = state['params']['y2NextGlobal']
    
    return ret, output

In [19]:
# Set training params

platform = 'cpu'
if torch.cuda.is_available():
    platform = 'cuda'

# For both
DROPOUT = 0.0
GAMMA = 0.99
LEARNING_RATE = 0.00001

# For pre-traing
epochs = 5
BATCH_SIZE = 32
pretraining_from_disk = True

# For training
BATCH_SIZE_ONPOLICY = 128
N_ROLL_QVALUE = 4



net = Net(DROPOUT)
net = net.to(platform)

step = create_step_struct()

optimizer = torch.optim.RMSprop(net.parameters(), lr=LEARNING_RATE)

# Loss functions only for pre-train
lossfunc_actions = nn.CrossEntropyLoss() # nn.NLLLoss()
lossfunc_units = nn.CrossEntropyLoss() # nn.NLLLoss()
lossfunc_local = nn.MSELoss()
lossfunc_global = nn.MSELoss()

lossfunc_value = nn.MSELoss()

In [20]:
def count_parameters(model):
    total_params = 0
    for name, parameter in model.named_parameters():
        if not parameter.requires_grad: continue
        param = parameter.numel()
        print(name+": "+str(param))
        total_params+=param
    print(f"Total Trainable Params: {total_params}")
    return total_params
    
count_parameters(net)


dense_x.0.weight: 48
dense_x.0.bias: 16
conv1.0.weight: 1296
conv1.0.bias: 16
conv1.2.weight: 2304
conv1.2.bias: 16
conv1_layers.0.weight: 14400
conv1_layers.0.bias: 16
conv2.0.weight: 9216
conv2.0.bias: 32
conv2.2.weight: 9216
conv2.2.bias: 32
conv3.0.weight: 18432
conv3.0.bias: 64
conv3.2.weight: 36864
conv3.2.bias: 64
conv4.0.weight: 73728
conv4.0.bias: 128
conv4.2.weight: 147456
conv4.2.bias: 128
conv_middle.0.weight: 294912
conv_middle.0.bias: 256
lstm_layer.weight_ih_l0: 16781312
lstm_layer.weight_hh_l0: 16384
lstm_layer.bias_ih_l0: 256
lstm_layer.bias_hh_l0: 256
lstm_layer.weight_ih_l1: 16384
lstm_layer.weight_hh_l1: 16384
lstm_layer.bias_ih_l1: 256
lstm_layer.bias_hh_l1: 256
out_dense_preaction.0.weight: 4096
out_dense_preaction.0.bias: 64
output_actions.0.weight: 576
output_actions.0.bias: 9
output_units.0.weight: 512
output_units.0.bias: 8
dense_middle_output.0.weight: 4194304
dense_middle_output.0.bias: 65536
deconv4.weight: 294912
deconv4.bias: 128
uconv4.1.weight: 294912
u

33542547

In [11]:
# Load weights last training model.
net.load_state_dict(torch.load(root+'/saves/final/model.pth'))
net.eval()

Net(
  (dense_x): Sequential(
    (0): Linear(in_features=3, out_features=16, bias=True)
    (1): Sigmoid()
  )
  (conv1): Sequential(
    (0): Conv2d(9, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
    (1): ReLU()
    (2): Conv2d(16, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
    (3): ReLU()
  )
  (pool1): Sequential(
    (0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (1): Dropout(p=0.0, inplace=False)
  )
  (conv1_layers): Sequential(
    (0): Conv2d(100, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
    (1): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
    (1): ReLU()
    (2): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), padding_mode=replicate)
    (3): ReLU()
  )
  (pool2): Sequential(
    (0): MaxPool2d(kernel_size=2, stride=2, padding=

### Pre-Train

In [21]:
# For save metrics with tensorboard

writer = SummaryWriter(root+'/saves/metrics')

# Save struct graph
net.reset_states()
get_step(0,0,step)
dense_scalar_input = step['state_scalar'].to(platform)
conv_layers_input = step['state_layers'].to(platform)
conv_minimap_input = step['state_minimaps'].to(platform)
writer.add_graph(net, [dense_scalar_input, conv_layers_input, conv_minimap_input])


In [22]:
from time import time
import math

amount_episodes = 0
if pretraining_from_disk:
    amount_episodes = episodes_from_disk()
else:
    amount_episodes = episodes_from_exp()
    set_epochs_exp(epochs)
            
# Count all the batches
global_batch_iter = 0   

for epoch in range(epochs):
    print("Epoch "+str(epoch))
    
    shuffled_episodes = np.arange(amount_episodes)
    np.random.shuffle(shuffled_episodes)
    
    for index_e in range(amount_episodes):
        print("Episode "+str(index_e))

        # Init for metrics
        training_actions_loss = {'value': 0.0, 'min': float('+inf'), 'max': float('-inf')}
        training_units_loss = {'value': 0.0, 'min': float('+inf'), 'max': float('-inf')}
        training_local_loss = {'value': 0.0, 'min': float('+inf'), 'max': float('-inf')}
        training_global_loss = {'value': 0.0, 'min': float('+inf'), 'max': float('-inf')}
        amount_units_loss = 0
        amount_local_loss = 0
        # For time
        get_step_time = 0
        get_input_time = 0
        zero_grad_time = 0
        forward_time = 0
        get_target_time = 0
        convert_platform_time = 0
        loss_time = 0
        backpropagation_time = 0
        update_time = 0
        episode_time = 0
        hidden_state_time = 0
        
        # Reset hidden state
        net.reset_states()
        
        # Get steps amount, prefetching, rewards and returns.
        e = -1
        steps = None
        rewards = None
        returns = None
        if pretraining_from_disk:
            e = shuffled_episodes[index_e]
            steps = get_steps_episode(e)
        
            # Rewards and return
            rewards, returns = get_rewards_and_return(e, GAMMA, -1)
        else:
            # Prefetch whole episode
            print("Start prefetch...")
            prefetch_next_episode_exp()
            print("End prefetch...")

            steps = get_steps_episode_exp()
        
            # Rewards and returns
            rewards, returns = get_rewards_and_return_exp(GAMMA, -1)

        # Ignore small episodes
        if steps < 10:
            print("Ignore "+str(index_e))
            continue
            
        # Reset gradient
        net.zero_grad()
        
        # Forward and backpropagation count before to update weights
        # When batch_count reach BATCH_SIZE then I update the weights
        batch_count = 0
        
        # Loop all state-action from episode
        net.set_value_return(True)
        net.set_policy_return(True)
        for index in range(steps):
            if index%500 == 0:
                print(str(index)+"/"+str(steps))
                    
            t0 = time()
            
            # Get step
            if pretraining_from_disk:
                get_step(e, index, step)
            else:
                get_step_exp(index, step)
                
            t1 = time()
            
            # Get inputs
            dense_scalar_input = step['state_scalar'].to(platform)
            conv_layers_input = step['state_layers'].to(platform)
            conv_minimap_input = step['state_minimaps'].to(platform)
            
            t2 = time()

            
            
            t3 = time()

            # Foreward
            actions_out, units_out, local_out, global_out, value_out = net(dense_scalar_input, conv_layers_input, conv_minimap_input)
            
            t4 = time()
            
            # Get target and prepare output
            actions_target, actions_out = get_actions_output(step, actions_out)
            units_target, units_out = get_units_output(step, units_out)
            local_target, local_out = get_local_output(step, local_out)
            global_target, global_out = get_global_output(step, global_out)
            
            value_target = torch.FloatTensor([returns[index]]).to(platform)
            
            t5 = time()
            
            # Convert platform
            actions_target = actions_target.to(platform)
            if units_target is not None:
                units_target = units_target.to(platform)
            if local_target is not None:
                local_target = local_target.to(platform)
            global_target = global_target.to(platform)
            
            t6 = time()
            
            # Calculate loss
            loss_units = None
            loss_local = None
            loss_actions = lossfunc_actions(actions_out, actions_target) / BATCH_SIZE
            if units_target is not None:
                loss_units = lossfunc_units(units_out, units_target) / BATCH_SIZE
            if local_target is not None:
                loss_local = lossfunc_local(local_out, local_target) / BATCH_SIZE
            loss_global = lossfunc_global(global_out, global_target) / BATCH_SIZE
            
            loss_value = lossfunc_value(value_out[0][0], value_target) / BATCH_SIZE
            
            #print("Value target: "+str(returns[index])+", value out: "+str(value_out.item())+", loss: "+str(loss_value.item()))
            
            t7 = time()
            
            # Backpropagate errors
            loss_actions.backward(retain_graph=True)
            if units_target is not None:
                loss_units.backward(retain_graph=True)
            if local_target is not None:
                loss_local.backward(retain_graph=True)
            loss_global.backward(retain_graph=True)
            
            loss_value.backward(retain_graph=True)
            
            t8 = time()
            
            # Is batch end or episode end? Update weights
            batch_count += 1
            if batch_count >= BATCH_SIZE or index == (steps-1):
                global_batch_iter += 1
                batch_count = 0
                optimizer.step() # Update weights
                net.zero_grad() # Reset gradient
            
            t9 = time()
            
            # Update hidden state
            net.swap_hidden_state()
        
            t10 = time()

            # For metrics
            training_actions_loss['value'] += loss_actions.item()
            training_actions_loss['min'] = min(loss_actions.item(), training_actions_loss['min'])
            training_actions_loss['max'] = max(loss_actions.item(), training_actions_loss['max'])
            if loss_units is not None:
                training_units_loss['value'] += loss_units.item()
                amount_units_loss += 1
                training_units_loss['min'] = min(loss_units.item(), training_units_loss['min'])
                training_units_loss['max'] = max(loss_units.item(), training_units_loss['max'])
            if loss_local is not None:
                training_local_loss['value'] += loss_local.item()
                amount_local_loss += 1
                training_local_loss['min'] = min(loss_local.item(), training_local_loss['min'])
                training_local_loss['max'] = max(loss_local.item(), training_local_loss['max'])
            training_global_loss['value'] += loss_global.item()
            training_global_loss['min'] = min(loss_global.item(), training_global_loss['min'])
            training_global_loss['max'] = max(loss_global.item(), training_global_loss['max'])
        
            # Save metrics for each batch
            if batch_count == 0:
                # Calulate the real batch size
                really_batch_size = BATCH_SIZE
                if index == (steps-1):
                    really_batch_size = steps - (steps//BATCH_SIZE)*BATCH_SIZE
                writer.add_scalar('training actions loss', (training_actions_loss['value']*BATCH_SIZE)/really_batch_size, global_batch_iter)
                writer.add_scalar('training actions loss min', training_actions_loss['min'], global_batch_iter)
                writer.add_scalar('training actions loss max', training_actions_loss['max'], global_batch_iter)
                if amount_units_loss > 0:
                    writer.add_scalar('training units loss', (training_units_loss['value']*BATCH_SIZE)/amount_units_loss, global_batch_iter)
                    writer.add_scalar('training units loss min', training_units_loss['min'], global_batch_iter)
                    writer.add_scalar('training units loss max', training_units_loss['max'], global_batch_iter)
                if amount_local_loss > 0:
                    writer.add_scalar('training local loss', (training_local_loss['value']*BATCH_SIZE)/amount_local_loss, global_batch_iter)
                    writer.add_scalar('training local loss min', training_local_loss['min'], global_batch_iter)
                    writer.add_scalar('training local loss max', training_local_loss['max'], global_batch_iter)
                writer.add_scalar('training global loss', (training_global_loss['value']*BATCH_SIZE)/really_batch_size, global_batch_iter)
                writer.add_scalar('training global loss min', training_global_loss['min'], global_batch_iter)
                writer.add_scalar('training global loss max', training_global_loss['max'], global_batch_iter)
                
                print("Global loss: "+str(training_global_loss['value']/really_batch_size))
                
                # Init for metrics
                training_actions_loss = {'value': 0.0, 'min': float('+inf'), 'max': float('-inf')}
                training_units_loss = {'value': 0.0, 'min': float('+inf'), 'max': float('-inf')}
                training_local_loss = {'value': 0.0, 'min': float('+inf'), 'max': float('-inf')}
                training_global_loss = {'value': 0.0, 'min': float('+inf'), 'max': float('-inf')}
                amount_units_loss = 0
                amount_local_loss = 0
                
            # Times for metric
            get_step_time += t1-t0
            get_input_time += t2-t1
            zero_grad_time += t3-t2
            forward_time += t4-t3
            get_target_time += t5-t4
            convert_platform_time += t6-t5
            loss_time += t7-t6
            backpropagation_time += t8-t7
            update_time += t9-t8
            hidden_state_time += t10-t9
            episode_time += t10-t0
            
        
        # Print time metric for each episode
        info_string_time = "Epoch "+str(epoch)+", Episode "+str(index_e)+", Steps: "+str(steps)+"\n"
        info_string_time += "get_step_time: "+str(get_step_time)+"\n"
        info_string_time += "get_input_time: "+str(get_input_time)+"\n"
        info_string_time += "zero_grad_time: "+str(zero_grad_time)+"\n"
        info_string_time += "forward_time: "+str(forward_time)+"\n"
        info_string_time += "get_target_time: "+str(get_target_time)+"\n"
        info_string_time += "convert_platform_time: "+str(convert_platform_time)+"\n"
        info_string_time += "loss_time: "+str(loss_time)+"\n"
        info_string_time += "backpropagation_time: "+str(backpropagation_time)+"\n"
        info_string_time += "update_time: "+str(update_time)+"\n"
        info_string_time += "hidden_state_time: "+str(hidden_state_time)+"\n"
        info_string_time += "episode time: "+str(episode_time)+"\n\n"
        #print(info_string_time)
        f = open(root+"/saves/metrics/time.txt", "a")
        f.write(info_string_time)
        f.close()
    
        # Save training model
        #if index_e % 100 == 0:
        torch.save(net.state_dict(), root+'/saves/training/pre_model_'+str(epoch)+'_'+str(index_e)+'.pth')
        
        

Epoch 0
Episode 0
0/3909
Global loss: 22493.147521972656
Global loss: 22437.609008789062


KeyboardInterrupt: 

In [23]:
writer.close()

### Training - RL on-policy

In [11]:
# Function that return input and output for the network

## Used in train with experience
            
def get_actions_output_exp(state, output):
    output = F.log_softmax(output[0], dim=0) # Apply log_softmax
    index_action = pos_actions_by_name[state['action_name']]
    return output[index_action]

def get_units_output_exp(state, output):
    if state['action_name'] == 'buildUnit':
        output = F.log_softmax(output[0][3:], dim=0) # Get units output and apply log_softmax
        for x in pos_units_by_name:
            if x in step['params'] and step['params'][x] > 0.5:
                index = pos_units_by_name[x]
                if index >= 3:
                    return output[index-3]
    elif state['action_name'] == 'buildBuilding':
        output = F.log_softmax(output[0][:3], dim=0) # Get building output and apply log_softmax
        for x in pos_units_by_name:
            if x in step['params'] and step['params'][x] > 0.5:
                index = pos_units_by_name[x]
                if index < 3:
                    return output[index]
    return None

def get_local_output_exp(state, output):
    index_first_action = index_localfirst(state['action_name'])
    index_second_action = index_localsecond(state['action_name'])
    
    first_local_t = None
    second_local_t = None
    
    if index_first_action >= 0 and index_second_action < 0:
        first_local_t = output[0, index_first_action, :, :]
        first_local_t = first_local_t.view(-1) # Flat to one dim
        first_local_t = F.log_softmax(first_local_t, dim=0) # Calculate log_softmax
        first_local_t = first_local_t.view(SIDE_LAYERS, SIDE_LAYERS) # Reshape to a matrix
        x1 = int(state['params']['x1Local'])
        y1 = int(state['params']['y1Local'])
        y1 = SIDE_LAYERS -y1 -1
        first_local_t = first_local_t[y1, x1] # Select the action pixel
    if index_second_action >= 0 and index_first_action < 0:
        second_local_t = output[0, index_second_action, :, :]
        second_local_t = second_local_t.view(-1) # Flat to one dim
        second_local_t = F.log_softmax(second_local_t, dim=0) # Calculate log_softmax
        second_local_t = second_local_t.view(SIDE_LAYERS, SIDE_LAYERS) # Reshape to a matrix
        x2 = int(state['params']['x2Local'])
        y2 = int(state['params']['y2Local'])
        y2 = SIDE_LAYERS -y2 -1
        second_local_t = second_local_t[y2, x2] # Select the action pixel
        
    return first_local_t, second_local_t

def get_global_output_exp(state, output):
    ratio = SIDE_MAP/SIDE_MINIMAP
    x1 = int(state['params']['x1NextGlobal']/ratio)
    y1 = int(state['params']['y1NextGlobal']/ratio)
    x2 = int(state['params']['x2NextGlobal']/ratio)
    y2 = int(state['params']['y2NextGlobal']/ratio)

    if x1 < 0:
        x1 = 0
    if x1 >= SIDE_MINIMAP:
        x1 = SIDE_MINIMAP-1
    if x2 < 0:
        x2 = 0
    if x2 >= SIDE_MINIMAP:
        x2 = SIDE_MINIMAP-1
    if y1 < 0:
        y1 = 0
    if y1 >= SIDE_MINIMAP:
        y1 = SIDE_MINIMAP-1
    if y2 < 0:
        y2 = 0
    if y2 >= SIDE_MINIMAP:
        y2 = SIDE_MINIMAP-1
    
    y1 = SIDE_MINIMAP -y1 -1
    y2 = SIDE_MINIMAP -y2 -1
    
    index_action = index_global(state['action_name'])
    
    first_global_t = output[0, index_action*2, :, :]
    first_global_t = first_global_t.view(-1) # Flat to one dim
    first_global_t = F.log_softmax(first_global_t, dim=0) # Calculate log_softmax
    first_global_t = first_global_t.view(SIDE_MINIMAP, SIDE_MINIMAP) # Reshape to a matrix
    first_global_t = first_global_t[x1, y1] # Select the action pixel
    
    second_global_t = output[0, index_action*2+1, :, :]
    second_global_t = second_global_t.view(-1) # Flat to one dim
    second_global_t = F.log_softmax(second_global_t, dim=0) # Calculate log_softmax
    second_global_t = second_global_t.view(SIDE_MINIMAP, SIDE_MINIMAP) # Reshape to a matrix
    second_global_t = second_global_t[x2, y2] # Select the action pixel
    
    return first_global_t, second_global_t

In [12]:
# For save metrics with tensorboard

writer = SummaryWriter(root+'/saves/metrics')

# Save struct graph
net.reset_states()
net.set_value_return(True)
get_step(0,0,step)
dense_scalar_input = step['state_scalar'].to(platform)
conv_layers_input = step['state_layers'].to(platform)
conv_minimap_input = step['state_minimaps'].to(platform)
writer.add_graph(net, [dense_scalar_input, conv_layers_input, conv_minimap_input])

In [13]:
import time

net.set_value_return(True)

amount_episodes = episodes_from_exp()
set_epochs_exp(epochs)

# Count all the batches
global_batch_iter = 0

for epoch in range(epochs):
    print("Epoch "+str(epoch))
    
    for index_e in range(amount_episodes):
        print("Episode "+str(index_e))

        # Init for metrics
        training_actions_loss = {'value': 0.0, 'min': float('+inf'), 'max': float('-inf')}
        training_units_loss = {'value': 0.0, 'min': float('+inf'), 'max': float('-inf')}
        training_local1_loss = {'value': 0.0, 'min': float('+inf'), 'max': float('-inf')}
        training_local2_loss = {'value': 0.0, 'min': float('+inf'), 'max': float('-inf')}
        training_global1_loss = {'value': 0.0, 'min': float('+inf'), 'max': float('-inf')}
        training_global2_loss = {'value': 0.0, 'min': float('+inf'), 'max': float('-inf')}
        amount_units_loss = 0
        amount_local1_loss = 0
        amount_local2_loss = 0
        amount_global1_loss = 0
        amount_global2_loss = 0
        
        # Reset hidden state
        net.reset_states()
        
        # Prefetch whole episode
        print("Start prefetch...")
        prefetch_next_episode_exp()
        print("End prefetch...")
        
        steps = get_steps_episode_exp()
        
        # Ignore small episodes
        if steps < 10:
            print("Ignore "+str(index_e))
            continue
            
        # Rewards and partial Q (until 4 steps to the future)
        rewards, Q_partial = get_rewards_and_return_exp(GAMMA, N_ROLL_QVALUE)
        
        # Get perdicted values from net.
        # Predict V(s) for each step in the episode
        predicted_values = []
        net.set_value_return(True)
        net.set_policy_return(False)
        for index in range(steps):
            get_step_exp(index, step) # get step
            # Get inputs
            dense_scalar_input = step['state_scalar'].to(platform)
            conv_layers_input = step['state_layers'].to(platform)
            conv_minimap_input = step['state_minimaps'].to(platform)
            # Forward only value
            value_out = net(dense_scalar_input, conv_layers_input, conv_minimap_input)
            predicted_values.append(value_out.item())
        
        # Calculate Q with predicted values as baseline
        # Q(s,a) = Q_partial + gamma^n * V(sn)
        Q_values = []
        for i in range(len(Q_partial)):
            pos_v_pred = i+N_ROLL_QVALUE
            q = Q_partial[i]
            if pos_v_pred < len(Q_partial):
                q += (GAMMA ** N_ROLL_QVALUE) * predicted_values[pos_v_pred]
            Q_values.append(q)
        
        #Q_values = np.ones(len(Q_partial))
        
        # Reset hidden state again
        net.reset_states()
        
        # Reset gradient
        net.zero_grad()
        
        # Forward and backpropagation count before to update weights
        # When batch_count reach BATCH_SIZE then I update the weights
        batch_count = 0
        
        # Loop all state-action from episode
        net.set_value_return(True)
        net.set_policy_return(True)
        for index in range(steps):
            if index%500 == 0:
                print(str(index)+"/"+str(steps))

            # Get step
            get_step_exp(index, step)
            
            # Get inputs
            dense_scalar_input = step['state_scalar'].to(platform)
            conv_layers_input = step['state_layers'].to(platform)
            conv_minimap_input = step['state_minimaps'].to(platform)

            # Forward
            actions_out, units_out, local_out, global_out, value_out = net(dense_scalar_input, conv_layers_input, conv_minimap_input)
            value_out = value_out[0,0]
            
            # Get target and prepare output (apply log_softmax and return only the taken action)
            action_out_exp = get_actions_output_exp(step, actions_out)
            unit_out_exp = get_units_output_exp(step, units_out)
            local1_out, local2_out = get_local_output_exp(step, local_out)
            global1_out, global2_out = get_global_output_exp(step, global_out)
            
            # Loss value and advantage of current action
            value_target = torch.FloatTensor([Q_values[index]]).to(platform)
            loss_value_v = lossfunc_value(value_out, value_target) / BATCH_SIZE_ONPOLICY
            adv_value = value_target - value_out.detach() # Advantage = Q(s,a) - V(s)
            
            print("Value target: "+str(value_target.item())+", value out: "+str(value_out.item())+", loss: "+str(loss_value_v.item()))
            
            
            # Calculate loss
            loss_unit = None
            loss_local1 = None
            loss_local2 = None
            loss_action = -(adv_value * action_out_exp) / BATCH_SIZE_ONPOLICY
            if unit_out_exp is not None:
                loss_unit = -(adv_value * unit_out_exp) / BATCH_SIZE_ONPOLICY
            if local1_out is not None:
                loss_local1 = -(adv_value * local1_out) / BATCH_SIZE_ONPOLICY
            if local2_out is not None:
                loss_local2 = -(adv_value * local2_out) / BATCH_SIZE_ONPOLICY
            loss_global1 = -(adv_value * global1_out) / BATCH_SIZE_ONPOLICY
            loss_global2 = -(adv_value * global2_out) / BATCH_SIZE_ONPOLICY
            
            # Backpropagate errors
            loss_action.backward(retain_graph=True)
            if unit_out_exp is not None:
                loss_unit.backward(retain_graph=True)
            if loss_local1 is not None:
                loss_local1.backward(retain_graph=True)
            if loss_local2 is not None:
                loss_local2.backward(retain_graph=True)
            loss_global1.backward(retain_graph=True)
            loss_global2.backward(retain_graph=True)
            
            loss_value_v.backward(retain_graph=True)

            # Is batch end or episode end? Update weights
            batch_count += 1
            if batch_count >= BATCH_SIZE_ONPOLICY or index == (steps-1):
                global_batch_iter += 1
                batch_count = 0
                optimizer.step() # Update weights
                net.zero_grad() # Reset gradient
            
            # Update hidden state
            net.swap_hidden_state()
        
            # For metrics
            training_actions_loss['value'] += loss_action.item()
            training_actions_loss['min'] = min(loss_action.item(), training_actions_loss['min'])
            training_actions_loss['max'] = max(loss_action.item(), training_actions_loss['max'])
            if loss_unit is not None:
                training_units_loss['value'] += loss_unit.item()
                amount_units_loss += 1
                training_units_loss['min'] = min(loss_unit.item(), training_units_loss['min'])
                training_units_loss['max'] = max(loss_unit.item(), training_units_loss['max'])
            if loss_local1 is not None:
                training_local1_loss['value'] += loss_local1.item()
                amount_local1_loss += 1
                training_local1_loss['min'] = min(loss_local1.item(), training_local1_loss['min'])
                training_local1_loss['max'] = max(loss_local1.item(), training_local1_loss['max'])
            if loss_local2 is not None:
                training_local2_loss['value'] += loss_local2.item()
                amount_local2_loss += 1
                training_local2_loss['min'] = min(loss_local2.item(), training_local2_loss['min'])
                training_local2_loss['max'] = max(loss_local2.item(), training_local2_loss['max'])    
            training_global1_loss['value'] += loss_global1.item()
            training_global1_loss['min'] = min(loss_global1.item(), training_global1_loss['min'])
            training_global1_loss['max'] = max(loss_global1.item(), training_global1_loss['max'])
            training_global2_loss['value'] += loss_global2.item()
            training_global2_loss['min'] = min(loss_global2.item(), training_global2_loss['min'])
            training_global2_loss['max'] = max(loss_global2.item(), training_global2_loss['max'])
        
        # Save metrics.
        writer.add_scalar('training actions loss', training_actions_loss['value']/steps, epoch * amount_episodes + index_e)
        writer.add_scalar('training actions loss min', training_actions_loss['min'], epoch * amount_episodes + index_e)
        writer.add_scalar('training actions loss max', training_actions_loss['max'], epoch * amount_episodes + index_e)
        if amount_units_loss > 0:
            writer.add_scalar('training units loss', training_units_loss['value']/amount_units_loss, epoch * amount_episodes + index_e)
            writer.add_scalar('training units loss min', training_units_loss['min'], epoch * amount_episodes + index_e)
            writer.add_scalar('training units loss max', training_units_loss['max'], epoch * amount_episodes + index_e)
        if amount_local1_loss > 0:
            writer.add_scalar('training local1 loss', training_local1_loss['value']/amount_local1_loss, epoch * amount_episodes + index_e)
            writer.add_scalar('training local1 loss min', training_local1_loss['min'], epoch * amount_episodes + index_e)
            writer.add_scalar('training local1 loss max', training_local1_loss['max'], epoch * amount_episodes + index_e)
        if amount_local2_loss > 0:
            writer.add_scalar('training local2 loss', training_local2_loss['value']/amount_local2_loss, epoch * amount_episodes + index_e)
            writer.add_scalar('training local2 loss min', training_local2_loss['min'], epoch * amount_episodes + index_e)
            writer.add_scalar('training local2 loss max', training_local2_loss['max'], epoch * amount_episodes + index_e)
        writer.add_scalar('training global1 loss', training_global1_loss['value']/steps, epoch * amount_episodes + index_e)
        writer.add_scalar('training global1 loss min', training_global1_loss['min'], epoch * amount_episodes + index_e)
        writer.add_scalar('training global1 loss max', training_global1_loss['max'], epoch * amount_episodes + index_e)
        writer.add_scalar('training global2 loss', training_global2_loss['value']/steps, epoch * amount_episodes + index_e)
        writer.add_scalar('training global2 loss min', training_global2_loss['min'], epoch * amount_episodes + index_e)
        writer.add_scalar('training global2 loss max', training_global2_loss['max'], epoch * amount_episodes + index_e)
        
        # Save training model
        if index_e % 100 == 0:
            torch.save(net.state_dict(), root+'/saves/training/model_'+str(epoch)+'_'+str(index_e)+'.pth')

Epoch 0
Episode 0
Start prefetch...
End prefetch...
Value target: 0.03374188020825386, value out: 0.03512560576200485, loss: 2.5874276943227414e-08
Value target: 0.03374188020825386, value out: 0.035125985741615295, loss: 2.5888487797942616e-08
Value target: 0.03374188020825386, value out: 0.035125985741615295, loss: 2.5888487797942616e-08
Value target: 0.03374188020825386, value out: 0.035125985741615295, loss: 2.5888487797942616e-08
Value target: 0.03374188020825386, value out: 0.035125985741615295, loss: 2.5888487797942616e-08
Value target: 0.03374188020825386, value out: 0.035125985741615295, loss: 2.5888487797942616e-08
Value target: 0.033742841333150864, value out: 0.035125985741615295, loss: 2.585254677001103e-08
Value target: 0.03374364972114563, value out: 0.035125985741615295, loss: 2.582233804560019e-08
Value target: 0.03374364972114563, value out: 0.035125985741615295, loss: 2.582233804560019e-08
Value target: 0.03374364972114563, value out: 0.035125985741615295, loss: 2.58

Value target: -1.2693006992340088, value out: -1.321367859840393, loss: 6.1613391153514385e-06
Value target: -1.2704681158065796, value out: -1.321367859840393, loss: 5.888145096832886e-06
Value target: -1.2713301181793213, value out: -1.321367859840393, loss: 5.690398666047258e-06
Value target: -1.2713301181793213, value out: -1.321367859840393, loss: 5.690398666047258e-06
Value target: -1.2713301181793213, value out: -1.321367859840393, loss: 5.690398666047258e-06
Value target: -1.2713301181793213, value out: -1.3225831985473633, loss: 5.9701774262066465e-06
Value target: -1.2713301181793213, value out: -1.323480486869812, loss: 6.181047410791507e-06
Value target: -1.2716422080993652, value out: -1.323480486869812, loss: 6.107288754719775e-06
Value target: -1.2772042751312256, value out: -1.323480486869812, loss: 4.867017651122296e-06
Value target: -1.2772042751312256, value out: -1.323480486869812, loss: 4.867017651122296e-06
Value target: -1.2772042751312256, value out: -1.32348048

Value target: -1.2835198640823364, value out: 0.016318175941705704, loss: 0.004911567084491253
Value target: -1.2836633920669556, value out: 0.016318175941705704, loss: 0.004912651609629393
Value target: -1.2838716506958008, value out: 0.016318175941705704, loss: 0.00491422601044178
Value target: -1.2838716506958008, value out: 0.016318175941705704, loss: 0.00491422601044178
Value target: -1.2838716506958008, value out: 0.016318175941705704, loss: 0.00491422601044178
Value target: -1.285191535949707, value out: 0.016313888132572174, loss: 0.0049241757951676846
Value target: -1.2852864265441895, value out: 0.01631481572985649, loss: 0.004924900829792023
Value target: -1.2853069305419922, value out: 0.01631481572985649, loss: 0.0049250563606619835
Value target: -1.2853069305419922, value out: 0.01631481572985649, loss: 0.0049250563606619835
Value target: -1.2853069305419922, value out: 0.01630561798810959, loss: 0.004924985580146313
Value target: -1.2853069305419922, value out: 0.0163065

Value target: -1.2924500703811646, value out: -0.10532820969820023, loss: 0.005682493094354868
Value target: -1.2925422191619873, value out: -0.10532740503549576, loss: 0.0056833834387362
Value target: -1.2925978899002075, value out: -0.10532472282648087, loss: 0.005683941766619682
Value target: -1.292717456817627, value out: -0.10532199591398239, loss: 0.005685112439095974
Value target: -1.292717456817627, value out: -0.10531873255968094, loss: 0.005685143172740936
Value target: -1.292717456817627, value out: -0.10531602054834366, loss: 0.005685169715434313
Value target: -1.292717456817627, value out: -0.10531441122293472, loss: 0.005685185547918081
Value target: -1.2927360534667969, value out: -0.10531080514192581, loss: 0.005685397889465094
Value target: -1.2927360534667969, value out: -0.10531080514192581, loss: 0.005685397889465094
Value target: -1.2927360534667969, value out: -0.10531080514192581, loss: 0.005685397889465094
Value target: -1.2927360534667969, value out: -0.1053108

Value target: -1.2945327758789062, value out: -1.4813783168792725, loss: 0.00018973508849740028
Value target: -1.2946245670318604, value out: -1.4866586923599243, loss: 0.00020041906100232154
Value target: -1.2945870161056519, value out: -1.481587290763855, loss: 0.00019004946807399392
Value target: -1.2946984767913818, value out: -1.4814107418060303, loss: 0.00018946452473755926
Value target: -1.2956403493881226, value out: -1.4866784811019897, loss: 0.00019834547128994018
Value target: -1.295130968093872, value out: -1.486695647239685, loss: 0.0001994403573917225
Value target: -1.295366644859314, value out: -1.4815361499786377, loss: 0.0001883645891211927
Value target: -1.295285940170288, value out: -1.4867093563079834, loss: 0.00019914633594453335
Value target: -1.2958933115005493, value out: -1.481641411781311, loss: 0.0001875128218671307
Value target: -1.29537832736969, value out: -0.7700993418693542, loss: 0.0018152500269934535
Value target: -1.2959636449813843, value out: -0.767

Value target: -1.301673173904419, value out: -0.50699782371521, loss: 0.0071762376464903355
Value target: -1.301673173904419, value out: -0.50699782371521, loss: 0.0071762376464903355
Value target: -1.301673173904419, value out: -0.50699782371521, loss: 0.0071762376464903355
Value target: -1.301673173904419, value out: -0.5070151090621948, loss: 0.007175926119089127
Value target: -1.3020001649856567, value out: -0.5070151090621948, loss: 0.007181832101196051
Value target: -1.3016164302825928, value out: -0.5070151090621948, loss: 0.007174901198595762
Value target: -1.3028407096862793, value out: -0.5070151090621948, loss: 0.007197027560323477
Value target: -1.3024808168411255, value out: -0.5070151090621948, loss: 0.007190519478172064
Value target: -1.3020673990249634, value out: -0.5069987177848816, loss: 0.007183343172073364
Value target: -1.3033920526504517, value out: -0.5069916844367981, loss: 0.0072074271738529205
Value target: -1.3011069297790527, value out: -0.5069161057472229,

Value target: -2.7315971851348877, value out: -2.8436105251312256, loss: 0.00016955389583017677
Value target: -2.7315971851348877, value out: -2.8433101177215576, loss: 0.0001686456671450287
Value target: -2.7316040992736816, value out: -2.844428539276123, loss: 0.00017201830632984638
Value target: -2.7316040992736816, value out: -2.8436481952667236, loss: 0.0001696470135357231
Value target: -2.731605052947998, value out: -2.8436481952667236, loss: 0.00016964413225650787
Value target: -2.731605052947998, value out: -2.8436481952667236, loss: 0.00016964413225650787
Value target: -2.731605052947998, value out: -2.843655586242676, loss: 0.0001696665131021291
Value target: -2.7314929962158203, value out: -2.843655586242676, loss: 0.00017000603838823736
Value target: -2.732086420059204, value out: -2.843656539916992, loss: 0.0001682147558312863
Value target: -2.732086420059204, value out: -2.843656539916992, loss: 0.0001682147558312863
Value target: -2.732086420059204, value out: -2.8436565

Value target: -0.23088446259498596, value out: -0.24035543203353882, loss: 2.038619584254775e-07
Value target: -0.23090878129005432, value out: -0.24035543203353882, loss: 2.028163805789518e-07
Value target: -0.23088115453720093, value out: -0.24035543203353882, loss: 2.0400437961143325e-07
Value target: -0.23084302246570587, value out: -0.24035543203353882, loss: 2.0564984026805178e-07
Value target: -0.2308708131313324, value out: -0.24035543203353882, loss: 2.0444998938273784e-07
Value target: -0.2308708131313324, value out: -0.23772352933883667, loss: 1.1509735031722812e-07
Value target: -0.2308708131313324, value out: -0.23769468069076538, loss: 1.1413031586471334e-07
Value target: -0.2308708131313324, value out: -0.23765519261360168, loss: 1.1281325384970842e-07
Value target: -0.2308708131313324, value out: -0.23768329620361328, loss: 1.137498202297138e-07
Value target: -0.23086878657341003, value out: -0.23768329620361328, loss: 1.1381750653072231e-07
Value target: -0.23086130619

Value target: -0.2308923751115799, value out: -0.21511274576187134, loss: 7.238276680254785e-07
Value target: -0.2308923751115799, value out: -0.21511203050613403, loss: 7.23893208487425e-07
Value target: -0.23089231550693512, value out: -0.2151120901107788, loss: 7.238822945510037e-07
Value target: -0.23093703389167786, value out: -0.2151120901107788, loss: 7.279908800228441e-07
Value target: -0.23089094460010529, value out: -0.2151120901107788, loss: 7.237565000650648e-07
Value target: -0.23093514144420624, value out: -0.2151120901107788, loss: 7.278167686308734e-07
Value target: -0.23089145123958588, value out: -0.21511191129684448, loss: 7.238194257297437e-07
Value target: -0.23089225590229034, value out: -0.21515631675720215, loss: 7.19824924999557e-07
Value target: -0.23089534044265747, value out: -0.21511048078536987, loss: 7.243075401675014e-07
Value target: -0.2309015840291977, value out: -0.21515455842018127, loss: 7.208396368696413e-07
Value target: -0.23089353740215302, val

Value target: -0.23092180490493774, value out: -0.23486097157001495, loss: 6.25686880084686e-08
Value target: -0.2309219092130661, value out: -0.23486174643039703, loss: 6.258998297425933e-08
Value target: -0.2309219092130661, value out: -0.23486174643039703, loss: 6.258998297425933e-08
Value target: -0.2309219092130661, value out: -0.23486174643039703, loss: 6.258998297425933e-08
Value target: -0.2309219092130661, value out: -0.23486174643039703, loss: 6.258998297425933e-08
Value target: -0.23085767030715942, value out: -0.2348618358373642, loss: 6.465057111881833e-08
Value target: -0.23085899651050568, value out: -0.2348618358373642, loss: 6.460775381356143e-08
Value target: -0.23092225193977356, value out: -0.2348618358373642, loss: 6.258193963049052e-08
Value target: -0.23085778951644897, value out: -0.2348618358373642, loss: 6.464671997719051e-08
Value target: -0.23084032535552979, value out: -0.23479245603084564, loss: 6.298119359371412e-08
Value target: -0.23085114359855652, val

Value target: -0.23087003827095032, value out: -0.2327212691307068, loss: 1.862530218943448e-08
Value target: -0.23093284666538239, value out: -0.23272180557250977, loss: 1.7393338325177865e-08
Value target: -0.23087754845619202, value out: -0.23265340924263, loss: 1.7139573316171663e-08
Value target: -0.23093296587467194, value out: -0.23272228240966797, loss: 1.7400292762204117e-08
Value target: -0.23086683452129364, value out: -0.23265445232391357, loss: 1.7367268512202827e-08
Value target: -0.23093365132808685, value out: -0.23199942708015442, loss: 7.472880980685659e-09
Value target: -0.2308586984872818, value out: -0.2319384217262268, loss: 7.66975194466113e-09
Value target: -0.230934277176857, value out: -0.2319994568824768, loss: 7.46452499811312e-09
Value target: -0.23085521161556244, value out: -0.23192757368087769, loss: 7.565529536179838e-09
Value target: -0.23093493282794952, value out: -0.23200005292892456, loss: 7.463690110398602e-09
Value target: -0.23084862530231476, v

Value target: -0.2309948056936264, value out: -0.23118315637111664, loss: 4.031361355849583e-10
Value target: -0.2309948056936264, value out: -0.2312024086713791, loss: 4.897613425924874e-10
Value target: -0.230995774269104, value out: -0.2312024086713791, loss: 4.852020452084105e-10
Value target: -0.23099617660045624, value out: -0.2312024086713791, loss: 4.833143885107916e-10
Value target: -0.23103487491607666, value out: -0.2312024086713791, loss: 3.189495334510184e-10
Value target: -0.23099486529827118, value out: -0.2312024086713791, loss: 4.894801231003498e-10
Value target: -0.2309764176607132, value out: -0.23120252788066864, loss: 5.809753789165484e-10
Value target: -0.23098409175872803, value out: -0.23120425641536713, loss: 5.50823608946871e-10
Value target: -0.2309236377477646, value out: -0.23124437034130096, loss: 1.1689703649508942e-09
Value target: -0.2309502512216568, value out: -0.23120485246181488, loss: 7.366112719786599e-10
Value target: -0.23094642162322998, value 

Value target: -0.2521854639053345, value out: -0.2625250220298767, loss: 1.4446819704971858e-06
Value target: -0.2521854639053345, value out: -0.26257508993148804, loss: 1.4587071746063884e-06
Value target: -0.25218647718429565, value out: -0.2625286877155304, loss: 1.4454233223659685e-06
Value target: -0.25218647718429565, value out: -0.26253020763397217, loss: 1.4458481700785342e-06
Value target: -0.2521866261959076, value out: -0.26253020763397217, loss: 1.4458064470090903e-06
Value target: -0.2521866261959076, value out: -0.26253020763397217, loss: 1.4458064470090903e-06
Value target: -0.2521866261959076, value out: -0.26253125071525574, loss: 1.4460980537478463e-06
Value target: -0.25222715735435486, value out: -0.26253125071525574, loss: 1.4347883734444622e-06
Value target: -0.25221630930900574, value out: -0.2625313997268677, loss: 1.4378526884684106e-06
Value target: -0.25221630930900574, value out: -0.2625313997268677, loss: 1.4378526884684106e-06
Value target: -0.252216309309

Value target: -0.2845408618450165, value out: -0.2962128221988678, loss: 3.096241982802894e-07
Value target: -0.2845458984375, value out: -0.2962128221988678, loss: 3.0935706263335305e-07
Value target: -0.28454166650772095, value out: -0.2962128221988678, loss: 3.095815372944344e-07
Value target: -0.2845362424850464, value out: -0.2962128221988678, loss: 3.0986933552412665e-07
Value target: -0.28458666801452637, value out: -0.2962128221988678, loss: 3.071987464409176e-07
Value target: -0.28458666801452637, value out: -0.29613053798675537, loss: 3.2661995419402956e-07
Value target: -0.28458666801452637, value out: -0.2961260974407196, loss: 3.263687347043742e-07
Value target: -0.28458666801452637, value out: -0.2961204946041107, loss: 3.2605186106593465e-07
Value target: -0.28458666801452637, value out: -0.29617294669151306, loss: 3.2902417501645687e-07
Value target: -0.28458747267723083, value out: -0.29617294669151306, loss: 3.289784729076928e-07
Value target: -0.2845848798751831, val

Value target: -0.28465571999549866, value out: -0.29603737592697144, loss: 3.765758549434395e-07
Value target: -0.28465571999549866, value out: -0.29603874683380127, loss: 3.766665486182319e-07
Value target: -0.28465673327445984, value out: -0.296038955450058, loss: 3.766133431781782e-07
Value target: -0.2846669852733612, value out: -0.296038955450058, loss: 3.7593520119116874e-07
Value target: -0.2846575677394867, value out: -0.296038955450058, loss: 3.765580913750455e-07
Value target: -0.2846708297729492, value out: -0.296038955450058, loss: 3.7568105426544207e-07
Value target: -0.2846589684486389, value out: -0.29604005813598633, loss: 3.765383667087008e-07
Value target: -0.28466132283210754, value out: -0.2960507273674011, loss: 3.7708878153353e-07
Value target: -0.28466108441352844, value out: -0.29604095220565796, loss: 3.764575353670807e-07
Value target: -0.2846601903438568, value out: -0.29605475068092346, loss: 3.7743023995062686e-07
Value target: -0.28466343879699707, value o

Value target: -0.28474006056785583, value out: -0.2957359552383423, loss: 4.875391255154682e-07
Value target: -0.28474006056785583, value out: -0.2957359552383423, loss: 4.875391255154682e-07
Value target: -0.28474006056785583, value out: -0.2957359552383423, loss: 4.875391255154682e-07
Value target: -0.2847307622432709, value out: -0.29573631286621094, loss: 4.883957558377006e-07
Value target: -0.28473368287086487, value out: -0.29573631286621094, loss: 4.881365498476953e-07
Value target: -0.2847411334514618, value out: -0.29573631286621094, loss: 4.874756882600195e-07
Value target: -0.28473272919654846, value out: -0.29573631286621094, loss: 4.88221189698379e-07
Value target: -0.28472819924354553, value out: -0.2957266569137573, loss: 4.877663855040737e-07
Value target: -0.2847324311733246, value out: -0.2957296371459961, loss: 4.876553703070385e-07
Value target: -0.284729927778244, value out: -0.29573747515678406, loss: 4.885729936177086e-07
Value target: -0.2847181260585785, value 

Value target: -0.28477010130882263, value out: -0.29542404413223267, loss: 6.168831419017806e-07
Value target: -0.2847823202610016, value out: -0.2952214181423187, loss: 7.16939268841088e-07
Value target: -0.284767746925354, value out: -0.2952188551425934, loss: 7.185898880379682e-07
Value target: -0.2847839593887329, value out: -0.2952227294445038, loss: 7.168942488533503e-07
Value target: -0.2847672998905182, value out: -0.2952115833759308, loss: 7.176516874096706e-07
Value target: -0.28478556871414185, value out: -0.29522445797920227, loss: 7.169106197579822e-07
Value target: -0.28476682305336, value out: -0.295209139585495, loss: 7.173813969529874e-07
Value target: -0.28477680683135986, value out: -0.295226126909256, loss: 7.183439834079763e-07
Value target: -0.28478729724884033, value out: -0.2952086627483368, loss: 7.145056315494003e-07
Value target: -0.28476792573928833, value out: -0.2952277362346649, loss: 7.197870672825957e-07
Value target: -0.2847888767719269, value out: -0.

Value target: -0.28493592143058777, value out: -0.29481297731399536, loss: 1.1085936648669303e-06
Value target: -0.2849181592464447, value out: -0.29481297731399536, loss: 1.1125844139314722e-06
Value target: -0.2849319279193878, value out: -0.294813334941864, loss: 1.1095704621766345e-06
Value target: -0.2849302887916565, value out: -0.2948087453842163, loss: 1.1089080089732306e-06
Value target: -0.28492504358291626, value out: -0.2948213219642639, loss: 1.11291285520565e-06
Value target: -0.2849237024784088, value out: -0.29480302333831787, loss: 1.1091020724052214e-06
Value target: -0.28493595123291016, value out: -0.29481711983680725, loss: 1.1095170293629053e-06
Value target: -0.2849214971065521, value out: -0.2948153614997864, loss: 1.1123698868686915e-06
Value target: -0.2849402129650116, value out: -0.2948099374771118, loss: 1.1069483889514231e-06
Value target: -0.2849368453025818, value out: -0.294808566570282, loss: 1.107396428778884e-06
Value target: -0.2849423885345459, val

Value target: -0.2929626405239105, value out: -0.30497977137565613, loss: 1.9515059648256283e-06
Value target: -0.2929801642894745, value out: -0.30497977137565613, loss: 1.9458186670817668e-06
Value target: -0.29297271370887756, value out: -0.30498006939888, loss: 1.9483322830637917e-06
Value target: -0.29297271370887756, value out: -0.30498006939888, loss: 1.9483322830637917e-06
Value target: -0.29297271370887756, value out: -0.30498006939888, loss: 1.9483322830637917e-06
Value target: -0.29297271370887756, value out: -0.3049983084201813, loss: 1.9542558220564388e-06
Value target: -0.29297271370887756, value out: -0.3049905598163605, loss: 1.9517383407219313e-06
Value target: -0.29296594858169556, value out: -0.3049905598163605, loss: 1.9539361346687656e-06
Value target: -0.2929781973361969, value out: -0.3049905598163605, loss: 1.94995755009586e-06
Value target: -0.2929789423942566, value out: -0.3049905598163605, loss: 1.9497156245051883e-06
Value target: -0.29302310943603516, valu

Value target: -0.3219568133354187, value out: -0.33493396639823914, loss: 4.1276106799159606e-07
Value target: -0.3219568133354187, value out: -0.33492588996887207, loss: 4.122474308587698e-07
Value target: -0.3219568133354187, value out: -0.33494502305984497, loss: 4.1346467583025515e-07
Value target: -0.3219572603702545, value out: -0.33494502305984497, loss: 4.134362256991153e-07
Value target: -0.32195788621902466, value out: -0.33494502305984497, loss: 4.133963784624939e-07
Value target: -0.32195788621902466, value out: -0.33494502305984497, loss: 4.133963784624939e-07
Value target: -0.32195788621902466, value out: -0.33494502305984497, loss: 4.133963784624939e-07
Value target: -0.32195788621902466, value out: -0.3349454402923584, loss: 4.134229527608113e-07
Value target: -0.32195788621902466, value out: -0.3349460959434509, loss: 4.1346467583025515e-07
Value target: -0.3219638168811798, value out: -0.3349460959434509, loss: 4.1308720710730995e-07
Value target: -0.32196420431137085

Value target: -0.32200953364372253, value out: -0.31245478987693787, loss: 2.6538700126366166e-07
Value target: -0.3220010995864868, value out: -0.3124553859233856, loss: 2.648856138875999e-07
Value target: -0.3220021426677704, value out: -0.3124622404575348, loss: 2.645631980158214e-07
Value target: -0.3220023214817047, value out: -0.31245583295822144, loss: 2.649286159339681e-07
Value target: -0.32200324535369873, value out: -0.3124643862247467, loss: 2.645053314154211e-07
Value target: -0.32200342416763306, value out: -0.3124568462371826, loss: 2.649335897331184e-07
Value target: -0.3220072090625763, value out: -0.3124578595161438, loss: 2.650874364462652e-07
Value target: -0.322004109621048, value out: -0.31245800852775574, loss: 2.649071006999293e-07
Value target: -0.3220078945159912, value out: -0.3124587833881378, loss: 2.6507419192967063e-07
Value target: -0.32200583815574646, value out: -0.3124590814113617, loss: 2.6494350890970964e-07
Value target: -0.3220062851905823, value 

Value target: -0.32205161452293396, value out: -0.3207132816314697, loss: 7.222317854882476e-09
Value target: -0.3220510482788086, value out: -0.3207132816314697, loss: 7.216207631444149e-09
Value target: -0.3220512866973877, value out: -0.3207132816314697, loss: 7.218780240236811e-09
Value target: -0.32205015420913696, value out: -0.3207102417945862, loss: 7.239375765522027e-09
Value target: -0.3220491111278534, value out: -0.3207124173641205, loss: 7.2046377752599255e-09
Value target: -0.32204583287239075, value out: -0.32071399688720703, loss: 7.152367142992944e-09
Value target: -0.32202622294425964, value out: -0.32071202993392944, loss: 6.964125720543279e-09
Value target: -0.32205232977867126, value out: -0.3207108974456787, loss: 7.2558092867325286e-09
Value target: -0.3220618665218353, value out: -0.32070988416671753, loss: 7.3703878555875235e-09
Value target: -0.32200920581817627, value out: -0.32070690393447876, loss: 6.838670074671427e-09
Value target: -0.3220326006412506, va

Value target: -0.3220762610435486, value out: -0.32472848892211914, loss: 4.62783731336458e-08
Value target: -0.3220706582069397, value out: -0.3247252106666565, loss: 4.635953132492432e-08
Value target: -0.3220771849155426, value out: -0.32472944259643555, loss: 4.627941407875369e-08
Value target: -0.32207027077674866, value out: -0.32472217082977295, loss: 4.626693694831374e-08
Value target: -0.3220764398574829, value out: -0.32473045587539673, loss: 4.6340794312982325e-08
Value target: -0.3220783472061157, value out: -0.32472294569015503, loss: 4.6012505805492765e-08
Value target: -0.32207190990448, value out: -0.32473140954971313, loss: 4.6532488084949364e-08
Value target: -0.32207930088043213, value out: -0.3247225284576416, loss: 4.596481417706855e-08
Value target: -0.3220718502998352, value out: -0.324728786945343, loss: 4.644284601340587e-08
Value target: -0.32208025455474854, value out: -0.32473254203796387, loss: 4.628045502386158e-08
Value target: -0.3220798075199127, value 

Value target: -0.32211658358573914, value out: -0.3300784230232239, loss: 7.203510108411137e-07
Value target: -0.32212021946907043, value out: -0.3300786018371582, loss: 7.197256195468071e-07
Value target: -0.32211825251579285, value out: -0.33006975054740906, loss: 7.18480919204012e-07
Value target: -0.3221283257007599, value out: -0.3300774395465851, loss: 7.180501597758848e-07
Value target: -0.32212039828300476, value out: -0.33006972074508667, loss: 7.180878469625895e-07
Value target: -0.3221350312232971, value out: -0.3300721347332001, loss: 7.15881981250277e-07
Value target: -0.3221282958984375, value out: -0.33007103204727173, loss: 7.168983984229271e-07
Value target: -0.32212868332862854, value out: -0.3300814628601074, loss: 7.187125561358698e-07
Value target: -0.3221304714679718, value out: -0.3164919912815094, loss: 5.677225090039428e-07
Value target: -0.3221324682235718, value out: -0.3165069818496704, loss: 5.651089054481417e-07
Value target: -0.3221327066421509, value out

Value target: -0.3367297351360321, value out: -0.35053837299346924, loss: 2.5767362785700243e-06
Value target: -0.3367297351360321, value out: -0.35053837299346924, loss: 2.5767362785700243e-06
Value target: -0.3367297351360321, value out: -0.350547194480896, loss: 2.580029558885144e-06
Value target: -0.3367297351360321, value out: -0.35054251551628113, loss: 2.5782824195630383e-06
Value target: -0.33672723174095154, value out: -0.35054251551628113, loss: 2.5792171527427854e-06
Value target: -0.33673352003097534, value out: -0.35054251551628113, loss: 2.5768697469175095e-06
Value target: -0.3367345333099365, value out: -0.35054251551628113, loss: 2.5764916244952474e-06
Value target: -0.3367639183998108, value out: -0.35054251551628113, loss: 2.5655372155597433e-06
Value target: -0.33677002787590027, value out: -0.350539892911911, loss: 2.5622864541219315e-06
Value target: -0.3367815911769867, value out: -0.3493499159812927, loss: 3.7610188883263618e-06
Value target: -0.3367787301540375

Value target: -0.37729889154434204, value out: -0.3926480710506439, loss: 5.774444389317068e-07
Value target: -0.3772980868816376, value out: -0.3926480710506439, loss: 5.775049771727936e-07
Value target: -0.3772980868816376, value out: -0.3926480710506439, loss: 5.775049771727936e-07
Value target: -0.3772980868816376, value out: -0.3926480710506439, loss: 5.775049771727936e-07
Value target: -0.3772980868816376, value out: -0.3926488757133484, loss: 5.775655154138803e-07
Value target: -0.3772980868816376, value out: -0.39264804124832153, loss: 5.775027034360392e-07
Value target: -0.37730491161346436, value out: -0.39264804124832153, loss: 5.769892936768883e-07
Value target: -0.37730494141578674, value out: -0.39264804124832153, loss: 5.769870767835528e-07
Value target: -0.37730494141578674, value out: -0.39264804124832153, loss: 5.769870767835528e-07
Value target: -0.37730512022972107, value out: -0.39264804124832153, loss: 5.769736048932828e-07
Value target: -0.37730512022972107, valu

Value target: -0.37731093168258667, value out: -0.3921213448047638, loss: 6.376405394803442e-07
Value target: -0.37731480598449707, value out: -0.39212173223495483, loss: 6.373402925419214e-07
Value target: -0.37731096148490906, value out: -0.39211905002593994, loss: 6.374403938025353e-07
Value target: -0.37731507420539856, value out: -0.3921223282814026, loss: 6.373684868776763e-07
Value target: -0.37731075286865234, value out: -0.3921220898628235, loss: 6.377200634233304e-07
Value target: -0.3773108124732971, value out: -0.391859233379364, loss: 6.783863568671222e-07
Value target: -0.3773108124732971, value out: -0.3918552100658417, loss: 6.780112471460598e-07
Value target: -0.3773108124732971, value out: -0.3918594419956207, loss: 6.784058541597915e-07
Value target: -0.3773108124732971, value out: -0.39185500144958496, loss: 6.779918066968094e-07
Value target: -0.3773108124732971, value out: -0.39185500144958496, loss: 6.779918066968094e-07
Value target: -0.3773108124732971, value o

Value target: -0.377315878868103, value out: -0.391237735748291, loss: 7.815245339770627e-07
Value target: -0.3773000240325928, value out: -0.39123278856277466, loss: 7.827496801837697e-07
Value target: -0.3773263096809387, value out: -0.391238272190094, loss: 7.804140977896168e-07
Value target: -0.37733617424964905, value out: -0.3912314176559448, loss: 7.785394586790062e-07
Value target: -0.3772786259651184, value out: -0.39122796058654785, loss: 7.846126095500949e-07
Value target: -0.37730899453163147, value out: -0.3912113904953003, loss: 7.79341178258619e-07
Value target: -0.3773209750652313, value out: -0.3912386894226074, loss: 7.810595548107813e-07
Value target: -0.37732604146003723, value out: -0.3912489414215088, loss: 7.816416882633348e-07
Value target: -0.37731853127479553, value out: -0.3911893367767334, loss: 7.758033575555601e-07
Value target: -0.3773263096809387, value out: -0.39122068881988525, loss: 7.784425974932674e-07
Value target: -0.3773263096809387, value out: -

Value target: -0.3773322105407715, value out: -0.38998472690582275, loss: 1.053198502631858e-06
Value target: -0.3773265480995178, value out: -0.3899890184402466, loss: 1.0548562840995146e-06
Value target: -0.37733253836631775, value out: -0.3899829387664795, loss: 1.0528463008085964e-06
Value target: -0.37732502818107605, value out: -0.3899838924407959, loss: 1.0542555628489936e-06
Value target: -0.3773328363895416, value out: -0.3899896740913391, loss: 1.053918026627798e-06
Value target: -0.37732815742492676, value out: -0.3899838328361511, loss: 1.053724531629996e-06
Value target: -0.3773331344127655, value out: -0.3899899125099182, loss: 1.0539081358729163e-06
Value target: -0.37732526659965515, value out: -0.38998234272003174, loss: 1.053957703334163e-06
Value target: -0.37732556462287903, value out: -0.3899902105331421, loss: 1.0552188314250088e-06
Value target: -0.37733298540115356, value out: -0.38998550176620483, loss: 1.053198502631858e-06
Value target: -0.37732994556427, val

Value target: -0.3773662745952606, value out: -0.38887298107147217, loss: 1.5045943655422889e-06
Value target: -0.37735486030578613, value out: -0.3888905644416809, loss: 1.5121872820600402e-06
Value target: -0.3773563802242279, value out: -0.38890165090560913, loss: 1.5146963505685562e-06
Value target: -0.377361923456192, value out: -0.3880407214164734, loss: 2.036370233327034e-06
Value target: -0.37735986709594727, value out: -0.3880630433559418, loss: 2.0456782294786535e-06
Value target: -0.37735962867736816, value out: -0.38805124163627625, loss: 2.041260586338467e-06
Value target: -0.3773626387119293, value out: -0.3880529999732971, loss: 2.0407826468726853e-06
Value target: -0.3773603141307831, value out: -0.38805896043777466, loss: 2.043947233687504e-06
Value target: -0.377360463142395, value out: -0.38805675506591797, loss: 2.0430477434274508e-06
Value target: -0.377359539270401, value out: -0.3880564868450165, loss: 2.0432980818441138e-06
Value target: -0.37735843658447266, va

Value target: -0.38808178901672363, value out: -0.40400221943855286, loss: 3.425136583246058e-06
Value target: -0.3880864977836609, value out: -0.40400221943855286, loss: 3.423110911171534e-06
Value target: -0.38808542490005493, value out: -0.40400221943855286, loss: 3.423572479732684e-06
Value target: -0.3880884051322937, value out: -0.40400221943855286, loss: 3.4222903195768595e-06
Value target: -0.38809528946876526, value out: -0.40400105714797974, loss: 3.4188306017313153e-06
Value target: -0.3881004750728607, value out: -0.40256088972091675, loss: 4.9786567615228705e-06
Value target: -0.3880963623523712, value out: -0.402559757232666, loss: 4.980709491064772e-06
Value target: -0.38809657096862793, value out: -0.4025627374649048, loss: 4.982618520443793e-06
Value target: -0.3880976736545563, value out: -0.40256983041763306, loss: 4.986745807400439e-06
Value target: -0.38809654116630554, value out: -0.40257519483566284, loss: 4.991224159311969e-06
Value target: -0.38809654116630554,

Value target: -0.4439224302768707, value out: -0.4615543484687805, loss: 7.61971932661254e-07
Value target: -0.4439224302768707, value out: -0.4615533947944641, loss: 7.618895097039058e-07
Value target: -0.443932443857193, value out: -0.4615533947944641, loss: 7.610242960254254e-07
Value target: -0.44393226504325867, value out: -0.4615533947944641, loss: 7.610398142787744e-07
Value target: -0.44393226504325867, value out: -0.4615533947944641, loss: 7.610398142787744e-07
Value target: -0.4439323842525482, value out: -0.4615533947944641, loss: 7.610294687765418e-07
Value target: -0.4439323842525482, value out: -0.4615638256072998, loss: 7.619307211825799e-07
Value target: -0.4439323842525482, value out: -0.46156370639801025, loss: 7.619203756803472e-07
Value target: -0.44392529129981995, value out: -0.46156370639801025, loss: 7.625336024830176e-07
Value target: -0.44392475485801697, value out: -0.46156376600265503, loss: 7.625851026205055e-07
Value target: -0.4439264237880707, value out:

Value target: -0.44394412636756897, value out: -0.46011486649513245, loss: 7.601535685353156e-07
Value target: -0.443938672542572, value out: -0.46011242270469666, loss: 7.604366487612424e-07
Value target: -0.443938672542572, value out: -0.45941683650016785, loss: 7.678639803998522e-07
Value target: -0.443938672542572, value out: -0.4594096839427948, loss: 7.671545176890504e-07
Value target: -0.443938672542572, value out: -0.45941677689552307, loss: 7.678580686842906e-07
Value target: -0.443938672542572, value out: -0.45941099524497986, loss: 7.672845185879851e-07
Value target: -0.443938672542572, value out: -0.45941099524497986, loss: 7.672845185879851e-07
Value target: -0.443938672542572, value out: -0.45941099524497986, loss: 7.672845185879851e-07
Value target: -0.443938672542572, value out: -0.45941099524497986, loss: 7.672845185879851e-07
Value target: -0.44393855333328247, value out: -0.45941099524497986, loss: 7.672963420191081e-07
Value target: -0.4439442455768585, value out: -

Value target: -0.4439714848995209, value out: -0.4579637944698334, loss: 7.894545319686586e-07
Value target: -0.4439103901386261, value out: -0.45795968174934387, loss: 7.958975629662746e-07
Value target: -0.44394806027412415, value out: -0.4579441249370575, loss: 7.898782996562659e-07
Value target: -0.44396308064460754, value out: -0.4579755365848541, loss: 7.91729462434887e-07
Value target: -0.4439654052257538, value out: -0.4579811990261078, loss: 7.921067322058661e-07
Value target: -0.44395917654037476, value out: -0.45791855454444885, loss: 7.857428272473044e-07
Value target: -0.4439658224582672, value out: -0.45795729756355286, loss: 7.893603424236062e-07
Value target: -0.4439658224582672, value out: -0.45797255635261536, loss: 7.910830390756018e-07
Value target: -0.4439658224582672, value out: -0.45797494053840637, loss: 7.913523631941644e-07
Value target: -0.4439658224582672, value out: -0.4579685628414154, loss: 7.906319865469413e-07
Value target: -0.4439658224582672, value ou

Value target: -0.4439627230167389, value out: -0.45541882514953613, loss: 8.634360710857436e-07
Value target: -0.4439801275730133, value out: -0.4554275870323181, loss: 8.621337883596425e-07
Value target: -0.44397154450416565, value out: -0.455413818359375, loss: 8.613528734713327e-07
Value target: -0.44398075342178345, value out: -0.4554281234741211, loss: 8.621202596259536e-07
Value target: -0.4439655840396881, value out: -0.45541155338287354, loss: 8.619092568551423e-07
Value target: -0.4439680576324463, value out: -0.4554286599159241, loss: 8.641144404464285e-07
Value target: -0.44398075342178345, value out: -0.45542001724243164, loss: 8.608997177361744e-07
Value target: -0.44396907091140747, value out: -0.4554293155670166, loss: 8.640605528853484e-07
Value target: -0.4439813494682312, value out: -0.4554145336151123, loss: 8.599848797530285e-07
Value target: -0.44396787881851196, value out: -0.4554164409637451, loss: 8.622998848295538e-07
Value target: -0.443981796503067, value out

Value target: -0.44400304555892944, value out: -0.4519840478897095, loss: 1.1374356745363912e-06
Value target: -0.4440051019191742, value out: -0.45200031995773315, loss: 1.141491338785272e-06
Value target: -0.4440041780471802, value out: -0.4519961476325989, loss: 1.1405639952499769e-06
Value target: -0.44400373101234436, value out: -0.4519979953765869, loss: 1.1412190588089288e-06
Value target: -0.44400033354759216, value out: -0.4519992470741272, loss: 1.1425468073866796e-06
Value target: -0.4440001845359802, value out: -0.4520013928413391, loss: 1.1432024393798201e-06
Value target: -0.4440050423145294, value out: -0.4520002007484436, loss: 1.1414743994464516e-06
Value target: -0.4439970850944519, value out: -0.45200008153915405, loss: 1.1437135754022165e-06
Value target: -0.4440033733844757, value out: -0.451996386051178, loss: 1.140861627391132e-06
Value target: -0.4439944922924042, value out: -0.4519962668418884, loss: 1.1433643294367357e-06
Value target: -0.44400012493133545, va

FileNotFoundError: [Errno 2] No such file or directory: './episodes'

In [15]:
writer.close()

## Save model

In [14]:

net.reset_states()

net.set_value_return(True)
net.set_policy_return(True)
    
torch.save(net.state_dict(), root+'/saves/final/model.pth')

# Save model for C++ can read it
net.reset_states()
get_step(0,0,step)
dense_scalar_input = step['state_scalar'].to(platform)
conv_layers_input = step['state_layers'].to(platform)
conv_minimap_input = step['state_minimaps'].to(platform)
traced_script_module = torch.jit.trace(net, [dense_scalar_input, conv_layers_input, conv_minimap_input])
traced_script_module.save(root+"/saves/final/model.pt")


## Test

In [24]:
## Test: V3

for e in [0]:

    net.reset_states()

    net.set_value_return(True)
    net.set_policy_return(True)
        
    # Loop all state-action from episode
    steps = get_steps_episode(e)
    for index in range(steps):
        print("## "+str(index)+" ##")
        
        # Get step
        get_step(e, index, step)
        #save_obj(step, "../../step_"+str(index))
        
        # Get inputs
        dense_scalar_input = step['state_scalar'].to(platform)
        conv_layers_input = step['state_layers'].to(platform)
        conv_minimap_input = step['state_minimaps'].to(platform)
        
        # Foreward
        actions_out, units_out, local_out, global_out, value_out = net(dense_scalar_input, conv_layers_input, conv_minimap_input)

        # Update hidden state
        net.swap_hidden_state()
        
        # Apply softmax and sigmoid
        actions_out = F.softmax(actions_out)
        #local_out = F.sigmoid(local_out)
        #global_out = F.sigmoid(global_out)
        
        # Get output to cpu memory
        actions_out = actions_out.to('cpu')
        units_out = units_out.to('cpu')
        local_out = local_out.to('cpu')
        global_out = global_out.to('cpu')
        
        #save_image_of_local_and_global(local_out[0], global_out[0]   *255*128    , str(index))
        print("Final intent (value "+str(value_out.item())+"):")
        #print(local_out)
        print(str(step['params']['x1NextGlobal'])+" "+str(step['params']['y1NextGlobal'])+" "+str(step['params']['x2NextGlobal'])+" "+str(step['params']['y2NextGlobal']))
        for name in pos_actions_by_name:
            ind = index_global(name)
            if ind >= 0:
                print(name+": "+str(global_out[0,ind*2*2,0,0].item())+", "+str(global_out[0,ind*2*2+1,0,0].item())+"  -  "+str(global_out[0,ind*2*2+2,0,0].item())+", "+str(global_out[0,ind*2*2+3,0,0].item()))
        '''
        get_action_from_output(actions_out[0].detach().numpy(), 
                               units_out[0].detach().numpy(), 
                               local_out[0].detach().numpy(), 
                               global_out[0].detach().numpy())
        '''
        # Target
        #target_global, global_out = get_global_output(step, global_out)
        #save_image_(target_global[0,0,:,:].numpy()*255, "./saves/images/target_"+str(index)+"_global1.png")
        #save_image_(target_global[0,1,:,:].numpy()*255, "./saves/images/target_"+str(index)+"_global2.png")
        
        print("\n")
        print("\n")
        

## 0 ##
Final intent (value 0.008969221264123917):
407.0 6924.0 407.0 6924.0
noAction: 0.91986483335495, 0.7934528589248657  -  2.987717390060425, 3.10398530960083
update: 1.6852599382400513, 1.552628517150879  -  0.9739874601364136, 0.9271934628486633
move: -0.013913656584918499, 0.02016008086502552  -  -0.009773009456694126, -0.119686059653759
recollect: 0.22818680107593536, -0.10603801906108856  -  -0.049154747277498245, 0.17645685374736786
buildBuilding: -0.11759383976459503, -0.09222570061683655  -  0.07283014804124832, -0.0803900957107544
buildUnit: 2.5050840377807617, 2.323606491088867  -  1.4873448610305786, 1.341713309288025
attack: -0.005798066034913063, -0.00484981806948781  -  0.07878922671079636, -0.012209885753691196
cancelAction: 0.01590992696583271, 0.05410386249423027  -  -0.268903911113739, 0.11721356213092804




## 1 ##
Final intent (value 0.008969221264123917):
407.0 6924.0 407.0 6924.0
noAction: 0.9198649525642395, 0.7934529781341553  -  2.987717390060425, 3.10398

  actions_out = F.softmax(actions_out)


Final intent (value 0.008969221264123917):
407.399994 6924.229004 407.399994 6924.229004
noAction: 0.91986483335495, 0.7934529185295105  -  2.987717628479004, 3.10398530960083
update: 1.6852601766586304, 1.552628517150879  -  0.9739874005317688, 0.927193820476532
move: -0.013913658447563648, 0.020160052925348282  -  -0.009773029945790768, -0.11968604475259781
recollect: 0.22818684577941895, -0.10603796690702438  -  -0.04915471002459526, 0.17645686864852905
buildBuilding: -0.11759380251169205, -0.09222564846277237  -  0.07283005118370056, -0.0803900808095932
buildUnit: 2.5050840377807617, 2.3236067295074463  -  1.4873448610305786, 1.3417131900787354
attack: -0.005798084661364555, -0.004849785938858986  -  0.07878923416137695, -0.012209877371788025
cancelAction: 0.01590990088880062, 0.0541037991642952  -  -0.2689038813114166, 0.11721353232860565




## 3 ##
Final intent (value 0.008969221264123917):
407.399994 6924.229004 407.399994 6924.229004
noAction: 0.9198649525642395, 0.79345297813

Final intent (value 0.008969511836767197):
407.399994 6924.229004 407.399994 6924.229004
noAction: 0.9198671579360962, 0.7934529185295105  -  2.9877171516418457, 3.1039886474609375
update: 1.6852641105651855, 1.552628517150879  -  0.9739881753921509, 0.9271915555000305
move: -0.013914610259234905, 0.02016245760023594  -  -0.009773523546755314, -0.11968760937452316
recollect: 0.22818560898303986, -0.1060427576303482  -  -0.04915608838200569, 0.1764565408229828
buildBuilding: -0.1175941526889801, -0.09222537279129028  -  0.07283219695091248, -0.08038895577192307
buildUnit: 2.5050859451293945, 2.323605537414551  -  1.4873477220535278, 1.3417143821716309
attack: -0.005799492821097374, -0.004849583841860294  -  0.07878796011209488, -0.012209569104015827
cancelAction: 0.01591152511537075, 0.054101958870887756  -  -0.26890504360198975, 0.11721548438072205




## 15 ##
Final intent (value 0.008969511836767197):
407.399994 6924.229004 407.399994 6924.229004
noAction: 0.9198673367500305, 0.79345

Final intent (value 0.008969433605670929):
407.399994 6924.229004 407.399994 6924.229004
noAction: 0.9198671579360962, 0.7934529185295105  -  2.9877171516418457, 3.1039886474609375
update: 1.685263752937317, 1.552628517150879  -  0.9739882946014404, 0.9271916151046753
move: -0.013914589770138264, 0.02016247995197773  -  -0.009773521684110165, -0.11968760937452316
recollect: 0.22818554937839508, -0.10604275017976761  -  -0.049156129360198975, 0.17645655572414398
buildBuilding: -0.11759412288665771, -0.09222540259361267  -  0.07283224165439606, -0.08038894832134247
buildUnit: 2.5050859451293945, 2.323605537414551  -  1.4873477220535278, 1.3417142629623413
attack: -0.005799447186291218, -0.004849561024457216  -  0.07878796756267548, -0.012209579348564148
cancelAction: 0.01591152884066105, 0.05410195514559746  -  -0.26890507340431213, 0.11721549928188324




## 27 ##
Final intent (value 0.008969433605670929):
407.399994 6924.229004 407.399994 6924.229004
noAction: 0.9198673367500305, 0.793

KeyboardInterrupt: 