In [1]:
import db
import numpy as np
from tensorflow import keras
from serializer import GameState1DSerializer
import sys
import datetime as dt
import numpy as np
import json
import random as rd
import agent_wrapper
import randomAgents

action_map = {}
counter = 0

for action in ["attack", "move"]:
    for q in range(-14,15):
        for r in range(-14, 15):
            action_map[counter] = f"{action},{q},{r}"
            counter += 1
            

action_map_inverse = {v:k for k,v in action_map.items()}

#print(action_map, action_map_inverse)

def prepare_training_data(from_timestamp = 0):
    
    if(isinstance(from_timestamp, dt.datetime)):
        from_timestamp = int(from_timestamp.timestamp())
        
    replays = db.get_all_experiences({ "time": { "$gt": from_timestamp}})
    
    print(len(replays))
    
    def _get_score_from_state(state: dict):
        
        state = agent_wrapper.fix_state(state)
        
        for key in ["player1", "player2", "player3", "player4"]:
            if state[key]["name"] == "JutricKafica1":
                return state[key]["score"] + state[key]["health"] + state[key]["power"]
        return 0
    
    
    rewards = [_get_score_from_state(replay["sp"]) - _get_score_from_state(replay["s"]) for replay in replays]

    _seralizer = GameState1DSerializer()

    serialized = [
        _seralizer.serialize_single(x) for x in replays
    ]
    
    actions = [
        replay['a']["action"] for replay in replays
    ]

    return serialized, rewards, actions




def create_targets(training_data, rewards, actions):
    
    n = len(training_data)
    model_inputs = []
    
    for i in range(2, n):
        model_inputs.append(np.hstack([training_data[i], training_data[i-1], training_data[i-2]]))
    
    states = model_inputs[:-1]
    next_states = model_inputs[1:]
    
    return states, actions[2:-1], next_states, rewards[2:-1]
    
    

class DQNAgent:
    
    def __init__(self, state_size, action_size):
        
        print(type(state_size), type(action_size))
        
        self.state_size = state_size
        self.action_size = action_size
        
        self.gamma = 0.99
        self.epsilon = 1
        self.epsilon_min = 0.1
        self.epsilon_decay = 0.995
        self.update_rate = 300
        
        self.model = self._build_model(state_size, action_size)
        self.target_model = self._build_model(state_size, action_size)
        self.target_model.set_weights(self.model.get_weights())
        self.model.summary()
        
        self.last_updated = int((dt.datetime.now() - dt.timedelta(hours = 1)).timestamp())
    
    
    def _build_model(self, state_size, action_size):
        
        # Define the model architecture
        model = keras.Sequential()
        model.add(keras.layers.Input(shape=(state_size,)))
        model.add(keras.layers.Dense(128, activation='relu'))
        model.add(keras.layers.Dense(64, activation='relu'))
        model.add(keras.layers.Dense(action_size, activation='linear'))

        # Compile the model with an optimizer and a loss function
        model.compile(optimizer='adam', loss='mean_squared_error')

        return model
    
    def act(self, state, state_json):
        
        if np.random.rand() <= self.epsilon:
            picked_random_valid = randomAgents.pick_rand_action(state_json)
            return f"{picked_random_valid[0]},{picked_random_valid[1]},{picked_random_valid[2]}"
        
        act_values = self.model.predict(state)[0]
        
        for idx,elem in enumerate(act_values):
            action = action_map[elem]
            action_tokens = action.split(",")
            if not randomAgents.is_valid_action(action_tokens[0], action_tokens[1], action_tokens[2], state_json):
                act_values[idx] = float("-inf")
        
        return action_map[np.argmax(act_values[0])]  # Returns action using polic
    
    
    def update(self):
        
        print("Updating...")
        
        states, actions, next_states, rewards = create_targets(*prepare_training_data(self.last_updated))
        
        data = list(zip(states, actions, next_states, rewards))
        
        rd.shuffle(data)
        
        log_rewards = []
        freq = 100
        
        for datum in data:
            
            state = np.array(datum[0]).reshape(1, -1)
            action = datum[1]
            next_state = np.array(datum[2]).reshape(1,-1)
            reward = datum[3]
            
            .app
            
            #print(state.shape, next_state.shape, reward)
            
            #print(state, action, next_state, reward)
            
            try:
                target = reward + self.gamma * np.amax(self.target_model.predict(next_state))
                target_f = self.model.predict(state)
                target_f[0][action_map_inverse[action]] = target
            
                self.model.fit(state, target_f, epochs=1, verbose = 1)
            except KeyboardInterrupt:
                raise KeyboardInterrupt
            except:
                continue
            
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
            
        self.target_model.set_weights(self.model.get_weights())
        
        self.last_updated = int(dt.datetime.now().timestamp())

    



2022-12-10 15:38:03.405621: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-12-10 15:38:03.405643: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Starting game creation thread...


In [2]:
def get_state_space():
    one_document = db.replay_buffer_collection.find_one()
    serialized = GameState1DSerializer().serialize_single(one_document)
    return len(serialized)

agent = DQNAgent(3 * get_state_space(), len(action_map))

#agent.update()

train = False
timestep = 0

initial_obs = json.load(open("../initial_state.json",'r'))

obs_window = 3*[initial_obs]

agent.update()

while train:
    
    if timestep != 0 and timestep % agent.update_rate == 0:
        agent.update()
        
    try:
        state = np.hstack([GameState1DSerializer().serialize_single(x) for x in obs_window])
    except:
        from pprint import pprint
        pprint(obs_window)

    action = agent.act(state, obs_window[-1])
    
    print(action)
    
    _split = action.split(",")
    mode = _split[0]
    x = _split[1]
    y = _split[2]

    if mode == "attack":
        info, success = agent_wrapper.attack("DQN", obs_window[-1],x,y)
    else:
        info, success = agent_wrapper.move("DQN", obs_window[-1],x,y)
        
    if(success):
        new_obs = info
        obs_window.append(new_obs)
        del obs_window[0]

        
    timestep += 1

    


<class 'int'> <class 'int'>
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               236288    
_________________________________________________________________
dense_1 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_2 (Dense)              (None, 1682)              109330    
Total params: 353,874
Trainable params: 353,874
Non-trainable params: 0
_________________________________________________________________


2022-12-10 15:38:05.657891: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-12-10 15:38:05.657937: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-12-10 15:38:05.657978: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (nikola-tpyoga): /proc/driver/nvidia/version does not exist
2022-12-10 15:38:05.658311: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


response: {"message":"TrainingGame sa id-ijem: 101uspeÅ¡no napravljen.","gameState":"{\"map\":{\"size\":29,\"tiles\":[[{\"q\":0,\"r\":-14,\"entity\":{\"type\":\
[2022-12-10 15:38:21.535570] Game created
1038



KeyboardInterrupt



In [3]:
train = True
timestep = 0

initial_obs = json.load(open("../initial_state.json",'r'))

obs_window = 3*[initial_obs]

while train:
    
    if timestep != 0 and timestep % agent.update_rate == 0:
        #agent.update()
        pass
        
    try:
        state = np.hstack([GameState1DSerializer().serialize_single(x) for x in obs_window])
    except:
        from pprint import pprint
        pprint(obs_window)

    action = agent.act(state, obs_window[-1])
    
    print(action)
    
    _split = action.split(",")
    mode = _split[0]
    x = _split[1]
    y = _split[2]

    if mode == "attack":
        info, success = agent_wrapper.attack("DQN", obs_window[-1],x,y)
    else:
        info, success = agent_wrapper.move("DQN", obs_window[-1],x,y)
        
    if(success):
        new_obs = info
        obs_window.append(new_obs)
        del obs_window[0]

        
    timestep += 1


attack,-7,-8
attack,-10,-6
move,-6,-7
attack,-9,-6
move,-7,-8
attack,-8,-5
attack,-9,-9
move,-7,-6
attack,-8,-7
attack,-4,-9
move,-8,-5
move,-9,-4
move,-9,-4
move,-7,-8
move,-7,-8
attack,-6,-8
attack,-4,-4
move,-8,-7
attack,-10,-9
attack,-6,-4
attack,-10,-8
move,-7,-8
attack,-4,-10
move,-7,-7
attack,-6,-10
move,-6,-6
attack,-4,-4
move,-8,-6
move,-8,-7
attack,-7,-8
attack,-9,-9
attack,-10,-7
attack,-4,-4
move,-7,-8
attack,-4,-6
move,-8,-8
move,-7,-8
move,-6,-8
attack,-5,-10
attack,-3,-7
move,-7,-6
move,-6,-7
attack,-4,-4
attack,-3,-6
move,-6,-8
move,-8,-8
attack,-7,-8
attack,-4,-7
attack,-7,-4
attack,-9,-6
attack,-10,-6
attack,-5,-5
move,-6,-7
attack,-5,-6
attack,-10,-4
move,-7,-7
move,-6,-7
attack,-4,-4
move,-7,-8
attack,-6,-4
move,-8,-7
attack,-10,-9
attack,-9,-6
move,-7,-6
attack,-7,-8
attack,-10,-10
attack,-9,-5
move,-6,-6
move,-7,-8
attack,-8,-8
attack,-4,-10
attack,-9,-10
move,-6,-6
attack,-8,-10
move,-8,-8
attack,-4,-9
move,-6,-8
move,-5,-9
attack,-7,-11
attack,-4,-9
move,-7,-6
a


KeyboardInterrupt

