In [9]:
import db
import numpy as np
from tensorflow import keras
from serializer import GameState1DSerializer
import sys
import datetime as dt
import numpy as np
import json
import random as rd
import agent_wrapper

action_map = {}
counter = 0

for action in ["attack", "move"]:
    for q in range(-14,15):
        for r in range(-14, 15):
            action_map[counter] = f"{action},{q},{r}"
            counter += 1
            

action_map_inverse = {v:k for k,v in action_map.items()}

print(action_map, action_map_inverse)

def prepare_training_data(from_timestamp = 0):
    
    if(isinstance(from_timestamp, dt.datetime)):
        from_timestamp = int(from_timestamp.timestamp())
        
    replays = db.get_all_experiences({ "time": { "$gt": from_timestamp}})
    
    def _get_score_from_state(state: dict):
        try:
            state = json.loads(state["gameState"])
        except:
            pass
        
        
        for key in ["player1", "player2", "player3", "player4"]:
            if state[key]["name"] == "JutricKafica":
                return state[key]["score"]
    
    
    rewards = [_get_score_from_state(replay["sp"]) - _get_score_from_state(replay["s"]) for replay in replays]

    _seralizer = GameState1DSerializer()

    serialized = [
        _seralizer.serialize_single(x) for x in replays
    ]
    
    actions = [
        replay['a'] for replay in replays
    ]

    return serialized, rewards, actions




def create_targets(model, training_data, rewards):
    
    n = len(training_data)
    model_inputs = []
    
    for i in range(2, n):
        model_inputs.append(np.hstack(training_data[i], training_data[i-1], training_data[i-2]))
    
    states = model_inputs[:-1]
    next_states = model_inputs[1:]
    
    return states, actions[2:-1], next_states, rewards[2:-1]
    
    

class DQNAgent:
    
    def __init__(self, state_size, action_size):
        
        self.state_size = state_size
        self.action_size = action_size
        
        self.gamma = 0.99
        self.epsilon = 1
        self.epsilon_min = 0.1
        self.espilon_decay = 0.995
        self.update_rate = 300
        
        self.model = self._build_model(state_size, action_size)
        self.target_model = self._build_model(state_size, action_size)
        self.target_model.set_weights(self.model.get_weights())
        self.model.summary()
        
        self.last_updated = int(dt.datetime().now().timestamp())
    
    
    def _build_model(self, state_size, action_size):
        
        # Define the model architecture
        model = keras.Sequential()
        model.add(keras.layers.Input(shape=(state_size,)))
        model.add(keras.layers.Dense(128, activation='relu'))
        model.add(keras.layers.Dense(64, activation='relu'))
        model.add(keras.layers.Dense(action_size, activation='linear'))

        # Compile the model with an optimizer and a loss function
        model.compile(optimizer='adam', loss='mean_squared_error')

        return model
    
    def act(self, state):
        
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        
        act_values = self.model.predict(state)
        
        return action_map(np.argmax(act_values[0]))  # Returns action using polic
    
    
    def update(self):
        
        states, actions, next_states, rewards = create_targets(*prepare_training_data(self.last_updated))
        
        data = [states, actions, next_states, rewards]
        
        rd.shuffle(data)
        
        for state, action, next_state, reward in zip(*data):
            
            target = reward + self.gamma * np.amax(self.target_model.predict(next_state))
            
            target_f = self.model.predict(state)
            target_f[0][action_map_invere(action)] = target
            
            self.model.fit(state, target_f, epochs=1, verbose = 0)
            
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
            
        self.target_model.set_weights(self.model.get_weights())
        
        self.last_updated = int(dt.datetime.now().timestamp())

    

{0: 'attack,-14,-14', 1: 'attack,-14,-13', 2: 'attack,-14,-12', 3: 'attack,-14,-11', 4: 'attack,-14,-10', 5: 'attack,-14,-9', 6: 'attack,-14,-8', 7: 'attack,-14,-7', 8: 'attack,-14,-6', 9: 'attack,-14,-5', 10: 'attack,-14,-4', 11: 'attack,-14,-3', 12: 'attack,-14,-2', 13: 'attack,-14,-1', 14: 'attack,-14,0', 15: 'attack,-14,1', 16: 'attack,-14,2', 17: 'attack,-14,3', 18: 'attack,-14,4', 19: 'attack,-14,5', 20: 'attack,-14,6', 21: 'attack,-14,7', 22: 'attack,-14,8', 23: 'attack,-14,9', 24: 'attack,-14,10', 25: 'attack,-14,11', 26: 'attack,-14,12', 27: 'attack,-14,13', 28: 'attack,-14,14', 29: 'attack,-13,-14', 30: 'attack,-13,-13', 31: 'attack,-13,-12', 32: 'attack,-13,-11', 33: 'attack,-13,-10', 34: 'attack,-13,-9', 35: 'attack,-13,-8', 36: 'attack,-13,-7', 37: 'attack,-13,-6', 38: 'attack,-13,-5', 39: 'attack,-13,-4', 40: 'attack,-13,-3', 41: 'attack,-13,-2', 42: 'attack,-13,-1', 43: 'attack,-13,0', 44: 'attack,-13,1', 45: 'attack,-13,2', 46: 'attack,-13,3', 47: 'attack,-13,4', 48: 'a

In [5]:
def get_state_space():
    one_document = db.replay_buffer_collection.find_one()
    serialized = GameState1DSerializer().serialize_single(one_document)
    return len(serialized)

agent = DQNAgent(3 * get_state_space(), 40)


train = True
timestep = 0

initial_state = json.loads("../initial_state.json")


print(agent.update())

# while train:
    
#     if timestep != 0 and timestep % self.update_rate == 0:
#
    


625
Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_6 (Dense)              (None, 128)               240128    
_________________________________________________________________
dense_7 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_8 (Dense)              (None, 40)                2600      
Total params: 250,984
Trainable params: 250,984
Non-trainable params: 0
_________________________________________________________________


TypeError: function missing required argument 'year' (pos 1)

response: {"message":"TrainingGame sa id-ijem: 100uspeÅ¡no napravljen.","gameState":"{\"map\":{\"size\":29,\"tiles\":[[{\"q\":0,\"r\":-14,\"entity\":{\"type\":\
[2022-12-10 13:04:32.137671] Game created
response: {"message":"TrainingGame sa id-ijem: 100uspeÅ¡no napravljen.","gameState":"{\"map\":{\"size\":29,\"tiles\":[[{\"q\":0,\"r\":-14,\"entity\":{\"type\":\
[2022-12-10 13:05:34.790550] Game created
response: {"message":"TrainingGame sa id-ijem: 100uspeÅ¡no napravljen.","gameState":"{\"map\":{\"size\":29,\"tiles\":[[{\"q\":0,\"r\":-14,\"entity\":{\"type\":\
[2022-12-10 13:06:36.933062] Game created
