In [None]:
!pip install 'h5py==2.10.0' --force-reinstall

In [1]:
import tensorflow as tf
import keras
from keras.models import Sequential, load_model, Model
from keras.layers import Dense, Input, BatchNormalization
from keras.optimizers import SGD
import socket
import struct
import pandas as pd
import numpy as np
from collections import deque
import random

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


### Train from CSV

In [None]:
# One-hot rows; a later cell indexes this table with the integer label value.
lst = [[1.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]]

# Load the whole CSV into a plain NumPy array.
df = pd.read_csv("data.csv")
data = np.array(df)

# The first 6700 columns are the features, column 6700 holds the label.
x, y = data[:, :6700], data[:, 6700]

In [None]:
# Derive both arrays from `data` so this cell can be re-run safely: the original
# rebuilt `y` from itself, which fails on a second run once `y` is already one-hot.
# The row count is inferred (-1) instead of being hard-coded to 735 samples.
x = data[:, :6700].reshape(-1, 6700)
# Map every integer label in column 6700 to its one-hot row from `lst`.
y = np.array([lst[int(label)] for label in data[:, 6700]])

In [None]:
# Fully-connected classifier: 6700 inputs -> 32 -> 32 -> 3-way softmax.
model = Sequential([
    Dense(32, activation='relu', input_shape=(6700, )),
    Dense(32, activation='relu'),
    Dense(3, activation='softmax'),
])

epochs = 25
sgd = SGD(lr=0.05, nesterov=False)
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Fit on the arrays prepared from the CSV, then write the trained model to disk.
model.fit(x, y, epochs=epochs, batch_size=32)
model.save("model.h5")

In [None]:
%%timeit
# Times one single-sample prediction. The random (1, 6700) input is created inside
# the timed statement, so its generation cost is included in the measurement.
np.argmax(model.predict_on_batch(np.random.rand(1, 6700)))

### Train in real time

In [2]:
class Env:
    """UDP request/reply client used as the training environment.

    Every request is a single native C int (the action, or -1 for a reset)
    sent to ``(UDP_IP, UDP_PORT_SEND)``. Every reply is read from the socket
    bound to ``(UDP_IP, UDP_PORT_RECV)`` and decoded as 6702 native floats:
    6700 state values, followed by the reward and a flag that is > 0 when
    the episode is lost. The peer on the other end is not visible in this
    notebook (presumably a simulator -- confirm).

    The object owns two sockets; call :meth:`close` (or use it as a context
    manager) to release them.
    """

    STATE_SIZE = 6700                               # floats in one observation
    PACKET_FLOATS = STATE_SIZE + 2                  # state + reward + lose flag
    PACKET_FORMAT = "%df" % PACKET_FLOATS           # '6702f'
    PACKET_BYTES = struct.calcsize(PACKET_FORMAT)   # 26808 bytes
    RESET_ACTION = -1                               # value sent by reset()

    def __init__(self, udp_ip="127.0.0.1", port_recv=9003, port_send=9004):
        """Create both sockets and bind the receiving one.

        Args:
            udp_ip: address used both for binding and as the send destination.
            port_recv: local port that replies are read from.
            port_send: remote port that actions are sent to.

        Raises:
            OSError: if a socket cannot be created or bound; any socket that
                was already opened is closed before the error propagates.
        """
        self.UDP_IP = udp_ip
        self.UDP_PORT_RECV = port_recv
        self.UDP_PORT_SEND = port_send

        self.send = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        self.recv = None
        try:
            self.recv = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
            self.recv.bind((self.UDP_IP, self.UDP_PORT_RECV))
        except Exception:
            # Do not leak descriptors when the port is already in use.
            self.close()
            raise

    def close(self):
        """Close both sockets; safe to call more than once."""
        for sock in (getattr(self, "send", None), getattr(self, "recv", None)):
            if sock is not None:
                sock.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def _exchange(self, action):
        """Send one action and return the decoded reply as a 1-D float array.

        Blocks until a datagram arrives. ``struct.error`` is raised when the
        reply does not have exactly ``PACKET_BYTES`` bytes.
        """
        self.send.sendto(struct.pack("i", action), (self.UDP_IP, self.UDP_PORT_SEND))
        packet, _ = self.recv.recvfrom(self.PACKET_BYTES)
        return np.array(struct.unpack(self.PACKET_FORMAT, packet))

    def reset(self):
        """Request a reset (action -1) and return the first state, shape (1, 6700)."""
        values = self._exchange(self.RESET_ACTION)
        return values[:self.STATE_SIZE].reshape(1, self.STATE_SIZE)

    def step(self, action):
        """Send ``action`` and return ``(new_state, reward, lose)``.

        ``new_state`` has shape (1, 6700), ``reward`` is the float at index
        6700 of the reply, and ``lose`` is True when the float at index 6701
        is greater than zero.
        """
        values = self._exchange(action)
        new_state = values[:self.STATE_SIZE].reshape(1, self.STATE_SIZE)
        reward = values[self.STATE_SIZE]
        lose = bool(values[self.STATE_SIZE + 1] > 0)
        return new_state, reward, lose

# Create the shared environment; constructing it binds the receive socket
# (127.0.0.1:9003 by default), so that port must be free when this cell runs.
env = Env()

In [36]:
class Train:
    """Epsilon-greedy Q-learning loop with a replay buffer and a target network.

    Two copies of a saved Keras model are loaded from ``model_path``. The
    online copy (``train_network``) is fitted after every environment step
    from transitions sampled out of the replay buffer; the target copy is
    re-synchronised with it at the end of every episode.

    NOTE(review): if the loaded model ends in a softmax layer (as the
    classifier built elsewhere in this notebook does), its outputs are
    confined to [0, 1] while the targets built in ``train_from_buffer`` can
    exceed 1 -- confirm this is intended.
    """

    NUM_ACTIONS = 3  # actions are the integers 0, 1 and 2

    def __init__(self, env, model_path='Models/1_episodes_100.h5'):
        """Set the hyper-parameters and load the online and target networks.

        Args:
            env: object exposing ``reset()`` and ``step(action)``.
            model_path: path of the saved Keras model to start from.
        """
        self.env = env
        self.model_path = model_path
        self.gamma = 0.99
        self.epsilon = 0.01
        self.epsilon_decay = 0.05
        self.epsilon_min = 0.01
        self.epsilon_max = 1.0  # epsilon is a probability, never let it pass 1
        # Kept for compatibility; it is not attached to the loaded model, which
        # is used exactly as load_model returns it.
        self.sgd = SGD(lr=0.02)
        self.replay_buffer = deque(maxlen=20000)
        self.train_network = self.create_model()
        self.episode_num = 400
        self.num_pick_from_buffer = 32
        self.target_network = self.create_model()
        self.target_network.set_weights(self.train_network.get_weights())
        self.old_reward = 0

    def create_model(self):
        """Load and return a fresh copy of the saved model at ``self.model_path``."""
        return load_model(self.model_path)

    def _clamp_epsilon(self):
        """Keep ``self.epsilon`` inside ``[epsilon_min, epsilon_max]``."""
        self.epsilon = min(self.epsilon_max, max(self.epsilon_min, self.epsilon))

    def get_best_action(self, state):
        """Return a random action with probability epsilon, else the greedy one.

        Args:
            state: batch of one observation, passed to the online network as is.
        """
        self._clamp_epsilon()
        if np.random.rand() < self.epsilon:
            return np.random.randint(0, self.NUM_ACTIONS)
        return np.argmax(self.train_network.predict(state)[0])

    def train_from_buffer(self):
        """Fit the online network on one mini-batch drawn from the replay buffer.

        Does nothing until the buffer holds at least ``num_pick_from_buffer``
        transitions. For every sampled transition only the entry of the taken
        action is overwritten in the online network's own prediction: with
        ``reward`` when the transition is flagged done, otherwise with
        ``reward + gamma * max(target_network(new_state))``.
        """
        batch_size = self.num_pick_from_buffer
        if len(self.replay_buffer) < batch_size:
            return

        samples = random.sample(self.replay_buffer, batch_size)
        # Flatten each stored state to one row; the width is inferred instead
        # of being fixed to 6700.
        states = np.array([sample[0] for sample in samples]).reshape(batch_size, -1)
        new_states = np.array([sample[3] for sample in samples]).reshape(batch_size, -1)

        targets = self.train_network.predict(states)
        next_values = self.target_network.predict(new_states)

        for row, (_, action, reward, _, done) in enumerate(samples):
            if done:
                targets[row][action] = reward
            else:
                targets[row][action] = reward + max(next_values[row]) * self.gamma

        self.train_network.fit(states, targets, epochs=1, verbose=0)

    def original_try(self, current_state, eps):
        """Run one episode, training after every step, then adapt epsilon.

        Args:
            current_state: first observation of the episode.
            eps: episode index, used only in the progress line.
        """
        reward_sum = 0
        iteration = 0

        while True:
            best_action = self.get_best_action(current_state)
            new_state, reward, lose = self.env.step(best_action)
            # NOTE(review): a transition is flagged done when its reward exceeds
            # 0.8; `lose` only ends the episode and is not stored -- confirm.
            done = bool(reward > 0.8)
            # The first steps of an episode are skipped and afterwards only
            # every second transition is stored.
            if iteration > 30 and iteration % 2 == 0:
                self.replay_buffer.append([current_state, best_action, reward, new_state, done])
            self.train_from_buffer()
            reward_sum += reward
            current_state = new_state
            iteration += 1

            if lose:
                break

        # Synchronise the target network once per episode.
        self.target_network.set_weights(self.train_network.get_weights())

        print(f"ep: {eps}, epsilon: {self.epsilon:.02f}, iteration: {iteration}, reward: {reward_sum}")

        # Explore more when the episode did not beat the previous one by more
        # than 0.1, otherwise decay epsilon.
        if self.old_reward > reward_sum - 0.1:
            self.epsilon += random.uniform(0.03, 0.08)
        else:
            self.epsilon -= self.epsilon_decay
        self.old_reward = reward_sum
        # Previously only the lower bound was enforced, so repeated increases
        # could push epsilon above 1.
        self._clamp_epsilon()

    def start(self):
        """Run ``episode_num`` episodes, resetting the environment before each."""
        for eps in range(self.episode_num):
            current_state = self.env.reset()
            self.original_try(current_state, eps)

# Build the trainer around the shared environment; construction loads the saved
# model from disk twice (once for the online network, once for the target network).
train = Train(env)

In [37]:
# Run the training loop (400 episodes as configured in Train.__init__);
# one progress line is printed per episode. Interrupt the kernel to stop early.
train.start()

ep: 0, epsilon: 0.01, iteration: 502, reward: 508.5999842360616
ep: 1, epsilon: 0.01, iteration: 340, reward: 93.00000566244125
ep: 2, epsilon: 0.10, iteration: 345, reward: 233.72000761330128
ep: 3, epsilon: 0.05, iteration: 502, reward: 501.8000223338604
ep: 4, epsilon: 0.01, iteration: 509, reward: 410.35999834537506
ep: 5, epsilon: 0.04, iteration: 564, reward: 488.5999894142151
ep: 6, epsilon: 0.01, iteration: 509, reward: 389.8799983263016
ep: 7, epsilon: 0.08, iteration: 502, reward: 417.2400000989437
ep: 8, epsilon: 0.03, iteration: 509, reward: 397.55999833345413
ep: 9, epsilon: 0.06, iteration: 723, reward: 774.5999469161034
ep: 10, epsilon: 0.01, iteration: 340, reward: 50.19999647140503
ep: 11, epsilon: 0.06, iteration: 502, reward: 459.16001556813717
ep: 12, epsilon: 0.01, iteration: 340, reward: 50.19999647140503
ep: 13, epsilon: 0.10, iteration: 502, reward: 491.79998575150967
ep: 14, epsilon: 0.05, iteration: 502, reward: 474.1200164631009
ep: 15, epsilon: 0.15, iterati

KeyboardInterrupt: 

In [None]:
# Persist the online (training) network. Note that this is the same relative path,
# "model.h5", that the CSV training cell writes to, so that file is overwritten.
train.train_network.save("model.h5")

In [None]:
# Release both UDP sockets so that port 9003 can be bound again
# (the Predict section below binds the same port).
env.send.close()
env.recv.close()

### Predict

In [None]:
# Endpoints: replies are read on port 9003, actions are sent to port 9004.
UDP_IP = "127.0.0.1"
UDP_PORT_RECV, UDP_PORT_SEND = 9003, 9004

# Saved model used for inference.
model = load_model("Models/1_episodes_100.h5")

# One datagram socket per direction; only the receiving one needs a bound port.
send = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
recv = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
recv.bind((UDP_IP, UDP_PORT_RECV))

In [None]:
# Kick off the exchange with the reset request (-1).
send.sendto(struct.pack("i", -1), (UDP_IP, UDP_PORT_SEND))

# Serve predictions until the kernel is interrupted: each reply carries 6702
# floats, of which the first 6700 form the model input.
while True:
    data, addr = recv.recvfrom(26808)
    inp = np.array(struct.unpack('6702f', data)[:6700]).reshape(1, 6700)
    ret = np.argmax(model.predict(inp)[0])
    send.sendto(struct.pack("i", ret), (UDP_IP, UDP_PORT_SEND))