In [1]:
import random

import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, Flatten, Conv2D, MaxPooling2D, Activation
from keras.optimizers import SGD

import pandas as pd
import numpy as np

In [None]:
# One-hot lookup table: row i is the target vector for class label i (3 classes).
lst = [[1.0, 0, 0], [0, 1.0, 0], [0, 0, 1.0]]

# Columns 0..6699 of every CSV row are used as features and column 6700 as the
# class label (an index into `lst`).
df = pd.read_csv("data.csv")
data = np.array(df)

# The row count is inferred (-1) rather than hard-coded, so the cell keeps
# working when data.csv grows or shrinks; previously any row count other than
# 2969 raised a ValueError here.
x = data[:, :6700].reshape(-1, 6700)

# Vectorised one-hot encoding: index the lookup table with the integer labels.
# Out-of-range labels still raise IndexError, as the per-row loop did.
y = np.array(lst)[data[:, 6700].astype(int)]

In [None]:
# Fully connected classifier: 6700 inputs -> 32 -> 32 -> 3-way softmax.
model = Sequential([
    Dense(32, activation='relu', input_shape=(6700, )),
    Dense(32, activation='relu'),
    Dense(3, activation='softmax'),
])

# SGD with momentum; the optimizer's `decay` argument is set to lr / epochs.
epochs = 25
lr = 0.001
decay = lr / epochs
sgd = SGD(lr=lr, momentum=0.9, decay=decay, nesterov=False)
model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])

# Train on the full dataset (`x`, `y` come from the data-loading cell) and
# persist the result so the UDP server cell can reload it from disk.
model.fit(x, y, epochs=epochs, batch_size=128)
model.save("model.h5")

In [None]:
%%timeit
# Times one single-row inference on the in-memory `model`: a random 6700-feature
# input is generated, run through predict_on_batch, and reduced to the arg-max
# class index. The random input generation is part of the timed statement.
np.argmax(model.predict_on_batch(np.random.rand(1, 6700)))

In [None]:
# Show dataset row 100 as a 67x100 single-channel image in an OpenCV window.
# NOTE(review): `cv2` is not imported in any visible cell -- confirm it is
# imported before this cell runs.
img = np.reshape(x[100], (67, 100, 1))
cv2.imshow('img', img)
cv2.waitKey(0)  # block until a key is pressed in the window
cv2.destroyAllWindows()

In [None]:
import socket
import struct

# Reload the trained classifier from disk and run one throw-away prediction
# before serving (presumably so one-time setup cost is not paid on the first
# real request -- confirm).
model = load_model("model.h5")
model.predict_on_batch(np.random.rand(1, 6700))

UDP_IP = "127.0.0.1"
UDP_PORT_RECV = 9003
UDP_PORT_SEND = 9004

# One request is 6700 native-order 32-bit floats (26800 bytes).
PACKET_BYTES = struct.calcsize('6700f')

send = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
recv = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)

# The loop only ends when the kernel is interrupted or an error is raised.
# The finally clause guarantees both sockets are released in either case;
# otherwise port 9003 stays bound and re-running this cell fails on bind().
try:
    recv.bind((UDP_IP, UDP_PORT_RECV))

    # Send an initial 0 to the peer (presumably a "server ready" signal --
    # confirm against the client).
    send.sendto(struct.pack("i", 0), (UDP_IP, UDP_PORT_SEND))

    while True:
        # `packet` rather than `data`, so the dataset array defined by the
        # loading cell is not overwritten in the shared notebook namespace.
        packet, _ = recv.recvfrom(PACKET_BYTES)
        if len(packet) != PACKET_BYTES:
            # A malformed datagram must not take the whole server down.
            print("Ignoring datagram of {} bytes (expected {})".format(len(packet), PACKET_BYTES))
            continue

        # Decode the payload straight into a float32 row vector; this reads
        # the same native-order 4-byte floats as struct.unpack('6700f', ...)
        # without building a 6700-element Python tuple first.
        inp = np.frombuffer(packet, dtype=np.float32).reshape(1, 6700)
        ret = int(np.argmax(model.predict_on_batch(inp)[0]))

        # Reply with the predicted class index as a native int.
        send.sendto(struct.pack("i", ret), (UDP_IP, UDP_PORT_SEND))
finally:
    send.close()
    recv.close()

In [None]:
from collections import deque

class MountainCarTrain:
    """Deep Q-network (DQN) trainer for a Gym environment with discrete actions.

    Two identically shaped networks are kept: ``trainNetwork`` is fitted after
    every environment step on a mini-batch drawn from ``replayBuffer``, while
    ``targetNetwork`` supplies the bootstrap Q-values and is synchronised with
    ``trainNetwork`` at the end of each episode. Actions are chosen
    epsilon-greedily; epsilon is reduced by ``epsilon_decay`` after each
    episode and clamped to ``epsilon_min``.

    The environment must follow the classic Gym API used here: ``reset()``
    returns an observation and ``step(action)`` returns
    ``(observation, reward, done, info)``.
    """

    def __init__(self, env):
        """Store the environment and build the online and target networks.

        Args:
            env: Gym-style environment exposing ``observation_space.shape``,
                ``action_space.n``, ``reset()`` and ``step()``.
        """
        self.env = env
        self.gamma = 0.99            # discount factor for future rewards
        self.epsilon = 1             # initial exploration probability
        self.epsilon_decay = 0.05    # subtracted from epsilon after each episode
        self.epsilon_min = 0.01      # lower bound applied to epsilon
        self.learingRate = 0.001     # (sic) name kept for backward compatibility
        self.replayBuffer = deque(maxlen=20000)
        self.trainNetwork = self.createNetwork()
        self.episodeNum = 400
        self.iterationNum = 201  # max is 200
        self.numPickFromBuffer = 32  # mini-batch size sampled from the buffer
        self.targetNetwork = self.createNetwork()
        self.targetNetwork.set_weights(self.trainNetwork.get_weights())

    def createNetwork(self):
        """Build and compile a Q-network sized for ``self.env``.

        The input shape and the number of outputs are taken from the
        environment instead of being hard-coded. The output layer is linear
        because Q-value targets are unbounded (a softmax head cannot represent
        them), and the model is compiled so that ``fit`` can be called on it.

        Returns:
            A compiled Keras ``Sequential`` model mapping a batch of states to
            one Q-value per action.
        """
        state_shape = self.env.observation_space.shape
        num_actions = self.env.action_space.n

        model = Sequential()
        model.add(Dense(32, activation='relu', input_shape=state_shape))
        model.add(Dense(32, activation='relu'))
        model.add(Dense(num_actions, activation='linear'))
        # `lr=` matches the optimizer keyword style already used in this file.
        model.compile(loss='mse', optimizer=keras.optimizers.Adam(lr=self.learingRate))
        return model

    def getBestAction(self, state):
        """Pick an action epsilon-greedily.

        Args:
            state: Array of shape ``(1, state_dim)``.

        Returns:
            int: A uniformly random action with probability epsilon, otherwise
            the arg-max of ``trainNetwork``'s prediction for ``state``.
        """
        # Epsilon is decremented without bound elsewhere; clamp it here.
        self.epsilon = max(self.epsilon_min, self.epsilon)
        if np.random.rand() < self.epsilon:
            return int(np.random.randint(0, self.env.action_space.n))
        return int(np.argmax(self.trainNetwork.predict(state)[0]))

    def trainFromBuffer_Boost(self):
        """Vectorised variant of ``trainFromBuffer``.

        Samples ``numPickFromBuffer`` transitions and fits ``trainNetwork``
        once on the resulting targets. Does nothing until the buffer holds at
        least one full batch.
        """
        if len(self.replayBuffer) < self.numPickFromBuffer:
            return
        samples = random.sample(self.replayBuffer, self.numPickFromBuffer)

        # Unpack column-wise. Building one np.array from the mixed
        # [array, int, float, array, bool] records creates a ragged array,
        # which recent NumPy versions reject.
        states, actions, rewards, new_states, dones = zip(*samples)
        states = np.concatenate(states, axis=0)          # (batch, state_dim)
        new_states = np.concatenate(new_states, axis=0)  # (batch, state_dim)
        actions = np.asarray(actions, dtype=int)
        rewards = np.asarray(rewards, dtype=float)
        not_done = 1.0 - np.asarray(dones, dtype=bool).astype(float)

        targets = self.trainNetwork.predict(states)
        q_future = self.targetNetwork.predict(new_states).max(axis=1)

        # Only the taken action's Q-value is replaced: `reward` for terminal
        # transitions, `reward + gamma * max Q(next)` otherwise.
        targets[np.arange(len(samples)), actions] = rewards + self.gamma * q_future * not_done
        self.trainNetwork.fit(states, targets, epochs=1, verbose=0)

    def trainFromBuffer(self):
        """Sample a mini-batch from the replay buffer and fit ``trainNetwork``.

        Per-sample loop version of ``trainFromBuffer_Boost``; both compute the
        same targets. Does nothing until the buffer holds at least one full
        batch.
        """
        if len(self.replayBuffer) < self.numPickFromBuffer:
            return
        samples = random.sample(self.replayBuffer, self.numPickFromBuffer)

        # The state width is inferred (-1) so the method is not tied to
        # two-dimensional observations.
        batch = len(samples)
        states = np.array([sample[0] for sample in samples]).reshape(batch, -1)
        newStates = np.array([sample[3] for sample in samples]).reshape(batch, -1)

        targets = self.trainNetwork.predict(states)
        new_state_targets = self.targetNetwork.predict(newStates)

        # Each `target` is a view into `targets`, so the assignment below
        # updates the batch in place.
        for target, next_q, (_, action, reward, _, done) in zip(targets, new_state_targets, samples):
            if done:
                target[action] = reward
            else:
                target[action] = reward + max(next_q) * self.gamma

        self.trainNetwork.fit(states, targets, epochs=1, verbose=0)

    def orginalTry(self, currentState, eps):
        """Run one training episode.

        Args:
            currentState: Initial observation, shape ``(1, state_dim)``.
            eps: Episode index, used in log messages and the checkpoint name.

        Side effects:
            Appends transitions to ``replayBuffer``, trains ``trainNetwork``
            after every step, saves it to ``./trainNetworkInEPS<eps>.h5`` when
            the episode ends before step index 199, copies its weights to
            ``targetNetwork`` and decrements ``epsilon``.
        """
        rewardSum = 0
        max_position = -99

        for i in range(self.iterationNum):
            bestAction = self.getBestAction(currentState)

            # Use the environment this trainer was constructed with rather
            # than a module-level global named `env`.
            new_state, reward, done, _ = self.env.step(bestAction)
            new_state = np.asarray(new_state).reshape(1, -1)

            # Keep track of max position (first observation component)
            if new_state[0][0] > max_position:
                max_position = new_state[0][0]

            # Adjust reward for task completion
            if new_state[0][0] >= 0.5:
                reward += 10

            self.replayBuffer.append([currentState, bestAction, reward, new_state, done])

            # Or you can use self.trainFromBuffer_Boost(), the matrix-wise version
            self.trainFromBuffer()

            rewardSum += reward

            currentState = new_state

            if done:
                break

        if i >= 199:
            print("Failed to finish task in epsoide {}".format(eps))
        else:
            print("Success in epsoide {}, used {} iterations!".format(eps, i))
            self.trainNetwork.save('./trainNetworkInEPS{}.h5'.format(eps))

        # Sync the target network with the freshly trained weights
        self.targetNetwork.set_weights(self.trainNetwork.get_weights())

        print("now epsilon is {}, the reward is {} maxPosition is {}".format(max(self.epsilon_min, self.epsilon), rewardSum,max_position))
        self.epsilon -= self.epsilon_decay

    def start(self):
        """Train for ``episodeNum`` episodes, resetting the env before each one."""
        for eps in range(self.episodeNum):
            currentState = np.asarray(self.env.reset()).reshape(1, -1)
            self.orginalTry(currentState, eps)


# Create the MountainCar environment, wrap it in the DQN trainer and run the
# full training loop (blocks until all episodes have finished).
# NOTE(review): `gym` is not imported in any visible cell -- confirm it is
# imported before this cell runs.
env = gym.make('MountainCar-v0')
dqn=MountainCarTrain(env=env)
dqn.start()