## Keras to the Moon
*If code differs between the notebook and the curriculum, go with the notebook!!!*

### Imports

In [1]:
from keras.models import Model, Sequential
from keras.layers.core import Dense, Dropout
from keras.layers import Input, Concatenate

import gym
import numpy as np

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### Creating the environment

In [2]:
# Create the Gym environment that every later cell (data generation, play) interacts with.
env = gym.make('LunarLander-v2')

In [3]:
# Number of discrete actions; used as the output-layer width and the one-hot label length.
action_space = env.action_space.n
def getStateSize():
    """Return the length of a single observation from the global ``env``.

    The size is read from ``env.observation_space.shape`` so the environment
    is not reset and advanced with a random action merely to measure one
    observation. If the space exposes no non-empty shape, the length of the
    observation returned by ``env.reset()`` is used instead.

    Returns:
        int: number of entries along the first axis of an observation.
    """
    shape = getattr(env.observation_space, 'shape', None)
    if shape:
        return int(shape[0])
    # No usable shape on the space: measure the initial observation instead.
    # Assumes reset() returns the observation itself (the API the rest of this
    # notebook relies on).
    return len(env.reset())

# Length of one observation vector; used as the input-layer width of the models below.
state_space = getStateSize()

### Creating the models
#### Single-network model

In [4]:
# Single-branch network: one stack of Dense layers built with the Keras functional API.
# NOTE: the two-branch cell below rebinds `input_layer`, `output_dense` and `model`,
# so when the notebook is run top to bottom this network is replaced before training.

# Input layer: one value per entry of the observation vector.
input_layer = Input(shape=(state_space,))

# Four fully connected ReLU layers of decreasing width (300 -> 150 -> 100 -> 50).
hdn_1 = Dense(300, activation='relu')(input_layer)
hdn_2 = Dense(150, activation='relu')(hdn_1)
hdn_3 = Dense(100, activation='relu')(hdn_2)
hdn_4 = Dense(50, activation='relu')(hdn_3)
# Softmax output with one unit per action; play_game() takes the argmax as the move.
output_dense = Dense(action_space, activation='softmax')(hdn_4)

model = Model(inputs=input_layer, outputs=output_dense)
# Categorical cross-entropy pairs with the one-hot action labels built by initial_data().
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])

Instructions for updating:
Colocations handled automatically by placer.


#### Two-branch model (Concatenate)

In [5]:
# Two-branch network joined with Concatenate.
# Input layer: one value per entry of the observation vector (shared by both branches).
input_layer = Input(shape=(state_space,))

# The same input is fed through TWO stacks of layers in parallel, and their
# outputs are combined at the end.
# A Dropout layer follows every Dense layer; `rate` is the FRACTION of inputs
# dropped during training (0.2 means 20%).

# Branch 1: ReLU activations.
hdn1_1 = Dense(300, activation='relu')(input_layer)
hdn1_2 = Dropout(rate=0.2)(hdn1_1)
hdn1_3 = Dense(150, activation='relu')(hdn1_2)
hdn1_4 = Dropout(rate=0.2)(hdn1_3)
hdn1_5 = Dense(100, activation='relu')(hdn1_4)
hdn1_6 = Dropout(rate=0.2)(hdn1_5)

# Branch 2: linear activations.
# NOTE(review): with no nonlinearity between its Dense layers, this branch is an
# affine function of the input at inference time — presumably intended as a
# contrast to branch 1; confirm.
hdn2_1 = Dense(300, activation='linear')(input_layer)
hdn2_2 = Dropout(rate=0.2)(hdn2_1)
hdn2_3 = Dense(150, activation='linear')(hdn2_2)
hdn2_4 = Dropout(rate=0.2)(hdn2_3)
hdn2_5 = Dense(100, activation='linear')(hdn2_4)
hdn2_6 = Dropout(rate=0.2)(hdn2_5)

# Combine the branches using Concatenate().
# Concatenate takes a LIST of tensors (two here, but it can be more) and joins
# them into a single tensor, so the two 100-unit branch outputs become one
# 200-unit input for the output layer.
combined_nn_input = Concatenate()([hdn1_6, hdn2_6])

# Softmax output layer with one unit per action, then build and compile the model.
output_dense = Dense(action_space, activation='softmax')(combined_nn_input)
model = Model(inputs=input_layer, outputs=output_dense)
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [6]:
# Print a layer-by-layer summary of the current `model`:
# output shapes, parameter counts and which layers feed each other.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 8)            0                                            
__________________________________________________________________________________________________
dense_6 (Dense)                 (None, 300)          2700        input_2[0][0]                    
__________________________________________________________________________________________________
dense_9 (Dense)                 (None, 300)          2700        input_2[0][0]                    
__________________________________________________________________________________________________
dropout_1 (Dropout)             (None, 300)          0           dense_6[0][0]                    
__________________________________________________________________________________________________
dropout_4 

### Generating data and training

In [7]:
# Same approach as the cartpole data generator, except that the returned
# arrays are reshaped to 2-D (n_examples, state_space) / (n_examples, action_space)
# to satisfy the stricter functional-API Input layer.
def initial_data(number_of_games, game_turns, acceptable_score):
    """Collect (observation, one-hot action) training pairs from random play.

    Plays ``number_of_games`` games on the global ``env`` with uniformly
    sampled actions. Every turn records the observation the action was taken
    in together with that action; the first turn uses the observation returned
    by ``env.reset()``, so no move of a game is dropped. Only games whose
    accumulated score reaches ``acceptable_score`` contribute examples.

    Args:
        number_of_games: how many random games to play.
        game_turns: maximum number of turns per game (a game also stops when
            the environment reports ``done``).
        acceptable_score: minimum accumulated score for a game to be kept.

    Returns:
        tuple: ``(X, y)`` where ``X`` has shape ``(n_examples, state_space)``
        and ``y`` has shape ``(n_examples, action_space)`` with one-hot rows.
        Both have zero rows when no game qualified.
    """
    X = []
    y = []
    for _ in range(number_of_games):
        # reset() hands back the state in which the first action is chosen.
        prev_obs = env.reset()
        game_memory = []
        score = 0
        for _turn in range(game_turns):
            action = env.action_space.sample()
            new_obs, reward, done, _info = env.step(action)
            # NOTE(review): the reward is truncated toward zero before it is
            # added, so `score` is not the exact episode return; thresholds
            # passed as `acceptable_score` are relative to this truncated sum.
            score += int(reward)
            game_memory.append((prev_obs, int(action)))
            prev_obs = new_obs
            if done:
                break

        if score >= acceptable_score:
            for obs, act in game_memory:
                X.append(np.asarray(obs).reshape(state_space))
                label = np.zeros(action_space, dtype=int)
                label[act] = 1
                y.append(label)
    print('{} examples were made.'.format(len(X)))
    # The final reshape also gives well-formed (0, n) arrays when nothing qualified.
    return np.array(X).reshape(-1, state_space), np.array(y).reshape(-1, action_space)

In [8]:
# Play 5000 random games of at most 150 turns each and keep those scoring at least 10.
X, y = initial_data(5000, 150, 10)

620 examples were made.


In [9]:
# Train for 10 epochs, holding out 20% of the examples for validation.
model.fit(x=X, y=y, epochs=10, verbose=2, validation_split=0.2)

Instructions for updating:
Use tf.cast instead.
Train on 496 samples, validate on 124 samples
Epoch 1/10
 - 1s - loss: 1.4232 - acc: 0.2500 - val_loss: 1.4294 - val_acc: 0.2177
Epoch 2/10
 - 0s - loss: 1.4029 - acc: 0.2601 - val_loss: 1.4348 - val_acc: 0.2661
Epoch 3/10
 - 0s - loss: 1.4063 - acc: 0.2681 - val_loss: 1.4549 - val_acc: 0.2581
Epoch 4/10
 - 0s - loss: 1.3997 - acc: 0.2540 - val_loss: 1.4349 - val_acc: 0.2097
Epoch 5/10
 - 0s - loss: 1.3912 - acc: 0.2843 - val_loss: 1.4938 - val_acc: 0.1935
Epoch 6/10
 - 0s - loss: 1.3894 - acc: 0.2843 - val_loss: 1.4383 - val_acc: 0.2500
Epoch 7/10
 - 0s - loss: 1.3918 - acc: 0.2802 - val_loss: 1.4344 - val_acc: 0.1694
Epoch 8/10
 - 0s - loss: 1.3831 - acc: 0.2681 - val_loss: 1.4709 - val_acc: 0.2016
Epoch 9/10
 - 0s - loss: 1.3854 - acc: 0.2641 - val_loss: 1.4664 - val_acc: 0.2419
Epoch 10/10
 - 0s - loss: 1.3882 - acc: 0.2762 - val_loss: 1.4515 - val_acc: 0.2500


<keras.callbacks.History at 0x13a0ea6d8>

### Playing

In [10]:
def play_game(n_games, model=None):
    """Play and render ``n_games`` games on the global ``env``.

    Args:
        n_games: number of games to play.
        model: optional object with a ``predict`` method. When given, every
            move (including the first, which uses the observation returned by
            ``env.reset()``) is the argmax of its prediction for the current
            observation. When ``None``, actions are sampled at random.

    Prints the final score and the number of moves of each game. The
    environment is closed once all games are over, even if a game is
    interrupted by an exception.
    """
    try:
        for _ in range(n_games):
            score = 0
            # reset() returns the first observation, so the model picks move 1 too.
            prev_obs = env.reset()
            num_moves = 0
            done = False
            while not done:
                env.render()
                num_moves += 1
                if model is None:
                    action = env.action_space.sample()
                else:
                    prediction = model.predict(
                        np.asarray(prev_obs).reshape(-1, state_space))
                    action = int(np.argmax(prediction))
                prev_obs, reward, done, _ = env.step(action)
                score += reward

            print('Final score: {}'.format(score))
            print('# moves: {}'.format(num_moves))
    finally:
        # Always release the render window, also on errors or interrupts.
        env.close()

In [11]:
# Watch the trained model play three rendered games.
play_game(3, model)

Final score: -476.1938946874037
# moves: 97
Final score: -118.09914966188109
# moves: 62
Final score: -69.92155666418927
# moves: 92
