<a href="https://colab.research.google.com/github/chintankotian/Self-Landing-lunar-lander-deep-q-net/blob/master/lunar_lander.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Install Dependencies



In [0]:
!pip install box2d-py
!pip install pyglet==1.3.2
!apt-get install -y xvfb python-opengl > /dev/null 2>&1
!pip install gym pyvirtualdisplay > /dev/null 2>&1



## Import Required Libraries

In [0]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Dense,Conv2D,Dropout,MaxPooling2D,Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
import gym
import Box2D
import matplotlib.pyplot as plt
from tqdm import tqdm
import random
from IPython import display as ipythondisplay
from pyvirtualdisplay import Display
from IPython import display as ipythondisplay
import matplotlib.animation as ani

## Start a virtual display, because Google Colab does not have a physical display

In [2]:
# Launch an invisible 400x300 virtual screen so that env.render() has a
# display to draw on when the notebook runs on a headless machine.
display = Display(size=(400, 300), visible=0)
display.start()

W0721 10:09:49.747256 139922536294272 abstractdisplay.py:151] xdpyinfo was not found, X start can not be checked! Please install xdpyinfo!


<Display cmd_param=['Xvfb', '-br', '-nolisten', 'tcp', '-screen', '0', '400x300x24', ':1021'] cmd=['Xvfb', '-br', '-nolisten', 'tcp', '-screen', '0', '400x300x24', ':1021'] oserror=None return_code=None stdout="None" stderr="None" timeout_happened=False>

## Creating the Environment

In [0]:
# Single shared environment instance used by both the training and testing loops.
env = gym.make(id='LunarLander-v2')

## Creating the Model

In [0]:
# Online Q-network: takes the 8-value state vector and produces 4 outputs,
# one estimated Q-value per action (the training/testing loops act on the argmax).
inputs = Input(shape=(8,))
dense1 = Dense(1028,activation='relu')(inputs)
dense4 = Dense(1028,activation='relu')(dense1)
# Linear head: the outputs are regressed with MSE onto `reward + yr * max Q`,
# which is unbounded and may be negative. A softmax head would squash the
# outputs into (0, 1) summing to 1, so they could never match those targets.
output = Dense(4,activation='linear')(dense4)
model = Model(inputs=inputs,outputs=output)

In [0]:
# Target network: a structural clone of `model`, so the two architectures can
# never drift apart (transfer_weights() relies on matching weight shapes).
target_model = tf.keras.models.clone_model(model)
# clone_model creates fresh layers with newly initialised weights, so copy the
# online network's weights to start both networks from the same point.
target_model.set_weights(model.get_weights())

In [0]:
# lr: not referenced by any other cell shown in this notebook (the Adam
#     optimizer is constructed with its own rate).
# yr: discount factor applied to the best next-state value in train_target().
lr, yr = 0.80, 0.85

In [0]:
# One Adam instance and an identical MSE/accuracy configuration for both networks.
adam = Adam(lr=0.0001)
for network in (model, target_model):
    network.compile(optimizer=adam, loss='mse', metrics=['accuracy'])


## Memory for Experience Replay

In [0]:
# Replay buffer. The training loop appends one
# [state, new_state, reward, done, action_taken] entry per environment step.
memory = list()

## Training the model, whose weights will be transferred to the target model

In [0]:
def train_target(sample_size=32, max_memory=300):
    """Fit `model` on a random batch of transitions drawn from `memory`.

    For every sampled transition the regression target starts from
    `target_model`'s prediction for the state; the entry of the action that
    was taken is then overwritten with `reward` (terminal transition) or
    `reward + yr * max(target_model(new_state))` (non-terminal transition).

    Args:
        sample_size: number of transitions drawn per call. The call is a
            no-op while `memory` holds fewer entries than this.
        max_memory: once `memory` grows past this size, only the most recent
            `max_memory` transitions are kept.

    Returns:
        None. Side effects: updates the weights of `model` and may trim
        `memory` in place.
    """
    if len(memory) < sample_size:
        # random.sample would raise ValueError on an under-filled buffer.
        return
    samples = random.sample(memory, sample_size)

    # Keep a sliding window of recent experience instead of wiping the whole
    # buffer: emptying it would force the next batches to be drawn from only
    # the handful of newest, highly correlated transitions.
    if len(memory) > max_memory:
        del memory[:len(memory) - max_memory]

    states = np.array([sample[0] for sample in samples])
    new_states = np.array([sample[1] for sample in samples])

    # Two batched forward passes replace two predict() calls per sample.
    # NOTE(review): the base target comes from target_model rather than model,
    # so the non-taken actions are also pulled towards target_model -- confirm
    # this is intended.
    targets = target_model.predict(states)
    best_next = np.max(target_model.predict(new_states), axis=1)

    for row, (_, _, reward, done, action_taken) in enumerate(samples):
        if done:
            targets[row][action_taken] = reward
        else:
            targets[row][action_taken] = reward + yr * best_next[row]

    # batch_size=1 with shuffle=False performs one gradient step per sample in
    # sampling order, i.e. the same updates as fitting each sample separately,
    # but with a single fit() call.
    model.fit(states, targets, batch_size=1, shuffle=False, epochs=1, verbose=0)

## Function for transferring the weights of the model to the target model

In [0]:
def transfer_weights():
    """Overwrite all weights of `target_model` with the current weights of `model`.

    Both networks must have the same architecture so that the weight lists
    line up one-to-one.
    """
    target_model.set_weights(model.get_weights())

## Training Loop

In [0]:
# DIRECT ACTION REPLAY LEARNING
# Plays N_EPISODES episodes, stores every transition in `memory`, trains the
# online network during every TRAIN_EVERY-th episode, and records frames of
# every RENDER_EVERY-th episode for the training video.
# The virtual display started earlier in the notebook is reused; starting a
# second one here would only leak another Xvfb process.
N_EPISODES = 500
TRAIN_EVERY = 10    # train on each step of every 10th episode
RENDER_EVERY = 20   # record frames of every 20th episode
SYNC_EVERY = 5      # copy model -> target_model after every 5th episode
EPSILON_START, EPSILON_MIN, EPSILON_DECAY = 1.0, 0.05, 0.99

reward_all = []     # total reward collected in each training episode
img_array = []
fig = plt.figure()

# Begin with both networks in sync.
transfer_weights()

for i in tqdm(range(N_EPISODES)):
    # Epsilon-greedy exploration: a purely greedy policy only ever visits the
    # actions the untrained network happens to prefer.
    epsilon = max(EPSILON_MIN, EPSILON_START * EPSILON_DECAY ** i)
    reset = False
    state = env.reset()
    episode_reward = 0.0
    while not reset:
        if random.random() < epsilon:
            action_taken = env.action_space.sample()
        else:
            action_taken = int(np.argmax(model.predict(np.array([state]))))
        new_state, reward, reset, _ = env.step(action_taken)
        memory.append([state, new_state, reward, reset, action_taken])
        episode_reward += reward

        if i % TRAIN_EVERY == 0 and len(memory) > 32:
            train_target()
        if i % RENDER_EVERY == 0:
            obs = env.render(mode="rgb_array")
            img_array.append([plt.imshow(obs, animated=True)])

        state = new_state

    reward_all.append(episode_reward)
    # Sync once per episode rather than on every step, so the target network
    # stays fixed while the online network is being trained against it.
    if i % SYNC_EVERY == 0:
        transfer_weights()

video = ani.ArtistAnimation(fig, img_array, interval=20, repeat_delay=1)
env.close()

In [0]:
# Write the animation assembled from the recorded training frames to disk.
video.save(filename='training.mp4')

## Testing Loop

In [0]:
# Evaluate the greedy policy of `model` for N_TEST_EPISODES episodes, record
# every frame for the test video, and print the mean total reward per episode.
N_TEST_EPISODES = 10
reward_tot = []   # total reward of each finished episode
img = []
fig = plt.figure()
for i in range(N_TEST_EPISODES):
    reset = False
    state = env.reset()
    reward_all = []   # per-step rewards of the current episode
    while(not reset):
        action = model.predict(np.array([state]))
        new_state,reward,reset,_ = env.step(np.argmax(action))
        state = new_state
        reward_all.append(reward)
        if(reward > 50):
            print(reward)
        screen = env.render(mode='rgb_array')
        img.append([plt.imshow(screen,animated=True)])
    reward_tot.append(sum(reward_all))

test_video = ani.ArtistAnimation(fig,img,interval=20,repeat_delay = 1)
env.close()
plt.show()
# Average over the episodes actually played; dividing by a hard-coded 20
# would report half the true mean for 10 episodes.
print(sum(reward_tot)/len(reward_tot))

In [0]:
# Write the animation assembled from the recorded test frames to disk.
test_video.save(filename='test.mp4')

In [0]:
# Persist the online network to an HDF5 file.
model.save(filepath='lunarlander.h5')

In [0]:
# Reload the saved network from disk, replacing the in-memory `model`.
model = tf.keras.models.load_model(filepath='lunarlander.h5')