In [None]:
import argparse
import random
import time
from collections import deque

import cv2

from deepgtav.client import Client
from deepgtav.messages import Start, Stop, Scenario, Commands, frame2numpy

In [None]:
from __future__ import division, print_function

import os, json
from glob import glob
import numpy as np
from scipy import misc, ndimage
from scipy.ndimage.interpolation import zoom

import keras
from keras.callbacks import ModelCheckpoint
from keras import backend as K
from keras.layers.normalization import BatchNormalization
from keras.models import Sequential, load_model,  Model
from keras.layers.core import Flatten, Dense, Dropout, Lambda
from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.layers.pooling import GlobalAveragePooling2D
from keras.optimizers import Adam, SGD
from keras.preprocessing import image
from keras.applications import VGG16
from keras.layers import Input, Lambda, Cropping2D, Activation, ELU
from keras.layers.merge import add, concatenate
from keras.models import model_from_json

%matplotlib inline

In [None]:
# Directory holding the serialized model architecture (JSON) and weight files.
# Set the WHEELAI_RESULTS_PATH environment variable to point somewhere else;
# without it the original hard-coded location is used unchanged.
# os.path.join(..., '') guarantees a trailing separator, which get_model()
# relies on because it builds file paths by plain string concatenation.
results_path = os.path.join(
    os.environ.get('WHEELAI_RESULTS_PATH', 'D:/ML_DATA/wheelai/gtav/results/'),
    '',
)

In [None]:
# --- Q-learning settings -----------------------------------------------------
# Discount applied to the bootstrapped next-state value in experience_replay().
gamma = 0.975

# NOTE(review): epsilon / e_min / e_decay look like an epsilon-greedy schedule
# (start value, floor, per-step decay), but no visible cell reads them — confirm.
epsilon = 1.0
e_min = 0.01
e_decay = 0.99

# --- Replay buffer -----------------------------------------------------------
# Bounded FIFO of (state, action, reward, next_state) tuples; once 50000 entries
# are stored, appending discards the oldest one.
experience = deque([], 50000)
# NOTE(review): not referenced by the visible cells (the driving loop uses a
# literal 100 as its warm-up threshold) — confirm which value is intended.
min_experience = 1000

# The driving loop copies the online weights into the target network whenever
# its counter is a multiple of this value.
target_update_freq = 10

# --- Network input / output dimensions ---------------------------------------
state_size = 160, 320, 3
# A plain int: it is used directly as an array dimension in experience_replay().
action_size = 3

In [None]:
def add_to_experience(state, action, reward, next_state):
    """Store one transition in the replay buffer.

    The four arguments are packed, in order, into a
    ``(state, action, reward, next_state)`` tuple and appended to the
    module-level ``experience`` container, which is mutated in place.
    """
    transition = (state, action, reward, next_state)
    experience.append(transition)
    

def experience_replay(batch_size, model, target_model):
    """Fit ``model`` for one epoch on a random minibatch of stored transitions.

    Targets follow the Double-DQN rule: ``model`` picks the best next action,
    ``target_model`` supplies the value of that action, and only the Q-value
    of the action actually taken is replaced by
    ``reward + gamma * target_value``.

    Parameters
    ----------
    batch_size : int
        Maximum number of transitions to sample; clipped to the number of
        transitions currently held in the module-level ``experience`` buffer.
    model : object exposing ``predict`` and ``fit``
        Online network; it is trained in place.
    target_model : object exposing ``predict``
        Network used to evaluate the selected next action.

    Returns
    -------
    None

    Notes
    -----
    Each stored state is expected to be a single un-batched frame (the array
    passed to ``add_to_experience``); the batch axis is created here by
    stacking, so array shapes are taken from the data itself rather than from
    the ``state_size`` / ``action_size`` globals.
    """
    batch_size = min(batch_size, len(experience))
    if batch_size == 0:
        # Nothing stored yet: there is nothing to sample or to fit on.
        return

    minibatch = random.sample(experience, batch_size)
    states, actions, rewards, next_states = zip(*minibatch)

    # Stack the frames into (batch, ...) arrays so every network is queried
    # once per minibatch instead of once per transition.
    X = np.stack(states).astype(np.float32)
    next_X = np.stack(next_states).astype(np.float32)
    rows = np.arange(batch_size)

    # Start from the current predictions so the error is zero for every action
    # that was not taken; np.array() makes a writable copy.
    y = np.array(model.predict(X, batch_size=batch_size, verbose=0))

    # Double DQN: the online network selects, the target network evaluates.
    best_next = np.argmax(
        model.predict(next_X, batch_size=batch_size, verbose=0), axis=1)
    next_q = target_model.predict(next_X, batch_size=batch_size, verbose=0)

    taken = np.asarray(actions, dtype=int)
    y[rows, taken] = np.asarray(rewards) + gamma * next_q[rows, best_next]

    model.fit(X, y, batch_size=batch_size, epochs=1, verbose=0)
    

In [None]:
# NOTE(review): this wildcard import presumably supplies PressKey, ReleaseKey and
# the W / A / D key codes used by the steering helpers below — confirm against
# utils.py, and prefer an explicit import list once the exported names are known.
from utils import *
# Seconds left() / right() keep the steering key (A or D) pressed before releasing it.
t_time = 0.11

def straight():
    """Press the W key and release both steering keys (A, then D)."""
    PressKey(W)
    for steering_key in (A, D):
        ReleaseKey(steering_key)
    
def left():
    """Press W and A, release D, wait ``t_time`` seconds, then release A.

    The call blocks for ``t_time`` seconds. W is not released here.
    """
    for pressed_key in (W, A):
        PressKey(pressed_key)
    ReleaseKey(D)
    # Keep A down for the configured duration before letting go.
    time.sleep(t_time)
    ReleaseKey(A)

def right():
    """Press W and D, release A, wait ``t_time`` seconds, then release D.

    The call blocks for ``t_time`` seconds. W is not released here.
    """
    for pressed_key in (W, D):
        PressKey(pressed_key)
    ReleaseKey(A)
    # Keep D down for the configured duration before letting go.
    time.sleep(t_time)
    ReleaseKey(D)
    
def take_action(action):
    """Execute the steering command that corresponds to an action index.

    Parameters
    ----------
    action : int
        ``1`` calls ``left()``, ``2`` calls ``right()``; any other value
        calls ``straight()``.
    """
    # Dispatch on the argument itself; the previous body read an undefined
    # name (``max_index``) and raised NameError on every call.
    if action == 1:
        left()
    elif action == 2:
        right()
    else:
        straight()

In [None]:
def get_model(model, weights):
    """Load a model architecture and its weights from ``results_path`` and compile it.

    Parameters
    ----------
    model : str
        File name, relative to ``results_path``, of the JSON architecture file.
    weights : str
        File name, relative to ``results_path``, of the weights file.

    Returns
    -------
    The model built by ``model_from_json``, with the weights loaded and
    compiled with the ``'adam'`` optimizer and ``'mse'`` loss.

    Raises
    ------
    OSError
        If the architecture file cannot be opened or read.
    """
    # results_path is concatenated as-is, so it must end with a path separator.
    model_path = results_path + model
    weights_path = results_path + weights

    # The context manager closes the file even when reading fails.
    with open(model_path, 'r') as json_file:
        model_json = json_file.read()

    loaded_model = model_from_json(model_json)
    # load weights into the freshly built model
    loaded_model.load_weights(weights_path)
    print("Loaded model from disk")

    loaded_model.compile(optimizer='adam', loss='mse')
    return loaded_model

In [None]:
# Online network: predict() queries it to choose actions and it is the model
# that experience_replay() fits.
model = get_model('vgg.json', 'vgg_ft.h5')
# Target network: loaded from the same two files, so it starts out with the same
# weights as `model`; the driving loop periodically overwrites its weights with
# those of `model`.
target_model = get_model('vgg.json', 'vgg_ft.h5')

In [None]:
def predict(img):
    """Return the index of the largest output of the global ``model`` for one frame.

    Parameters
    ----------
    img : numpy.ndarray
        A single un-batched input; a leading batch axis of length 1 is added
        before it is passed to ``model.predict``.

    Returns
    -------
    Integer index (``np.argmax``) into the first row of the model output.
    """
    # Use the argument; the previous body expanded an unassigned local ``x``
    # and therefore raised UnboundLocalError on every call.
    batch = np.expand_dims(img, axis=0)
    return np.argmax(model.predict(batch, batch_size=1, verbose=0)[0])

In [None]:
# Open the connection to the DeepGTAV server (expected on localhost:8000).
client = Client(ip='localhost', port=8000)

# Manual driving mode; every other scenario option is left unset, which per the
# original note means random time, weather and location.
# See deepgtav/messages.py for the options that are supported.
scenario = Scenario(drivingMode=-1)  # -1 = manual driving

# Send the Start request to DeepGTAV. No dataset is passed, so the default applies;
# per the original note that means frames only, at 10Hz and 320x160.
# NOTE(review): the driving loop reads message['reward'] — confirm that the default
# dataset actually includes a reward field, otherwise pass an explicit dataset here.
client.sendMessage(Start(scenario=scenario))

In [None]:
# Drive and learn for at most 80 hours of wall-clock time; interrupt the kernel
# to stop earlier.
stoptime = time.time() + 80*3600
# Number of transitions stored so far in this run; it schedules the
# target-network sync (the original code read an undefined `episode` here).
step = 0

try:
    while time.time() < stoptime:
        # Each message is a Python dictionary; the frame becomes a numpy array.
        message = client.recvMessage()
        state = frame2numpy(message['frame'], (320,160))
        action = predict(state)

        take_action(action)

        # Observe the outcome of the action.
        message = client.recvMessage()
        next_state = frame2numpy(message['frame'], (320,160))
        # NOTE(review): assumes DeepGTAV was started with a dataset that
        # includes 'reward' — confirm, otherwise this raises KeyError.
        reward = message['reward']

        # Training starts once 100 transitions were collected before this one.
        warmed_up = len(experience) >= 100
        add_to_experience(state, action, reward, next_state)
        step += 1

        if warmed_up:
            if step % target_update_freq == 0:
                # copy weights from model to target_model
                target_model.set_weights(model.get_weights())
            experience_replay(32, model, target_model)
except KeyboardInterrupt:
    # A manual interrupt is the normal way to end the session early.
    pass
finally:
    # Always stop DeepGTAV and release the connection, whatever ended the
    # loop; any other exception still propagates after this cleanup.
    try:
        client.sendMessage(Stop())
        print('stopped')
    finally:
        client.close()
        print('disconnected')