Installing the necessary dependencies and Importing the Libraries

In [None]:
!pip install mss
!pip install tqdm
!pip install tensorflow
!pip install time
!pip install PIL
!pip install keyboard
!pip install opencv-python
!pip install visualkeras

In [1]:
import numpy as np
import cv2
from mss import mss
from PIL import Image, ImageEnhance,   ImageOps
import keyboard
import time
import tqdm as tqdm                                                              
import random
import tensorflow as tf 
from tensorflow.keras.models import model_from_json
from tensorflow.keras.layers import Dense, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
import h5py

The Environment class interacts with the actual chrome dino game, and gathers screenshots for analysis and to determine when the game is done.

In [2]:

class Environment:
    ones = np.ones((76, 384, 4))
    zeros = np.zeros((76, 384, 4))
    zeros1 = np.zeros((76, 384, 4))
    zeros2 = np.zeros((76, 384, 4))
    zeros3 = np.zeros((76, 384, 4))
    zeros4 = np.zeros((76, 384, 4))
    
    # These are the actions that the bot can take
        ### space is jumping over obstacles like the cacti
        ### down is ducking under obstacles like the birds
        ### there is an extra 2 thats not here that acts as a do nothing action
    actions = {0: 'space', 1: 'down'}
    
    def __init__(self): 
        self.mon = {'top': 243, 'left': 0, 'width': 1366, 'height': 270} 
        self.sct = mss()
        self.counter = 0
        self.startTime = -1
        self.imageSet = []
        self.imageSetLength = 4 #number of frames for the conv net
        self.actionMemory = 2    #no action taken 
        
        #image processing
        self.zeros1[:,:,0] = 1
        self.zeros2[:,:,1] = 1
        self.zeros3[:,:,2] = 1
        self.zeros4[:,:,3] = 1

    def start_Game(self):
        # a countdown for the user to switch tabs to the Game
        print("Start")
        for i in reversed(range(5)):
            print("game starting in ", i)
            time.sleep(1)

    def step(self, action):
        print("Step: ", action)
        try:
            keyboard.release(self.actions.get(self.actionMemory))
            if action != 2:
                keyboard.press(self.actions.get(action))
                time.sleep(0.5)
                keyboard.release(self.actions.get(action))
        except Exception as e:
            print(" ")
            
        self.actionMemory = action

        #Screenshot is taken
        screenshot = self.sct.grab(self.mon)
        img = np.array(screenshot)[:, :, 0]
        processedImg = self._processImage(img)
        state = self._imagesHandler(processedImg)
        done = self._isDone(processedImg)
        reward = self._getReward(done)
        return state, reward, done

    def _processImage(self, img): #changed
        img = Image.fromarray(img)
        img = img.resize((384, 76), Image.LANCZOS)
        if np.sum(img) > 2000000:
            img = ImageOps.invert(img)

        min_val = 32
        max_val = 171
        img = np.clip(img, min_val, max_val)
        img = ((img - min_val) / (max_val - min_val))

        img = np.reshape(img, (76, 384))
        return img

    
    def _imagesHandler(self, img):
        img = np.array(img)
        
        while len(self.imageSet) < (self.imageSetLength): 
             self.imageSet.append(np.reshape(img,(76,384,1)) * self.ones)
        
        imgset = np.array(self.imageSet)

        img1 = (np.reshape(img, (76, 384, 1)) * self.ones) * self.zeros1
        img2 = imgset[0] * self.zeros2
        img3 = imgset[1] * self.zeros3
        img4 = imgset[2] * self.zeros4

        toReturn = np.array(img1 + img2 + img3 + img4)
        self.imageSet.pop(0)
        self.imageSet.append(np.reshape(img,(76 ,384,1)) * self.ones)
        return toReturn

    
    #the reward system which penalizes the game being over and rewards more time being spent in the game
    def _getReward(self,done):
        #print("Get reward")
        if done:
            return -20
        else: 
            return time.time() - self.startTime
        
    #Checking if the game over    
    def _isDone(self,img):
        
        img = np.array(img)
        img  = img[30:50, 180:203]

        val = np.sum(img)
        #If the game ends during the night mode
        expectedVal1 = 331.9352517985612 
        #If the game ends during the day mode
        expectedVal2 = 243.53

        if np.absolute(val-expectedVal1) > 15 and np.absolute(val-expectedVal2) > 15: #seems to work well    
            return False
        
        #print("Game Over")
        self.reset()
        return True
    
    #if the game is done, it needs to be reset which can be done so by pressing the spacebar
    def reset(self):
        #print("Reset")
        self.startTime = time.time()
        keyboard.press("space")
        time.sleep(0.5)
        keyboard.release("space")
        return self.step(0)

The Agent Class which holds the actual brains of the AI, that is the Convolutional Neural Network.


In [3]:
class Agent:
    def __init__(self):
        #CNN
        model = Sequential([ 
            Conv2D(32, (8,8), input_shape=(76, 384, 4),
                   strides=(2,2), activation='relu'),
            MaxPooling2D(pool_size=(5,5), strides=(2, 2)),
            Conv2D(64, (4,4), activation='relu', strides=(1,1)),
            MaxPooling2D(pool_size=(7, 7), strides=(3, 3)),
            Conv2D(128, (1, 1), strides=(1,1), activation='relu'),
            MaxPooling2D(pool_size=(3,3), strides=(3,3)),
            Flatten(),
            Dense(384, activation='relu'),
            Dense(64, activation="relu"),
            Dense(8, activation="relu"),
            Dense(3, activation="linear"),
        ])
        model.compile(loss='mean_squared_error', optimizer=Adam(learning_rate=0.0001))

        #Pretrained weights
        #this can only be uncommented out after running the first episode
        #model.load_weights("DinoGame.h5")
        
        self.model = model
        self.memory = []
        # print(self.model.summary()) 
        self.x_Train = []
        self.y_Train = []
        self.loss = []
        self.location = 0


    def act(self, state):
        print("Act")
        
        #prediction
        stateConv = state
        q_value = self.model.predict(np.reshape(stateConv, (1, 76, 384, 4)))
        print(q_value)
        
        #Softmax actions.
        prob = tf.nn.softmax(tf.math.divide((q_value.flatten()), 1)) 
        action = np.random.choice(range(3), p=np.array(prob))

        return action
    
    # Storing experiences
    def remember_experiences(self, state, nextState, action, done, reward, location):
        self.location = location
        self.memory.append(np.array([state, nextState, action, reward, done]))

    #AI's learning happens here 
    def learn(self):
        
        self.batchSize = 256  #The number of experiences the AI learns from every round

        #To avoid GPU running out of memory, we trim the memory
        if len(self.memory) > 35000:
            self.memory = []
            print("memory trimmed")
        if len(self.memory) < self.batchSize:
            print("not enough information")
            return  
        batch = random.sample(self.memory, self.batchSize)

        self.learnBatch(batch)

    # The alpha value is the learning rate and controls how far the algorithm
    # moves in the direction of the gradient at each step
    def learnBatch(self, batch, alpha=0.8):
        batch = np.array(batch)
        actions = batch[:, 2].reshape(self.batchSize).tolist()
        rewards = batch[:, 3].reshape(self.batchSize).tolist()

        stateToPredict = batch[:, 0].reshape(self.batchSize).tolist()
        nextStateToPredict = batch[:, 1].reshape(self.batchSize).tolist()

        statePrediction = self.model.predict(np.reshape(
            stateToPredict, (self.batchSize, 76, 384, 4)))
        nextStatePrediction = self.model.predict(np.reshape(
            nextStateToPredict, (self.batchSize, 76, 384, 4)))
        statePrediction = np.array(statePrediction)
        nextStatePrediction = np.array(nextStatePrediction)

        for i in range(self.batchSize):
            action = actions[i]
            reward = rewards[i]
            nextState = nextStatePrediction[i]
            qval = statePrediction[i, action]
            if reward < -5: 
                statePrediction[i, action] = reward
            else:
                #this is the q learning update rule
                statePrediction[i, action] += alpha * (reward + 0.95 * np.max(nextState) - qval)

        self.xTrain.append(np.reshape(
            stateToPredict, (self.batchSize, 76, 384, 4)))
        self.yTrain.append(statePrediction)
        history = self.model.fit(
            self.x_Train, self.y_Train, batch_size=10, epochs=1, verbose=0)
        loss = history.history.get("loss")[0]
        print("The LOSS incured is: ", loss)
        self.loss.append(loss)
        self.x_Train = []
        self.y_Train = []

This is where instances of the Agent and Environment are initialized and the AI begins to play the game repeatedly to learn from each experience. It remembers the rewards obtained from each experience and eventually learns to make progress in the game.

For our program, we have set the AI to run for 3000 episodes to give it enough time to learn enough about the game.

In [4]:
while True:
    agent = Agent()  # Currently, the agent is configured with only 2 actions
    env = Environment()
    env.start_Game()
    # Number of episodes
    for i in tqdm.tqdm(range(3000)):  
        print("Starting episode: ", i)
        state, _, _ = env.reset()
        episodeReward = 0
        episodeTime = time.time()
        step_Counter = 0

        while True:
            action = agent.act(state)
            nextState, reward, done = env.step(action)

            if step_Counter > 500:
                for _ in range(3):
                    agent.remember_experiences(state, nextState, action, done, reward, step_Counter)
            elif step_Counter > 50:
                agent.remember_experiences(state, nextState, action, done, reward, step_Counter)

            if env._isDone(state):  # Game over
                for _ in range(8):
                    agent.remember_experiences(state, nextState, action, done, reward, step_Counter)
                print("Done")
                break

            state = nextState
            step_Counter += 1
            episodeReward += reward

        # After each episode
        plotX.append(episodeReward)
        print(episodeReward)
        agent.learn()

        print("Ending episode: ", i)

        if i % 1 == 0:
            agent.model.save_weights("DinoGame.h5")
            print("Model has been saved to disk")


Start
game starting in  4
game starting in  3
game starting in  2
game starting in  1
game starting in  0


  0%|                                                                                         | 0/3000 [00:00<?, ?it/s]

Starting episode:  0
Step:  0
 
Act
[[ 0.00069102 -0.00355709 -0.00115616]]
Step:  0
Act
[[-0.00179401 -0.00348499  0.00024564]]
Step:  2
Act
[[ 0.003185   -0.00248183 -0.00420454]]
Step:  2
 
Act
[[-0.0066451  -0.01848564  0.00083567]]
Step:  2
 
Act
[[ 0.00340073  0.01638164 -0.01131174]]
Step:  1
 
Act
[[ 0.0015539  -0.02293156 -0.0063791 ]]
Step:  2
Act
[[ 0.00491183 -0.00122643 -0.00959862]]
Step:  1
 
Act
[[ 0.0034018   0.00051297 -0.01040369]]
Step:  1
Act
[[ 0.0047557  -0.00525747 -0.00976538]]
Step:  0
Act
[[ 0.00230728 -0.01327455 -0.00358895]]
Step:  2
Act
[[ 0.007258   -0.01169495 -0.01094625]]
Step:  2
 
Act
[[-0.0076182  -0.01616421 -0.00178405]]
Step:  0
 
Act
[[-0.0004052   0.00409503 -0.00073589]]
Step:  2
Act
[[-0.00204677 -0.04556509 -0.01428864]]
Step:  0
 
Act
[[ 0.00629761 -0.0185478  -0.01175026]]
Step:  1
Act
[[ 0.00685117 -0.02151773 -0.01039703]]
Step:  2
Act
[[ 0.00752421 -0.02469858 -0.00954982]]
Step:  1
 
Act
[[ 0.00753435 -0.01913883 -0.0103769 ]]
Step:  

  self.memory.append(np.array([state, nextState, action, reward, done]))


[[-0.01092983  0.0062526  -0.00775859]]
Step:  0
Act
[[ 0.00041182  0.02063331 -0.00410434]]
Step:  2
Act
[[ 0.03867001 -0.05013105 -0.02205394]]
Step:  2
 
Act
[[ 0.03255482 -0.03669397  0.00150096]]
Step:  2
 
Act
[[ 0.05367313 -0.02050873 -0.02329886]]
Step:  2
 
Act
[[ 0.05261634 -0.0419375  -0.02623   ]]
Step:  0
 
Act
[[ 0.08658548 -0.09153372 -0.02782496]]
Step:  1
Act
[[ 0.08658548 -0.09153372 -0.02782496]]
Step:  1
Act
[[ 0.08658548 -0.09153372 -0.02782496]]
Step:  2
Act
[[ 0.08658548 -0.09153372 -0.02782496]]
Step:  0
 
Act
[[ 0.08658548 -0.09153372 -0.02782496]]
Step:  1
Act
[[ 0.06864689 -0.08136866 -0.02014063]]
Step:  1
Act
[[ 0.06838775 -0.0786404  -0.02037485]]
Step:  2
Act
[[ 0.0508524  -0.12152492 -0.02327513]]
Step:  0
 
Act
[[ 0.03371954 -0.0563836  -0.01148448]]
Step:  0
Act
[[ 0.00820333  0.00636436 -0.01360483]]
Step:  2
Act
[[-0.00085136 -0.00640122 -0.00424355]]
Step:  2
 
Act
[[-0.00111513  0.00405136 -0.00509151]]
Step:  2
 
Act
[[ 0.01839517  0.01139731 -0.0

  0%|                                                                                       | 0/3000 [2:11:37<?, ?it/s]


KeyboardInterrupt: 