In [None]:
## Data Aggregation (DAGGER) Model ##################################################################################

# Use DAgger (Dataset Aggregation) to play the Catcher game from PLE (PyGame Learning Environment)

# It took 3 DAgger iterations before the model had mastered the game!

###################################################################################################################

In [4]:
import import_ipynb
import numpy as np
import random
from ple import PLE
import pygame
import pickle 
import os.path
from datetime import datetime
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Reshape, Dense 
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import load_model
from tensorflow.keras.initializers import HeNormal, GlorotNormal

from game import Catcher
from utils import *

#########################################################################
# VARIABLES 

# WORKFLOW
IS_TRAINING = False

# ENVIRONMENT
RENDER = True
NB_FRAMES = 10000
WAIT = 30 

# TRAINING
SAVE = False # True to save user data file
TRAIN = False # True to train the model on data collected
SELECTING = False # True if want to load a file different to the one collected
CUSTOM_FILE = './Expert Data/2020-12-30_18-04-58.txt' # The custom file name 
RESUME = True # True if you want to load the current model and retrain

# MODEL
EPOCHS = 10
LEARNING_RATE = 0.0001

#########################################################################

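# SUMMARY #############################################################
# INPUTS: 
    # player x position
    # player velocity
    # fruit x position
    # fruit y position
# OUTPUTS: 
    # prob of staying
    # prob of turning right 
    # prob of turning left    
# NOTE(review): training targets are one-hot encoded via LabelEncoder, which
#   numbers labels in sorted order ('None', 'left', 'right') -- confirm that the
#   output order listed above matches the encoded columns.
######################################################################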

#---------------------------------------------------------------------
# Want to match the current state to the action
class Behavioural_Cloning():
        
    def __init__(self, lr=LEARNING_RATE):
        self.lr = lr
        self.model = self.create_model()        
        
    def create_model(self):
        model = Sequential()
        model.add(Reshape((4,), input_shape=(4,1)))
        model.add(Dense(100, activation='relu', kernel_initializer=HeNormal(seed=2)))
        model.add(Dense(25, activation='relu', kernel_initializer=HeNormal(seed=2)))
        model.add(Dense(3, activation='softmax', kernel_initializer=GlorotNormal(seed=2)))
        model.compile(optimizer=Adam(learning_rate=self.lr), 
                       metrics=["accuracy"],
                       loss="categorical_crossentropy")
        
        return model
    
# -----------------------------------------------------------------------


# Learning from the expert policy maker
if IS_TRAINING:
    
    # Initialise the environment
    pygame.init()
    game = Catcher(width=256, height=256)
    rand_state = random.randint(1, 100)
    game.rng = np.random.RandomState(rand_state)
    game.clock = pygame.time.Clock()
    game.screen = pygame.display.set_mode(game.getScreenDims(), 0, 32) # set screen to centre    
    game.init()
    
    inputs = []
    outputs = []
    
    for iter in range(NB_FRAMES):
        
        # sets the wait between frames
        dt = game.clock.tick_busy_loop(WAIT)
        
        state = game.getGameState()
        action = game.getKeyPressed()        
        
        # take a step 
        game.step(dt)
        pygame.display.update() 
        
        # save the data to the lists 
        inputs.append(state)
        outputs.append(action)
        
        if game.game_over() or iter > 1000: 
            pygame.quit()
            
            # how many rounds did the user run for? 
            print('Ran for: ' + str(iter) + ' frames' )
            
            dt_string = None            
            if SAVE:             
                # get current time + date 
                now = datetime.now()
                dt_string = now.strftime("%Y-%m-%d_%H-%M-%S")

                # save the file to pickle
                with open("./Expert Data/" + dt_string + ".txt", "wb") as data1:
                    pickle.dump([inputs, outputs], data1)
            
            if TRAIN and SAVE:
                print('Training the model')
                
                if SELECTING:
                    dataSource = CUSTOM_FILE                  
                
                else:     
                    dataSource = './Expert Data/' + dt_string + '.txt' 
                 
                # Unpickling
                with open(dataSource, "rb") as data1:   
                    expert_data = pickle.load(data1)

                # Format the dataset     
                inputs = expert_data[0] # the inputs 
                outputs = np.array(expert_data[1]).astype(str) # the outputs
                
                # get the split of values
                unique, counts = np.unique(outputs, return_counts=True)
                print(dict(zip(unique, counts)))
                
                # One hot encode data    
                label_encoder = LabelEncoder()
                onehot_encoder = OneHotEncoder(sparse=False)    
                integer_encoded = label_encoder.fit_transform(outputs)
                integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)    
                y = onehot_encoder.fit_transform(integer_encoded).reshape(3, len(integer_encoded))

                # import from utils -> perform preprocessing
                X = process_states(inputs)

                # split the data into training, validation and testing data
                # test: 20%, train: 60%, validation: 20%
                # reshaped again to utilise spliting function correctly
                X_train, X_test, y_train, y_test = train_test_split(X.reshape(X.shape[1], 4), y.reshape(y.shape[1], 3), test_size=0.2, random_state=42) 
                X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)
                
                # load in the correct model
                if os.path.isfile('model_imitation_dag_1.h5') and RESUME:
                    model = load_model('model_imitation_dag_1.h5')
                else:
                    model = Behavioural_Cloning().model  
                    
                # add some class weights
                class_weight = {0: 1., 1: 25., 2: 25.}

                # train the model using the selected data source 
                train_res = model.fit(X_train, y_train,
                                      validation_data=(X_val, y_val),
                                      epochs=EPOCHS,
                                      class_weight=class_weight,
                                      verbose=2) 
                
                test_res = model.evaluate(X_test, y_test,
                                          verbose=2)  
                
                print('Training Complete')
                
                # Save the model                
                #model.save('model_imitation_dag_1.h5')
                
                print('Model Saved')
                
            break          
           
        
        
# Testing the apprentice policy maker

else:
    
    # Initialise the environment
    pygame.init()
    game = Catcher(width=256, height=256)    
    rand_state = random.randint(1, 100)
    game.rng = np.random.RandomState(rand_state)
    game.clock = pygame.time.Clock()
    game.screen = pygame.display.set_mode(game.getScreenDims(), 0, 32) # set screen to centre   
    env = PLE(game, fps=60, display_screen=RENDER)
    env.init()
    
    # intialise the model
    model = load_model('./Weights/model_imitation_dag_1.h5')
    
    # get the possible actions
    possible_actions = game.actions 
    
    # for some reason breaks without this    
    env.act(env.NOOP)
    
    # collect the states and the perfect actions from the policy maker 
    inputs = []
    policy_outputs = []
    
    for iter in range(NB_FRAMES):
        
        # sets the wait between frames
        dt = game.clock.tick_busy_loop(WAIT)   
        
        # is the game over?
        if env.game_over(): 
            pygame.quit()
            
            # how many rounds did the user run for?             
            print('Ran for: ' + str(iter) + ' frames' )  
            
            dt_string = None            
            if SAVE:             
                # get current time + date 
                now = datetime.now()
                dt_string = now.strftime("%Y-%m-%d_%H-%M-%S")

                # save the file to pickle
                with open("./Expert Data/" + dt_string + "-Perfect.txt", "wb") as data1:
                    pickle.dump([inputs, policy_outputs], data1)
            
            if TRAIN and SAVE:
                print('Training the model')
                
                if SELECTING:
                    dataSource = CUSTOM_FILE                  
                
                else:     
                    dataSource = './Expert Data/' + dt_string + '-Perfect.txt' 
                 
                # Unpickling
                with open(dataSource, "rb") as data1:   
                    expert_data = pickle.load(data1)

                # Format the dataset     
                inputs = expert_data[0] # the inputs 
                outputs = np.array(expert_data[1]).astype(str) # the outputs
                
                # get the split of values
                unique, counts = np.unique(outputs, return_counts=True)
                unique_values = dict(zip(unique, counts))
                print(unique_values)
                
                # One hot encode data    
                label_encoder = LabelEncoder()
                onehot_encoder = OneHotEncoder(sparse=False)    
                integer_encoded = label_encoder.fit_transform(outputs)
                integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)    
                y = onehot_encoder.fit_transform(integer_encoded).reshape(3, len(integer_encoded))

                # import from utils -> perform preprocessing
                X = process_states(inputs)

                # split the data into training, validation and testing data
                # test: 20%, train: 60%, validation: 20%
                # reshaped again to utilise spliting function correctly
                X_train, X_test, y_train, y_test = train_test_split(X.reshape(X.shape[1], 4), y.reshape(y.shape[1], 3), test_size=0.2, random_state=42) 
                X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25, random_state=42)
                
                # load in the correct model
                if os.path.isfile('./Weights/model_imitation_dag_1.h5') and RESUME:
                    model = load_model('./Weights/model_imitation_dag_1.h5')
                else:
                    model = Behavioural_Cloning().model 
                    
                max_class = max([unique_values['None'], unique_values['left'], unique_values['right']])
                
                # add some class weights
                class_weight = {0: max_class/unique_values['None'],
                                1: max_class/unique_values['left'],
                                2: max_class/unique_values['left']}

                # train the model using the selected data source 
                train_res = model.fit(X_train, y_train,
                                      validation_data=(X_val, y_val),
                                      epochs=EPOCHS,
                                      class_weight=class_weight,
                                      verbose=2) 
                
                test_res = model.evaluate(X_test, y_test,
                                          verbose=2)  
                
                print('Training Complete')
                
                # Save the model                
                model.save('model_imitation_dag_1.h5')
                
                print('Model Saved')
                
            break        
        
        # get the current game state
        state = env.getGameState() 
        
        #  get ideal action
        perfect_action = get_teacher_action(state)         
        
        # decide the current action 
        # state must be in [] for iteration in process states  
        # using model instead of model.predict is so much faster!
        action_prob = model(process_states([state]).reshape(1, len(state)), training=False) 
        selected_action = np.argmax(action_prob[0])
        
        env.act(possible_actions[selected_action])        
        
        # save the data to the lists 
        inputs.append(state)
        policy_outputs.append(perfect_action)
        

KeyboardInterrupt: 