In [1]:
import import_ipynb
import tensorflow as tf
from tensorflow.keras import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
import tensorflow_probability as tfp
import os
import random 

from Utils import *

# Code for this section used the following source for reference: 
# https://blog.floydhub.com/spinning-up-with-deep-reinforcement-learning/

class Model():
    """Factory wrapper around the small Keras network used as the policy.

    NOTE: this class name shadows the ``Model`` imported from
    ``tensorflow.keras`` at the top of the file; after this definition the
    name ``Model`` refers to this wrapper.

    Attributes:
        model: the compiled ``Sequential`` network returned by
            ``create_model``.
    """

    def __init__(self):
        self.model = self.create_model()

    def create_model(self):
        """Build and compile the policy network.

        The network has one hidden ``Dense`` layer (8 units, ReLU, He-normal
        initialisation) followed by a single sigmoid unit, and is compiled
        with binary cross-entropy loss and an Adam optimizer (learning rate
        0.01). No input shape is declared here, so the layers are built on
        first use.

        Returns:
            The compiled ``Sequential`` model.
        """
        model = Sequential()
        model.add(Dense(8, activation='relu', kernel_initializer='he_normal'))
        model.add(Dense(1, activation='sigmoid'))
        # `learning_rate` is the supported keyword; the older `lr` alias is
        # deprecated and rejected by newer Keras releases.
        model.compile(
            loss='binary_crossentropy',
            optimizer=Adam(learning_rate=0.01),
            metrics=['accuracy'],
        )

        return model
    
class backup_agent():
    """Policy-gradient agent choosing between two actions (0 and 1).

    The policy network outputs the probability of action 1. Training fits
    the network to the actions that were taken, weighting every sample by
    its normalised discounted return.

    Attributes:
        gamma: discount factor applied when accumulating rewards.
        model: the compiled Keras model obtained from ``Model().model``.
        dims: number of features in a single state (fixed to 4).
    """

    def __init__(self, gamma):
        self.gamma = gamma
        self.model = Model().model
        self.dims = 4

    def act(self, state):
        """Sample an action for ``state`` from the current policy.

        Args:
            state: array-like holding ``self.dims`` values.

        Returns:
            1 with the probability output by the model, otherwise 0.
        """
        observation = np.array(state).reshape(1, self.dims)
        prob = self.model(observation, training=False)
        # The model returns a (1, 1) tensor; pull out the scalar explicitly
        # instead of relying on the truthiness of a tensor comparison.
        p_action_one = float(np.asarray(prob).reshape(-1)[0])

        return 1 if np.random.uniform() < p_action_one else 0

    @staticmethod
    def _discounted_returns(rewards, gamma):
        """Return the discounted running sum of ``rewards`` as a float array.

        The running sum is reset whenever a nonzero reward is encountered
        (scanning backwards), so each nonzero reward starts a new segment.
        Consequently, when every reward is nonzero the result equals the raw
        rewards.
        NOTE(review): the reset suits environments whose only nonzero reward
        marks the end of a round - confirm it matches the environment in use.
        """
        # Force float so integer rewards are not truncated when the
        # fractional discounted values are stored.
        r = np.array(rewards, dtype=float).reshape(-1)
        discounted = np.zeros_like(r)
        running_add = 0.0
        for t in reversed(range(r.size)):
            if r[t] != 0:
                running_add = 0.0
            running_add = running_add * gamma + r[t]
            discounted[t] = running_add
        return discounted

    @staticmethod
    def _normalize(values):
        """Centre ``values`` on zero and scale to unit standard deviation.

        When all values are equal the standard deviation is zero; the scale
        step is skipped in that case so the result is all zeros rather than
        NaN.
        """
        centred = values - np.mean(values)
        spread = np.std(centred)
        if spread > 0:
            centred = centred / spread
        return centred

    def train(self, states, rewards, actions):
        """Fit the policy on one batch of experience.

        Args:
            states: sequence of states, each with ``self.dims`` values.
            rewards: sequence of per-step rewards, same length as ``actions``.
            actions: sequence of the actions (0 or 1) that were taken.

        Raises:
            ValueError: if ``rewards`` and ``actions`` differ in length.

        The inputs are not modified. Nothing happens for an empty batch.
        """
        n_samples = len(actions)
        if n_samples == 0:
            return

        weights = self._normalize(self._discounted_returns(rewards, self.gamma))
        if weights.size != n_samples:
            raise ValueError(
                'rewards and actions must have the same length '
                '(got %d and %d)' % (weights.size, n_samples)
            )

        x_train = np.array(states).reshape(n_samples, self.dims)
        y_train = np.array(actions).reshape(n_samples, 1)

        # One shared permutation keeps every state aligned with its action
        # and its sample weight while still shuffling the batch.
        order = random.sample(range(n_samples), n_samples)
        self.model.fit(x_train[order], y=y_train[order],
                       verbose=1,
                       sample_weight=weights[order])

    def save_model(self, name):
        """Save the model weights to ``name``."""
        self.model.save_weights(name)

    def load_model(self, name):
        """Load model weights from ``name`` if that file exists.

        Does nothing when ``name`` is not an existing file.
        """
        if os.path.isfile(name):
            self.model.load_weights(name)
            # Report only after the weights were actually loaded.
            print('Model Loaded')
    

importing Jupyter notebook from Utils.ipynb
