<a href="https://colab.research.google.com/github/Zeffar/Machine-Learning/blob/main/CartPole_v0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Simulate a virtual display, since Google Colab does not support OpenAI Gym's visualisation
!pip install gym pyvirtualdisplay > /dev/null 2>&1
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1
!pip install colabgymrender==1.0.2

In [None]:
#install the needed reinforcement learning modules
!pip install keras-rl2

In [None]:
#@title import modules
import gym
import random
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory
from colabgymrender.recorder import Recorder

In [None]:
# Create the environment.
# The CartPole-v0 environment is wrapped in colabgymrender's Recorder, which is
# handed './video' as its directory (presumably where recordings are written so
# that env.play() can show them later; confirm against colabgymrender).
env=gym.make('CartPole-v0')
directory = './video'
env = Recorder(env, directory)
# First dimension of the observation space's shape; passed to build_model as
# the network's input width.
states= env.observation_space.shape[0]
# Number of discrete actions; passed to build_model / build_agent as the
# network's output width and the agent's nb_actions.
actions=env.action_space.n

In [None]:
#@title score testing with random actions
# Baseline run: play a couple of episodes choosing actions uniformly at random
# and print the total reward collected in each one.
episodes = 2
for episode in range(1, episodes + 1):
    env.reset()
    done = False
    score = 0

    while not done:
        # Draw from the environment's own action count instead of a hard-coded
        # [0, 1], so the loop stays correct if the action space changes.
        action = random.randrange(env.action_space.n)
        # step() is unpacked as a 4-tuple; only reward and done are needed here.
        _obs, reward, done, _info = env.step(action)
        score += reward

    print(f'Episode:{episode} Score:{score}')
# Show what the Recorder wrapper captured during the episodes above.
env.play()

Episode:1 Score:17.0
Episode:2 Score:25.0


100%|██████████| 27/27 [00:00<00:00, 254.32it/s]


In [None]:
def build_model(states, actions):
    """Assemble the small dense network used as the agent's model.

    Args:
        states: Number of observation features; the model input has shape
            ``(1, states)``.
        actions: Number of units in the output layer (one per action).

    Returns:
        An uncompiled ``tf.keras.Sequential`` model: a Flatten layer, two
        24-unit ReLU layers, and a linear output layer with ``actions`` units.
    """
    hidden_units = 24
    layers = [
        # The leading 1 in the input shape is flattened away before the
        # dense stack.
        Flatten(input_shape=(1, states)),
        Dense(hidden_units, activation='relu'),
        Dense(hidden_units, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    return tf.keras.Sequential(layers)

In [None]:
# Instantiate the network with the observation width and action count taken
# from the environment.
model = build_model(states, actions)

In [None]:
def build_agent(model, actions):
    """Wrap ``model`` in a keras-rl ``DQNAgent``.

    Args:
        model: Keras model handed to the agent as its ``model``.
        actions: Number of actions, forwarded as ``nb_actions``.

    Returns:
        A ``DQNAgent`` that has not been compiled yet, configured with a
        50000-entry ``SequentialMemory`` (window length 1), a
        ``BoltzmannQPolicy``, 10 warm-up steps and a target-model update
        value of 1e-2.
    """
    replay_memory = SequentialMemory(limit=50000, window_length=1)
    action_policy = BoltzmannQPolicy()
    return DQNAgent(
        model=model,
        nb_actions=actions,
        memory=replay_memory,
        policy=action_policy,
        nb_steps_warmup=10,
        target_model_update=1e-2,
    )

In [None]:
# Build the agent, train it for 5000 steps and save its weights to disk.
dqn = build_agent(model, actions)
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=5000, visualize=False, verbose=1)
dqn.save_weights('dqn_weights.h5f', overwrite=True)
# Drop the trained objects; a later cell rebuilds them and reloads the weights
# from 'dqn_weights.h5f'.
del model
del dqn
# `env` is deliberately NOT deleted: the next cell calls env.play(), which
# would raise NameError on a top-to-bottom run if the environment were removed.

In [None]:
# Play back what the Recorder wrapper captured (presumably the most recent
# recorded run). Requires `env` from an earlier cell to still be bound.
env.play()

100%|█████████▉| 201/202 [00:00<00:00, 274.71it/s]


In [None]:
#@title loading and testing the trained model
# Recreate the environment, network and agent from scratch, then restore the
# weights that were saved after training.
env = gym.make('CartPole-v0')
directory = './video'
env = Recorder(env, directory)
states = env.observation_space.shape[0]
actions = env.action_space.n
model = build_model(states, actions)
dqn = build_agent(model, actions)
# `learning_rate` is the supported keyword; `lr` is only a deprecated alias.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

# Same file name that was passed to save_weights() in the training cell.
dqn.load_weights('dqn_weights.h5f')

In [None]:
# Run the restored agent for 50 test episodes with visualisation turned off;
# the value returned by test() is discarded.
_=dqn.test(env, nb_episodes=50, visualize=False)


Testing for 50 episodes ...
Episode 1: reward: 200.000, steps: 200
Episode 2: reward: 200.000, steps: 200
Episode 3: reward: 200.000, steps: 200
Episode 4: reward: 200.000, steps: 200
Episode 5: reward: 200.000, steps: 200
Episode 6: reward: 200.000, steps: 200
Episode 7: reward: 200.000, steps: 200
Episode 8: reward: 200.000, steps: 200
Episode 9: reward: 200.000, steps: 200
Episode 10: reward: 200.000, steps: 200
Episode 11: reward: 200.000, steps: 200
Episode 12: reward: 200.000, steps: 200
Episode 13: reward: 200.000, steps: 200
Episode 14: reward: 200.000, steps: 200
Episode 15: reward: 200.000, steps: 200
Episode 16: reward: 200.000, steps: 200
Episode 17: reward: 200.000, steps: 200
Episode 18: reward: 200.000, steps: 200
Episode 19: reward: 200.000, steps: 200
Episode 20: reward: 200.000, steps: 200
Episode 21: reward: 200.000, steps: 200
Episode 22: reward: 200.000, steps: 200
Episode 23: reward: 200.000, steps: 200
Episode 24: reward: 200.000, steps: 200
Episode 25: reward: 2

The agent achieved a perfect score (200) in 49 of 50 test episodes after 5,000 training steps. With 50,000 training steps, I could not find a single playthrough (out of more than 1,000 games played) in which it failed to get a perfect score.