# **1. Crear y explorar el entorno de Space Invaders**

In [None]:
!pip install keras-rl2 gym[atari] autorom[accept-rom-license]

In [None]:
import gym
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Flatten,Conv2D

from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy, LinearAnnealedPolicy
from rl.callbacks import ModelIntervalCheckpoint
from rl.core import Processor 

In [None]:
# Create the Space Invaders environment through gym's registry.
env=gym.make('SpaceInvaders-v0')

In [None]:
# Unpack the observation shape; width and height are reused later as the
# frame size of the output videos.
height,width,channels=env.observation_space.shape

In [None]:
# Size of the action space; used as the width of the model's output layer
# and as nb_actions when building the agent.
actions=env.action_space.n

In [None]:
# Display the action meanings reported by the unwrapped environment.
env.unwrapped.get_action_meanings()

In [None]:
# Display the action count.
actions

In [None]:
# Reset the environment before stepping it.
env.reset()

In [None]:
# Take a single step with action index 0; the result is unpacked as a
# 4-tuple (observation, reward, done flag, info).
state_next,reward,done,info=env.step(0)

In [None]:
# Display the info value returned by the step.
info

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Show the full observation returned by the step.
plt.imshow(state_next)

In [None]:
# Show only rows 0-194 of the observation -- the same crop that
# process_img applies before resizing.
plt.imshow(state_next[0:195,:,:])

# **2. Crear modelo CNN + RL**

In [None]:
def buildModel():
    """Assemble the convolutional Q-value network.

    Three ReLU convolution layers are followed by a flatten step, two
    ReLU dense layers (512 and 256 units) and a linear output layer with
    one unit per action. The output width is read from the module-level
    ``actions`` variable.

    Returns:
        An uncompiled ``Sequential`` model.
    """
    # NOTE(review): the leading 4 in input_shape presumably corresponds to
    # window_length=4 of the replay memory used by the agent -- confirm.
    stack=[
        Conv2D(32, (8,8), strides=(4,4), activation='relu', input_shape=(4,84,84,1)),
        Conv2D(64, (4,4), strides=(2,2), activation='relu'),
        Conv2D(64, (3,3), activation='relu'),
        Flatten(),
        Dense(512, activation='relu'),
        Dense(256, activation='relu'),
        Dense(actions, activation='linear'),
    ]
    return Sequential(stack)

In [None]:
# Instantiate the network; buildModel reads the global `actions` for the
# output layer width.
model=buildModel()

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
import cv2
def process_img(observation):
    """Turn a raw RGB frame into a small grayscale network input.

    The frame is copied, cropped to its first 195 rows, resized to
    84x84, converted from RGB to grayscale, given a trailing channel
    axis and divided by 255.

    Args:
        observation: image array indexed as (row, column, channel).
            Presumably a uint8 RGB frame from the environment, in which
            case the result lies in [0, 1] -- confirm against the caller.

    Returns:
        A floating-point array of shape (84, 84, 1).
    """
    cropped=observation.copy()[0:195,:,:]
    resized=cv2.resize(cropped,(84,84))
    gray=cv2.cvtColor(resized,cv2.COLOR_RGB2GRAY)
    # Append the channel axis before scaling.
    return gray[:,:,np.newaxis]/255

In [None]:
class AtariProcessor(Processor):
    """Processor hooks that preprocess frames and clip rewards."""

    def process_observation(self,observation):
        """Return the observation after passing it through ``process_img``."""
        frame=process_img(observation)
        return frame

    def process_reward(self,reward):
        """Return the reward clipped to the interval [-1, 1]."""
        lower,upper=-1.,1.
        return np.clip(reward,lower,upper)

In [None]:
# Module-level processor instance; buildAgent reads this global rather
# than receiving it as a parameter.
processor=AtariProcessor()

In [None]:
def buildAgent(model,actions):
    """Create the DQN agent around ``model``.

    The agent uses an epsilon-greedy policy wrapped in a linear annealing
    schedule (value_max=1.0, value_min=0.5, value_test=0.2 over 40000
    steps), a sequential replay memory of 10000 entries with a window of
    4, and the dueling architecture with 'avg' aggregation. The
    observation/reward processor is taken from the module-level
    ``processor`` variable.

    Args:
        model: the Q-network to wrap.
        actions: number of discrete actions, passed as ``nb_actions``.

    Returns:
        The constructed (not yet compiled) ``DQNAgent``.
    """
    exploration=LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.0,
        value_min=0.5,
        value_test=0.2,
        nb_steps=40000,
    )
    replay=SequentialMemory(limit=10000,window_length=4)
    return DQNAgent(
        model=model,
        memory=replay,
        processor=processor,
        policy=exploration,
        enable_dueling_network=True,
        dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=1000,
    )

In [None]:
# Build the agent, compile it with Adam (learning rate 1e-4) and save the
# untrained weights under the 'dqn_weights_start' prefix.
# NOTE(review): presumably saved so the later `dqn_weights_*` globs pick up
# an untrained baseline alongside the trained checkpoints -- confirm.
dqn=buildAgent(model,actions)
dqn.compile(tf.keras.optimizers.Adam(learning_rate=1e-4))
dqn.save_weights('dqn_weights_start')

In [None]:
# Checkpoint callback: the file name template carries a {step} placeholder
# and the callback is configured with an interval of 30000.
# NOTE(review): with the 40000-step fit in the training cell this presumably
# produces a single intermediate checkpoint -- confirm.
ckpt_filename='dqn_weights_{step}'
callbacks=[ModelIntervalCheckpoint(ckpt_filename,interval=30000)]

# **3. Entrenamiento**

In [None]:
# Train for 40000 steps without rendering, with the checkpoint callback
# attached.
dqn.fit(env,callbacks=callbacks,nb_steps=40000,visualize=False,verbose=1)

In [None]:
# Save the weights reached at the end of training.
dqn.save_weights("dqn_weights_last")

# **4. Evaluación**

In [None]:
import glob

In [None]:
# Evaluate every saved checkpoint: load its weights, run 10 test episodes
# and print the mean episode reward and mean episode length.
for ckpt in sorted(glob.glob("/kaggle/working/dqn_weights_*.index")):
    # Strip only the trailing ".index" extension. Splitting on the first
    # '.' would truncate the prefix if any directory or file name in the
    # path contained another dot.
    ckpt=ckpt.rsplit('.',1)[0]
    dqn.load_weights(ckpt)
    print("--------------{}------------".format(ckpt))
    scores=dqn.test(env,nb_episodes=10,visualize=False)
    history=scores.history
    print("mean reward: {} mean steps: {}".format(np.mean(history['episode_reward']),np.mean(history['nb_steps'])))

# **5. Visualización**

In [None]:
# Sorted paths of the checkpoint .index files; the visualisation loop
# iterates over this list.
checkpoints=sorted(glob.glob("/kaggle/working/dqn_weights_*.index"))

In [None]:
# For each checkpoint: play 3 greedy episodes, print each episode's total
# reward, and write all frames of those episodes into one .mp4 file named
# after the checkpoint prefix.
for ckpt in checkpoints:
    # Strip only the trailing ".index" extension (a split on the first '.'
    # would break on any path containing another dot).
    ckpt=ckpt.rsplit('.',1)[0]
    # The weights depend only on the checkpoint, so load them once here
    # instead of once per episode.
    dqn.load_weights(ckpt)

    # Frames from all episodes of this checkpoint end up in a single video.
    frames=[]
    for episode in range(3):
        # Reset per episode so the printed value is this episode's return
        # rather than a running total over previous episodes.
        rewards=[]
        done=False

        first_obs=env.reset()
        # Seed the 4-frame window by repeating the processed first frame.
        state=[process_img(first_obs) for _ in range(4)]

        while not done:
            # Act greedily with respect to the predicted Q-values.
            action=np.argmax(dqn.compute_q_values(np.array(state)))
            state_next,reward,done,info=env.step(action)
            # Slide the window: drop the oldest frame, append the newest.
            state.pop(0)
            state.append(process_img(state_next))
            rewards.append(reward)
            frames.append(state_next)
        print("recompensa para {} episodio {} es {}".format(ckpt,episode+1,np.sum(rewards)))

    out=cv2.VideoWriter("{}.mp4".format(ckpt),cv2.VideoWriter_fourcc('m','p','4','v'),60,(width,height))
    try:
        for frame in frames:
            # Environment frames are RGB; the frames are converted to BGR
            # before being handed to the writer.
            out.write(cv2.cvtColor(frame,cv2.COLOR_RGB2BGR))
    finally:
        # Always release the writer, even if a frame fails to encode.
        out.release()
    print("video para {} serializado!".format(ckpt))