# Agente Street Fighter 2 

## Configurar Entorno

### Importar librerias

In [1]:
# Filtramos los warnings
import warnings
warnings.filterwarnings("ignore")

# Forzar uso CPU en caso de dispones GPU en el PC
import os 
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# instalar librerias
#!pip3 install gym==0.17.2 gym-retro==0.8.0 --user
#!pip3 install tensorflow==2.3.0
#!pip3 install keras-rl2==1.0.4 keras==2.8.0

# Import retro para crear el entorno Street Fighter a partir de la ROM
import retro

# Import Time para relentizar el juego
import time

# Import Clase base del entorno para hacer wrapper
from gym import Env

# Import los shapes espaciales para el entorno
from gym.spaces import MultiBinary, Box

# Import numpy para calcular el frame delta
import numpy as np

# Import opencv para aplicar la escala de grises
import cv2

# Import matplotlib para visualizar la imagen
from matplotlib import pyplot as plt

# Import generación modelo
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Convolution2D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import average

# Construcción del agente
from rl.agents import DQNAgent
from rl.policy import LinearAnnealedPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory

from tqdm import tqdm

### Prueba entorno Street Fighter 2

In [None]:
env = StreetFighter("Champion.Level1.RyuVsGuile")

obs = env.reset()
done = False

for game in range(1):
    while not done:
        if(done):
            obs = env.reset()

        env.render()
        obs, reward, done, info = env.step(env.action_space.sample())
        #time.sleep(0.00001)
        print(reward)
        
env.close()

### Configuración del Entorno

In [2]:
# Creamos una clase para definir el entorno de SF2
class StreetFighter(Env):
    def __init__(self, state):
        super().__init__()
        # Especificar el espacio de acciones y el espacio de observaciones
        self.observation_space = Box(low=0, 
                                     high=255, 
                                     shape=(84, 84, 1), 
                                     dtype=np.uint8)
        state
        # Instanciar el entorno
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis',
                               state=state,
                               use_restricted_actions=retro.Actions.DISCRETE)
        
        self.action_space = MultiBinary(self.game.action_space.n)
                
    def reset(self):
        
        # Devolvemos el primer frame
        obs = self.game.reset()
         
        # Preprocess
        obs = self.preprocess(obs)
        self.previous_frame = obs
        
        # Inicializar atributo para la diferencia de la puntuación
        self.score = 0
        
        return obs
    
    def preprocess(self, observation):
        # Aplicamos el redimensionado del frame y el escalado de grises
        # Escalado de grises
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        # Redimensionado del frame
        resize = cv2.resize(gray, (84,84), interpolation=cv2.INTER_CUBIC)
        # Añadir el valor de los canales
        channel = np.reshape(resize, (84,84,1))
        
        return channel
    
    def step(self, action):
        # Realizar una acción
        obs, reward, done, info = self.game.step(action)
        
        # Procesamos la observación
        obs = self.preprocess(obs)
        
        # Calcular frame delta (Variación en frame anterior y actual)
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs
        
        # Adaptamos la función de recompensa
        reward = info['score'] - self.score
        self.score = info['score']
        
        return frame_delta, reward, done, info
    
    def render(self, *args, **kwargs):
        self.game.render()
        
    def close(self):
        self.game.close()

In [3]:
states = [
    "Champion.Level1.RyuVsGuile",
    "Champion.Level2.RyuVsBlanka",
    "Champion.Level3.RyuVsRyu",
    "Champion.Level4.RyuVsKen",
    "Champion.Level5.RyuVsChunLi",
    "Champion.Level6.RyuVsZangief",
    "Champion.Level8.RyuVsDhalsim",
    "Champion.Level9.RyuVsHonda",
    "Champion.Level11.RyuVsBalrog",
    "Champion.Level13.RyuVsVega",
    "Champion.Level14.RyuVsSagat",
    "Champion.Level15.RyuVsBison"
]

## Creación del modelo

La arquitectura de la DQN se compondrá de dos partes:
 * Visión por computación: reconocimiento de imágenes.
 * Regresión: determinar los valores Q de cada acción.

In [4]:
# Definición de la arquitectura de la DQN
def build_DQN(input_shape, output_shape):
    height, width, channels = input_shape
    model = Sequential()
    
    # Bloque Capas convolucionales
    model.add(Convolution2D(filters=32, kernel_size=(8,8), strides=(4,4), activation="relu", input_shape=(1, height, width, channels)))
    model.add(Convolution2D(filters=64, kernel_size=(4,4), strides=(2,2), activation="relu"))
    model.add(Convolution2D(filters=64, kernel_size=(3,3), strides=(1,1), activation="relu"))
    
    model.add(Flatten())
    
    # Bloque Capas lineales para determinar el Q-valor 
    model.add(Dense(units=512, activation='relu'))
    model.add(Dense(units=output_shape, activation='linear'))
    
    return model

## Creación del Agente

In [5]:
# Definición del Agente
def build_DQN_Agent(model, actions):
    policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.2, nb_steps=10000)
    memory = SequentialMemory(limit=10000, window_length=1)
    agent = DQNAgent(model=model,
                     memory=memory,
                     policy=policy,
                     enable_dueling_network=True,
                     dueling_type='avg',
                     nb_actions=actions,
                     nb_steps_warmup=1000)
    
    return agent

In [6]:
# Definición del Agente
def load_DQN_Agent(agent, actions):
    policy = agent.policy
    memory = agent.memory
    agent = DQNAgent(model=agent.model,
                     memory=memory,
                     policy=policy,
                     enable_dueling_network=True,
                     dueling_type='avg',
                     nb_actions=actions,
                     nb_steps_warmup=1000)
    
    return agent

### Guardar modelo

In [7]:
def save_agent(agent_toSave, countModel, path):
    
    agent_pathName = path + '/agent_'+str(countModel)+'.h5f'
    print(agent_pathName)
    agent_toSave.save_weights(agent_pathName, overwrite=True)

### Entrenamiento del agente

In [8]:
def train_DQN_agent(states_train, num_iteration, agent, lr):
    
    score_baseModel = 0
    
    if(num_iteration > 1):
        loadModel_path = "./test/historyModel/iter"+str(num_iteration-1)+"/agent_1.h5f"

        env = StreetFighter("Champion.Level1.RyuVsGuile")

        agent.load_weights(loadModel_path)

        score_baseModel = list(agent.test(env, nb_episodes=1, visualize=True).history.values())[0][0]

        env.close()
        del env
        
    for state in states:
        for i in range(1,4):
            
            path = "./train/"+state.split('.')[2] 

            if not os.path.exists(path):
                os.mkdir(path)
            
            env = StreetFighter(state)
            
            if(num_iteration == 1):

                model = build_DQN(env.observation_space.shape, env.action_space.n)
                agent = build_DQN_Agent(model, env.action_space.n)
            else:
                agent.load_weights(loadModel_path)
            
            agent.compile(Adam(), metrics=['mae'])
            agent.fit(env, nb_steps=20000, nb_max_episode_steps=20000, visualize=False, verbose=2)

            save_agent(agent,i,path)

            env.close()
            del env
    return agent, score_baseModel

In [9]:
def train_agent(states_train, num_iteration, agent, lr):
    
    score_baseModel = 0
    
    if(num_iteration > 1):
        loadModel_path = "./test/historyModel/iter"+str(num_iteration-1)+"/agent_1.h5f"

        env = StreetFighter("Champion.Level1.RyuVsGuile")

        agent.load_weights(loadModel_path)

        score_baseModel = list(agent.test(env, nb_episodes=1, visualize=True).history.values())[0][0]

        env.close()
        del env
    
    elif(num_iteration == 1):
                
        del agent
        model = build_DQN(env.observation_space.shape, env.action_space.n)
        agent = build_DQN_Agent(model, env.action_space.n)

    for state in states:
        for i in range(1,2):
            
            path = "./train/"+state.split('.')[2] 

            if not os.path.exists(path):
                os.mkdir(path)
            
            env = StreetFighter(state)
            
            agent.compile(Adam(), metrics=['mae'])
            agent.fit(env, nb_steps=50000, nb_max_episode_steps=50000, visualize=False, verbose=2)

            env.close()
            del env
            
            isBetter_agent, score_newModel = test_agent(state, agent, score_baseModel)
            
            if(isBetter_agent):
                save_agent(agent,i,path)
                score_baseModel = score_newModel
            else:
                agent.load_weights(loadModel_path)
            
    return agent, score_baseModel

In [10]:
models_weights = []

env = StreetFighter("Champion.Level1.RyuVsGuile")

agent.load_weights("./train/RyuVsDhalsim/agent_3.h5f")
agent.test(env, nb_episodes=1, visualize=True)

models_weights.append(agent.model.get_weights())

save_path="./test/historyModel/iter8"
save_agent(agent, 1, save_path)

agent.load_weights(save_path+"/agent_1.h5f")
agent.test(env, nb_episodes=1, visualize=True)


env.close()
del env

NameError: name 'agent' is not defined

## Test del agente

In [10]:
def generate_modelFinal(states, model_states, num_iteration, agent):
    
    models = []
    saveModel_path = "./test/historyModel"
    
    if not os.path.exists(saveModel_path):
        os.mkdir(saveModel_path)
    if not os.path.exists(saveModel_path+"/iter"+str(num_iteration)):   
        os.mkdir(saveModel_path+"/iter"+str(num_iteration))
            
    for model in model_states.items():
        models.append(model[1])
    
    model_final = np.mean(models, axis=0)

    env = StreetFighter("Champion.Level1.RyuVsGuile")

    agent.model.set_weights(model_final)

    print("\n\n\nFinal Test")

    score = agent.test(env, nb_episodes=1, visualize=True)

    save_agent(agent,1,saveModel_path+"/iter"+str(num_iteration))

    env.close()
    del env

In [11]:
def test_agent(state, agent, score_min):
    isBetter_agent = False
    env = StreetFighter(states[0])
    
    score = agent.test(env, nb_episodes=1, visualize=False)
    
    if(list(score.history.values())[0][0] > score_min):
        isBetter_agent = True
    else:
        score = score_min
        
    env.close()
    del env
    return isBetter_agent,score

In [12]:
def test_DQN_agent(states_test, num_iteration, min_value, agent):
    model_states = {}
    
    print("Test Iteración "+str(num_iteration))
    
    for state in states_test:
        
        model_path = "./train/"+state.split('.')[2]
        models = []
        key_score = state.split('.')[2]
        
        if not os.path.exists(model_path):
            os.mkdir(model_path)
        
        for i in range(1,2):
            agent_path = model_path+'/agent_'+str(i)+'.h5f'

            env = StreetFighter("Champion.Level1.RyuVsGuile")
            
            agent.load_weights(agent_path)
                        
            score = agent.test(env, nb_episodes=1, visualize=False)
                                        
            if((list(score.history.values())[0][0] > min_value)):           
                models.append(agent.model.get_weights())
            
            env.close()
            del env
        if(len(models)>0):
            model_merged = np.mean(models, axis=0)
            model_states[key_score] = model_merged 
    if(len(models)>0):
        for model in model_states.items():

            saveModel_path = "./test/"+model[0]

            if not os.path.exists(saveModel_path):
                os.mkdir(saveModel_path)
                os.mkdir(saveModel_path+"/iter"+str(num_iteration))

            if os.path.isdir(saveModel_path+"/iter"+str(num_iteration)):

                agent.model.set_weights(model_states[model[0]])

                env = StreetFighter("Champion.Level1.RyuVsGuile")

                score = agent.test(env, nb_episodes=1, visualize=False)

                save_agent(agent,1,saveModel_path+"/iter"+str(num_iteration))

                print("El modelo testeado del estado {}".format(model[0])
                      +"\nse ha almacenado en el path {}".format(saveModel_path))

                env.close()
                del env
        generate_modelFinal(states_test, model_states, num_iteration, agent)
    else:
        print("No se ha generado un modelo con puntuación mayor a {} en fase de test".format(min_value))

## Generación del agente

In [26]:
env = StreetFighter("Champion.Level1.RyuVsGuile")
model = build_DQN(env.observation_space.shape, env.action_space.n)
agent = build_DQN_Agent(model, env.action_space.n)
agent.compile(Adam(), metrics=['mae'])
agent.fit(env, nb_steps=1000, nb_max_episode_steps=1000, visualize=False, verbose=2)
env.close()
del env

print("Iniciando entrenamiento número 8")

agent,score_min = train_DQN_agent(states, 8, agent, 1e-7)

print("Iniciando fase de test del entrenamiento número 8")
test_DQN_agent(states, 8, score_min, agent)

print("Finalizada fase de test del entrenamiento número 8")

Training for 1000 steps ...
 1000/1000: episode: 1, duration: 6.946s, episode steps: 1000, steps per second: 144, episode reward: 1500.000, mean reward:  1.500 [ 0.000, 500.000], mean action: 62.300 [0.000, 125.000],  loss: --, mae: --, mean_q: --, mean_eps: --
done, took 6.947 seconds
Iniciando entrenamiento número 8
Testing for 1 episodes ...
Episode 1: reward: 183400.000, steps: 30643


Exception ignored in: <function SimpleImageViewer.__del__ at 0x0000021F58B42CA8>
Traceback (most recent call last):
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 379, in __del__
    self.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 375, in close
    self.window.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\win32\__init__.py", line 299, in close
    super(Win32Window, self).close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\__init__.py", line 823, in close
    app.windows.remove(self)
  File "C:\Users\javie\anaconda3\envs\Python37_RL_streetFighter\lib\_weakrefset.py", line 109, in remove
    self.data.remove(ref(item))
KeyError: (<weakref at 0x0000021F944A6F98; to 'Win32Window' at 0x0000021F944AFF88>,)


Training for 20000 steps ...
  5158/20000: episode: 1, duration: 242.412s, episode steps: 5158, steps per second:  21, episode reward: 20700.000, mean reward:  4.013 [ 0.000, 10000.000], mean action: 61.945 [0.000, 125.000],  loss: 4499.412228, mae: 125.958039, mean_q: 215.297339, mean_eps: 0.722890
  9630/20000: episode: 2, duration: 241.054s, episode steps: 4472, steps per second:  19, episode reward: 4100.000, mean reward:  0.917 [ 0.000, 500.000], mean action: 62.786 [0.000, 125.000],  loss: 714.028663, mae: 101.891900, mean_q: 159.603743, mean_eps: 0.334585
 13934/20000: episode: 3, duration: 230.239s, episode steps: 4304, steps per second:  19, episode reward: 2800.000, mean reward:  0.651 [ 0.000, 1000.000], mean action: 58.232 [0.000, 125.000],  loss: 370.331981, mae: 108.773868, mean_q: 155.692405, mean_eps: 0.101435
done, took 1044.096 seconds
./train/RyuVsRyu/agent_1.h5f
Training for 20000 steps ...
  3712/20000: episode: 1, duration: 165.965s, episode steps: 3712, steps per

In [16]:
env.close()
del env

## Analisis de la evolución del agente

In [13]:
print('Análisis de la evolución del agente: \n\n\n')
for i in range(1,8):
    
    path_agent = "./test/historyModel/iter"+str(i)+"/agent_1.h5f"
    
    env = StreetFighter("Champion.Level1.RyuVsGuile")
    model = build_DQN(env.observation_space.shape, env.action_space.n)
    agent = build_DQN_Agent(model, env.action_space.n)
    agent.compile(Adam(), metrics=['mae'])
    agent.fit(env, nb_steps=1000, nb_max_episode_steps=1000, visualize=False, verbose=2)
    env.close()
    del env
    
    print('Agente generado en la iteración {}.\n'.format(i))
    env = StreetFighter("Champion.Level1.RyuVsGuile")
    
    agent.load_weights(path_agent)
    
    agent.test(env, nb_episodes=1, visualize=True)
    
    del agent
    env.close()
    del env
    
    time.sleep(10)

Análisis de la evolución del agente: 



Training for 1000 steps ...
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
 1000/1000: episode: 1, duration: 3.704s, episode steps: 1000, steps per second: 270, episode reward: 2500.000, mean reward:  2.500 [ 0.000, 1000.000], mean action: 62.007 [0.000, 125.000],  loss: --, mae: --, mean_q: --, mean_eps: --
done, took 3.709 seconds
Agente generado en la iteración 1.

Testing for 1 episodes ...
Episode 1: reward: 16100.000, steps: 7058


Exception ignored in: <function SimpleImageViewer.__del__ at 0x0000024BD2943B88>
Traceback (most recent call last):
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 379, in __del__
    self.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 375, in close
    self.window.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\win32\__init__.py", line 299, in close
    super(Win32Window, self).close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\__init__.py", line 823, in close
    app.windows.remove(self)
  File "C:\Users\javie\anaconda3\envs\Python37_RL_streetFighter\lib\_weakrefset.py", line 109, in remove
    self.data.remove(ref(item))
KeyError: (<weakref at 0x0000024BD295D598; to 'Win32Window' at 0x0000024BD1FB0188>,)


Training for 1000 steps ...
 1000/1000: episode: 1, duration: 4.029s, episode steps: 1000, steps per second: 248, episode reward: 2300.000, mean reward:  2.300 [ 0.000, 1000.000], mean action: 61.381 [0.000, 125.000],  loss: --, mae: --, mean_q: --, mean_eps: --
done, took 4.032 seconds
Agente generado en la iteración 2.

Testing for 1 episodes ...
Episode 1: reward: 15400.000, steps: 7341


Exception ignored in: <function SimpleImageViewer.__del__ at 0x0000024BD2943B88>
Traceback (most recent call last):
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 379, in __del__
    self.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 375, in close
    self.window.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\win32\__init__.py", line 299, in close
    super(Win32Window, self).close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\__init__.py", line 823, in close
    app.windows.remove(self)
  File "C:\Users\javie\anaconda3\envs\Python37_RL_streetFighter\lib\_weakrefset.py", line 109, in remove
    self.data.remove(ref(item))
KeyError: (<weakref at 0x0000024BD97E9F98; to 'Win32Window' at 0x0000024BDF6384C8>,)


Training for 1000 steps ...
 1000/1000: episode: 1, duration: 3.886s, episode steps: 1000, steps per second: 257, episode reward: 600.000, mean reward:  0.600 [ 0.000, 500.000], mean action: 62.206 [0.000, 125.000],  loss: --, mae: --, mean_q: --, mean_eps: --
done, took 3.890 seconds
Agente generado en la iteración 3.

Testing for 1 episodes ...
Episode 1: reward: 33800.000, steps: 9028


Exception ignored in: <function SimpleImageViewer.__del__ at 0x0000024BD2943B88>
Traceback (most recent call last):
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 379, in __del__
    self.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 375, in close
    self.window.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\win32\__init__.py", line 299, in close
    super(Win32Window, self).close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\__init__.py", line 823, in close
    app.windows.remove(self)
  File "C:\Users\javie\anaconda3\envs\Python37_RL_streetFighter\lib\_weakrefset.py", line 109, in remove
    self.data.remove(ref(item))
KeyError: (<weakref at 0x0000024BDF6281D8; to 'Win32Window' at 0x0000024BD97E0788>,)


Training for 1000 steps ...
 1000/1000: episode: 1, duration: 4.002s, episode steps: 1000, steps per second: 250, episode reward: 2500.000, mean reward:  2.500 [ 0.000, 1000.000], mean action: 62.592 [0.000, 125.000],  loss: --, mae: --, mean_q: --, mean_eps: --
done, took 4.005 seconds
Agente generado en la iteración 4.

Testing for 1 episodes ...
Episode 1: reward: 35900.000, steps: 11317


Exception ignored in: <function SimpleImageViewer.__del__ at 0x0000024BD2943B88>
Traceback (most recent call last):
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 379, in __del__
    self.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 375, in close
    self.window.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\win32\__init__.py", line 299, in close
    super(Win32Window, self).close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\__init__.py", line 823, in close
    app.windows.remove(self)
  File "C:\Users\javie\anaconda3\envs\Python37_RL_streetFighter\lib\_weakrefset.py", line 109, in remove
    self.data.remove(ref(item))
KeyError: (<weakref at 0x0000024BE0011408; to 'Win32Window' at 0x0000024BE0000988>,)


Training for 1000 steps ...
 1000/1000: episode: 1, duration: 4.061s, episode steps: 1000, steps per second: 246, episode reward: 3200.000, mean reward:  3.200 [ 0.000, 1000.000], mean action: 64.689 [0.000, 125.000],  loss: --, mae: --, mean_q: --, mean_eps: --
done, took 4.064 seconds
Agente generado en la iteración 5.

Testing for 1 episodes ...
Episode 1: reward: 47200.000, steps: 12560


Exception ignored in: <function SimpleImageViewer.__del__ at 0x0000024BD2943B88>
Traceback (most recent call last):
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 379, in __del__
    self.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 375, in close
    self.window.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\win32\__init__.py", line 299, in close
    super(Win32Window, self).close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\__init__.py", line 823, in close
    app.windows.remove(self)
  File "C:\Users\javie\anaconda3\envs\Python37_RL_streetFighter\lib\_weakrefset.py", line 109, in remove
    self.data.remove(ref(item))
KeyError: (<weakref at 0x0000024BDF463AE8; to 'Win32Window' at 0x0000024BE21AEEC8>,)


Training for 1000 steps ...
 1000/1000: episode: 1, duration: 3.961s, episode steps: 1000, steps per second: 252, episode reward: 500.000, mean reward:  0.500 [ 0.000, 500.000], mean action: 61.742 [0.000, 125.000],  loss: --, mae: --, mean_q: --, mean_eps: --
done, took 3.964 seconds
Agente generado en la iteración 6.

Testing for 1 episodes ...
Episode 1: reward: 82600.000, steps: 9228


Exception ignored in: <function SimpleImageViewer.__del__ at 0x0000024BD2943B88>
Traceback (most recent call last):
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 379, in __del__
    self.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 375, in close
    self.window.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\win32\__init__.py", line 299, in close
    super(Win32Window, self).close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\__init__.py", line 823, in close
    app.windows.remove(self)
  File "C:\Users\javie\anaconda3\envs\Python37_RL_streetFighter\lib\_weakrefset.py", line 109, in remove
    self.data.remove(ref(item))
KeyError: (<weakref at 0x0000024BE21A3A98; to 'Win32Window' at 0x0000024BE21A0D08>,)


Training for 1000 steps ...
 1000/1000: episode: 1, duration: 4.201s, episode steps: 1000, steps per second: 238, episode reward: 2000.000, mean reward:  2.000 [ 0.000, 1000.000], mean action: 61.488 [0.000, 125.000],  loss: --, mae: --, mean_q: --, mean_eps: --
done, took 4.204 seconds
Agente generado en la iteración 7.

Testing for 1 episodes ...
Episode 1: reward: 183400.000, steps: 30643


In [18]:
env = StreetFighter("Champion.Level1.RyuVsGuile")

Exception ignored in: <function SimpleImageViewer.__del__ at 0x000001BF9BCB1678>
Traceback (most recent call last):
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 379, in __del__
    self.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\gym\envs\classic_control\rendering.py", line 375, in close
    self.window.close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\win32\__init__.py", line 299, in close
    super(Win32Window, self).close()
  File "C:\Users\javie\AppData\Roaming\Python\Python37\site-packages\pyglet\window\__init__.py", line 823, in close
    app.windows.remove(self)
  File "C:\Users\javie\anaconda3\envs\Python37_RL_streetFighter\lib\_weakrefset.py", line 109, in remove
    self.data.remove(ref(item))
KeyError: (<weakref at 0x000001BF9BCC6408; to 'Win32Window' at 0x000001BF9B546BC8>,)


In [21]:
time.sleep(10)
path_agent = "./test/historyModel/iter1/agent_1.h5f"
agent.load_weights(path_agent)
    
agent.test(env, nb_episodes=1, visualize=True)

path_agent = "./test/historyModel/iter4/agent_1.h5f"
agent.load_weights(path_agent)
    
agent.test(env, nb_episodes=1, visualize=True)

path_agent = "./test/historyModel/iter7/agent_1.h5f"
agent.load_weights(path_agent)
    
agent.test(env, nb_episodes=1, visualize=True)


Testing for 1 episodes ...
Episode 1: reward: 16100.000, steps: 7058
Testing for 1 episodes ...
Episode 1: reward: 35900.000, steps: 11317
Testing for 1 episodes ...
Episode 1: reward: 183400.000, steps: 30643


<tensorflow.python.keras.callbacks.History at 0x1bf9e6f3908>