#### Trying out OpenAI Gym

In [None]:
import gym
env = gym.make('MountainCar-v0', render_mode = 'human')
observation, info = env.reset(seed=42)

for _ in range(1000):
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    env.render()

    if terminated or truncated:
        observation, info = env.reset()
env.close()

#### Training a Sarsa(lambda) agent on 2D Mountain Car

In [3]:
import GAME.envs.mountain_car
import gym
from GAME.agents.sarsa_lambda import SarsaLambdaCMAC2DMountainCar

In [5]:
env = gym.make('MountainCar2D-v0', render_mode = 'human')
env._max_episode_steps = 4000
observation, info = env.reset(seed=42)

# agent
alpha = 1.2
lamb = 0.95
gamma = 1
method = 'replacing'
epsilon = 0
num_of_tilings = 8
max_size = 2048
agent = SarsaLambdaCMAC2DMountainCar(alpha, lamb, gamma, method, epsilon, num_of_tilings, max_size)

# experiment parameters
max_episodes = 50
total_steps = 0 # eval metric
update_agent = True
debug = True

for ep in range(max_episodes):
    steps = 0
    while True:
        # current state
        current_state = observation # [x, x_dot]
        action = agent.choose_action_eps_greedy(current_state)
        # next state
        observation, reward, terminated, truncated, info = env.step(action)
        # next action
        next_action = agent.choose_action_eps_greedy(observation)
        # env.render()

        # update agent
        if update_agent:
            target = reward + agent.get_value(observation, next_action)
            active_tiles = agent.get_active_tiles(current_state, action)
            agent.update(active_tiles, target)

        # prep the next iteration
        steps += 1

        # reset the training
        if terminated or truncated:
            observation, info = env.reset()
            total_steps += steps
            if debug:
                print("Episode: {}, Number of steps: {}, Total steps: {}".format(ep, steps, total_steps))
            break

# training complete
env.close()
print("Average steps per episode: {}".format(total_steps / max_episodes))

Episode: 0, Number of steps: 2280, Total steps: 2280
Episode: 1, Number of steps: 2235, Total steps: 4515
Episode: 2, Number of steps: 1096, Total steps: 5611
Episode: 3, Number of steps: 669, Total steps: 6280
Episode: 4, Number of steps: 494, Total steps: 6774
Episode: 5, Number of steps: 351, Total steps: 7125
Episode: 6, Number of steps: 263, Total steps: 7388
Episode: 7, Number of steps: 185, Total steps: 7573
Episode: 8, Number of steps: 280, Total steps: 7853
Episode: 9, Number of steps: 317, Total steps: 8170
Episode: 10, Number of steps: 264, Total steps: 8434
Episode: 11, Number of steps: 266, Total steps: 8700
Episode: 12, Number of steps: 244, Total steps: 8944
Episode: 13, Number of steps: 195, Total steps: 9139
Episode: 14, Number of steps: 192, Total steps: 9331
Episode: 15, Number of steps: 149, Total steps: 9480
Episode: 16, Number of steps: 155, Total steps: 9635
Episode: 17, Number of steps: 135, Total steps: 9770
Episode: 18, Number of steps: 164, Total steps: 9934


#### Saving training information

In [6]:
import pickle

In [8]:
# save the agent
path = "C:\\Users\\minhh\\Documents\\JHU\\Fall 2022\\Evolutionary and Swarm Intelligence\\src\\GAME\\pickle\\10242022 Initial Experiments with 2D MC and Sarsa\\"
agent_filename = 'agent_alpha_{:.2f}_lamb_{:.2f}_gam_{:.2f}_eps_{:.2f}_method_{}_ntiles_{}_max_size_{}.pickle'.format(alpha, lamb, gamma, epsilon, method, num_of_tilings, max_size)
agent_weights_filename = 'weights_alpha_{:.2f}_lamb_{:.2f}_gam_{:.2f}_eps_{:.2f}_method_{}_ntiles_{}_max_size_{}.pickle'.format(alpha, lamb, gamma, epsilon, method, num_of_tilings, max_size)
agent_hash_filename = 'hash_alpha_{:.2f}_lamb_{:.2f}_gam_{:.2f}_eps_{:.2f}_method_{}_ntiles_{}_max_size_{}.pickle'.format(alpha, lamb, gamma, epsilon, method, num_of_tilings, max_size)
agent_z_filename = 'z_alpha_{:.2f}_lamb_{:.2f}_gam_{:.2f}_eps_{:.2f}_method_{}_ntiles_{}_max_size_{}.pickle'.format(alpha, lamb, gamma, epsilon, method, num_of_tilings, max_size)

with open(path + agent_filename, 'wb') as f:
    pickle.dump(agent, f)
with open(path + agent_weights_filename, 'wb') as f:
    pickle.dump(agent.weights, f)
with open(path + agent_hash_filename, 'wb') as f:
    pickle.dump(agent.hash_table, f)
with open(path + agent_z_filename, 'wb') as f:
    pickle.dump(agent.z, f)

In [10]:
with open(path + agent_filename, 'rb') as f:
    agent2 = pickle.load(f)

In [11]:
env = gym.make('MountainCar2D-v0', render_mode = 'human')
env._max_episode_steps = 4000
observation, info = env.reset(seed=42)

# experiment parameters
max_episodes = 1
total_steps = 0 # eval metric
update_agent = False
debug = True

for ep in range(max_episodes):
    steps = 0
    while True:
        # current state
        current_state = observation # [x, x_dot]
        action = agent2.choose_action_eps_greedy(current_state)
        # next state
        observation, reward, terminated, truncated, info = env.step(action)
        # next action
        next_action = agent2.choose_action_eps_greedy(observation)
        # env.render()

        # update agent
        if update_agent:
            target = reward + agent2.get_value(observation, next_action)
            active_tiles = agent2.get_active_tiles(current_state, action)
            agent2.update(active_tiles, target)

        # prep the next iteration
        steps += 1

        # reset the training
        if terminated or truncated:
            observation, info = env.reset()
            total_steps += steps
            if debug:
                print("Episode: {}, Number of steps: {}, Total steps: {}".format(ep, steps, total_steps))
            break

# training complete
env.close()
print("Average steps per episode: {}".format(total_steps / max_episodes))

Episode: 0, Number of steps: 105, Total steps: 105
Average steps per episode: 105.0


In [12]:
with open(path + agent_weights_filename, 'rb') as f:
    agent_weights = pickle.load(f)
with open(path + agent_hash_filename, 'rb') as f:
    agent_hash_tab = pickle.load(f)
with open(path + agent_z_filename, 'rb') as f:
    agent_z = pickle.load(f)

In [13]:
env = gym.make('MountainCar2D-v0', render_mode = 'human')
env._max_episode_steps = 4000
observation, info = env.reset(seed=42)

# agent
alpha = 1.2
lamb = 0.95
gamma = 1
method = 'replacing'
epsilon = 0
num_of_tilings = 8
max_size = 2048
agent3 = SarsaLambdaCMAC2DMountainCar(alpha, lamb, gamma, method, epsilon, num_of_tilings, max_size)
agent3.weights = agent_weights
agent3.hash_table = agent_hash_tab
agent3.z = agent_z

# experiment parameters
max_episodes = 1
total_steps = 0 # eval metric
update_agent = False
debug = True

for ep in range(max_episodes):
    steps = 0
    while True:
        # current state
        current_state = observation # [x, x_dot]
        action = agent3.choose_action_eps_greedy(current_state)
        # next state
        observation, reward, terminated, truncated, info = env.step(action)
        # next action
        next_action = agent3.choose_action_eps_greedy(observation)
        # env.render()

        # update agent
        if update_agent:
            target = reward + agent3.get_value(observation, next_action)
            active_tiles = agent3.get_active_tiles(current_state, action)
            agent3.update(active_tiles, target)

        # prep the next iteration
        steps += 1

        # reset the training
        if terminated or truncated:
            observation, info = env.reset()
            total_steps += steps
            if debug:
                print("Episode: {}, Number of steps: {}, Total steps: {}".format(ep, steps, total_steps))
            break

# training complete
env.close()
print("Average steps per episode: {}".format(total_steps / max_episodes))

Episode: 0, Number of steps: 105, Total steps: 105
Average steps per episode: 105.0


#### Collecting samples for 2D MC

In [1]:
import GAME.envs.mountain_car
import gym
from GAME.agents.sarsa_lambda import SarsaLambdaCMAC2DMountainCar
from GAME.utils.helper_funcs import *

In [2]:
env = gym.make('MountainCar2D-v0', render_mode = 'human')
env._max_episode_steps = 3000
observation, info = env.reset(seed=42)

# agent
alpha = 1.2
lamb = 0.95
gamma = 1
method = 'replacing'
epsilon = 0
num_of_tilings = 8
max_size = 2048
agent = SarsaLambdaCMAC2DMountainCar(alpha, lamb, gamma, method, epsilon, num_of_tilings, max_size)

# experiment parameters
max_episodes = 50
total_steps = 0 # eval metric
update_agent = True
debug = True

# data collector
save_every = 10
agent_info = SarsaLambdaAgentInfo(alpha, lamb, gamma, method, epsilon, num_of_tilings, max_size)
experiment_info = ExperimentInfo('MountainCar2D-v0', env._max_episode_steps, 42, max_episodes, 'SarsaLambda')
data_column_names = ['Episode', 'Step', 'Current_x_position', 'Current_x_velocity', 'Current_action', 'Reward', 'Next_x_position', 'Next_x_velocity', 'Next_action']
data_column_dtypes = ['int', 'int', 'float', 'float', 'int', 'int', 'float', 'float', 'int']
data_collector = RLSamplesCollector(experiment_info, agent_info, data_column_names, data_column_dtypes)
path = "C:\\Users\\minhh\\Documents\\JHU\\Fall 2022\\Evolutionary and Swarm Intelligence\\src\\GAME\\output\\10242022 Initial Samples Collection for 2D MC\\"
file_name = 'test.csv'
data_collector.write_metadata(path, 'test_metadata.txt')

In [3]:
for ep in range(max_episodes):
    steps = 0
    while True:
        try:
            # current state
            current_state = observation # [x, x_dot]
            action = agent.choose_action_eps_greedy(current_state)
            # next state
            observation, reward, terminated, truncated, info = env.step(action)
            # next action
            next_action = agent.choose_action_eps_greedy(observation)
            # env.render()

            # update agent
            if update_agent:
                target = reward + agent.get_value(observation, next_action)
                active_tiles = agent.get_active_tiles(current_state, action)
                agent.update(active_tiles, target)

            # save data
            data_dict = {
                col_name : col_data for col_name, col_data in zip(data_column_names, [ep, steps, current_state[0], current_state[1], action, reward, observation[0], observation[1], next_action])
            }
            data_collector.log_data(data_dict)

            # prep the next iteration
            steps += 1

            # reset the training
            if terminated or truncated:
                observation, info = env.reset()
                total_steps += steps
                if debug:
                    print("Episode: {}, Number of steps: {}, Total steps: {}".format(ep, steps, total_steps))
                break
        except KeyboardInterrupt:
            env.close()
            data_collector.export_data(path, file_name)
    # save data every few iterations
    if save_every and (ep % save_every == 0):
        data_collector.export_data(path, file_name)

# training complete
env.close()
print("Average steps per episode: {}".format(total_steps / max_episodes))
data_collector.export_data(path, file_name)

Episode: 0, Number of steps: 1040, Total steps: 1040
Episode: 1, Number of steps: 1305, Total steps: 2345
Episode: 2, Number of steps: 1048, Total steps: 3393
Episode: 3, Number of steps: 784, Total steps: 4177
Episode: 4, Number of steps: 958, Total steps: 5135
Episode: 5, Number of steps: 286, Total steps: 5421
Episode: 6, Number of steps: 1447, Total steps: 6868
Episode: 7, Number of steps: 860, Total steps: 7728
Episode: 8, Number of steps: 545, Total steps: 8273
Episode: 9, Number of steps: 726, Total steps: 8999
Episode: 10, Number of steps: 392, Total steps: 9391
Episode: 11, Number of steps: 344, Total steps: 9735
Episode: 12, Number of steps: 172, Total steps: 9907
Episode: 13, Number of steps: 165, Total steps: 10072
Episode: 14, Number of steps: 114, Total steps: 10186
Episode: 15, Number of steps: 145, Total steps: 10331
Episode: 16, Number of steps: 146, Total steps: 10477
Episode: 17, Number of steps: 133, Total steps: 10610
Episode: 18, Number of steps: 177, Total steps: