#### Trying out OpenAI Gym

In [4]:
import gym
env = gym.make('Pendulum-v1', g=9.81, render_mode=None)
observation, info = env.reset(seed=42)

for _ in range(1):
    action = env.action_space.sample()
    observation, reward, terminated, truncated, info = env.step(action)
    # print(observation)
    env.render()

    if terminated or truncated:
        observation, info = env.reset()
env.close()

[-0.70880616]


  gym.logger.warn(


#### Training a Sarsa(lambda) agent on 2D Mountain Car

In [1]:
import gym
from GAME.agents.sarsa_lambda import SarsaLambdaCMACPendulum
from GAME.utils.config import config
import numpy as np

In [3]:
env = gym.make('Pendulum-v1', g=9.81, render_mode=None)
env._max_episode_steps = 5000
observation, info = env.reset(seed=42)

# agent
# alpha = 1.2
# lamb = 0.95
# gamma = 1
# method = 'replacing'
# epsilon = 0
# num_of_tilings = 8
# max_size = 2048
# agent = SarsaLambdaCMAC2DMountainCar(alpha, lamb, gamma, method, epsilon, num_of_tilings, max_size)
alpha = 0.5
lamb = 0.95
gamma = 1
method = 'replacing'
epsilon = 0.1
eps_decay = 0.99
num_of_tilings = 14
max_size = 2048
agent = SarsaLambdaCMACPendulum(alpha, lamb, gamma, method, epsilon, num_of_tilings, max_size)

# experiment parameters
max_episodes = 2000
total_reward = 0 # eval metric
update_agent = True
debug = True

for ep in range(max_episodes):
    ep_reward = 0
    while True:
        # current state
        current_state = observation # [cos(th), sin(th), thdot]
        action = agent.choose_action_eps_greedy(current_state)
        # next state
        observation, reward, terminated, truncated, info = env.step([action])
        # next action
        next_action = agent.choose_action_eps_greedy(observation)
        # env.render()

        # update agent
        if update_agent:
            target = reward + agent.get_value(observation, next_action)
            active_tiles = agent.get_active_tiles(current_state, action)
            agent.update(active_tiles, target)

        # prep the next iteration
        ep_reward += reward

        # reset the training
        if terminated or truncated:
            observation, info = env.reset()
            total_reward += ep_reward
            if debug:
                print("Episode: {}, Number of steps: {}, Total steps: {}".format(ep, ep_reward, total_reward))
            break

# training complete
env.close()
print("Average steps per episode: {}".format(total_reward / max_episodes))

IHT full, starting to allow collisions
Episode: 0, Number of steps: -27118.694311852916, Total steps: -27118.694311852916
Episode: 1, Number of steps: -25386.26811099572, Total steps: -52504.962422848635
Episode: 2, Number of steps: -26312.014828898966, Total steps: -78816.9772517476
Episode: 3, Number of steps: -28982.317139462957, Total steps: -107799.29439121055
Episode: 4, Number of steps: -26314.025551447863, Total steps: -134113.31994265842
Episode: 5, Number of steps: -25215.73908386022, Total steps: -159329.05902651863
Episode: 6, Number of steps: -22758.613011949805, Total steps: -182087.67203846844
Episode: 7, Number of steps: -27451.678470326857, Total steps: -209539.3505087953
Episode: 8, Number of steps: -26628.99665439931, Total steps: -236168.34716319462
Episode: 9, Number of steps: -26459.10160516718, Total steps: -262627.4487683618
Episode: 10, Number of steps: -24320.39630933308, Total steps: -286947.8450776949
Episode: 11, Number of steps: -27828.246274152367, Total 

#### Saving training information

In [6]:
import pickle

In [8]:
# save the agent
path = "C:\\Users\\minhh\\Documents\\JHU\\Fall 2022\\Evolutionary and Swarm Intelligence\\src\\GAME\\pickle\\10242022 Initial Experiments with 2D MC and Sarsa\\"
agent_filename = 'agent_alpha_{:.2f}_lamb_{:.2f}_gam_{:.2f}_eps_{:.2f}_method_{}_ntiles_{}_max_size_{}.pickle'.format(alpha, lamb, gamma, epsilon, method, num_of_tilings, max_size)
agent_weights_filename = 'weights_alpha_{:.2f}_lamb_{:.2f}_gam_{:.2f}_eps_{:.2f}_method_{}_ntiles_{}_max_size_{}.pickle'.format(alpha, lamb, gamma, epsilon, method, num_of_tilings, max_size)
agent_hash_filename = 'hash_alpha_{:.2f}_lamb_{:.2f}_gam_{:.2f}_eps_{:.2f}_method_{}_ntiles_{}_max_size_{}.pickle'.format(alpha, lamb, gamma, epsilon, method, num_of_tilings, max_size)
agent_z_filename = 'z_alpha_{:.2f}_lamb_{:.2f}_gam_{:.2f}_eps_{:.2f}_method_{}_ntiles_{}_max_size_{}.pickle'.format(alpha, lamb, gamma, epsilon, method, num_of_tilings, max_size)

with open(path + agent_filename, 'wb') as f:
    pickle.dump(agent, f)
with open(path + agent_weights_filename, 'wb') as f:
    pickle.dump(agent.weights, f)
with open(path + agent_hash_filename, 'wb') as f:
    pickle.dump(agent.hash_table, f)
with open(path + agent_z_filename, 'wb') as f:
    pickle.dump(agent.z, f)

In [10]:
with open(path + agent_filename, 'rb') as f:
    agent2 = pickle.load(f)

In [11]:
env = gym.make('MountainCar2D-v0', render_mode = 'human')
env._max_episode_steps = 4000
observation, info = env.reset(seed=42)

# experiment parameters
max_episodes = 1
total_steps = 0 # eval metric
update_agent = False
debug = True

for ep in range(max_episodes):
    steps = 0
    while True:
        # current state
        current_state = observation # [x, x_dot]
        action = agent2.choose_action_eps_greedy(current_state)
        # next state
        observation, reward, terminated, truncated, info = env.step(action)
        # next action
        next_action = agent2.choose_action_eps_greedy(observation)
        # env.render()

        # update agent
        if update_agent:
            target = reward + agent2.get_value(observation, next_action)
            active_tiles = agent2.get_active_tiles(current_state, action)
            agent2.update(active_tiles, target)

        # prep the next iteration
        steps += 1

        # reset the training
        if terminated or truncated:
            observation, info = env.reset()
            total_steps += steps
            if debug:
                print("Episode: {}, Number of steps: {}, Total steps: {}".format(ep, steps, total_steps))
            break

# training complete
env.close()
print("Average steps per episode: {}".format(total_steps / max_episodes))

Episode: 0, Number of steps: 105, Total steps: 105
Average steps per episode: 105.0


In [12]:
with open(path + agent_weights_filename, 'rb') as f:
    agent_weights = pickle.load(f)
with open(path + agent_hash_filename, 'rb') as f:
    agent_hash_tab = pickle.load(f)
with open(path + agent_z_filename, 'rb') as f:
    agent_z = pickle.load(f)

In [13]:
env = gym.make('MountainCar2D-v0', render_mode = 'human')
env._max_episode_steps = 4000
observation, info = env.reset(seed=42)

# agent
alpha = 1.2
lamb = 0.95
gamma = 1
method = 'replacing'
epsilon = 0
num_of_tilings = 8
max_size = 2048
agent3 = SarsaLambdaCMAC2DMountainCar(alpha, lamb, gamma, method, epsilon, num_of_tilings, max_size)
agent3.weights = agent_weights
agent3.hash_table = agent_hash_tab
agent3.z = agent_z

# experiment parameters
max_episodes = 1
total_steps = 0 # eval metric
update_agent = False
debug = True

for ep in range(max_episodes):
    steps = 0
    while True:
        # current state
        current_state = observation # [x, x_dot]
        action = agent3.choose_action_eps_greedy(current_state)
        # next state
        observation, reward, terminated, truncated, info = env.step(action)
        # next action
        next_action = agent3.choose_action_eps_greedy(observation)
        # env.render()

        # update agent
        if update_agent:
            target = reward + agent3.get_value(observation, next_action)
            active_tiles = agent3.get_active_tiles(current_state, action)
            agent3.update(active_tiles, target)

        # prep the next iteration
        steps += 1

        # reset the training
        if terminated or truncated:
            observation, info = env.reset()
            total_steps += steps
            if debug:
                print("Episode: {}, Number of steps: {}, Total steps: {}".format(ep, steps, total_steps))
            break

# training complete
env.close()
print("Average steps per episode: {}".format(total_steps / max_episodes))

Episode: 0, Number of steps: 105, Total steps: 105
Average steps per episode: 105.0


#### Collecting samples for 2D MC

In [1]:
import GAME.envs.mountain_car
import gym
from GAME.agents.sarsa_lambda import SarsaLambdaCMAC2DMountainCar
from GAME.utils.helper_funcs import *

In [2]:
env = gym.make('MountainCar2D-v0', render_mode = 'human')
env._max_episode_steps = 3000
observation, info = env.reset(seed=42)

# agent
alpha = 1.2
lamb = 0.95
gamma = 1
method = 'replacing'
epsilon = 0
num_of_tilings = 8
max_size = 2048
agent = SarsaLambdaCMAC2DMountainCar(alpha, lamb, gamma, method, epsilon, num_of_tilings, max_size)

# experiment parameters
max_episodes = 50
total_steps = 0 # eval metric
update_agent = True
debug = True

# data collector
save_every = 10
agent_info = SarsaLambdaAgentInfo(alpha, lamb, gamma, method, epsilon, num_of_tilings, max_size)
experiment_info = ExperimentInfo('MountainCar2D-v0', env._max_episode_steps, 42, max_episodes, 'SarsaLambda')
data_column_names = ['Episode', 'Step', 'Current-x_position', 'Current-x_velocity', 'Current-action', 'Reward', 'Next-x_position', 'Next-x_velocity', 'Next-action']
data_column_dtypes = ['int', 'int', 'float', 'float', 'int', 'int', 'float', 'float', 'int']
data_collector = RLSamplesCollector(experiment_info, agent_info, data_column_names, data_column_dtypes)
path = "C:\\Users\\minhh\\Documents\\JHU\\Fall 2022\\Evolutionary and Swarm Intelligence\\src\\GAME\\output\\10242022 Initial Samples Collection for 2D MC\\"
file_name = 'test.csv'
data_collector.write_metadata(path, 'test_metadata.txt')

In [3]:
for ep in range(max_episodes):
    steps = 0
    while True:
        try:
            # current state
            current_state = observation # [x, x_dot]
            action = agent.choose_action_eps_greedy(current_state)
            # next state
            observation, reward, terminated, truncated, info = env.step(action)
            # next action
            next_action = agent.choose_action_eps_greedy(observation)
            # env.render()

            # update agent
            if update_agent:
                target = reward + agent.get_value(observation, next_action)
                active_tiles = agent.get_active_tiles(current_state, action)
                agent.update(active_tiles, target)

            # save data
            data_dict = {
                col_name : col_data for col_name, col_data in zip(data_column_names, [ep, steps, current_state[0], current_state[1], action, reward, observation[0], observation[1], next_action])
            }
            data_collector.log_data(data_dict)

            # prep the next iteration
            steps += 1

            # reset the training
            if terminated or truncated:
                observation, info = env.reset()
                total_steps += steps
                if debug:
                    print("Episode: {}, Number of steps: {}, Total steps: {}".format(ep, steps, total_steps))
                break
        except KeyboardInterrupt:
            env.close()
            data_collector.export_data(path, file_name)
    # save data every few iterations
    if save_every and (ep % save_every == 0):
        data_collector.export_data(path, file_name)

# training complete
env.close()
print("Average steps per episode: {}".format(total_steps / max_episodes))
data_collector.export_data(path, file_name)

Episode: 0, Number of steps: 1040, Total steps: 1040
Episode: 1, Number of steps: 1305, Total steps: 2345
Episode: 2, Number of steps: 1048, Total steps: 3393
Episode: 3, Number of steps: 784, Total steps: 4177
Episode: 4, Number of steps: 958, Total steps: 5135
Episode: 5, Number of steps: 286, Total steps: 5421
Episode: 6, Number of steps: 1447, Total steps: 6868
Episode: 7, Number of steps: 860, Total steps: 7728
Episode: 8, Number of steps: 545, Total steps: 8273
Episode: 9, Number of steps: 726, Total steps: 8999
Episode: 10, Number of steps: 392, Total steps: 9391
Episode: 11, Number of steps: 344, Total steps: 9735
Episode: 12, Number of steps: 172, Total steps: 9907
Episode: 13, Number of steps: 165, Total steps: 10072
Episode: 14, Number of steps: 114, Total steps: 10186
Episode: 15, Number of steps: 145, Total steps: 10331
Episode: 16, Number of steps: 146, Total steps: 10477
Episode: 17, Number of steps: 133, Total steps: 10610
Episode: 18, Number of steps: 177, Total steps:

#### Generating learning curves

In [1]:
import GAME.envs.mountain_car
import gym
from GAME.agents.sarsa_lambda import SarsaLambdaCMAC2DMountainCar
from GAME.utils.data_miners import *

In [2]:
env = gym.make('MountainCar2D-v0', render_mode = None)
env._max_episode_steps = 5000
observation, info = env.reset(seed=42)

In [3]:
alpha = 1.2
lamb = 0.95
gamma = 1
method = 'replacing'
epsilon = 0
num_of_tilings = 8
max_size = 2048
eps_decay = 1

# experiment parameters
trials = 25
max_episodes = 50
total_steps = 0 # eval metric
update_agent = True
debug = True
start_learning_after = 10

# data collector
save_every = 10
agent_info = SarsaLambdaAgentInfo(alpha, lamb, gamma, method, epsilon, num_of_tilings, max_size)
experiment_info = ExperimentInfo('MountainCar2D-v0', env._max_episode_steps, 42, max_episodes, 'SarsaLambda')
data_column_names = ['Trial', 'Episode', 'Step', 'Current-x_position', 'Current-x_velocity', 'Current-action', 'Reward', 'Next-x_position', 'Next-x_velocity', 'Next-action']
data_column_dtypes = ['int', 'int', 'int', 'float', 'float', 'int', 'int', 'float', 'float', 'int']
data_collector = RLSamplesCollector(experiment_info, agent_info, data_column_names, data_column_dtypes)
path = "C:\\Users\\minhh\\Documents\\JHU\\Fall 2022\\Evolutionary and Swarm Intelligence\\src\\GAME\\output\\10312022 Gen Learn Curves and Collect Samples 2DMC\\"
file_name = 'transition_samples_2DMC_a{}_l{}_e{}_nt{}.csv'.format(alpha, lamb, epsilon, num_of_tilings)
data_collector.write_metadata(path, '{}_metadata.txt'.format(file_name))

# data collector for evaluation
eval_data_column_names = ['Trial', 'Episode', 'Reward']
eval_data_column_dtypes = ['int', 'int', 'int']
eval_data_collector = RLSamplesCollector(experiment_info, agent_info, eval_data_column_names, eval_data_column_dtypes)
eval_file_name = 'eval_2DMC_a{}_l{}_e{}_nt{}.csv'.format(alpha, lamb, epsilon, num_of_tilings)

In [4]:
for trial in range(trials):
    agent = SarsaLambdaCMAC2DMountainCar(alpha, lamb, gamma, method, epsilon, num_of_tilings, max_size)
    for ep in range(max_episodes):
        steps = 0
        while True:
            try:
                # current state
                current_state = observation # [x, x_dot]
                action = agent.choose_action_eps_greedy(current_state)
                # next state
                observation, reward, terminated, truncated, info = env.step(action)
                # next action
                next_action = agent.choose_action_eps_greedy(observation)
                # env.render()

                # update agent
                if ep >= start_learning_after and update_agent:
                    target = reward + agent.get_value(observation, next_action)
                    active_tiles = agent.get_active_tiles(current_state, action)
                    agent.update(active_tiles, target)

                # save data
                data_dict = {
                    col_name : col_data for col_name, col_data in zip(data_column_names, [trial, ep, steps, current_state[0], current_state[1], action, reward, observation[0], observation[1], next_action])
                }
                data_collector.log_data(data_dict)

                # prep the next iteration
                steps += 1

                # reset the training
                if terminated or truncated:
                    observation, info = env.reset()
                    total_steps += steps
                    if debug:
                        print("Episode: {}, Number of steps: {}, Total steps: {}".format(ep, steps, total_steps))
                    break
            except KeyboardInterrupt:
                env.close()
                data_collector.export_data(path, file_name)
        # save data every few iterations
        if save_every and (ep % save_every == 0):
            data_collector.export_data(path, file_name)

        # at the end of each episode, we must evaluate the agent without exploration
        prev_eps = agent.epsilon
        agent.epsilon = 0
        total_rewards = 0
        while True:
            current_state = observation # [x, x_dot]
            action = agent.choose_action_eps_greedy(current_state)
            # next state
            observation, reward, terminated, truncated, info = env.step(action)
            # count the number of steps incurred
            total_rewards += reward
            # reset the training
            if terminated or truncated:
                observation, info = env.reset()
                break
        # after we finish evaluation, save the evaluation data
        data_dict = {
            col_name : col_data for col_name, col_data in zip(eval_data_column_names, [trial, ep, total_rewards])
        }
        eval_data_collector.log_data(data_dict)
        # save data every few iterations
        if save_every and (ep % save_every == 0):
            eval_data_collector.export_data(path, eval_file_name)
        # at the end of each episode, decay the agent's epsilon rate
        agent.epsilon = prev_eps * eps_decay

# training complete
env.close()
print("Average steps per episode: {}".format(total_steps / max_episodes))
data_collector.export_data(path, file_name)
eval_data_collector.export_data(path, eval_file_name)

Episode: 0, Number of steps: 5000, Total steps: 5000
Episode: 1, Number of steps: 5000, Total steps: 10000
Episode: 2, Number of steps: 5000, Total steps: 15000
Episode: 3, Number of steps: 5000, Total steps: 20000
Episode: 4, Number of steps: 5000, Total steps: 25000
Episode: 5, Number of steps: 5000, Total steps: 30000
Episode: 6, Number of steps: 5000, Total steps: 35000
Episode: 7, Number of steps: 5000, Total steps: 40000
Episode: 8, Number of steps: 5000, Total steps: 45000
Episode: 9, Number of steps: 5000, Total steps: 50000
Episode: 10, Number of steps: 1567, Total steps: 51567
Episode: 11, Number of steps: 1304, Total steps: 52871
Episode: 12, Number of steps: 975, Total steps: 53846
Episode: 13, Number of steps: 967, Total steps: 54813
Episode: 14, Number of steps: 391, Total steps: 55204
Episode: 15, Number of steps: 614, Total steps: 55818
Episode: 16, Number of steps: 782, Total steps: 56600
Episode: 17, Number of steps: 343, Total steps: 56943
Episode: 18, Number of step