<a href="https://colab.research.google.com/github/aa14k/adaptive_time/blob/main/code/adaptive_time/notebooks/tradeoff.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the project located in ./code in editable (-e) mode so the
# `adaptive_time` imports below resolve to the checked-out sources.
%pip install -e code


In [None]:
# %pip install matplotlib

In [None]:
from importlib import reload

from pprint import pprint

import argparse
import json
import numpy as np

from adaptive_time.environment import MountainCar
from adaptive_time.monte_carlo import mc_policy_iteration
from adaptive_time.samplers import UniformSampler
from adaptive_time.sarsa import sarsa
from adaptive_time.q_functions import MountainCarTileCodingQ
from adaptive_time.utils import parse_dict



In [None]:
import matplotlib.pyplot as plt
import copy

In [None]:
# Path to the SARSA tile-coding experiment configuration; it is relative,
# so it is resolved against the notebook's working directory.
config_file = "code/adaptive_time/configs/sarsa_tilecoding.json"

# Read the whole file and parse it into a plain (nested) dict that the
# following cells override per experiment.
with open(config_file, "r") as f:
    config_dict = json.loads(f.read())

# Echo the baseline configuration for reference.
pprint(config_dict)

In [None]:
def setup(config):
  if config.env == "mountain_car":
    q_function = MountainCarTileCodingQ(config.agent_config)
    env = MountainCar(**vars(config.env_kwargs))
  else:
    raise ValueError(f"Unsupported env: {config.env}")

  if config.sampler_config.sampler == "uniform":
      observation_sampler = UniformSampler(
          env.horizon - 1,
          config.sampler_config.sampler_kwargs.spacing,
      )
  else:
      raise NotImplementedError

  return q_function, env, observation_sampler



In [None]:
# Run SARSA once per sampler spacing and collect (x, y, label) tuples in
# `results` for plotting.

# Alternative, much smaller settings (presumably for a quick smoke test):
# config_dict['budget'] = 1000
# config_dict['log_frequency'] = 1000
# NOTE: these assignments mutate the shared `config_dict` in place, so the
# overrides remain visible to every later cell.
config_dict['budget'] = 200001
config_dict['log_frequency'] = 50000

# Spacings to evaluate. Only a single value is active here, so `results`
# ends up with exactly one entry; the commented line sweeps three values.
sampler_spacing_tries = [100]
# sampler_spacing_tries = [1, 10, 100]
# Written into env_kwargs['dt_sec'] for every run (presumably the simulation
# time step in seconds -- confirm against MountainCar).
dt_sec = 0.01

results = []
for spacing in sampler_spacing_tries:
  # Deep copy so the per-run overrides below never leak into `config_dict`
  # or into other iterations.
  cur_copy = copy.deepcopy(config_dict)
  cur_copy['sampler_config']['sampler_kwargs']['spacing'] = spacing
  cur_copy['env_kwargs']['dt_sec'] = dt_sec
  cur_copy['use_action_repeat'] = True
  # Convert the nested dict into the attribute-style object that setup()
  # reads (config.env, config.sampler_config.sampler, ...).
  config = parse_dict(cur_copy)
  q_function, env, observation_sampler = setup(config)

  # Seed NumPy's global RNG before training.
  # NOTE(review): seeding happens after setup(), so any NumPy randomness used
  # while constructing the Q-function or environment is not covered by this
  # seed -- confirm that is intended.
  np.random.seed(config.seed)
  print()
  print(f"  ::   Starting run with spacing={spacing}   ::  ")

  # sarsa() returns two sequences that are plotted against each other below
  # (x: cum_samples, y: ep_returns).
  cum_samples, ep_returns = sarsa(
      env=env,
      q_function=q_function,
      observation_sampler=observation_sampler,
      config=config,
  )

  # (x, y, label) -- the tuple layout plot_performance() unpacks.
  results.append((cum_samples, ep_returns, f"spacing={spacing}"))


In [None]:
def plot_performance(tuples_of_x_and_y_and_labels, title=None):
    """Draw one labelled line per run on the current axes and show the figure.

    Args:
        tuples_of_x_and_y_and_labels: iterable of `(x, y, label)` triples;
            each triple becomes one line with its own legend entry.
        title: optional plot title; no title is set when it is `None`.
    """
    axes = plt.gca()

    # Hide the top and right frame lines.
    for side in ('top', 'right'):
        axes.spines[side].set_visible(False)

    for xs, ys, series_label in tuples_of_x_and_y_and_labels:
        axes.plot(xs, ys, label=series_label)

    # Optional view tweaks, currently disabled:
    # if ylim:
    #     plt.ylim(-0.05, 0.5)
    # ax.grid(visible=True, axis='y', alpha=0.5, linestyle='--')
    # plt.xlim(-1000, 80_000)

    # Horizontal y-label; the padding keeps it clear of the tick labels.
    axes.set_ylabel('Episode\nReturn', rotation=0, labelpad=40)
    axes.set_xlabel('Number of Samples')
    axes.legend()

    if title is not None:
        axes.set_title(title)

    plt.show()

In [None]:
# Plot every collected run; the title records the sample budget and the
# time step used for this sweep.
plot_performance(
    tuples_of_x_and_y_and_labels=results,
    title=f"Budget: {config_dict['budget']}; uniform spacing; dt={dt_sec}",
)


In [None]:
# Report the mean episode return of every run, one line per run.
# Iterating over `results` (instead of indexing entries 0, 1 and 2) keeps the
# cell valid for any number of spacings: with a single spacing configured,
# `results[1]` would raise IndexError.
for _run_samples, _run_returns, _run_label in results:
    print(f"{_run_label}: {np.mean(_run_returns)}")




## Debugging: Working Directory and Import Paths

In [None]:
import os
import sys

# Print the working directory. Relative paths used in this notebook (for
# example the config file path) are resolved against it.
print("Working Directory:", os.getcwd())

# Print every entry of sys.path, i.e. the locations Python searches when
# resolving imports, one per line.
print("Import Paths:")
for path in sys.path:
    print(path)
