# Using invariant representations to guide future exploration

I would like to further explore the idea of leveraging abstract concepts learned from previous experiences to guide the behavior of an agent when it is faced with a novel problem. The aim is to sequentially construct and expand the agent's knowledge base, and to use it to build a behavioral policy that guides exploration while the agent learns the optimal policy.

In [None]:
import os
import sys
import wandb

from gym.wrappers.pixel_observation import PixelObservationWrapper
from tianshou.utils import WandbLogger
from torch.utils.tensorboard import SummaryWriter

In [None]:
# Put the current working directory (as an absolute path) on the import
# search path, unless it is already there, so modules that live next to this
# notebook can be imported.
notebook_dir = os.path.abspath(os.path.join('.'))
already_importable = notebook_dir in sys.path
if not already_importable:
    sys.path.append(notebook_dir)

In [None]:
from models import Task
from models.trainer import DQNTrainer
from models.wrappers import PreprocessObservation, StackObservation

In [None]:
# Weights & Biases settings used by the training cell:
#   WANDB_PROJECT      -> `project` argument of `wandb.init`
#   WANDB_LOG_DIR      -> `dir` argument of `wandb.init`
#   WANDB_TENSORBOARD  -> log directory handed to the TensorBoard SummaryWriter
WANDB_PROJECT = "lldqn"
WANDB_LOG_DIR = "./data"
WANDB_TENSORBOARD = "./data/tensorboard"

# One entry per task. Each entry is a mapping of keyword arguments that the
# training cell unpacks into `Task(**entry, version=...)`.
# `wrappers` is a list of (wrapper class, constructor kwargs) pairs.
# NOTE(review): presumably applied to the environment in list order - confirm
# against `Task`.
TASKS = [
    {
        "env_name": "CartPole-v1",
        "save_data_dir": "./data/models",
        "use_baseline": True,
        "wrappers": [
            (PixelObservationWrapper, {"pixels_only": False}),
            (PreprocessObservation, {}),
            (StackObservation, {}),
        ],
    },
]

In [None]:
# Number of independent training repeats per task. Kept in one place so the
# loop bound and the value recorded in the W&B run config cannot drift apart.
REPEAT_COUNT = 1

# Train every configured task REPEAT_COUNT times, one W&B run per repeat.
for task_data in TASKS:
    for repeat in range(REPEAT_COUNT):
        # Versions are 1-based: repeat 0 -> version 1.
        task = Task(**task_data, version=repeat + 1)

        wandb.init(
            project=WANDB_PROJECT,
            dir=WANDB_LOG_DIR,
            group=task.name,
            job_type="Policy-Train",
            name=task.save_model_name,
            sync_tensorboard=True,
            reinit=True,
            monitor_gym=True,
            config={
                "train/repeat_count": REPEAT_COUNT,
            },
        )

        try:
            logger = WandbLogger()
            # The context manager closes (and therefore flushes) the
            # TensorBoard writer even if training fails part-way through.
            with SummaryWriter(WANDB_TENSORBOARD) as writer:
                logger.load(writer)
                trainer = DQNTrainer(task, logger=logger)
                result = trainer.run()
        except BaseException:
            # BaseException on purpose: interrupting the notebook cell raises
            # KeyboardInterrupt, and the run should still be closed and marked
            # as failed rather than left open. The exception is re-raised.
            wandb.finish(exit_code=1)
            raise
        else:
            # Explicitly close the run; otherwise the last run stays open
            # in the notebook until the kernel exits.
            wandb.finish()

        # NOTE(review): "{:.4}" means 4 significant digits for a float (or the
        # first 4 characters for a str) - confirm this is the intended format
        # for result["duration"].
        print(f"Finished repeat {repeat + 1}. Time taken: {result['duration']:.4}s")
