In [4]:
import torch
import os
import numpy as np
import gymnasium as gym

print("current path: ", (os.getcwd()))
print("Done importing!")

current path:  /home/felix/stuff/uni/thesis/fix_her/experiments
Done importing!


In [7]:
## Creating Environments ##
import dill
from four_room.env import FourRoomsEnv
from four_room.wrappers import gym_wrapper

gym.register('MiniGrid-FourRooms-v1', FourRoomsEnv)

with open('./four_room/configs/fourrooms_train_config.pl', 'rb') as file:
    train_config = dill.load(file)

with open('./four_room/configs/fourrooms_test_0_config.pl', 'rb') as file:
    test_0_config = dill.load(file)

with open('./four_room/configs/fourrooms_test_100_config.pl', 'rb') as file:
    test_100_config = dill.load(file)

def make_env(config, seed: int= 0, rank: int = 0):
    env = gym.make('MiniGrid-FourRooms-v1', 
                    agent_pos=config['agent positions'], 
                    goal_pos=config['goal positions'], 
                    doors_pos=config['topologies'], 
                    agent_dir=config['agent directions'])
    
    return env.reset(seed=seed+rank)


train_env = make_env(train_config)

test_0_env = make_env(test_0_config)

test_100_env = make_env(test_100_config)

  logger.warn(f"Overriding environment {new_spec.id} already in registry.")


In [13]:
### Creating Agents ###

## Baseline 
from stable_baselines3.dqn import CnnPolicy
from stable_baselines3 import DQN
from stable_baselines3.common.torch_layers import BaseFeaturesExtractor
from torch import nn

lr_schedule = lambda x: 0.00009*x+0.00001

class Baseline_CNN(BaseFeaturesExtractor):
    """
    :param observation_space: (gym.Space)
    :param features_dim: (int) Number of features extracted.
        This corresponds to the number of unit for the last layer.
    """

    def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 512):
        super(Baseline_CNN, self).__init__(observation_space, features_dim)
        # We assume CxHxW images (channels first)
        # Re-ordering will be done by pre-preprocessing or wrapper
        n_input_channels = observation_space.shape[0]
        self.cnn = nn.Sequential(
            nn.Conv2d(n_input_channels, 32, kernel_size=3, stride=1, padding=1, padding_mode='circular'),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, padding_mode='circular'),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, stride=1, padding=1, padding_mode='circular'),
            nn.Flatten(),
        )

        # Compute shape by doing one forward pass
        with torch.no_grad():
            n_flatten = self.cnn(
                torch.as_tensor(observation_space.sample()[None]).float()
            ).shape[1]

        self.linear = nn.Sequential(nn.Linear(n_flatten, features_dim), nn.ReLU())

    def forward(self, observations: torch.Tensor) -> torch.Tensor:
        return self.linear(self.cnn(observations))

print(train_env[0])

baseline_policy_kwargs = dict(activation_fn=torch.nn.ReLU, net_arch=[512,128,64], 
                              features_extractor_class=Baseline_CNN, features_extractor_kwargs={'features_dim': 512},
                              optimizer_class=torch.optim.Adam, optimizer_kwargs={'lr': lr_schedule},
                              normalize_images=False)

baseline_model = DQN('CnnPolicy', train_env, buffer_size=500000, batch_size=256, gamma=0.99, 
                     gradient_steps=1, train_freq=(10, 'step'), target_update_interval=(10,'step'), tau=0.01,
                     exploration_initial_eps=1.0, exploration_final_eps=0.01, learning_rate=lr_schedule, max_grad_norm=1.0,
                     verbose=1, tensorboard_log="./four_room/tensorboard/", policy_kwargs=baseline_policy_kwargs ,device='cuda')

[[[0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 1 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]]

 [[0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 1 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]]

 [[1 1 1 1 1 1 1 1 1]
  [1 1 1 1 1 1 1 1 1]
  [0 0 1 1 0 0 0 1 0]
  [0 0 1 1 0 0 0 0 0]
  [0 0 1 1 0 0 0 1 0]
  [0 1 1 1 1 0 1 1 1]
  [0 0 1 1 0 0 0 1 0]
  [0 0 1 1 0 0 0 1 0]
  [0 0 1 1 0 0 0 0 0]]

 [[0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 0 0 0]
  [0 0 0 0 0 0 1 0 0]
  [0 0 0 0 0 0 0 0 0]]]
Using cuda device


ValueError: The environment is of type <class 'tuple'>, not a Gymnasium environment. In this case, we expect OpenAI Gym to be installed and the environment to be an OpenAI Gym environment.

In [None]:
### Training Agents ###
baseline_model.learn(total_timesteps=500000, log_interval=10)