In [1]:
import torch
import cql.environment as env
import cql.nn.model as model
import cql.plotting.plotting as plot

# --- Training schedule -------------------------------------------------------
max_epochs = 2000
max_episodes = 200

# --- Optimisation hyperparameters --------------------------------------------
training_batch_size = 64
learning_rate = 0.001
discount_factor = 0.99
# NOTE(review): the two exploration settings below are defined here but are not
# passed to model.run_sim anywhere in this cell -- confirm whether they should
# be forwarded or can be removed.
exploration_probability = 0.1
exploration_discount = 0.99

critic_training_step = 1
actor_training_step = 1

# Weight parameters for the reward function
# [0] -> X_Difference
# [1] -> Y_Difference
# [2] -> Area_Reward
# [3] -> Boundary_Constraint
# [4] -> NOTE(review): a fifth weight is supplied but was never documented in
#        this legend -- confirm its meaning against the reward function.
weight_params = [-1.0, -1.0, -1.0, -5.0, 20.0]

# --- Environment layout ------------------------------------------------------
starting_pos = (4.5, 4.5)
target_pos = (0.5, 0.5)
env_dim = (5, 5)
# Each entry is (grid coordinate, weight value) and is handed to
# Environment.set_weights below.
env_obs = [((2, 2), -999999),]

# NOTE(review): goal_radius is not referenced anywhere else in this cell.
goal_radius = 0.2

# Seed torch's RNG so a Restart & Run All reproduces the same run.
# NOTE(review): if the cql package also draws from `random` or numpy, those
# generators need seeding as well -- confirm.
seed = 0
torch.manual_seed(seed)

# Prefer Apple's MPS backend (the original target), but fall back to CUDA and
# then CPU so the notebook still runs on machines without MPS support.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Currently using {device} device")

# Build the environment and register the configured cell weights on it.
e = env.Environment(
    dim=env_dim,
    device=device,
    target_pos=target_pos,
    start_pos=starting_pos,
)
e.set_weights(env_obs)
e.display()

# Create the reward-weight tensor directly on the target device instead of
# building it on the CPU and copying it over afterwards.
weights = torch.tensor(weight_params, dtype=torch.float, device=device)

# NOTE(review): exploration_probability, exploration_discount and goal_radius
# are configured in this cell but not forwarded here -- confirm whether
# run_sim accepts them.
paths = model.run_sim(
    environment=e,
    weights=weights,
    device=device,
    learning_rate=learning_rate,
    discount_factor=discount_factor,
    max_epochs=max_epochs,
    max_episodes=max_episodes,
    training_batch_size=training_batch_size,
    critic_training_step=critic_training_step,
    actor_training_step=actor_training_step,
)

# NOTE(review): heatmap_dim=5 presumably mirrors env_dim (5, 5) -- confirm, and
# consider deriving it from env_dim so the two cannot drift apart.
plot.plot_heatmap(paths=paths, heatmap_dim=5, bins=25)

Currently using mps device
Setting weight with value: -999999, at coordinate: (2, 2)
tensor([[      0.,       0.,       0.,       0.,       0.],
        [      0.,       0.,       0.,       0.,       0.],
        [      0.,       0., -999999.,       0.,       0.],
        [      0.,       0.,       0.,       0.,       0.],
        [      0.,       0.,       0.,       0.,       0.]])
Current Epoch: 0
Starting Position: tensor([4.5000, 4.5000], device='mps:0')
<class 'torch.Tensor'>
tensor(True, device='mps:0')
tensor([4.5000, 4.5000], device='mps:0') -> tensor([4.8910, 3.7913], device='mps:0', grad_fn=<AddBackward0>), action: tensor([ 0.3910, -0.7087], device='mps:0', grad_fn=<TanhBackward0>)
tensor(False, device='mps:0')
tensor([4.8910, 3.7913], device='mps:0', grad_fn=<AddBackward0>) -> tensor([5.0793, 3.1171], device='mps:0', grad_fn=<AddBackward0>), action: tensor([ 0.1883, -0.6743], device='mps:0', grad_fn=<TanhBackward0>)
Entered invalid position, breaking epoch 1.
Current Epoch: 

  target_pos = torch.tensor(environment.target_pos, dtype=torch.float, device=environment.device)
  reward = torch.tensor(val if val >= 0 else -1, device=self.device)


tensor([4.5745, 3.8838], device='mps:0', grad_fn=<AddBackward0>) -> tensor([4.9595, 3.0302], device='mps:0', grad_fn=<AddBackward0>), action: tensor([ 0.3849, -0.8535], device='mps:0', grad_fn=<TanhBackward0>)
tensor(False, device='mps:0')
tensor([4.9595, 3.0302], device='mps:0', grad_fn=<AddBackward0>) -> tensor([5.0074, 2.4179], device='mps:0', grad_fn=<AddBackward0>), action: tensor([ 0.0479, -0.6124], device='mps:0', grad_fn=<TanhBackward0>)
Entered invalid position, breaking epoch 2.
Current Epoch: 2
Starting Position: tensor([4.5000, 4.5000], device='mps:0')
<class 'torch.Tensor'>
tensor(True, device='mps:0')
tensor([4.5000, 4.5000], device='mps:0') -> tensor([4.6176, 3.8344], device='mps:0', grad_fn=<AddBackward0>), action: tensor([ 0.1176, -0.6656], device='mps:0', grad_fn=<TanhBackward0>)
tensor(False, device='mps:0')
tensor([4.6176, 3.8344], device='mps:0', grad_fn=<AddBackward0>) -> tensor([5.0156, 3.2293], device='mps:0', grad_fn=<AddBackward0>), action: tensor([ 0.3980, -0

  loss = F.mse_loss(predicted_q, expected_q)


tensor(False, device='mps:0')
tensor([4.8877, 3.4124], device='mps:0', grad_fn=<AddBackward0>) -> tensor([5.5080, 2.5132], device='mps:0', grad_fn=<AddBackward0>), action: tensor([ 0.6203, -0.8992], device='mps:0', grad_fn=<TanhBackward0>)
Entered invalid position, breaking epoch 2.
Current Epoch: 25
Starting Position: tensor([4.5000, 4.5000], device='mps:0')
<class 'torch.Tensor'>
tensor(True, device='mps:0')
tensor([4.5000, 4.5000], device='mps:0') -> tensor([4.7150, 3.7918], device='mps:0', grad_fn=<AddBackward0>), action: tensor([ 0.2150, -0.7082], device='mps:0', grad_fn=<TanhBackward0>)
tensor(False, device='mps:0')
tensor([4.7150, 3.7918], device='mps:0', grad_fn=<AddBackward0>) -> tensor([5.0953, 2.9592], device='mps:0', grad_fn=<AddBackward0>), action: tensor([ 0.3802, -0.8327], device='mps:0', grad_fn=<TanhBackward0>)
Entered invalid position, breaking epoch 1.
Current Epoch: 26
Starting Position: tensor([4.5000, 4.5000], device='mps:0')
<class 'torch.Tensor'>
tensor(True, de

KeyboardInterrupt: 