In [1]:
import numpy as np
import gym_envs.envs.traffic_light_support_functions as tlsf
import traci

# Basic signal-program constants used throughout the notebook.
cycleTime = 60        # length of one signal cycle (also the number of columns of phasePlan)
minGreentime = 5      # NOTE(review): presumably the minimum green duration - confirm units with the env
numberOfPhases = 6    # number of signal phases; the matrix below is numberOfPhases x numberOfPhases

# Intergreen matrix as a plain nested list (one row per phase).
# The previous `np.zeros(...)` pre-allocation was overwritten immediately by this
# literal, so it has been dropped; the resulting object is still a list of lists.
# NOTE(review): entry [i][j] is presumably the intergreen time when switching
# from phase i to phase j - confirm orientation and units against the env.
intergreenMatrix = [
    [0, 0, 0, 7, 6, 6],
    [0, 0, 0, 5, 0, 0],
    [8, 0, 0, 0, 5, 0],
    [6, 6, 0, 0, 8, 0],
    [8, 0, 6, 5, 0, 0],
    [6, 0, 0, 0, 0, 0],
]

# Integer placeholder plan: one row per phase, one column per step of the cycle.
phasePlan = np.full((numberOfPhases, cycleTime), 0)

# Per-phase description of the initial signal program. Both lists have one
# entry per phase; every row of `phase_lengths` sums to the cycle time.
# NOTE(review): the meaning of the starting-phase codes (1 / 2) is defined by
# tlsf.generate_phase_plan - confirm there.
starting_phases = [1, 1, 2, 2, 1, 2]
phase_lengths = [
    [19, 2, 14, 3, 22],
    [41, 2, 13, 4],
    [2, 34, 3, 21],
    [2, 13, 3, 42],
    [40, 2, 14, 3, 1],
    [2, 13, 3, 42]
]

# Fail fast on an inconsistent hand-edited plan instead of deep inside the env.
assert len(starting_phases) == len(phase_lengths) == numberOfPhases, \
    "one starting phase and one length list are required per phase"
assert all(sum(lengths) == cycleTime for lengths in phase_lengths), \
    "every phase's segment lengths must add up to cycleTime"

# The former `np.full(..., 'r')` pre-allocation of initialPhaseplan was a dead
# store (overwritten right here), so it has been removed.
initialPhaseplan = tlsf.generate_phase_plan(starting_phases, phase_lengths)
# NOTE(review): this call replaces the plan generated on the previous line with
# the result of change_phase_plan((5,5), 60); the generated plan is not passed
# in. Confirm that discarding it is intended.
initialPhaseplan = tlsf.change_phase_plan((5,5),60)

In [2]:
import gymnasium as gym
import math
import random
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

#env = gym.make('TrafficEnv-V0', render_mode ='console', starting_phases = starting_phases, phase_lengths = phase_lengths, simulation_time= 3600*5,phase_change_step=5)

# Matplotlib setup: remember whether an inline (notebook) backend is active and,
# if so, make IPython's display helpers available.
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

# Interactive mode, so figures update without blocking the cell.
plt.ion()

# Compute device: CUDA when available, otherwise CPU.
# (An "mps" branch was considered by the author but is left disabled.)
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
import torch
import time
from torchrl.envs import GymEnv, StepCounter, TransformedEnv, GymWrapper

# Seed PyTorch's global RNG so network initialisation and sampling are repeatable.
torch.manual_seed(0)

# Wrap the Gymnasium traffic environment for TorchRL. The initial signal program
# (starting_phases / phase_lengths) comes from the first cell.
gym_env = GymWrapper(
    gym.make(
        'TrafficEnv-V0',
        render_mode='console',
        starting_phases=starting_phases,
        phase_lengths=phase_lengths,
        simulation_time=3600 * 5,
        phase_change_step=5,
    ),
    categorical_action_encoding=True,
)

# StepCounter is required: the training loop reads ("next", "step_count") from
# the replay buffer, and without this transform that key is never produced.
env = TransformedEnv(gym_env, StepCounter(), device=device)
env.set_seed(0)





 Retrying in 1 seconds




795726461

In [4]:
from torchrl.modules import EGreedyModule, MLP, QValueModule
from tensordict.nn import TensorDictModule as Mod, TensorDictSequential as Seq

# Q-network: an MLP with two hidden layers of 128 units and one output per
# discrete action. It is created on `device` so that its parameters live on the
# same device as the environment's outputs (the env is moved to `device`).
value_mlp = MLP(
    out_features=env.action_spec.n,
    num_cells=[128, 128],
    device=device,
)

# Feed the "occupancy" and "vehicle_count" entries to the MLP and store its
# output under "action_value".
# NOTE(review): relies on the MLP accepting several input tensors at once -
# confirm both observations share their leading dimensions.
value_net = Mod(
    value_mlp,
    in_keys=["occupancy", 'vehicle_count'],
    out_keys=["action_value"],
)

# Greedy policy: Q-values followed by action selection.
policy = Seq(value_net, QValueModule(spec=env.action_spec))

# Epsilon-greedy exploration starting at eps=0.9, annealed over 10_000 steps.
exploration_module = EGreedyModule(
    env.action_spec, annealing_num_steps=10_000, eps_init=0.9
)
policy_explore = Seq(policy, exploration_module)

In [5]:
from torchrl.collectors import SyncDataCollector
from torchrl.data import LazyTensorStorage, ReplayBuffer

# Data-collection settings.
init_rand_steps = 0      # frames to collect with random actions before training starts
frames_per_batch = 100   # frames returned by the collector per iteration
optim_steps = 10         # gradient steps performed per collected batch

# Collect with the *exploring* policy: passing the greedy `policy` here would
# bypass the epsilon-greedy module entirely, so its annealing in the training
# loop would have no effect on the collected data.
collector = SyncDataCollector(
    env,
    policy_explore,
    frames_per_batch=frames_per_batch,
    total_frames=-1,  # run until the training loop breaks out
    init_random_frames=init_rand_steps,
)

# Replay buffer holding up to 100_000 transitions.
rb = ReplayBuffer(storage=LazyTensorStorage(100_000))

from torch.optim import Adam

In [6]:
from torchrl.objectives import DQNLoss, SoftUpdate

# DQN objective built on the greedy policy, with a delayed (target) value network.
loss = DQNLoss(
    value_network=policy,
    action_space=env.action_spec,
    delay_value=True,
)

# Adam over the loss module's parameters.
# Note: the name `optim` is reused by the training loop, so it is kept as is.
optim = Adam(loss.parameters(), lr=0.02)

# Soft update of the target parameters, stepped from the training loop.
updater = SoftUpdate(loss, eps=0.99)

In [7]:
from torchrl._utils import logger as torchrl_logger
from torchrl.record import WandbLogger, VideoRecorder

# Output directory for a file-based logger; not used while the W&B logger below is active.
path = "./training_loop"
#logger = CSVLogger(exp_name="dqn", log_dir=path)

# Metrics are sent to Weights & Biases under the experiment name "dqn".
logger = WandbLogger(
    exp_name="dqn",
)

[34m[1mwandb[0m: Currently logged in as: [33mwagnertamas[0m ([33mwagnertamas-budapesti-m-szaki-s-gazdas-gtudom-nyi-egyetem[0m). Use [1m`wandb login --relogin`[0m to force relogin
  from IPython.core.display import HTML, display  # type: ignore


In [8]:
# Training loop: collect a batch, store it, run a few DQN updates, log, repeat.
#
# Changes relative to the previous version of this cell:
#   * frame/episode counters and the epsilon annealing step are updated once per
#     collected batch (they used to run inside the optimisation loop and were
#     therefore inflated by a factor of `optim_steps`);
#   * the periodic console log fires every `log_every` batches (the old
#     `if i % 10:` fired on every batch *except* multiples of 10);
#   * only plain Python numbers are handed to the logger;
#   * sampled batches are moved to `device` before the loss is evaluated.
batch_size = 128             # transitions sampled per gradient step
log_every = 10               # console log period, in collected batches
target_episode_length = 200  # stop once a stored episode is longer than this
# NOTE(review): confirm that 200 steps is a meaningful stopping criterion for
# the traffic environment.

total_count = 0     # environment frames collected so far
total_episodes = 0  # episodes that finished so far
t0 = time.time()
for i, data in enumerate(collector):
    # Write data in replay buffer
    rb.extend(data)
    # Longest episode (in steps) currently stored in the buffer.
    max_length = rb[:]["next", "step_count"].max().item()

    frames_in_batch = data.numel()
    episodes_in_batch = int(data["next", "done"].sum().item())
    total_count += frames_in_batch
    total_episodes += episodes_in_batch

    if len(rb) > init_rand_steps:
        # Optim loop (we do several optim steps
        # per batch collected for efficiency)
        batch_losses = []
        for _ in range(optim_steps):
            sample = rb.sample(batch_size).to(device)
            loss_vals = loss(sample)
            loss_vals["loss"].backward()
            optim.step()
            optim.zero_grad()
            # Update target params after every gradient step
            updater.step()
            batch_losses.append(loss_vals["loss"].item())

        # Anneal epsilon by the number of frames actually collected.
        exploration_module.step(frames_in_batch)

        if i % log_every == 0:
            torchrl_logger.info(f"Max num steps: {max_length}, rb length {len(rb)}")

        # Log the results once per batch, indexed by collected frames.
        if batch_losses:
            logger.log_scalar("loss", sum(batch_losses) / len(batch_losses), total_count)
        logger.log_scalar("epsilon", float(exploration_module.eps), total_count)
        logger.log_scalar("max_steps", max_length, total_count)
        #logger.log_scalar("reward", data["reward"].mean().item(), total_count)
        # "steps" holds the number of `done` flags (finished episodes) in this batch.
        logger.log_scalar("steps", episodes_in_batch, total_count)
        logger.log_scalar("total_episodes", total_episodes, total_count)

    if max_length > target_episode_length:
        break

t1 = time.time()

torchrl_logger.info(
    f"solved after {total_count} steps, {total_episodes} episodes and in {t1-t0}s."
)


TypeError: 'int' object is not subscriptable