In [6]:
import traci
import numpy as np
import tensorflow as tf
import pickle
import time

# --- Constants ---
STATE_DIM = 4  # Adjusted to match the actual state size (4 features)
ACTION_DIM = 4  # Number of possible actions
GAMMA = 0.9  # Discount factor
ALPHA = 0.001  # Learning rate
EPSILON = 0.1  # Exploration factor
MEMORY_CAPACITY = 10000  # Replay memory size
BATCH_SIZE = 32  # Batch size for training
NUM_EPISODES = 2  # Number of training episodes

# --- Q-Network ---
model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation='relu', input_shape=(STATE_DIM,)),  # Match input size
    tf.keras.layers.Dense(ACTION_DIM, activation='linear')  # Match action size
])
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=ALPHA), loss='mse')

# --- Replay Memory ---
memory = []

# --- Helper Functions ---
def get_state():
    """Fetch the current state from the simulator (e.g., SUMO)."""
    state = [
        traci.edge.getLastStepHaltingNumber("E1"),
        traci.edge.getLastStepHaltingNumber("E4"),
        traci.edge.getLastStepMeanSpeed("E1"),
        traci.edge.getLastStepMeanSpeed("E4"),
    ]
    return np.array(state)

def choose_action(state):
    """Epsilon-greedy policy to select an action."""
    if np.random.rand() < EPSILON:
        return np.random.randint(ACTION_DIM)
    else:
        state = state.reshape(1, -1)  # Ensure the state has correct dimensions
        q_values = model.predict(state, verbose=0)[0]
        return np.argmax(q_values)

def update_q_network():
    """Update the Q-network using replay memory."""
    if len(memory) < BATCH_SIZE:
        return
    
    # Randomly sample a minibatch from memory
    indices = np.random.choice(len(memory), BATCH_SIZE, replace=False)
    minibatch = [memory[i] for i in indices]
    
    states = np.array([transition[0] for transition in minibatch])
    actions = np.array([transition[1] for transition in minibatch])
    rewards = np.array([transition[2] for transition in minibatch])
    next_states = np.array([transition[3] for transition in minibatch])
    terminals = np.array([transition[4] for transition in minibatch])
    
    q_values = model.predict(states, verbose=0)
    next_q_values = model.predict(next_states, verbose=0)
    
    for i in range(BATCH_SIZE):
        if terminals[i]:
            q_values[i, actions[i]] = rewards[i]
        else:
            q_values[i, actions[i]] = rewards[i] + GAMMA * np.max(next_q_values[i])
    
    model.fit(states, q_values, epochs=1, verbose=0)

def calculate_reward(state, action, next_state):
    """Define the reward function."""
    reward = -(state[0] + state[1]) + state[2] + state[3]  # Example: Minimize halting, maximize speed
    return reward

def check_if_done():
    """Define termination condition."""
    return traci.simulation.getTime() > 1000  # Example: End after 1000 seconds

# --- Training Loop ---
traci.start(["sumo-gui", "-c", r"C:\Users\lenovo\Desktop\Projet RL\sumo_config\simulation.sumocfg"])  # Update with your SUMO config file
episode_rewards = []

for episode in range(NUM_EPISODES):
    state = get_state()
    total_reward = 0
    
    while traci.simulation.getMinExpectedNumber() > 0:
        action = choose_action(state)
        traci.trafficlight.setPhase("J7", action)  # Apply action
        traci.simulationStep()  # Step simulation
        
        next_state = get_state()
        reward = calculate_reward(state, action, next_state)
        done = check_if_done()
        
        # Store transition in replay memory
        memory.append((state, action, reward, next_state, done))
        if len(memory) > MEMORY_CAPACITY:
            memory.pop(0)
        
        update_q_network()
        state = next_state
        total_reward += reward
        
        if done:
            break
    
    episode_rewards.append(total_reward)
    print(f"Episode {episode + 1}/{NUM_EPISODES}, Total Reward: {total_reward}")

# Save the trained model
model.save("q_network_model.keras")

# Close SUMO connection
traci.close()


TraCIException: The phase index 2 is not in the allowed range [0,1].

In [5]:
traci.close()

In [None]:
import traci
import numpy as np
import tensorflow as tf
import pickle
import time

# --- Constants ---
STATE_DIM = 2  # Adjusted to match the actual state size (4 features)
ACTION_DIM = 4  # Number of possible actions
GAMMA = 0.9  # Discount factor
ALPHA = 0.001  # Learning rate
EPSILON = 0.1  # Exploration factor
MEMORY_CAPACITY = 10000  # Replay memory size
BATCH_SIZE = 32  # Batch size for training
NUM_EPISODES = 2  # Number of training episodes

# --- Q-Network ---
model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation='relu', input_shape=(STATE_DIM,)),  # Match input size
    tf.keras.layers.Dense(ACTION_DIM, activation='linear')  # Match action size
])
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=ALPHA), loss='mse')

# --- Replay Memory ---
memory = []

# --- Helper Functions ---
def get_state():
    """Fetch the current state from the simulator (e.g., SUMO)."""
    state = [
        traci.edge.getLastStepHaltingNumber("E1"),
        traci.edge.getLastStepHaltingNumber("E4"),
        traci.edge.getLastStepMeanSpeed("E1"),
        traci.edge.getLastStepMeanSpeed("E4"),
    ]
    return np.array(state)

def choose_action(state):
    """Epsilon-greedy policy to select an action."""
    if np.random.rand() < EPSILON:
        return np.random.randint(ACTION_DIM)
    else:
        state = state.reshape(1, -1)  # Ensure the state has correct dimensions
        q_values = model.predict(state, verbose=0)[0]
        return np.argmax(q_values)

def update_q_network():
    """Update the Q-network using replay memory."""
    if len(memory) < BATCH_SIZE:
        return
    
    # Randomly sample a minibatch from memory
    indices = np.random.choice(len(memory), BATCH_SIZE, replace=False)
    minibatch = [memory[i] for i in indices]
    
    states = np.array([transition[0] for transition in minibatch])
    actions = np.array([transition[1] for transition in minibatch])
    rewards = np.array([transition[2] for transition in minibatch])
    next_states = np.array([transition[3] for transition in minibatch])
    terminals = np.array([transition[4] for transition in minibatch])
    
    q_values = model.predict(states, verbose=0)
    next_q_values = model.predict(next_states, verbose=0)
    
    for i in range(BATCH_SIZE):
        if terminals[i]:
            q_values[i, actions[i]] = rewards[i]
        else:
            q_values[i, actions[i]] = rewards[i] + GAMMA * np.max(next_q_values[i])
    
    model.fit(states, q_values, epochs=1, verbose=0)

def calculate_reward(state, action, next_state):
    """Define the reward function."""
    reward = -(state[0] + state[1]) + state[2] + state[3]  # Example: Minimize halting, maximize speed
    return reward

def check_if_done():
    """Define termination condition."""
    return traci.simulation.getTime() > 1000  # Example: End after 1000 seconds

# --- Training Loop ---
traci.start(["sumo-gui", "-c", r"C:\Users\lenovo\Desktop\Projet RL\sumo_config\simulation.sumocfg"])  # Update with your SUMO config file
episode_rewards = []

for episode in range(NUM_EPISODES):
    state = get_state()
    total_reward = 0
    
    while traci.simulation.getMinExpectedNumber() > 0:
        action = choose_action(state)
        traci.trafficlight.setPhase("J7", action)  # Apply action
        traci.simulationStep()  # Step simulation
        
        next_state = get_state()
        reward = calculate_reward(state, action, next_state)
        done = check_if_done()
        
        # Store transition in replay memory
        memory.append((state, action, reward, next_state, done))
        if len(memory) > MEMORY_CAPACITY:
            memory.pop(0)
        
        update_q_network()
        state = next_state
        total_reward += reward
        
        if done:
            break
    
    episode_rewards.append(total_reward)
    print(f"Episode {episode + 1}/{NUM_EPISODES}, Total Reward: {total_reward}")

# Save the trained model
model.save("q_network_model.keras")

# Close SUMO connection
traci.close()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


TraCIException: Traffic light 'n2' is not known

In [9]:
traci.close()