In [1]:
import numpy as np
import random

In [2]:
# Step 2: Define States and Actions
# Congestion levels, ordered from lightest to heaviest:
#   0 = Empty, 1 = Light, 2 = Moderate, 3 = Heavy, 4 = Very Heavy
states = list(range(5))
# Signal settings the controller can choose between.
actions = ['GREEN', 'RED']

for label, values in (("Traffic States:", states), ("Actions:", actions)):
    print(label, values)

Traffic States: [0, 1, 2, 3, 4]
Actions: ['GREEN', 'RED']


In [3]:
# Step 3: Initialize Q-Table and Hyperparameters
# Seed the stdlib RNG, which drives the random choices in the cells shown
# here, so that Restart & Run All reproduces the same run every time.
SEED = 42
random.seed(SEED)

# One row per traffic state, one column per action; every estimate starts at 0.
Q = np.zeros((len(states), len(actions)))

alpha = 0.1     # learning rate: step size of each Q-value update
gamma = 0.9     # discount factor applied to the best next-state value
epsilon = 0.2   # probability of picking a random action (exploration)
episodes = 300  # number of training episodes

print("Q-table shape:", Q.shape)

Q-table shape: (5, 2)


In [4]:
# Step 4: Design Reward Function
def get_reward(traffic, action):
    """Score a single signal decision for the given congestion level.

    Args:
        traffic: Congestion level (0 = empty ... 4 = very heavy).
        action: Signal setting, 'GREEN' or 'RED'.

    Returns:
        10 / -10 for GREEN / RED when traffic is heavy (level >= 3),
        5 / -5 for RED / GREEN when the road is empty (level 0),
        1 for moderate traffic (level 2) whatever the action,
        0 in every other case.
    """
    if traffic >= 3:
        # Heavy congestion: GREEN clears it, RED causes jams.
        if action == 'GREEN':
            return 10
        if action == 'RED':
            return -10
        return 0

    if traffic == 0:
        # Empty road: RED saves energy, GREEN wastes power.
        if action == 'RED':
            return 5
        if action == 'GREEN':
            return -5
        return 0

    # Moderate traffic earns a small constant reward; anything else is neutral.
    return 1 if traffic == 2 else 0


print("Reward Example (traffic=3, action='GREEN'):", get_reward(3, 'GREEN'))

Reward Example (traffic=3, action='GREEN'): 10


In [5]:
# Step 5: Define Environment Dynamics
def next_traffic(current):
    """Sample the congestion level for the next time step.

    The level drifts by -1, 0 or +1, picked at random, and the result is
    clamped to the range 0..4. The transition depends only on the current
    level; no signal action is taken into account.

    Args:
        current: Present congestion level.

    Returns:
        The next congestion level as a plain Python int.
    """
    drift = random.choice((-1, 0, 1))
    bounded = np.clip(current + drift, 0, 4)
    return int(bounded)


print("Next Traffic Example:", next_traffic(2))

Next Traffic Example: 2


In [8]:
# Step 6: Train the Agent (Q-learning algorithm)
# Start from a fresh table so that re-running this cell retrains from scratch
# instead of stacking more updates onto an already trained Q.
Q = np.zeros((len(states), len(actions)))

for ep in range(episodes):
    traffic = random.choice(states)  # each episode starts at a random level

    for _ in range(10):  # fixed-length episode of 10 decisions
        # Epsilon-greedy: explore with probability epsilon, otherwise take
        # the action with the highest current estimate for this state.
        if random.uniform(0, 1) < epsilon:
            action = random.choice(actions)
        else:
            action = actions[np.argmax(Q[traffic])]

        next_state = next_traffic(traffic)
        reward = get_reward(traffic, action)

        # Q-learning update: move Q[s, a] toward reward + gamma * max_a' Q[s', a'].
        a = actions.index(action)
        best_next = np.max(Q[next_state])
        Q[traffic, a] += alpha * (reward + gamma * best_next - Q[traffic, a])
        traffic = next_state

print("Training Completed!")

Training Completed!


In [9]:
# Step 7: Test the Learned Traffic Light Controller
# Ask for a starting level; fall back to Moderate (2) when the value is not a
# valid integer in 0..4 or when no interactive input is available.
try:
    traffic = int(input("\nEnter starting traffic level (0–4): "))
    if traffic < 0 or traffic > 4:
        raise ValueError("Traffic level out of range!")
except (ValueError, EOFError, NotImplementedError) as e:
    # EOFError: stdin is closed (e.g. the code runs as a plain script).
    # NotImplementedError: base class of the error IPython raises when the
    # frontend cannot service input() (e.g. headless notebook execution).
    print(e)
    traffic = 2
    print("Default traffic level set to Moderate (2).")

print("\n--- Smart Traffic Light Simulation (10 steps) ---")
for step in range(10):
    # Act greedily on the learned Q-values: no exploration during testing.
    action = actions[np.argmax(Q[traffic])]
    print(f"Step {step+1}: Traffic Level={traffic}, Light={action}")
    traffic = next_traffic(traffic)

print("\nSimulation Complete!")


Enter starting traffic level (0–4):  3



--- Smart Traffic Light Simulation (10 steps) ---
Step 1: Traffic Level=3, Light=GREEN
Step 2: Traffic Level=2, Light=RED
Step 3: Traffic Level=3, Light=GREEN
Step 4: Traffic Level=2, Light=RED
Step 5: Traffic Level=3, Light=GREEN
Step 6: Traffic Level=2, Light=RED
Step 7: Traffic Level=2, Light=RED
Step 8: Traffic Level=1, Light=GREEN
Step 9: Traffic Level=0, Light=RED
Step 10: Traffic Level=0, Light=RED

Simulation Complete!


###### Step 8: Behavior Analysis (To discuss in your report)
Analysis Questions:
1. Yes, it learns to do that.
2. Yes, it saves energy.
3. More exploration: even though it slows learning down, the agent explores a broader range of actions.
4. Convergence gets even faster, but there is a risk of getting stuck in a local optimum.
5. Values for correct actions (GREEN on heavy, RED on empty) increase.