In [None]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random
import re
from collections import namedtuple, deque
import matplotlib.pyplot as plt

# Hyperparameters shared by the agent, the replay buffer and the training loop
BUFFER_SIZE = 100_000  # capacity handed to the replay buffer
BATCH_SIZE = 64        # number of transitions drawn per learning update
GAMMA = 0.99           # discount applied to the bootstrapped next-state value
TAU = 0.001            # blend factor used by the soft target-network update
LR = 0.0005            # learning rate given to the Adam optimizer
UPDATE_EVERY = 4       # Agent.step() calls between learning updates

# Use the first CUDA device when PyTorch can see one, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [None]:
class QNetwork(nn.Module):
    """Three-layer fully connected network producing one output per action."""

    def __init__(self, state_size, action_size, seed):
        """Build the layers after seeding torch's global random generator.

        Args:
            state_size: number of input features of the first linear layer.
            action_size: number of outputs of the last linear layer.
            seed: value passed to ``torch.manual_seed`` before the layers are
                created, so two networks built with the same seed start with
                identical weights.
        """
        super().__init__()
        # torch.manual_seed returns the global Generator; it is kept on the module
        self.seed = torch.manual_seed(seed)
        hidden_units = 64
        self.fc1 = nn.Linear(state_size, hidden_units)
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.fc3 = nn.Linear(hidden_units, action_size)

    def forward(self, state):
        """Return the raw (un-activated) output of the last layer for ``state``."""
        hidden = state
        # ReLU follows the two hidden layers only; the output layer stays linear
        for layer in (self.fc1, self.fc2):
            hidden = F.relu(layer(hidden))
        return self.fc3(hidden)

In [None]:
class Agent():
    """Epsilon-greedy Q-learning agent with a replay buffer and a soft-updated target network."""

    def __init__(self, state_size, action_size, seed):
        """Create the two Q-networks, the optimizer, the replay buffer and the exploration schedule.

        Args:
            state_size: number of features in one observation.
            action_size: number of discrete actions.
            seed: seed forwarded to Python's ``random`` module, to both
                networks and to the replay buffer.
        """
        self.state_size = state_size
        self.action_size = action_size
        # random.seed() returns None, so this attribute is always None; the call
        # is kept for its side effect of seeding Python's global RNG.
        self.seed = random.seed(seed)

        # Both networks receive the same seed and therefore start with identical weights.
        self.qnetwork_local = QNetwork(state_size, action_size, seed).to(device)
        self.qnetwork_target = QNetwork(state_size, action_size, seed).to(device)
        self.optimizer = optim.Adam(self.qnetwork_local.parameters(), lr=LR)

        self.memory = ReplayBuffer(action_size, BUFFER_SIZE, BATCH_SIZE, seed)
        # Counts step() calls modulo UPDATE_EVERY
        self.t_step = 0

        # Exploration schedule; the agent only stores these values, callers apply the decay
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995

    def step(self, state, action, reward, next_state, done):
        """Store one transition and learn on every UPDATE_EVERY-th call.

        Learning is skipped until the buffer holds more than BATCH_SIZE
        transitions.
        """
        self.memory.add(state, action, reward, next_state, done)

        self.t_step = (self.t_step + 1) % UPDATE_EVERY
        if self.t_step == 0 and len(self.memory) > BATCH_SIZE:
            experiences = self.memory.sample()
            self.learn(experiences, GAMMA)

    def act(self, state, eps=None):
        """Choose an action epsilon-greedily.

        Args:
            state: NumPy array holding the current observation.
            eps: exploration probability; ``self.epsilon`` is used when None.

        Returns:
            Index of the chosen action: the arg-max of the local network's
            outputs on the greedy branch, otherwise a uniformly random index.
        """
        eps = eps if eps is not None else self.epsilon

        # Draw first: the network is only evaluated when its output is actually
        # used, which avoids a wasted forward pass on every exploratory step.
        if random.random() > eps:
            state = torch.from_numpy(state).float().unsqueeze(0).to(device)
            self.qnetwork_local.eval()
            with torch.no_grad():
                action_values = self.qnetwork_local(state)
            self.qnetwork_local.train()
            return np.argmax(action_values.cpu().data.numpy())

        return random.choice(np.arange(self.action_size))

    def learn(self, experiences, gamma):
        """Run one gradient step on a sampled batch, then soft-update the target network.

        Args:
            experiences: tuple ``(states, actions, rewards, next_states, dones)``
                of tensors as produced by ``ReplayBuffer.sample``.
            gamma: discount factor applied to the next-state value.
        """
        states, actions, rewards, next_states, dones = experiences

        # Bootstrapped target: reward plus the discounted best target-network
        # value of the next state, zeroed out for terminal transitions.
        Q_targets_next = self.qnetwork_target(next_states).detach().max(1)[0].unsqueeze(1)
        Q_targets = rewards + (gamma * Q_targets_next * (1 - dones))

        # Value the local network currently assigns to the action that was taken
        Q_expected = self.qnetwork_local(states).gather(1, actions)

        loss = F.mse_loss(Q_expected, Q_targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.soft_update(self.qnetwork_local, self.qnetwork_target, TAU)

    def soft_update(self, local_model, target_model, tau):
        """Move each target parameter towards its local counterpart.

        Applies ``target = tau * local + (1 - tau) * target`` in place.
        """
        for target_param, local_param in zip(target_model.parameters(), local_model.parameters()):
            target_param.data.copy_(tau * local_param.data + (1.0 - tau) * target_param.data)

In [None]:
class ReplayBuffer:
    """Bounded store of transitions with uniform random minibatch sampling."""

    def __init__(self, action_size, buffer_size, batch_size, seed):
        """Set up the bounded storage and seed Python's global RNG.

        Args:
            action_size: stored on the instance; not otherwise used by this class.
            buffer_size: maximum number of transitions kept.
            batch_size: number of transitions returned by ``sample``.
            seed: value passed to ``random.seed``.
        """
        self.action_size = action_size
        self.batch_size = batch_size
        # A deque with maxlen drops its oldest entry when a new one is appended at capacity
        self.memory = deque(maxlen=buffer_size)
        self.experience = namedtuple(
            "Experience",
            field_names=["state", "action", "reward", "next_state", "done"],
        )
        # random.seed() returns None, so the attribute ends up None
        self.seed = random.seed(seed)

    def add(self, state, action, reward, next_state, done):
        """Append one transition to the buffer."""
        self.memory.append(self.experience(state, action, reward, next_state, done))

    def sample(self):
        """Draw ``batch_size`` transitions at random and return them as tensors.

        Returns:
            Tuple ``(states, actions, rewards, next_states, dones)``; actions
            are converted to long, everything else to float, and all tensors
            are moved to ``device``.
        """
        drawn = random.sample(self.memory, k=self.batch_size)
        batch = [entry for entry in drawn if entry is not None]

        def stack(field):
            # One row per sampled transition
            return np.vstack([getattr(entry, field) for entry in batch])

        states = torch.from_numpy(stack("state")).float().to(device)
        actions = torch.from_numpy(stack("action")).long().to(device)
        rewards = torch.from_numpy(stack("reward")).float().to(device)
        next_states = torch.from_numpy(stack("next_state")).float().to(device)
        # Cast the done flags to uint8 before the tensor conversion
        done_flags = stack("done").astype(np.uint8)
        dones = torch.from_numpy(done_flags).float().to(device)

        return (states, actions, rewards, next_states, dones)

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.memory)

In [None]:
class TrafficEnv:
    """Episodic environment that walks through the rows of a traffic-count table.

    An observation is ``[east, west, north, south, light]``: the four
    per-direction volumes parsed from the
    'Vehicle Volume By Each Direction of Traffic' column of the current row,
    followed by the current traffic-light value (the last action taken).
    """

    def __init__(self, data, max_steps=1000):
        """Store the table and the episode length limit.

        Args:
            data: pandas DataFrame with the columns
                'Vehicle Volume By Each Direction of Traffic' and
                'Total Passing Vehicle Volume'.
            max_steps: an episode ends after this many steps, or earlier when
                the table runs out of rows.
        """
        self.data = data
        self.max_steps = max_steps
        self.current_step = 0
        self.num_samples = len(self.data)
        self.traffic_light_state = 0
        # Most recent observation; populated by reset() and refreshed by step()
        self.state = None

    def reset(self):
        """Start a new episode at the first row and return its observation.

        Raises:
            ValueError: if the table has no rows.
        """
        if self.num_samples == 0:
            raise ValueError("TrafficEnv needs at least one row of data")
        self.current_step = 0
        # Start every episode from the same light value so the previous
        # episode's last action does not leak into the first observation.
        self.traffic_light_state = 0
        self.state = self._get_state(self.current_step)
        return self.state

    def step(self, action):
        """Apply ``action`` to the light, score the current row and advance one row.

        Returns:
            Tuple ``(next_state, reward, done)``. When the episode is done,
            ``next_state`` is the last observation produced before termination.
        """
        self._update_traffic_light(action)
        reward = self._calculate_reward()
        self.current_step += 1
        done = self.current_step >= self.max_steps or self.current_step >= self.num_samples

        if not done:
            # Track the newest observation so the terminal / out-of-data
            # fallbacks really return the last known state.
            self.state = self._get_state(self.current_step)
        return self.state, reward, done

    def _get_state(self, step):
        """Build the observation for row ``step``, or reuse the last one past the data."""
        if step < self.num_samples:
            row = self.data.iloc[step]
            vehicle_volume = self._parse_vehicle_volume(row)
            return np.array(vehicle_volume + [self.traffic_light_state])
        # Beyond the data: fall back to the last known observation
        return self.state

    def _parse_vehicle_volume(self, row):
        """Extract ``[east, west, north, south]`` volumes from a row's direction text.

        Directions that do not appear in the text stay at 0. A cell that is not
        a string (for example a missing value) yields all zeros.
        """
        volumes = {'East Bound': 0, 'West Bound': 0, 'North Bound': 0, 'South Bound': 0}

        volume_data = row['Vehicle Volume By Each Direction of Traffic']
        if not isinstance(volume_data, str):
            # re.findall would raise TypeError on NaN/None cells
            return list(volumes.values())

        # Each match is a (label, digits) pair taken from "<label>: <digits>"
        matches = re.findall(r'(\w+\s*\w*)\s*:\s*(\d+)', volume_data)

        for direction, volume in matches:
            direction = direction.strip()
            # The first compass word found in the label decides the slot
            for key in volumes:
                if key.split()[0] in direction:
                    volumes[key] = int(volume)
                    break

        return list(volumes.values())

    def _update_traffic_light(self, action):
        """Record ``action`` as the current traffic-light value."""
        self.traffic_light_state = action

    def _calculate_reward(self):
        """Reward for the current row given the current light value.

        Returns the row's 'Total Passing Vehicle Volume' divided by 1000 when
        the light value is 1, the negative of that otherwise, and 0 once the
        step index is past the end of the data.
        """
        if self.current_step >= self.num_samples:
            return 0  # No reward if we're beyond the data

        total_volume = self.data.iloc[self.current_step]['Total Passing Vehicle Volume']
        if self.traffic_light_state == 1:  # Green light
            return total_volume / 1000  # Reward based on throughput
        return -total_volume / 1000  # Red light: penalty for stopping traffic

In [None]:
def train(env, agent, episodes, max_steps):
    """Run the training loop and collect per-episode statistics.

    Args:
        env: environment whose ``reset()`` returns an observation and whose
            ``step(action)`` returns ``(next_state, reward, done)``.
        agent: object providing ``act``, ``step``, ``memory``,
            ``qnetwork_local``, ``qnetwork_target`` and the ``epsilon``,
            ``epsilon_decay`` and ``epsilon_min`` attributes.
        episodes: number of episodes to run.
        max_steps: upper bound on the number of steps per episode.

    Returns:
        Tuple ``(scores, losses, epsilons)`` of lists with one entry per
        episode: total reward, mean monitoring loss (0 when none was
        computed) and the epsilon value after decay.

    Side effects:
        Saves ``checkpoint_{e}.pth`` every 10th episode and prints a progress
        line every 5th episode.
    """
    scores = []
    losses = []
    epsilons = []

    for e in range(episodes):
        # Shape each observation as a single-row batch; -1 takes the width from
        # the observation itself instead of relying on a global `state_size`.
        state = np.reshape(env.reset(), (1, -1))
        score = 0
        episode_losses = []

        for _ in range(max_steps):
            action = agent.act(state)
            next_state, reward, done = env.step(action)
            next_state = np.reshape(next_state, (1, -1))
            score += reward

            agent.step(state, action, reward, next_state, done)

            # Monitoring only: estimate the TD loss on a freshly sampled batch.
            # Nothing is back-propagated here, so autograd is switched off.
            if len(agent.memory) > BATCH_SIZE:
                with torch.no_grad():
                    states, actions, rewards, next_states, dones = agent.memory.sample()
                    Q_targets_next = agent.qnetwork_target(next_states).detach().max(1)[0].unsqueeze(1)
                    Q_targets = rewards + (GAMMA * Q_targets_next * (1 - dones))
                    Q_expected = agent.qnetwork_local(states).gather(1, actions)
                    loss = F.mse_loss(Q_expected, Q_targets)
                episode_losses.append(loss.item())

            state = next_state

            if done:
                break

        # Decay exploration once per episode, never below the configured floor
        agent.epsilon = max(agent.epsilon * agent.epsilon_decay, agent.epsilon_min)
        scores.append(score)
        losses.append(np.mean(episode_losses) if episode_losses else 0)
        epsilons.append(agent.epsilon)

        if e % 10 == 0:
            torch.save(agent.qnetwork_local.state_dict(), f'checkpoint_{e}.pth')

        if e % 5 == 0:
            print(f"Episode {e+1}/{episodes}, Score: {score:.2f}, Epsilon: {agent.epsilon:.2f}")

    return scores, losses, epsilons

In [None]:
def plot_results(train_scores, epsilons):
    """Save a two-panel figure of training progress to 'Side_by_Side_Plots.png'.

    The first panel plots ``train_scores`` and the second plots ``epsilons``,
    both against the episode index. The figure is closed after saving, so
    nothing is shown inline.
    """
    # One row, two columns: the panels sit side by side
    fig, (score_ax, epsilon_ax) = plt.subplots(1, 2, figsize=(15, 5))

    # (axis, series, line colour, title, y-axis label) for each panel
    panels = (
        (score_ax, train_scores, 'blue', 'Training Scores', 'Score'),
        (epsilon_ax, epsilons, 'orange', 'Epsilon Decay', 'Epsilon'),
    )
    for axis, series, colour, title, y_label in panels:
        axis.plot(series, color=colour)
        axis.set_title(title)
        axis.set_xlabel('Episode')
        axis.set_ylabel(y_label)
        axis.grid()

    plt.tight_layout()  # fit the subplots into the figure area
    plt.savefig('Side_by_Side_Plots.png')  # write the figure to disk
    plt.close()  # release the figure

In [None]:
if __name__ == "__main__":
    # Load the traffic-count table (no train/test split is performed here)
    train_data = pd.read_csv("/content/drive/MyDrive/AAAData Google/average-daily-traffic-counts.csv")

    state_size = 5  # East, West, North, South bound volumes, and Light State
    action_size = 2  # 0 = Red, 1 = Green

    train_env = TrafficEnv(train_data)

    agent = Agent(state_size, action_size, seed=0)

    train_episodes = 100
    test_episodes = 20  # defined but not used in this cell
    max_steps = 1000

    # Training
    train_scores, losses, epsilons = train(train_env, agent, train_episodes, max_steps)

    # Plot the per-episode scores and the epsilon schedule
    plot_results(train_scores, epsilons)

    # Report the file name that plot_results() actually writes
    print("Training completed. Results plotted and saved as 'Side_by_Side_Plots.png'.")

Episode 1/100, Score: -677.00, Epsilon: 0.99
Episode 6/100, Score: -238.60, Epsilon: 0.97
Episode 11/100, Score: 217.60, Epsilon: 0.95
Episode 16/100, Score: 1533.00, Epsilon: 0.92
Episode 21/100, Score: 1068.40, Epsilon: 0.90
Episode 26/100, Score: 2897.00, Epsilon: 0.88
Episode 31/100, Score: 1576.80, Epsilon: 0.86
Episode 36/100, Score: 4122.00, Epsilon: 0.83
Episode 41/100, Score: 2271.60, Epsilon: 0.81
Episode 46/100, Score: 3596.40, Epsilon: 0.79
Episode 51/100, Score: 4837.20, Epsilon: 0.77
Episode 56/100, Score: 4614.80, Epsilon: 0.76
Episode 61/100, Score: 4342.00, Epsilon: 0.74
Episode 66/100, Score: 4436.00, Epsilon: 0.72
Episode 71/100, Score: 5045.00, Epsilon: 0.70
Episode 76/100, Score: 5789.60, Epsilon: 0.68
Episode 81/100, Score: 5706.60, Epsilon: 0.67
Episode 86/100, Score: 6237.20, Epsilon: 0.65
Episode 91/100, Score: 6562.60, Epsilon: 0.63
Episode 96/100, Score: 7158.60, Epsilon: 0.62
Training completed. Results plotted and saved as 'training_results.png'.
