<a href="https://colab.research.google.com/github/adpineres-ef/Pytorch-DRL-Model-Build/blob/main/Pytorch_GNN_DRL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
!pip install torch-geometric

Collecting torch-geometric
  Downloading torch_geometric-2.5.3-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: torch-geometric
Successfully installed torch-geometric-2.5.3


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch_geometric.data import Data, DataLoader
from torch.distributions import Categorical
import numpy as np
import pandas as pd

In [5]:
from google.colab import files
# Specify the file name
file_name1 = "test_profit_matrix.xlsx"
file_name2 = "test_time_matrix.xlsx"
file_name3 = "test_markov_matrix.xlsx"
# Read the Excel file into a DataFrame
reward_matrix = pd.read_excel(file_name1,header= None)
time_matrix = pd.read_excel(file_name2,header = None)
markov_matrix = pd.read_excel(file_name3,header=None)
initial_node = 0
time_limit = 35

In [8]:
edges = [(0, 1), (0, 2), (0, 3), (0, 4), (0, 5), (0, 6),
         (1, 2), (1, 3), (1, 4), (1, 5), (1, 6),
         (2, 3), (2, 4), (2, 5), (2, 6),
         (3, 4), (3, 5), (3, 6),
         (4, 5), (4, 6),
         (5, 6)]

# Convert edge list to PyTorch LongTensor
edges = torch.tensor(edges, dtype=torch.long).t().contiguous()

# Construct the edge index (COO format)
edge_index = torch.cat([edges, edges.flip(0)], dim=1)

# Convert edge weights from the Markov matrix to PyTorch tensor
edge_weight = torch.tensor(markov_matrix.values, dtype=torch.float32)

# Create the graph data object with edge weights
graph_data = Data(x=None, edge_index=edge_index, edge_weight=edge_weight)

In [6]:
class GraphEnvironment:
    def __init__(self, reward_matrix, time_matrix, markov_matrix, initial_node, time_limit):
        self.reward_matrix = reward_matrix
        self.time_matrix = time_matrix
        self.markov_matrix = markov_matrix
        self.initial_node = initial_node
        self.time_limit = time_limit

        self.num_nodes = reward_matrix.shape[0]
        self.edge_index = self.generate_edge_index()

    def generate_edge_index(self):
        edges = []
        for i in range(self.num_nodes):
            for j in range(self.num_nodes):
                if i != j:
                    edges.append((i, j))
        edge_index = torch.tensor(edges, dtype=torch.long).t().contiguous()
        return edge_index

    def reset(self):
        self.current_node = self.initial_node
        self.remaining_time = self.time_limit
        self.visited_nodes = [self.initial_node]
        self.visited_edges = []
        self.total_reward = 0
        return self.get_state()

    def get_state(self):
        return torch.tensor([self.current_node, self.remaining_time], dtype=torch.float32)

    def step(self, action):
        next_node = action.item()
        edge = (self.current_node, next_node)
        if edge not in self.visited_edges:
            self.visited_edges.append(edge)
            reward = self.reward_matrix[self.current_node, next_node]
            time_consumption = self.time_matrix[self.current_node, next_node]
            self.total_reward += reward
            self.remaining_time -= time_consumption
            self.current_node = next_node
            self.visited_nodes.append(next_node)
            done = (self.remaining_time <= 0) or (len(self.visited_nodes) == self.num_nodes and self.current_node == self.initial_node)
            if done:
                return self.get_state(), reward, done, {}
            else:
                return self.get_state(), reward, done, {}
        else:
            return self.get_state(), 0, False, {}


In [7]:
from torch_geometric.nn import GATConv
class QNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_edges):
        super(QNetwork, self).__init__()
        self.conv1 = GATConv(input_dim, hidden_dim, heads=1, dropout=0.6)
        self.conv2 = GATConv(hidden_dim, hidden_dim, heads=1, dropout=0.6)
        self.fc1 = nn.Linear(hidden_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_edges)

    def forward(self, data):
        x, edge_index, edge_weight = data.x, data.edge_index, data.edge_weight
        x = F.relu(self.conv1(x, edge_index, edge_weight=edge_weight))
        x = F.relu(self.conv2(x, edge_index, edge_weight=edge_weight))
        x = x.mean(dim=0)  # Global pooling to obtain a single vector representation
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [11]:
import torch.optim as optim
import random
# Define hyperparameters
learning_rate = 0.001
num_epochs = 10
batch_size = 64
gamma = 0.99
num_edges = len(edges)
epsilon = 0.1  # Epsilon value for epsilon-greedy policy
# Initialize QNetwork
model = QNetwork(input_dim=2, hidden_dim=128, num_edges=num_edges)
# Define optimizer
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Define loss function (mean squared error)
loss_fn = nn.MSELoss()

In [12]:
env = GraphEnvironment(reward_matrix, time_matrix, markov_matrix, initial_node, time_limit)
# Start training loop
for epoch in range(num_epochs):
    # Reset environment
    state = env.reset()
    done = False
    total_reward = 0

    # Collect data for one episode
    while not done:
        # Get Q-values for the current state
        q_values = model(state)

        # Select action using epsilon-greedy policy
        if random.random() < epsilon:
            # Select a random action
            action = random.randint(0, len(q_values) - 1)
        else:
            # Select the action with the highest Q-value
            action = torch.argmax(q_values).item()

        # Perform action and observe next state and reward
        next_state, reward, done, _ = env.step(action)

        # Compute target Q-value
        with torch.no_grad():
            target_q = reward + gamma * torch.max(model(next_state))

        # Compute predicted Q-value
        predicted_q = model(state)

        # Compute loss
        loss = loss_fn(predicted_q, target_q)

        # Perform gradient descent step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Update state and total reward
        state = next_state
        total_reward += reward

    # Print total reward for the epoch
    print(f"Epoch {epoch + 1}, Total Reward: {total_reward}")

TypeError: GraphEnvironment.__init__() missing 4 required positional arguments: 'time_matrix', 'markov_matrix', 'initial_node', and 'time_limit'

In [None]:
# Image Classifier Neural Network
class RoutingModule(nn.Module):
    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(
            nn.Conv2d(1, 32, (3,3)),
            nn.ReLU(),
            nn.Conv2d(32, 64, (3,3)),
            nn.ReLU(),
            nn.Conv2d(64, 64, (3,3)),
            nn.ReLU(),
            nn.Flatten(),
            nn.Linear(64*(28-6)*(28-6), 10)
        )

    def forward(self, x):
        return self.model(x)

# Instance of the neural network, loss, optimizer
clf = ImageClassifier().to('cuda')
opt = Adam(clf.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()