In [1]:
import torch
from torch import nn
import torch.nn.functional as F
import networkx as nx
import matplotlib.pyplot as plt
import torch.optim as optim
from scipy import sparse as sp
import random
import numpy as np
device = torch.device('cuda')

In [2]:
# Decoder
class Decoder(torch.nn.Module):
    def __init__(self, in_features, hidden_features, out_features, n_heads, d_h):
        super(Decoder, self).__init__()
        self.n_heads = n_heads
        self.hidden_features = hidden_features
        self.d_h = d_h

        self.linear1 = nn.Linear(in_features, hidden_features)
        self.linear2 = nn.Linear(hidden_features, out_features)
        self.attn_linear1 = nn.Linear(hidden_features, d_h)
        self.attn_linear2 = nn.Linear(hidden_features, d_h)
        self.softmax = nn.Softmax(dim=1)
        self.activation = nn.Tanh()

    def forward(self, x, v_prev, neighbors):
        n_nodes = x.shape[0]

        v_prev = self.linear1(v_prev)
        v_prev = v_prev.unsqueeze(0).repeat(n_nodes, 1)

        neighbors = self.linear1(neighbors)

        attn_input = torch.cat([v_prev, neighbors], dim=-1)
        attn_input = self.activation(attn_input)

        attn1 = self.attn_linear1(attn_input)
        attn2 = self.attn_linear2(attn_input)

        attn_output = torch.matmul(attn1, attn2.transpose(0, 1)) / (self.d_h ** 0.5)
        attn_output = self.activation(attn_output)

        masked_attn_output = attn_output.masked_fill(neighbors == 0, float('-inf'))
        attn_weights = self.softmax(masked_attn_output)

        x = self.linear2(x)
        x = x.unsqueeze(0).repeat(n_nodes, 1, 1)

        output = torch.matmul(attn_weights.unsqueeze(1), x)
        output = output.squeeze(1)

        return output, attn_weights

In [3]:
# Encoder
class GraphAttentionLayer(torch.nn.Module):
    def __init__(self, in_features, out_features, n_heads, is_concat = True, dropout = 0.6, leacky_relu_negative_slope = 0.2):
        super(GraphAttentionLayer, self).__init__()
        self.W = torch.nn.Parameter(torch.randn(in_features, out_features))
        self.is_concat = is_concat
        self.n_heads = n_heads

        if is_concat:
            assert out_features % n_heads == 0

            self.n_hidden = out_features // n_heads
        else:
            self.n_hidden = out_features

        self.linear = nn.Linear(in_features, self.n_hidden * n_heads, bias = False)

        self.attn = nn.Linear(self.n_hidden * 2, 1, bias = False)
        self.activation = nn.LeakyReLU(negative_slope = leacky_relu_negative_slope)
        self.softmax = nn.Softmax(dim=1)
        self.dropout = nn.Dropout(dropout)
        # self.decoder = Decoder(self.n_hidden)
        

    def forward(self, x, adj):
        n_nodes = x.shape[0]
        g=self.linear(x).view(n_nodes, self.n_heads, self.n_hidden)
        g_repeat = g.repeat(n_nodes, 1,1)
        g_repeat_interleave = g.repeat_interleave(n_nodes, dim=0)
        g_concat = torch.cat([g_repeat_interleave, g_repeat], dim = -1)
        g_concat = g_concat.view(n_nodes, n_nodes, self.n_heads, 2 * self.n_hidden)
        e = self.activation(self.attn(g_concat))
        e = e.squeeze(-1)
        assert adj.shape[0] == 1 or adj.shape[0] == n_nodes
        assert adj.shape[1] == 1 or adj.shape[1] == n_nodes
        assert adj.shape[2] == 1 or adj.shape[2] == self.n_heads
        e=e.masked_fill(adj == 0, 1)
        a = self.softmax(e)
        a = self.dropout(a)
        attn_res = torch.einsum('ijh,jhf->ihf', a, g)
        if self.is_concat:
            return attn_res.reshape(n_nodes, self.n_heads * self.n_hidden)
        else:
            return attn_res.mean(dim = 1)

In [4]:
class GAT(torch.nn.Module):
    def __init__(self, in_features, hidden_features, out_features, n_heads, d_h):
        super(GAT, self).__init__()
        self.n_heads = n_heads
        self.attention1 = GraphAttentionLayer(in_features, hidden_features, n_heads)
        self.attention2 = GraphAttentionLayer(hidden_features, out_features, n_heads)
        self.norm= nn.LayerNorm(out_features)
        self.decoder = Decoder(out_features, hidden_features, out_features, n_heads, d_h)
    
    def forward(self, x, adj):
        x = self.attention1(x, adj)
        x = self.attention2(x, adj)
        x = self.norm(x)
        x = F.softmax(x, dim=-1)
        return x
    
    def decode(self, x, v_prev, neighbors):
        return self.decoder(x, v_prev, neighbors)

In [10]:
# Create multiple dummy graphs with different node sizes
graph_list =[]
# Graph 1
G1 = nx.Graph()
G1.add_nodes_from(range(4))  # Add nodes
G1.add_edges_from([(0, 1), (1, 2), (2, 3)])  # Add edges

adj_matrix1 = nx.adjacency_matrix(G1)
adj_matrix1 = adj_matrix1 + sp.eye(adj_matrix1.shape[0])  # Add self-loop
adj_tensor1 = torch.Tensor(adj_matrix1.todense())


num_nodes1 = G1.number_of_nodes()
in_features1 = 8
x1 = torch.randn(num_nodes1, in_features1)

# Resize adjacency tensor to match the input features size
adj_tensor1 = adj_tensor1.unsqueeze(0)  # Add an extra dimension
adj_tensor1 = adj_tensor1.repeat(num_nodes1, 1, 1)  # Repeat the adjacency tensor
adj_tensor1 = adj_tensor1.transpose(0, 1)  # Transpose the dimensions

# Generate labels for Graph 1
labels1 = torch.randint(0, 2, (num_nodes1,)).to(device)

graph_list.append((x1, adj_tensor1))

# Graph 2
G2 = nx.Graph()
G2.add_nodes_from(range(5))  # Add nodes
G2.add_edges_from([(0, 1), (1, 2), (2, 3), (3, 4)])  # Add edges

adj_matrix2 = nx.adjacency_matrix(G2)
adj_matrix2 = adj_matrix2 + sp.eye(adj_matrix2.shape[0])  # Add self-loop
adj_tensor2 = torch.Tensor(adj_matrix2.todense())

num_nodes2 = G2.number_of_nodes()
in_features2 = 8
x2 = torch.randn(num_nodes2, in_features2)

# Resize adjacency tensor to match the input features size
adj_tensor2 = adj_tensor2.unsqueeze(0)  # Add an extra dimension
adj_tensor2 = adj_tensor2.repeat(num_nodes2, 1, 1)  # Repeat the adjacency tensor
adj_tensor2 = adj_tensor2.transpose(0, 1)  # Transpose the dimensions

# Generate labels for Graph 2
labels2 = torch.randint(0, 2, (num_nodes2,)).to(device)

graph_list.append((x2, adj_tensor2))

graph_array= np.array(graph_list, dtype=object)

# Access the graphs and their components from the graph list
for i, (feature_matrix, adj_tensor) in enumerate(graph_array):
    # # Expand the adj_tensor dimensions if using multiple attention heads
    # if adj_tensor.dim() == 2:
    #     adj_tensor = adj_tensor.unsqueeze(2).expand(-1, -1, self.n_heads)

    graph_list[i] = (feature_matrix.cuda(), adj_tensor.cuda())


    print(f"Graph {i+1} - Feature Matrix:")
    print(feature_matrix)

    print(f"\nGraph {i+1} - Adjacency Tensor:")
    print(adj_tensor)

    print("\n")

Graph 1 - Feature Matrix:
tensor([[ 0.3828, -0.8849, -1.5154,  0.2180,  1.4070,  0.0797, -0.6923, -0.0117],
        [-0.7997,  0.6889,  0.1618, -0.0541,  0.3779,  2.0618,  2.1502,  2.1678],
        [-0.3418, -0.0420,  0.0476, -0.6429,  1.7629,  0.3875,  0.2837, -0.9960],
        [-2.8452,  0.2046, -0.6345, -0.9439,  0.8092, -0.7611, -1.6979, -0.2157]])

Graph 1 - Adjacency Tensor:
tensor([[[1., 1., 0., 0.],
         [1., 1., 0., 0.],
         [1., 1., 0., 0.],
         [1., 1., 0., 0.]],

        [[1., 1., 1., 0.],
         [1., 1., 1., 0.],
         [1., 1., 1., 0.],
         [1., 1., 1., 0.]],

        [[0., 1., 1., 1.],
         [0., 1., 1., 1.],
         [0., 1., 1., 1.],
         [0., 1., 1., 1.]],

        [[0., 0., 1., 1.],
         [0., 0., 1., 1.],
         [0., 0., 1., 1.],
         [0., 0., 1., 1.]]])


Graph 2 - Feature Matrix:
tensor([[ 0.9009,  1.3300, -0.6778,  0.7949,  0.8382,  1.1925, -2.2364,  1.8151],
        [-0.7353,  0.2957, -1.0917, -1.2875, -0.5308,  0.2467,  0.

  adj_matrix1 = nx.adjacency_matrix(G1)
  adj_matrix2 = nx.adjacency_matrix(G2)
  graph_array= np.array(graph_list, dtype=object)


In [11]:
# Create and initialize the GAT models for each graph
gat_models = []
for i, (feature_matrix, adj_tensor) in enumerate(graph_array):
    in_features = feature_matrix.shape[1]
    n_heads = adj_tensor.shape[2]
    hidden_features = 4 * n_heads
    out_features = 2 * n_heads
    d_h = 4 * n_heads
    gat_model = GAT(in_features, hidden_features, out_features, n_heads, d_h).cuda()
    gat_models.append(gat_model)
    feature_matrix = feature_matrix.cuda()
    adj_tensor = adj_tensor.cuda()
    output = gat_model(feature_matrix, adj_tensor)
    print(f"Graph {i+1} - Output:")
    print(output)
    #output : 각 노드에 대한 클래스 라벨 예측 값

Graph 1 - Output:
tensor([[0.1569, 0.0680, 0.0590, 0.0096, 0.0605, 0.1869, 0.2752, 0.1839],
        [0.0793, 0.0152, 0.0605, 0.0605, 0.0263, 0.5897, 0.0725, 0.0962],
        [0.0708, 0.0684, 0.0519, 0.0123, 0.0415, 0.4191, 0.1866, 0.1494],
        [0.0796, 0.0668, 0.0694, 0.0094, 0.0761, 0.1948, 0.3323, 0.1716]],
       device='cuda:0', grad_fn=<SoftmaxBackward0>)
Graph 2 - Output:
tensor([[0.0929, 0.0220, 0.0740, 0.0372, 0.0179, 0.0209, 0.0300, 0.0300, 0.0816,
         0.5934],
        [0.4777, 0.0128, 0.2055, 0.0226, 0.0478, 0.0478, 0.0329, 0.0554, 0.0311,
         0.0663],
        [0.3601, 0.0081, 0.1657, 0.0322, 0.0386, 0.0559, 0.0620, 0.1599, 0.0406,
         0.0769],
        [0.2098, 0.0072, 0.1662, 0.0272, 0.0728, 0.0507, 0.0417, 0.1662, 0.1689,
         0.0894],
        [0.3733, 0.0056, 0.0968, 0.0610, 0.0528, 0.0528, 0.0769, 0.0533, 0.1088,
         0.1187]], device='cuda:0', grad_fn=<SoftmaxBackward0>)


In [13]:
# Set the optimizer and loss function
optimizer = optim.Adam(gat_model.parameters(), lr=0.01)
criterion = torch.nn.NLLLoss()

# Move the model and loss function to the GPU
gat_model = gat_model.cuda()
criterion = criterion.cuda()

# Training loop
epochs = 100

for epoch in range(epochs):
    total_loss = 0.0
    for graph_idx, (feature_matrix, adj_tensor) in enumerate(graph_array):
        feature_matrix = feature_matrix.to(device)
        adj_tensor = adj_tensor.to(device)
         # Generate random labels for the current graph
        num_nodes = feature_matrix.shape[0]
        labels = torch.tensor([random.randint(0, 1) for _ in range(num_nodes)]).to(device)
                
        # Zero the gradients
        gat_model.zero_grad()
        
        # Forward pass
        output = gat_models[graph_idx](feature_matrix, adj_tensor)
        
        # Compute the loss
        loss = criterion(output.squeeze(0), labels)
        total_loss += loss.item()
        
        # Backward pass and optimization
        loss.backward()
        optimizer.step()
    
    # Calculate the average loss for the epoch
    average_loss = total_loss / len(graph_array)
    
    for graph_idx in range(len(graph_list)):
        print("Graph {}: Epoch: {:03d}, Loss: {:.4f}".format(graph_idx+1, epoch+1, average_loss))

Graph 1: Epoch: 001, Loss: -0.0709
Graph 2: Epoch: 001, Loss: -0.0709
Graph 1: Epoch: 002, Loss: -0.1487
Graph 2: Epoch: 002, Loss: -0.1487
Graph 1: Epoch: 003, Loss: -0.1270
Graph 2: Epoch: 003, Loss: -0.1270
Graph 1: Epoch: 004, Loss: -0.2161
Graph 2: Epoch: 004, Loss: -0.2161
Graph 1: Epoch: 005, Loss: -0.1919
Graph 2: Epoch: 005, Loss: -0.1919
Graph 1: Epoch: 006, Loss: -0.1337
Graph 2: Epoch: 006, Loss: -0.1337
Graph 1: Epoch: 007, Loss: -0.2494
Graph 2: Epoch: 007, Loss: -0.2494
Graph 1: Epoch: 008, Loss: -0.2540
Graph 2: Epoch: 008, Loss: -0.2540
Graph 1: Epoch: 009, Loss: -0.2088
Graph 2: Epoch: 009, Loss: -0.2088
Graph 1: Epoch: 010, Loss: -0.1941
Graph 2: Epoch: 010, Loss: -0.1941
Graph 1: Epoch: 011, Loss: -0.3176
Graph 2: Epoch: 011, Loss: -0.3176
Graph 1: Epoch: 012, Loss: -0.2908
Graph 2: Epoch: 012, Loss: -0.2908
Graph 1: Epoch: 013, Loss: -0.4043
Graph 2: Epoch: 013, Loss: -0.4043
Graph 1: Epoch: 014, Loss: -0.1683
Graph 2: Epoch: 014, Loss: -0.1683
Graph 1: Epoch: 015,