In [1]:
from karateclub.dataset import GraphSetReader
from karateclub import DeepWalk
import numpy as np
from sklearn.model_selection import train_test_split
import random

import networkx as nx
from pyvis.network import Network

from tqdm import tqdm

from torch_geometric.data import Data
import torch

In [2]:
reader = GraphSetReader("reddit10k")

graphs = reader.get_graphs()[:250]
Y_label = reader.get_target()[:250]


In [7]:
something = graphs[0]
something.nodes

NodeView((0, 2, 1, 5, 4, 6, 7, 8, 9, 10, 3))

In [308]:
X_embedlist = []
for item in tqdm(graphs):
    model = DeepWalk(dimensions=128, walk_length=5, walk_number=80, window_size=5, workers=4)
    model.fit(item)
    X_embedding = model.get_embedding()
    X_embedlist.append(X_embedding)

100%|████████████████████| 250/250 [00:10<00:00, 23.16it/s]


In [309]:
X_train, X_test, y_train, y_test, graph_train, graph_test = train_test_split(X_embedlist, Y_label, graphs, test_size=0.2, random_state=42)

In [310]:
def dataloading(embedlist, label, graphs):
    datalist = []

    for item in zip(embedlist, label, graphs):
        item_X = torch.tensor(item[0], dtype=torch.float)
        item_Y = torch.tensor(item[1])
        edgeindex = torch.tensor(np.array(item[2].edges).T, dtype=torch.long)
        datalist.append(Data(x=item_X, y=item_Y, edge_index=edgeindex))
    return datalist

train_data = dataloading(X_train, y_train, graph_train)
test_data = dataloading(X_test, y_test, graph_test)

In [311]:
from torch_geometric.data import DataLoader
train_loader = DataLoader(train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(test_data, batch_size=64, shuffle=True)

In [312]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    def __init__(self, num_node_features, num_classes, hidden_channels):
        super(GCN, self).__init__()
        torch.manual_seed(12345)
        self.conv1 = GCNConv(num_node_features, hidden_channels)
        self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GCNConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, num_classes)

    def forward(self, x, edge_index, batch):
        # 1. Obtain node embeddings 
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = self.conv3(x, edge_index)

        # 2. Readout layer
        x = global_mean_pool(x, batch)  # [batch_size, hidden_channels]

        # 3. Apply a final classifier
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        
        return x


In [313]:
num_class = len(set(Y_label))
node_dim = X_train[0].shape[1]
model = GCN(node_dim, num_class, hidden_channels=64)
print(model)

GCN(
  (conv1): GCNConv(128, 64)
  (conv2): GCNConv(64, 64)
  (conv3): GCNConv(64, 64)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [314]:
model = GCN(node_dim, num_class, hidden_channels=64)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()

def train(train_loader):
    model.train()

    for data in train_loader:  # Iterate in batches over the training dataset.
         out = model(data.x, data.edge_index, data.batch)  # Perform a single forward pass.
         loss = criterion(out, data.y)  # Compute the loss.
         loss.backward()  # Derive gradients.
         optimizer.step()  # Update parameters based on gradients.
         optimizer.zero_grad()  # Clear gradients.

def test(test_loader):
     model.eval()

     correct = 0
     for data in test_loader:  # Iterate in batches over the training/test dataset.
         out = model(data.x, data.edge_index, data.batch)  
         pred = out.argmax(dim=1)  # Use the class with highest probability.
         correct += int((pred == data.y).sum())  # Check against ground-truth labels.
     return correct / len(test_loader.dataset)  # Derive ratio of correct predictions.


for epoch in range(1, 201):
    train(train_loader)
    train_acc = test(train_loader)
    test_acc = test(test_loader)
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Train Acc: 0.6350, Test Acc: 0.6800
Epoch: 002, Train Acc: 0.6550, Test Acc: 0.6600
Epoch: 003, Train Acc: 0.6550, Test Acc: 0.6600
Epoch: 004, Train Acc: 0.6700, Test Acc: 0.6600
Epoch: 005, Train Acc: 0.6650, Test Acc: 0.6800
Epoch: 006, Train Acc: 0.6800, Test Acc: 0.7000
Epoch: 007, Train Acc: 0.6950, Test Acc: 0.6400
Epoch: 008, Train Acc: 0.7000, Test Acc: 0.7000
Epoch: 009, Train Acc: 0.7150, Test Acc: 0.6400
Epoch: 010, Train Acc: 0.7300, Test Acc: 0.7000
Epoch: 011, Train Acc: 0.7100, Test Acc: 0.6800
Epoch: 012, Train Acc: 0.7000, Test Acc: 0.6200
Epoch: 013, Train Acc: 0.7000, Test Acc: 0.7200
Epoch: 014, Train Acc: 0.7050, Test Acc: 0.7200
Epoch: 015, Train Acc: 0.7300, Test Acc: 0.6200
Epoch: 016, Train Acc: 0.7250, Test Acc: 0.6600
Epoch: 017, Train Acc: 0.7200, Test Acc: 0.7000
Epoch: 018, Train Acc: 0.7400, Test Acc: 0.6800
Epoch: 019, Train Acc: 0.7500, Test Acc: 0.6000
Epoch: 020, Train Acc: 0.7550, Test Acc: 0.5400
Epoch: 021, Train Acc: 0.7350, Test Acc:

Epoch: 172, Train Acc: 0.9050, Test Acc: 0.7000
Epoch: 173, Train Acc: 0.8550, Test Acc: 0.6800
Epoch: 174, Train Acc: 0.9100, Test Acc: 0.7000
Epoch: 175, Train Acc: 0.9200, Test Acc: 0.6800
Epoch: 176, Train Acc: 0.9100, Test Acc: 0.7200
Epoch: 177, Train Acc: 0.8400, Test Acc: 0.6400
Epoch: 178, Train Acc: 0.8650, Test Acc: 0.6600
Epoch: 179, Train Acc: 0.8900, Test Acc: 0.7200
Epoch: 180, Train Acc: 0.8700, Test Acc: 0.6600
Epoch: 181, Train Acc: 0.8850, Test Acc: 0.6600
Epoch: 182, Train Acc: 0.8850, Test Acc: 0.6600
Epoch: 183, Train Acc: 0.9050, Test Acc: 0.6400
Epoch: 184, Train Acc: 0.9150, Test Acc: 0.7200
Epoch: 185, Train Acc: 0.8850, Test Acc: 0.6600
Epoch: 186, Train Acc: 0.9050, Test Acc: 0.7200
Epoch: 187, Train Acc: 0.9100, Test Acc: 0.7200
Epoch: 188, Train Acc: 0.9000, Test Acc: 0.6600
Epoch: 189, Train Acc: 0.9000, Test Acc: 0.7200
Epoch: 190, Train Acc: 0.9000, Test Acc: 0.7200
Epoch: 191, Train Acc: 0.9100, Test Acc: 0.7200
Epoch: 192, Train Acc: 0.9150, Test Acc:

In [316]:
indexnum =45
model.eval()
out = model(train_data[indexnum].x, train_data[indexnum].edge_index, torch.tensor([0]))  
pred = out.argmax(dim=1)
pred = int(pred)
pred

0