# Graph Neural Networks

### Loading Libraries

In [1]:
# System & Utilities
import os
import math
import time
import yaml
import random

# Numerical Computing
import numpy as np

# Data Manipulation
import polars as pl
import pandas as pd

# Data Visualization
import seaborn as sns
import matplotlib.pyplot as plt

# Scikit-Learn
import sklearn
from sklearn.manifold import TSNE

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torchviz import make_dot
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import dataset
from torchvision import datasets, transforms
from torch.nn import TransformerEncoder, TransformerEncoderLayer

# PyTorch Geometric
from torch_geometric.nn import GCNConv
from torch_geometric.nn import GATConv
from torch_geometric.utils import to_networkx
from torch_geometric.datasets import Planetoid

# Network
import networkx as nx

In [2]:
# Pick the compute device: the GPU when CUDA is available, the CPU otherwise.
use_cuda = torch.cuda.is_available()

if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# Ask PyTorch to use deterministic kernels so repeated runs are reproducible.
# With CUDA >= 10.2, cuBLAS-backed ops are only deterministic when
# CUBLAS_WORKSPACE_CONFIG is set, and PyTorch raises a RuntimeError for them
# otherwise. `setdefault` supplies a value without overriding a user's choice.
os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":4096:8")
torch.use_deterministic_algorithms(True)

### Helper Functions

In [4]:
def visualize(data, labels):
    """Scatter-plot a 2-D t-SNE projection of `data`, coloured by `labels`.

    Args:
        data: Array of samples accepted by ``TSNE.fit_transform`` (one row per
            sample, one column per feature).
        labels: One class label per row of `data`. May be a NumPy array, a
            sequence, or a torch tensor (moved to the CPU if necessary).

    Returns:
        None. A new 12x12 inch matplotlib figure is created and drawn on.
    """
    # Torch tensors may live on the GPU or carry autograd history; convert
    # them to a flat NumPy array so pandas/seaborn can consume them.
    if torch.is_tensor(labels):
        labels = labels.detach().cpu().numpy()
    labels = np.asarray(labels).reshape(-1)

    # Fixed random_state keeps the projection reproducible between runs.
    tsne = TSNE(n_components=2, init='pca', random_state=7)
    tsne_res = tsne.fit_transform(data)

    # Only the columns the plot actually needs.
    v = pd.DataFrame({
        "dim1": tsne_res[:, 0],
        "dim2": tsne_res[:, 1],
        "color": labels,
    })

    # Size the palette to the number of classes actually present: a list
    # palette whose length differs from the number of hue levels triggers a
    # seaborn warning (or an error on older releases).
    base_colors = ["#52D1DC", "#8D0004", "#845218", "#563EAA",
                   "#E44658", "#63C100", "#FF7800"]
    n_classes = v["color"].nunique()
    if n_classes <= len(base_colors):
        palette = sns.color_palette(base_colors[:n_classes])
    else:
        # More classes than hand-picked colours: fall back to evenly spaced hues.
        palette = sns.color_palette("husl", n_classes)

    plt.figure(figsize=(12, 12))

    sns.scatterplot(
        x="dim1", y="dim2",
        hue="color",
        palette=palette,
        legend=False,
        data=v,
    )

In [5]:
def visualize_graph(G, color, figsize=(75, 75), seed=42):
    """Draw a NetworkX graph with a spring layout and per-node colours.

    Args:
        G: NetworkX graph to draw.
        color: Node colour specification forwarded to ``node_color``; a torch
            tensor is moved to the CPU and converted to NumPy first.
        figsize: Figure size in inches. Defaults to the original (75, 75).
        seed: Seed for ``nx.spring_layout`` so the layout (and therefore the
            figure) is identical across runs. Pass ``None`` for a random
            layout.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # Matplotlib cannot read CUDA tensors; normalise tensors to NumPy.
    if torch.is_tensor(color):
        color = color.detach().cpu().numpy()

    plt.figure(figsize=figsize)
    plt.xticks([])
    plt.yticks([])
    layout = nx.spring_layout(G, seed=seed)
    nx.draw_networkx(G, pos=layout, with_labels=False,
                     node_color=color, cmap="Set2")
    plt.show()

### Loading Graph Dataset

In [6]:
# Load the CiteSeer citation graph from the Planetoid collection, caching it
# under data/Planetoid. Note: this rebinds the name `dataset`, which was
# previously imported from torch.utils.data.
dataset = Planetoid(root='data/Planetoid', name='CiteSeer')

# Short summary of the dataset, one line per property.
summary_lines = (
    f'Dataset: {dataset}:',
    '======================',
    f'Number of graphs: {len(dataset)}',
    f'Number of features: {dataset.num_features}',
    f'Number of classes: {dataset.num_classes}',
)
for summary_line in summary_lines:
    print(summary_line)

In [7]:
data = dataset[0]  # First graph object stored in the dataset.

### Graph Data Description

In [8]:
# Take the first graph object of the dataset.
data = dataset[0]

print(data)
print('==============================================================')

# Values that are reused by several of the statistics below.
n_nodes = data.num_nodes
n_edges = data.num_edges
n_train = data.train_mask.sum()

# Report basic statistics about the graph.
print(f'Number of nodes: {n_nodes}')
print(f'Number of edges: {n_edges}')
print(f'Average node degree: {n_edges / n_nodes:.2f}')
print(f'Number of training nodes: {n_train}')
print(f'Training node label rate: {int(n_train) / n_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

### Graph Visualization

In [9]:
# Planetoid citation graphs are undirected, with every edge listed in both
# directions in `edge_index` (assumption from the PyG dataset docs; the
# `Is undirected` line printed earlier confirms it at run time). Building an
# undirected NetworkX graph avoids storing and drawing each edge twice as
# arrows, which is slow for thousands of edges.
G = to_networkx(data, to_undirected=True)

visualize_graph(G, color=data.y)

### Graph model 1: `Classic MLP`

#### Define and instantiate MLP model

In [10]:
# class MLP(torch.nn.Module):
#     def __init__(self, hidden_channels):
#         super().__init__()
#         torch.manual_seed(12345)
#         self.lin1 = torch.nn.Linear(dataset.num_features, hidden_channels)
#         self.lin2 = torch.nn.Linear(hidden_channels, dataset.num_classes)

#     def forward(self, x):
#         x = self.lin1(x)
#         x = x.relu()
#         x = F.dropout(x, p=0.5, training=self.training)
#         x = self.lin2(x)
#         return x

# model = MLP(hidden_channels=16)
# print(model)

### Visualize Initial MLP Embeddings for Different Graph Nodes

In [11]:
# model.eval()
# out = model(data.x)

# visualize(out.detach().cpu().numpy(), data.y)

### Train MLP Model on Graph Dataset

In [12]:
# model = MLP(hidden_channels=16)
# criterion = torch.nn.CrossEntropyLoss()  
# optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-3)  

# def train():
#       model.train()
#       optimizer.zero_grad()  
#       out = model(data.x)  
#       loss = criterion(out[data.train_mask], data.y[data.train_mask])  
#       loss.backward()  
#       optimizer.step()  
#       return loss

# def test(mask):
#       model.eval()
#       out = model(data.x)
#       pred = out.argmax(dim=1)  
#       correct = pred[mask] == data.y[mask]  
#       acc = int(correct.sum()) / int(mask.sum())  
#       return acc

# for epoch in range(1, 101):
#     loss = train()
#     val_acc = test(data.val_mask)
#     print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Val: {val_acc:.4f}')

### Evaluate Model Performance on Test Set

In [13]:
# test_acc = test(data.test_mask)

# print(f'Test Accuracy: {test_acc:.4f}')

### Visualize Trained Model's Embeddings for Different Graph Nodes