# NetworkX

In [None]:
import networkx as nx

### Graph

In [None]:
# undirected graph
G = nx.Graph()
print("G is directed : %s" % G.is_directed())

# directed graph
H = nx.DiGraph()
print("H is directed : %s" % H.is_directed())

# add a graph-level attribute (stored in the G.graph mapping)
G.graph["Name"] = "undirected graph"
print(G.graph)

### node

In [None]:
# add a single node (+ node-level attributes passed as keyword arguments)
G.add_node(0, feature=5, label=0)
print("Node 0's attributes: %s" % G.nodes[0])

In [None]:
# add several nodes at once (+ attributes), given as (node_id, attribute_dict) pairs
G.add_nodes_from([
    (1, {"feature":1, "label": 1}),
    (2, {"feature":2, "label": 2})
])

# node loop (data=True also yields each node's attributes)
for node in G.nodes(data=True):
    print(node)

# number of nodes
print("number of nodes in G : %d" % G.number_of_nodes())

### edge

In [None]:
# add an edge (+ assign a weight)
G.add_edge(0, 1, weight=0.5)
print("Edge (0, 1)'s attributes: %s" % G.edges[(0, 1)])

In [None]:
# add several edges at once (+ assign weights), given as (u, v, attribute_dict) triples
G.add_edges_from([
    (1, 2, {"weight": 0.3}),
    (2, 0, {"weight": 0.5})
])

# edge loop (with data=True)
for edge in G.edges(data=True):
    print(edge)

# edge loop (with data=False)
for edge in G.edges():
    print(edge)

# number of edges
print("number of edges in G: %d" % G.number_of_edges())

### Visualization

In [None]:
# draw G with the node ids shown as labels
nx.draw(G, with_labels=True)

### node degree & neighbor

In [None]:
# node whose degree and neighbors are inspected in this section
node_id = 1

print("degree of node %s : %d" % (node_id, G.degree[node_id]) )

In [None]:
# list every neighbor of node_id in G
for neighbor in G.neighbors(node_id):
    print("node %s has neighbor %s" % (node_id, neighbor) )

### other functionalities

In [None]:
num_nodes = 7

# path graph : a tree with two nodes of vertex degree 1 & the other n-2 nodes of vertex degree 2
# NOTE: this rebinds G, replacing the small graph built in the cells above
G = nx.DiGraph(nx.path_graph(num_nodes))
nx.draw(G, with_labels=True)

In [None]:
# pagerank of every node in G (alpha is the damping parameter);
# the bare `pr` expression displays the result as the cell output
pr = nx.pagerank(G, alpha=0.8)
pr

# PyTorch Geometric

In [None]:
# takes about 15 minutes
!pip install -q torch-scatter -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html
!pip install -q torch-sparse -f https://pytorch-geometric.com/whl/torch-1.7.0+cu101.html
!pip install -q torch-geometric

In [None]:
%matplotlib inline
import torch
import networkx as nx
import matplotlib.pyplot as plt

### visualize

In [None]:
def visualize(h, color, epoch=None, loss=None, accuracy=None):
    """Plot 2-D node embeddings or draw a graph, then show the figure.

    Args:
        h: Either a tensor, whose first two columns are used as the x / y
            scatter coordinates, or any other object, which is passed to
            ``nx.draw_networkx`` as the graph to draw.
        color: Per-node colour values, forwarded as ``c=`` (scatter) or
            ``node_color=`` (graph drawing) with the "Set2" colormap.
        epoch: Optional epoch number shown in the caption.
        loss: Optional loss shown in the caption; ``.item()`` is called on it.
        accuracy: Optional mapping with ``"train"`` and ``"val"`` entries,
            shown in the caption as percentages.

    The caption is only added in the tensor case, and only when ``epoch``,
    ``loss`` and both accuracy entries are available.
    """
    plt.figure(figsize=(7, 7))
    plt.xticks([])
    plt.yticks([])

    if torch.is_tensor(h):
        h = h.detach().cpu().numpy()
        plt.scatter(h[:, 0], h[:, 1], s=140, c=color, cmap="Set2")

        # `accuracy` defaults to None, so check it before subscripting;
        # otherwise passing only epoch and loss would raise a TypeError.
        has_accuracy = (
            accuracy is not None
            and accuracy.get("train") is not None
            and accuracy.get("val") is not None
        )
        if epoch is not None and loss is not None and has_accuracy:
            plt.xlabel((f'Epoch: {epoch}, Loss: {loss.item():.4f} \n'
                        f'Training Accuracy: {accuracy["train"]*100:.2f}% \n'
                        f' Validation Accuracy: {accuracy["val"]*100:.2f}%'), fontsize=16)

    else:
        # Draw the graph that was passed in rather than whatever a global
        # named G happens to hold; the fixed seed keeps the layout stable.
        nx.draw_networkx(h, pos=nx.spring_layout(h, seed=42), with_labels=False, node_color=color, cmap="Set2")

    plt.show()

### dataset

- The dataset is the well-known Zachary's karate club network.
- This graph describes a social network of 34 members of a karate club and documents links between members who interacted outside the club.
- Here, we are interested in detecting the communities that arise from the members' interactions.

In [None]:
from torch_geometric.datasets import KarateClub

# load the dataset and print its basic statistics
dataset = KarateClub()
print(f"number of graphs: { len(dataset) }")
print(f"number of features: { dataset.num_features }")
print(f"number of classes: { dataset.num_classes }")

In [None]:
# take the first graph of the dataset; the bare `data` expression displays it as the cell output
data = dataset[0]
data

In [None]:
# number of nodes
data.num_nodes

In [None]:
# number of edges
data.num_edges

In [None]:
# average node degree
# PyG's edge_index stores an undirected edge once per direction, so
# num_edges already counts each edge twice and must not be doubled again.
data.num_edges / data.num_nodes

In [None]:
# number of training nodes
data.train_mask.sum()

In [None]:
# fraction of nodes that carry a training label
float(data.train_mask.sum() / data.num_nodes)

In [None]:
# whether the graph contains isolated nodes
data.has_isolated_nodes()

In [None]:
# whether the graph contains self-loops
data.has_self_loops()

In [None]:
# whether the graph is undirected
data.is_undirected()

In [None]:
# first 10 rows of the transposed edge index
data.edge_index.T[:10]  # == data.edge_index.t()

In [None]:
from IPython.display import Javascript
# restrict the height of this cell's output (Colab-specific call; `display` is supplied by the notebook)
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 300})'''))
from torch_geometric.utils import to_networkx

# convert the PyG data object into an undirected networkx graph and draw it, coloured by data.y
G = to_networkx(data, to_undirected=True)
visualize(G, color=data.y)

### GNN 실행

- In total, we are only aware of the ground-truth labels of 4 nodes (one for each community), and the task is to infer the community assignment for the remaining nodes.

In [None]:
# number of classes in the dataset
dataset.num_classes

In [None]:
import torch
from torch.nn import Linear
from torch_geometric.nn import GCNConv

class GCN(torch.nn.Module):
    """Three tanh-activated GCNConv layers followed by a linear classifier.

    The input width and the number of output classes are read from the
    module-level ``dataset``; the last convolution produces 2 values per
    node, which are returned alongside the classifier output.
    """

    def __init__(self):
        super().__init__()
        # Seed before any layer is created so the initial parameters are
        # the same on every construction.
        torch.manual_seed(12345)
        self.conv1 = GCNConv(dataset.num_features, 4)
        self.conv2 = GCNConv(4, 4)
        self.conv3 = GCNConv(4, 2)
        self.classifier = Linear(2, dataset.num_classes)

    def forward(self, x, edge_index):
        """Return ``(out, h)``: classifier output and the final embeddings."""
        h = x
        for conv in (self.conv1, self.conv2, self.conv3):
            h = torch.tanh(conv(h, edge_index))
        return self.classifier(h), h

In [None]:
# instantiate the model; the bare `model` expression displays it as the cell output
model = GCN()
model

In [None]:
# forward pass with the freshly constructed model; plot the embeddings it returns
_, h = model(data.x, data.edge_index)
visualize(h, color=data.y)

In [None]:
import time
from IPython.display import Javascript  # Restrict height of output cell.
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 430})'''))

# fresh model, loss function and optimizer; train() below reads these as globals
model = GCN()
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

def train(data):
    """Run one full-graph optimisation step and report its metrics.

    Relies on the module-level ``model``, ``criterion`` and ``optimizer``.

    Args:
        data: Graph object providing ``x``, ``edge_index``, ``y`` and
            ``train_mask``.

    Returns:
        A ``(loss, h, accuracy)`` tuple: the loss on the training nodes,
        the second output of the model, and a dict with ``'train'`` and
        ``'val'`` accuracies. Both are computed from the model output
        obtained before the parameter update.
    """
    train_mask = data.train_mask

    optimizer.zero_grad()
    logits, embeddings = model(data.x, data.edge_index)
    # the loss only uses the nodes selected by the training mask
    loss = criterion(logits[train_mask], data.y[train_mask])
    loss.backward()
    optimizer.step()

    # 1.0 where the predicted class matches the label, 0.0 elsewhere
    hits = (logits.argmax(dim=1) == data.y).float()
    accuracy = {
        'train': hits[train_mask].mean(),
        # NOTE: 'val' is measured over every node, training nodes included
        'val': hits.mean(),
    }

    return loss, embeddings, accuracy

# train for 500 epochs; every 10th epoch plot the current embeddings with
# the epoch's loss and accuracies, pausing briefly between plots
for epoch in range(500):
    loss, h, accuracy = train(data)
    if epoch % 10 == 0:
        visualize(h, color=data.y, epoch=epoch, loss=loss, accuracy=accuracy)
        time.sleep(0.3)