In [1]:
# from experiments.peptides_functional.pyg.peptides_functional import *
# from experiments.peptides_structural.pyg.peptides_structural import *

In [2]:
import networkx as nx
import matplotlib.pyplot as plt
from torch_geometric.utils import to_networkx

In [3]:
"""Spectral Clustering GNN layer definition."""
import os.path as osp
import torch
from torch.nn import Linear

import torch_geometric.transforms as T
from torch_geometric.datasets import Planetoid
from torch_geometric.nn import GraphConv, dense_mincut_pool
from torch_geometric import utils
from torch_geometric.nn import Sequential
from torch_geometric.nn.conv.gcn_conv import gcn_norm

from sklearn.metrics import normalized_mutual_info_score as NMI


import numpy as np
import torch.nn as nn
import torch.nn.functional as F
from torch_scatter import scatter


class Net(torch.nn.Module):
    def __init__(self, 
                 mp_units,
                 mp_act,
                 in_channels, 
                 n_clusters, 
                 mlp_units=[],
                 mlp_act="Identity"):
        super().__init__()
        
        mp_act = getattr(torch.nn, mp_act)(inplace=True)
        mlp_act = getattr(torch.nn, mlp_act)(inplace=True)
        
        # Message passing layers
        mp = [
            (GraphConv(in_channels, mp_units[0]), 'x, edge_index, edge_weight -> x'),
            mp_act
        ]
        for i in range(len(mp_units)-1):
            mp.append((GraphConv(mp_units[i], mp_units[i+1]), 'x, edge_index, edge_weight -> x'))
            mp.append(mp_act)
        self.mp = Sequential('x, edge_index, edge_weight', mp)
        out_chan = mp_units[-1]
        
        # MLP layers
        self.mlp = torch.nn.Sequential()
        for units in mlp_units:
            self.mlp.append(Linear(out_chan, units))
            out_chan = units
            self.mlp.append(mlp_act)
        self.mlp.append(Linear(out_chan, n_clusters))
        

    def forward(self, x, edge_index, edge_weight):
        
        # Propagate node feats
        x = self.mp(x, edge_index, edge_weight) 
        
        # Cluster assignments (logits)
        s = self.mlp(x) 
        
        # Obtain MinCutPool losses
        adj = utils.to_dense_adj(edge_index, edge_attr=edge_weight)
        _, _, mc_loss, o_loss = dense_mincut_pool(x, adj, s)

        # return torch.softmax(s, dim=-1), mc_loss, adj
        return torch.softmax(s, dim=-1), mc_loss, o_loss, adj

In [4]:
import logging
import time
from typing import Literal

from torch_geometric.data import Data
from torch_geometric.graphgym.checkpoint import clean_ckpt, save_ckpt
from torch_geometric.graphgym.config import cfg
from torch_geometric.graphgym.loss import compute_loss
from torch_geometric.graphgym.model_builder import GraphGymModule
from torch_geometric.graphgym.register import register_train
from torch_geometric.graphgym.utils.epoch import is_ckpt_epoch, is_eval_epoch

from sklearn.metrics import normalized_mutual_info_score as NMI

In [5]:
dataset = Planetoid("../datasets", "cora")
data = dataset[0]

In [6]:
data.edge_index, data.edge_weight = gcn_norm(  
                data.edge_index, data.edge_weight, data.num_nodes,
                add_self_loops=False)

In [7]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
data = data.to(device)
model = Net([16], "ELU", dataset.num_features, 7).to(device)
print(model)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2)

Net(
  (mp): Sequential(
    (0): GraphConv(1433, 16)
    (1): ELU(alpha=1.0, inplace=True)
  )
  (mlp): Sequential(
    (0): Linear(in_features=16, out_features=7, bias=True)
  )
)


In [8]:
def train():
    model.train()
    optimizer.zero_grad()
    x = data.x.float().to(device)
    _, mc_loss, o_loss, adj = model(x, data.edge_index, data.edge_weight)
    # _, mc_loss, adj = model(x, data.edge_index, data.edge_weight)
    loss = mc_loss + (o_loss * 0.2)
    # loss = mc_loss
    loss.backward()
    optimizer.step()
    return loss.item()


@torch.no_grad()
def test():
    model.eval()
    x = data.x.float().to(device)
    y = data.y[0]
    clust, _, _, adj = model(x, data.edge_index, data.edge_weight)
    # clust, _, adj = model(x, data.edge_index, data.edge_weight)
    return NMI(clust.max(1)[1].cpu(), y.cpu())


patience = 1000
best_nmi = 0
for epoch in range(1, 1000):
    train_loss = train()
    # nmi = test()
    nmi = 0.0
    print(f'Epoch: {epoch:03d}, Loss: {train_loss:.4f}, NMI: {nmi:.3f}')
    if nmi > best_nmi:
        best_nmi = nmi
        patience = 50
    else:
        patience -= 1     
    if patience == 0:
        break

Epoch: 001, Loss: -0.7758, NMI: 0.000
Epoch: 002, Loss: -0.7738, NMI: 0.000
Epoch: 003, Loss: -0.7746, NMI: 0.000
Epoch: 004, Loss: -0.7754, NMI: 0.000
Epoch: 005, Loss: -0.7760, NMI: 0.000
Epoch: 006, Loss: -0.7762, NMI: 0.000
Epoch: 007, Loss: -0.7764, NMI: 0.000
Epoch: 008, Loss: -0.7765, NMI: 0.000
Epoch: 009, Loss: -0.7766, NMI: 0.000
Epoch: 010, Loss: -0.7767, NMI: 0.000
Epoch: 011, Loss: -0.7767, NMI: 0.000
Epoch: 012, Loss: -0.7767, NMI: 0.000
Epoch: 013, Loss: -0.7768, NMI: 0.000
Epoch: 014, Loss: -0.7768, NMI: 0.000
Epoch: 015, Loss: -0.7768, NMI: 0.000
Epoch: 016, Loss: -0.7768, NMI: 0.000
Epoch: 017, Loss: -0.7768, NMI: 0.000
Epoch: 018, Loss: -0.7768, NMI: 0.000
Epoch: 019, Loss: -0.7768, NMI: 0.000
Epoch: 020, Loss: -0.7769, NMI: 0.000
Epoch: 021, Loss: -0.7769, NMI: 0.000
Epoch: 022, Loss: -0.7769, NMI: 0.000
Epoch: 023, Loss: -0.7769, NMI: 0.000
Epoch: 024, Loss: -0.7769, NMI: 0.000
Epoch: 025, Loss: -0.7769, NMI: 0.000
Epoch: 026, Loss: -0.7769, NMI: 0.000
Epoch: 027, 

In [9]:
x = data.x.float().to(device)
y = data.y[0]
clust, _, _, adj = model(x, data.edge_index, data.edge_weight)
# clust, _, adj = model(x, data.edge_index, data.edge_weight)


In [10]:
clust.max(1)[1].cpu()

tensor([0, 2, 2,  ..., 6, 0, 0])

In [11]:
data

Data(x=[2708, 1433], edge_index=[2, 10556], y=[2708], train_mask=[2708], val_mask=[2708], test_mask=[2708], edge_weight=[10556])

In [12]:
networkX_graph = to_networkx(data, node_attrs=["x"])

In [13]:
networkX_graph = networkX_graph.to_undirected()

In [14]:
colors = clust.max(1)[1].cpu().numpy()

In [15]:
import plotly.graph_objects as go

In [16]:
G = networkX_graph

In [17]:
pos = nx.spring_layout(G)

In [18]:
edge_x = []
edge_y = []
for edge in G.edges():
    x0, y0 = pos[edge[0]]
    x1, y1 = pos[edge[1]]
    edge_x.append(x0)
    edge_x.append(x1)
    edge_x.append(None)
    edge_y.append(y0)
    edge_y.append(y1)
    edge_y.append(None)

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=0.5, color='#888'),
    hoverinfo='none',
    mode='lines')

node_x = []
node_y = []
for node in G.nodes():
    x, y = pos[node]
    node_x.append(x)
    node_y.append(y)

node_trace = go.Scatter(
    x=node_x, y=node_y,
    mode='markers',
    hoverinfo='text',
    marker=dict(
        showscale=True,
        # colorscale options
        #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
        #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
        #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
        colorscale='YlGnBu',
        reversescale=True,
        color=[],
        size=10,
        colorbar=dict(
            thickness=15,
            title='Node Connections',
            xanchor='left',
            titleside='right'
        ),
        line_width=2))

In [19]:
node_trace.marker.color = colors

In [20]:
# fig = go.Figure(data=[edge_trace, node_trace],
#              layout=go.Layout(
#                 title='<br>Network graph made with Python',
#                 titlefont_size=16,
#                 showlegend=False,
#                 hovermode='closest',
#                 margin=dict(b=20,l=5,r=5,t=40),
#                 xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
#                 yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
#                 )
# fig.show()

In [28]:
print("Cluster size distribution:")
np.unique(colors, return_counts=True)[1]

Cluster size distribution:


array([377, 405, 377, 389, 413, 388, 359])

In [30]:
print("Ground truth class distribution:")
np.unique(data.y.cpu(), return_counts=True)[1]

Ground truth class distribution:


array([351, 217, 418, 818, 426, 298, 180])

In [22]:
nodelist = G.nodes()
clust_node = [[] for idx in range(7)]
for idx in range(len(nodelist)):
    clust_num = colors[idx]
    clust_node[clust_num].append(nodelist[idx]['x'])

In [23]:
clust_mean = [np.mean(clust_lst, axis=0) for clust_lst in clust_node]

In [24]:
clust_mean[0].shape

(1433,)

In [25]:
np.unique(data.y.cpu(), return_counts=True)

(array([0, 1, 2, 3, 4, 5, 6]), array([351, 217, 418, 818, 426, 298, 180]))