From f30d57db08cddd74df937823b2d44a20b1aff00e Mon Sep 17 00:00:00 2001
From: SinuoXu
Date: Fri, 13 Oct 2023 11:39:03 +0800
Subject: [PATCH 01/20] add ogc method

---
 examples/pytorch/ogc/README.md |  37 ++++++++++++
 examples/pytorch/ogc/ogc.py    | 104 +++++++++++++++++++++++++++++++++
 examples/pytorch/ogc/utils.py  |  66 +++++++++++++++++++++
 3 files changed, 207 insertions(+)
 create mode 100644 examples/pytorch/ogc/README.md
 create mode 100644 examples/pytorch/ogc/ogc.py
 create mode 100644 examples/pytorch/ogc/utils.py

diff --git a/examples/pytorch/ogc/README.md b/examples/pytorch/ogc/README.md
new file mode 100644
index 000000000000..713505b3d031
--- /dev/null
+++ b/examples/pytorch/ogc/README.md
@@ -0,0 +1,37 @@
+# Optimized Graph Convolution (OGC)
+
+This DGL example implements the OGC method from the paper: [From Cluster Assumption to Graph Convolution: Graph-based Semi-Supervised Learning Revisited](https://arxiv.org/abs/2309.13599).
+With only one trainable layer, OGC is a very simple but powerful graph convolution method.
+
+
+## Example Implementor
+
+This example was implemented by [Sinuo Xu](https://github.com/SinuoXu) when she was an undergraduate at SJTU.
+
+
+
+## Dataset
+
+DGL's built-in Cora, Pubmed and Citeseer datasets, as follows:
+
+| Dataset | #Nodes | #Edges | #Feats | #Classes | #Train Nodes | #Val Nodes | #Test Nodes |
+| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |
+| Citeseer | 3,327 | 9,228 | 3,703 | 6 | 120 | 500 | 1000 |
+| Cora | 2,708 | 10,556 | 1,433 | 7 | 140 | 500 | 1000 |
+| Pubmed | 19,717 | 88,651 | 500 | 3 | 60 | 500 | 1000 |
+
+
+## Usage
+
+```bash
+python ogc.py --dataset cora
+python ogc.py --dataset citeseer
+python ogc.py --dataset pubmed
+```
+
+## Performance
+
+| Dataset | Cora | Citeseer | Pubmed |
+| :-: | :-: | :-: | :-: |
+| OGC (DGL) | **86.9(±0.2)** | **77.4(±0.1)** | **83.6(±0.1)** |
+| OGC (Reported) | **86.9(±0.0)** | **77.4(±0.0)** | 83.4(±0.0) |
\ No newline at end of file
diff --git a/examples/pytorch/ogc/ogc.py b/examples/pytorch/ogc/ogc.py
new file mode 100644
index 000000000000..9d6cdb56115b
--- /dev/null
+++ b/examples/pytorch/ogc/ogc.py
@@ -0,0 +1,104 @@
+import time
+import argparse
+import scipy.sparse as sp
+
+import torch
+import torch.nn.functional as F
+
+from dgl import AddSelfLoop
+from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset
+from utils import sparse_mx_to_torch_sparse_tensor, symmetric_normalize_adjacency, LinearNeuralNetwork
+
+
+# Training settings
+decline = 0.9 # the dcline rate
+eta_sup = 0.001 # the learning rate for supervised loss
+eta_W = 0.5 # the learning rate for updating W
+beta = 0.1 # in [0,1], the moving probability that a node moves to its neighbors
+max_similar_tol = 0.995 # the max_tol test set label prediction similarity between two iterations
+max_patience = 2 # the tolreance for consecutively getting very similar test prediction
+
+
+def update_U(U, Y, predY, W):
+    global eta_sup
+    # ------ update the smoothness loss via LGC ------
+    U = torch.spmm(lazy_adj.to(device), U)
+
+    # ------ update the supervised loss via SEB ------
+    dU_sup = 2*torch.mm(torch.sparse.mm(S, -Y + predY), W)
+    U = U - eta_sup * dU_sup
+
+    eta_sup = eta_sup * decline
+    return U
+
+
+def OGC(linear_clf, U, g):
+    patience = 0
+    _, _, last_acc, last_outp = linear_clf.test(U, g)
+    for i in range(64):
+        # updating W by training a simple linear supervised model Y=W*X
+        predY, W = linear_clf.update_W(U, g, eta_W)
+
+        # updating U by LGC and SEB jointly
+        U = update_U(U, 
F.one_hot(g.ndata["label"]).float(), predY, W) + + loss_tv, acc_tv, acc_test, pred = linear_clf.test(U, g) + print('epoch {} loss_tv {:.4f} acc_train_val {:.4f} acc_test {:.4f}'.format( + i + 1, loss_tv, acc_tv, acc_test)) + + sim_rate = float(int((pred == last_outp).sum()) / int(pred.shape[0])) + if (sim_rate > max_similar_tol): + patience += 1 + if (patience > max_patience): + break + + last_acc = acc_test + last_outp = pred + return last_acc + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + '--dataset', + type=str, + default="citeseer", + choices=["cora", "citeseer", "pubmed"], + help='Dataset to use.') + args, _ = parser.parse_known_args() + + # load and preprocess dataset + transform = (AddSelfLoop()) + if args.dataset == "cora": + data = CoraGraphDataset(transform=transform) + elif args.dataset == "citeseer": + data = CiteseerGraphDataset(transform=transform) + elif args.dataset == "pubmed": + data = PubmedGraphDataset(transform=transform) + else: + raise ValueError("Unknown dataset: {}".format(args.dataset)) + + g = data[0] + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + g = g.int().to(device) + features = g.ndata["feat"] + + adj = symmetric_normalize_adjacency(g) + print(g.num_edges) + I_N = sp.eye(features.shape[0]) + # lazy random walk (also known as lazy graph convolution) + lazy_adj = (1 - beta) * I_N + beta * adj + lazy_adj = sparse_mx_to_torch_sparse_tensor(lazy_adj) + # LIM track, else use both train and validation set to construct S + S = torch.diag(g.ndata["train_mask"]).float().to_sparse() + + linear_clf = LinearNeuralNetwork(nfeat=g.ndata["feat"].size(1), + nclass=g.ndata["label"].max().item()+1, + bias=False).to(device) + + start_time = time.time() + res = OGC(linear_clf, features, g) + time_tot = time.time() - start_time + + print(f'Test Acc:{res:.4f}') + print(f'Total Time:{time_tot:.4f}') \ No newline at end of file diff --git a/examples/pytorch/ogc/utils.py b/examples/pytorch/ogc/utils.py new file mode 100644 index 000000000000..8c19247de605 --- /dev/null +++ b/examples/pytorch/ogc/utils.py @@ -0,0 +1,66 @@ +import numpy as np +import scipy.sparse as sp +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim + +def sparse_mx_to_torch_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a torch sparse tensor.""" + sparse_mx = sparse_mx.tocoo().astype(np.float32) + indices = torch.from_numpy( + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) + values = torch.from_numpy(sparse_mx.data) + shape = torch.Size(sparse_mx.shape) + return torch.sparse.FloatTensor(indices, values, shape) + + +def symmetric_normalize_adjacency(graph): + """ Symmetric normalize graph adjacency matrix. 
""" + adj = graph.adjacency_matrix() + in_degs = graph.in_degrees().float() + in_norm = torch.pow(in_degs, -0.5).unsqueeze(-1) + degi = torch.diag(torch.squeeze(torch.t(in_norm))) + degi = sp.coo_matrix(degi).tocsr() + adj = sp.csr_matrix((adj.val.cpu(), (adj.row.cpu(), adj.col.cpu())), shape=adj.shape) + adj = degi.dot(adj.dot(degi)) + return adj + + +class LinearNeuralNetwork(nn.Module): + def __init__(self, nfeat, nclass, bias=True): + super(LinearNeuralNetwork, self).__init__() + self.W = nn.Linear(nfeat, nclass, bias=bias) + + def forward(self, x): + return self.W(x) + + def test(self, U, g): + self.eval() + with torch.no_grad(): + output = self(U) + pred = output.argmax(dim=-1) + labels = g.ndata["label"] + test_mask = g.ndata["test_mask"] + tv_mask = g.ndata["train_mask"] + g.ndata["val_mask"] + loss_tv = F.mse_loss(output[tv_mask], + F.one_hot(labels).float()[tv_mask]) + accs = [] + for mask in [tv_mask, test_mask]: + accs.append( + float((pred[mask] == labels[mask]).sum()/mask.sum())) + return loss_tv.item(), accs[0], accs[1], pred + + def update_W(self, U, g, eta_W): + optimizer = optim.SGD(self.parameters(), lr=eta_W) + self.train() + optimizer.zero_grad() + output = self(U) + labels = g.ndata["label"] + tv_mask = g.ndata["train_mask"] + g.ndata["val_mask"] + loss_tv = F.mse_loss(output[tv_mask], + F.one_hot(labels).float()[tv_mask], + reduction='sum') + loss_tv.backward() + optimizer.step() + return self(U).data, self.W.weight.data From 2209d7f6748e4a8eec042eef14f8ee02585f81f8 Mon Sep 17 00:00:00 2001 From: SinuoXu Date: Fri, 13 Oct 2023 12:00:21 +0800 Subject: [PATCH 02/20] update example readme --- examples/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/examples/README.md b/examples/README.md index 3a9da46c57c7..08964a0dceeb 100644 --- a/examples/README.md +++ b/examples/README.md @@ -5,6 +5,14 @@ The folder contains example implementations of selected research papers related * For examples working with a certain release, check out `https://github.com/dmlc/dgl/tree//examples` (E.g., https://github.com/dmlc/dgl/tree/0.5.x/examples) To quickly locate the examples of your interest, search for the tagged keywords or use the search tool on [dgl.ai](https://www.dgl.ai/). + +## 2023 + +- Zheng Wang et al. From Cluster Assumption to Graph Convolution: Graph-based Semi-Supervised Learning Revisited. [Paper link](https://arxiv.org/abs/2210.13339) + - Example code: [PyTorch](../examples/pytorch/ogc) + + - Tags: semi-supervised node classification + ## 2022 - Balin et al. Layer-Neighbor Sampling -- Defusing Neighborhood Explosion in GNNs. 
[Paper link](https://arxiv.org/abs/2210.13339) - Example code: [PyTorch](../examples/labor/train_lightning.py) From 25ca95b7313d27ed9c96d15bae1927b1d2f84e18 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Fri, 13 Oct 2023 12:47:21 +0800 Subject: [PATCH 03/20] Add files via upload --- ogc.py | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 ogc.py diff --git a/ogc.py b/ogc.py new file mode 100644 index 000000000000..dff74f8495c4 --- /dev/null +++ b/ogc.py @@ -0,0 +1,103 @@ +import time +import argparse +import scipy.sparse as sp + +import torch +import torch.nn.functional as F + +from dgl import AddSelfLoop +from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset +from utils import sparse_mx_to_torch_sparse_tensor, symmetric_normalize_adjacency, LinearNeuralNetwork + + +# Training settings +decline = 0.9 # the dcline rate +eta_sup = 0.001 # the learning rate for supervised loss +eta_W = 0.5 # the learning rate for updating W +beta = 0.1 # in [0,1], the moving probability that a node moves to its neighbors +max_similar_tol = 0.995 # the max_tol test set label prediction similarity between two iterations +max_patience = 2 # the tolreance for consecutively getting very similar test prediction + + +def update_U(U, Y, predY, W): + global eta_sup + # ------ update the smoothness loss via LGC ------ + U = torch.spmm(lazy_adj.to(device), U) + + # ------ update the supervised loss via SEB ------ + dU_sup = 2*torch.mm(torch.sparse.mm(S, -Y + predY), W) + U = U - eta_sup * dU_sup + + eta_sup = eta_sup * decline + return U + + +def OGC(linear_clf, U, g): + patience = 0 + _, _, last_acc, last_outp = linear_clf.test(U, g) + for i in range(64): + # updating W by training a simple linear supervised model Y=W*X + predY, W = linear_clf.update_W(U, g, eta_W) + + # updating U by LGC and SEB jointly + U = update_U(U, F.one_hot(g.ndata["label"]).float(), predY, W) + + loss_tv, acc_tv, acc_test, pred = linear_clf.test(U, g) + print('epoch {} loss_tv {:.4f} acc_train_val {:.4f} acc_test {:.4f}'.format( + i + 1, loss_tv, acc_tv, acc_test)) + + sim_rate = float(int((pred == last_outp).sum()) / int(pred.shape[0])) + if (sim_rate > max_similar_tol): + patience += 1 + if (patience > max_patience): + break + + last_acc = acc_test + last_outp = pred + return last_acc + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + '--dataset', + type=str, + default="citeseer", + choices=["cora", "citeseer", "pubmed"], + help='Dataset to use.') + args, _ = parser.parse_known_args() + + # load and preprocess dataset + transform = (AddSelfLoop()) + if args.dataset == "cora": + data = CoraGraphDataset(transform=transform) + elif args.dataset == "citeseer": + data = CiteseerGraphDataset(transform=transform) + elif args.dataset == "pubmed": + data = PubmedGraphDataset(transform=transform) + else: + raise ValueError("Unknown dataset: {}".format(args.dataset)) + + g = data[0] + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + g = g.int().to(device) + features = g.ndata["feat"] + + adj = symmetric_normalize_adjacency(g) + I_N = sp.eye(features.shape[0]) + # lazy random walk (also known as lazy graph convolution) + lazy_adj = (1 - beta) * I_N + beta * adj + lazy_adj = sparse_mx_to_torch_sparse_tensor(lazy_adj) + # LIM track, else use both train and validation set to construct S + S = torch.diag(g.ndata["train_mask"]).float().to_sparse() + + 
linear_clf = LinearNeuralNetwork(nfeat=g.ndata["feat"].size(1), + nclass=g.ndata["label"].max().item()+1, + bias=False).to(device) + + start_time = time.time() + res = OGC(linear_clf, features, g) + time_tot = time.time() - start_time + + print(f'Test Acc:{res:.4f}') + print(f'Total Time:{time_tot:.4f}') \ No newline at end of file From deb1cc33a926d6a27528ab2dc233ed1a07a03b9e Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Fri, 13 Oct 2023 12:51:22 +0800 Subject: [PATCH 04/20] Update ogc.py --- examples/pytorch/ogc/ogc.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/pytorch/ogc/ogc.py b/examples/pytorch/ogc/ogc.py index 9d6cdb56115b..8f5047f366a5 100644 --- a/examples/pytorch/ogc/ogc.py +++ b/examples/pytorch/ogc/ogc.py @@ -84,7 +84,6 @@ def OGC(linear_clf, U, g): features = g.ndata["feat"] adj = symmetric_normalize_adjacency(g) - print(g.num_edges) I_N = sp.eye(features.shape[0]) # lazy random walk (also known as lazy graph convolution) lazy_adj = (1 - beta) * I_N + beta * adj @@ -101,4 +100,4 @@ def OGC(linear_clf, U, g): time_tot = time.time() - start_time print(f'Test Acc:{res:.4f}') - print(f'Total Time:{time_tot:.4f}') \ No newline at end of file + print(f'Total Time:{time_tot:.4f}') From c6768ce75efab4da446617c030c4afb6b304f273 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Fri, 13 Oct 2023 15:45:53 +0800 Subject: [PATCH 05/20] Update utils.py --- examples/pytorch/ogc/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/pytorch/ogc/utils.py b/examples/pytorch/ogc/utils.py index 8c19247de605..8ae4b4a3d5c5 100644 --- a/examples/pytorch/ogc/utils.py +++ b/examples/pytorch/ogc/utils.py @@ -21,7 +21,7 @@ def symmetric_normalize_adjacency(graph): in_degs = graph.in_degrees().float() in_norm = torch.pow(in_degs, -0.5).unsqueeze(-1) degi = torch.diag(torch.squeeze(torch.t(in_norm))) - degi = sp.coo_matrix(degi).tocsr() + degi = sp.coo_matrix(degi.cpu()).tocsr() adj = sp.csr_matrix((adj.val.cpu(), (adj.row.cpu(), adj.col.cpu())), shape=adj.shape) adj = degi.dot(adj.dot(degi)) return adj From ef1110aa33aa3e079c4384d2af0355828f4c1d6c Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 19 Oct 2023 12:25:21 +0800 Subject: [PATCH 06/20] Delete ogc.py --- ogc.py | 103 --------------------------------------------------------- 1 file changed, 103 deletions(-) delete mode 100644 ogc.py diff --git a/ogc.py b/ogc.py deleted file mode 100644 index dff74f8495c4..000000000000 --- a/ogc.py +++ /dev/null @@ -1,103 +0,0 @@ -import time -import argparse -import scipy.sparse as sp - -import torch -import torch.nn.functional as F - -from dgl import AddSelfLoop -from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset -from utils import sparse_mx_to_torch_sparse_tensor, symmetric_normalize_adjacency, LinearNeuralNetwork - - -# Training settings -decline = 0.9 # the dcline rate -eta_sup = 0.001 # the learning rate for supervised loss -eta_W = 0.5 # the learning rate for updating W -beta = 0.1 # in [0,1], the moving probability that a node moves to its neighbors -max_similar_tol = 0.995 # the max_tol test set label prediction similarity between two iterations -max_patience = 2 # the tolreance for consecutively getting very similar test prediction - - -def update_U(U, Y, predY, W): - global eta_sup - # ------ update the smoothness loss via LGC ------ - U = torch.spmm(lazy_adj.to(device), U) - - # ------ 
update the supervised loss via SEB ------ - dU_sup = 2*torch.mm(torch.sparse.mm(S, -Y + predY), W) - U = U - eta_sup * dU_sup - - eta_sup = eta_sup * decline - return U - - -def OGC(linear_clf, U, g): - patience = 0 - _, _, last_acc, last_outp = linear_clf.test(U, g) - for i in range(64): - # updating W by training a simple linear supervised model Y=W*X - predY, W = linear_clf.update_W(U, g, eta_W) - - # updating U by LGC and SEB jointly - U = update_U(U, F.one_hot(g.ndata["label"]).float(), predY, W) - - loss_tv, acc_tv, acc_test, pred = linear_clf.test(U, g) - print('epoch {} loss_tv {:.4f} acc_train_val {:.4f} acc_test {:.4f}'.format( - i + 1, loss_tv, acc_tv, acc_test)) - - sim_rate = float(int((pred == last_outp).sum()) / int(pred.shape[0])) - if (sim_rate > max_similar_tol): - patience += 1 - if (patience > max_patience): - break - - last_acc = acc_test - last_outp = pred - return last_acc - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - '--dataset', - type=str, - default="citeseer", - choices=["cora", "citeseer", "pubmed"], - help='Dataset to use.') - args, _ = parser.parse_known_args() - - # load and preprocess dataset - transform = (AddSelfLoop()) - if args.dataset == "cora": - data = CoraGraphDataset(transform=transform) - elif args.dataset == "citeseer": - data = CiteseerGraphDataset(transform=transform) - elif args.dataset == "pubmed": - data = PubmedGraphDataset(transform=transform) - else: - raise ValueError("Unknown dataset: {}".format(args.dataset)) - - g = data[0] - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - g = g.int().to(device) - features = g.ndata["feat"] - - adj = symmetric_normalize_adjacency(g) - I_N = sp.eye(features.shape[0]) - # lazy random walk (also known as lazy graph convolution) - lazy_adj = (1 - beta) * I_N + beta * adj - lazy_adj = sparse_mx_to_torch_sparse_tensor(lazy_adj) - # LIM track, else use both train and validation set to construct S - S = torch.diag(g.ndata["train_mask"]).float().to_sparse() - - linear_clf = LinearNeuralNetwork(nfeat=g.ndata["feat"].size(1), - nclass=g.ndata["label"].max().item()+1, - bias=False).to(device) - - start_time = time.time() - res = OGC(linear_clf, features, g) - time_tot = time.time() - start_time - - print(f'Test Acc:{res:.4f}') - print(f'Total Time:{time_tot:.4f}') \ No newline at end of file From 9b0fe44824fbeac9f188d5c9fb4a38d1099edfd0 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Tue, 24 Oct 2023 16:46:30 +0800 Subject: [PATCH 07/20] Update examples/pytorch/ogc/ogc.py Co-authored-by: Hongzhi (Steve), Chen --- examples/pytorch/ogc/ogc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/pytorch/ogc/ogc.py b/examples/pytorch/ogc/ogc.py index 8f5047f366a5..9a534735faae 100644 --- a/examples/pytorch/ogc/ogc.py +++ b/examples/pytorch/ogc/ogc.py @@ -15,7 +15,8 @@ eta_sup = 0.001 # the learning rate for supervised loss eta_W = 0.5 # the learning rate for updating W beta = 0.1 # in [0,1], the moving probability that a node moves to its neighbors -max_similar_tol = 0.995 # the max_tol test set label prediction similarity between two iterations +max_similar_tol = 0.995 # The max_tol test set label prediction similarity + # between two iterations. 
max_patience = 2 # the tolreance for consecutively getting very similar test prediction From 436b18b2e7adfe0bdaad20c889242857cf6ebe76 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Wed, 25 Oct 2023 21:29:23 +0800 Subject: [PATCH 08/20] Update README.md --- examples/pytorch/ogc/README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/examples/pytorch/ogc/README.md b/examples/pytorch/ogc/README.md index 713505b3d031..ca6a9c087933 100644 --- a/examples/pytorch/ogc/README.md +++ b/examples/pytorch/ogc/README.md @@ -9,6 +9,13 @@ With only one trainable layer, OGC is a very simple but powerful graph convoluti This example was implemented by [Sinuo Xu](https://github.com/SinuoXu) when she was an undergraduate at SJTU. +## Dependencies + +Python 3.11.5 +PyTorch 2.0.1 +DGL 1.1.2 +scikit-learn 1.3.1 + ## Dataset @@ -34,4 +41,4 @@ python main.py --dataset pubmed | Dataset | Cora | Citeseer | Pubmed | | :-: | :-: | :-: | :-: | | OGC (DGL) | **86.9(±0.2)** | **77.4(±0.1)** | **83.6(±0.1)** | -| OGC (Reported) | **86.9(±0.0)** | **77.4(±0.0)** | 83.4(±0.0) | \ No newline at end of file +| OGC (Reported) | **86.9(±0.0)** | **77.4(±0.0)** | 83.4(±0.0) | From ff0b12e17d17b707c5f0821f8117f183ea9634e0 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Wed, 25 Oct 2023 21:32:41 +0800 Subject: [PATCH 09/20] Update ogc.py --- examples/pytorch/ogc/ogc.py | 120 ++++++++++++++++++++---------------- 1 file changed, 66 insertions(+), 54 deletions(-) diff --git a/examples/pytorch/ogc/ogc.py b/examples/pytorch/ogc/ogc.py index 9a534735faae..0627896c419d 100644 --- a/examples/pytorch/ogc/ogc.py +++ b/examples/pytorch/ogc/ogc.py @@ -1,5 +1,6 @@ -import time import argparse +import time + import scipy.sparse as sp import torch @@ -7,69 +8,79 @@ from dgl import AddSelfLoop from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset -from utils import sparse_mx_to_torch_sparse_tensor, symmetric_normalize_adjacency, LinearNeuralNetwork +from utils import ( + LinearNeuralNetwork, + sparse_mx_to_torch_sparse_tensor, + symmetric_normalize_adjacency, +) -# Training settings -decline = 0.9 # the dcline rate -eta_sup = 0.001 # the learning rate for supervised loss -eta_W = 0.5 # the learning rate for updating W -beta = 0.1 # in [0,1], the moving probability that a node moves to its neighbors -max_similar_tol = 0.995 # The max_tol test set label prediction similarity - # between two iterations. -max_patience = 2 # the tolreance for consecutively getting very similar test prediction +# Training settings. +decline = 0.9 # the decline rate +lr_sup = 0.001 # the learning rate for supervised loss +lr_clf = 0.5 # the learning rate for the used linear classifier +beta = 0.1 # the moving probability that a node moves to its neighbors +max_sim_rate = 0.995 # the max label prediction similarity between iterations +max_patience = 2 # the tolerance for consecutively similar test predictions -def update_U(U, Y, predY, W): - global eta_sup - # ------ update the smoothness loss via LGC ------ - U = torch.spmm(lazy_adj.to(device), U) +def update_embeds(embeds, graph, label_idx_mat): + global lr_sup + # Update classifier's weight by training a linear supervised model. 
+ pred_labels, clf_weight = linear_clf.update_weight(embeds, graph, lr_clf) + labels = F.one_hot(graph.ndata["label"]).float() - # ------ update the supervised loss via SEB ------ - dU_sup = 2*torch.mm(torch.sparse.mm(S, -Y + predY), W) - U = U - eta_sup * dU_sup + # Update the smoothness loss via LGC. + embeds = torch.spmm(lazy_adj.to(device), embeds) - eta_sup = eta_sup * decline - return U + # Update the supervised loss via SEB. + deriv_sup = 2 * torch.mm( + torch.sparse.mm(label_idx_mat, -labels + pred_labels), clf_weight + ) + embeds = embeds - lr_sup * deriv_sup + lr_sup = lr_sup * decline + return embeds -def OGC(linear_clf, U, g): + +def OGC(linear_clf, embeds, graph, label_idx_mat): patience = 0 - _, _, last_acc, last_outp = linear_clf.test(U, g) + _, _, last_acc, last_output = linear_clf.test(embeds, graph) for i in range(64): - # updating W by training a simple linear supervised model Y=W*X - predY, W = linear_clf.update_W(U, g, eta_W) - - # updating U by LGC and SEB jointly - U = update_U(U, F.one_hot(g.ndata["label"]).float(), predY, W) - - loss_tv, acc_tv, acc_test, pred = linear_clf.test(U, g) - print('epoch {} loss_tv {:.4f} acc_train_val {:.4f} acc_test {:.4f}'.format( - i + 1, loss_tv, acc_tv, acc_test)) - - sim_rate = float(int((pred == last_outp).sum()) / int(pred.shape[0])) - if (sim_rate > max_similar_tol): + # Updating node embeds by LGC and SEB jointly. + embeds = update_embeds(embeds, graph, label_idx_mat) + + loss_tv, acc_tv, acc_test, pred = linear_clf.test(embeds, graph) + print( + "epoch {} loss_tv {:.4f} acc_tv {:.4f} acc_test {:.4f}".format( + i + 1, loss_tv, acc_tv, acc_test + ) + ) + + sim_rate = float(int((pred == last_output).sum()) / int(pred.shape[0])) + if sim_rate > max_sim_rate: patience += 1 - if (patience > max_patience): + if patience > max_patience: break last_acc = acc_test - last_outp = pred + last_output = pred return last_acc if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - '--dataset', + "--dataset", type=str, default="citeseer", choices=["cora", "citeseer", "pubmed"], - help='Dataset to use.') + help="Dataset to use.", + ) args, _ = parser.parse_known_args() - # load and preprocess dataset - transform = (AddSelfLoop()) + # Load and preprocess dataset. + transform = AddSelfLoop() if args.dataset == "cora": data = CoraGraphDataset(transform=transform) elif args.dataset == "citeseer": @@ -78,27 +89,28 @@ def OGC(linear_clf, U, g): data = PubmedGraphDataset(transform=transform) else: raise ValueError("Unknown dataset: {}".format(args.dataset)) - - g = data[0] + graph = data[0] device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - g = g.int().to(device) - features = g.ndata["feat"] + graph = graph.int().to(device) + features = graph.ndata["feat"] - adj = symmetric_normalize_adjacency(g) + adj = symmetric_normalize_adjacency(graph) I_N = sp.eye(features.shape[0]) - # lazy random walk (also known as lazy graph convolution) + # Lazy random walk (also known as lazy graph convolution). lazy_adj = (1 - beta) * I_N + beta * adj lazy_adj = sparse_mx_to_torch_sparse_tensor(lazy_adj) - # LIM track, else use both train and validation set to construct S - S = torch.diag(g.ndata["train_mask"]).float().to_sparse() - - linear_clf = LinearNeuralNetwork(nfeat=g.ndata["feat"].size(1), - nclass=g.ndata["label"].max().item()+1, - bias=False).to(device) + # LIM track, else use both train and val set to construct this matrix. 
+ label_idx_mat = torch.diag(graph.ndata["train_mask"]).float().to_sparse() + + linear_clf = LinearNeuralNetwork( + nfeat=graph.ndata["feat"].size(1), + nclass=graph.ndata["label"].max().item() + 1, + bias=False, + ).to(device) start_time = time.time() - res = OGC(linear_clf, features, g) + res = OGC(linear_clf, features, graph, label_idx_mat) time_tot = time.time() - start_time - print(f'Test Acc:{res:.4f}') - print(f'Total Time:{time_tot:.4f}') + print(f"Test Acc:{res:.4f}") + print(f"Total Time:{time_tot:.4f}") From 9801cc8cec020a3462572e81fed0128c4d5440a2 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Wed, 25 Oct 2023 21:34:16 +0800 Subject: [PATCH 10/20] Update utils.py --- examples/pytorch/ogc/utils.py | 36 ++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/examples/pytorch/ogc/utils.py b/examples/pytorch/ogc/utils.py index 8ae4b4a3d5c5..5e7ef2b13dd6 100644 --- a/examples/pytorch/ogc/utils.py +++ b/examples/pytorch/ogc/utils.py @@ -5,24 +5,28 @@ import torch.nn.functional as F import torch.optim as optim + def sparse_mx_to_torch_sparse_tensor(sparse_mx): """Convert a scipy sparse matrix to a torch sparse tensor.""" sparse_mx = sparse_mx.tocoo().astype(np.float32) indices = torch.from_numpy( - np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) + ) values = torch.from_numpy(sparse_mx.data) shape = torch.Size(sparse_mx.shape) return torch.sparse.FloatTensor(indices, values, shape) def symmetric_normalize_adjacency(graph): - """ Symmetric normalize graph adjacency matrix. """ + """Symmetric normalize graph adjacency matrix.""" adj = graph.adjacency_matrix() in_degs = graph.in_degrees().float() in_norm = torch.pow(in_degs, -0.5).unsqueeze(-1) degi = torch.diag(torch.squeeze(torch.t(in_norm))) degi = sp.coo_matrix(degi.cpu()).tocsr() - adj = sp.csr_matrix((adj.val.cpu(), (adj.row.cpu(), adj.col.cpu())), shape=adj.shape) + adj = sp.csr_matrix( + (adj.val.cpu(), (adj.row.cpu(), adj.col.cpu())), shape=adj.shape + ) adj = degi.dot(adj.dot(degi)) return adj @@ -35,32 +39,34 @@ def __init__(self, nfeat, nclass, bias=True): def forward(self, x): return self.W(x) - def test(self, U, g): + def test(self, embeds, g): self.eval() with torch.no_grad(): - output = self(U) + output = self(embeds) pred = output.argmax(dim=-1) labels = g.ndata["label"] test_mask = g.ndata["test_mask"] tv_mask = g.ndata["train_mask"] + g.ndata["val_mask"] - loss_tv = F.mse_loss(output[tv_mask], - F.one_hot(labels).float()[tv_mask]) + loss_tv = F.mse_loss( + output[tv_mask], F.one_hot(labels).float()[tv_mask] + ) accs = [] for mask in [tv_mask, test_mask]: accs.append( - float((pred[mask] == labels[mask]).sum()/mask.sum())) + float((pred[mask] == labels[mask]).sum() / mask.sum()) + ) return loss_tv.item(), accs[0], accs[1], pred - def update_W(self, U, g, eta_W): - optimizer = optim.SGD(self.parameters(), lr=eta_W) + def update_weight(self, embeds, g, lr_clf): + optimizer = optim.SGD(self.parameters(), lr=lr_clf) self.train() optimizer.zero_grad() - output = self(U) + output = self(embeds) labels = g.ndata["label"] tv_mask = g.ndata["train_mask"] + g.ndata["val_mask"] - loss_tv = F.mse_loss(output[tv_mask], - F.one_hot(labels).float()[tv_mask], - reduction='sum') + loss_tv = F.mse_loss( + output[tv_mask], F.one_hot(labels).float()[tv_mask], reduction="sum" + ) loss_tv.backward() optimizer.step() - return self(U).data, self.W.weight.data + return self(embeds).data, 
self.W.weight.data From 74569232f13fc5286b8fe82fc05260955600fac5 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Mon, 13 Nov 2023 18:58:32 +0800 Subject: [PATCH 11/20] Update ogc.py --- examples/pytorch/ogc/ogc.py | 136 +++++++++--------------------------- 1 file changed, 32 insertions(+), 104 deletions(-) diff --git a/examples/pytorch/ogc/ogc.py b/examples/pytorch/ogc/ogc.py index 0627896c419d..0af54ba0ce6c 100644 --- a/examples/pytorch/ogc/ogc.py +++ b/examples/pytorch/ogc/ogc.py @@ -1,116 +1,44 @@ -import argparse -import time - -import scipy.sparse as sp - -import torch +import dgl.sparse as dglsp +import torch.nn as nn import torch.nn.functional as F -from dgl import AddSelfLoop -from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset -from utils import ( - LinearNeuralNetwork, - sparse_mx_to_torch_sparse_tensor, - symmetric_normalize_adjacency, -) - - -# Training settings. -decline = 0.9 # the decline rate -lr_sup = 0.001 # the learning rate for supervised loss -lr_clf = 0.5 # the learning rate for the used linear classifier -beta = 0.1 # the moving probability that a node moves to its neighbors -max_sim_rate = 0.995 # the max label prediction similarity between iterations -max_patience = 2 # the tolerance for consecutively similar test predictions - - -def update_embeds(embeds, graph, label_idx_mat): - global lr_sup - # Update classifier's weight by training a linear supervised model. - pred_labels, clf_weight = linear_clf.update_weight(embeds, graph, lr_clf) - labels = F.one_hot(graph.ndata["label"]).float() - - # Update the smoothness loss via LGC. - embeds = torch.spmm(lazy_adj.to(device), embeds) +from utils import LinearNeuralNetwork - # Update the supervised loss via SEB. - deriv_sup = 2 * torch.mm( - torch.sparse.mm(label_idx_mat, -labels + pred_labels), clf_weight - ) - embeds = embeds - lr_sup * deriv_sup - lr_sup = lr_sup * decline - return embeds - - -def OGC(linear_clf, embeds, graph, label_idx_mat): - patience = 0 - _, _, last_acc, last_output = linear_clf.test(embeds, graph) - for i in range(64): - # Updating node embeds by LGC and SEB jointly. - embeds = update_embeds(embeds, graph, label_idx_mat) - - loss_tv, acc_tv, acc_test, pred = linear_clf.test(embeds, graph) - print( - "epoch {} loss_tv {:.4f} acc_tv {:.4f} acc_test {:.4f}".format( - i + 1, loss_tv, acc_tv, acc_test - ) +class OGC(nn.Module): + def __init__(self, graph): + super(OGC, self).__init__() + self.linear_clf = LinearNeuralNetwork( + nfeat=graph.ndata["feat"].shape[1], + nclass=graph.ndata["label"].max().item() + 1, + bias=False, ) - sim_rate = float(int((pred == last_output).sum()) / int(pred.shape[0])) - if sim_rate > max_sim_rate: - patience += 1 - if patience > max_patience: - break + self.label = graph.ndata["label"] + self.label_one_hot = F.one_hot(graph.ndata["label"]).float() + # LIM trick, else use both train and val set to construct this matrix. 
+ self.label_idx_mat = dglsp.diag(graph.ndata["train_mask"]).float() - last_acc = acc_test - last_output = pred - return last_acc + self.test_mask = graph.ndata["test_mask"] + self.tv_mask = graph.ndata["train_mask"] + graph.ndata["val_mask"] + def forward(self, x): + return self.linear_clf(x) -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--dataset", - type=str, - default="citeseer", - choices=["cora", "citeseer", "pubmed"], - help="Dataset to use.", - ) - args, _ = parser.parse_known_args() + def update_embeds(self, embeds, lazy_adj, args): + """Update classifier's weight by training a linear supervised model.""" + pred_label = self(embeds).data + clf_weight = self.linear_clf.W.weight.data - # Load and preprocess dataset. - transform = AddSelfLoop() - if args.dataset == "cora": - data = CoraGraphDataset(transform=transform) - elif args.dataset == "citeseer": - data = CiteseerGraphDataset(transform=transform) - elif args.dataset == "pubmed": - data = PubmedGraphDataset(transform=transform) - else: - raise ValueError("Unknown dataset: {}".format(args.dataset)) - graph = data[0] - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - graph = graph.int().to(device) - features = graph.ndata["feat"] + # Update the smoothness loss via LGC. + embeds = dglsp.spmm(lazy_adj, embeds) - adj = symmetric_normalize_adjacency(graph) - I_N = sp.eye(features.shape[0]) - # Lazy random walk (also known as lazy graph convolution). - lazy_adj = (1 - beta) * I_N + beta * adj - lazy_adj = sparse_mx_to_torch_sparse_tensor(lazy_adj) - # LIM track, else use both train and val set to construct this matrix. - label_idx_mat = torch.diag(graph.ndata["train_mask"]).float().to_sparse() - - linear_clf = LinearNeuralNetwork( - nfeat=graph.ndata["feat"].size(1), - nclass=graph.ndata["label"].max().item() + 1, - bias=False, - ).to(device) - - start_time = time.time() - res = OGC(linear_clf, features, graph, label_idx_mat) - time_tot = time.time() - start_time + # Update the supervised loss via SEB. 
+ deriv_sup = 2 * dglsp.matmul( + dglsp.spmm(self.label_idx_mat, -self.label_one_hot + pred_label), + clf_weight, + ) + embeds = embeds - args.lr_sup * deriv_sup - print(f"Test Acc:{res:.4f}") - print(f"Total Time:{time_tot:.4f}") + args.lr_sup = args.lr_sup * args.decline + return embeds From b9759f628452c9e0d5aaf3c7385733fc83c7f832 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Mon, 13 Nov 2023 19:00:04 +0800 Subject: [PATCH 12/20] Update utils.py --- examples/pytorch/ogc/utils.py | 79 ++++++++++------------------------- 1 file changed, 21 insertions(+), 58 deletions(-) diff --git a/examples/pytorch/ogc/utils.py b/examples/pytorch/ogc/utils.py index 5e7ef2b13dd6..95b61b6b07bf 100644 --- a/examples/pytorch/ogc/utils.py +++ b/examples/pytorch/ogc/utils.py @@ -1,34 +1,7 @@ -import numpy as np -import scipy.sparse as sp +import dgl.sparse as dglsp import torch import torch.nn as nn import torch.nn.functional as F -import torch.optim as optim - - -def sparse_mx_to_torch_sparse_tensor(sparse_mx): - """Convert a scipy sparse matrix to a torch sparse tensor.""" - sparse_mx = sparse_mx.tocoo().astype(np.float32) - indices = torch.from_numpy( - np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) - ) - values = torch.from_numpy(sparse_mx.data) - shape = torch.Size(sparse_mx.shape) - return torch.sparse.FloatTensor(indices, values, shape) - - -def symmetric_normalize_adjacency(graph): - """Symmetric normalize graph adjacency matrix.""" - adj = graph.adjacency_matrix() - in_degs = graph.in_degrees().float() - in_norm = torch.pow(in_degs, -0.5).unsqueeze(-1) - degi = torch.diag(torch.squeeze(torch.t(in_norm))) - degi = sp.coo_matrix(degi.cpu()).tocsr() - adj = sp.csr_matrix( - (adj.val.cpu(), (adj.row.cpu(), adj.col.cpu())), shape=adj.shape - ) - adj = degi.dot(adj.dot(degi)) - return adj class LinearNeuralNetwork(nn.Module): @@ -39,34 +12,24 @@ def __init__(self, nfeat, nclass, bias=True): def forward(self, x): return self.W(x) - def test(self, embeds, g): - self.eval() - with torch.no_grad(): - output = self(embeds) - pred = output.argmax(dim=-1) - labels = g.ndata["label"] - test_mask = g.ndata["test_mask"] - tv_mask = g.ndata["train_mask"] + g.ndata["val_mask"] - loss_tv = F.mse_loss( - output[tv_mask], F.one_hot(labels).float()[tv_mask] - ) - accs = [] - for mask in [tv_mask, test_mask]: - accs.append( - float((pred[mask] == labels[mask]).sum() / mask.sum()) - ) - return loss_tv.item(), accs[0], accs[1], pred - def update_weight(self, embeds, g, lr_clf): - optimizer = optim.SGD(self.parameters(), lr=lr_clf) - self.train() - optimizer.zero_grad() - output = self(embeds) - labels = g.ndata["label"] - tv_mask = g.ndata["train_mask"] + g.ndata["val_mask"] - loss_tv = F.mse_loss( - output[tv_mask], F.one_hot(labels).float()[tv_mask], reduction="sum" - ) - loss_tv.backward() - optimizer.step() - return self(embeds).data, self.W.weight.data +def symmetric_normalize_adjacency(graph): + """Symmetric normalize graph adjacency matrix.""" + indices = torch.stack(graph.edges()) + n = graph.num_nodes() + adj = dglsp.spmatrix(indices, shape=(n, n)) + deg_invsqrt = dglsp.diag(adj.sum(0)) ** -0.5 + return deg_invsqrt @ adj @ deg_invsqrt + + +def model_test(model, embeds): + model.eval() + with torch.no_grad(): + output = model(embeds) + pred = output.argmax(dim=-1) + test_mask, tv_mask = model.test_mask, model.tv_mask + loss_tv = F.mse_loss(output[tv_mask], model.label_one_hot[tv_mask]) + accs = [] + for mask in [tv_mask, test_mask]: + 
accs.append(float((pred[mask] == model.label[mask]).sum() / mask.sum())) + return loss_tv.item(), accs[0], accs[1], pred From 7330ca3cd36aac9a2679819abefae641549211c9 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Mon, 13 Nov 2023 19:17:55 +0800 Subject: [PATCH 13/20] Create train.py --- examples/pytorch/ogc/train.py | 126 ++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 examples/pytorch/ogc/train.py diff --git a/examples/pytorch/ogc/train.py b/examples/pytorch/ogc/train.py new file mode 100644 index 000000000000..d78c63ae4ddb --- /dev/null +++ b/examples/pytorch/ogc/train.py @@ -0,0 +1,126 @@ +import argparse +import time + +import dgl.sparse as dglsp + +import torch.nn.functional as F +import torch.optim as optim +from dgl import AddSelfLoop +from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset + +from ogc import OGC +from utils import model_test, symmetric_normalize_adjacency + + +def train(model, embeds, lazy_adj, args): + patience = 0 + _, _, last_acc, last_output = model_test(model, embeds) + + tv_mask = model.tv_mask + optimizer = optim.SGD(model.parameters(), lr=args.lr_clf) + + for i in range(64): + model.train() + output = model(embeds) + loss_tv = F.mse_loss( + output[tv_mask], model.label_one_hot[tv_mask], reduction="sum" + ) + optimizer.zero_grad() + loss_tv.backward() + optimizer.step() + + # Updating node embeds by LGC and SEB jointly. + embeds = model.update_embeds(embeds, lazy_adj, args) + + loss_tv, acc_tv, acc_test, pred = model_test(model, embeds) + print( + "epoch {} loss_tv {:.4f} acc_tv {:.4f} acc_test {:.4f}".format( + i + 1, loss_tv, acc_tv, acc_test + ) + ) + + sim_rate = float(int((pred == last_output).sum()) / int(pred.shape[0])) + if sim_rate > args.max_sim_rate: + patience += 1 + if patience > args.max_patience: + break + last_acc = acc_test + last_output = pred + return last_acc + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset", + type=str, + default="citeseer", + choices=["cora", "citeseer", "pubmed"], + help="dataset to use", + ) + parser.add_argument( + "--decline", type=float, default=0.9, help="decline rate" + ) + parser.add_argument( + "--lr_sup", + type=float, + default=0.001, + help="learning rate for supervised loss", + ) + parser.add_argument( + "--lr_clf", + type=float, + default=0.5, + help="learning rate for the used linear classifier", + ) + parser.add_argument( + "--beta", + type=float, + default=0.1, + help="moving probability that a node moves to its neighbors", + ) + parser.add_argument( + "--max_sim_rate", + type=float, + default=0.995, + help="max label prediction similarity between iterations", + ) + parser.add_argument( + "--max_patience", + type=int, + default=2, + help="tolerance for consecutively similar test predictions", + ) + parser.add_argument( + "--device", + type=str, + default="cpu", + choices=["cpu", "cuda"], + help="device to use", + ) + args, _ = parser.parse_known_args() + + # Load and preprocess dataset. 
+ transform = AddSelfLoop() + if args.dataset == "cora": + data = CoraGraphDataset(transform=transform) + elif args.dataset == "citeseer": + data = CiteseerGraphDataset(transform=transform) + elif args.dataset == "pubmed": + data = PubmedGraphDataset(transform=transform) + else: + raise ValueError("Unknown dataset: {}".format(args.dataset)) + graph = data[0].to(args.device) + features = graph.ndata["feat"] + adj = symmetric_normalize_adjacency(graph) + I_N = dglsp.identity((features.shape[0], features.shape[0])) + # Lazy random walk (also known as lazy graph convolution). + lazy_adj = dglsp.add((1 - args.beta) * I_N, args.beta * adj).to(args.device) + + model = OGC(graph).to(args.device) + start_time = time.time() + res = train(model, features, lazy_adj, args) + time_tot = time.time() - start_time + + print(f"Test Acc:{res:.4f}") + print(f"Total Time:{time_tot:.4f}") From 7dbee82c1f8695f35767ae96a9e63c6234f9278e Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 23 Nov 2023 22:01:04 +0800 Subject: [PATCH 14/20] Create train.py --- examples/pytorch/ggcm/train.py | 88 ++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 examples/pytorch/ggcm/train.py diff --git a/examples/pytorch/ggcm/train.py b/examples/pytorch/ggcm/train.py new file mode 100644 index 000000000000..5cebb314c7c6 --- /dev/null +++ b/examples/pytorch/ggcm/train.py @@ -0,0 +1,88 @@ +import argparse +import time +import copy + +import torch.nn.functional as F +import torch.optim as optim + +from dgl import AddSelfLoop +from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset + +from ggcm import GGCM +from utils import model_test, symmetric_normalize_adjacency + + +def train(model, embedds, args): + # Evaluate embedding by classification with the given split setting + best_acc = -1 + optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) + + for i in range(args.epochs): + model.train() + output = model(embedds) + loss = F.cross_entropy(output[model.train_mask], model.label[model.train_mask]) + optimizer.zero_grad() + loss.backward() + optimizer.step() + + loss_val, acc_val, acc_test = model_test(model, embedds) + if acc_val > best_acc: + best_acc, best_model = acc_val, copy.deepcopy(model) + + print(f'{i+1} {loss_val:.4f} {acc_val:.3f} acc_test={acc_test:.3f}') + + loss_val, acc_val, acc_test = model_test(best_model, embedds) + return acc_test + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + '--dataset', + type=str, + default="citeseer", + help='Dataset to use.', + ) + parser.add_argument('--epochs', type=int, default=100) + parser.add_argument('--lr', type=float, default=0.2) + parser.add_argument('--degree', type=int, default=16) + parser.add_argument('--decline', type=float, default=1) + parser.add_argument('--negative_rate', type=float, default=20.0) + parser.add_argument('--wd', type=float, nargs='*', default=1e-3) + parser.add_argument('--alpha', type=float, default=0.12) + parser.add_argument('--decline_neg', type=float, default=1.0) + parser.add_argument( + '--device', + type=str, + default='cpu', + choices=['cpu', 'cuda'], + help='device to use', + ) + args, _ = parser.parse_known_args() + + transform = (AddSelfLoop()) + if args.dataset == "cora": + num_edges = CoraGraphDataset()[0].num_edges() + data = CoraGraphDataset(transform=transform) + elif args.dataset == "citeseer": + num_edges = CiteseerGraphDataset()[0].num_edges() + data = 
CiteseerGraphDataset(transform=transform) + elif args.dataset == "pubmed": + num_edges = PubmedGraphDataset()[0].num_edges() + data = PubmedGraphDataset(transform=transform) + else: + raise ValueError("Unknown dataset: {}".format(args.dataset)) + + graph = data[0] + graph = graph.to(args.device) + features = graph.ndata["feat"] + adj = symmetric_normalize_adjacency(graph) + + avg_edge_num = int(args.negative_rate * num_edges / features.shape[0]) + avg_edge_num = ((avg_edge_num + 1) // 2) * 2 + + model = GGCM(graph, args).to(args.device) + start_time = time.time() + embedds = GGCM.update_embedds(features, adj, avg_edge_num, args) + test_acc = train(model, embedds, args) + print(f'Final test acc: {test_acc:.4f}') + print(f'Total Time: {time.time() - start_time:.4f}') From a833478f3408df21a4e8df1be640a78dfdd0c937 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 23 Nov 2023 22:01:34 +0800 Subject: [PATCH 15/20] Create ggcm.py --- examples/pytorch/ggcm/ggcm.py | 50 +++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 examples/pytorch/ggcm/ggcm.py diff --git a/examples/pytorch/ggcm/ggcm.py b/examples/pytorch/ggcm/ggcm.py new file mode 100644 index 000000000000..56d7f8cbb918 --- /dev/null +++ b/examples/pytorch/ggcm/ggcm.py @@ -0,0 +1,50 @@ +import dgl.sparse as dglsp + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from utils import LinearNeuralNetwork, lazy_random_walk, inverse_graph_convolution + + +class GGCM(nn.Module): + def __init__(self, graph, args): + super(GGCM, self).__init__() + self.linear_nn = LinearNeuralNetwork(nfeat=graph.ndata["feat"].shape[1], + nclass=F.one_hot(graph.ndata["label"]).shape[1], + bias=True).to(args.device) + + self.label = graph.ndata["label"] + self.test_mask = graph.ndata["test_mask"] + self.train_mask = graph.ndata["train_mask"] + self.val_mask = graph.ndata["val_mask"] + + def forward(self, x): + return self.linear_nn(x) + + def update_embedds(features, A_hat, avg_edge_num, args): + beta = 1.0 + beta_neg = 1.0 + K = args.degree + X = features.clone() + temp_sum = torch.zeros_like(features) + I_N = dglsp.identity((features.shape[0], features.shape[0])) + + for _ in range(K): + # lazy graph convolution (LGC) + lazy_A = lazy_random_walk(A_hat, beta, I_N).to(args.device) + + # inverse graph convlution (IGC), lazy version + neg_A_hat = inverse_graph_convolution( + avg_edge_num, features.shape[0], args.device).to(args.device) + inv_lazy_A = lazy_random_walk(neg_A_hat, beta_neg, I_N).to(args.device) + inv_features = dglsp.spmm(inv_lazy_A, features) + features = dglsp.spmm(lazy_A, features) + + # add for multi-scale version + temp_sum += (features + inv_features) / 2.0 + beta *= args.decline + beta_neg *= args.decline_neg + + embedds = args.alpha * X + (1 - args.alpha) * (temp_sum / (K * 1.0)) + return embedds From cee45b63f59533c0e700159d3419611af447e210 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 23 Nov 2023 22:02:23 +0800 Subject: [PATCH 16/20] Create utils.py --- examples/pytorch/ggcm/utils.py | 51 ++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 examples/pytorch/ggcm/utils.py diff --git a/examples/pytorch/ggcm/utils.py b/examples/pytorch/ggcm/utils.py new file mode 100644 index 000000000000..6f9c31146896 --- /dev/null +++ b/examples/pytorch/ggcm/utils.py @@ -0,0 +1,51 @@ +import dgl.sparse as dglsp +import torch +import torch.nn as nn +import torch.nn.functional as F 
+import networkx as nx + + +class LinearNeuralNetwork(nn.Module): + def __init__(self, nfeat, nclass, bias=True): + super(LinearNeuralNetwork, self).__init__() + self.W = nn.Linear(nfeat, nclass, bias=bias) + + def forward(self, x): + return self.W(x) + + +def symmetric_normalize_adjacency(graph): + """Symmetric normalize graph adjacency matrix.""" + indices = torch.stack(graph.edges()) + n = graph.num_nodes() + adj = dglsp.spmatrix(indices, shape=(n, n)) + deg_invsqrt = dglsp.diag(adj.sum(0)) ** -0.5 + return deg_invsqrt @ adj @ deg_invsqrt + + +def model_test(model, embedds): + model.eval() + with torch.no_grad(): + output = model(embedds) + pred = output.argmax(dim=-1) + test_mask, val_mask = model.test_mask, model.val_mask + loss = F.cross_entropy(output[val_mask], model.label[val_mask]) + accs = [] + for mask in [val_mask, test_mask]: + accs.append(float((pred[mask] == model.label[mask]).sum()/mask.sum())) + return loss.item(), accs[0], accs[1] + + +def inverse_graph_convolution(k, n, device): + adj = nx.adjacency_matrix(nx.random_regular_graph(k, n)).tocoo() + indices = torch.tensor([adj.row.tolist(), adj.col.tolist()]) + values = torch.tensor(adj.data.tolist()) + adj_sym_nor = dglsp.spmatrix(indices, values, adj.shape).coalesce().to(device) + I_N = dglsp.identity((n, n)).to(dtype=torch.int64) + # re-normalization trick + adj_sym_nor = dglsp.sub(2 * I_N, adj_sym_nor) / (k + 2) + return adj_sym_nor + + +def lazy_random_walk(adj, beta, I_N): + return dglsp.add((1 - beta) * I_N, beta * adj) From 715b5d2e6393e7c2314bdc47c6b17a97ca1ff3c5 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 4 Jan 2024 22:39:57 +0800 Subject: [PATCH 17/20] Update ggcm.py --- examples/pytorch/ggcm/ggcm.py | 61 +++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/examples/pytorch/ggcm/ggcm.py b/examples/pytorch/ggcm/ggcm.py index 56d7f8cbb918..f02107fdb3cd 100644 --- a/examples/pytorch/ggcm/ggcm.py +++ b/examples/pytorch/ggcm/ggcm.py @@ -2,49 +2,54 @@ import torch import torch.nn as nn -import torch.nn.functional as F -from utils import LinearNeuralNetwork, lazy_random_walk, inverse_graph_convolution +from utils import ( + inverse_graph_convolution, + lazy_random_walk, + symmetric_normalize_adjacency, +) class GGCM(nn.Module): - def __init__(self, graph, args): + def __init__(self): super(GGCM, self).__init__() - self.linear_nn = LinearNeuralNetwork(nfeat=graph.ndata["feat"].shape[1], - nclass=F.one_hot(graph.ndata["label"]).shape[1], - bias=True).to(args.device) - - self.label = graph.ndata["label"] - self.test_mask = graph.ndata["test_mask"] - self.train_mask = graph.ndata["train_mask"] - self.val_mask = graph.ndata["val_mask"] - - def forward(self, x): - return self.linear_nn(x) - - def update_embedds(features, A_hat, avg_edge_num, args): + + def get_embedding(self, graph, args): + # get the learned node embeddings beta = 1.0 beta_neg = 1.0 - K = args.degree - X = features.clone() + layer_num, alpha = args.layer_num, args.alpha + device = args.device + features = graph.ndata["feat"] + orig_feats = features.clone() temp_sum = torch.zeros_like(features) - I_N = dglsp.identity((features.shape[0], features.shape[0])) - for _ in range(K): - # lazy graph convolution (LGC) - lazy_A = lazy_random_walk(A_hat, beta, I_N).to(args.device) + node_num = features.shape[0] + I_N = dglsp.identity((node_num, node_num)) + A_hat = symmetric_normalize_adjacency(graph) + + # the inverser random adj + edge_num = int(args.negative_rate * 
graph.num_edges() / node_num) + # need n*k odd, for networkx + edge_num = ((edge_num + 1) // 2) * 2 + for _ in range(layer_num): # inverse graph convlution (IGC), lazy version - neg_A_hat = inverse_graph_convolution( - avg_edge_num, features.shape[0], args.device).to(args.device) - inv_lazy_A = lazy_random_walk(neg_A_hat, beta_neg, I_N).to(args.device) + neg_A_hat = inverse_graph_convolution(edge_num, node_num, I_N).to( + device + ) + inv_lazy_A = lazy_random_walk(neg_A_hat, beta_neg, I_N).to(device) inv_features = dglsp.spmm(inv_lazy_A, features) + + # lazy graph convolution (LGC) + lazy_A = lazy_random_walk(A_hat, beta, I_N).to(device) features = dglsp.spmm(lazy_A, features) # add for multi-scale version temp_sum += (features + inv_features) / 2.0 beta *= args.decline beta_neg *= args.decline_neg - - embedds = args.alpha * X + (1 - args.alpha) * (temp_sum / (K * 1.0)) - return embedds + embeds = alpha * orig_feats + (1 - alpha) * ( + temp_sum / (layer_num * 1.0) + ) + return embeds From 6edb1d2cc106d431aa5e6e75c7c797decfd29e9f Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 4 Jan 2024 22:49:36 +0800 Subject: [PATCH 18/20] Update train.py --- examples/pytorch/ggcm/train.py | 148 ++++++++++++++++++++------------- 1 file changed, 92 insertions(+), 56 deletions(-) diff --git a/examples/pytorch/ggcm/train.py b/examples/pytorch/ggcm/train.py index 5cebb314c7c6..0f8c87fae3f4 100644 --- a/examples/pytorch/ggcm/train.py +++ b/examples/pytorch/ggcm/train.py @@ -1,7 +1,7 @@ import argparse -import time import copy +import torch import torch.nn.functional as F import torch.optim as optim @@ -9,80 +9,116 @@ from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset from ggcm import GGCM -from utils import model_test, symmetric_normalize_adjacency +from utils import Classifier -def train(model, embedds, args): - # Evaluate embedding by classification with the given split setting - best_acc = -1 - optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) +def evaluate(model, embeds, graph): + model.eval() + with torch.no_grad(): + output = model(embeds) + pred = output.argmax(dim=-1) + label = graph.ndata["label"] + val_mask, test_mask = graph.ndata["val_mask"], graph.ndata["test_mask"] + loss = F.cross_entropy(output[val_mask], label[val_mask]) + accs = [] + for mask in [val_mask, test_mask]: + accs.append(float((pred[mask] == label[mask]).sum() / mask.sum())) + return loss.item(), accs[0], accs[1] + + +def main(args): + # prepare data + transform = AddSelfLoop() + if args.dataset == "cora": + data = CoraGraphDataset(transform=transform) + elif args.dataset == "citeseer": + data = CiteseerGraphDataset(transform=transform) + elif args.dataset == "pubmed": + data = PubmedGraphDataset(transform=transform) + else: + raise ValueError("Unknown dataset: {}".format(args.dataset)) + graph = data[0].to(args.device) + features = graph.ndata["feat"] + train_mask = graph.ndata["train_mask"] + in_feats = features.shape[1] + n_classes = data.num_classes + + # get node embedding + ggcm = GGCM() + embeds = ggcm.get_embedding(graph, args) + + # create classifier model + classifier = Classifier(in_feats, n_classes) + optimizer = optim.Adam( + classifier.parameters(), lr=args.lr, weight_decay=args.wd + ) + + # train classifier + best_acc = -1 for i in range(args.epochs): - model.train() - output = model(embedds) - loss = F.cross_entropy(output[model.train_mask], model.label[model.train_mask]) + classifier.train() + output = 
classifier(embeds) + loss = F.cross_entropy( + output[train_mask], graph.ndata["label"][train_mask] + ) optimizer.zero_grad() loss.backward() optimizer.step() - loss_val, acc_val, acc_test = model_test(model, embedds) + loss_val, acc_val, acc_test = evaluate(classifier, embeds, graph) if acc_val > best_acc: - best_acc, best_model = acc_val, copy.deepcopy(model) + best_acc, best_model = acc_val, copy.deepcopy(classifier) + + print(f"{i+1} {loss_val:.4f} {acc_val:.3f} acc_test={acc_test:.3f}") - print(f'{i+1} {loss_val:.4f} {acc_val:.3f} acc_test={acc_test:.3f}') + _, _, acc_test = evaluate(best_model, embeds, graph) + print(f"Final test acc: {acc_test:.4f}") - loss_val, acc_val, acc_test = model_test(best_model, embedds) - return acc_test if __name__ == "__main__": - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(description="GGCM") parser.add_argument( - '--dataset', + "--dataset", type=str, default="citeseer", - help='Dataset to use.', + choices=["citeseer", "cora", "pubmed"], + help="Dataset to use.", ) - parser.add_argument('--epochs', type=int, default=100) - parser.add_argument('--lr', type=float, default=0.2) - parser.add_argument('--degree', type=int, default=16) - parser.add_argument('--decline', type=float, default=1) - parser.add_argument('--negative_rate', type=float, default=20.0) - parser.add_argument('--wd', type=float, nargs='*', default=1e-3) - parser.add_argument('--alpha', type=float, default=0.12) - parser.add_argument('--decline_neg', type=float, default=1.0) + parser.add_argument("--decline", type=float, default=1, help="Decline.") + parser.add_argument("--alpha", type=float, default=0.15, help="Alpha.") parser.add_argument( - '--device', + "--epochs", type=int, default=100, help="Number of epochs to train." + ) + parser.add_argument( + "--lr", type=float, default=0.13, help="Initial learning rate." + ) + parser.add_argument( + "--layer_num", type=int, default=16, help="Degree of the approximation." + ) + parser.add_argument( + "--negative_rate", + type=float, + default=20.0, + help="Negative sampling rate for a negative graph.", + ) + parser.add_argument( + "--wd", + type=float, + nargs="*", + default=2e-3, + help="Weight decay (L2 loss on parameters).", + ) + parser.add_argument( + "--decline_neg", type=float, default=1.0, help="Decline negative." 
+ ) + parser.add_argument( + "--device", type=str, - default='cpu', - choices=['cpu', 'cuda'], - help='device to use', + default="cpu", + choices=["cpu", "cuda"], + help="device to use", ) args, _ = parser.parse_known_args() - transform = (AddSelfLoop()) - if args.dataset == "cora": - num_edges = CoraGraphDataset()[0].num_edges() - data = CoraGraphDataset(transform=transform) - elif args.dataset == "citeseer": - num_edges = CiteseerGraphDataset()[0].num_edges() - data = CiteseerGraphDataset(transform=transform) - elif args.dataset == "pubmed": - num_edges = PubmedGraphDataset()[0].num_edges() - data = PubmedGraphDataset(transform=transform) - else: - raise ValueError("Unknown dataset: {}".format(args.dataset)) - - graph = data[0] - graph = graph.to(args.device) - features = graph.ndata["feat"] - adj = symmetric_normalize_adjacency(graph) - - avg_edge_num = int(args.negative_rate * num_edges / features.shape[0]) - avg_edge_num = ((avg_edge_num + 1) // 2) * 2 - - model = GGCM(graph, args).to(args.device) - start_time = time.time() - embedds = GGCM.update_embedds(features, adj, avg_edge_num, args) - test_acc = train(model, embedds, args) - print(f'Final test acc: {test_acc:.4f}') - print(f'Total Time: {time.time() - start_time:.4f}') + main(args) From 0f1641ab7c91fd3d604491275fde956225829fac Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 4 Jan 2024 22:50:23 +0800 Subject: [PATCH 19/20] Update utils.py --- examples/pytorch/ggcm/utils.py | 44 +++++++++++++--------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/examples/pytorch/ggcm/utils.py b/examples/pytorch/ggcm/utils.py index 6f9c31146896..877736b22654 100644 --- a/examples/pytorch/ggcm/utils.py +++ b/examples/pytorch/ggcm/utils.py @@ -1,17 +1,21 @@ +import dgl import dgl.sparse as dglsp +import networkx as nx import torch import torch.nn as nn -import torch.nn.functional as F -import networkx as nx -class LinearNeuralNetwork(nn.Module): - def __init__(self, nfeat, nclass, bias=True): - super(LinearNeuralNetwork, self).__init__() - self.W = nn.Linear(nfeat, nclass, bias=bias) +class Classifier(nn.Module): + def __init__(self, in_feats, n_classes): + super(Classifier, self).__init__() + self.fc = nn.Linear(in_feats, n_classes) + self.reset_parameters() + + def reset_parameters(self): + self.fc.reset_parameters() def forward(self, x): - return self.W(x) + return self.fc(x) def symmetric_normalize_adjacency(graph): @@ -23,27 +27,13 @@ def symmetric_normalize_adjacency(graph): return deg_invsqrt @ adj @ deg_invsqrt -def model_test(model, embedds): - model.eval() - with torch.no_grad(): - output = model(embedds) - pred = output.argmax(dim=-1) - test_mask, val_mask = model.test_mask, model.val_mask - loss = F.cross_entropy(output[val_mask], model.label[val_mask]) - accs = [] - for mask in [val_mask, test_mask]: - accs.append(float((pred[mask] == model.label[mask]).sum()/mask.sum())) - return loss.item(), accs[0], accs[1] - - -def inverse_graph_convolution(k, n, device): - adj = nx.adjacency_matrix(nx.random_regular_graph(k, n)).tocoo() - indices = torch.tensor([adj.row.tolist(), adj.col.tolist()]) - values = torch.tensor(adj.data.tolist()) - adj_sym_nor = dglsp.spmatrix(indices, values, adj.shape).coalesce().to(device) - I_N = dglsp.identity((n, n)).to(dtype=torch.int64) +def inverse_graph_convolution(edge_num, node_num, I_N): + graph = dgl.from_networkx(nx.random_regular_graph(edge_num, node_num)) + indices = torch.stack(graph.edges()) + adj = dglsp.spmatrix(indices, 
shape=(node_num, node_num)).coalesce() + # re-normalization trick - adj_sym_nor = dglsp.sub(2 * I_N, adj_sym_nor) / (k + 2) + adj_sym_nor = dglsp.sub(2 * I_N, adj) / (edge_num + 2) return adj_sym_nor From 87fb2e8133c7502acfd139f209e7459cb892e191 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 4 Jan 2024 22:52:27 +0800 Subject: [PATCH 20/20] Create README.md --- examples/pytorch/ggcm/README.md | 41 +++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 examples/pytorch/ggcm/README.md diff --git a/examples/pytorch/ggcm/README.md b/examples/pytorch/ggcm/README.md new file mode 100644 index 000000000000..1f105b0701b2 --- /dev/null +++ b/examples/pytorch/ggcm/README.md @@ -0,0 +1,41 @@ +# DGL Implementation of GGCM + +This DGL example implements the GGCM method from the paper: [From Cluster Assumption to Graph Convolution: Graph-based Semi-Supervised Learning Revisited](https://arxiv.org/abs/2309.13599). +The authors' original implementation can be found [here](https://github.com/zhengwang100/ogc_ggcm). + + +## Example Implementor + +This example was implemented by [Sinuo Xu](https://github.com/SinuoXu) when she was an undergraduate at SJTU. + + +## Dependencies +Python 3.11.5
+PyTorch 2.0.1
+DGL 1.1.2
+scikit-learn 1.3.1
+
+
+## Dataset
+DGL's built-in Citeseer, Cora and Pubmed datasets, as follows:
+| Dataset | #Nodes | #Edges | #Feats | #Classes | #Train Nodes | #Val Nodes | #Test Nodes |
+| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |
+| Citeseer | 3,327 | 9,228 | 3,703 | 6 | 120 | 500 | 1000 |
+| Cora | 2,708 | 10,556 | 1,433 | 7 | 140 | 500 | 1000 |
+| Pubmed | 19,717 | 88,651 | 500 | 3 | 60 | 500 | 1000 |
+
+
+## Usage
+Run with the following (available dataset: "cora", "citeseer", "pubmed")
+```bash
+python train.py --dataset citeseer
+python train.py --dataset cora --decline 1.0 --alpha 0.15 --epochs 100 --lr 0.2 --layer_num 16 --negative_rate 20.0 --wd 1e-5 --decline_neg 0.5
+python train.py --dataset pubmed --decline 1.0 --alpha 0.1 --epochs 100 --lr 0.2 --layer_num 16 --negative_rate 20.0 --wd 2e-5 --decline_neg 0.5
+```
+
+## Performance
+
+| Dataset | Citeseer | Cora | Pubmed |
+| :-: | :-: | :-: | :-: |
+| GGCM (DGL) | 74.1 | 83.5 | 80.7 |
+| GGCM (reported) | 74.2 | 83.6 | 80.8 |
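
For a quick grasp of what the OGC patches above compute, the heart of `update_U` (patch 01) and `update_embeds` (patch 11) is just two matrix updates per iteration: a lazy-random-walk smoothing step (LGC) and a supervised embedding correction (SEB). The following is a minimal, self-contained sketch of one such iteration on toy dense tensors; the sizes, the random "adjacency", and the 50% train mask are illustrative assumptions standing in for the example's real DGL graph and `dgl.sparse` matrices.

```python
import torch

# One OGC iteration on toy data (shapes and constants are made up).
torch.manual_seed(0)
n, d, c = 6, 4, 3          # nodes, feature dim, classes
beta, lr_sup = 0.1, 0.001  # moving probability, supervised learning rate

adj = torch.rand(n, n)
adj = (adj + adj.T) / 2                            # stand-in for the symmetric-normalized adjacency
lazy_adj = (1 - beta) * torch.eye(n) + beta * adj  # lazy random walk / LGC operator
U = torch.rand(n, d)                               # node embeddings, initialized with features
Y = torch.eye(c)[torch.randint(c, (n,))]           # one-hot labels
S = torch.diag((torch.rand(n) < 0.5).float())      # diagonal mask selecting labeled (train) nodes
W = torch.rand(c, d)                               # linear classifier weight, shape (nclass, nfeat)

pred_Y = U @ W.T                              # classifier predictions (LinearNeuralNetwork.forward)
U = lazy_adj @ U                              # smoothness update via lazy graph convolution (LGC)
U = U - lr_sup * 2 * (S @ (pred_Y - Y)) @ W   # supervised embedding update (SEB)
print(U.shape)                                # torch.Size([6, 4])
```

The full example wraps exactly this pair of updates in a loop of up to 64 iterations, retrains the one-layer classifier between updates, decays `lr_sup` by `decline` each step, and stops early once test predictions barely change between iterations.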
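Likewise, GGCM's `get_embedding` (patch 17) reduces to the loop below: lazy graph convolution on the real graph, an "inverse" convolution on a fresh random k-regular graph each layer, and a multi-scale average of the two. This is an illustrative dense-tensor sketch with made-up sizes, not the `dgl.sparse` implementation itself.

```python
import networkx as nx
import torch

# GGCM embedding construction on toy data (all sizes are assumptions).
torch.manual_seed(0)
n, d, layers = 8, 5, 4
alpha, beta, beta_neg, k = 0.15, 1.0, 1.0, 4  # note k * n must be even for networkx

A_hat = torch.rand(n, n)
A_hat = (A_hat + A_hat.T) / 2        # stand-in for the normalized adjacency
I_N = torch.eye(n)
X = torch.rand(n, d)                 # input node features
feats, total = X.clone(), torch.zeros_like(X)

for _ in range(layers):
    # inverse graph convolution (IGC) on a fresh random k-regular graph
    neg_A = torch.tensor(nx.to_numpy_array(nx.random_regular_graph(k, n)),
                         dtype=torch.float32)
    neg_A_hat = (2 * I_N - neg_A) / (k + 2)                # re-normalization trick
    inv_feats = ((1 - beta_neg) * I_N + beta_neg * neg_A_hat) @ feats
    # lazy graph convolution (LGC) on the real graph
    feats = ((1 - beta) * I_N + beta * A_hat) @ feats
    total += (feats + inv_feats) / 2.0                     # multi-scale accumulation

embeds = alpha * X + (1 - alpha) * total / layers
print(embeds.shape)  # torch.Size([8, 5])
```

In the example itself these products run through `dglsp.spmm`, and the embeddings are computed once without gradients; only the linear classifier trained on `embeds` afterwards has learnable parameters.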