From f30d57db08cddd74df937823b2d44a20b1aff00e Mon Sep 17 00:00:00 2001
From: SinuoXu
Date: Fri, 13 Oct 2023 11:39:03 +0800
Subject: [PATCH 01/20] add ogc method

---
 examples/pytorch/ogc/README.md |  37 ++++++++++++
 examples/pytorch/ogc/ogc.py    | 104 +++++++++++++++++++++++++++++++++
 examples/pytorch/ogc/utils.py  |  66 +++++++++++++++++++++
 3 files changed, 207 insertions(+)
 create mode 100644 examples/pytorch/ogc/README.md
 create mode 100644 examples/pytorch/ogc/ogc.py
 create mode 100644 examples/pytorch/ogc/utils.py

diff --git a/examples/pytorch/ogc/README.md b/examples/pytorch/ogc/README.md
new file mode 100644
index 000000000000..713505b3d031
--- /dev/null
+++ b/examples/pytorch/ogc/README.md
@@ -0,0 +1,37 @@
+# Optimized Graph Convolution (OGC)
+
+This DGL example implements the OGC method from the paper: [From Cluster Assumption to Graph Convolution: Graph-based Semi-Supervised Learning Revisited](https://arxiv.org/abs/2309.13599).
+With only one trainable layer, OGC is a very simple but powerful graph convolution method.
+
+
+## Example Implementor
+
+This example was implemented by [Sinuo Xu](https://github.com/SinuoXu) when she was an undergraduate at SJTU.
+
+
+
+## Dataset
+
+DGL's built-in Cora, Pubmed and Citeseer datasets, as follows:
+
+| Dataset | #Nodes | #Edges | #Feats | #Classes | #Train Nodes | #Val Nodes | #Test Nodes |
+| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |
+| Citeseer | 3,327 | 9,228 | 3,703 | 6 | 120 | 500 | 1000 |
+| Cora | 2,708 | 10,556 | 1,433 | 7 | 140 | 500 | 1000 |
+| Pubmed | 19,717 | 88,651 | 500 | 3 | 60 | 500 | 1000 |
+
+
+## Usage
+
+```bash
+python ogc.py --dataset cora
+python ogc.py --dataset citeseer
+python ogc.py --dataset pubmed
+```
+
+## Performance
+
+| Dataset | Cora | Citeseer | Pubmed |
+| :-: | :-: | :-: | :-: |
+| OGC (DGL) | **86.9(±0.2)** | **77.4(±0.1)** | **83.6(±0.1)** |
+| OGC (Reported) | **86.9(±0.0)** | **77.4(±0.0)** | 83.4(±0.0) |
\ No newline at end of file
diff --git a/examples/pytorch/ogc/ogc.py b/examples/pytorch/ogc/ogc.py
new file mode 100644
index 000000000000..9d6cdb56115b
--- /dev/null
+++ b/examples/pytorch/ogc/ogc.py
@@ -0,0 +1,104 @@
+import time
+import argparse
+import scipy.sparse as sp
+
+import torch
+import torch.nn.functional as F
+
+from dgl import AddSelfLoop
+from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset
+from utils import sparse_mx_to_torch_sparse_tensor, symmetric_normalize_adjacency, LinearNeuralNetwork
+
+
+# Training settings
+decline = 0.9 # the dcline rate
+eta_sup = 0.001 # the learning rate for supervised loss
+eta_W = 0.5 # the learning rate for updating W
+beta = 0.1 # in [0,1], the moving probability that a node moves to its neighbors
+max_similar_tol = 0.995 # the max_tol test set label prediction similarity between two iterations
+max_patience = 2 # the tolreance for consecutively getting very similar test prediction
+
+
+def update_U(U, Y, predY, W):
+    global eta_sup
+    # ------ update the smoothness loss via LGC ------
+    U = torch.spmm(lazy_adj.to(device), U)
+
+    # ------ update the supervised loss via SEB ------
+    dU_sup = 2*torch.mm(torch.sparse.mm(S, -Y + predY), W)
+    U = U - eta_sup * dU_sup
+
+    eta_sup = eta_sup * decline
+    return U
+
+
+def OGC(linear_clf, U, g):
+    patience = 0
+    _, _, last_acc, last_outp = linear_clf.test(U, g)
+    for i in range(64):
+        # updating W by training a simple linear supervised model Y=W*X
+        predY, W = linear_clf.update_W(U, g, eta_W)
+
+        # updating U by LGC and SEB jointly
+        U = update_U(U, 
F.one_hot(g.ndata["label"]).float(), predY, W) + + loss_tv, acc_tv, acc_test, pred = linear_clf.test(U, g) + print('epoch {} loss_tv {:.4f} acc_train_val {:.4f} acc_test {:.4f}'.format( + i + 1, loss_tv, acc_tv, acc_test)) + + sim_rate = float(int((pred == last_outp).sum()) / int(pred.shape[0])) + if (sim_rate > max_similar_tol): + patience += 1 + if (patience > max_patience): + break + + last_acc = acc_test + last_outp = pred + return last_acc + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + '--dataset', + type=str, + default="citeseer", + choices=["cora", "citeseer", "pubmed"], + help='Dataset to use.') + args, _ = parser.parse_known_args() + + # load and preprocess dataset + transform = (AddSelfLoop()) + if args.dataset == "cora": + data = CoraGraphDataset(transform=transform) + elif args.dataset == "citeseer": + data = CiteseerGraphDataset(transform=transform) + elif args.dataset == "pubmed": + data = PubmedGraphDataset(transform=transform) + else: + raise ValueError("Unknown dataset: {}".format(args.dataset)) + + g = data[0] + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + g = g.int().to(device) + features = g.ndata["feat"] + + adj = symmetric_normalize_adjacency(g) + print(g.num_edges) + I_N = sp.eye(features.shape[0]) + # lazy random walk (also known as lazy graph convolution) + lazy_adj = (1 - beta) * I_N + beta * adj + lazy_adj = sparse_mx_to_torch_sparse_tensor(lazy_adj) + # LIM track, else use both train and validation set to construct S + S = torch.diag(g.ndata["train_mask"]).float().to_sparse() + + linear_clf = LinearNeuralNetwork(nfeat=g.ndata["feat"].size(1), + nclass=g.ndata["label"].max().item()+1, + bias=False).to(device) + + start_time = time.time() + res = OGC(linear_clf, features, g) + time_tot = time.time() - start_time + + print(f'Test Acc:{res:.4f}') + print(f'Total Time:{time_tot:.4f}') \ No newline at end of file diff --git a/examples/pytorch/ogc/utils.py b/examples/pytorch/ogc/utils.py new file mode 100644 index 000000000000..8c19247de605 --- /dev/null +++ b/examples/pytorch/ogc/utils.py @@ -0,0 +1,66 @@ +import numpy as np +import scipy.sparse as sp +import torch +import torch.nn as nn +import torch.nn.functional as F +import torch.optim as optim + +def sparse_mx_to_torch_sparse_tensor(sparse_mx): + """Convert a scipy sparse matrix to a torch sparse tensor.""" + sparse_mx = sparse_mx.tocoo().astype(np.float32) + indices = torch.from_numpy( + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) + values = torch.from_numpy(sparse_mx.data) + shape = torch.Size(sparse_mx.shape) + return torch.sparse.FloatTensor(indices, values, shape) + + +def symmetric_normalize_adjacency(graph): + """ Symmetric normalize graph adjacency matrix. 
""" + adj = graph.adjacency_matrix() + in_degs = graph.in_degrees().float() + in_norm = torch.pow(in_degs, -0.5).unsqueeze(-1) + degi = torch.diag(torch.squeeze(torch.t(in_norm))) + degi = sp.coo_matrix(degi).tocsr() + adj = sp.csr_matrix((adj.val.cpu(), (adj.row.cpu(), adj.col.cpu())), shape=adj.shape) + adj = degi.dot(adj.dot(degi)) + return adj + + +class LinearNeuralNetwork(nn.Module): + def __init__(self, nfeat, nclass, bias=True): + super(LinearNeuralNetwork, self).__init__() + self.W = nn.Linear(nfeat, nclass, bias=bias) + + def forward(self, x): + return self.W(x) + + def test(self, U, g): + self.eval() + with torch.no_grad(): + output = self(U) + pred = output.argmax(dim=-1) + labels = g.ndata["label"] + test_mask = g.ndata["test_mask"] + tv_mask = g.ndata["train_mask"] + g.ndata["val_mask"] + loss_tv = F.mse_loss(output[tv_mask], + F.one_hot(labels).float()[tv_mask]) + accs = [] + for mask in [tv_mask, test_mask]: + accs.append( + float((pred[mask] == labels[mask]).sum()/mask.sum())) + return loss_tv.item(), accs[0], accs[1], pred + + def update_W(self, U, g, eta_W): + optimizer = optim.SGD(self.parameters(), lr=eta_W) + self.train() + optimizer.zero_grad() + output = self(U) + labels = g.ndata["label"] + tv_mask = g.ndata["train_mask"] + g.ndata["val_mask"] + loss_tv = F.mse_loss(output[tv_mask], + F.one_hot(labels).float()[tv_mask], + reduction='sum') + loss_tv.backward() + optimizer.step() + return self(U).data, self.W.weight.data From 2209d7f6748e4a8eec042eef14f8ee02585f81f8 Mon Sep 17 00:00:00 2001 From: SinuoXu Date: Fri, 13 Oct 2023 12:00:21 +0800 Subject: [PATCH 02/20] update example readme --- examples/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/examples/README.md b/examples/README.md index 3a9da46c57c7..08964a0dceeb 100644 --- a/examples/README.md +++ b/examples/README.md @@ -5,6 +5,14 @@ The folder contains example implementations of selected research papers related * For examples working with a certain release, check out `https://github.com/dmlc/dgl/tree//examples` (E.g., https://github.com/dmlc/dgl/tree/0.5.x/examples) To quickly locate the examples of your interest, search for the tagged keywords or use the search tool on [dgl.ai](https://www.dgl.ai/). + +## 2023 + +- Zheng Wang et al. From Cluster Assumption to Graph Convolution: Graph-based Semi-Supervised Learning Revisited. [Paper link](https://arxiv.org/abs/2210.13339) + - Example code: [PyTorch](../examples/pytorch/ogc) + + - Tags: semi-supervised node classification + ## 2022 - Balin et al. Layer-Neighbor Sampling -- Defusing Neighborhood Explosion in GNNs. 
[Paper link](https://arxiv.org/abs/2210.13339) - Example code: [PyTorch](../examples/labor/train_lightning.py) From 25ca95b7313d27ed9c96d15bae1927b1d2f84e18 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Fri, 13 Oct 2023 12:47:21 +0800 Subject: [PATCH 03/20] Add files via upload --- ogc.py | 103 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 ogc.py diff --git a/ogc.py b/ogc.py new file mode 100644 index 000000000000..dff74f8495c4 --- /dev/null +++ b/ogc.py @@ -0,0 +1,103 @@ +import time +import argparse +import scipy.sparse as sp + +import torch +import torch.nn.functional as F + +from dgl import AddSelfLoop +from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset +from utils import sparse_mx_to_torch_sparse_tensor, symmetric_normalize_adjacency, LinearNeuralNetwork + + +# Training settings +decline = 0.9 # the dcline rate +eta_sup = 0.001 # the learning rate for supervised loss +eta_W = 0.5 # the learning rate for updating W +beta = 0.1 # in [0,1], the moving probability that a node moves to its neighbors +max_similar_tol = 0.995 # the max_tol test set label prediction similarity between two iterations +max_patience = 2 # the tolreance for consecutively getting very similar test prediction + + +def update_U(U, Y, predY, W): + global eta_sup + # ------ update the smoothness loss via LGC ------ + U = torch.spmm(lazy_adj.to(device), U) + + # ------ update the supervised loss via SEB ------ + dU_sup = 2*torch.mm(torch.sparse.mm(S, -Y + predY), W) + U = U - eta_sup * dU_sup + + eta_sup = eta_sup * decline + return U + + +def OGC(linear_clf, U, g): + patience = 0 + _, _, last_acc, last_outp = linear_clf.test(U, g) + for i in range(64): + # updating W by training a simple linear supervised model Y=W*X + predY, W = linear_clf.update_W(U, g, eta_W) + + # updating U by LGC and SEB jointly + U = update_U(U, F.one_hot(g.ndata["label"]).float(), predY, W) + + loss_tv, acc_tv, acc_test, pred = linear_clf.test(U, g) + print('epoch {} loss_tv {:.4f} acc_train_val {:.4f} acc_test {:.4f}'.format( + i + 1, loss_tv, acc_tv, acc_test)) + + sim_rate = float(int((pred == last_outp).sum()) / int(pred.shape[0])) + if (sim_rate > max_similar_tol): + patience += 1 + if (patience > max_patience): + break + + last_acc = acc_test + last_outp = pred + return last_acc + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + '--dataset', + type=str, + default="citeseer", + choices=["cora", "citeseer", "pubmed"], + help='Dataset to use.') + args, _ = parser.parse_known_args() + + # load and preprocess dataset + transform = (AddSelfLoop()) + if args.dataset == "cora": + data = CoraGraphDataset(transform=transform) + elif args.dataset == "citeseer": + data = CiteseerGraphDataset(transform=transform) + elif args.dataset == "pubmed": + data = PubmedGraphDataset(transform=transform) + else: + raise ValueError("Unknown dataset: {}".format(args.dataset)) + + g = data[0] + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + g = g.int().to(device) + features = g.ndata["feat"] + + adj = symmetric_normalize_adjacency(g) + I_N = sp.eye(features.shape[0]) + # lazy random walk (also known as lazy graph convolution) + lazy_adj = (1 - beta) * I_N + beta * adj + lazy_adj = sparse_mx_to_torch_sparse_tensor(lazy_adj) + # LIM track, else use both train and validation set to construct S + S = torch.diag(g.ndata["train_mask"]).float().to_sparse() + + 
linear_clf = LinearNeuralNetwork(nfeat=g.ndata["feat"].size(1), + nclass=g.ndata["label"].max().item()+1, + bias=False).to(device) + + start_time = time.time() + res = OGC(linear_clf, features, g) + time_tot = time.time() - start_time + + print(f'Test Acc:{res:.4f}') + print(f'Total Time:{time_tot:.4f}') \ No newline at end of file From deb1cc33a926d6a27528ab2dc233ed1a07a03b9e Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Fri, 13 Oct 2023 12:51:22 +0800 Subject: [PATCH 04/20] Update ogc.py --- examples/pytorch/ogc/ogc.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/pytorch/ogc/ogc.py b/examples/pytorch/ogc/ogc.py index 9d6cdb56115b..8f5047f366a5 100644 --- a/examples/pytorch/ogc/ogc.py +++ b/examples/pytorch/ogc/ogc.py @@ -84,7 +84,6 @@ def OGC(linear_clf, U, g): features = g.ndata["feat"] adj = symmetric_normalize_adjacency(g) - print(g.num_edges) I_N = sp.eye(features.shape[0]) # lazy random walk (also known as lazy graph convolution) lazy_adj = (1 - beta) * I_N + beta * adj @@ -101,4 +100,4 @@ def OGC(linear_clf, U, g): time_tot = time.time() - start_time print(f'Test Acc:{res:.4f}') - print(f'Total Time:{time_tot:.4f}') \ No newline at end of file + print(f'Total Time:{time_tot:.4f}') From c6768ce75efab4da446617c030c4afb6b304f273 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Fri, 13 Oct 2023 15:45:53 +0800 Subject: [PATCH 05/20] Update utils.py --- examples/pytorch/ogc/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/pytorch/ogc/utils.py b/examples/pytorch/ogc/utils.py index 8c19247de605..8ae4b4a3d5c5 100644 --- a/examples/pytorch/ogc/utils.py +++ b/examples/pytorch/ogc/utils.py @@ -21,7 +21,7 @@ def symmetric_normalize_adjacency(graph): in_degs = graph.in_degrees().float() in_norm = torch.pow(in_degs, -0.5).unsqueeze(-1) degi = torch.diag(torch.squeeze(torch.t(in_norm))) - degi = sp.coo_matrix(degi).tocsr() + degi = sp.coo_matrix(degi.cpu()).tocsr() adj = sp.csr_matrix((adj.val.cpu(), (adj.row.cpu(), adj.col.cpu())), shape=adj.shape) adj = degi.dot(adj.dot(degi)) return adj From ef1110aa33aa3e079c4384d2af0355828f4c1d6c Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 19 Oct 2023 12:25:21 +0800 Subject: [PATCH 06/20] Delete ogc.py --- ogc.py | 103 --------------------------------------------------------- 1 file changed, 103 deletions(-) delete mode 100644 ogc.py diff --git a/ogc.py b/ogc.py deleted file mode 100644 index dff74f8495c4..000000000000 --- a/ogc.py +++ /dev/null @@ -1,103 +0,0 @@ -import time -import argparse -import scipy.sparse as sp - -import torch -import torch.nn.functional as F - -from dgl import AddSelfLoop -from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset -from utils import sparse_mx_to_torch_sparse_tensor, symmetric_normalize_adjacency, LinearNeuralNetwork - - -# Training settings -decline = 0.9 # the dcline rate -eta_sup = 0.001 # the learning rate for supervised loss -eta_W = 0.5 # the learning rate for updating W -beta = 0.1 # in [0,1], the moving probability that a node moves to its neighbors -max_similar_tol = 0.995 # the max_tol test set label prediction similarity between two iterations -max_patience = 2 # the tolreance for consecutively getting very similar test prediction - - -def update_U(U, Y, predY, W): - global eta_sup - # ------ update the smoothness loss via LGC ------ - U = torch.spmm(lazy_adj.to(device), U) - - # ------ 
update the supervised loss via SEB ------ - dU_sup = 2*torch.mm(torch.sparse.mm(S, -Y + predY), W) - U = U - eta_sup * dU_sup - - eta_sup = eta_sup * decline - return U - - -def OGC(linear_clf, U, g): - patience = 0 - _, _, last_acc, last_outp = linear_clf.test(U, g) - for i in range(64): - # updating W by training a simple linear supervised model Y=W*X - predY, W = linear_clf.update_W(U, g, eta_W) - - # updating U by LGC and SEB jointly - U = update_U(U, F.one_hot(g.ndata["label"]).float(), predY, W) - - loss_tv, acc_tv, acc_test, pred = linear_clf.test(U, g) - print('epoch {} loss_tv {:.4f} acc_train_val {:.4f} acc_test {:.4f}'.format( - i + 1, loss_tv, acc_tv, acc_test)) - - sim_rate = float(int((pred == last_outp).sum()) / int(pred.shape[0])) - if (sim_rate > max_similar_tol): - patience += 1 - if (patience > max_patience): - break - - last_acc = acc_test - last_outp = pred - return last_acc - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - '--dataset', - type=str, - default="citeseer", - choices=["cora", "citeseer", "pubmed"], - help='Dataset to use.') - args, _ = parser.parse_known_args() - - # load and preprocess dataset - transform = (AddSelfLoop()) - if args.dataset == "cora": - data = CoraGraphDataset(transform=transform) - elif args.dataset == "citeseer": - data = CiteseerGraphDataset(transform=transform) - elif args.dataset == "pubmed": - data = PubmedGraphDataset(transform=transform) - else: - raise ValueError("Unknown dataset: {}".format(args.dataset)) - - g = data[0] - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - g = g.int().to(device) - features = g.ndata["feat"] - - adj = symmetric_normalize_adjacency(g) - I_N = sp.eye(features.shape[0]) - # lazy random walk (also known as lazy graph convolution) - lazy_adj = (1 - beta) * I_N + beta * adj - lazy_adj = sparse_mx_to_torch_sparse_tensor(lazy_adj) - # LIM track, else use both train and validation set to construct S - S = torch.diag(g.ndata["train_mask"]).float().to_sparse() - - linear_clf = LinearNeuralNetwork(nfeat=g.ndata["feat"].size(1), - nclass=g.ndata["label"].max().item()+1, - bias=False).to(device) - - start_time = time.time() - res = OGC(linear_clf, features, g) - time_tot = time.time() - start_time - - print(f'Test Acc:{res:.4f}') - print(f'Total Time:{time_tot:.4f}') \ No newline at end of file From 9b0fe44824fbeac9f188d5c9fb4a38d1099edfd0 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Tue, 24 Oct 2023 16:46:30 +0800 Subject: [PATCH 07/20] Update examples/pytorch/ogc/ogc.py Co-authored-by: Hongzhi (Steve), Chen --- examples/pytorch/ogc/ogc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/pytorch/ogc/ogc.py b/examples/pytorch/ogc/ogc.py index 8f5047f366a5..9a534735faae 100644 --- a/examples/pytorch/ogc/ogc.py +++ b/examples/pytorch/ogc/ogc.py @@ -15,7 +15,8 @@ eta_sup = 0.001 # the learning rate for supervised loss eta_W = 0.5 # the learning rate for updating W beta = 0.1 # in [0,1], the moving probability that a node moves to its neighbors -max_similar_tol = 0.995 # the max_tol test set label prediction similarity between two iterations +max_similar_tol = 0.995 # The max_tol test set label prediction similarity + # between two iterations. 
max_patience = 2 # the tolreance for consecutively getting very similar test prediction From 436b18b2e7adfe0bdaad20c889242857cf6ebe76 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Wed, 25 Oct 2023 21:29:23 +0800 Subject: [PATCH 08/20] Update README.md --- examples/pytorch/ogc/README.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/examples/pytorch/ogc/README.md b/examples/pytorch/ogc/README.md index 713505b3d031..ca6a9c087933 100644 --- a/examples/pytorch/ogc/README.md +++ b/examples/pytorch/ogc/README.md @@ -9,6 +9,13 @@ With only one trainable layer, OGC is a very simple but powerful graph convoluti This example was implemented by [Sinuo Xu](https://github.com/SinuoXu) when she was an undergraduate at SJTU. +## Dependencies + +Python 3.11.5 +PyTorch 2.0.1 +DGL 1.1.2 +scikit-learn 1.3.1 + ## Dataset @@ -34,4 +41,4 @@ python main.py --dataset pubmed | Dataset | Cora | Citeseer | Pubmed | | :-: | :-: | :-: | :-: | | OGC (DGL) | **86.9(±0.2)** | **77.4(±0.1)** | **83.6(±0.1)** | -| OGC (Reported) | **86.9(±0.0)** | **77.4(±0.0)** | 83.4(±0.0) | \ No newline at end of file +| OGC (Reported) | **86.9(±0.0)** | **77.4(±0.0)** | 83.4(±0.0) | From ff0b12e17d17b707c5f0821f8117f183ea9634e0 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Wed, 25 Oct 2023 21:32:41 +0800 Subject: [PATCH 09/20] Update ogc.py --- examples/pytorch/ogc/ogc.py | 120 ++++++++++++++++++++---------------- 1 file changed, 66 insertions(+), 54 deletions(-) diff --git a/examples/pytorch/ogc/ogc.py b/examples/pytorch/ogc/ogc.py index 9a534735faae..0627896c419d 100644 --- a/examples/pytorch/ogc/ogc.py +++ b/examples/pytorch/ogc/ogc.py @@ -1,5 +1,6 @@ -import time import argparse +import time + import scipy.sparse as sp import torch @@ -7,69 +8,79 @@ from dgl import AddSelfLoop from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset -from utils import sparse_mx_to_torch_sparse_tensor, symmetric_normalize_adjacency, LinearNeuralNetwork +from utils import ( + LinearNeuralNetwork, + sparse_mx_to_torch_sparse_tensor, + symmetric_normalize_adjacency, +) -# Training settings -decline = 0.9 # the dcline rate -eta_sup = 0.001 # the learning rate for supervised loss -eta_W = 0.5 # the learning rate for updating W -beta = 0.1 # in [0,1], the moving probability that a node moves to its neighbors -max_similar_tol = 0.995 # The max_tol test set label prediction similarity - # between two iterations. -max_patience = 2 # the tolreance for consecutively getting very similar test prediction +# Training settings. +decline = 0.9 # the decline rate +lr_sup = 0.001 # the learning rate for supervised loss +lr_clf = 0.5 # the learning rate for the used linear classifier +beta = 0.1 # the moving probability that a node moves to its neighbors +max_sim_rate = 0.995 # the max label prediction similarity between iterations +max_patience = 2 # the tolerance for consecutively similar test predictions -def update_U(U, Y, predY, W): - global eta_sup - # ------ update the smoothness loss via LGC ------ - U = torch.spmm(lazy_adj.to(device), U) +def update_embeds(embeds, graph, label_idx_mat): + global lr_sup + # Update classifier's weight by training a linear supervised model. 
+ pred_labels, clf_weight = linear_clf.update_weight(embeds, graph, lr_clf) + labels = F.one_hot(graph.ndata["label"]).float() - # ------ update the supervised loss via SEB ------ - dU_sup = 2*torch.mm(torch.sparse.mm(S, -Y + predY), W) - U = U - eta_sup * dU_sup + # Update the smoothness loss via LGC. + embeds = torch.spmm(lazy_adj.to(device), embeds) - eta_sup = eta_sup * decline - return U + # Update the supervised loss via SEB. + deriv_sup = 2 * torch.mm( + torch.sparse.mm(label_idx_mat, -labels + pred_labels), clf_weight + ) + embeds = embeds - lr_sup * deriv_sup + lr_sup = lr_sup * decline + return embeds -def OGC(linear_clf, U, g): + +def OGC(linear_clf, embeds, graph, label_idx_mat): patience = 0 - _, _, last_acc, last_outp = linear_clf.test(U, g) + _, _, last_acc, last_output = linear_clf.test(embeds, graph) for i in range(64): - # updating W by training a simple linear supervised model Y=W*X - predY, W = linear_clf.update_W(U, g, eta_W) - - # updating U by LGC and SEB jointly - U = update_U(U, F.one_hot(g.ndata["label"]).float(), predY, W) - - loss_tv, acc_tv, acc_test, pred = linear_clf.test(U, g) - print('epoch {} loss_tv {:.4f} acc_train_val {:.4f} acc_test {:.4f}'.format( - i + 1, loss_tv, acc_tv, acc_test)) - - sim_rate = float(int((pred == last_outp).sum()) / int(pred.shape[0])) - if (sim_rate > max_similar_tol): + # Updating node embeds by LGC and SEB jointly. + embeds = update_embeds(embeds, graph, label_idx_mat) + + loss_tv, acc_tv, acc_test, pred = linear_clf.test(embeds, graph) + print( + "epoch {} loss_tv {:.4f} acc_tv {:.4f} acc_test {:.4f}".format( + i + 1, loss_tv, acc_tv, acc_test + ) + ) + + sim_rate = float(int((pred == last_output).sum()) / int(pred.shape[0])) + if sim_rate > max_sim_rate: patience += 1 - if (patience > max_patience): + if patience > max_patience: break last_acc = acc_test - last_outp = pred + last_output = pred return last_acc if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - '--dataset', + "--dataset", type=str, default="citeseer", choices=["cora", "citeseer", "pubmed"], - help='Dataset to use.') + help="Dataset to use.", + ) args, _ = parser.parse_known_args() - # load and preprocess dataset - transform = (AddSelfLoop()) + # Load and preprocess dataset. + transform = AddSelfLoop() if args.dataset == "cora": data = CoraGraphDataset(transform=transform) elif args.dataset == "citeseer": @@ -78,27 +89,28 @@ def OGC(linear_clf, U, g): data = PubmedGraphDataset(transform=transform) else: raise ValueError("Unknown dataset: {}".format(args.dataset)) - - g = data[0] + graph = data[0] device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - g = g.int().to(device) - features = g.ndata["feat"] + graph = graph.int().to(device) + features = graph.ndata["feat"] - adj = symmetric_normalize_adjacency(g) + adj = symmetric_normalize_adjacency(graph) I_N = sp.eye(features.shape[0]) - # lazy random walk (also known as lazy graph convolution) + # Lazy random walk (also known as lazy graph convolution). lazy_adj = (1 - beta) * I_N + beta * adj lazy_adj = sparse_mx_to_torch_sparse_tensor(lazy_adj) - # LIM track, else use both train and validation set to construct S - S = torch.diag(g.ndata["train_mask"]).float().to_sparse() - - linear_clf = LinearNeuralNetwork(nfeat=g.ndata["feat"].size(1), - nclass=g.ndata["label"].max().item()+1, - bias=False).to(device) + # LIM track, else use both train and val set to construct this matrix. 
+ label_idx_mat = torch.diag(graph.ndata["train_mask"]).float().to_sparse() + + linear_clf = LinearNeuralNetwork( + nfeat=graph.ndata["feat"].size(1), + nclass=graph.ndata["label"].max().item() + 1, + bias=False, + ).to(device) start_time = time.time() - res = OGC(linear_clf, features, g) + res = OGC(linear_clf, features, graph, label_idx_mat) time_tot = time.time() - start_time - print(f'Test Acc:{res:.4f}') - print(f'Total Time:{time_tot:.4f}') + print(f"Test Acc:{res:.4f}") + print(f"Total Time:{time_tot:.4f}") From 9801cc8cec020a3462572e81fed0128c4d5440a2 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Wed, 25 Oct 2023 21:34:16 +0800 Subject: [PATCH 10/20] Update utils.py --- examples/pytorch/ogc/utils.py | 36 ++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/examples/pytorch/ogc/utils.py b/examples/pytorch/ogc/utils.py index 8ae4b4a3d5c5..5e7ef2b13dd6 100644 --- a/examples/pytorch/ogc/utils.py +++ b/examples/pytorch/ogc/utils.py @@ -5,24 +5,28 @@ import torch.nn.functional as F import torch.optim as optim + def sparse_mx_to_torch_sparse_tensor(sparse_mx): """Convert a scipy sparse matrix to a torch sparse tensor.""" sparse_mx = sparse_mx.tocoo().astype(np.float32) indices = torch.from_numpy( - np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) + np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) + ) values = torch.from_numpy(sparse_mx.data) shape = torch.Size(sparse_mx.shape) return torch.sparse.FloatTensor(indices, values, shape) def symmetric_normalize_adjacency(graph): - """ Symmetric normalize graph adjacency matrix. """ + """Symmetric normalize graph adjacency matrix.""" adj = graph.adjacency_matrix() in_degs = graph.in_degrees().float() in_norm = torch.pow(in_degs, -0.5).unsqueeze(-1) degi = torch.diag(torch.squeeze(torch.t(in_norm))) degi = sp.coo_matrix(degi.cpu()).tocsr() - adj = sp.csr_matrix((adj.val.cpu(), (adj.row.cpu(), adj.col.cpu())), shape=adj.shape) + adj = sp.csr_matrix( + (adj.val.cpu(), (adj.row.cpu(), adj.col.cpu())), shape=adj.shape + ) adj = degi.dot(adj.dot(degi)) return adj @@ -35,32 +39,34 @@ def __init__(self, nfeat, nclass, bias=True): def forward(self, x): return self.W(x) - def test(self, U, g): + def test(self, embeds, g): self.eval() with torch.no_grad(): - output = self(U) + output = self(embeds) pred = output.argmax(dim=-1) labels = g.ndata["label"] test_mask = g.ndata["test_mask"] tv_mask = g.ndata["train_mask"] + g.ndata["val_mask"] - loss_tv = F.mse_loss(output[tv_mask], - F.one_hot(labels).float()[tv_mask]) + loss_tv = F.mse_loss( + output[tv_mask], F.one_hot(labels).float()[tv_mask] + ) accs = [] for mask in [tv_mask, test_mask]: accs.append( - float((pred[mask] == labels[mask]).sum()/mask.sum())) + float((pred[mask] == labels[mask]).sum() / mask.sum()) + ) return loss_tv.item(), accs[0], accs[1], pred - def update_W(self, U, g, eta_W): - optimizer = optim.SGD(self.parameters(), lr=eta_W) + def update_weight(self, embeds, g, lr_clf): + optimizer = optim.SGD(self.parameters(), lr=lr_clf) self.train() optimizer.zero_grad() - output = self(U) + output = self(embeds) labels = g.ndata["label"] tv_mask = g.ndata["train_mask"] + g.ndata["val_mask"] - loss_tv = F.mse_loss(output[tv_mask], - F.one_hot(labels).float()[tv_mask], - reduction='sum') + loss_tv = F.mse_loss( + output[tv_mask], F.one_hot(labels).float()[tv_mask], reduction="sum" + ) loss_tv.backward() optimizer.step() - return self(U).data, self.W.weight.data + return self(embeds).data, 
self.W.weight.data From 74569232f13fc5286b8fe82fc05260955600fac5 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Mon, 13 Nov 2023 18:58:32 +0800 Subject: [PATCH 11/20] Update ogc.py --- examples/pytorch/ogc/ogc.py | 136 +++++++++--------------------------- 1 file changed, 32 insertions(+), 104 deletions(-) diff --git a/examples/pytorch/ogc/ogc.py b/examples/pytorch/ogc/ogc.py index 0627896c419d..0af54ba0ce6c 100644 --- a/examples/pytorch/ogc/ogc.py +++ b/examples/pytorch/ogc/ogc.py @@ -1,116 +1,44 @@ -import argparse -import time - -import scipy.sparse as sp - -import torch +import dgl.sparse as dglsp +import torch.nn as nn import torch.nn.functional as F -from dgl import AddSelfLoop -from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset -from utils import ( - LinearNeuralNetwork, - sparse_mx_to_torch_sparse_tensor, - symmetric_normalize_adjacency, -) - - -# Training settings. -decline = 0.9 # the decline rate -lr_sup = 0.001 # the learning rate for supervised loss -lr_clf = 0.5 # the learning rate for the used linear classifier -beta = 0.1 # the moving probability that a node moves to its neighbors -max_sim_rate = 0.995 # the max label prediction similarity between iterations -max_patience = 2 # the tolerance for consecutively similar test predictions - - -def update_embeds(embeds, graph, label_idx_mat): - global lr_sup - # Update classifier's weight by training a linear supervised model. - pred_labels, clf_weight = linear_clf.update_weight(embeds, graph, lr_clf) - labels = F.one_hot(graph.ndata["label"]).float() - - # Update the smoothness loss via LGC. - embeds = torch.spmm(lazy_adj.to(device), embeds) +from utils import LinearNeuralNetwork - # Update the supervised loss via SEB. - deriv_sup = 2 * torch.mm( - torch.sparse.mm(label_idx_mat, -labels + pred_labels), clf_weight - ) - embeds = embeds - lr_sup * deriv_sup - lr_sup = lr_sup * decline - return embeds - - -def OGC(linear_clf, embeds, graph, label_idx_mat): - patience = 0 - _, _, last_acc, last_output = linear_clf.test(embeds, graph) - for i in range(64): - # Updating node embeds by LGC and SEB jointly. - embeds = update_embeds(embeds, graph, label_idx_mat) - - loss_tv, acc_tv, acc_test, pred = linear_clf.test(embeds, graph) - print( - "epoch {} loss_tv {:.4f} acc_tv {:.4f} acc_test {:.4f}".format( - i + 1, loss_tv, acc_tv, acc_test - ) +class OGC(nn.Module): + def __init__(self, graph): + super(OGC, self).__init__() + self.linear_clf = LinearNeuralNetwork( + nfeat=graph.ndata["feat"].shape[1], + nclass=graph.ndata["label"].max().item() + 1, + bias=False, ) - sim_rate = float(int((pred == last_output).sum()) / int(pred.shape[0])) - if sim_rate > max_sim_rate: - patience += 1 - if patience > max_patience: - break + self.label = graph.ndata["label"] + self.label_one_hot = F.one_hot(graph.ndata["label"]).float() + # LIM trick, else use both train and val set to construct this matrix. 
+ self.label_idx_mat = dglsp.diag(graph.ndata["train_mask"]).float() - last_acc = acc_test - last_output = pred - return last_acc + self.test_mask = graph.ndata["test_mask"] + self.tv_mask = graph.ndata["train_mask"] + graph.ndata["val_mask"] + def forward(self, x): + return self.linear_clf(x) -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.add_argument( - "--dataset", - type=str, - default="citeseer", - choices=["cora", "citeseer", "pubmed"], - help="Dataset to use.", - ) - args, _ = parser.parse_known_args() + def update_embeds(self, embeds, lazy_adj, args): + """Update classifier's weight by training a linear supervised model.""" + pred_label = self(embeds).data + clf_weight = self.linear_clf.W.weight.data - # Load and preprocess dataset. - transform = AddSelfLoop() - if args.dataset == "cora": - data = CoraGraphDataset(transform=transform) - elif args.dataset == "citeseer": - data = CiteseerGraphDataset(transform=transform) - elif args.dataset == "pubmed": - data = PubmedGraphDataset(transform=transform) - else: - raise ValueError("Unknown dataset: {}".format(args.dataset)) - graph = data[0] - device = torch.device("cuda" if torch.cuda.is_available() else "cpu") - graph = graph.int().to(device) - features = graph.ndata["feat"] + # Update the smoothness loss via LGC. + embeds = dglsp.spmm(lazy_adj, embeds) - adj = symmetric_normalize_adjacency(graph) - I_N = sp.eye(features.shape[0]) - # Lazy random walk (also known as lazy graph convolution). - lazy_adj = (1 - beta) * I_N + beta * adj - lazy_adj = sparse_mx_to_torch_sparse_tensor(lazy_adj) - # LIM track, else use both train and val set to construct this matrix. - label_idx_mat = torch.diag(graph.ndata["train_mask"]).float().to_sparse() - - linear_clf = LinearNeuralNetwork( - nfeat=graph.ndata["feat"].size(1), - nclass=graph.ndata["label"].max().item() + 1, - bias=False, - ).to(device) - - start_time = time.time() - res = OGC(linear_clf, features, graph, label_idx_mat) - time_tot = time.time() - start_time + # Update the supervised loss via SEB. 
+ deriv_sup = 2 * dglsp.matmul( + dglsp.spmm(self.label_idx_mat, -self.label_one_hot + pred_label), + clf_weight, + ) + embeds = embeds - args.lr_sup * deriv_sup - print(f"Test Acc:{res:.4f}") - print(f"Total Time:{time_tot:.4f}") + args.lr_sup = args.lr_sup * args.decline + return embeds From b9759f628452c9e0d5aaf3c7385733fc83c7f832 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Mon, 13 Nov 2023 19:00:04 +0800 Subject: [PATCH 12/20] Update utils.py --- examples/pytorch/ogc/utils.py | 79 ++++++++++------------------------- 1 file changed, 21 insertions(+), 58 deletions(-) diff --git a/examples/pytorch/ogc/utils.py b/examples/pytorch/ogc/utils.py index 5e7ef2b13dd6..95b61b6b07bf 100644 --- a/examples/pytorch/ogc/utils.py +++ b/examples/pytorch/ogc/utils.py @@ -1,34 +1,7 @@ -import numpy as np -import scipy.sparse as sp +import dgl.sparse as dglsp import torch import torch.nn as nn import torch.nn.functional as F -import torch.optim as optim - - -def sparse_mx_to_torch_sparse_tensor(sparse_mx): - """Convert a scipy sparse matrix to a torch sparse tensor.""" - sparse_mx = sparse_mx.tocoo().astype(np.float32) - indices = torch.from_numpy( - np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64) - ) - values = torch.from_numpy(sparse_mx.data) - shape = torch.Size(sparse_mx.shape) - return torch.sparse.FloatTensor(indices, values, shape) - - -def symmetric_normalize_adjacency(graph): - """Symmetric normalize graph adjacency matrix.""" - adj = graph.adjacency_matrix() - in_degs = graph.in_degrees().float() - in_norm = torch.pow(in_degs, -0.5).unsqueeze(-1) - degi = torch.diag(torch.squeeze(torch.t(in_norm))) - degi = sp.coo_matrix(degi.cpu()).tocsr() - adj = sp.csr_matrix( - (adj.val.cpu(), (adj.row.cpu(), adj.col.cpu())), shape=adj.shape - ) - adj = degi.dot(adj.dot(degi)) - return adj class LinearNeuralNetwork(nn.Module): @@ -39,34 +12,24 @@ def __init__(self, nfeat, nclass, bias=True): def forward(self, x): return self.W(x) - def test(self, embeds, g): - self.eval() - with torch.no_grad(): - output = self(embeds) - pred = output.argmax(dim=-1) - labels = g.ndata["label"] - test_mask = g.ndata["test_mask"] - tv_mask = g.ndata["train_mask"] + g.ndata["val_mask"] - loss_tv = F.mse_loss( - output[tv_mask], F.one_hot(labels).float()[tv_mask] - ) - accs = [] - for mask in [tv_mask, test_mask]: - accs.append( - float((pred[mask] == labels[mask]).sum() / mask.sum()) - ) - return loss_tv.item(), accs[0], accs[1], pred - def update_weight(self, embeds, g, lr_clf): - optimizer = optim.SGD(self.parameters(), lr=lr_clf) - self.train() - optimizer.zero_grad() - output = self(embeds) - labels = g.ndata["label"] - tv_mask = g.ndata["train_mask"] + g.ndata["val_mask"] - loss_tv = F.mse_loss( - output[tv_mask], F.one_hot(labels).float()[tv_mask], reduction="sum" - ) - loss_tv.backward() - optimizer.step() - return self(embeds).data, self.W.weight.data +def symmetric_normalize_adjacency(graph): + """Symmetric normalize graph adjacency matrix.""" + indices = torch.stack(graph.edges()) + n = graph.num_nodes() + adj = dglsp.spmatrix(indices, shape=(n, n)) + deg_invsqrt = dglsp.diag(adj.sum(0)) ** -0.5 + return deg_invsqrt @ adj @ deg_invsqrt + + +def model_test(model, embeds): + model.eval() + with torch.no_grad(): + output = model(embeds) + pred = output.argmax(dim=-1) + test_mask, tv_mask = model.test_mask, model.tv_mask + loss_tv = F.mse_loss(output[tv_mask], model.label_one_hot[tv_mask]) + accs = [] + for mask in [tv_mask, test_mask]: + 
accs.append(float((pred[mask] == model.label[mask]).sum() / mask.sum())) + return loss_tv.item(), accs[0], accs[1], pred From 7330ca3cd36aac9a2679819abefae641549211c9 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Mon, 13 Nov 2023 19:17:55 +0800 Subject: [PATCH 13/20] Create train.py --- examples/pytorch/ogc/train.py | 126 ++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 examples/pytorch/ogc/train.py diff --git a/examples/pytorch/ogc/train.py b/examples/pytorch/ogc/train.py new file mode 100644 index 000000000000..d78c63ae4ddb --- /dev/null +++ b/examples/pytorch/ogc/train.py @@ -0,0 +1,126 @@ +import argparse +import time + +import dgl.sparse as dglsp + +import torch.nn.functional as F +import torch.optim as optim +from dgl import AddSelfLoop +from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset + +from ogc import OGC +from utils import model_test, symmetric_normalize_adjacency + + +def train(model, embeds, lazy_adj, args): + patience = 0 + _, _, last_acc, last_output = model_test(model, embeds) + + tv_mask = model.tv_mask + optimizer = optim.SGD(model.parameters(), lr=args.lr_clf) + + for i in range(64): + model.train() + output = model(embeds) + loss_tv = F.mse_loss( + output[tv_mask], model.label_one_hot[tv_mask], reduction="sum" + ) + optimizer.zero_grad() + loss_tv.backward() + optimizer.step() + + # Updating node embeds by LGC and SEB jointly. + embeds = model.update_embeds(embeds, lazy_adj, args) + + loss_tv, acc_tv, acc_test, pred = model_test(model, embeds) + print( + "epoch {} loss_tv {:.4f} acc_tv {:.4f} acc_test {:.4f}".format( + i + 1, loss_tv, acc_tv, acc_test + ) + ) + + sim_rate = float(int((pred == last_output).sum()) / int(pred.shape[0])) + if sim_rate > args.max_sim_rate: + patience += 1 + if patience > args.max_patience: + break + last_acc = acc_test + last_output = pred + return last_acc + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "--dataset", + type=str, + default="citeseer", + choices=["cora", "citeseer", "pubmed"], + help="dataset to use", + ) + parser.add_argument( + "--decline", type=float, default=0.9, help="decline rate" + ) + parser.add_argument( + "--lr_sup", + type=float, + default=0.001, + help="learning rate for supervised loss", + ) + parser.add_argument( + "--lr_clf", + type=float, + default=0.5, + help="learning rate for the used linear classifier", + ) + parser.add_argument( + "--beta", + type=float, + default=0.1, + help="moving probability that a node moves to its neighbors", + ) + parser.add_argument( + "--max_sim_rate", + type=float, + default=0.995, + help="max label prediction similarity between iterations", + ) + parser.add_argument( + "--max_patience", + type=int, + default=2, + help="tolerance for consecutively similar test predictions", + ) + parser.add_argument( + "--device", + type=str, + default="cpu", + choices=["cpu", "cuda"], + help="device to use", + ) + args, _ = parser.parse_known_args() + + # Load and preprocess dataset. 
+ transform = AddSelfLoop() + if args.dataset == "cora": + data = CoraGraphDataset(transform=transform) + elif args.dataset == "citeseer": + data = CiteseerGraphDataset(transform=transform) + elif args.dataset == "pubmed": + data = PubmedGraphDataset(transform=transform) + else: + raise ValueError("Unknown dataset: {}".format(args.dataset)) + graph = data[0].to(args.device) + features = graph.ndata["feat"] + adj = symmetric_normalize_adjacency(graph) + I_N = dglsp.identity((features.shape[0], features.shape[0])) + # Lazy random walk (also known as lazy graph convolution). + lazy_adj = dglsp.add((1 - args.beta) * I_N, args.beta * adj).to(args.device) + + model = OGC(graph).to(args.device) + start_time = time.time() + res = train(model, features, lazy_adj, args) + time_tot = time.time() - start_time + + print(f"Test Acc:{res:.4f}") + print(f"Total Time:{time_tot:.4f}") From 7dbee82c1f8695f35767ae96a9e63c6234f9278e Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 23 Nov 2023 22:01:04 +0800 Subject: [PATCH 14/20] Create train.py --- examples/pytorch/ggcm/train.py | 88 ++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) create mode 100644 examples/pytorch/ggcm/train.py diff --git a/examples/pytorch/ggcm/train.py b/examples/pytorch/ggcm/train.py new file mode 100644 index 000000000000..5cebb314c7c6 --- /dev/null +++ b/examples/pytorch/ggcm/train.py @@ -0,0 +1,88 @@ +import argparse +import time +import copy + +import torch.nn.functional as F +import torch.optim as optim + +from dgl import AddSelfLoop +from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset + +from ggcm import GGCM +from utils import model_test, symmetric_normalize_adjacency + + +def train(model, embedds, args): + # Evaluate embedding by classification with the given split setting + best_acc = -1 + optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) + + for i in range(args.epochs): + model.train() + output = model(embedds) + loss = F.cross_entropy(output[model.train_mask], model.label[model.train_mask]) + optimizer.zero_grad() + loss.backward() + optimizer.step() + + loss_val, acc_val, acc_test = model_test(model, embedds) + if acc_val > best_acc: + best_acc, best_model = acc_val, copy.deepcopy(model) + + print(f'{i+1} {loss_val:.4f} {acc_val:.3f} acc_test={acc_test:.3f}') + + loss_val, acc_val, acc_test = model_test(best_model, embedds) + return acc_test + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + '--dataset', + type=str, + default="citeseer", + help='Dataset to use.', + ) + parser.add_argument('--epochs', type=int, default=100) + parser.add_argument('--lr', type=float, default=0.2) + parser.add_argument('--degree', type=int, default=16) + parser.add_argument('--decline', type=float, default=1) + parser.add_argument('--negative_rate', type=float, default=20.0) + parser.add_argument('--wd', type=float, nargs='*', default=1e-3) + parser.add_argument('--alpha', type=float, default=0.12) + parser.add_argument('--decline_neg', type=float, default=1.0) + parser.add_argument( + '--device', + type=str, + default='cpu', + choices=['cpu', 'cuda'], + help='device to use', + ) + args, _ = parser.parse_known_args() + + transform = (AddSelfLoop()) + if args.dataset == "cora": + num_edges = CoraGraphDataset()[0].num_edges() + data = CoraGraphDataset(transform=transform) + elif args.dataset == "citeseer": + num_edges = CiteseerGraphDataset()[0].num_edges() + data = 
CiteseerGraphDataset(transform=transform) + elif args.dataset == "pubmed": + num_edges = PubmedGraphDataset()[0].num_edges() + data = PubmedGraphDataset(transform=transform) + else: + raise ValueError("Unknown dataset: {}".format(args.dataset)) + + graph = data[0] + graph = graph.to(args.device) + features = graph.ndata["feat"] + adj = symmetric_normalize_adjacency(graph) + + avg_edge_num = int(args.negative_rate * num_edges / features.shape[0]) + avg_edge_num = ((avg_edge_num + 1) // 2) * 2 + + model = GGCM(graph, args).to(args.device) + start_time = time.time() + embedds = GGCM.update_embedds(features, adj, avg_edge_num, args) + test_acc = train(model, embedds, args) + print(f'Final test acc: {test_acc:.4f}') + print(f'Total Time: {time.time() - start_time:.4f}') From a833478f3408df21a4e8df1be640a78dfdd0c937 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 23 Nov 2023 22:01:34 +0800 Subject: [PATCH 15/20] Create ggcm.py --- examples/pytorch/ggcm/ggcm.py | 50 +++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 examples/pytorch/ggcm/ggcm.py diff --git a/examples/pytorch/ggcm/ggcm.py b/examples/pytorch/ggcm/ggcm.py new file mode 100644 index 000000000000..56d7f8cbb918 --- /dev/null +++ b/examples/pytorch/ggcm/ggcm.py @@ -0,0 +1,50 @@ +import dgl.sparse as dglsp + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from utils import LinearNeuralNetwork, lazy_random_walk, inverse_graph_convolution + + +class GGCM(nn.Module): + def __init__(self, graph, args): + super(GGCM, self).__init__() + self.linear_nn = LinearNeuralNetwork(nfeat=graph.ndata["feat"].shape[1], + nclass=F.one_hot(graph.ndata["label"]).shape[1], + bias=True).to(args.device) + + self.label = graph.ndata["label"] + self.test_mask = graph.ndata["test_mask"] + self.train_mask = graph.ndata["train_mask"] + self.val_mask = graph.ndata["val_mask"] + + def forward(self, x): + return self.linear_nn(x) + + def update_embedds(features, A_hat, avg_edge_num, args): + beta = 1.0 + beta_neg = 1.0 + K = args.degree + X = features.clone() + temp_sum = torch.zeros_like(features) + I_N = dglsp.identity((features.shape[0], features.shape[0])) + + for _ in range(K): + # lazy graph convolution (LGC) + lazy_A = lazy_random_walk(A_hat, beta, I_N).to(args.device) + + # inverse graph convlution (IGC), lazy version + neg_A_hat = inverse_graph_convolution( + avg_edge_num, features.shape[0], args.device).to(args.device) + inv_lazy_A = lazy_random_walk(neg_A_hat, beta_neg, I_N).to(args.device) + inv_features = dglsp.spmm(inv_lazy_A, features) + features = dglsp.spmm(lazy_A, features) + + # add for multi-scale version + temp_sum += (features + inv_features) / 2.0 + beta *= args.decline + beta_neg *= args.decline_neg + + embedds = args.alpha * X + (1 - args.alpha) * (temp_sum / (K * 1.0)) + return embedds From cee45b63f59533c0e700159d3419611af447e210 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 23 Nov 2023 22:02:23 +0800 Subject: [PATCH 16/20] Create utils.py --- examples/pytorch/ggcm/utils.py | 51 ++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) create mode 100644 examples/pytorch/ggcm/utils.py diff --git a/examples/pytorch/ggcm/utils.py b/examples/pytorch/ggcm/utils.py new file mode 100644 index 000000000000..6f9c31146896 --- /dev/null +++ b/examples/pytorch/ggcm/utils.py @@ -0,0 +1,51 @@ +import dgl.sparse as dglsp +import torch +import torch.nn as nn +import torch.nn.functional as F 
+import networkx as nx + + +class LinearNeuralNetwork(nn.Module): + def __init__(self, nfeat, nclass, bias=True): + super(LinearNeuralNetwork, self).__init__() + self.W = nn.Linear(nfeat, nclass, bias=bias) + + def forward(self, x): + return self.W(x) + + +def symmetric_normalize_adjacency(graph): + """Symmetric normalize graph adjacency matrix.""" + indices = torch.stack(graph.edges()) + n = graph.num_nodes() + adj = dglsp.spmatrix(indices, shape=(n, n)) + deg_invsqrt = dglsp.diag(adj.sum(0)) ** -0.5 + return deg_invsqrt @ adj @ deg_invsqrt + + +def model_test(model, embedds): + model.eval() + with torch.no_grad(): + output = model(embedds) + pred = output.argmax(dim=-1) + test_mask, val_mask = model.test_mask, model.val_mask + loss = F.cross_entropy(output[val_mask], model.label[val_mask]) + accs = [] + for mask in [val_mask, test_mask]: + accs.append(float((pred[mask] == model.label[mask]).sum()/mask.sum())) + return loss.item(), accs[0], accs[1] + + +def inverse_graph_convolution(k, n, device): + adj = nx.adjacency_matrix(nx.random_regular_graph(k, n)).tocoo() + indices = torch.tensor([adj.row.tolist(), adj.col.tolist()]) + values = torch.tensor(adj.data.tolist()) + adj_sym_nor = dglsp.spmatrix(indices, values, adj.shape).coalesce().to(device) + I_N = dglsp.identity((n, n)).to(dtype=torch.int64) + # re-normalization trick + adj_sym_nor = dglsp.sub(2 * I_N, adj_sym_nor) / (k + 2) + return adj_sym_nor + + +def lazy_random_walk(adj, beta, I_N): + return dglsp.add((1 - beta) * I_N, beta * adj) From 715b5d2e6393e7c2314bdc47c6b17a97ca1ff3c5 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 4 Jan 2024 22:39:57 +0800 Subject: [PATCH 17/20] Update ggcm.py --- examples/pytorch/ggcm/ggcm.py | 61 +++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 28 deletions(-) diff --git a/examples/pytorch/ggcm/ggcm.py b/examples/pytorch/ggcm/ggcm.py index 56d7f8cbb918..f02107fdb3cd 100644 --- a/examples/pytorch/ggcm/ggcm.py +++ b/examples/pytorch/ggcm/ggcm.py @@ -2,49 +2,54 @@ import torch import torch.nn as nn -import torch.nn.functional as F -from utils import LinearNeuralNetwork, lazy_random_walk, inverse_graph_convolution +from utils import ( + inverse_graph_convolution, + lazy_random_walk, + symmetric_normalize_adjacency, +) class GGCM(nn.Module): - def __init__(self, graph, args): + def __init__(self): super(GGCM, self).__init__() - self.linear_nn = LinearNeuralNetwork(nfeat=graph.ndata["feat"].shape[1], - nclass=F.one_hot(graph.ndata["label"]).shape[1], - bias=True).to(args.device) - - self.label = graph.ndata["label"] - self.test_mask = graph.ndata["test_mask"] - self.train_mask = graph.ndata["train_mask"] - self.val_mask = graph.ndata["val_mask"] - - def forward(self, x): - return self.linear_nn(x) - - def update_embedds(features, A_hat, avg_edge_num, args): + + def get_embedding(self, graph, args): + # get the learned node embeddings beta = 1.0 beta_neg = 1.0 - K = args.degree - X = features.clone() + layer_num, alpha = args.layer_num, args.alpha + device = args.device + features = graph.ndata["feat"] + orig_feats = features.clone() temp_sum = torch.zeros_like(features) - I_N = dglsp.identity((features.shape[0], features.shape[0])) - for _ in range(K): - # lazy graph convolution (LGC) - lazy_A = lazy_random_walk(A_hat, beta, I_N).to(args.device) + node_num = features.shape[0] + I_N = dglsp.identity((node_num, node_num)) + A_hat = symmetric_normalize_adjacency(graph) + + # the inverser random adj + edge_num = int(args.negative_rate * 
graph.num_edges() / node_num) + # need n*k odd, for networkx + edge_num = ((edge_num + 1) // 2) * 2 + for _ in range(layer_num): # inverse graph convlution (IGC), lazy version - neg_A_hat = inverse_graph_convolution( - avg_edge_num, features.shape[0], args.device).to(args.device) - inv_lazy_A = lazy_random_walk(neg_A_hat, beta_neg, I_N).to(args.device) + neg_A_hat = inverse_graph_convolution(edge_num, node_num, I_N).to( + device + ) + inv_lazy_A = lazy_random_walk(neg_A_hat, beta_neg, I_N).to(device) inv_features = dglsp.spmm(inv_lazy_A, features) + + # lazy graph convolution (LGC) + lazy_A = lazy_random_walk(A_hat, beta, I_N).to(device) features = dglsp.spmm(lazy_A, features) # add for multi-scale version temp_sum += (features + inv_features) / 2.0 beta *= args.decline beta_neg *= args.decline_neg - - embedds = args.alpha * X + (1 - args.alpha) * (temp_sum / (K * 1.0)) - return embedds + embeds = alpha * orig_feats + (1 - alpha) * ( + temp_sum / (layer_num * 1.0) + ) + return embeds From 6edb1d2cc106d431aa5e6e75c7c797decfd29e9f Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 4 Jan 2024 22:49:36 +0800 Subject: [PATCH 18/20] Update train.py --- examples/pytorch/ggcm/train.py | 148 ++++++++++++++++++++------------- 1 file changed, 92 insertions(+), 56 deletions(-) diff --git a/examples/pytorch/ggcm/train.py b/examples/pytorch/ggcm/train.py index 5cebb314c7c6..0f8c87fae3f4 100644 --- a/examples/pytorch/ggcm/train.py +++ b/examples/pytorch/ggcm/train.py @@ -1,7 +1,7 @@ import argparse -import time import copy +import torch import torch.nn.functional as F import torch.optim as optim @@ -9,80 +9,116 @@ from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset from ggcm import GGCM -from utils import model_test, symmetric_normalize_adjacency +from utils import Classifier -def train(model, embedds, args): - # Evaluate embedding by classification with the given split setting - best_acc = -1 - optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd) +def evaluate(model, embeds, graph): + model.eval() + with torch.no_grad(): + output = model(embeds) + pred = output.argmax(dim=-1) + label = graph.ndata["label"] + val_mask, test_mask = graph.ndata["val_mask"], graph.ndata["test_mask"] + loss = F.cross_entropy(output[val_mask], label[val_mask]) + accs = [] + for mask in [val_mask, test_mask]: + accs.append(float((pred[mask] == label[mask]).sum() / mask.sum())) + return loss.item(), accs[0], accs[1] + + +def main(args): + # prepare data + transform = AddSelfLoop() + if args.dataset == "cora": + data = CoraGraphDataset(transform=transform) + elif args.dataset == "citeseer": + data = CiteseerGraphDataset(transform=transform) + elif args.dataset == "pubmed": + data = PubmedGraphDataset(transform=transform) + else: + raise ValueError("Unknown dataset: {}".format(args.dataset)) + graph = data[0].to(args.device) + features = graph.ndata["feat"] + train_mask = graph.ndata["train_mask"] + in_feats = features.shape[1] + n_classes = data.num_classes + + # get node embedding + ggcm = GGCM() + embeds = ggcm.get_embedding(graph, args) + + # create classifier model + classifier = Classifier(in_feats, n_classes) + optimizer = optim.Adam( + classifier.parameters(), lr=args.lr, weight_decay=args.wd + ) + + # train classifier + best_acc = -1 for i in range(args.epochs): - model.train() - output = model(embedds) - loss = F.cross_entropy(output[model.train_mask], model.label[model.train_mask]) + classifier.train() + output = 
classifier(embeds) + loss = F.cross_entropy( + output[train_mask], graph.ndata["label"][train_mask] + ) optimizer.zero_grad() loss.backward() optimizer.step() - loss_val, acc_val, acc_test = model_test(model, embedds) + loss_val, acc_val, acc_test = evaluate(classifier, embeds, graph) if acc_val > best_acc: - best_acc, best_model = acc_val, copy.deepcopy(model) + best_acc, best_model = acc_val, copy.deepcopy(classifier) + + print(f"{i+1} {loss_val:.4f} {acc_val:.3f} acc_test={acc_test:.3f}") - print(f'{i+1} {loss_val:.4f} {acc_val:.3f} acc_test={acc_test:.3f}') + _, _, acc_test = evaluate(best_model, embeds, graph) + print(f"Final test acc: {acc_test:.4f}") - loss_val, acc_val, acc_test = model_test(best_model, embedds) - return acc_test if __name__ == "__main__": - parser = argparse.ArgumentParser() + parser = argparse.ArgumentParser(description="GGCM") parser.add_argument( - '--dataset', + "--dataset", type=str, default="citeseer", - help='Dataset to use.', + choices=["citeseer", "cora", "pubmed"], + help="Dataset to use.", ) - parser.add_argument('--epochs', type=int, default=100) - parser.add_argument('--lr', type=float, default=0.2) - parser.add_argument('--degree', type=int, default=16) - parser.add_argument('--decline', type=float, default=1) - parser.add_argument('--negative_rate', type=float, default=20.0) - parser.add_argument('--wd', type=float, nargs='*', default=1e-3) - parser.add_argument('--alpha', type=float, default=0.12) - parser.add_argument('--decline_neg', type=float, default=1.0) + parser.add_argument("--decline", type=float, default=1, help="Decline.") + parser.add_argument("--alpha", type=float, default=0.15, help="Alpha.") parser.add_argument( - '--device', + "--epochs", type=int, default=100, help="Number of epochs to train." + ) + parser.add_argument( + "--lr", type=float, default=0.13, help="Initial learning rate." + ) + parser.add_argument( + "--layer_num", type=int, default=16, help="Degree of the approximation." + ) + parser.add_argument( + "--negative_rate", + type=float, + default=20.0, + help="Negative sampling rate for a negative graph.", + ) + parser.add_argument( + "--wd", + type=float, + nargs="*", + default=2e-3, + help="Weight decay (L2 loss on parameters).", + ) + parser.add_argument( + "--decline_neg", type=float, default=1.0, help="Decline negative." 
+ ) + parser.add_argument( + "--device", type=str, - default='cpu', - choices=['cpu', 'cuda'], - help='device to use', + default="cpu", + choices=["cpu", "cuda"], + help="device to use", ) args, _ = parser.parse_known_args() - transform = (AddSelfLoop()) - if args.dataset == "cora": - num_edges = CoraGraphDataset()[0].num_edges() - data = CoraGraphDataset(transform=transform) - elif args.dataset == "citeseer": - num_edges = CiteseerGraphDataset()[0].num_edges() - data = CiteseerGraphDataset(transform=transform) - elif args.dataset == "pubmed": - num_edges = PubmedGraphDataset()[0].num_edges() - data = PubmedGraphDataset(transform=transform) - else: - raise ValueError("Unknown dataset: {}".format(args.dataset)) - - graph = data[0] - graph = graph.to(args.device) - features = graph.ndata["feat"] - adj = symmetric_normalize_adjacency(graph) - - avg_edge_num = int(args.negative_rate * num_edges / features.shape[0]) - avg_edge_num = ((avg_edge_num + 1) // 2) * 2 - - model = GGCM(graph, args).to(args.device) - start_time = time.time() - embedds = GGCM.update_embedds(features, adj, avg_edge_num, args) - test_acc = train(model, embedds, args) - print(f'Final test acc: {test_acc:.4f}') - print(f'Total Time: {time.time() - start_time:.4f}') + main(args) From 0f1641ab7c91fd3d604491275fde956225829fac Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 4 Jan 2024 22:50:23 +0800 Subject: [PATCH 19/20] Update utils.py --- examples/pytorch/ggcm/utils.py | 44 +++++++++++++--------------------- 1 file changed, 17 insertions(+), 27 deletions(-) diff --git a/examples/pytorch/ggcm/utils.py b/examples/pytorch/ggcm/utils.py index 6f9c31146896..877736b22654 100644 --- a/examples/pytorch/ggcm/utils.py +++ b/examples/pytorch/ggcm/utils.py @@ -1,17 +1,21 @@ +import dgl import dgl.sparse as dglsp +import networkx as nx import torch import torch.nn as nn -import torch.nn.functional as F -import networkx as nx -class LinearNeuralNetwork(nn.Module): - def __init__(self, nfeat, nclass, bias=True): - super(LinearNeuralNetwork, self).__init__() - self.W = nn.Linear(nfeat, nclass, bias=bias) +class Classifier(nn.Module): + def __init__(self, in_feats, n_classes): + super(Classifier, self).__init__() + self.fc = nn.Linear(in_feats, n_classes) + self.reset_parameters() + + def reset_parameters(self): + self.fc.reset_parameters() def forward(self, x): - return self.W(x) + return self.fc(x) def symmetric_normalize_adjacency(graph): @@ -23,27 +27,13 @@ def symmetric_normalize_adjacency(graph): return deg_invsqrt @ adj @ deg_invsqrt -def model_test(model, embedds): - model.eval() - with torch.no_grad(): - output = model(embedds) - pred = output.argmax(dim=-1) - test_mask, val_mask = model.test_mask, model.val_mask - loss = F.cross_entropy(output[val_mask], model.label[val_mask]) - accs = [] - for mask in [val_mask, test_mask]: - accs.append(float((pred[mask] == model.label[mask]).sum()/mask.sum())) - return loss.item(), accs[0], accs[1] - - -def inverse_graph_convolution(k, n, device): - adj = nx.adjacency_matrix(nx.random_regular_graph(k, n)).tocoo() - indices = torch.tensor([adj.row.tolist(), adj.col.tolist()]) - values = torch.tensor(adj.data.tolist()) - adj_sym_nor = dglsp.spmatrix(indices, values, adj.shape).coalesce().to(device) - I_N = dglsp.identity((n, n)).to(dtype=torch.int64) +def inverse_graph_convolution(edge_num, node_num, I_N): + graph = dgl.from_networkx(nx.random_regular_graph(edge_num, node_num)) + indices = torch.stack(graph.edges()) + adj = dglsp.spmatrix(indices, 
shape=(node_num, node_num)).coalesce() + # re-normalization trick - adj_sym_nor = dglsp.sub(2 * I_N, adj_sym_nor) / (k + 2) + adj_sym_nor = dglsp.sub(2 * I_N, adj) / (edge_num + 2) return adj_sym_nor From 87fb2e8133c7502acfd139f209e7459cb892e191 Mon Sep 17 00:00:00 2001 From: SinuoXu <147471236+SinuoXu@users.noreply.github.com> Date: Thu, 4 Jan 2024 22:52:27 +0800 Subject: [PATCH 20/20] Create README.md --- examples/pytorch/ggcm/README.md | 41 +++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 examples/pytorch/ggcm/README.md diff --git a/examples/pytorch/ggcm/README.md b/examples/pytorch/ggcm/README.md new file mode 100644 index 000000000000..1f105b0701b2 --- /dev/null +++ b/examples/pytorch/ggcm/README.md @@ -0,0 +1,41 @@ +# DGL Implementation of GGCM + +This DGL example implements the GGCM method from the paper: [From Cluster Assumption to Graph Convolution: Graph-based Semi-Supervised Learning Revisited](https://arxiv.org/abs/2309.13599). +The authors' original implementation can be found [here](https://github.com/zhengwang100/ogc_ggcm). + + +## Example Implementor + +This example was implemented by [Sinuo Xu](https://github.com/SinuoXu) when she was an undergraduate at SJTU. + + +## Dependencies +Python 3.11.5
+PyTorch 2.0.1
+DGL 1.1.2
+scikit-learn 1.3.1
+
+
+## Dataset
+DGL's built-in Citeseer, Cora and Pubmed datasets, as follows:
+| Dataset | #Nodes | #Edges | #Feats | #Classes | #Train Nodes | #Val Nodes | #Test Nodes |
+| :-: | :-: | :-: | :-: | :-: | :-: | :-: | :-: |
+| Citeseer | 3,327 | 9,228 | 3,703 | 6 | 120 | 500 | 1000 |
+| Cora | 2,708 | 10,556 | 1,433 | 7 | 140 | 500 | 1000 |
+| Pubmed | 19,717 | 88,651 | 500 | 3 | 60 | 500 | 1000 |
+
+
+## Usage
+Run with the following (available dataset: "cora", "citeseer", "pubmed")
+```bash
+python train.py --dataset citeseer
+python train.py --dataset cora --decline 1.0 --alpha 0.15 --epochs 100 --lr 0.2 --layer_num 16 --negative_rate 20.0 --wd 1e-5 --decline_neg 0.5
+python train.py --dataset pubmed --decline 1.0 --alpha 0.1 --epochs 100 --lr 0.2 --layer_num 16 --negative_rate 20.0 --wd 2e-5 --decline_neg 0.5
+```
+
+## Performance
+
+| Dataset | Citeseer | Cora | Pubmed |
+| :-: | :-: | :-: | :-: |
+| GGCM (DGL) | 74.1 | 83.5 | 80.7 |
+| GGCM (reported) | 74.2 | 83.6 | 80.8 |
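
For a quick grasp of what the OGC patches above compute, the heart of `update_U` (patch 01) and `update_embeds` (patch 11) is just two matrix updates per iteration: a lazy-random-walk smoothing step (LGC) and a supervised embedding correction (SEB). The following is a minimal, self-contained sketch of one such iteration on toy dense tensors; the sizes, the random "adjacency", and the 50% train mask are illustrative assumptions standing in for the example's real DGL graph and `dgl.sparse` matrices.

```python
import torch

# One OGC iteration on toy data (shapes and constants are made up).
torch.manual_seed(0)
n, d, c = 6, 4, 3          # nodes, feature dim, classes
beta, lr_sup = 0.1, 0.001  # moving probability, supervised learning rate

adj = torch.rand(n, n)
adj = (adj + adj.T) / 2                            # stand-in for the symmetric-normalized adjacency
lazy_adj = (1 - beta) * torch.eye(n) + beta * adj  # lazy random walk / LGC operator
U = torch.rand(n, d)                               # node embeddings, initialized with features
Y = torch.eye(c)[torch.randint(c, (n,))]           # one-hot labels
S = torch.diag((torch.rand(n) < 0.5).float())      # diagonal mask selecting labeled (train) nodes
W = torch.rand(c, d)                               # linear classifier weight, shape (nclass, nfeat)

pred_Y = U @ W.T                              # classifier predictions (LinearNeuralNetwork.forward)
U = lazy_adj @ U                              # smoothness update via lazy graph convolution (LGC)
U = U - lr_sup * 2 * (S @ (pred_Y - Y)) @ W   # supervised embedding update (SEB)
print(U.shape)                                # torch.Size([6, 4])
```

The full example wraps exactly this pair of updates in a loop of up to 64 iterations, retrains the one-layer classifier between updates, decays `lr_sup` by `decline` each step, and stops early once test predictions barely change between iterations.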
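Likewise, GGCM's `get_embedding` (patch 17) reduces to the loop below: lazy graph convolution on the real graph, an "inverse" convolution on a fresh random k-regular graph each layer, and a multi-scale average of the two. This is an illustrative dense-tensor sketch with made-up sizes, not the `dgl.sparse` implementation itself.

```python
import networkx as nx
import torch

# GGCM embedding construction on toy data (all sizes are assumptions).
torch.manual_seed(0)
n, d, layers = 8, 5, 4
alpha, beta, beta_neg, k = 0.15, 1.0, 1.0, 4  # note k * n must be even for networkx

A_hat = torch.rand(n, n)
A_hat = (A_hat + A_hat.T) / 2        # stand-in for the normalized adjacency
I_N = torch.eye(n)
X = torch.rand(n, d)                 # input node features
feats, total = X.clone(), torch.zeros_like(X)

for _ in range(layers):
    # inverse graph convolution (IGC) on a fresh random k-regular graph
    neg_A = torch.tensor(nx.to_numpy_array(nx.random_regular_graph(k, n)),
                         dtype=torch.float32)
    neg_A_hat = (2 * I_N - neg_A) / (k + 2)                # re-normalization trick
    inv_feats = ((1 - beta_neg) * I_N + beta_neg * neg_A_hat) @ feats
    # lazy graph convolution (LGC) on the real graph
    feats = ((1 - beta) * I_N + beta * A_hat) @ feats
    total += (feats + inv_feats) / 2.0                     # multi-scale accumulation

embeds = alpha * X + (1 - alpha) * total / layers
print(embeds.shape)  # torch.Size([8, 5])
```

In the example itself these products run through `dglsp.spmm`, and the embeddings are computed once without gradients; only the linear classifier trained on `embeds` afterwards has learnable parameters.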