In [None]:
import argparse
import json
import os.path as osp

import torch
from networkx.readwrite import json_graph
from sklearn.metrics import f1_score
from torch_geometric.data import DataLoader
from torch_geometric.datasets import PPI
from torch_geometric.nn import GATConv

In [None]:
class GeniePathLazy(torch.nn.Module):
    """GeniePath "lazy" variant: all breadth (GAT) layers read the same
    projected input, then a stack of depth (LSTM) layers consumes their outputs.

    Args:
        in_dim (int): Size of the raw node features.
        out_dim (int): Size of the per-node output (logits).
        hidden_dim (int, optional): Width of the input projection and of every
            breadth layer. Defaults to the notebook-level ``dim``.
        lstm_dim (int, optional): Hidden size of every depth layer. Defaults
            to the notebook-level ``lstm_hidden``.
        num_layers (int, optional): Number of breadth/depth layer pairs.
            Defaults to the notebook-level ``layer_num``.
    """

    def __init__(self, in_dim, out_dim, hidden_dim=None, lstm_dim=None,
                 num_layers=None):
        super(GeniePathLazy, self).__init__()
        # Resolve hyper-parameters once and keep them on the instance so that
        # forward() no longer depends on the notebook globals still holding
        # the values that were current at construction time.
        self.hidden_dim = dim if hidden_dim is None else hidden_dim
        self.lstm_dim = lstm_hidden if lstm_dim is None else lstm_dim
        self.num_layers = layer_num if num_layers is None else num_layers

        self.lin1 = torch.nn.Linear(in_dim, self.hidden_dim)
        self.breadths = torch.nn.ModuleList(
            [Breadth(self.hidden_dim, self.hidden_dim)
             for _ in range(self.num_layers)])
        # A depth layer sees [breadth output, running x]. The running x has
        # width hidden_dim before the first depth layer and lstm_dim after it,
        # so the input widths only coincide when hidden_dim == lstm_dim.
        self.depths = torch.nn.ModuleList(
            [Depth(self.hidden_dim
                   + (self.hidden_dim if i == 0 else self.lstm_dim),
                   self.lstm_dim)
             for i in range(self.num_layers)])
        # The classifier reads the last depth output, whose width is lstm_dim.
        self.lin2 = torch.nn.Linear(self.lstm_dim, out_dim)

    def forward(self, x, edge_index):
        """Return per-node outputs for node features ``x`` and ``edge_index``."""
        x = self.lin1(x)
        num_nodes = x.shape[0]
        h = torch.zeros(1, num_nodes, self.lstm_dim, device=x.device)
        c = torch.zeros(1, num_nodes, self.lstm_dim, device=x.device)
        # "Lazy": every breadth layer is applied to the same projected input.
        breadth_outs = [breadth(x, edge_index) for breadth in self.breadths]
        # Add a leading axis of length 1 so the LSTM sees a one-step sequence.
        x = x[None, :]
        for breadth_out, depth in zip(breadth_outs, self.depths):
            in_cat = torch.cat((breadth_out[None, :], x), -1)
            x, (h, c) = depth(in_cat, h, c)
        x = self.lin2(x[0])
        return x

class Breadth(torch.nn.Module):
    """Breadth function: one single-head GAT convolution followed by tanh."""

    def __init__(self, in_dim, out_dim):
        super().__init__()
        self.gatconv = GATConv(in_dim, out_dim, heads=1)

    def forward(self, x, edge_index):
        """Apply the graph attention layer to ``x`` and squash with tanh."""
        attended = self.gatconv(x, edge_index)
        return torch.tanh(attended)

class Depth(torch.nn.Module):
    """Depth function: a single-layer, bias-free LSTM."""

    def __init__(self, in_dim, hidden):
        super().__init__()
        self.lstm = torch.nn.LSTM(in_dim, hidden, num_layers=1, bias=False)

    def forward(self, x, h, c):
        """Run the LSTM on ``x`` from state ``(h, c)``.

        Returns the LSTM output together with the updated ``(h, c)`` pair.
        """
        output, state = self.lstm(x, (h, c))
        next_h, next_c = state
        return output, (next_h, next_c)

    
# class GeniePathLayer(torch.nn.Module):
#     def __init__(self, in_dim):
#         super(GeniePathLayer, self).__init__()
#         self.breadth_func = Breadth(in_dim, dim)
#         self.depth_func = Depth(dim, lstm_hidden)
#     def forward(self, x, edge_index, h, c):
#         x = self.breadth_func(x, edge_index)
#         x = x[None, :]
#         x, (h, c) = self.depth_func(x, h, c)
#         x = x[0]
#         return x, (h, c)

    
# class GeniePath(torch.nn.Module):
#     def __init__(self, in_dim, out_dim):
#         super(GeniePath, self).__init__()
#         self.lin1 = torch.nn.Linear(in_dim, dim)
#         self.gplayers = torch.nn.ModuleList(
#             [GeniePathLayer(dim) for i in range(layer_num)])
#         self.lin2 = torch.nn.Linear(dim, out_dim)

#     def forward(self, x, edge_index):
#         x = self.lin1(x)
#         h = torch.zeros(1, x.shape[0], lstm_hidden, device=x.device)
#         c = torch.zeros(1, x.shape[0], lstm_hidden, device=x.device)
#         for i, l in enumerate(self.gplayers):
#             x, (h, c) = self.gplayers[i](x, edge_index, h, c)
#         x = self.lin2(x)
#         return x


def train():
    """Run one optimisation pass over ``train_loader``.

    Returns the loss averaged over the graphs of the training dataset (each
    batch loss is weighted by the number of graphs in that batch).
    """
    model.train()
    running_loss = 0
    for batch in train_loader:
        graphs_in_batch = batch.num_graphs
        # Clear the batch assignment before moving the batch to the device.
        batch.batch = None
        batch = batch.to(device)
        optimizer.zero_grad()
        logits = model(batch.x, batch.edge_index)
        batch_loss = loss_op(logits, batch.y)
        running_loss += batch_loss.item() * graphs_in_batch
        batch_loss.backward()
        optimizer.step()
    return running_loss / len(train_loader.dataset)


def test(loader):
    """Evaluate ``model`` on ``loader`` and return the micro-averaged F1.

    A logit above zero counts as a positive prediction. When nothing is
    predicted positive the function returns 0 instead of calling ``f1_score``.
    """
    model.eval()
    targets, predictions = [], []
    for batch in loader:
        targets.append(batch.y)
        with torch.no_grad():
            logits = model(batch.x.to(device), batch.edge_index.to(device))
        predictions.append((logits > 0).float().cpu())
    y = torch.cat(targets, dim=0).numpy()
    pred = torch.cat(predictions, dim=0).numpy()
    if pred.sum() > 0:
        return f1_score(y, pred, average='micro')
    return 0

In [5]:
# PPI splits (created in the same order as before: default, train, val, test).
dataset = PPI(root='./')
train_dataset, val_dataset, test_dataset = (
    PPI('./', split=split_name) for split_name in ('train', 'val', 'test')
)

# One graph per training step; evaluation loaders take two graphs at a time.
train_loader = DataLoader(train_dataset, batch_size=1, shuffle=True)
val_loader, test_loader = (
    DataLoader(split_dataset, batch_size=2, shuffle=False)
    for split_dataset in (val_dataset, test_dataset)
)

# Model hyper-parameters read by the GeniePath classes.
dim = 256
lstm_hidden = 256
layer_num = 4

In [9]:
# Prefer the GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Input/output sizes come from the training split.
model = GeniePathLazy(train_dataset.num_features, train_dataset.num_classes)
model = model.to(device)

# Multi-label targets: sigmoid + binary cross-entropy on raw logits.
loss_op = torch.nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.005)

In [10]:
# 101 epochs; the validation F1 is reported before training on every tenth
# epoch, so the value printed for epoch 0 belongs to the untrained model.
for epoch in range(101):
    if not epoch % 10:
        val_f1 = test(val_loader)
        print(epoch, val_f1)
    loss = train()

0 0.3541159254439488
10 0.7400444352825305
20 0.8568920961529084
30 0.8479616947831412
40 0.8741310604144098
50 0.9059154880146002
60 0.8861367618972613
70 0.8976634014932785
80 0.9120209360671339
90 0.915925093112699
100 0.8900664634801938


1. 拆解两个数据集 ppi + cora, 通过双要素重构(边，点) dataset; 

In [17]:
# Re-create only the training split; the first call downloads and processes
# the raw files (see the cell output below).
# dataset = PPI(root='./')
train_dataset = PPI('./', split='train')

Downloading https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/ppi.zip
Extracting ./ppi.zip
Processing...
Done!


In [73]:
# Show the class hierarchy behind the loaded dataset object.
train_dataset.__class__.__mro__

(torch_geometric.datasets.ppi.PPI,
 torch_geometric.data.in_memory_dataset.InMemoryDataset,
 torch_geometric.data.dataset.Dataset,
 torch.utils.data.dataset.Dataset,
 object)

In [18]:
import numpy as np

In [82]:
# Location of the raw PPI training files.
# NOTE(review): absolute, machine-specific path; adjust before re-running.
root = '/home/qibo/all_project/Graph反欺诈/PYG/raw'
feat_path, label_path, graph_path, graph_id = (
    root + suffix
    for suffix in ('/train_feats.npy', '/train_labels.npy',
                   '/train_graph.json', '/train_graph_id.npy')
)

In [46]:
# Load the raw PPI training arrays and the node-link graph description.
# `json` is provided by the notebook's top import cell.
feats = np.load(feat_path)
labels = np.load(label_path)
graph_ids = np.load(graph_id)
# JSON is read as UTF-8 explicitly rather than with the locale default.
with open(graph_path, 'r', encoding='utf-8') as f:
    graph = json.load(f)

# Features, labels and sub-graph ids are all per-node, so their row counts
# must agree before anything is sliced by graph id.
assert feats.shape[0] == labels.shape[0] == graph_ids.shape[0], (
    feats.shape, labels.shape, graph_ids.shape)

process 逻辑；

1. 尽管存在多个子图，不管此维度，拿到全量 node_feats(44906, 50), labels(44906), edges(1271274), 同时拿到各点子图id(44906);
   
2. 对每个子图，将该子图的 edge(34085), x(1767, 50), y(1767) 放入 Data 中实例化；

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Abstract base for graph datasets stored under a root directory.

    Raw files live in ``<root>/raw`` and processed files in
    ``<root>/processed``. Constructing an instance triggers the download step
    and then the processing step, each of which is skipped when its expected
    files already exist. Subclasses provide the file names, ``download``,
    ``process``, ``__len__`` and ``get``.

    Tutorial: https://pytorch-geometric.readthedocs.io/en/latest/notes/create_dataset.html

    Args:
        root (string): Root directory where the dataset should be saved.
        transform (callable, optional): Applied to a data object on every
            access through ``__getitem__``. (default: :obj:`None`)
        pre_transform (callable, optional): Stored on the instance for the
            subclass' ``process`` step to apply before saving.
            (default: :obj:`None`)
        pre_filter (callable, optional): Stored on the instance for the
            subclass' ``process`` step to decide which data objects to keep.
            (default: :obj:`None`)
    """

    def __init__(self, root, transform=None, pre_transform=None,
                 pre_filter=None):
        super(Dataset, self).__init__()

        normalized_root = osp.normpath(root)
        self.root = osp.expanduser(normalized_root)
        self.raw_dir = osp.join(self.root, 'raw')
        self.processed_dir = osp.join(self.root, 'processed')
        self.transform = transform
        self.pre_transform = pre_transform
        self.pre_filter = pre_filter
        # Fetch first, then process: both steps are no-ops when their
        # expected files are already present.
        self._download()
        self._process()

    # ------------------------------------------------------------------
    # Hooks that concrete datasets must implement.
    # ------------------------------------------------------------------
    @property
    def raw_file_names(self):
        """File names expected in ``self.raw_dir``; if present, download is skipped."""
        raise NotImplementedError

    @property
    def processed_file_names(self):
        """File names expected in ``self.processed_dir``; if present, processing is skipped."""
        raise NotImplementedError

    def download(self):
        """Fetch the raw files into ``self.raw_dir``."""
        raise NotImplementedError

    def process(self):
        """Turn the raw files into the files of ``self.processed_dir``."""
        raise NotImplementedError

    def __len__(self):
        """Number of examples in the dataset."""
        raise NotImplementedError

    def get(self, idx):
        """Return the (untransformed) data object at position ``idx``."""
        raise NotImplementedError

    # ------------------------------------------------------------------
    # Derived information.
    # ------------------------------------------------------------------
    @property
    def num_node_features(self):
        """Number of features per node, read from the first example."""
        first = self[0]
        return first.num_node_features

    @property
    def num_features(self):
        """Alias of ``num_node_features``."""
        return self.num_node_features

    @property
    def num_edge_features(self):
        """Number of features per edge, read from the first example."""
        first = self[0]
        return first.num_edge_features

    @property
    def raw_paths(self):
        """Full paths of the raw files."""
        return [osp.join(self.raw_dir, name)
                for name in to_list(self.raw_file_names)]

    @property
    def processed_paths(self):
        """Full paths of the processed files."""
        return [osp.join(self.processed_dir, name)
                for name in to_list(self.processed_file_names)]

    # ------------------------------------------------------------------
    # Guarded download / process steps.
    # ------------------------------------------------------------------
    def _download(self):
        if not files_exist(self.raw_paths):  # pragma: no cover
            makedirs(self.raw_dir)
            self.download()

    def _process(self):
        if not files_exist(self.processed_paths):  # pragma: no cover
            print('Processing...')
            makedirs(self.processed_dir)
            self.process()
            print('Done!')

    def __getitem__(self, idx):  # pragma: no cover
        """Return the data object at ``idx``, passed through ``self.transform`` if set."""
        item = self.get(idx)
        if self.transform is not None:
            item = self.transform(item)
        return item

    def __repr__(self):  # pragma: no cover
        class_name = self.__class__.__name__
        return '{}({})'.format(class_name, len(self))

In [None]:
class InMemoryDataset(Dataset):
    r"""Dataset base class for creating graph datasets which fit completely
    into memory.

    All examples are stored concatenated in one data object (``self.data``)
    together with, per attribute, the boundaries of each example
    (``self.slices``); :meth:`get` cuts a single example back out.

    See `here <https://pytorch-geometric.readthedocs.io/en/latest/notes/
    create_dataset.html#creating-in-memory-datasets>`__ for the accompanying
    tutorial.

    Args:
        root (string): Root directory where the dataset should be saved.
        transform (callable, optional): A function/transform that takes in an
            :obj:`torch_geometric.data.Data` object and returns a transformed
            version. The data object will be transformed before every access.
            (default: :obj:`None`)
        pre_transform (callable, optional): A function/transform that takes in
            an :obj:`torch_geometric.data.Data` object and returns a
            transformed version. The data object will be transformed before
            being saved to disk. (default: :obj:`None`)
        pre_filter (callable, optional): A function that takes in an
            :obj:`torch_geometric.data.Data` object and returns a boolean
            value, indicating whether the data object should be included in the
            final dataset. (default: :obj:`None`)
    """

    @property
    def raw_file_names(self):
        r"""The name of the files to find in the :obj:`self.raw_dir` folder in
        order to skip the download."""
        raise NotImplementedError

    @property
    def processed_file_names(self):
        r"""The name of the files to find in the :obj:`self.processed_dir`
        folder in order to skip the processing."""
        raise NotImplementedError

    def download(self):
        r"""Downloads the dataset to the :obj:`self.raw_dir` folder."""
        raise NotImplementedError

    def process(self):
        r"""Processes the dataset to the :obj:`self.processed_dir` folder."""
        raise NotImplementedError

    def __init__(self, root, transform=None, pre_transform=None,
                 pre_filter=None):
        super(InMemoryDataset, self).__init__(root, transform, pre_transform,
                                              pre_filter)
        # Subclasses are expected to fill these in after calling this
        # constructor (typically from the processed files).
        self.data, self.slices = None, None

    @property
    def num_classes(self):
        r"""The number of classes in the dataset: ``max(y) + 1`` for a 1-D
        label tensor, otherwise the size of dimension 1 of ``y``."""
        data = self.data
        return data.y.max().item() + 1 if data.y.dim() == 1 else data.y.size(1)

    def __len__(self):
        r"""The number of examples, derived from the slice boundaries of the
        first stored attribute."""
        return self.slices[list(self.slices.keys())[0]].size(0) - 1

    def __getitem__(self, idx):
        r"""Gets the data object at index :obj:`idx` and transforms it (in case
        a :obj:`self.transform` is given).
        Returns a data object, if :obj:`idx` is a scalar, and a new dataset in
        case :obj:`idx` is a slicing object, *e.g.*, :obj:`[2:5]`, a LongTensor
        or a mask tensor (ByteTensor or BoolTensor).

        Raises:
            IndexError: If :obj:`idx` is of any other type.
        """
        if isinstance(idx, int):
            data = self.get(idx)
            data = data if self.transform is None else self.transform(data)
            return data
        elif isinstance(idx, slice):
            return self.__indexing__(range(*idx.indices(len(self))))
        elif torch.is_tensor(idx):
            if idx.dtype == torch.long:
                return self.__indexing__(idx)
            # Comparisons produce bool tensors on torch releases that have
            # `torch.bool`; older releases only have uint8 masks, hence the
            # getattr fallback.
            mask_dtypes = (torch.uint8, getattr(torch, 'bool', torch.uint8))
            if idx.dtype in mask_dtypes:
                # nonzero() yields an (n, 1) tensor; flatten it so the
                # selected positions are iterated as scalars.
                return self.__indexing__(idx.nonzero().view(-1))

        raise IndexError(
            'Only integers, slices (`:`) and long or byte tensors are valid '
            'indices (got {}).'.format(type(idx).__name__))

    def shuffle(self, return_perm=False):
        r"""Randomly shuffles the examples in the dataset.
        Args:
            return_perm (bool, optional): If set to :obj:`True`, will
                additionally return the random permutation used to shuffle the
                dataset. (default: :obj:`False`)
        """
        perm = torch.randperm(len(self))
        dataset = self.__indexing__(perm)
        return (dataset, perm) if return_perm is True else dataset

    def get(self, idx):
        r"""Cuts example :obj:`idx` out of the concatenated storage and returns
        it as a new data object of the same class as :obj:`self.data`."""
        data = self.data.__class__()

        if hasattr(self.data, '__num_nodes__'):
            data.num_nodes = self.data.__num_nodes__[idx]

        for key in self.data.keys:
            item, slices = self.data[key], self.slices[key]
            # Select everything along all dimensions except the one this
            # attribute was concatenated along, where only the span of
            # example `idx` is taken.
            s = list(repeat(slice(None), item.dim()))
            s[self.data.__cat_dim__(key, item)] = slice(
                slices[idx], slices[idx + 1])
            # Multi-dimensional indexing takes a tuple of slices; a list is
            # not the canonical spelling of the same selection.
            data[key] = item[tuple(s)]
        return data

    def __indexing__(self, index):
        r"""Returns a shallow copy of this dataset that holds only the
        examples listed in :obj:`index`, re-collated into fresh storage."""
        copy = self.__class__.__new__(self.__class__)
        copy.__dict__ = self.__dict__.copy()
        copy.data, copy.slices = self.collate([self.get(i) for i in index])
        return copy

    def collate(self, data_list):
        r"""Collates a python list of data objects to the internal storage
        format of :class:`torch_geometric.data.InMemoryDataset`.

        Returns:
            A ``(data, slices)`` pair: the concatenated data object and, per
            attribute, a LongTensor of example boundaries.

        Raises:
            ValueError: If an attribute is neither a tensor, an int nor a
                float.
        """
        keys = data_list[0].keys
        data = data_list[0].__class__()
        for key in keys:
            data[key] = []
        slices = {key: [0] for key in keys}
        for item, key in product(data_list, keys):
            data[key].append(item[key])
            if torch.is_tensor(item[key]):
                # Tensors advance the boundary by their extent along the
                # concatenation dimension ...
                s = slices[key][-1] + item[key].size(
                    item.__cat_dim__(key, item[key]))
            elif isinstance(item[key], int) or isinstance(item[key], float):
                # ... plain numbers by exactly one entry.
                s = slices[key][-1] + 1
            else:
                raise ValueError('Unsupported attribute type')
            slices[key].append(s)
        if hasattr(data_list[0], '__num_nodes__'):
            data.__num_nodes__ = []
            for item in data_list:
                data.__num_nodes__.append(item.num_nodes)
        for key in keys:
            if torch.is_tensor(data_list[0][key]):
                data[key] = torch.cat(
                    data[key], dim=data.__cat_dim__(key, data_list[0][key]))
            else:
                data[key] = torch.tensor(data[key])
            slices[key] = torch.tensor(slices[key], dtype=torch.long)
        return data, slices

In [None]:
class PPI(InMemoryDataset):
    """Protein-protein interaction dataset, one processed file per split.

    Args:
        root (string): Root directory where the dataset should be saved.
        split (string): One of ``'train'``, ``'val'`` or ``'test'``; selects
            which processed file is loaded. (default: ``'train'``)
        transform (callable, optional): Forwarded to the base class.
        pre_transform (callable, optional): Applied to every graph in
            :meth:`process` before it is saved.
        pre_filter (callable, optional): Graphs for which it returns a falsy
            value are left out in :meth:`process`.
    """

    url = 'https://s3.us-east-2.amazonaws.com/dgl.ai/dataset/ppi.zip'

    def __init__(self,
                 root,
                 split='train',
                 transform=None,
                 pre_transform=None,
                 pre_filter=None):
        assert split in ['train', 'val', 'test']
        super(PPI, self).__init__(root, transform, pre_transform, pre_filter)
        # processed_file_names is ordered train / val / test, so the position
        # of the split name is the position of its processed file.
        split_index = ['train', 'val', 'test'].index(split)
        self.data, self.slices = torch.load(self.processed_paths[split_index])

    @property
    def raw_file_names(self):
        """Twelve raw files: four kinds for each of train / valid / test."""
        splits = ['train', 'valid', 'test']
        files = ['feats.npy', 'graph_id.npy', 'graph.json', 'labels.npy']
        return ['{}_{}'.format(s, f) for s, f in product(splits, files)]

    @property
    def processed_file_names(self):
        """One processed file per split, in train / val / test order."""
        return ['train.pt', 'val.pt', 'test.pt']

    def download(self):
        """Download the archive into ``root``, unpack it into ``raw_dir`` and
        delete the archive."""
        path = download_url(self.url, self.root)
        extract_zip(path, self.raw_dir)
        os.unlink(path)

    def process(self):
        """Build one data object per sub-graph of every split and save each
        split's collated result to its processed file."""
        for s, split in enumerate(['train', 'valid', 'test']):
            # Format only the file-name template: formatting the joined path
            # would also interpret any braces contained in `raw_dir`.
            path = osp.join(self.raw_dir, '{}_graph.json'.format(split))
            with open(path, 'r') as f:
                G = nx.DiGraph(json_graph.node_link_graph(json.load(f)))
            x = np.load(osp.join(self.raw_dir, '{}_feats.npy'.format(split)))
            x = torch.from_numpy(x).to(torch.float)
            y = np.load(osp.join(self.raw_dir, '{}_labels.npy'.format(split)))
            y = torch.from_numpy(y).to(torch.float)
            data_list = []
            path = osp.join(self.raw_dir, '{}_graph_id.npy'.format(split))
            idx = torch.from_numpy(np.load(path)).to(torch.long)
            # Shift graph ids so that the first graph of the split is 0.
            idx = idx - idx.min()
            for i in range(idx.max().item() + 1):
                mask = idx == i
                G_s = G.subgraph(mask.nonzero().view(-1).tolist())
                edge_index = torch.tensor(list(G_s.edges)).t().contiguous()
                # Re-base node ids so they index into x[mask] / y[mask].
                # NOTE(review): this assumes the smallest node id of the
                # sub-graph appears in at least one edge; confirm.
                edge_index = edge_index - edge_index.min()
                edge_index, _ = remove_self_loops(edge_index)
                data = Data(edge_index=edge_index, x=x[mask], y=y[mask])
                if self.pre_filter is not None and not self.pre_filter(data):
                    continue
                if self.pre_transform is not None:
                    data = self.pre_transform(data)
                data_list.append(data)
            torch.save(self.collate(data_list), self.processed_paths[s])

# Cora 数据集

In [16]:
# Location of the raw CoraFull archive.
# NOTE(review): absolute, machine-specific path; adjust before re-running.
root = '/home/qibo/all_project/Graph反欺诈/PYG/raw'
feat_path = ''.join((root, '/cora.npz'))


In [20]:
import numpy as np

In [26]:
import scipy.sparse as sp

In [33]:
# Open the .npz archive and hand the loaded archive object to `parse_npz`.
# NOTE(review): `parse_npz` is neither imported nor defined in the visible
# cells; presumably it came from torch_geometric's npz reader in a cell that
# no longer exists. Confirm and import it explicitly.
with np.load(feat_path) as f:
    a = parse_npz(f)

In [35]:
# Inspect the shape of the parsed result.
a.shape

torch.Size([19793, 8710])

In [12]:
from torch_geometric.data import InMemoryDataset
class CoraFull(InMemoryDataset):
    r"""Full Cora citation network, as introduced in
    `"Deep Gaussian Embedding of Graphs: Unsupervised Inductive Learning via
    Ranking" <https://arxiv.org/abs/1707.03815>`_.
    Documents are nodes; citation links are edges.

    The dataset consists of a single graph read from ``cora.npz`` and stored
    in ``data.pt``.

    Args:
        root (string): Root directory where the dataset should be saved.
        transform (callable, optional): Applied to the
            :obj:`torch_geometric.data.Data` object on every access.
            (default: :obj:`None`)
        pre_transform (callable, optional): Applied to the
            :obj:`torch_geometric.data.Data` object once, before it is saved
            to disk. (default: :obj:`None`)
    """

    url = 'https://github.com/abojchevski/graph2gauss/raw/master/data/cora.npz'

    def __init__(self, root, transform=None, pre_transform=None):
        super(CoraFull, self).__init__(root, transform, pre_transform)
        processed_file = self.processed_paths[0]
        self.data, self.slices = torch.load(processed_file)

    @property
    def raw_file_names(self):
        """Name of the single raw archive."""
        return 'cora.npz'

    @property
    def processed_file_names(self):
        """Name of the single processed file."""
        return 'data.pt'

    def download(self):
        """Fetch the archive from ``url`` into ``raw_dir``."""
        download_url(self.url, self.raw_dir)

    def process(self):
        """Read the archive, apply ``pre_transform`` if given, collate, save."""
        graph = read_npz(self.raw_paths[0])
        if self.pre_transform is not None:
            graph = self.pre_transform(graph)
        collated, boundaries = self.collate([graph])
        torch.save((collated, boundaries), self.processed_paths[0])

    def __repr__(self):
        class_name = self.__class__.__name__
        return '{}()'.format(class_name)

In [15]:
from itertools import repeat
from torch_geometric.datasets import CoraFull
# Uses the `CoraFull` imported from torch_geometric.datasets on the line
# above, which rebinds the name of the CoraFull class written out in the
# earlier cell. The first call downloads and processes the archive.
dataset = CoraFull(root='./')
# data = dataset[0]

Downloading https://github.com/abojchevski/graph2gauss/raw/master/data/cora.npz
Processing...
Done!


In [None]:
import torch
from torch_geometric.data import InMemoryDataset, download_url
from torch_geometric.read import read_planetoid_data


class Planetoid(InMemoryDataset):
    r"""Citation networks "Cora", "CiteSeer" and "PubMed" from
    `"Revisiting Semi-Supervised Learning with Graph Embeddings"
    <https://arxiv.org/abs/1603.08861>`_.
    Documents are nodes and citation links are edges; the training,
    validation and test splits are given by binary masks.

    Args:
        root (string): Root directory where the dataset should be saved.
        name (string): The name of the dataset (:obj:`"Cora"`,
            :obj:`"CiteSeer"`, :obj:`"PubMed"`).
        transform (callable, optional): Applied to the
            :obj:`torch_geometric.data.Data` object on every access.
            (default: :obj:`None`)
        pre_transform (callable, optional): Applied to the
            :obj:`torch_geometric.data.Data` object once, before it is saved
            to disk. (default: :obj:`None`)
    """

    url = 'https://github.com/kimiyoung/planetoid/raw/master/data'

    def __init__(self, root, name, transform=None, pre_transform=None):
        # The name must be set first: the base constructor already needs
        # `raw_file_names`, which is derived from it.
        self.name = name
        super(Planetoid, self).__init__(root, transform, pre_transform)
        processed_file = self.processed_paths[0]
        self.data, self.slices = torch.load(processed_file)

    @property
    def raw_file_names(self):
        """The eight ``ind.<name>.<part>`` files of the lower-cased dataset name."""
        prefix = self.name.lower()
        parts = ('x', 'tx', 'allx', 'y', 'ty', 'ally', 'graph', 'test.index')
        return ['ind.{}.{}'.format(prefix, part) for part in parts]

    @property
    def processed_file_names(self):
        """Name of the single processed file."""
        return 'data.pt'

    def download(self):
        """Fetch every raw file from ``url`` into ``raw_dir``."""
        for file_name in self.raw_file_names:
            file_url = '{}/{}'.format(self.url, file_name)
            download_url(file_url, self.raw_dir)

    def process(self):
        """Read the raw files, apply ``pre_transform`` if given, collate, save."""
        graph = read_planetoid_data(self.raw_dir, self.name)
        if self.pre_transform is not None:
            graph = self.pre_transform(graph)
        collated, boundaries = self.collate([graph])
        torch.save((collated, boundaries), self.processed_paths[0])

    def __repr__(self):
        return '{}()'.format(self.name)

cora 数据流过程：
1. x [140, 1433]
2. y [140, 7]
3. allx [1708, 1433]
4. ally [1708, 7]
5. tx [1000, 1433];  # 代表大量孤立点;
6. ty [1000, 7];  # 代表大量孤立点;
7. graph [2708, ~len] --> edge_index [2, 10858];
8. test_index [1000]


process(self)
1. train_idx [140], val_idx [500] , test_idx [1000]
2. x被重新赋值； x = concat(allx, tx)  [2708]
3. y被重新赋值； y = concat(ally, ty)  [2708]
4. train_mask， val_mask, test_mask 分别就是一个binary vector;
5. 将 graph --> edge_index
6. 三元组打包放到Data对象[x, y, edge_index]
7. 然后再加上 train_mask， val_mask, test_mask.

In [1]:
import os.path as osp
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, ChebConv  # noqa
import pickle

In [5]:
def _load_pickle(file_path):
    """Unpickle one raw Planetoid file (written by Python 2, hence latin1).

    NOTE(review): ``pickle.load`` can execute arbitrary code; only point this
    at files from a trusted source.
    """
    with open(file_path, 'rb') as f:
        return pickle.load(f, encoding='latin1')


# Common prefix of the raw Cora files.
# NOTE(review): absolute, machine-specific path; adjust before re-running.
path = '/home/qibo/all_project/Graph反欺诈/PYG/raw/' + 'ind.cora.'
x_path = path + 'x'
y_path = path + 'y'
all_x_path = path + 'allx'
all_y_path = path + 'ally'
tx_path = path + 'tx'
ty_path = path + 'ty'
graph_path = path + 'graph'
test_path = path + 'test.index'

# One load per raw part, through the shared helper instead of a repeated
# open/load stanza.
x = _load_pickle(x_path)
y = _load_pickle(y_path)
allx = _load_pickle(all_x_path)
ally = _load_pickle(all_y_path)
tx = _load_pickle(tx_path)
ty = _load_pickle(ty_path)
graph = _load_pickle(graph_path)


def parse_txt_array(src, sep=None, start=0, end=None, dtype=None, device=None):
    """Parse lines of separated numbers into a tensor.

    Args:
        src (iterable of str): One row of numbers per entry.
        sep (str, optional): Token separator passed to ``str.split``
            (default: any whitespace).
        start (int): Index of the first token of each row to keep.
        end (int, optional): Index one past the last token to keep.
        dtype (torch.dtype, optional): Element type of the result.
        device (torch.device or str, optional): Device of the result.

    Returns:
        torch.Tensor: The parsed values with all size-1 dimensions removed.
    """
    # For non-floating dtypes parse tokens as Python ints: going through
    # float would silently round integers above 2**53.
    integral = dtype is not None and not dtype.is_floating_point

    def to_number(token):
        if integral:
            try:
                return int(token)
            except ValueError:
                # Tokens such as "3.0" in an integer column still parse the
                # way they did before.
                pass
        return float(token)

    rows = [[to_number(token) for token in line.split(sep)[start:end]]
            for line in src]
    # `device` used to be accepted but ignored; honour it here.
    return torch.tensor(rows, dtype=dtype, device=device).squeeze()


def read_txt_array(path, sep=None, start=0, end=None, dtype=None, device=None):
    """Read a text file of numbers and parse it with :func:`parse_txt_array`.

    Blank lines are skipped, so the last row is kept whether or not the file
    ends with a newline.
    """
    with open(path, 'r') as f:
        lines = [line for line in f.read().split('\n') if line.strip()]
    return parse_txt_array(lines, sep, start, end, dtype, device)

# Read the test-index file (one number per line) as a long tensor.
test_idx = read_txt_array(test_path, dtype=torch.long)


In [9]:
# Check what kind of object the unpickled `x` is.
type(x)

scipy.sparse.csr.csr_matrix

In [2]:
from itertools import repeat
from torch_geometric.datasets import Planetoid
# Load Cora through the Planetoid dataset class and take its first (index 0)
# data object; the cells below read both `dataset` and `data` as globals.
dataset = Planetoid(root='./', name='Cora')
data = dataset[0]

In [147]:
# Show the class hierarchy of the Planetoid dataset class.
Planetoid.__mro__

(torch_geometric.datasets.planetoid.Planetoid,
 torch_geometric.data.in_memory_dataset.InMemoryDataset,
 torch_geometric.data.dataset.Dataset,
 torch.utils.data.dataset.Dataset,
 object)

In [3]:
class Net(torch.nn.Module):
    """Two-layer GCN whose sizes come from the notebook-level ``dataset`` and
    whose inputs come from the notebook-level ``data``."""

    def __init__(self):
        super().__init__()
        self.conv1 = GCNConv(dataset.num_features, 16, cached=True)
        self.conv2 = GCNConv(16, dataset.num_classes, cached=True)
        # Alternative kept from the original for reference:
        # self.conv1 = ChebConv(data.num_features, 16, K=2)
        # self.conv2 = ChebConv(16, data.num_features, K=2)

    def forward(self):
        """Return per-node log-probabilities for the global ``data`` graph."""
        features, edges = data.x, data.edge_index
        hidden = self.conv1(features, edges)
        hidden = F.relu(hidden)
        # Dropout is only active while the module is in training mode.
        hidden = F.dropout(hidden, training=self.training)
        scores = self.conv2(hidden, edges)
        return F.log_softmax(scores, dim=1)


# Prefer the GPU when one is visible, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Build the model first (it reads `dataset`), then move the graph as well.
model = Net().to(device)
data = data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)


def train():
    """Take one full-batch optimisation step on the training nodes of ``data``."""
    model.train()
    optimizer.zero_grad()
    log_probs = model()
    train_loss = F.nll_loss(log_probs[data.train_mask],
                            data.y[data.train_mask])
    train_loss.backward()
    optimizer.step()


def test():
    """Return the accuracies on the train, validation and test masks, in that order."""
    model.eval()
    logits = model()
    accs = []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        # max over the class dimension returns (values, indices); the
        # indices are the predicted classes.
        _values, pred = logits[mask].max(1)
        num_correct = pred.eq(data.y[mask]).sum().item()
        accs.append(num_correct / mask.sum().item())
    return accs


# Train for 200 epochs. The reported test accuracy is the one measured at the
# epoch with the best validation accuracy so far.
best_val_acc = test_acc = 0
log = 'Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'
for epoch in range(1, 201):
    train()
    train_acc, val_acc, tmp_test_acc = test()
    if val_acc > best_val_acc:
        best_val_acc, test_acc = val_acc, tmp_test_acc
    print(log.format(epoch, train_acc, best_val_acc, test_acc))

Epoch: 001, Train: 0.6071, Val: 0.3980, Test: 0.3990
Epoch: 002, Train: 0.7143, Val: 0.4580, Test: 0.4600
Epoch: 003, Train: 0.7429, Val: 0.4900, Test: 0.4880
Epoch: 004, Train: 0.7714, Val: 0.4960, Test: 0.4970
Epoch: 005, Train: 0.7714, Val: 0.5060, Test: 0.4910
Epoch: 006, Train: 0.8000, Val: 0.5060, Test: 0.4910
Epoch: 007, Train: 0.8357, Val: 0.5260, Test: 0.5170
Epoch: 008, Train: 0.8643, Val: 0.5480, Test: 0.5540
Epoch: 009, Train: 0.8857, Val: 0.5820, Test: 0.5770
Epoch: 010, Train: 0.9071, Val: 0.6160, Test: 0.6180
Epoch: 011, Train: 0.9500, Val: 0.6260, Test: 0.6540
Epoch: 012, Train: 0.9643, Val: 0.6660, Test: 0.6780
Epoch: 013, Train: 0.9643, Val: 0.6960, Test: 0.7130
Epoch: 014, Train: 0.9857, Val: 0.7080, Test: 0.7300
Epoch: 015, Train: 0.9929, Val: 0.7140, Test: 0.7430
Epoch: 016, Train: 0.9929, Val: 0.7340, Test: 0.7550
Epoch: 017, Train: 0.9929, Val: 0.7420, Test: 0.7640
Epoch: 018, Train: 0.9929, Val: 0.7440, Test: 0.7660
Epoch: 019, Train: 0.9929, Val: 0.7540, Test: 

Epoch: 190, Train: 1.0000, Val: 0.7820, Test: 0.7880
Epoch: 191, Train: 1.0000, Val: 0.7820, Test: 0.7880
Epoch: 192, Train: 1.0000, Val: 0.7820, Test: 0.7880
Epoch: 193, Train: 1.0000, Val: 0.7820, Test: 0.7880
Epoch: 194, Train: 1.0000, Val: 0.7820, Test: 0.7880
Epoch: 195, Train: 1.0000, Val: 0.7820, Test: 0.7880
Epoch: 196, Train: 1.0000, Val: 0.7820, Test: 0.7880
Epoch: 197, Train: 1.0000, Val: 0.7820, Test: 0.7880
Epoch: 198, Train: 1.0000, Val: 0.7820, Test: 0.7880
Epoch: 199, Train: 1.0000, Val: 0.7820, Test: 0.7880
Epoch: 200, Train: 1.0000, Val: 0.7820, Test: 0.7880
