In [1]:
import numpy as np

In [2]:
>>> from scipy.sparse import csgraph
>>> G = np.arange(5) * np.arange(5)[:, np.newaxis]
G

array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12],
       [ 0,  4,  8, 12, 16]])

In [15]:
D = [2,3,2,3,3,1]
D_ori = np.diag(D)

D_neg_1_2 = np.array(D, dtype=float)**(-1)
D_neg_1_2 = np.diag(D_neg_1_2)

In [16]:
A =   [[0,1,0,0,1,0],
       [1,0,1,0,1,0],
       [0,1,0,1,0,0],
       [0,0,1,0,1,1],
       [1,1,0,1,0,0],
       [0,0,0,1,0,0]]

A = np.array(A, dtype = float)

In [17]:
D_neg_1_2@A

array([[0.        , 0.5       , 0.        , 0.        , 0.5       ,
        0.        ],
       [0.33333333, 0.        , 0.33333333, 0.        , 0.33333333,
        0.        ],
       [0.        , 0.5       , 0.        , 0.5       , 0.        ,
        0.        ],
       [0.        , 0.        , 0.33333333, 0.        , 0.33333333,
        0.33333333],
       [0.33333333, 0.33333333, 0.        , 0.33333333, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , 1.        , 0.        ,
        0.        ]])

In [284]:
L1 = D_ori - A
L1

array([[ 2., -1.,  0.,  0., -1.,  0.],
       [-1.,  3., -1.,  0., -1.,  0.],
       [ 0., -1.,  2., -1.,  0.,  0.],
       [ 0.,  0., -1.,  3., -1., -1.],
       [-1., -1.,  0., -1.,  3.,  0.],
       [ 0.,  0.,  0., -1.,  0.,  1.]])

In [289]:
csgraph.laplacian(A, normed=False)

array([[ 2., -1., -0., -0., -1., -0.],
       [-1.,  3., -1., -0., -1., -0.],
       [-0., -1.,  2., -1., -0., -0.],
       [-0., -0., -1.,  3., -1., -1.],
       [-1., -1., -0., -1.,  3., -0.],
       [-0., -0., -0., -1., -0.,  1.]])

In [8]:
from torch_scatter.utils.gen import gen
import torch_scatter
import sys
import inspect
import torch
import torch
from torch.nn import Parameter
from torch_geometric.utils import add_remaining_self_loops
from torch_geometric.nn import GATConv

def scatter_(name, src, index, dim_size=None):
    assert name in ['add', 'mean', 'max']

    op = getattr(torch_scatter, 'scatter_{}'.format(name))
    fill_value = -1e9 if name == 'max' else 0
#     print('---------scatter_')
#     print('src:{}\nindex:{}\n'.format(src.size(), index))
    
    out = op(src, index, 0, None, dim_size, fill_value)
#     print('out:{}'.format(out.size()))
#     print('---------scatter_-----------')
    if isinstance(out, tuple):
        out = out[0]
    
    if name == 'max':
        out[out == fill_value] = 0

    return out

def scatter_add(src, index, dim=-1, out=None, dim_size=None, fill_value=0):

    src, out, index, dim = gen(src, index, dim, out, dim_size, fill_value)
    res = out.scatter_add_(dim, index, src)
#     print(dim,'hhhhh')
    return res

def glorot(tensor):
    if tensor is not None:
        stdv = math.sqrt(6.0 / (tensor.size(-2) + tensor.size(-1)))
        tensor.data.uniform_(-stdv, stdv)

def zeros(tensor):
    if tensor is not None:
        tensor.data.fill_(0)
        

special_args = [
    'edge_index', 'edge_index_i', 'edge_index_j', 'size', 'size_i', 'size_j'
]
__size_error_msg__ = ('All tensors which should get mapped to the same source '
                      'or target nodes must be of same size in dimension 0.')

is_python2 = sys.version_info[0] < 3
getargspec = inspect.getargspec if is_python2 else inspect.getfullargspec


class MessagePassing(torch.nn.Module):
    def __init__(self, aggr='add', flow='source_to_target'):
        super(MessagePassing, self).__init__()

        self.aggr = aggr
        assert self.aggr in ['add', 'mean', 'max']

        self.flow = flow
        assert self.flow in ['source_to_target', 'target_to_source']

        self.__message_args__ = getargspec(self.message)[0][1:]
#         print('__message_args__:{}'.format(self.__message_args__))
        self.__special_args__ = [(i, arg)
                                 for i, arg in enumerate(self.__message_args__)
                                 if arg in special_args]
#         print('__special_args__:{}'.format(self.__special_args__))
        self.__message_args__ = [
            arg for arg in self.__message_args__ if arg not in special_args
        ]
#         print('__message_args__:{}'.format(self.__message_args__))
        self.__update_args__ = getargspec(self.update)[0][2:]
#         print('__update_args__:{}'.format(self.__update_args__))
#         print('------------------------------')

    def propagate(self, edge_index, size=None, **kwargs):

        size = [None, None] if size is None else list(size)
#         print(size)
        assert len(size) == 2

        i, j = (0, 1) if self.flow == 'target_to_source' else (1, 0)
        ij = {"_i": i, "_j": j}

        message_args = []
        for arg in self.__message_args__:
            if arg[-2:] in ij.keys():
                tmp = kwargs.get(arg[:-2], None)
                if tmp is None:  # pragma: no cover
                    message_args.append(tmp)
                else:
                    idx = ij[arg[-2:]]
                    if isinstance(tmp, tuple) or isinstance(tmp, list):
                        assert len(tmp) == 2
                        if tmp[1 - idx] is not None:
                            if size[1 - idx] is None:
                                size[1 - idx] = tmp[1 - idx].size(0)
                            if size[1 - idx] != tmp[1 - idx].size(0):
                                raise ValueError(__size_error_msg__)
                        tmp = tmp[idx]
                    if size[idx] is None:
                        size[idx] = tmp.size(0)
                    if size[idx] != tmp.size(0):
                        raise ValueError(__size_error_msg__)
#                     print('---------------')
#                     print('tmp_size:{}'.format(tmp.size()))
#                     print('idx:{}'.format(idx))
                    tmp = torch.index_select(tmp, 0, edge_index[idx])
#                     print('idex_selct:{}'.format(edge_index[idx]))
#                     print('tmp_size:{}'.format(tmp.size()))
#                     print('~~~~~~~~~~~~~~~')
                    message_args.append(tmp)
            else:
                message_args.append(kwargs.get(arg, None))
        size[0] = size[1] if size[0] is None else size[0]
        size[1] = size[0] if size[1] is None else size[1]
#         print('size:{}'.format(size))
        kwargs['edge_index'] = edge_index
        kwargs['size'] = size
#         print('.keys():{}'.format(kwargs.keys()))
#         print('__special_args__:{}'.format(self.__special_args__))
        for (idx, arg) in self.__special_args__:
            if arg[-2:] in ij.keys():
                message_args.insert(idx, kwargs[arg[:-2]][ij[arg[-2:]]])
            else:
                message_args.insert(idx, kwargs[arg])
                
#         print('Passing. with message_args:{}'.format(len(message_args)))
#         print('args:1 {}'.format(message_args[0].size()))
#         print('args:2 {}'.format(message_args[1].size()))

        update_args = [kwargs[arg] for arg in self.__update_args__]
#         print('message_args:{}'.format(len(message_args)))
        out = self.message(*message_args)
#         print('---out---1----{}'.format(out.size()))
#         print('self.aggr:{}'.format(self.aggr))
#         print('edge_Idx_I_size:{}\nitself:{}'.format(len(edge_index[i]),edge_index[i]))
#         print('dim_size:{}'.format(size[i]))
        
        out = scatter_(self.aggr, out, edge_index[i], dim_size=size[i])
#         print('---out---2----{}'.format(out.size())) 
        out = self.update(out, *update_args)
#         print('---out---3----{}'.format(out.size()))
        return out

    def message(self, x_j):  # pragma: no cover
        r"""Constructs messages in analogy to :math:`\phi_{\mathbf{\Theta}}`
        for each edge in :math:`(i,j) \in \mathcal{E}`.
        Can take any argument which was initially passed to :meth:`propagate`.
        In addition, features can be lifted to the source node :math:`i` and
        target node :math:`j` by appending :obj:`_i` or :obj:`_j` to the
        variable name, *.e.g.* :obj:`x_i` and :obj:`x_j`."""

        return x_j

    def update(self, aggr_out):  # pragma: no cover
        r"""Updates node embeddings in analogy to
        :math:`\gamma_{\mathbf{\Theta}}` for each node
        :math:`i \in \mathcal{V}`.
        Takes in the output of aggregation as first argument and any argument
        which was initially passed to :meth:`propagate`."""

        return aggr_out
    

class GCNConv(MessagePassing):
    def __init__(self, in_channels, out_channels, improved=False, cached=False,
                 bias=True, **kwargs):
        super(GCNConv, self).__init__(aggr='add', **kwargs)

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.improved = improved
        self.cached = cached

        self.weight = Parameter(torch.Tensor(in_channels, out_channels))

        if bias:
            self.bias = Parameter(torch.Tensor(out_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        glorot(self.weight)
        zeros(self.bias)
        self.cached_result = None
        self.cached_num_edges = None


    @staticmethod
    def norm(edge_index, num_nodes, edge_weight=None, improved=False,
             dtype=None):
        if edge_weight is None:
            edge_weight = torch.ones((edge_index.size(1), ), dtype=dtype,
                                     device=edge_index.device)

        fill_value = 1 if not improved else 2
        edge_index, edge_weight = add_remaining_self_loops(
            edge_index, edge_weight, fill_value, num_nodes)

        row, col = edge_index
        print('pre_scatter:edgeweight{}\nrow{}'.format(edge_weight.size(), row.size()))
        deg = scatter_add(edge_weight, row, dim=0, dim_size=num_nodes)
        deg_inv_sqrt = deg.pow(-0.5)
        deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        
        print('deg_inv:{}'.format(deg_inv_sqrt.size()))
        return edge_index, deg_inv_sqrt[row] * edge_weight * deg_inv_sqrt[col]


    def forward(self, x, edge_index, edge_weight=None):
        x = torch.matmul(x, self.weight)

        if self.cached and self.cached_result is not None:
            if edge_index.size(1) != self.cached_num_edges:
                raise RuntimeError(
                    'Cached {} number of edges, but found {}. Please '
                    'disable the caching behavior of this layer by removing '
                    'the `cached=True` argument in its constructor.'.format(
                        self.cached_num_edges, edge_index.size(1)))

        if not self.cached or self.cached_result is None:
            self.cached_num_edges = edge_index.size(1)
            edge_index, norm = self.norm(edge_index, x.size(0), edge_weight,
                                         self.improved, x.dtype)
            self.cached_result = edge_index, norm

        edge_index, norm = self.cached_result
        return self.propagate(edge_index, x=x, norm=norm)


    def message(self, x_j, norm):
#         print('x_j:{}'.format(x_j.size()))
#         print('norm:{}'.format(norm.size()))
        return norm.view(-1, 1) * x_j

    def update(self, aggr_out):
        if self.bias is not None:
            aggr_out = aggr_out + self.bias
        return aggr_out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)

In [9]:
import os.path as osp
import torch
import torch.nn.functional as F
from torch_geometric.datasets import Planetoid
import torch_geometric.transforms as T

dataset = 'Cora'

dataset = Planetoid('/home/qibo/all_project/Graph反欺诈/', dataset, T.TargetIndegree())
data = dataset[0]

data.train_mask = torch.zeros(data.num_nodes, dtype=torch.uint8)
data.train_mask[:data.num_nodes - 1000] = 1
data.val_mask = None
data.test_mask = torch.zeros(data.num_nodes, dtype=torch.uint8)
data.test_mask[data.num_nodes - 500:] = 1

In [10]:
temp = data.edge_index

app = torch.tensor([[2707],[8888]], dtype=torch.long)

new_edge = torch.cat([temp, app], dim=-1)

data.edge_index = new_edge

In [11]:
data.edge_index

tensor([[   0,    0,    0,  ..., 2707, 2707, 2707],
        [ 633, 1862, 2582,  ..., 1473, 2706, 8888]])

In [14]:


import torch.nn.functional as F

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_features, 16)
        self.conv2 = GCNConv(16, dataset.num_classes)

    def forward(self, data):
        x, edge_index = data.x, data.edge_index

        x = self.conv1(x, edge_index)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index)

        return F.log_softmax(x, dim=1)
    
# class Net(torch.nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = GATConv(dataset.num_features, 8, heads=8, dropout=0.6)
#         # On the Pubmed dataset, use heads=8 in conv2.
#         self.conv2 = GATConv(
#             8 * 8, dataset.num_classes, heads=1, concat=True, dropout=0.6)

#     def forward(self, data):
#         x = F.dropout(data.x, p=0.6, training=self.training)
#         x = F.elu(self.conv1(x, data.edge_index))
#         x = F.dropout(x, p=0.6, training=self.training)
#         x = self.conv2(x, data.edge_index)
#         return F.log_softmax(x, dim=1)
    
# class Net(torch.nn.Module):
#     def __init__(self):
#         super(Net, self).__init__()
#         self.conv1 = SplineConv(dataset.num_features, 16, dim=1, kernel_size=2)
#         self.conv2 = SplineConv(16, dataset.num_classes, dim=1, kernel_size=2)

#     def forward(self):
#         x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
#         x = F.dropout(x, training=self.training)
#         x = F.elu(self.conv1(x, edge_index, edge_attr))
#         x = F.dropout(x, training=self.training)
#         x = self.conv2(x, edge_index, edge_attr)
#         return F.log_softmax(x, dim=1)
    
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cpu')
model = Net().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

model.train()
for epoch in range(2):
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    
model.eval()
_, pred = model(data).max(dim=1)
correct = float (pred[data.test_mask].eq(data.y[data.test_mask]).sum().item())
acc = correct / data.test_mask.sum().item()
print('Accuracy: {:.4f}'.format(acc))


RuntimeError: index out of range at /pytorch/aten/src/TH/generic/THTensorEvenMoreMath.cpp:193

In [None]:
len(set(data.edge_index[0]))

In [342]:
2708 + 10557

13265

In [341]:
data

Data(edge_attr=[10556, 1], edge_index=[2, 10557], test_mask=[2708], train_mask=[2708], x=[2708, 1433], y=[2708])

In [344]:
import json
import glob
import numpy as np
import scipy
import pandas as pd
from tqdm import tqdm
import time
from scipy.sparse import csr_matrix
from scipy import sparse
import pickle
import torch
from torch_geometric.data import Data
import torch
import torch.nn.functional as F
import torch_geometric.transforms as T
from torch_geometric.nn import SplineConv, GCNConv
from torch_geometric.data import InMemoryDataset, download_url


In [2]:
def __get_mask_idx(labels_df, train_valid_split=0.8):
    train_idx = labels_df[labels_df.fundtime<'2019-06-03 00:00:00'].username.tolist()
    test_idx = labels_df[labels_df.fundtime>='2019-06-04 00:00:00'].username.tolist()
    split_idx = int(len(train_idx)*train_valid_split)
    train = train_idx[:split_idx]
    valid = train_idx[split_idx:]
    print(len(train), len(valid), len(test_idx))
    return train, valid, test_idx

def _get_final_label(labels, new_x_phone2idx):
    
    label_dict = labels[['username', 'default_now']].set_index('username').to_dict()['default_now']
    train_phone, valid_phone, test_phone = __get_mask_idx(labels)
    fake_labels = np.zeros(len(new_x_phone2idx))-1

    for phone in label_dict:
        idx = new_x_phone2idx[str(phone)]
        fake_labels[idx] = label_dict[int(phone)]   

    train_mask = [new_x_phone2idx[str(phone)] for phone in train_phone]
    fake_train_labels = np.zeros(len(new_x_phone2idx))
    fake_train_labels[train_mask] = 1

    valid_mask = [new_x_phone2idx[str(phone)] for phone in valid_phone]
    fake_valid_labels = np.zeros(len(new_x_phone2idx))
    fake_valid_labels[valid_mask] = 1
    
    test_mask = [new_x_phone2idx[str(phone)] for phone in test_phone]
    fake_test_labels = np.zeros(len(new_x_phone2idx))
    fake_test_labels[test_mask] = 1
    
    return fake_labels, fake_train_labels, fake_valid_labels, fake_test_labels


def _add_new_feat(x, test_df):
    not_in_edge_lis = []
    for i in test_df.username:
        if str(i) not in x:
            x[str(i)] = [0, 0, 0, 0]
            not_in_edge_lis.append(str(i))
    return x, not_in_edge_lis


def _add_new_edge(edge, test_df, not_in_edge_lis):
    for i in test_df.username:
        if str(i) in not_in_edge_lis:
            edge.append([str(i), str(i)])
    return edge
    
def _add_phone2ix(x_phone2idx, not_in_edge_lis):
    old_len = len(x_phone2idx)
    i=0
    for new_phone in not_in_edge_lis:
        x_phone2idx[new_phone] = old_len+i
        i+=1
    return x_phone2idx

def get_all_info(x, edge, x_phone2idx, test_df):
    print(len(x), len(edge), len(x_phone2idx))
    print('--------------------------')
    x, not_in_edge_lis = _add_new_feat(x, test_df)
    edge = _add_new_edge(edge, test_df, not_in_edge_lis)
    x_phone2idx = _add_phone2ix(x_phone2idx, not_in_edge_lis)
    print(len(x), len(edge), len(x_phone2idx))
    fake_labels, train_mask, valid_mask, test_mask = _get_final_label(test_df, x_phone2idx)
    return x, edge, x_phone2idx, fake_labels, train_mask, valid_mask, test_mask
    

def read_cashbus_data(root):
    
    ##################### finish add test infos into the graph ############################
    with open(root+'/feat.json') as json_file:
        x = json.load(json_file)
    with open(root+'/x_phone2idx.json') as json_file:
        x_phone2idx = json.load(json_file)
    with open(root+'/edge.json') as json_file:
        edge = json.load(json_file)
    labels = pd.read_csv(root+'/four_days_label.csv')
    x, edge, x_phone2idx, y, train_mask, valid_mask, test_mask = get_all_info(x, edge, x_phone2idx, labels)  
    
    ##################### finish add test infos into the graph ############################
    
    feat_mat = []
    for k,v in tqdm(x.items()):
        feat_mat.append(v)
    x = torch.tensor(feat_mat, dtype =torch.float)    
    y = torch.tensor(y, dtype=torch.int64).squeeze()
    edge = np.array(edge).T
    row1 = [x_phone2idx[str(i)] for i in edge[0]]
    row2 = [x_phone2idx[str(i)] for i in edge[1]]
    new_edges = torch.tensor(np.stack([row1, row2]))
    
    ##################### finish all ############################
    data = Data(x=x, edge_index=new_edges, y=y)
    data.train_mask = torch.tensor(train_mask, dtype=torch.uint8)
    data.val_mask = torch.tensor(valid_mask, dtype=torch.uint8)
    data.test_mask = torch.tensor(test_mask, dtype=torch.uint8)
    return data

class CashBus(InMemoryDataset):
    def __init__(self, root, transform=None, pre_transform=None):
        super(CashBus, self).__init__(root, transform, pre_transform)
        print('processed_path:{}'.format(self.processed_paths))
        self.data, self.slices = torch.load(self.processed_paths[0])

    @property
    def raw_file_names(self):
        return ['feat.json', 'x_phone2idx.json', 'edge.json', 'four_days_label.csv']

    @property
    def processed_file_names(self):
        return 'data11.pt'

    def download(self):
        pass

    def process(self):
        print('go pl, raw_dir:{}'.format(self.raw_dir))
        data = read_cashbus_data(self.raw_dir)
        data = data if self.pre_transform is None else self.pre_transform(data)
        data, slices = self.collate([data])
        torch.save((data, slices), self.processed_paths[0])

    def __repr__(self):
        return '{}()'.format(self.name)

In [3]:
st = time.time()
root = '/home/qibo/all_project/Graph反欺诈/PYG/'
dataset = CashBus(root)
data = dataset[0]
print(time.time() - st)

processed_path:['/home/qibo/all_project/Graph反欺诈/PYG/processed/data11.pt']
0.26741838455200195


In [5]:
class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = GCNConv(dataset.num_features, 16, cached=True)
        self.conv1_1 = GCNConv(16, 16, cached=True)
        self.conv1_2 = GCNConv(16, 16, cached=True)
        self.conv2 = GCNConv(16, int(dataset.num_classes), cached=True)

    def forward(self):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, training=self.training)
        
#         x = F.relu(self.conv1_1(x, edge_index))
#         x = F.dropout(x, training=self.training)
        
#         x = F.relu(self.conv1_2(x, edge_index))
#         x = F.dropout(x, training=self.training)
        
        x = self.conv2(x, edge_index)
        return F.log_softmax(x, dim=1)

# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = torch.device('cpu')

model, data = Net().to(device), data.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)


In [6]:

def train():
    model.train()
    optimizer.zero_grad()
    F.nll_loss(model()[data.train_mask], data.y[data.train_mask]).backward()
    optimizer.step()

def test():
    model.eval()
    logits, accs = model(), []
    for _, mask in data('train_mask', 'val_mask', 'test_mask'):
        pred = logits[mask].max(1)[1]
        acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
        print(data.y[mask].sum().item() / len(data.y[mask]))
        accs.append(acc)
    return accs


best_val_acc = test_acc = 0
for epoch in range(1, 2):
    print('epoch:{}'.format(epoch))
    st = time.time()
    train()
    print('use {} seconds'.format(time.time() - st))
    train_acc, val_acc, tmp_test_acc = test()
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        test_acc = tmp_test_acc
    log = 'Epoch: {:03d}, Train: {:.4f}, Val: {:.4f}, Test: {:.4f}'
    print(log.format(epoch, train_acc, best_val_acc, test_acc))

epoch:1
use 60.82511258125305 seconds
0.07266795195017055
0.0764218009478673
0.07941313460642757
Epoch: 001, Train: 0.6484, Val: 0.6374, Test: 0.3570
