In [1]:
import time 
import math
import torch
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module

import cryptoaml.datareader as cdr
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import confusion_matrix

In [2]:
ellipticdr = cdr.get_data("elliptic")

In [3]:
# GCN model 
class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
    """

    def __init__(self, 
                 in_features, 
                 out_features, 
                 bias=True):
        
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter("bias", None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, x, adj):
        support = torch.mm(x, self.weight)
        output = torch.spmm(adj, support)
        if self.bias is not None:
            return output + self.bias
        else:
            return output

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + str(self.in_features) + ' -> ' \
               + str(self.out_features) + ')'
    
class GCN(nn.Module):
    def __init__(self, 
                 nfeat, 
                 nhid, 
                 nclass, 
                 dropout):
        super(GCN, self).__init__()
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.dropout = dropout

    def forward(self, x, adj):
        x = F.relu(self.gc1(x, adj))
        x = F.dropout(x, self.dropout, training=self.training)
        
        # https://github.com/tkipf/pygcn/issues/26#issuecomment-435801483
        # "In this case it’s best to simply take the embeddings just before doing 
        # the last linear projection to the softmax logits. 
        # In other words, if the last layer is softmax(A*H*W), 
        # take either the embedding H directly or A*H."
       
        # extract node embeddings (A*H)
        self.node_embeddings = torch.mm(adj, x)
        
        x = self.gc2(x, adj)
        return F.log_softmax(x, dim=1)

In [4]:
import numpy as np
import scipy.sparse as sp
import torch

def load_elliptic(datareader):
    
    # get all labels  
    data = datareader.dataset_.copy()
    labelled_data = data[(data["class"] != -1)]
    
    # get features 
    feature_cols = ["txId"] + datareader.feature_cols_AF_
    labelled_features = labelled_data[feature_cols].copy()
    labelled_features.set_index("txId", inplace=True) 
    features = sp.csr_matrix(labelled_features.values, dtype=np.float32)    
    
    # build edges 
    tx_ids = labelled_features.index
    idx_map = {j: i for i, j in enumerate(tx_ids)}
    edges_unordered = datareader.edges_.copy() 
    edges_unordered = edges_unordered[(edges_unordered["txId2"].isin(set(tx_ids))) & 
                               (edges_unordered["txId1"].isin(set(tx_ids)))].values
    edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                     dtype=np.int32).reshape(edges_unordered.shape)    
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                    shape=(labelled_features.shape[0], labelled_features.shape[0]),
                    dtype=np.float32)
    
    # build symmetric adjacency matrix
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    adj = normalize(adj + sp.eye(adj.shape[0]))
    adj = sparse_mx_to_torch_sparse_tensor(adj)
    
    # get idx for train and test 
    total_train = labelled_data[labelled_data["ts"] <= 34].shape[0]
    idx_train = range(total_train)
    idx_train = torch.LongTensor(idx_train)   
    total_test = labelled_data[labelled_data["ts"] > 34].shape[0]
    idx_test = range(total_train, total_train+total_test)
    idx_test = torch.LongTensor(idx_test)
    
    # change data to torch tensors 
    features = torch.FloatTensor(np.array(features.todense()))
    labels = torch.LongTensor(labelled_data["class"].values)
    
    return adj, features, labels, idx_train, idx_test

def normalize(mx):
    """Row-normalize sparse matrix"""
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)

def precision(output, labels):
    preds = output.max(1)[1].type_as(labels)
    return precision_score(labels, preds, average="binary")

def recall(output, labels):
    preds = output.max(1)[1].type_as(labels)
    return recall_score(labels, preds, average="binary")

def fscore_micro(output, labels):
    preds = output.max(1)[1].type_as(labels)
    return f1_score(labels, preds, average="micro")

def fscore(output, labels):
    preds = output.max(1)[1].type_as(labels)
    return f1_score(labels, preds, average="binary")

def confusion(output, labels):
    preds = output.max(1)[1].type_as(labels)
    return confusion_matrix(labels, preds)

In [5]:
# setup 

# build graph data 
adj, features, labels, idx_train, idx_test = load_elliptic(ellipticdr)

# model 
n_classes = 2
n_features = 166
n_hidden = 100
dropout = 0.5
gcn = GCN(nfeat=n_features,
          nhid=n_hidden,
          nclass=n_classes,
          dropout=dropout)

# optimizer 
learning_rate = 0.001
gcn_params = gcn.parameters()
optimizer = optim.Adam(gcn_params,
                       lr=learning_rate)
weight_ratio = torch.FloatTensor([0.3, 0.7])
loss = nn.CrossEntropyLoss(weight=weight_ratio)

# if torch.cuda.is_available():
#     gcn.cuda()
#     features = features.cuda()
#     adj = adj.cuda()
#     labels = labels.cuda()
#     idx_train = idx_train.cuda()
#     idx_test = idx_test.cuda()

In [6]:
# setup training 
epochs = 1000
best_loss = 1
node_emb_train = None 

for epoch in range(epochs):
    epoch_start = time.time()
    
    gcn.train()
    optimizer.zero_grad()
    output = gcn(features, adj)
    loss_train = loss(output[idx_train], labels[idx_train])
    
    f1_train = fscore(output[idx_train], labels[idx_train])
    
    loss_train.backward()
    optimizer.step()
    
    epoch_finish = time.time() - epoch_start

    if best_loss >= loss_train.item():
        node_emb_train = gcn.node_embeddings[idx_train]

    print(
        "Epoch: {:04d}".format(epoch+1),
        "loss_train: {:.4f}".format(loss_train.item()),
        "f1_train: {:.4f}".format(f1_train),
        "time: {:.4f}s".format(epoch_finish)
    )

print("Optimization Finished!")
print(node_emb_train.shape)

Epoch: 0001 loss_train: 2.9864 f1_train: 0.1301 time: 0.2631s
Epoch: 0002 loss_train: 2.2996 f1_train: 0.1462 time: 0.2377s
Epoch: 0003 loss_train: 1.8138 f1_train: 0.1747 time: 0.2420s
Epoch: 0004 loss_train: 1.5091 f1_train: 0.2004 time: 0.2239s
Epoch: 0005 loss_train: 1.3594 f1_train: 0.2336 time: 0.2305s
Epoch: 0006 loss_train: 1.2972 f1_train: 0.2551 time: 0.2213s
Epoch: 0007 loss_train: 1.0842 f1_train: 0.3038 time: 0.2179s
Epoch: 0008 loss_train: 1.0645 f1_train: 0.3214 time: 0.1938s
Epoch: 0009 loss_train: 0.9673 f1_train: 0.3639 time: 0.2153s
Epoch: 0010 loss_train: 0.9324 f1_train: 0.3805 time: 0.2257s
Epoch: 0011 loss_train: 0.9233 f1_train: 0.3921 time: 0.2127s
Epoch: 0012 loss_train: 0.8872 f1_train: 0.4134 time: 0.2282s
Epoch: 0013 loss_train: 0.8575 f1_train: 0.4342 time: 0.2169s
Epoch: 0014 loss_train: 0.8317 f1_train: 0.4576 time: 0.2178s
Epoch: 0015 loss_train: 0.7878 f1_train: 0.4632 time: 0.2167s
Epoch: 0016 loss_train: 0.7728 f1_train: 0.4803 time: 0.2219s
Epoch: 0

Epoch: 0134 loss_train: 0.2069 f1_train: 0.7530 time: 0.2059s
Epoch: 0135 loss_train: 0.2071 f1_train: 0.7527 time: 0.1892s
Epoch: 0136 loss_train: 0.2069 f1_train: 0.7541 time: 0.2011s
Epoch: 0137 loss_train: 0.2085 f1_train: 0.7536 time: 0.1813s
Epoch: 0138 loss_train: 0.2036 f1_train: 0.7570 time: 0.1824s
Epoch: 0139 loss_train: 0.2004 f1_train: 0.7546 time: 0.1799s
Epoch: 0140 loss_train: 0.2056 f1_train: 0.7606 time: 0.1839s
Epoch: 0141 loss_train: 0.2032 f1_train: 0.7587 time: 0.1743s
Epoch: 0142 loss_train: 0.2020 f1_train: 0.7519 time: 0.1806s
Epoch: 0143 loss_train: 0.2032 f1_train: 0.7556 time: 0.2137s
Epoch: 0144 loss_train: 0.1986 f1_train: 0.7567 time: 0.2352s
Epoch: 0145 loss_train: 0.1988 f1_train: 0.7559 time: 0.2206s
Epoch: 0146 loss_train: 0.1947 f1_train: 0.7605 time: 0.2595s
Epoch: 0147 loss_train: 0.2010 f1_train: 0.7568 time: 0.1797s
Epoch: 0148 loss_train: 0.1974 f1_train: 0.7552 time: 0.2011s
Epoch: 0149 loss_train: 0.1959 f1_train: 0.7604 time: 0.1937s
Epoch: 0

Epoch: 0267 loss_train: 0.1562 f1_train: 0.8090 time: 0.2168s
Epoch: 0268 loss_train: 0.1513 f1_train: 0.8190 time: 0.2237s
Epoch: 0269 loss_train: 0.1538 f1_train: 0.8166 time: 0.2216s
Epoch: 0270 loss_train: 0.1548 f1_train: 0.8169 time: 0.2178s
Epoch: 0271 loss_train: 0.1543 f1_train: 0.8195 time: 0.2206s
Epoch: 0272 loss_train: 0.1519 f1_train: 0.8177 time: 0.2124s
Epoch: 0273 loss_train: 0.1512 f1_train: 0.8203 time: 0.2149s
Epoch: 0274 loss_train: 0.1537 f1_train: 0.8096 time: 0.2100s
Epoch: 0275 loss_train: 0.1534 f1_train: 0.8143 time: 0.2117s
Epoch: 0276 loss_train: 0.1523 f1_train: 0.8148 time: 0.2151s
Epoch: 0277 loss_train: 0.1524 f1_train: 0.8127 time: 0.2124s
Epoch: 0278 loss_train: 0.1531 f1_train: 0.8092 time: 0.2184s
Epoch: 0279 loss_train: 0.1535 f1_train: 0.8183 time: 0.2225s
Epoch: 0280 loss_train: 0.1542 f1_train: 0.8147 time: 0.2252s
Epoch: 0281 loss_train: 0.1542 f1_train: 0.8151 time: 0.2190s
Epoch: 0282 loss_train: 0.1511 f1_train: 0.8161 time: 0.2164s
Epoch: 0

Epoch: 0400 loss_train: 0.1353 f1_train: 0.8324 time: 0.2164s
Epoch: 0401 loss_train: 0.1331 f1_train: 0.8408 time: 0.2079s
Epoch: 0402 loss_train: 0.1359 f1_train: 0.8367 time: 0.2103s
Epoch: 0403 loss_train: 0.1327 f1_train: 0.8399 time: 0.2146s
Epoch: 0404 loss_train: 0.1336 f1_train: 0.8403 time: 0.2147s
Epoch: 0405 loss_train: 0.1329 f1_train: 0.8424 time: 0.2217s
Epoch: 0406 loss_train: 0.1328 f1_train: 0.8418 time: 0.2102s
Epoch: 0407 loss_train: 0.1306 f1_train: 0.8412 time: 0.2088s
Epoch: 0408 loss_train: 0.1307 f1_train: 0.8419 time: 0.2160s
Epoch: 0409 loss_train: 0.1349 f1_train: 0.8383 time: 0.2249s
Epoch: 0410 loss_train: 0.1315 f1_train: 0.8427 time: 0.2510s
Epoch: 0411 loss_train: 0.1328 f1_train: 0.8364 time: 0.2294s
Epoch: 0412 loss_train: 0.1333 f1_train: 0.8408 time: 0.2521s
Epoch: 0413 loss_train: 0.1315 f1_train: 0.8388 time: 0.2383s
Epoch: 0414 loss_train: 0.1315 f1_train: 0.8420 time: 0.2308s
Epoch: 0415 loss_train: 0.1360 f1_train: 0.8347 time: 0.2265s
Epoch: 0

Epoch: 0534 loss_train: 0.1197 f1_train: 0.8563 time: 0.1900s
Epoch: 0535 loss_train: 0.1210 f1_train: 0.8527 time: 0.1880s
Epoch: 0536 loss_train: 0.1218 f1_train: 0.8546 time: 0.1871s
Epoch: 0537 loss_train: 0.1211 f1_train: 0.8495 time: 0.1928s
Epoch: 0538 loss_train: 0.1221 f1_train: 0.8551 time: 0.1838s
Epoch: 0539 loss_train: 0.1213 f1_train: 0.8563 time: 0.1841s
Epoch: 0540 loss_train: 0.1213 f1_train: 0.8513 time: 0.2143s
Epoch: 0541 loss_train: 0.1186 f1_train: 0.8576 time: 0.2162s
Epoch: 0542 loss_train: 0.1189 f1_train: 0.8553 time: 0.1973s
Epoch: 0543 loss_train: 0.1207 f1_train: 0.8518 time: 0.2144s
Epoch: 0544 loss_train: 0.1200 f1_train: 0.8563 time: 0.2865s
Epoch: 0545 loss_train: 0.1211 f1_train: 0.8583 time: 0.3104s
Epoch: 0546 loss_train: 0.1151 f1_train: 0.8623 time: 0.2442s
Epoch: 0547 loss_train: 0.1207 f1_train: 0.8568 time: 0.2536s
Epoch: 0548 loss_train: 0.1200 f1_train: 0.8566 time: 0.2250s
Epoch: 0549 loss_train: 0.1200 f1_train: 0.8532 time: 0.2566s
Epoch: 0

Epoch: 0667 loss_train: 0.1083 f1_train: 0.8701 time: 0.2403s
Epoch: 0668 loss_train: 0.1109 f1_train: 0.8692 time: 0.2213s
Epoch: 0669 loss_train: 0.1142 f1_train: 0.8638 time: 0.2385s
Epoch: 0670 loss_train: 0.1111 f1_train: 0.8642 time: 0.2364s
Epoch: 0671 loss_train: 0.1109 f1_train: 0.8638 time: 0.2279s
Epoch: 0672 loss_train: 0.1109 f1_train: 0.8622 time: 0.2376s
Epoch: 0673 loss_train: 0.1096 f1_train: 0.8655 time: 0.2964s
Epoch: 0674 loss_train: 0.1090 f1_train: 0.8665 time: 0.2924s
Epoch: 0675 loss_train: 0.1087 f1_train: 0.8686 time: 0.2232s
Epoch: 0676 loss_train: 0.1090 f1_train: 0.8640 time: 0.2109s
Epoch: 0677 loss_train: 0.1070 f1_train: 0.8705 time: 0.1940s
Epoch: 0678 loss_train: 0.1099 f1_train: 0.8692 time: 0.3176s
Epoch: 0679 loss_train: 0.1099 f1_train: 0.8714 time: 0.2794s
Epoch: 0680 loss_train: 0.1086 f1_train: 0.8633 time: 0.2382s
Epoch: 0681 loss_train: 0.1096 f1_train: 0.8686 time: 0.2389s
Epoch: 0682 loss_train: 0.1083 f1_train: 0.8683 time: 0.2710s
Epoch: 0

Epoch: 0800 loss_train: 0.1016 f1_train: 0.8763 time: 0.2364s
Epoch: 0801 loss_train: 0.1041 f1_train: 0.8734 time: 0.2347s
Epoch: 0802 loss_train: 0.1015 f1_train: 0.8793 time: 0.2440s
Epoch: 0803 loss_train: 0.1016 f1_train: 0.8721 time: 0.2325s
Epoch: 0804 loss_train: 0.1029 f1_train: 0.8735 time: 0.2344s
Epoch: 0805 loss_train: 0.1026 f1_train: 0.8764 time: 0.2339s
Epoch: 0806 loss_train: 0.1017 f1_train: 0.8765 time: 0.2341s
Epoch: 0807 loss_train: 0.1049 f1_train: 0.8745 time: 0.2345s
Epoch: 0808 loss_train: 0.0999 f1_train: 0.8763 time: 0.2300s
Epoch: 0809 loss_train: 0.1010 f1_train: 0.8776 time: 0.2372s
Epoch: 0810 loss_train: 0.1021 f1_train: 0.8713 time: 0.2374s
Epoch: 0811 loss_train: 0.1011 f1_train: 0.8746 time: 0.1938s
Epoch: 0812 loss_train: 0.1025 f1_train: 0.8769 time: 0.1971s
Epoch: 0813 loss_train: 0.1036 f1_train: 0.8732 time: 0.2029s
Epoch: 0814 loss_train: 0.1010 f1_train: 0.8711 time: 0.1905s
Epoch: 0815 loss_train: 0.1012 f1_train: 0.8766 time: 0.1966s
Epoch: 0

Epoch: 0934 loss_train: 0.0938 f1_train: 0.8829 time: 0.1989s
Epoch: 0935 loss_train: 0.0932 f1_train: 0.8827 time: 0.2158s
Epoch: 0936 loss_train: 0.0954 f1_train: 0.8841 time: 0.2006s
Epoch: 0937 loss_train: 0.0959 f1_train: 0.8855 time: 0.1940s
Epoch: 0938 loss_train: 0.0951 f1_train: 0.8788 time: 0.1948s
Epoch: 0939 loss_train: 0.0961 f1_train: 0.8825 time: 0.2047s
Epoch: 0940 loss_train: 0.0954 f1_train: 0.8839 time: 0.1901s
Epoch: 0941 loss_train: 0.0972 f1_train: 0.8811 time: 0.1956s
Epoch: 0942 loss_train: 0.0970 f1_train: 0.8853 time: 0.1885s
Epoch: 0943 loss_train: 0.0948 f1_train: 0.8822 time: 0.1979s
Epoch: 0944 loss_train: 0.0959 f1_train: 0.8852 time: 0.1893s
Epoch: 0945 loss_train: 0.0951 f1_train: 0.8828 time: 0.2070s
Epoch: 0946 loss_train: 0.0969 f1_train: 0.8819 time: 0.1956s
Epoch: 0947 loss_train: 0.0963 f1_train: 0.8803 time: 0.1936s
Epoch: 0948 loss_train: 0.0938 f1_train: 0.8843 time: 0.1879s
Epoch: 0949 loss_train: 0.0946 f1_train: 0.8800 time: 0.1966s
Epoch: 0

In [7]:
node_emb_test = None 
gcn.eval()
output = gcn(features, adj)
loss_test = loss(output[idx_test], labels[idx_test])


precision_score = precision(output[idx_test], labels[idx_test])
recall_score = recall(output[idx_test], labels[idx_test])
f1_test = fscore(output[idx_test], labels[idx_test])
f1_micro = fscore_micro(output[idx_test], labels[idx_test])
confusion_score = confusion(output[idx_test], labels[idx_test])

node_emb_test = gcn.node_embeddings[idx_test]
print(
    "Test set results:",
    "loss= {:.4f}".format(loss_test.item()),
    "precision= {:.4f}".format(precision_score),
    "recall= {:.4f}".format(recall_score),
    "f1_test= {:.4f}".format(f1_test),
    "f1_micro= {:.4f}".format(f1_micro),
    "confusion= {}".format(confusion_score)
)

Test set results: loss= 0.3632 precision= 0.6916 recall= 0.5466 f1_test= 0.6106 f1_micro= 0.9547 confusion= [[15323   264]
 [  491   592]]


In [8]:
print(node_emb_train)
print(node_emb_test)

np_node_emb = np.concatenate((node_emb_train.cpu().detach().numpy(), 
                            node_emb_test.cpu().detach().numpy()))

print(np_node_emb)

# Create embeddings pandas DataFrame 
node_emb_pd = pd.DataFrame(np_node_emb) 
node_emb_pd.columns = [f"NE_{i}" for i in range(np_node_emb.shape[1])]
display(node_emb_pd)

tensor([[0.0000, 0.2144, 0.9432,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.8666, 0.4166,  ..., 0.0000, 0.0163, 0.0000],
        [0.0000, 0.2767, 0.2550,  ..., 0.1394, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.3190, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 1.4094, 0.0000, 0.0000],
        [0.0000, 0.4305, 0.0000,  ..., 1.0562, 0.0000, 0.0000]],
       grad_fn=<IndexBackward>)
tensor([[0.0000, 0.9012, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 1.0121, 0.0000,  ..., 0.7758, 0.0000, 0.0000],
        [0.0000, 0.7476, 0.0000,  ..., 0.7062, 0.0000, 0.0000],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 0.1259, 0.0000, 0.0000],
        [0.0000, 1.0824, 0.0000,  ..., 0.8512, 0.6905, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0990, 0.0000, 0.0000]],
       grad_fn=<IndexBackward>)
[[0.         0.21435647 0.94317627 ... 0.         0.         0.        ]
 [0.         0.8666105  0.41661337 ... 0.         0.01634093 0.    

Unnamed: 0,NE_0,NE_1,NE_2,NE_3,NE_4,NE_5,NE_6,NE_7,NE_8,NE_9,...,NE_90,NE_91,NE_92,NE_93,NE_94,NE_95,NE_96,NE_97,NE_98,NE_99
0,0.0,0.214356,0.943176,0.196256,0.607267,0.361846,1.557223,1.574131,0.209287,0.076003,...,0.000000,0.0,0.451862,0.183161,0.000000,0.074456,0.190027,0.000000,0.000000,0.0
1,0.0,0.866611,0.416613,0.025965,0.743440,0.054661,0.769524,1.446789,0.445361,0.033966,...,0.091498,0.0,0.858419,0.000000,0.005393,0.021094,0.277284,0.000000,0.016341,0.0
2,0.0,0.276732,0.254969,0.551423,0.247976,0.187643,0.000000,0.000000,0.000000,0.000000,...,0.543029,0.0,0.000000,0.030060,0.000000,0.000000,0.000000,0.139384,0.000000,0.0
3,0.0,0.157053,0.254969,1.115224,0.248506,0.187643,0.000000,0.000000,0.000000,0.036924,...,0.443598,0.0,0.360247,0.035661,0.000000,0.000000,0.000000,0.139384,0.000000,0.0
4,0.0,0.852650,0.364693,0.000000,3.646550,0.000000,0.000000,6.867769,0.000000,1.270580,...,0.246011,0.0,1.101321,4.461850,0.000000,0.000000,0.000000,0.600567,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46559,0.0,0.000000,0.000000,0.000000,0.000000,0.381118,0.000000,0.000000,0.654668,0.221343,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.722984,0.118735,0.000000,0.0
46560,0.0,0.000000,0.000000,0.000000,0.000000,0.572999,0.000000,0.000000,0.552214,0.095016,...,0.000000,0.0,0.000000,0.000000,0.000000,0.122833,0.504956,0.000000,0.000000,0.0
46561,0.0,0.000000,0.000000,0.000000,0.000000,0.522394,0.000000,0.000000,0.115867,0.214168,...,0.142971,0.0,0.000000,0.378454,0.000000,0.000000,0.335576,0.125906,0.000000,0.0
46562,0.0,1.082445,0.000000,0.070579,0.608728,0.727974,0.000000,0.000000,1.096430,0.279903,...,1.106855,0.0,0.000000,0.757327,0.000000,0.000000,0.116640,0.851179,0.690499,0.0


In [9]:
data = ellipticdr.dataset_.copy()
txIds = data[(data["class"] != -1)]["txId"].values
node_emb_pd.insert(0, "txId", txIds)
print(txIds)

[232438397 232029206 232344069 ... 158375075 147478192 158375402]


In [10]:
display(node_emb_pd)

Unnamed: 0,txId,NE_0,NE_1,NE_2,NE_3,NE_4,NE_5,NE_6,NE_7,NE_8,...,NE_90,NE_91,NE_92,NE_93,NE_94,NE_95,NE_96,NE_97,NE_98,NE_99
0,232438397,0.0,0.214356,0.943176,0.196256,0.607267,0.361846,1.557223,1.574131,0.209287,...,0.000000,0.0,0.451862,0.183161,0.000000,0.074456,0.190027,0.000000,0.000000,0.0
1,232029206,0.0,0.866611,0.416613,0.025965,0.743440,0.054661,0.769524,1.446789,0.445361,...,0.091498,0.0,0.858419,0.000000,0.005393,0.021094,0.277284,0.000000,0.016341,0.0
2,232344069,0.0,0.276732,0.254969,0.551423,0.247976,0.187643,0.000000,0.000000,0.000000,...,0.543029,0.0,0.000000,0.030060,0.000000,0.000000,0.000000,0.139384,0.000000,0.0
3,27553029,0.0,0.157053,0.254969,1.115224,0.248506,0.187643,0.000000,0.000000,0.000000,...,0.443598,0.0,0.360247,0.035661,0.000000,0.000000,0.000000,0.139384,0.000000,0.0
4,3881097,0.0,0.852650,0.364693,0.000000,3.646550,0.000000,0.000000,6.867769,0.000000,...,0.246011,0.0,1.101321,4.461850,0.000000,0.000000,0.000000,0.600567,0.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46559,80329479,0.0,0.000000,0.000000,0.000000,0.000000,0.381118,0.000000,0.000000,0.654668,...,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.722984,0.118735,0.000000,0.0
46560,158406298,0.0,0.000000,0.000000,0.000000,0.000000,0.572999,0.000000,0.000000,0.552214,...,0.000000,0.0,0.000000,0.000000,0.000000,0.122833,0.504956,0.000000,0.000000,0.0
46561,158375075,0.0,0.000000,0.000000,0.000000,0.000000,0.522394,0.000000,0.000000,0.115867,...,0.142971,0.0,0.000000,0.378454,0.000000,0.000000,0.335576,0.125906,0.000000,0.0
46562,147478192,0.0,1.082445,0.000000,0.070579,0.608728,0.727974,0.000000,0.000000,1.096430,...,1.106855,0.0,0.000000,0.757327,0.000000,0.000000,0.116640,0.851179,0.690499,0.0


In [11]:
node_emb_pd.to_csv("elliptic_embs.csv", index=False)