In [1]:
import time 
import math
import torch
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module

import cryptoaml.datareader as cdr
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import confusion_matrix

In [2]:
ellipticdr = cdr.get_data("elliptic")

In [3]:
# GCN model 
class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
    """

    def __init__(self, 
                 in_features, 
                 out_features, 
                 bias=True):
        
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter("bias", None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, x, adj):
        support = torch.mm(x, self.weight)
        output = torch.spmm(adj, support)
        if self.bias is not None:
            return output + self.bias
        else:
            return output

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + str(self.in_features) + ' -> ' \
               + str(self.out_features) + ')'
    
class GCN(nn.Module):
    def __init__(self, 
                 nfeat, 
                 nhid, 
                 nclass, 
                 dropout):
        super(GCN, self).__init__()
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.dropout = dropout

    def forward(self, x, adj):
        x = F.relu(self.gc1(x, adj))
        x = F.dropout(x, self.dropout, training=self.training)
        
        # https://github.com/tkipf/pygcn/issues/26#issuecomment-435801483
        # "In this case it’s best to simply take the embeddings just before doing 
        # the last linear projection to the softmax logits. 
        # In other words, if the last layer is softmax(A*H*W), 
        # take either the embedding H directly or A*H."
       
        # extract node embeddings (A*H)
        self.node_embeddings = torch.mm(adj, x)
        
        x = self.gc2(x, adj)
        return F.log_softmax(x, dim=1)

In [4]:
import numpy as np
import scipy.sparse as sp
import torch

def load_elliptic(datareader):
    
    # get all labels  
    data = datareader.dataset_.copy()
    labelled_data = data[(data["class"] != -1)]
    
    # get features 
    feature_cols = ["txId"] + datareader.feature_cols_AF_
    labelled_features = labelled_data[feature_cols].copy()
    labelled_features.set_index("txId", inplace=True) 
    features = sp.csr_matrix(labelled_features.values, dtype=np.float32)    
    
    # build edges 
    tx_ids = labelled_features.index
    idx_map = {j: i for i, j in enumerate(tx_ids)}
    edges_unordered = datareader.edges_.copy() 
    edges_unordered = edges_unordered[(edges_unordered["txId2"].isin(set(tx_ids))) & 
                               (edges_unordered["txId1"].isin(set(tx_ids)))].values
    edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                     dtype=np.int32).reshape(edges_unordered.shape)    
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                    shape=(labelled_features.shape[0], labelled_features.shape[0]),
                    dtype=np.float32)
    
    # build symmetric adjacency matrix
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    adj = normalize(adj + sp.eye(adj.shape[0]))
    adj = sparse_mx_to_torch_sparse_tensor(adj)
    
    # get idx for train and test 
    total_train = labelled_data[labelled_data["ts"] <= 34].shape[0]
    idx_train = range(total_train)
    idx_train = torch.LongTensor(idx_train)   
    total_test = labelled_data[labelled_data["ts"] > 34].shape[0]
    idx_test = range(total_train, total_train+total_test)
    idx_test = torch.LongTensor(idx_test)
    
    # change data to torch tensors 
    features = torch.FloatTensor(np.array(features.todense()))
    labels = torch.LongTensor(labelled_data["class"].values)
    
    return adj, features, labels, idx_train, idx_test

def normalize(mx):
    """Row-normalize sparse matrix"""
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)

def precision(output, labels):
    preds = output.max(1)[1].type_as(labels)
    return precision_score(labels, preds, average="binary")

def recall(output, labels):
    preds = output.max(1)[1].type_as(labels)
    return recall_score(labels, preds, average="binary")

def fscore_micro(output, labels):
    preds = output.max(1)[1].type_as(labels)
    return f1_score(labels, preds, average="micro")

def fscore(output, labels):
    preds = output.max(1)[1].type_as(labels)
    return f1_score(labels, preds, average="binary")

def confusion(output, labels):
    preds = output.max(1)[1].type_as(labels)
    return confusion_matrix(labels, preds)

In [5]:
# setup 

# build graph data 
adj, features, labels, idx_train, idx_test = load_elliptic(ellipticdr)

# model 
n_classes = 2
n_features = 166
n_hidden = 100
dropout = 0.5
gcn = GCN(nfeat=n_features,
          nhid=n_hidden,
          nclass=n_classes,
          dropout=dropout)

# optimizer 
learning_rate = 0.001
gcn_params = gcn.parameters()
optimizer = optim.Adam(gcn_params,
                       lr=learning_rate)
weight_ratio = torch.FloatTensor([0.3, 0.7])
loss = nn.CrossEntropyLoss(weight=weight_ratio)

# if torch.cuda.is_available():
#     gcn.cuda()
#     features = features.cuda()
#     adj = adj.cuda()
#     labels = labels.cuda()
#     idx_train = idx_train.cuda()
#     idx_test = idx_test.cuda()

In [6]:
# setup training 
epochs = 1000
best_loss = 1
node_emb_train = None 

for epoch in range(epochs):
    epoch_start = time.time()
    
    gcn.train()
    optimizer.zero_grad()
    output = gcn(features, adj)
    loss_train = loss(output[idx_train], labels[idx_train])
    
    f1_train = fscore(output[idx_train], labels[idx_train])
    
    loss_train.backward()
    optimizer.step()
    
    epoch_finish = time.time() - epoch_start

    if best_loss >= loss_train.item():
        node_emb_train = gcn.node_embeddings[idx_train]

    print(
        "Epoch: {:04d}".format(epoch+1),
        "loss_train: {:.4f}".format(loss_train.item()),
        "f1_train: {:.4f}".format(f1_train),
        "time: {:.4f}s".format(epoch_finish)
    )

print("Optimization Finished!")
print(node_emb_train.shape)

Epoch: 0001 loss_train: 1.9781 f1_train: 0.1001 time: 0.2333s
Epoch: 0002 loss_train: 1.6039 f1_train: 0.1449 time: 0.2463s
Epoch: 0003 loss_train: 1.4025 f1_train: 0.1989 time: 0.2331s
Epoch: 0004 loss_train: 1.2137 f1_train: 0.2480 time: 0.2505s
Epoch: 0005 loss_train: 1.1218 f1_train: 0.2907 time: 0.3513s
Epoch: 0006 loss_train: 1.0397 f1_train: 0.3398 time: 0.2986s
Epoch: 0007 loss_train: 0.9964 f1_train: 0.3623 time: 0.2270s
Epoch: 0008 loss_train: 0.9194 f1_train: 0.4015 time: 0.2804s
Epoch: 0009 loss_train: 0.9251 f1_train: 0.4046 time: 0.1772s
Epoch: 0010 loss_train: 0.9189 f1_train: 0.4185 time: 0.2256s
Epoch: 0011 loss_train: 0.8784 f1_train: 0.4371 time: 0.2223s
Epoch: 0012 loss_train: 0.8309 f1_train: 0.4567 time: 0.2419s
Epoch: 0013 loss_train: 0.8481 f1_train: 0.4581 time: 0.3530s
Epoch: 0014 loss_train: 0.8174 f1_train: 0.4703 time: 0.2185s
Epoch: 0015 loss_train: 0.7955 f1_train: 0.4841 time: 0.2909s
Epoch: 0016 loss_train: 0.8044 f1_train: 0.4856 time: 0.2967s
Epoch: 0

Epoch: 0134 loss_train: 0.1829 f1_train: 0.7720 time: 0.1844s
Epoch: 0135 loss_train: 0.1849 f1_train: 0.7727 time: 0.1874s
Epoch: 0136 loss_train: 0.1822 f1_train: 0.7840 time: 0.1753s
Epoch: 0137 loss_train: 0.1831 f1_train: 0.7836 time: 0.1839s
Epoch: 0138 loss_train: 0.1839 f1_train: 0.7758 time: 0.1761s
Epoch: 0139 loss_train: 0.1826 f1_train: 0.7778 time: 0.1792s
Epoch: 0140 loss_train: 0.1809 f1_train: 0.7774 time: 0.1794s
Epoch: 0141 loss_train: 0.1844 f1_train: 0.7814 time: 0.1925s
Epoch: 0142 loss_train: 0.1785 f1_train: 0.7841 time: 0.1787s
Epoch: 0143 loss_train: 0.1744 f1_train: 0.7881 time: 0.1747s
Epoch: 0144 loss_train: 0.1756 f1_train: 0.7851 time: 0.1794s
Epoch: 0145 loss_train: 0.1761 f1_train: 0.7855 time: 0.1762s
Epoch: 0146 loss_train: 0.1706 f1_train: 0.7911 time: 0.1784s
Epoch: 0147 loss_train: 0.1789 f1_train: 0.7871 time: 0.1837s
Epoch: 0148 loss_train: 0.1725 f1_train: 0.7925 time: 0.1845s
Epoch: 0149 loss_train: 0.1752 f1_train: 0.7845 time: 0.1794s
Epoch: 0

Epoch: 0267 loss_train: 0.1278 f1_train: 0.8441 time: 0.1769s
Epoch: 0268 loss_train: 0.1329 f1_train: 0.8416 time: 0.1733s
Epoch: 0269 loss_train: 0.1313 f1_train: 0.8405 time: 0.1842s
Epoch: 0270 loss_train: 0.1276 f1_train: 0.8434 time: 0.1869s
Epoch: 0271 loss_train: 0.1296 f1_train: 0.8367 time: 0.1781s
Epoch: 0272 loss_train: 0.1241 f1_train: 0.8437 time: 0.1856s
Epoch: 0273 loss_train: 0.1307 f1_train: 0.8346 time: 0.1759s
Epoch: 0274 loss_train: 0.1263 f1_train: 0.8463 time: 0.1785s
Epoch: 0275 loss_train: 0.1307 f1_train: 0.8363 time: 0.1784s
Epoch: 0276 loss_train: 0.1266 f1_train: 0.8509 time: 0.1944s
Epoch: 0277 loss_train: 0.1262 f1_train: 0.8497 time: 0.1748s
Epoch: 0278 loss_train: 0.1254 f1_train: 0.8472 time: 0.1905s
Epoch: 0279 loss_train: 0.1275 f1_train: 0.8421 time: 0.1771s
Epoch: 0280 loss_train: 0.1289 f1_train: 0.8457 time: 0.1914s
Epoch: 0281 loss_train: 0.1262 f1_train: 0.8422 time: 0.1849s
Epoch: 0282 loss_train: 0.1270 f1_train: 0.8444 time: 0.1740s
Epoch: 0

Epoch: 0400 loss_train: 0.1092 f1_train: 0.8626 time: 0.2176s
Epoch: 0401 loss_train: 0.1125 f1_train: 0.8602 time: 0.2192s
Epoch: 0402 loss_train: 0.1084 f1_train: 0.8678 time: 0.2204s
Epoch: 0403 loss_train: 0.1095 f1_train: 0.8615 time: 0.2227s
Epoch: 0404 loss_train: 0.1086 f1_train: 0.8634 time: 0.2241s
Epoch: 0405 loss_train: 0.1096 f1_train: 0.8629 time: 0.2224s
Epoch: 0406 loss_train: 0.1089 f1_train: 0.8649 time: 0.2219s
Epoch: 0407 loss_train: 0.1108 f1_train: 0.8645 time: 0.2216s
Epoch: 0408 loss_train: 0.1092 f1_train: 0.8670 time: 0.2262s
Epoch: 0409 loss_train: 0.1102 f1_train: 0.8643 time: 0.2133s
Epoch: 0410 loss_train: 0.1103 f1_train: 0.8627 time: 0.2258s
Epoch: 0411 loss_train: 0.1130 f1_train: 0.8578 time: 0.2273s
Epoch: 0412 loss_train: 0.1095 f1_train: 0.8658 time: 0.2183s
Epoch: 0413 loss_train: 0.1115 f1_train: 0.8619 time: 0.2291s
Epoch: 0414 loss_train: 0.1099 f1_train: 0.8636 time: 0.2208s
Epoch: 0415 loss_train: 0.1109 f1_train: 0.8691 time: 0.2203s
Epoch: 0

Epoch: 0533 loss_train: 0.0973 f1_train: 0.8779 time: 0.2375s
Epoch: 0534 loss_train: 0.0988 f1_train: 0.8774 time: 0.2387s
Epoch: 0535 loss_train: 0.0966 f1_train: 0.8795 time: 0.2373s
Epoch: 0536 loss_train: 0.1011 f1_train: 0.8810 time: 0.2543s
Epoch: 0537 loss_train: 0.0961 f1_train: 0.8797 time: 0.2461s
Epoch: 0538 loss_train: 0.0959 f1_train: 0.8800 time: 0.2365s
Epoch: 0539 loss_train: 0.0946 f1_train: 0.8803 time: 0.2387s
Epoch: 0540 loss_train: 0.0990 f1_train: 0.8772 time: 0.2382s
Epoch: 0541 loss_train: 0.0975 f1_train: 0.8793 time: 0.2387s
Epoch: 0542 loss_train: 0.0988 f1_train: 0.8814 time: 0.2359s
Epoch: 0543 loss_train: 0.0986 f1_train: 0.8751 time: 0.2729s
Epoch: 0544 loss_train: 0.0978 f1_train: 0.8739 time: 0.1850s
Epoch: 0545 loss_train: 0.0972 f1_train: 0.8808 time: 0.1953s
Epoch: 0546 loss_train: 0.0978 f1_train: 0.8773 time: 0.2501s
Epoch: 0547 loss_train: 0.0977 f1_train: 0.8805 time: 0.2021s
Epoch: 0548 loss_train: 0.0976 f1_train: 0.8753 time: 0.2086s
Epoch: 0

Epoch: 0666 loss_train: 0.0872 f1_train: 0.8867 time: 0.2410s
Epoch: 0667 loss_train: 0.0900 f1_train: 0.8891 time: 0.2438s
Epoch: 0668 loss_train: 0.0896 f1_train: 0.8920 time: 0.2538s
Epoch: 0669 loss_train: 0.0910 f1_train: 0.8842 time: 0.2436s
Epoch: 0670 loss_train: 0.0895 f1_train: 0.8869 time: 0.2433s
Epoch: 0671 loss_train: 0.0908 f1_train: 0.8859 time: 0.2494s
Epoch: 0672 loss_train: 0.0893 f1_train: 0.8925 time: 0.2473s
Epoch: 0673 loss_train: 0.0882 f1_train: 0.8877 time: 0.2464s
Epoch: 0674 loss_train: 0.0882 f1_train: 0.8920 time: 0.2540s
Epoch: 0675 loss_train: 0.0881 f1_train: 0.8880 time: 0.2416s
Epoch: 0676 loss_train: 0.0896 f1_train: 0.8866 time: 0.2366s
Epoch: 0677 loss_train: 0.0899 f1_train: 0.8858 time: 0.2518s
Epoch: 0678 loss_train: 0.0906 f1_train: 0.8828 time: 0.2334s
Epoch: 0679 loss_train: 0.0877 f1_train: 0.8892 time: 0.2434s
Epoch: 0680 loss_train: 0.0872 f1_train: 0.8918 time: 0.2568s
Epoch: 0681 loss_train: 0.0870 f1_train: 0.8851 time: 0.2419s
Epoch: 0

Epoch: 0799 loss_train: 0.0839 f1_train: 0.8908 time: 0.2475s
Epoch: 0800 loss_train: 0.0827 f1_train: 0.8961 time: 0.2470s
Epoch: 0801 loss_train: 0.0824 f1_train: 0.8956 time: 0.2492s
Epoch: 0802 loss_train: 0.0825 f1_train: 0.8925 time: 0.2481s
Epoch: 0803 loss_train: 0.0809 f1_train: 0.8981 time: 0.2524s
Epoch: 0804 loss_train: 0.0827 f1_train: 0.8914 time: 0.2423s
Epoch: 0805 loss_train: 0.0828 f1_train: 0.8934 time: 0.2445s
Epoch: 0806 loss_train: 0.0819 f1_train: 0.8968 time: 0.2494s
Epoch: 0807 loss_train: 0.0820 f1_train: 0.8928 time: 0.2483s
Epoch: 0808 loss_train: 0.0826 f1_train: 0.8929 time: 0.2636s
Epoch: 0809 loss_train: 0.0843 f1_train: 0.8878 time: 0.2445s
Epoch: 0810 loss_train: 0.0822 f1_train: 0.8934 time: 0.2624s
Epoch: 0811 loss_train: 0.0819 f1_train: 0.8959 time: 0.2580s
Epoch: 0812 loss_train: 0.0824 f1_train: 0.8953 time: 0.2524s
Epoch: 0813 loss_train: 0.0824 f1_train: 0.8944 time: 0.2755s
Epoch: 0814 loss_train: 0.0841 f1_train: 0.8967 time: 0.2481s
Epoch: 0

Epoch: 0932 loss_train: 0.0777 f1_train: 0.8959 time: 0.2683s
Epoch: 0933 loss_train: 0.0793 f1_train: 0.8987 time: 0.2569s
Epoch: 0934 loss_train: 0.0749 f1_train: 0.9023 time: 0.2651s
Epoch: 0935 loss_train: 0.0773 f1_train: 0.8999 time: 0.2460s
Epoch: 0936 loss_train: 0.0765 f1_train: 0.9012 time: 0.2684s
Epoch: 0937 loss_train: 0.0760 f1_train: 0.9011 time: 0.2576s
Epoch: 0938 loss_train: 0.0765 f1_train: 0.9031 time: 0.2808s
Epoch: 0939 loss_train: 0.0744 f1_train: 0.9025 time: 0.2654s
Epoch: 0940 loss_train: 0.0755 f1_train: 0.9033 time: 0.2847s
Epoch: 0941 loss_train: 0.0763 f1_train: 0.9050 time: 0.2480s
Epoch: 0942 loss_train: 0.0764 f1_train: 0.9000 time: 0.2625s
Epoch: 0943 loss_train: 0.0779 f1_train: 0.9013 time: 0.2540s
Epoch: 0944 loss_train: 0.0764 f1_train: 0.9030 time: 0.2514s
Epoch: 0945 loss_train: 0.0760 f1_train: 0.9050 time: 0.2582s
Epoch: 0946 loss_train: 0.0758 f1_train: 0.9039 time: 0.2505s
Epoch: 0947 loss_train: 0.0757 f1_train: 0.8996 time: 0.2498s
Epoch: 0

In [7]:
node_emb_test = None 
gcn.eval()
output = gcn(features, adj)
loss_test = loss(output[idx_test], labels[idx_test])


precision_score = precision(output[idx_test], labels[idx_test])
recall_score = recall(output[idx_test], labels[idx_test])
f1_test = fscore(output[idx_test], labels[idx_test])
f1_micro = fscore_micro(output[idx_test], labels[idx_test])
confusion_score = confusion(output[idx_test], labels[idx_test])

node_emb_test = gcn.node_embeddings[idx_test]
print(
    "Test set results:",
    "loss= {:.4f}".format(loss_test.item()),
    "precision= {:.4f}".format(precision_score),
    "recall= {:.4f}".format(recall_score),
    "f1_test= {:.4f}".format(f1_test),
    "f1_micro= {:.4f}".format(f1_micro),
    "confusion= {}".format(confusion_score)
)

Test set results: loss= 0.4403 precision= 0.8236 recall= 0.5217 f1_test= 0.6388 f1_micro= 0.9617 confusion= [[15466   121]
 [  518   565]]


In [8]:
print(node_emb_train)
print(node_emb_test)

np_node_emb = np.concatenate((node_emb_train.cpu().detach().numpy(), 
                            node_emb_test.cpu().detach().numpy()))

print(np_node_emb)

# Create embeddings pandas DataFrame 
node_emb_pd = pd.DataFrame(np_node_emb) 
node_emb_pd.columns = [f"NE_{i}" for i in range(np_node_emb.shape[1])]
display(node_emb_pd)

tensor([[0.0144, 0.0000, 0.0000,  ..., 0.3601, 0.0100, 0.0362],
        [0.0145, 0.0641, 0.0000,  ..., 0.1262, 0.0000, 0.0000],
        [0.6548, 0.0000, 0.3025,  ..., 0.0000, 0.2569, 0.1477],
        ...,
        [1.8311, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [1.0885, 0.0000, 2.3265,  ..., 0.0000, 0.0000, 0.0000],
        [0.5055, 0.6984, 1.6701,  ..., 0.0589, 0.0000, 0.0000]],
       grad_fn=<IndexBackward>)
tensor([[1.2898, 0.0000, 1.8571,  ..., 2.6637, 0.0000, 0.0000],
        [1.9974, 1.3258, 1.9122,  ..., 0.9287, 0.8299, 0.0000],
        [2.0133, 1.4777, 1.7579,  ..., 0.9109, 0.6846, 0.0000],
        ...,
        [0.1266, 0.2627, 0.5814,  ..., 0.0997, 0.0000, 0.0000],
        [1.1080, 0.7954, 1.6985,  ..., 1.0842, 0.6903, 0.0000],
        [0.1266, 0.4636, 0.7815,  ..., 0.0997, 0.0000, 0.0000]],
       grad_fn=<IndexBackward>)
[[0.01441987 0.         0.         ... 0.3600765  0.01003565 0.0362152 ]
 [0.01452639 0.06412103 0.         ... 0.1262463  0.         0.    

Unnamed: 0,NE_0,NE_1,NE_2,NE_3,NE_4,NE_5,NE_6,NE_7,NE_8,NE_9,...,NE_90,NE_91,NE_92,NE_93,NE_94,NE_95,NE_96,NE_97,NE_98,NE_99
0,0.014420,0.000000,0.000000,0.420473,0.342348,1.147490,0.000000,0.001206,0.109308,0.028354,...,0.232963,0.137486,0.135894,0.089023,0.950082,1.042759,0.150506,0.360076,0.010036,0.036215
1,0.014526,0.064121,0.000000,0.509672,0.080512,0.452515,0.000000,0.132689,0.043171,0.080764,...,0.439266,0.804838,0.192160,0.942009,1.576531,0.285096,0.206587,0.126246,0.000000,0.000000
2,0.654794,0.000000,0.302485,0.000000,0.000000,0.014492,0.098832,0.000000,0.000000,0.000000,...,0.080809,0.352127,0.000000,0.000000,1.048357,0.086639,0.354941,0.000000,0.256939,0.147740
3,0.988228,0.000000,0.357932,0.000000,0.000000,0.000000,0.098832,0.000000,0.000000,0.000000,...,0.000000,0.352127,0.000000,0.000000,1.048357,0.086639,0.354941,0.000000,0.256939,0.237226
4,1.024098,0.000000,0.000000,1.065698,0.595967,0.000000,0.000000,0.479150,0.000000,0.000000,...,0.000000,0.914960,0.991711,0.000000,2.853590,0.000000,2.902895,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46559,0.296902,0.000000,0.913962,1.412212,3.264163,2.379227,0.816699,0.140808,0.000000,0.510657,...,0.153997,0.000000,0.605920,0.312746,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
46560,0.296902,0.000000,0.583974,0.749661,3.091289,2.040888,0.938242,0.000000,0.000000,0.093894,...,0.226862,0.000000,0.142135,0.312746,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
46561,0.126640,0.262672,0.581446,0.642942,3.248808,1.313747,0.346274,0.010555,0.000000,0.103225,...,0.922574,0.000000,0.003342,0.000000,0.000000,0.000000,0.000000,0.099667,0.000000,0.000000
46562,1.107991,0.795370,1.698550,0.523229,1.684893,1.138802,0.792776,0.818324,0.538285,1.661696,...,0.108614,0.000000,1.033117,1.487325,0.000000,0.517031,0.000000,1.084218,0.690306,0.000000


In [9]:
data = ellipticdr.dataset_.copy()
txIds = data[(data["class"] != -1)]["txId"].values
node_emb_pd.insert(0, "txId", txIds)
print(txIds)

[232438397 232029206 232344069 ... 158375075 147478192 158375402]


In [10]:
display(node_emb_pd)

Unnamed: 0,txId,NE_0,NE_1,NE_2,NE_3,NE_4,NE_5,NE_6,NE_7,NE_8,...,NE_90,NE_91,NE_92,NE_93,NE_94,NE_95,NE_96,NE_97,NE_98,NE_99
0,232438397,0.014420,0.000000,0.000000,0.420473,0.342348,1.147490,0.000000,0.001206,0.109308,...,0.232963,0.137486,0.135894,0.089023,0.950082,1.042759,0.150506,0.360076,0.010036,0.036215
1,232029206,0.014526,0.064121,0.000000,0.509672,0.080512,0.452515,0.000000,0.132689,0.043171,...,0.439266,0.804838,0.192160,0.942009,1.576531,0.285096,0.206587,0.126246,0.000000,0.000000
2,232344069,0.654794,0.000000,0.302485,0.000000,0.000000,0.014492,0.098832,0.000000,0.000000,...,0.080809,0.352127,0.000000,0.000000,1.048357,0.086639,0.354941,0.000000,0.256939,0.147740
3,27553029,0.988228,0.000000,0.357932,0.000000,0.000000,0.000000,0.098832,0.000000,0.000000,...,0.000000,0.352127,0.000000,0.000000,1.048357,0.086639,0.354941,0.000000,0.256939,0.237226
4,3881097,1.024098,0.000000,0.000000,1.065698,0.595967,0.000000,0.000000,0.479150,0.000000,...,0.000000,0.914960,0.991711,0.000000,2.853590,0.000000,2.902895,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46559,80329479,0.296902,0.000000,0.913962,1.412212,3.264163,2.379227,0.816699,0.140808,0.000000,...,0.153997,0.000000,0.605920,0.312746,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
46560,158406298,0.296902,0.000000,0.583974,0.749661,3.091289,2.040888,0.938242,0.000000,0.000000,...,0.226862,0.000000,0.142135,0.312746,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
46561,158375075,0.126640,0.262672,0.581446,0.642942,3.248808,1.313747,0.346274,0.010555,0.000000,...,0.922574,0.000000,0.003342,0.000000,0.000000,0.000000,0.000000,0.099667,0.000000,0.000000
46562,147478192,1.107991,0.795370,1.698550,0.523229,1.684893,1.138802,0.792776,0.818324,0.538285,...,0.108614,0.000000,1.033117,1.487325,0.000000,0.517031,0.000000,1.084218,0.690306,0.000000


In [None]:
node_emb_pd.to_csv("elliptic_embs.csv", index=False)

In [14]:
# display(node_emb_pd["NE_99"].max())