In [1]:
import time 
import math
import torch
import pandas as pd
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.nn.parameter import Parameter
from torch.nn.modules.module import Module

import cryptoaml.datareader as cdr
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import confusion_matrix

In [2]:
ellipticdr = cdr.get_data("elliptic")

In [3]:
# GCN model https://github.com/tkipf/pygcn
class GraphConvolution(Module):
    """
    Simple GCN layer, similar to https://arxiv.org/abs/1609.02907
    """

    def __init__(self, 
                 in_features, 
                 out_features, 
                 bias=True):
        
        super(GraphConvolution, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.weight = Parameter(torch.FloatTensor(in_features, out_features))
        if bias:
            self.bias = Parameter(torch.FloatTensor(out_features))
        else:
            self.register_parameter("bias", None)
        self.reset_parameters()

    def reset_parameters(self):
        stdv = 1. / math.sqrt(self.weight.size(1))
        self.weight.data.uniform_(-stdv, stdv)
        if self.bias is not None:
            self.bias.data.uniform_(-stdv, stdv)

    def forward(self, x, adj):
        support = torch.mm(x, self.weight)
        output = torch.spmm(adj, support)
        if self.bias is not None:
            return output + self.bias
        else:
            return output

    def __repr__(self):
        return self.__class__.__name__ + ' (' \
               + str(self.in_features) + ' -> ' \
               + str(self.out_features) + ')'
    
class GCN(nn.Module):
    def __init__(self, 
                 nfeat, 
                 nhid, 
                 nclass, 
                 dropout):
        super(GCN, self).__init__()
        self.gc1 = GraphConvolution(nfeat, nhid)
        self.gc2 = GraphConvolution(nhid, nclass)
        self.dropout = dropout

    def forward(self, x, adj):
        x = F.relu(self.gc1(x, adj))
        x = F.dropout(x, self.dropout, training=self.training)
        
        # https://github.com/tkipf/pygcn/issues/26#issuecomment-435801483
        # "In this case it’s best to simply take the embeddings just before doing 
        # the last linear projection to the softmax logits. 
        # In other words, if the last layer is softmax(A*H*W), 
        # take either the embedding H directly or A*H."
       
        # extract node embeddings (A*H)
        self.node_embeddings = torch.mm(adj, x)
        
        x = self.gc2(x, adj)
        return F.log_softmax(x, dim=1)

In [4]:
import numpy as np
import scipy.sparse as sp
import torch

def load_elliptic(datareader):
    
    # get all labels  
    data = datareader.dataset_.copy()
    labelled_data = data[(data["class"] != -1)]
    
    # get features 
    feature_cols = ["txId"] + datareader.feature_cols_AF_
    labelled_features = labelled_data[feature_cols].copy()
    labelled_features.set_index("txId", inplace=True) 
    features = sp.csr_matrix(labelled_features.values, dtype=np.float32)    
    
    # build edges 
    tx_ids = labelled_features.index
    idx_map = {j: i for i, j in enumerate(tx_ids)}
    edges_unordered = datareader.edges_.copy() 
    edges_unordered = edges_unordered[(edges_unordered["txId2"].isin(set(tx_ids))) & 
                               (edges_unordered["txId1"].isin(set(tx_ids)))].values
    edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
                     dtype=np.int32).reshape(edges_unordered.shape)    
    adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])),
                    shape=(labelled_features.shape[0], labelled_features.shape[0]),
                    dtype=np.float32)
    
    # build symmetric adjacency matrix
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    adj = normalize(adj + sp.eye(adj.shape[0]))
    adj = sparse_mx_to_torch_sparse_tensor(adj)
    
    # get idx for train and test 
    total_train = labelled_data[labelled_data["ts"] <= 34].shape[0]
    idx_train = range(total_train)
    idx_train = torch.LongTensor(idx_train)   
    total_test = labelled_data[labelled_data["ts"] > 34].shape[0]
    idx_test = range(total_train, total_train+total_test)
    idx_test = torch.LongTensor(idx_test)
    
    # change data to torch tensors 
    features = torch.FloatTensor(np.array(features.todense()))
    labels = torch.LongTensor(labelled_data["class"].values)
    
    return adj, features, labels, idx_train, idx_test

def normalize(mx):
    """Row-normalize sparse matrix"""
    rowsum = np.array(mx.sum(1))
    r_inv = np.power(rowsum, -1).flatten()
    r_inv[np.isinf(r_inv)] = 0.
    r_mat_inv = sp.diags(r_inv)
    mx = r_mat_inv.dot(mx)
    return mx

def sparse_mx_to_torch_sparse_tensor(sparse_mx):
    """Convert a scipy sparse matrix to a torch sparse tensor."""
    sparse_mx = sparse_mx.tocoo().astype(np.float32)
    indices = torch.from_numpy(
        np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
    values = torch.from_numpy(sparse_mx.data)
    shape = torch.Size(sparse_mx.shape)
    return torch.sparse.FloatTensor(indices, values, shape)

def precision(output, labels):
    preds = output.max(1)[1].type_as(labels)
    return precision_score(labels, preds, average="binary")

def recall(output, labels):
    preds = output.max(1)[1].type_as(labels)
    return recall_score(labels, preds, average="binary")

def fscore_micro(output, labels):
    preds = output.max(1)[1].type_as(labels)
    return f1_score(labels, preds, average="micro")

def fscore(output, labels):
    preds = output.max(1)[1].type_as(labels)
    return f1_score(labels, preds, average="binary")

def confusion(output, labels):
    preds = output.max(1)[1].type_as(labels)
    return confusion_matrix(labels, preds)

In [5]:
# setup 

# build graph data 
adj, features, labels, idx_train, idx_test = load_elliptic(ellipticdr)

# model 
n_classes = 2
n_features = 166
n_hidden = 100
dropout = 0.5
gcn = GCN(nfeat=n_features,
          nhid=n_hidden,
          nclass=n_classes,
          dropout=dropout)

# optimizer 
learning_rate = 0.001
gcn_params = gcn.parameters()
optimizer = optim.Adam(gcn_params,
                       lr=learning_rate)
weight_ratio = torch.FloatTensor([0.3, 0.7])
loss = nn.CrossEntropyLoss(weight=weight_ratio)

# if torch.cuda.is_available():
#     gcn.cuda()
#     features = features.cuda()
#     adj = adj.cuda()
#     labels = labels.cuda()
#     idx_train = idx_train.cuda()
#     idx_test = idx_test.cuda()

In [6]:
# setup training 
epochs = 1000
best_loss = 1
node_emb_train = None 

for epoch in range(epochs):
    epoch_start = time.time()
    
    gcn.train()
    optimizer.zero_grad()
    output = gcn(features, adj)
    loss_train = loss(output[idx_train], labels[idx_train])
    
    f1_train = fscore(output[idx_train], labels[idx_train])
    
    loss_train.backward()
    optimizer.step()
    
    epoch_finish = time.time() - epoch_start

    if best_loss >= loss_train.item():
        node_emb_train = gcn.node_embeddings[idx_train]

    print(
        "Epoch: {:04d}".format(epoch+1),
        "loss_train: {:.4f}".format(loss_train.item()),
        "f1_train: {:.4f}".format(f1_train),
        "time: {:.4f}s".format(epoch_finish)
    )

print("Optimization Finished!")
print(node_emb_train.shape)

Epoch: 0001 loss_train: 4.3496 f1_train: 0.1904 time: 0.2460s
Epoch: 0002 loss_train: 3.5720 f1_train: 0.1961 time: 0.1948s
Epoch: 0003 loss_train: 2.7399 f1_train: 0.2141 time: 0.1935s
Epoch: 0004 loss_train: 2.1970 f1_train: 0.2398 time: 0.1759s
Epoch: 0005 loss_train: 1.8622 f1_train: 0.2586 time: 0.1879s
Epoch: 0006 loss_train: 1.5127 f1_train: 0.2776 time: 0.1792s
Epoch: 0007 loss_train: 1.3232 f1_train: 0.3008 time: 0.1986s
Epoch: 0008 loss_train: 1.1446 f1_train: 0.3217 time: 0.2272s
Epoch: 0009 loss_train: 1.0620 f1_train: 0.3362 time: 0.2237s
Epoch: 0010 loss_train: 0.9787 f1_train: 0.3488 time: 0.2229s
Epoch: 0011 loss_train: 0.9536 f1_train: 0.3585 time: 0.2264s
Epoch: 0012 loss_train: 0.9455 f1_train: 0.3647 time: 0.2168s
Epoch: 0013 loss_train: 0.8918 f1_train: 0.3716 time: 0.2226s
Epoch: 0014 loss_train: 0.8890 f1_train: 0.3769 time: 0.2118s
Epoch: 0015 loss_train: 0.8540 f1_train: 0.3939 time: 0.2122s
Epoch: 0016 loss_train: 0.8449 f1_train: 0.3940 time: 0.2110s
Epoch: 0

Epoch: 0134 loss_train: 0.2632 f1_train: 0.7059 time: 0.2405s
Epoch: 0135 loss_train: 0.2710 f1_train: 0.7078 time: 0.2647s
Epoch: 0136 loss_train: 0.2702 f1_train: 0.7041 time: 0.2249s
Epoch: 0137 loss_train: 0.2541 f1_train: 0.7090 time: 0.2323s
Epoch: 0138 loss_train: 0.2672 f1_train: 0.7016 time: 0.2211s
Epoch: 0139 loss_train: 0.2564 f1_train: 0.7109 time: 0.2189s
Epoch: 0140 loss_train: 0.2587 f1_train: 0.7048 time: 0.2220s
Epoch: 0141 loss_train: 0.2577 f1_train: 0.7051 time: 0.2381s
Epoch: 0142 loss_train: 0.2552 f1_train: 0.7066 time: 0.2378s
Epoch: 0143 loss_train: 0.2555 f1_train: 0.7131 time: 0.2205s
Epoch: 0144 loss_train: 0.2566 f1_train: 0.7056 time: 0.2546s
Epoch: 0145 loss_train: 0.2558 f1_train: 0.7101 time: 0.2157s
Epoch: 0146 loss_train: 0.2523 f1_train: 0.7117 time: 0.2186s
Epoch: 0147 loss_train: 0.2489 f1_train: 0.7186 time: 0.2228s
Epoch: 0148 loss_train: 0.2396 f1_train: 0.7242 time: 0.2203s
Epoch: 0149 loss_train: 0.2517 f1_train: 0.7151 time: 0.2208s
Epoch: 0

Epoch: 0267 loss_train: 0.1648 f1_train: 0.8024 time: 0.2187s
Epoch: 0268 loss_train: 0.1669 f1_train: 0.7980 time: 0.2318s
Epoch: 0269 loss_train: 0.1646 f1_train: 0.8037 time: 0.2861s
Epoch: 0270 loss_train: 0.1660 f1_train: 0.8011 time: 0.2207s
Epoch: 0271 loss_train: 0.1694 f1_train: 0.7963 time: 0.2243s
Epoch: 0272 loss_train: 0.1634 f1_train: 0.7962 time: 0.2213s
Epoch: 0273 loss_train: 0.1655 f1_train: 0.7981 time: 0.2473s
Epoch: 0274 loss_train: 0.1666 f1_train: 0.7962 time: 0.2246s
Epoch: 0275 loss_train: 0.1636 f1_train: 0.8046 time: 0.2240s
Epoch: 0276 loss_train: 0.1614 f1_train: 0.8035 time: 0.2260s
Epoch: 0277 loss_train: 0.1640 f1_train: 0.8025 time: 0.2119s
Epoch: 0278 loss_train: 0.1633 f1_train: 0.7995 time: 0.2815s
Epoch: 0279 loss_train: 0.1623 f1_train: 0.7996 time: 0.1742s
Epoch: 0280 loss_train: 0.1650 f1_train: 0.8100 time: 0.1838s
Epoch: 0281 loss_train: 0.1649 f1_train: 0.8036 time: 0.2627s
Epoch: 0282 loss_train: 0.1632 f1_train: 0.8085 time: 0.1761s
Epoch: 0

Epoch: 0400 loss_train: 0.1389 f1_train: 0.8321 time: 0.2670s
Epoch: 0401 loss_train: 0.1362 f1_train: 0.8330 time: 0.2494s
Epoch: 0402 loss_train: 0.1420 f1_train: 0.8294 time: 0.2256s
Epoch: 0403 loss_train: 0.1402 f1_train: 0.8272 time: 0.2578s
Epoch: 0404 loss_train: 0.1412 f1_train: 0.8295 time: 0.3055s
Epoch: 0405 loss_train: 0.1400 f1_train: 0.8267 time: 0.3246s
Epoch: 0406 loss_train: 0.1387 f1_train: 0.8301 time: 0.2608s
Epoch: 0407 loss_train: 0.1393 f1_train: 0.8315 time: 0.2784s
Epoch: 0408 loss_train: 0.1375 f1_train: 0.8318 time: 0.2702s
Epoch: 0409 loss_train: 0.1363 f1_train: 0.8335 time: 0.2649s
Epoch: 0410 loss_train: 0.1387 f1_train: 0.8343 time: 0.2892s
Epoch: 0411 loss_train: 0.1401 f1_train: 0.8342 time: 0.3564s
Epoch: 0412 loss_train: 0.1363 f1_train: 0.8360 time: 0.2988s
Epoch: 0413 loss_train: 0.1373 f1_train: 0.8335 time: 0.2981s
Epoch: 0414 loss_train: 0.1370 f1_train: 0.8355 time: 0.2831s
Epoch: 0415 loss_train: 0.1365 f1_train: 0.8367 time: 0.3121s
Epoch: 0

Epoch: 0533 loss_train: 0.1224 f1_train: 0.8523 time: 0.2438s
Epoch: 0534 loss_train: 0.1218 f1_train: 0.8538 time: 0.2431s
Epoch: 0535 loss_train: 0.1239 f1_train: 0.8509 time: 0.2585s
Epoch: 0536 loss_train: 0.1228 f1_train: 0.8512 time: 0.2595s
Epoch: 0537 loss_train: 0.1243 f1_train: 0.8499 time: 0.2405s
Epoch: 0538 loss_train: 0.1232 f1_train: 0.8513 time: 0.2307s
Epoch: 0539 loss_train: 0.1216 f1_train: 0.8512 time: 0.2339s
Epoch: 0540 loss_train: 0.1222 f1_train: 0.8495 time: 0.2390s
Epoch: 0541 loss_train: 0.1219 f1_train: 0.8496 time: 0.2349s
Epoch: 0542 loss_train: 0.1223 f1_train: 0.8479 time: 0.2412s
Epoch: 0543 loss_train: 0.1226 f1_train: 0.8522 time: 0.2039s
Epoch: 0544 loss_train: 0.1218 f1_train: 0.8522 time: 0.1977s
Epoch: 0545 loss_train: 0.1264 f1_train: 0.8507 time: 0.1946s
Epoch: 0546 loss_train: 0.1208 f1_train: 0.8533 time: 0.1922s
Epoch: 0547 loss_train: 0.1246 f1_train: 0.8457 time: 0.1932s
Epoch: 0548 loss_train: 0.1227 f1_train: 0.8518 time: 0.2078s
Epoch: 0

Epoch: 0666 loss_train: 0.1157 f1_train: 0.8570 time: 0.2358s
Epoch: 0667 loss_train: 0.1160 f1_train: 0.8625 time: 0.2381s
Epoch: 0668 loss_train: 0.1151 f1_train: 0.8617 time: 0.2508s
Epoch: 0669 loss_train: 0.1119 f1_train: 0.8610 time: 0.2344s
Epoch: 0670 loss_train: 0.1149 f1_train: 0.8595 time: 0.2342s
Epoch: 0671 loss_train: 0.1129 f1_train: 0.8621 time: 0.2465s
Epoch: 0672 loss_train: 0.1144 f1_train: 0.8591 time: 0.2360s
Epoch: 0673 loss_train: 0.1167 f1_train: 0.8572 time: 0.2290s
Epoch: 0674 loss_train: 0.1110 f1_train: 0.8629 time: 0.2308s
Epoch: 0675 loss_train: 0.1122 f1_train: 0.8602 time: 0.2477s
Epoch: 0676 loss_train: 0.1162 f1_train: 0.8594 time: 0.2317s
Epoch: 0677 loss_train: 0.1122 f1_train: 0.8553 time: 0.2320s
Epoch: 0678 loss_train: 0.1132 f1_train: 0.8618 time: 0.2454s
Epoch: 0679 loss_train: 0.1139 f1_train: 0.8609 time: 0.2459s
Epoch: 0680 loss_train: 0.1163 f1_train: 0.8593 time: 0.2379s
Epoch: 0681 loss_train: 0.1169 f1_train: 0.8547 time: 0.2504s
Epoch: 0

Epoch: 0799 loss_train: 0.1080 f1_train: 0.8666 time: 0.2292s
Epoch: 0800 loss_train: 0.1061 f1_train: 0.8699 time: 0.2310s
Epoch: 0801 loss_train: 0.1087 f1_train: 0.8658 time: 0.2427s
Epoch: 0802 loss_train: 0.1065 f1_train: 0.8721 time: 0.2447s
Epoch: 0803 loss_train: 0.1046 f1_train: 0.8720 time: 0.2423s
Epoch: 0804 loss_train: 0.1065 f1_train: 0.8723 time: 0.2424s
Epoch: 0805 loss_train: 0.1050 f1_train: 0.8665 time: 0.2381s
Epoch: 0806 loss_train: 0.1051 f1_train: 0.8679 time: 0.2447s
Epoch: 0807 loss_train: 0.1075 f1_train: 0.8612 time: 0.2449s
Epoch: 0808 loss_train: 0.1088 f1_train: 0.8652 time: 0.2516s
Epoch: 0809 loss_train: 0.1052 f1_train: 0.8701 time: 0.2341s
Epoch: 0810 loss_train: 0.1068 f1_train: 0.8711 time: 0.2328s
Epoch: 0811 loss_train: 0.1050 f1_train: 0.8666 time: 0.2509s
Epoch: 0812 loss_train: 0.1066 f1_train: 0.8693 time: 0.2475s
Epoch: 0813 loss_train: 0.1086 f1_train: 0.8667 time: 0.2454s
Epoch: 0814 loss_train: 0.1077 f1_train: 0.8645 time: 0.2548s
Epoch: 0

Epoch: 0932 loss_train: 0.1007 f1_train: 0.8751 time: 0.2662s
Epoch: 0933 loss_train: 0.1018 f1_train: 0.8758 time: 0.2459s
Epoch: 0934 loss_train: 0.1015 f1_train: 0.8752 time: 0.2491s
Epoch: 0935 loss_train: 0.0993 f1_train: 0.8746 time: 0.2474s
Epoch: 0936 loss_train: 0.1009 f1_train: 0.8723 time: 0.2526s
Epoch: 0937 loss_train: 0.1022 f1_train: 0.8711 time: 0.2443s
Epoch: 0938 loss_train: 0.1023 f1_train: 0.8746 time: 0.2544s
Epoch: 0939 loss_train: 0.0996 f1_train: 0.8700 time: 0.2438s
Epoch: 0940 loss_train: 0.0976 f1_train: 0.8802 time: 0.2376s
Epoch: 0941 loss_train: 0.1004 f1_train: 0.8769 time: 0.2353s
Epoch: 0942 loss_train: 0.0978 f1_train: 0.8735 time: 0.2318s
Epoch: 0943 loss_train: 0.0994 f1_train: 0.8763 time: 0.2364s
Epoch: 0944 loss_train: 0.1022 f1_train: 0.8725 time: 0.2439s
Epoch: 0945 loss_train: 0.1009 f1_train: 0.8734 time: 0.2720s
Epoch: 0946 loss_train: 0.1013 f1_train: 0.8728 time: 0.2403s
Epoch: 0947 loss_train: 0.1005 f1_train: 0.8725 time: 0.2534s
Epoch: 0

In [7]:
node_emb_test = None 
gcn.eval()
output = gcn(features, adj)
loss_test = loss(output[idx_test], labels[idx_test])


precision_score = precision(output[idx_test], labels[idx_test])
recall_score = recall(output[idx_test], labels[idx_test])
f1_test = fscore(output[idx_test], labels[idx_test])
f1_micro = fscore_micro(output[idx_test], labels[idx_test])
confusion_score = confusion(output[idx_test], labels[idx_test])

node_emb_test = gcn.node_embeddings[idx_test]
print(
    "Test set results:",
    "loss= {:.4f}".format(loss_test.item()),
    "precision= {:.4f}".format(precision_score),
    "recall= {:.4f}".format(recall_score),
    "f1_test= {:.4f}".format(f1_test),
    "f1_micro= {:.4f}".format(f1_micro),
    "confusion= {}".format(confusion_score)
)

Test set results: loss= 0.3388 precision= 0.7145 recall= 0.5522 f1_test= 0.6229 f1_micro= 0.9566 confusion= [[15348   239]
 [  485   598]]


In [8]:
print(node_emb_train)
print(node_emb_test)

np_node_emb = np.concatenate((node_emb_train.cpu().detach().numpy(), 
                            node_emb_test.cpu().detach().numpy()))

print(np_node_emb)

# Create embeddings pandas DataFrame 
node_emb_pd = pd.DataFrame(np_node_emb) 
node_emb_pd.columns = [f"NE_{i}" for i in range(np_node_emb.shape[1])]
display(node_emb_pd)

tensor([[0.0000, 0.0083, 0.3024,  ..., 0.1794, 1.6047, 0.8650],
        [0.0000, 0.0713, 0.0217,  ..., 0.3394, 0.2266, 0.9797],
        [0.0000, 0.2618, 0.3410,  ..., 0.0000, 0.1967, 0.6465],
        ...,
        [0.0000, 0.0000, 0.0000,  ..., 1.4406, 0.0000, 0.0000],
        [0.0000, 0.3884, 0.2667,  ..., 1.2470, 0.0000, 0.0000],
        [0.0000, 0.0845, 0.0000,  ..., 0.8972, 0.0000, 0.0000]],
       grad_fn=<IndexBackward>)
tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000, 13.4875],
        [ 0.4090,  1.5558,  0.0000,  ...,  0.0000,  0.0000,  0.7072],
        [ 0.2371,  1.3477,  0.0000,  ...,  0.0000,  0.0000,  1.4946],
        ...,
        [ 0.0000,  0.2896,  0.0000,  ...,  1.1371,  0.0000,  0.0000],
        [ 0.0000,  0.4188,  0.0000,  ...,  0.2333,  0.0000,  1.9477],
        [ 0.0000,  0.2896,  0.0000,  ...,  1.1371,  0.0000,  0.0000]],
       grad_fn=<IndexBackward>)
[[0.         0.00832899 0.30244613 ... 0.1793779  1.6046935  0.8649827 ]
 [0.         0.07125264 0.02169

Unnamed: 0,NE_0,NE_1,NE_2,NE_3,NE_4,NE_5,NE_6,NE_7,NE_8,NE_9,...,NE_90,NE_91,NE_92,NE_93,NE_94,NE_95,NE_96,NE_97,NE_98,NE_99
0,0.000000,0.008329,0.302446,1.323299,0.0,0.613717,0.0,0.488811,0.000000,0.000000,...,0.006043,0.000000,0.000000,0.000000,0.046523,0.626475,1.034606,0.179378,1.604694,0.864983
1,0.000000,0.071253,0.021693,0.072501,0.0,0.028073,0.0,0.108444,0.000000,0.021302,...,0.006307,0.000000,0.000000,0.000000,0.061458,0.384066,0.621358,0.339422,0.226634,0.979717
2,0.000000,0.261773,0.340979,0.000000,0.0,0.000000,0.0,0.273804,0.025174,0.059816,...,0.000000,0.217886,0.049815,0.050207,0.000000,1.775647,0.000000,0.000000,0.196674,0.646533
3,0.430564,0.261773,0.340979,0.000000,0.0,0.000000,0.0,0.234948,0.025174,0.000000,...,0.000000,0.217886,0.000000,0.000000,0.000000,1.264930,0.466865,0.000000,0.000000,0.346438
4,1.269725,6.051972,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.452678,...,0.000000,0.000000,0.041546,0.000000,2.234658,6.183120,0.000000,1.412065,1.704852,0.475080
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46559,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.095185,0.010942,0.000000,0.000000,0.863316,2.010445,0.000000
46560,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.085342,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.095185,0.010942,0.000000,0.000000,1.540508,1.055462,0.000000
46561,0.000000,0.289573,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.137098,0.000000,0.000000
46562,0.000000,0.418750,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,...,0.000000,0.127624,0.000000,0.321237,0.770373,0.000000,0.000000,0.233340,0.000000,1.947674


In [9]:
data = ellipticdr.dataset_.copy()
txIds = data[(data["class"] != -1)]["txId"].values
node_emb_pd.insert(0, "txId", txIds)
print(txIds)

[232438397 232029206 232344069 ... 158375075 147478192 158375402]


In [10]:
display(node_emb_pd)

Unnamed: 0,txId,NE_0,NE_1,NE_2,NE_3,NE_4,NE_5,NE_6,NE_7,NE_8,...,NE_90,NE_91,NE_92,NE_93,NE_94,NE_95,NE_96,NE_97,NE_98,NE_99
0,232438397,0.000000,0.008329,0.302446,1.323299,0.0,0.613717,0.0,0.488811,0.000000,...,0.006043,0.000000,0.000000,0.000000,0.046523,0.626475,1.034606,0.179378,1.604694,0.864983
1,232029206,0.000000,0.071253,0.021693,0.072501,0.0,0.028073,0.0,0.108444,0.000000,...,0.006307,0.000000,0.000000,0.000000,0.061458,0.384066,0.621358,0.339422,0.226634,0.979717
2,232344069,0.000000,0.261773,0.340979,0.000000,0.0,0.000000,0.0,0.273804,0.025174,...,0.000000,0.217886,0.049815,0.050207,0.000000,1.775647,0.000000,0.000000,0.196674,0.646533
3,27553029,0.430564,0.261773,0.340979,0.000000,0.0,0.000000,0.0,0.234948,0.025174,...,0.000000,0.217886,0.000000,0.000000,0.000000,1.264930,0.466865,0.000000,0.000000,0.346438
4,3881097,1.269725,6.051972,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.000000,0.000000,0.041546,0.000000,2.234658,6.183120,0.000000,1.412065,1.704852,0.475080
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
46559,80329479,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.095185,0.010942,0.000000,0.000000,0.863316,2.010445,0.000000
46560,158406298,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.085342,0.000000,...,0.000000,0.000000,0.000000,0.095185,0.010942,0.000000,0.000000,1.540508,1.055462,0.000000
46561,158375075,0.000000,0.289573,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,1.137098,0.000000,0.000000
46562,147478192,0.000000,0.418750,0.000000,0.000000,0.0,0.000000,0.0,0.000000,0.000000,...,0.000000,0.127624,0.000000,0.321237,0.770373,0.000000,0.000000,0.233340,0.000000,1.947674


In [11]:
node_emb_pd.to_csv("elliptic_embs.csv", index=False)

In [12]:
# display(node_emb_pd["NE_99"].max())