In [129]:
import pandas as pd 

In [130]:
pwd = r'X:\Datasets\Blockchain\xblock.pro\eth-phishing-detection\datasets\etherscan\ex_1'

In [132]:
phishing_tx = pd.read_csv(pwd + r'\phishing_node_tx.csv')
normal_tx = pd.read_csv(pwd + r'\normal_node_tx.csv')

In [134]:
tx_features = pd.concat([phishing_tx, normal_tx], axis=0, ignore_index=True)


In [135]:
tx_features.tail()

Unnamed: 0,address,balance_0,balance_1,balance_2,balance_3,balance_4,balance_5,balance_6,balance_7,balance_8,...,balance_41,balance_42,balance_43,balance_44,balance_45,balance_46,balance_47,balance_48,balance_49,label
6087,0xff5b76bcfc194410a37617b747a8f5505e57225f,56.660934,0.01,456.01,36.01,36.03739,44.53739,45.53739,53.08739,54.08739,...,2152.493857,2152.493857,2152.493857,2152.493857,2152.493857,2152.493857,2152.493857,2152.493857,2152.493857,1
6088,0xffbdd48a8ed97337e17d2584015081a9b538285e,0.1,1.1,2.1,55.156248,110.422816,170.005359,234.039583,284.785329,298.603517,...,809.194451,809.194451,809.194451,809.194451,809.194451,809.194451,809.194451,809.194451,809.194451,1
6089,0xffd6b9bc7e6aa15fff8033d5a19ed387de31f379,5.821953,5.921827,6.796517,6.927946,7.09004,7.13547,7.327551,7.37098,0.003631,...,7.038819,7.0806,15.680306,15.720181,16.074965,16.11681,16.25258,18.318512,20.862937,1
6090,0xfff547625741e87836166ebc69182e164f3a10fa,34.339995,68.709707,102.917031,136.638651,170.304861,203.96317,237.626745,271.299103,305.094887,...,1059.372958,1059.372958,1059.372958,1059.372958,1059.372958,1059.372958,1059.372958,1059.372958,1059.372958,1
6091,0xfffc21bc8b74fd9f9b86be03d7a35afdcda0f0fc,0.005,107.83535,217.79843,336.72629,336.73129,718.59457,718.59557,770.10557,770.10557,...,770.10557,770.10557,770.10557,770.10557,770.10557,770.10557,770.10557,770.10557,770.10557,1


In [136]:
import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

print(device)

cpu


In [137]:
# LSTM 分类模型

import numpy as np
from torch.utils.data import Dataset, DataLoader
from torch import nn
import torch.nn.functional as F

class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super(LSTM, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        
    def forward(self, x):
        rout, (h_n, h_c) = self.lstm(x, None)
        # print(rout.shape, rout[-1,:].shape)
        out = self.fc(rout.reshape(-1, self.hidden_size))
        return out

In [138]:
# 数据集传入data loader
class TxDataset(Dataset):
    def __init__(self, tx_features, labels):
        self.tx_features = tx_features
        self.labels = labels
        
    def __len__(self):
        return len(self.tx_features)
    
    def __getitem__(self, idx):
        return self.tx_features[idx], self.labels[idx]

# 数据集划分
from sklearn.model_selection import train_test_split

feature = tx_features.copy()

X = feature.drop(['label', 'address'], axis=1).values
y = feature['label'].values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 数据集转换
X_train = torch.from_numpy(X_train).float().to(device)
X_test = torch.from_numpy(X_test).float().to(device)

y_train = torch.from_numpy(y_train).long().to(device)
y_test = torch.from_numpy(y_test).long().to(device)

# 数据集封装
train_dataset = TxDataset(X_train, y_train)
test_dataset = TxDataset(X_test, y_test)


In [139]:
batch_size = 32
input_size = train_dataset[0][0].shape[0]
hidden_size = 64
num_layers = 2
num_classes = 2

print(input_size)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

model = LSTM(input_size, hidden_size, num_layers, num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


50


In [140]:
from sklearn.metrics import classification_report

In [141]:
# Train the model
epoches = 50
total_step = len(train_loader)
for epoch in range(epoches):
    for i, (data, labels) in enumerate(train_loader):
        # Forward pass
        outputs = model(data)
        loss = criterion(outputs, labels)
        
        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        
        if (i+1) % 10 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                .format(epoch+1, epoches, i+1, total_step, loss.item()), end='\r')
        
    # Test the model
    model.eval()
    prediction = []
    with torch.no_grad():
        correct = 0
        total = 0
        for data, labels in test_loader:
            outputs = model(data)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            prediction.append(predicted.cpu().numpy())
    
    prediction = np.concatenate(prediction)
    result = classification_report(y_test.cpu().numpy(), prediction, output_dict=True)['0'].values()
    result = list(result)

    print("Epoch [{}/{}], Precision: {:.3f}, Recall: {:.3f}, F1: {:.3f}".format(epoch+1, epoches, *result))


Epoch [1/50], Precision: 0.733, Recall: 0.786, F1: 0.759
Epoch [2/50], Precision: 0.805, Recall: 0.716, F1: 0.758
Epoch [3/50], Precision: 0.819, Recall: 0.762, F1: 0.789
Epoch [4/50], Precision: 0.838, Recall: 0.801, F1: 0.819
Epoch [5/50], Precision: 0.809, Recall: 0.845, F1: 0.826
Epoch [6/50], Precision: 0.875, Recall: 0.729, F1: 0.795
Epoch [7/50], Precision: 0.865, Recall: 0.767, F1: 0.813
Epoch [8/50], Precision: 0.873, Recall: 0.771, F1: 0.819
Epoch [9/50], Precision: 0.880, Recall: 0.795, F1: 0.835
Epoch [10/50], Precision: 0.820, Recall: 0.789, F1: 0.804
Epoch [11/50], Precision: 0.873, Recall: 0.797, F1: 0.834
Epoch [12/50], Precision: 0.923, Recall: 0.683, F1: 0.785
Epoch [13/50], Precision: 0.854, Recall: 0.799, F1: 0.825
Epoch [14/50], Precision: 0.863, Recall: 0.793, F1: 0.827
Epoch [15/50], Precision: 0.849, Recall: 0.842, F1: 0.845
Epoch [16/50], Precision: 0.840, Recall: 0.862, F1: 0.851
Epoch [17/50], Precision: 0.892, Recall: 0.775, F1: 0.830
Epoch [18/50], Precisio