## Features needed for training
1. Gas Price of the Transaction
2. Mean Gas Price of Transactions in the Last 10 Blocks
3.  Standard Deviation of Gas Price of Transactions in the
Last Ten Blocks
4. Mean Gas Price of Transactions by the same EOA (externally owned account)
5. Standard Deviation of Gas Price in Transactions by the
same EOA
6. Usage of Gas Tokens
7. Predicted Gas Price

In [62]:
from web3 import Web3
from tqdm import tqdm
import pandas as pd
import numpy as np
import random
import torch
import requests

In [63]:
web3 = Web3(Web3.HTTPProvider("https://intensive-sly-mountain.quiknode.pro/a3f5256d7f2af6541d483cce3f1d49c94c01879e/"))
print(web3.is_connected())

True


In [64]:
df_insertion = pd.read_csv ('../data/insertion_attacks.csv', delimiter=',')
print('# of insertion: ', len(df_insertion))

# of insertion:  196691


#### Helper functions used in preparation of multiple features

In [77]:
def convert_from_wei_to_gwei(gas_price_in_wei):
    gas_price_in_gwei = float(web3.from_wei(gas_price_in_wei, "gwei"))
    return gas_price_in_gwei

In [90]:
def get_transaction_by_sender_and_block_nr(block_number, sender, web3):
    
    block = web3.eth.get_block(block_number, full_transactions=True)
    
    for transaction in block.transactions:        
        if transaction["from"] == sender:
            return transaction
    

### Feature 1
Gas price of transaction

In [66]:
def get_transaction_gas_price_in_eth_by_sender_and_block_nr(block_number, sender, web3):
    block = web3.eth.get_block(block_number, full_transactions=True)
    
    for transaction in block.transactions:        
        if transaction["from"] == sender:
            return convert_from_wei_to_gwei(transaction["gasPrice"])


get_transaction_gas_price_in_eth_by_sender_and_block_nr(5599805, '0xFF28319a7cD2136ea7283E7cDb0675B50AC29Dd2', web3)

9.97

In [87]:
def get_transaction_gas_price_in_gwei(transaction, web3):
    return convert_from_wei_to_gwei(transaction["gasPrice"])

### Feature 2 and 3
- Mean Gas Price of Transactions in the Last 10 Blocks
- Standard Deviation of Gas Price of Transactions in the Last 10 Blocks

In [67]:
def get_mean_and_std_gas_price_of_last_n_blocks(last_n_blocks, curr_block, web3):
    web3.eth.get_block(curr_block)
    
    gas_prices = []
    for i in range(last_n_blocks):
        block = web3.eth.get_block(curr_block - i, full_transactions=True)
        
        for transaction in block.transactions:
            gas_prices.append(convert_from_wei_to_gwei(transaction["gasPrice"]))
    
    return np.mean(gas_prices), np.std(gas_prices)

### Feature 4
- Mean Gas Price of Transactions by the same EOA (externally owned account)
- Standard Deviation of Gas Price in Transactions by the
same EOA

In [68]:
# prepare data for insertion

def get_mean_and_std_gas_price_of_last_n_blocks_of_same_EOA(last_n_blocks, curr_block, eoa_address, web3):
    web3.eth.get_block(curr_block)
    
    gas_prices = []
    for i in range(last_n_blocks):
        block = web3.eth.get_block(curr_block - i, full_transactions=True)
        
        for transaction in block.transactions:
            if transaction["from"] == eoa_address:
                gas_prices.append(convert_from_wei_to_gwei(transaction["gasPrice"]))
    return np.mean(gas_prices), np.std(gas_prices)

### Feature 6
Usage of gas tokens -> check if gas-token contract addresses are used in internal transactions via Etherscan API.  
Typically internal transaction of type *self-destruct*.

In [69]:
def get_internal_transactions(tx_hash):
    # API endpoint
    url = 'https://api.etherscan.io/api'

    # Parameters
    params = {
        'module': 'account',
        'action': 'txlistinternal',
        'txhash': tx_hash,
        'apikey': '1PN1111XBM2W5HIQCSMQH6RA65JVYPQM1R'
    }

    try:
        # Sending GET request
        response = requests.get(url, params=params, timeout=3)
    
        # Checking if request was successful
        if response.status_code == 200:
            data = response.json()
            return data["result"]
        else:
            #print('Error occurred:', response.status_code)
            return None
        
    except requests.exceptions.Timeout:
        #print('Request did not go through: timeout occurred')
        return None
    except requests.exceptions.ConnectionError:
        print('Connection error')
        return None

In [70]:
def is_gas_token_contract_in_internal_transaction(transaction_hash):
    
    gas_token_addresses = {"0x0000000000b3f879cb30fe243b4dfee438691c04": "GST2",
                           "0x88d60255f917e3eb94eae199d827dad837fac4cb": "GST1",
                           "0x0000000000004946c0e9f43f4dee607b0ef1fa1c": "CHI"}
        
    internal_transactions = get_internal_transactions(transaction_hash)
    
    if not internal_transactions:
        return False
    
    for transaction in internal_transactions:
        if transaction["from"] in gas_token_addresses.keys() or transaction["to"] in gas_token_addresses.keys():
            return True
    
    return False

In [71]:
def is_transaction_using_gas_token(block_number, address, web3):
    
    block = web3.eth.get_block(block_number, full_transactions=True)
    
    for transaction in block.transactions:  
        if not (transaction["from"] == address or transaction["to"] == address):
            continue
        else:
            transaction_hash = transaction["hash"].hex()
            return is_gas_token_contract_in_internal_transaction(transaction_hash)

In [81]:
def is_transaction_using_gas_token(transaction_hash):
    return is_gas_token_contract_in_internal_transaction(transaction_hash)

### Feature 7
Predicted gas price --> train modell for this

Since the data was collected 3 years ago, we don't predict the gas-price trained on the gas-prices of the last 100 block from now, but from the highest block in the dataset.

In [72]:
model = torch.jit.load('./lstm-feature-7.pt')
mean_train = torch.load('./mean_train.pt')
std_train = torch.load('./std_train.pt')

In [73]:
def get_predicted_gas_price(block_nr, address):
    gas_prices_last_15_transactions = get_gas_price_of_last_n_transactions(15, block_nr, address)
    
    model.eval()
    with torch.no_grad():
        tensor = torch.tensor(gas_prices_last_15_transactions).view(1,1,15)
        predicted_curr_gas_price = model(tensor)[:, -1].item()
        
        # transform back
        return predicted_curr_gas_price * std_train.item() + mean_train.item()
    
    
def get_gas_price_of_last_n_transactions(n, block_nr, address):
    gas_prices = []
    index_curr_transaction = None
    block = web3.eth.get_block(block_nr, full_transactions=True)
    transactions = block.transactions
    for index, transaction in enumerate(transactions):
            if transaction["from"] == address:
                index_curr_transaction = index
    
    if index_curr_transaction > n:
        for i in range(index_curr_transaction - n, index_curr_transaction):
            gas_prices.append(convert_from_wei_to_gwei(transaction["gasPrice"]))
    else:
        # prepend more transaction from previous blocks
        curr_block = block_nr - 1
        while len(gas_prices) < n :
            block = web3.eth.get_block(block_nr, full_transactions=True)
            
            for transaction in reversed(block.transactions):
                gas_prices.insert(0, convert_from_wei_to_gwei(transaction["gasPrice"]))
                if len(gas_prices) == n:
                    break
            curr_block -= 1
    return gas_prices

### Putting all together

In [92]:
def extract_n_entries_insertion_attack(csv_column: str, nr_of_entries) -> pd.DataFrame:
    
    entries = []
    
    for index, entry in tqdm(df_insertion.sample(nr_of_entries).iterrows()):
        
        block_nr = entry["Block Number"]
        address = entry[csv_column]
        
        transaction = get_transaction_by_sender_and_block_nr(block_nr, address, web3)
        transaction_hash = transaction["hash"].hex()
        
        mean_gas_price_last_10_blocks, std_gas_price_last_10_blocks = get_mean_and_std_gas_price_of_last_n_blocks(10, block_nr, web3)
        
        mean_gas_price_last_n_blocks_same_EOA, std_gas_price_last_n_blocks_same_EOA = get_mean_and_std_gas_price_of_last_n_blocks_of_same_EOA(20, block_nr, address, web3)
        
        new_entry = {
            "blockNumber": block_nr,
            "address": address,
            "transactionHash": transaction_hash,
            "gasPrice": get_transaction_gas_price_in_gwei(transaction, web3),
            "meanGasPriceLastTenBlocks": mean_gas_price_last_10_blocks,
            "stdGasPriceLastTenBlocks": std_gas_price_last_10_blocks,
            "meanGasPriceLastTenBlocksSameEOA": mean_gas_price_last_n_blocks_same_EOA,
            "stdGasPriceLastTenBlocksSameEOA": std_gas_price_last_n_blocks_same_EOA,
            "usedGasToken": is_transaction_using_gas_token(transaction_hash),
            "predictedGasPrice": get_predicted_gas_price(block_nr, address)
        }
        entries.append(new_entry)
    
    return pd.DataFrame(entries) 

**Feature Extraction First Attacker**

In [93]:
#feature_insertion_first_atk_df = extract_n_entries_insertion_attack(csv_column="First Attacker", nr_of_entries=10)
#feature_insertion_first_atk_df.to_csv('../data/insertion_atks_first_atk.csv')
#feature_insertion_first_atk_df

10it [02:06, 12.68s/it]


Unnamed: 0,blockNumber,address,gasPrice,meanGasPriceLastTenBlocks,stdGasPriceLastTenBlocks,meanGasPriceLastTenBlocksSameEOA,stdGasPriceLastTenBlocksSameEOA,usedGasToken,predictedGasPrice
0,10660145,0xaaa2e80AB7D7b3C216af30Fc8165E7823e74cc62,157.1,187.741131,44.03462,188.05,43.84065,False,170.477368
1,10627182,0x25b2A252C07A1e930a0bc87Cf250ca6DF007eA84,140.0,101.178055,48.743137,140.0,1.499998e-09,True,169.691838
2,11158728,0xa21caEbD27a296678176aC886735bfd18F875B8f,321.000031,107.708466,1199.887252,4832.590195,11835.62,False,171.009181
3,10938352,0xa21caEbD27a296678176aC886735bfd18F875B8f,85.0,76.978745,39.9692,85.0,7.000004e-09,False,171.881307
4,10816188,0x30b8235f492265A734347C0bF36E2FFcAD887be2,168.218613,110.771239,57.38116,136.829653,30.36238,False,169.46357
5,10628329,0xe4Ec5Ba53cAEcFE979570d5396d1d2dc5e6c3BD5,96.8,59.400777,26.772247,96.8,1.499998e-09,False,159.949819
6,11065112,0xa21caEbD27a296678176aC886735bfd18F875B8f,84.0,75.664731,22.638317,95.5,12.51998,False,172.073812
7,10346807,0x4ed50D27bae39043754AF35572958BeCe22f2C2A,391.696152,39.066554,89.167082,391.696152,0.0,True,172.050478
8,10960354,0xe4Ec5Ba53cAEcFE979570d5396d1d2dc5e6c3BD5,122.0,67.009696,63.745134,85.5,21.46509,False,171.965535
9,10718278,0x86254Cb5A96C161E503d3255d67Fdddec056FEfe,90.0,73.378585,28.359983,90.0,0.0,False,172.48381


**Feature Extraction Second Attacker**


In [None]:
#feature_insertion_second_atk_df = extract_n_entries_insertion_attack(csv_column="Second Attacker", nr_of_entries=30)
#feature_insertion_second_atk_df.to_csv('../data/insertion_atks_second_atk.csv')

**Feature Extraction Whale/Victim**

In [15]:
#feature_insertion_whale_txs_df = extract_n_entries_insertion_attack(csv_column="Whale", nr_of_entries=100)


1it [00:12, 12.48s/it]


KeyboardInterrupt: 

### Feature extraction - random transactions 

Function to get n random block numbers out of the blocks in which the attacks where collected.

In [94]:
def get_n_random_block_numbers_from_insertion_data(n: int):
    
    unique_block_numbers = df_insertion["Block Number"].unique()
    random_block_numbers = pd.Series(unique_block_numbers).sample(n=n).tolist()
    return random_block_numbers

Function to get n random blocks in the block-range of the sampled attacks.

In [95]:
def get_n_random_block_numbers_from_insertion_data_range(n: int):
        
    min_block_number = df_insertion["Block Number"].min()
    max_block_number = df_insertion["Block Number"].max()
    
    block_number_range = range(min_block_number, max_block_number + 1)
    
    random_block_numbers = random.choices(block_number_range, k=n)
    return random_block_numbers

Function to get n random blocks over all blocks on ethereum main net.

In [96]:
def get_n_random_block_numbers_from_total_block_range(n: int):
        
    current_block_number = web3.eth.block_number
    
    block_number_range = range(1, current_block_number + 1)
    
    random_block_numbers = random.choices(block_number_range, k=n)
    return random_block_numbers

Function to get random transaction out of a block.

In [97]:
def address_in_insertion_data(block_number: int, address: str):
    
    if block_number not in df_insertion["Block Number"].values:
        return False
    
    df_subset_with_block_number = df_insertion[df_insertion["Block Number"] == block_number]
    
    if any(address in df_subset_with_block_number[col].values for col in ["First Attacker", "Second Attacker", "Whale"]):
        return True
    
    return False    

In [98]:
def get_random_transaction_from_block(block_number: int):
    
    # Retrieve txs from block
    block = web3.eth.get_block(block_number)
    transactions = block['transactions']
    
    if len(transactions) == 0:
        print(f"Block {block_number} has no transactions!")
        return None
    
    # Choose a random transaction from block
    random_transaction_hash = random.choice(transactions)    
    random_transaction = web3.eth.get_transaction(random_transaction_hash)
    
    if address_in_insertion_data(block_number, random_transaction["from"]):
        print("Randomly sampled transaction already in data set!")
        get_random_transaction_from_block(block_number)
    
    return random_transaction

### Sample random transactions

In [101]:
def sample_random_transaction(nr_of_random_blocks: int) -> pd.DataFrame:
    
    # Get random block numbers
    block_numbers = []
    
    random_blocks_per_sampling = int(nr_of_random_blocks/3)
    
    random_block_numbers_from_insertion_data = get_n_random_block_numbers_from_insertion_data(random_blocks_per_sampling)
    random_block_numbers_from_insertion_data_range = get_n_random_block_numbers_from_insertion_data_range(random_blocks_per_sampling)
    random_block_numbers_from_total_range = get_n_random_block_numbers_from_total_block_range(random_blocks_per_sampling)

    block_numbers.extend(random_block_numbers_from_insertion_data)
    block_numbers.extend(random_block_numbers_from_insertion_data_range)
    block_numbers.extend(random_block_numbers_from_total_range)
    
    # Get random transactions out of blocks
    entries = []
    
    
    for block in tqdm(block_numbers):
        transaction = get_random_transaction_from_block(block)
        if transaction:
            mean_gas_price_last_10_blocks, std_gas_price_last_10_blocks = get_mean_and_std_gas_price_of_last_n_blocks(10, block, web3)
            mean_gas_price_last_n_blocks_same_EOA, std_gas_price_last_n_blocks_same_EOA = get_mean_and_std_gas_price_of_last_n_blocks_of_same_EOA(20, block, transaction["from"], web3)
            
            address = transaction["from"]
            transaction_hash = transaction["hash"].hex()

            
            new_entry = {
                "blockNumber": block,
                "address": address,
                "transactionHash": transaction_hash,
                "gasPrice": get_transaction_gas_price_in_gwei(transaction, web3),
                "meanGasPriceLastTenBlocks": mean_gas_price_last_10_blocks,
                "stdGasPriceLastTenBlocks": std_gas_price_last_10_blocks,
                "meanGasPriceLastTenBlocksSameEOA": mean_gas_price_last_n_blocks_same_EOA,
                "stdGasPriceLastTenBlocksSameEOA": std_gas_price_last_n_blocks_same_EOA,
                "usedGasToken": is_transaction_using_gas_token(transaction_hash),
                "predictedGasPrice": get_predicted_gas_price(block, address)
            }
            entries.append(new_entry)
                
    return pd.DataFrame(entries) 
    

In [102]:
#feature_random_transactions_df = sample_random_transaction(10)
#feature_random_transactions_df.to_csv('../data/random_sampled_transactions.csv')

100%|██████████| 9/9 [01:34<00:00, 10.47s/it]

Block 3389621 has no transactions!





In [103]:
#feature_random_transactions_df

Unnamed: 0,blockNumber,address,gasPrice,meanGasPriceLastTenBlocks,stdGasPriceLastTenBlocks,meanGasPriceLastTenBlocksSameEOA,stdGasPriceLastTenBlocksSameEOA,usedGasToken,predictedGasPrice
0,11063005,0x2F0F84173FBf597aE56e24cCacc3ef22C0c550d0,53.0,73.731292,405.425033,53.0,0.0,False,173.997202
1,10657571,0x94268A0c522eC1b46C23B4e60ef942EABc94D45C,162.796906,220.052025,79.493293,162.796906,0.0,False,170.36096
2,10547013,0xCD60E0a1D34879A28322a3244D48c4a081421B34,69.0,73.315605,25.559162,69.0,0.0,False,172.191
3,6449262,0xf39F6032A876D7ea1001C2c96416cE88f24E1b9c,12.8,15.397848,21.25373,12.8,0.0,False,164.683731
4,6041371,0x84681Cb4134F53784A6f30f8157adc628be4dB64,1.0,7.986379,17.288979,1.0,0.0,False,65.647157
5,8842099,0xa3dB009E03A2177B13fC4EA41045d0Fd34026cA9,1.0,13.905568,11.946664,1.0,0.0,False,65.647157
6,17499124,0xDe8dE160A7aa8DCFc0b711B77464F0a5BF32fd3F,15.235994,18.209321,18.695738,15.235994,0.0,False,172.2958
7,17373031,0x72cDebCfBA7e1c00b80801e157573428142433e2,55.8,62.449696,43.495257,55.8,0.0,False,172.542602
