## Features needed for training
1. Gas Price of the Transaction
2. Mean Gas Price of Transactions in the Last 10 Blocks
3.  Standard Deviation of Gas Price of Transactions in the
Last Ten Blocks
4. Mean Gas Price of Transactions by the same EOA (externally owned account)
5. Standard Deviation of Gas Price in Transactions by the
same EOA
6. Usage of Gas Tokens
7. Predicted Gas Price

In [49]:
from web3 import Web3
from tqdm import tqdm
import pandas as pd
import numpy as np
import random
import torch

In [50]:
web3 = Web3(Web3.HTTPProvider("https://intensive-sly-mountain.quiknode.pro/a3f5256d7f2af6541d483cce3f1d49c94c01879e/"))
print(web3.is_connected())

True


In [51]:
df_insertion = pd.read_csv ('../data/insertion_attacks.csv', delimiter=',')
print('# of insertion: ', len(df_insertion))

# of insertion:  196691


### Insertion-Dataset Exploration

Explore occurrences of same wallet and same block:

In [52]:
columns = ["First Attacker", "Whale", "Second Attacker"]

for col in columns:
    occurrences = df_insertion.groupby(['Block Number', col]).size().reset_index(name='Count')
    nr_rows_with_same_address_and_block_nr = occurrences[occurrences['Count'] > 1].shape[0]
    print(f"Number of rows in {col} with same address and block number: {nr_rows_with_same_address_and_block_nr}")

Number of rows in First Attacker with same address and block number: 17
Number of rows in Whale with same address and block number: 5730
Number of rows in Second Attacker with same address and block number: 17


In [53]:
concatenated_values = np.vstack((df_insertion[['Block Number', "First Attacker"]].values,
                                df_insertion[['Block Number', "Whale"]].values,
                                df_insertion[['Block Number', "Second Attacker"]].values))

concatenated_df = pd.DataFrame(concatenated_values, columns=['Block Number', 'Address'])
concatenated_df

Unnamed: 0,Block Number,Address
0,5599805,0xfF1b9745f68F84F036E5e92c920038d895FB701A
1,5574870,0x4fCc2FF6c75923D33B4F5aF4C524461014B2EE1C
2,5599933,0xfF1b9745f68F84F036E5e92c920038d895FB701A
3,5625057,0xfF1b9745f68F84F036E5e92c920038d895FB701A
4,5625138,0xfF1b9745f68F84F036E5e92c920038d895FB701A
...,...,...
590068,11022873,0x18a85f755F7508f9CEe4698003a8F5D1e231AB22
590069,11022878,0x12b967bba40E3220e9697B5fdBb75BA32626E1FC
590070,11022880,0xdB9c8428e68a1E6B2244C92127D2dcD708F893f4
590071,11022881,0xe4Ec5Ba53cAEcFE979570d5396d1d2dc5e6c3BD5


In [54]:
# Get unique rows
unique_df = concatenated_df.drop_duplicates()

print(f"{unique_df.shape[0]} unique blockNr-Address pairs")

473515 unique blockNr-Address pairs


In [55]:
unique_addresses = concatenated_df["Address"].unique()
print(f"{unique_addresses.shape[0]} unique addresses")

41350 unique addresses


Explore of duplicated occurrences of block number

In [56]:
occurrences= df_insertion.groupby(['Block Number']).size().reset_index(name='Count')
nr_rows_with_same_block_nr = occurrences[occurrences['Count'] > 1].shape[0]
print(f"Number of rows with same block number: {nr_rows_with_same_block_nr}")

Number of rows with same block number: 24929


In [57]:
unique_blocks = df_insertion["Block Number"].unique()
print(f"{unique_blocks.shape[0]} unique block numbers")

167503 unique block numbers


### Feature 1
Gas price of transaction

In [58]:
def get_transaction_gas_price_in_eth_by_sender_and_block_nr(block_number, sender, web3):
    block = web3.eth.get_block(block_number, full_transactions=True)
    
    for transaction in block.transactions:        
        if transaction["from"] == sender:
            return (transaction["gasPrice"]) / 10**18

get_transaction_gas_price_in_eth_by_sender_and_block_nr(5599805, '0xFF28319a7cD2136ea7283E7cDb0675B50AC29Dd2', web3)

9.97e-09

### Feature 2 and 3
- Mean Gas Price of Transactions in the Last 10 Blocks
- Standard Deviation of Gas Price of Transactions in the Last 10 Blocks

In [59]:
def get_mean_and_std_gas_price_of_last_n_blocks(last_n_blocks, curr_block, web3):
    web3.eth.get_block(curr_block)
    
    gas_prices = []
    for i in range(last_n_blocks):
        block = web3.eth.get_block(curr_block - i, full_transactions=True)
        
        for transaction in block.transactions:
            gas_prices.append(transaction["gasPrice"] / 10**18)
    
    return np.mean(gas_prices), np.std(gas_prices)

### Feature 4
- Mean Gas Price of Transactions by the same EOA (externally owned account)
- Standard Deviation of Gas Price in Transactions by the
same EOA

In [60]:
# prepare data for insertion

def get_mean_and_std_gas_price_of_last_n_blocks_of_same_EOA(last_n_blocks, curr_block, eoa_address, web3):
    web3.eth.get_block(curr_block)
    
    gas_prices = []
    for i in range(last_n_blocks):
        block = web3.eth.get_block(curr_block - i, full_transactions=True)
        
        for transaction in block.transactions:
            if transaction["from"] == eoa_address:
                gas_prices.append((transaction["gasPrice"]) / 10**18)
    return np.mean(gas_prices), np.std(gas_prices)

### Feature 6
Usage of gas tokens -> check if they use the gas token address

In [80]:
def is_transaction_using_gas_token(block_number, address, web3):
    block = web3.eth.get_block(block_number, full_transactions=True)
    
    for transaction in block.transactions:        
        if transaction["from"] == address and (transaction["to"] == '0x0000000000b3F879cb30FE243b4Dfee438691c04' or transaction["to"] == '0x88d60255F917e3eb94eaE199d827DAd837fac4cB'):
            return True
    return False

### Feature 7
Predicted gas price --> train modell for this

Since the data was collected 3 years ago, we don't predict the gas-price trained on the gas-prices of the last 100 block from now, but from the highest block in the dataset.

In [62]:
model = torch.jit.load('./lstm-feature-7.pt')
mean_train = torch.load('./mean_train.pt')
std_train = torch.load('./std_train.pt')

In [75]:
def get_predicted_gas_price(block_nr, address):
    gas_prices_last_15_transactions = get_gas_price_of_last_n_transactions(15, block_nr, address)
    
    model.eval()
    with torch.no_grad():
        tensor = torch.tensor(gas_prices_last_15_transactions).view(1,1,15)
        predicted_curr_gas_price = model(tensor)[:, -1].item()
        
        # transform back
        return predicted_curr_gas_price * std_train.item() + mean_train.item()
    
    
def get_gas_price_of_last_n_transactions(n, block_nr, address):
    gas_prices = []
    index_curr_transaction = None
    block = web3.eth.get_block(block_nr, full_transactions=True)
    transactions = block.transactions
    for index, transaction in enumerate(transactions):
            if transaction["from"] == address:
                index_curr_transaction = index
    
    if index_curr_transaction > n:
        for i in range(index_curr_transaction - n, index_curr_transaction):
            gas_prices.append(transactions[i]["gasPrice"] / 10**18)
    else:
        # prepend more transaction from previous blocks
        curr_block = block_nr - 1
        while len(gas_prices) < n :
            block = web3.eth.get_block(block_nr, full_transactions=True)
            
            for transaction in reversed(block.transactions):
                gas_prices.insert(0, transaction["gasPrice"] / 10**18)
                if len(gas_prices) == n:
                    break
            curr_block -= 1
    return gas_prices

### Putting all together

In [78]:
def extract_n_entries_insertion_attack(csv_column: str, nr_of_entries) -> pd.DataFrame:
    
    entries = []
    
    for index, entry in tqdm(df_insertion.sample(nr_of_entries).iterrows()):
        block_nr = entry["Block Number"]
        address = entry[csv_column]
        
        mean_gas_price_last_10_blocks, std_gas_price_last_10_blocks = get_mean_and_std_gas_price_of_last_n_blocks(10, block_nr, web3)
        
        mean_gas_price_last_n_blocks_same_EOA, std_gas_price_last_n_blocks_same_EOA = get_mean_and_std_gas_price_of_last_n_blocks_of_same_EOA(20, block_nr, address, web3)
        
        new_entry = {
            "blockNumber": block_nr,
            "address": address,
            "gasPrice": get_transaction_gas_price_in_eth_by_sender_and_block_nr(block_nr, address, web3),
            "meanGasPriceLastTenBlocks": mean_gas_price_last_10_blocks,
            "stdGasPriceLastTenBlocks": std_gas_price_last_10_blocks,
            "meanGasPriceLastTenBlocksSameEOA": mean_gas_price_last_n_blocks_same_EOA,
            "stdGasPriceLastTenBlocksSameEOA": std_gas_price_last_n_blocks_same_EOA,
            "usedGasToken": is_transaction_using_gas_token(block_nr, address, web3),
            "predictedGasPrice": get_predicted_gas_price(block_nr, address)
        }
        entries.append(new_entry)
    
    return pd.DataFrame(entries) 

**Feature Extraction First Attacker**

In [None]:
feature_insertion_first_atk_df = extract_n_entries_insertion_attack(csv_column="First Attacker", nr_of_entries=30)
feature_insertion_first_atk_df.to_csv('../data/insertion_atks_first_atk.csv')
feature_insertion_second_atk_df = extract_n_entries_insertion_attack(csv_column="Second Attacker", nr_of_entries=30)
feature_insertion_second_atk_df.to_csv('../data/insertion_atks_second_atk.csv')

9it [01:22,  9.40s/it]

**Feature Extraction Second Attacker**


**Feature Extraction Whale/Victim**

In [15]:
feature_insertion_whale_txs_df = extract_n_entries_insertion_attack(csv_column="Whale", nr_of_entries=100)


1it [00:12, 12.48s/it]


KeyboardInterrupt: 

### Feature extraction - random transactions 

Function to get n random block numbers out of the blocks in which the attacks where collected.

In [16]:
def get_n_random_block_numbers_from_insertion_data(n: int):
    
    unique_block_numbers = df_insertion["Block Number"].unique()
    random_block_numbers = pd.Series(unique_block_numbers).sample(n=n).tolist()
    return random_block_numbers

Function to get n random blocks in the block-range of the sampled attacks.

In [17]:
def get_n_random_block_numbers_from_insertion_data_range(n: int):
        
    min_block_number = df_insertion["Block Number"].min()
    max_block_number = df_insertion["Block Number"].max()
    
    block_number_range = range(min_block_number, max_block_number + 1)
    
    random_block_numbers = random.choices(block_number_range, k=n)
    return random_block_numbers

Function to get n random blocks over all blocks on ethereum main net.

In [18]:
def get_n_random_block_numbers_from_total_block_range(n: int):
        
    current_block_number = web3.eth.block_number
    
    block_number_range = range(1, current_block_number + 1)
    
    random_block_numbers = random.choices(block_number_range, k=n)
    return random_block_numbers

Function to get random transaction out of a block.

In [19]:
def address_in_insertion_data(block_number: int, address: str):
    
    if block_number not in df_insertion["Block Number"].values:
        return False
    
    df_subset_with_block_number = df_insertion[df_insertion["Block Number"] == block_number]
    
    if any(address in df_subset_with_block_number[col].values for col in ["First Attacker", "Second Attacker", "Whale"]):
        return True
    
    return False    

In [20]:
def get_random_transaction_from_block(block_number: int):
    
    # Retrieve txs from block
    block = web3.eth.get_block(block_number)
    transactions = block['transactions']
    
    if len(transactions) == 0:
        print(f"Block {block_number} has no transactions!")
        return None
    
    # Choose a random transaction from block
    random_transaction_hash = random.choice(transactions)    
    random_transaction = web3.eth.get_transaction(random_transaction_hash)
    
    if address_in_insertion_data(block_number, random_transaction["from"]):
        print("Randomly sampled transaction already in data set!")
        get_random_transaction_from_block(block_number)
    
    return random_transaction

In [21]:
get_random_transaction_from_block(100)

Block 100 has no transactions!


### Sample random transactions

In [28]:
def sample_random_transaction(nr_of_random_blocks: int) -> pd.DataFrame:
    
    # Get random block numbers
    block_numbers = []
    
    random_blocks_per_sampling = int(nr_of_random_blocks/3)
    
    random_block_numbers_from_insertion_data = get_n_random_block_numbers_from_insertion_data(random_blocks_per_sampling)
    random_block_numbers_from_insertion_data_range = get_n_random_block_numbers_from_insertion_data_range(random_blocks_per_sampling)
    random_block_numbers_from_total_range = get_n_random_block_numbers_from_total_block_range(random_blocks_per_sampling)

    block_numbers.extend(random_block_numbers_from_insertion_data)
    block_numbers.extend(random_block_numbers_from_insertion_data_range)
    block_numbers.extend(random_block_numbers_from_total_range)
    
    # Get random transactions out of blocks
    entries = []
    
    
    for block in tqdm(block_numbers):
        transaction = get_random_transaction_from_block(block)
        if transaction:
            mean_gas_price_last_10_blocks, std_gas_price_last_10_blocks = get_mean_and_std_gas_price_of_last_n_blocks(10, block, web3)
            mean_gas_price_last_n_blocks_same_EOA, std_gas_price_last_n_blocks_same_EOA = get_mean_and_std_gas_price_of_last_n_blocks_of_same_EOA(20, block, transaction["from"], web3)
            
            address = transaction["from"]
            
            new_entry = {
                "blockNumber": block,
                "address": address,
                "gasPrice": transaction["gasPrice"] / 10**18,
                "meanGasPriceLastTenBlocks": mean_gas_price_last_10_blocks,
                "stdGasPriceLastTenBlocks": std_gas_price_last_10_blocks,
                "meanGasPriceLastTenBlocksSameEOA": mean_gas_price_last_n_blocks_same_EOA,
                "stdGasPriceLastTenBlocksSameEOA": std_gas_price_last_n_blocks_same_EOA,
                "usedGasToken": is_transaction_using_gas_token(block, address, web3),
                "predictedGasPrice": get_predicted_gas_price(block, address)
            }
            entries.append(new_entry)
                
    return pd.DataFrame(entries) 
    

In [29]:
feature_random_transactions_df = sample_random_transaction(10)

100%|██████████| 9/9 [01:26<00:00,  9.65s/it]


In [30]:
feature_random_transactions_df

Unnamed: 0,blockNumber,address,gasPrice,meanGasPriceLastTenBlocks,stdGasPriceLastTenBlocks,meanGasPriceLastTenBlocksSameEOA,stdGasPriceLastTenBlocksSameEOA,usedGasToken
0,10972278,0x22300dAe061ab0870Fc8503822e096816e2dD951,6.459092e-08,7.35222e-08,5.901775e-08,6.459092e-08,0.0,False
1,11022614,0xe62193Bc1c340EF2205C0Bd71691Fad5e5072253,2.257465e-07,9.166672e-08,1.221066e-07,8.296688e-07,5.992909e-07,False
2,11248573,0x20Dc0b9520CC2C2BE89F247061A2c8e310045949,5.14001e-08,5.91576e-08,2.303706e-08,5.14001e-08,1.3234890000000002e-23,False
3,9974388,0x44121903d6938E440837195aB554cACD1742d63d,2e-08,2.123011e-08,1.253226e-08,2e-08,0.0,False
4,10904225,0x9408faF5C34371636FD27107Cb8241D0978BF0c7,1.19e-07,1.340913e-07,4.692489e-08,1.19e-07,0.0,False
5,6495420,0x63AB0507f208248166F697AEfAe76A9Ba24ed6A6,6.2e-08,1.283176e-08,1.627956e-08,6.2e-08,0.0,False
6,10698926,0x275E6D0977AB69D68cEfE943755Ec34082202a05,1.59e-07,1.034696e-07,4.821043e-08,1.29e-07,3e-08,False
7,2464844,0x2a65Aca4D5fC5B5C859090a6c34d164135398226,2.5e-08,3.38178e-08,1.373328e-08,2.5e-08,6.617445e-24,False
8,8273900,0xd5e015739a8BEffF075C4eAA2013D27Df35FFDdc,1e-09,1.566779e-08,7.595185e-08,1e-09,0.0,False
