## Features needed for training
1. Gas Price of the Transaction
2. Mean Gas Price of Transactions in the Last 10 Blocks
3. Standard Deviation of Gas Price of Transactions in the Last 10 Blocks
4. Mean Gas Price of Transactions by the same EOA (externally owned account)
5. Standard Deviation of Gas Price of Transactions by the same EOA
6. Usage of Gas Tokens
7. Predicted Gas Price

In [158]:
import os
import random

import numpy as np
import pandas as pd
from tqdm import tqdm
from web3 import Web3

In [159]:
# The RPC endpoint URL embeds a private API token, so it must not be committed
# with the notebook. Read it from the environment instead.
# NOTE(review): the token that was previously hard-coded here should be rotated.
WEB3_PROVIDER_URL = os.environ.get("WEB3_PROVIDER_URL")
if not WEB3_PROVIDER_URL:
    raise RuntimeError(
        "Set the WEB3_PROVIDER_URL environment variable to the Ethereum RPC endpoint URL."
    )

web3 = Web3(Web3.HTTPProvider(WEB3_PROVIDER_URL))
print(web3.is_connected())

True


In [160]:
# Load the insertion-attack records from CSV and report how many rows were read.
df_insertion = pd.read_csv(
    '../data/insertion_attacks.csv',
    delimiter=',',
)
print('# of insertion: ', df_insertion.shape[0])

# of insertion:  196691


### Feature 1
Gas price of transaction

In [161]:
def get_transaction_gas_price_in_eth_by_sender_and_block_nr(block_number, sender, web3):
    """Return the gas price (in ETH) of the first transaction from ``sender`` in a block.

    Parameters
    ----------
    block_number
        Number of the block to fetch (with full transaction objects).
    sender
        Address compared by exact equality against each transaction's ``from`` field.
    web3
        Client object exposing ``eth.get_block``.

    Returns
    -------
    The ``gasPrice`` of the first matching transaction divided by ``10**18``,
    or ``None`` when no transaction in the block was sent by ``sender``.
    """
    full_block = web3.eth.get_block(block_number, full_transactions=True)
    # Lazily scan the block; ``next`` stops at the first match.
    matching_prices = (
        tx["gasPrice"] / 10**18
        for tx in full_block.transactions
        if tx["from"] == sender
    )
    return next(matching_prices, None)

# Spot check: gas price (in ETH) of the given sender's transaction in block 5599805.
get_transaction_gas_price_in_eth_by_sender_and_block_nr(5599805, '0xFF28319a7cD2136ea7283E7cDb0675B50AC29Dd2', web3)

9.97e-09

### Feature 2 and 3
- Mean Gas Price of Transactions in the Last 10 Blocks
- Standard Deviation of Gas Price of Transactions in the Last 10 Blocks

In [162]:
def get_mean_and_std_gas_price_of_last_n_blocks(last_n_blocks, curr_block, web3):
    """Mean and standard deviation of gas prices (in ETH) over a window of blocks.

    The window consists of ``last_n_blocks`` consecutive blocks ending at
    ``curr_block``; ``curr_block`` itself is included. The window is cut off at
    block 0, so negative block numbers are never requested.

    Parameters
    ----------
    last_n_blocks
        Number of blocks in the window.
    curr_block
        Number of the newest block in the window.
    web3
        Client object exposing ``eth.get_block``.

    Returns
    -------
    tuple
        ``(mean, std)`` of every transaction's ``gasPrice / 10**18`` in the
        window, computed with ``np.mean`` / ``np.std``. ``(nan, nan)`` when the
        window contains no transactions.
    """
    # Exactly one RPC call per block in the window; the extra up-front
    # ``get_block(curr_block)`` whose result was never used has been removed.
    window_size = min(last_n_blocks, curr_block + 1)

    gas_prices = []
    for offset in range(window_size):
        block = web3.eth.get_block(curr_block - offset, full_transactions=True)
        gas_prices.extend(tx["gasPrice"] / 10**18 for tx in block.transactions)

    # np.mean/np.std of an empty list yield nan but emit a RuntimeWarning;
    # return nan explicitly instead.
    if not gas_prices:
        return np.nan, np.nan
    return np.mean(gas_prices), np.std(gas_prices)

### Feature 4 and 5
- Mean Gas Price of Transactions by the same EOA (externally owned account)
- Standard Deviation of Gas Price of Transactions by the same EOA

In [163]:
# prepare data for insertion

def get_mean_and_std_gas_price_of_last_n_blocks_of_same_EOA(last_n_blocks, curr_block, eoa_address, web3):
    """Mean and standard deviation of gas prices (in ETH) paid by one sender over a window of blocks.

    The window consists of ``last_n_blocks`` consecutive blocks ending at
    ``curr_block``; ``curr_block`` itself is included. The window is cut off at
    block 0, so negative block numbers are never requested.

    Parameters
    ----------
    last_n_blocks
        Number of blocks in the window.
    curr_block
        Number of the newest block in the window.
    eoa_address
        Sender address, compared by exact equality against each transaction's
        ``from`` field.
    web3
        Client object exposing ``eth.get_block``.

    Returns
    -------
    tuple
        ``(mean, std)`` of ``gasPrice / 10**18`` over the sender's transactions
        in the window, computed with ``np.mean`` / ``np.std``. ``(nan, nan)``
        when the sender has no transaction in the window.
    """
    # Exactly one RPC call per block in the window; the extra up-front
    # ``get_block(curr_block)`` whose result was never used has been removed.
    window_size = min(last_n_blocks, curr_block + 1)

    gas_prices = []
    for offset in range(window_size):
        block = web3.eth.get_block(curr_block - offset, full_transactions=True)
        gas_prices.extend(
            tx["gasPrice"] / 10**18
            for tx in block.transactions
            if tx["from"] == eoa_address
        )

    # np.mean/np.std of an empty list yield nan but emit a RuntimeWarning;
    # return nan explicitly instead.
    if not gas_prices:
        return np.nan, np.nan
    return np.mean(gas_prices), np.std(gas_prices)

### Feature 6
Usage of gas tokens: check whether the sender's transaction in the block is sent to the gas token contract address.

In [164]:
def is_transaction_using_gas_token(block_number, address, web3):
    """Tell whether ``address`` sent a transaction to the gas token address in a block.

    Parameters
    ----------
    block_number
        Number of the block to fetch (with full transaction objects).
    address
        Sender address, compared by exact equality against each transaction's
        ``from`` field.
    web3
        Client object exposing ``eth.get_block``.

    Returns
    -------
    bool
        ``True`` if at least one transaction in the block has ``from`` equal to
        ``address`` and ``to`` equal to the gas token address, else ``False``.
    """
    gas_token_address = '0x0000000000b3F879cb30FE243b4Dfee438691c04'
    full_block = web3.eth.get_block(block_number, full_transactions=True)

    return any(
        tx["from"] == address and tx["to"] == gas_token_address
        for tx in full_block.transactions
    )

### Feature 7
Predicted gas price: train a model for this.

Since the data was collected 3 years ago, the gas-price model is not trained on the gas prices of the 100 most recent blocks on chain, but on the gas prices of the last 100 blocks up to the highest block in the dataset.

In [165]:
def get_max_block_number() -> int:
    """Return the largest "Block Number" found in ``df_insertion`` as a plain ``int``."""
    block_numbers = df_insertion["Block Number"]
    highest = block_numbers.max()
    return int(highest)

In [166]:
# Show the dataset row(s) recorded at the highest block number.
df_insertion[df_insertion["Block Number"] >= get_max_block_number()]

Unnamed: 0,Block Number,First Attacker,Whale,Second Attacker,First-Whale Gas Price Delta,Whale-Second Gas Price Delta,Profit,Cost,Interface,Exchange,Token,Bot,Bot Cluster ID
195114,11299998,0xeD1cAa76a8cb7E671654A6c816a753acE6217BAE,0xf384aa91c1709Ec28b7422A2897E74f2d61036Cd,0xeD1cAa76a8cb7E671654A6c816a753acE6217BAE,10.09376,0.0,5.022881,4.400952,bot,Uniswap V2,yfi.group,0xEB46faa47A6A52519839A2e52c7b28a2Db17651e,37.0


In [167]:
def get_gas_prices_from_last_100_blocks(
    web3: "Web3",
    n_blocks: int = 100,
    highest_block: "int | None" = None,
) -> list[float]:
    """Collect the gas prices (in ETH) of all transactions in a run of consecutive blocks.

    Parameters
    ----------
    web3
        Client object exposing ``eth.get_block``.
    n_blocks
        Number of blocks to read, counting backwards from ``highest_block``
        (inclusive). Defaults to 100.
    highest_block
        Newest block of the run. When ``None`` (the default), the highest
        block number in the insertion data set is used
        (``get_max_block_number()``).

    Returns
    -------
    list[float]
        ``gasPrice / 10**18`` for every transaction, newest block first and in
        block order within each block.
    """
    if highest_block is None:
        highest_block = get_max_block_number()

    gas_prices = []
    for offset in range(n_blocks):
        block = web3.eth.get_block(highest_block - offset, full_transactions=True)
        # Conversion from wei to ETH; true division, so the values are floats.
        gas_prices.extend(tx["gasPrice"] / 10**18 for tx in block.transactions)

    return gas_prices

In [168]:
# Fetches the blocks one RPC request at a time, so this cell is slow; the saved
# run below was interrupted before it finished (KeyboardInterrupt).
gas_prices_last_100_blocks = get_gas_prices_from_last_100_blocks(web3)

KeyboardInterrupt: 

### Putting all together

In [None]:
def extract_features_insertion_attack(
    csv_column: str,
    n_blocks: int = 10,
    n_blocks_same_eoa: int = 20,
) -> pd.DataFrame:
    """Build the gas-price feature table for one address column of ``df_insertion``.

    For every row of ``df_insertion`` the address in ``csv_column`` and the
    row's "Block Number" are used to compute the per-transaction features via
    the helper functions defined earlier in this notebook.

    Parameters
    ----------
    csv_column
        Name of the ``df_insertion`` column holding the address to extract
        features for (e.g. "First Attacker", "Second Attacker", "Whale").
    n_blocks
        Window size (in blocks) for the overall gas-price mean/std. Default 10.
    n_blocks_same_eoa
        Window size (in blocks) for the same-sender gas-price mean/std.
        Default 20, which is the value this function has always used.
        NOTE(review): the output columns are named "...LastTenBlocksSameEOA"
        although the window is 20 blocks; confirm which one is intended.

    Returns
    -------
    pd.DataFrame
        One row per row of ``df_insertion`` with the columns ``blockNumber``,
        ``address``, ``gasPrice``, ``meanGasPriceLastTenBlocks``,
        ``stdGasPriceLastTenBlocks``, ``meanGasPriceLastTenBlocksSameEOA``,
        ``stdGasPriceLastTenBlocksSameEOA`` and ``usedGasToken``.
    """
    entries = []

    # iterrows() is a generator without a length, so the row count is passed
    # explicitly; without it tqdm cannot display a percentage or an ETA.
    rows = tqdm(df_insertion.iterrows(), total=len(df_insertion), desc=csv_column)
    for _, entry in rows:
        block_nr = entry["Block Number"]
        address = entry[csv_column]

        mean_gas_price, std_gas_price = get_mean_and_std_gas_price_of_last_n_blocks(
            n_blocks, block_nr, web3
        )
        mean_gas_price_same_eoa, std_gas_price_same_eoa = (
            get_mean_and_std_gas_price_of_last_n_blocks_of_same_EOA(
                n_blocks_same_eoa, block_nr, address, web3
            )
        )

        # Collect plain dicts and build the DataFrame once at the end.
        entries.append({
            "blockNumber": block_nr,
            "address": address,
            "gasPrice": get_transaction_gas_price_in_eth_by_sender_and_block_nr(block_nr, address, web3),
            "meanGasPriceLastTenBlocks": mean_gas_price,
            "stdGasPriceLastTenBlocks": std_gas_price,
            "meanGasPriceLastTenBlocksSameEOA": mean_gas_price_same_eoa,
            "stdGasPriceLastTenBlocksSameEOA": std_gas_price_same_eoa,
            "usedGasToken": is_transaction_using_gas_token(block_nr, address, web3),
        })

    return pd.DataFrame(entries)

**Feature Extraction First Attacker**

In [None]:
# Features for the address in the "First Attacker" column of every data-set row.
feature_insertion_first_atk_df = extract_features_insertion_attack(csv_column="First Attacker")

**Feature Extraction Second Attacker**


In [None]:
# Features for the address in the "Second Attacker" column of every data-set row.
feature_insertion_second_atk_df = extract_features_insertion_attack(csv_column="Second Attacker")

**Feature Extraction Whale/Victim**

In [None]:
# Features for the address in the "Whale" (victim) column of every data-set row.
feature_insertion_whale_txs_df = extract_features_insertion_attack(csv_column="Whale")


### Feature extraction - random transactions 

Function to get n random block numbers out of the blocks in which the attacks were collected.

In [169]:
def get_n_random_block_numbers_from_insertion_data(n: int):
    """Draw ``n`` random block numbers from the blocks that occur in ``df_insertion``.

    The draw is made over the unique values of the "Block Number" column using
    ``Series.sample`` with its default settings.

    Returns
    -------
    list
        The ``n`` sampled block numbers.
    """
    distinct_blocks = pd.Series(df_insertion["Block Number"].unique())
    return distinct_blocks.sample(n=n).tolist()

Function to get n random blocks in the block-range of the sampled attacks.

In [170]:
def get_n_random_block_numbers_from_insertion_data_range(n: int):
    """Draw ``n`` distinct random block numbers from the block range covered by ``df_insertion``.

    The range runs from the smallest to the largest "Block Number" in
    ``df_insertion``, both inclusive.

    Parameters
    ----------
    n
        Number of block numbers to draw.

    Returns
    -------
    list[int]
        ``n`` block numbers, without repetitions.

    Raises
    ------
    ValueError
        If ``n`` is larger than the number of blocks in the range.
    """
    min_block_number = int(df_insertion["Block Number"].min())
    max_block_number = int(df_insertion["Block Number"].max())

    block_number_range = range(min_block_number, max_block_number + 1)

    # random.sample draws without replacement, so no block is returned twice.
    # random.choices (used before) draws with replacement and could repeat a
    # block, which would later duplicate sampled transactions.
    return random.sample(block_number_range, k=n)

Function to get n random blocks from all blocks on Ethereum mainnet.

In [171]:
def get_n_random_block_numbers_from_total_block_range(n: int):
    """Draw ``n`` distinct random block numbers between block 1 and the current chain head.

    The chain head is read from ``web3.eth.block_number`` at call time; the
    range is ``1 .. head`` inclusive.

    Parameters
    ----------
    n
        Number of block numbers to draw.

    Returns
    -------
    list[int]
        ``n`` block numbers, without repetitions.

    Raises
    ------
    ValueError
        If ``n`` is larger than the number of blocks in the range.
    """
    current_block_number = web3.eth.block_number

    block_number_range = range(1, current_block_number + 1)

    # random.sample draws without replacement, so no block is returned twice.
    # random.choices (used before) draws with replacement and could repeat a
    # block, which would later duplicate sampled transactions.
    return random.sample(block_number_range, k=n)

Functions to get a random transaction out of a block.

In [172]:
def address_in_insertion_data(block_number: int, address: str):
    """Check whether ``address`` takes part in a recorded attack in the given block.

    Returns ``True`` when ``df_insertion`` has at least one row for
    ``block_number`` in which ``address`` appears in the "First Attacker",
    "Second Attacker" or "Whale" column; ``False`` otherwise (including when
    the block does not occur in ``df_insertion`` at all).
    """
    role_columns = ["First Attacker", "Second Attacker", "Whale"]

    # Guard clause: the block is not part of the data set at all.
    if block_number not in df_insertion["Block Number"].values:
        return False

    rows_in_block = df_insertion[df_insertion["Block Number"] == block_number]

    for column in role_columns:
        if address in rows_in_block[column].values:
            return True
    return False

In [173]:
def get_random_transaction_from_block(block_number: int):
    """Return the hash of a random transaction in a block whose sender is not in the attack data.

    The block's transactions are visited in random order and the first one
    whose sender is not reported by ``address_in_insertion_data`` for this
    block is returned.

    Parameters
    ----------
    block_number
        Number of the block to sample from.

    Returns
    -------
    The hash of the selected transaction (as listed in the block's
    ``transactions``), or ``None`` when the block has no transactions or every
    transaction's sender is already part of the insertion data for this block.
    """
    # Retrieve the transaction hashes of the block. Copy them into a new list
    # so that shuffling does not modify the object returned by web3.
    block = web3.eth.get_block(block_number)
    candidate_hashes = list(block['transactions'])

    if len(candidate_hashes) == 0:
        print(f"Block {block_number} has no transactions!")
        return None

    # Previously a rejected sample triggered a recursive retry whose result was
    # discarded, so the rejected hash was returned anyway, and the recursion
    # had no bound. Walking a shuffled copy returns only accepted hashes and
    # terminates after at most one pass over the block.
    random.shuffle(candidate_hashes)
    for transaction_hash in candidate_hashes:
        transaction = web3.eth.get_transaction(transaction_hash)
        if not address_in_insertion_data(block_number, transaction["from"]):
            return transaction_hash
        print("Randomly sampled transaction already in data set!")

    print(f"Block {block_number} has no transactions outside the data set!")
    return None

In [None]:
# Smoke test of the sampler on block 100.
get_random_transaction_from_block(100)

### Sample random transactions

In [174]:
def sample_random_transaction(n_per_source: int = 10):
    """Sample random transaction hashes from three different pools of blocks.

    Block numbers are drawn, ``n_per_source`` from each pool, in this order:

    1. blocks that occur in the insertion data set,
    2. blocks within the block range covered by the insertion data set,
    3. blocks from the whole chain.

    One random transaction is then picked from every drawn block.

    Parameters
    ----------
    n_per_source
        Number of blocks to draw from each of the three pools. Defaults to 10,
        the value that was previously hard-coded.

    Returns
    -------
    list
        Hashes of the sampled transactions. Blocks for which
        ``get_random_transaction_from_block`` returns nothing are skipped, so
        the list may hold fewer than ``3 * n_per_source`` entries.
    """
    # Get random block numbers
    block_numbers = [
        *get_n_random_block_numbers_from_insertion_data(n_per_source),
        *get_n_random_block_numbers_from_insertion_data_range(n_per_source),
        *get_n_random_block_numbers_from_total_block_range(n_per_source),
    ]

    # Get random transactions out of blocks
    transactions = []
    for block_number in block_numbers:
        transaction_hash = get_random_transaction_from_block(block_number)
        if transaction_hash:
            transactions.append(transaction_hash)

    return transactions
    

In [175]:
# Draw the random (non-attack) transaction hashes; blocks without transactions
# are reported on stdout and skipped.
random_transactions = sample_random_transaction()

Block 2428110 has no transactions!


In [176]:
# Preview the first five sampled transaction hashes.
random_transactions[:5]

[HexBytes('0x0af5e34951ff18a984e598fb535ceb3d06af79a5684ddaa6e0e167292fcc9723'),
 HexBytes('0x05a270a01e984fcf9f9e90e02abe5a998b0ead996fd0a732f3552ac0f1e86133'),
 HexBytes('0xc59f1a9bf9bddb94867b5d83e577fb228957227622b69d741dbc6845220cd680'),
 HexBytes('0x72571ac56759e7474346c67fb89b7e58614e8e264ae09f5d43827f0f73b92e11'),
 HexBytes('0x2b55e07f5a89fd09c3c85b18c83d91516a98a447185cf276e2feab4a8cb3d13b')]