# Calculating potential loss of transaction intent leakage

For detailed information on methodology, instructions and requirements, see `README.md`


In [1]:
import pandas as pd
import numpy as np 
import json
import requests 
from configurations import *

In [18]:
df_main = pd.read_csv(csv_file_path, usecols=['user_tx'])
#drop the duplicate transaction in case there is any
df_main = df_main.drop_duplicates(subset=['user_tx'])
df_main.head()

Unnamed: 0,user_tx
0,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...
1,0x354e8386267ca643793de913739df3f9680895776f3a...
2,0xee8fd2c76181afa14ca0da158e0a01bba2d3df8e62c5...
3,0x5f3954cc3cb4fbb88803a910d852aab6566af2866acf...
4,0xbd9c86df3327871d630a0921c65cb7bec9b0c199d80c...


In [19]:
#make a list of transaction hash we need to analyse
tx_hash_list = [x for x in df_main['user_tx'].to_list() if pd.notnull(x)]
print(f'There are {len(tx_hash_list)} transactions')

There are 37 transactions


## API calls - skip this if you already made them 

### infura
With this API, we want to get all the inputs necessary to simulate the transaction again later on. infura gives us all of these inputs except fot the timestamp of the transaction, which is why we need to use the Etherscan API later on.

#### Call infura for the first transaction

In [9]:
# create a df to store all the potenatially problematic transactions and the reason why 
problematic_transactions = pd.DataFrame(columns=['tx_hash or block', 'where_problem_happened'])

In [10]:
#the url and the api key are stored in the config file.
url = infura_url

#Get the infura response for the first transaction in the list to create a dataframe
payload = json.dumps({
  "jsonrpc": "2.0",
  "method": "eth_getTransactionByHash",
  "params": [tx_hash_list[0]],
  "id": 1
})
headers = {
  'Content-Type': 'application/json'
}

response = requests.request("POST", url, headers=headers, data=payload)
result = response.json()['result']
result = {k: None if not v else v for k, v in result.items()} # making sure none of the values are empty
df_infura = pd.DataFrame(result, index=[0])
df_infura.head()

Unnamed: 0,accessList,blockHash,blockNumber,chainId,from,gas,gasPrice,hash,input,maxFeePerGas,maxPriorityFeePerGas,nonce,r,s,to,transactionIndex,type,v,value,yParity
0,,0x2c1ca96b35f726a2171d3ebc050e390aca8dd1f83a23...,0x1283433,0x1,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,0x30eb5,0x11cd060113,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0x4a25d94a000000000000000000000000000000000000...,0x187a3c7e80,0x5e69ec0,0x707,0xad8743a8c08357fb122fe099ce4ab1cc5155197bf17d...,0x6ac3aca96164f1bf4582dc771aa945939eccac0c562a...,0x7a250d5630b4cf539739df2c5dacb4c659f2488d,0x2,0x2,0x0,0x0,0x0


#### Call infura for all other transactions - !! This will take a few minutes

In [11]:
# get the infura response for all the other transactions in the list and append the results to the above dataframe
for tx_hash in tx_hash_list[1:]:
  payload = json.dumps({
    "jsonrpc": "2.0",
    "method": "eth_getTransactionByHash",
    "params": [tx_hash],
    "id": 1
  })
  headers = {
    'Content-Type': 'application/json'
  }

  response = requests.request("POST", url, headers=headers, data=payload)

  if response.ok:
    result = response.json()['result']
    result = {k: None if not v else v for k, v in result.items()} # making sure none of the values are empty
    df_temp = pd.DataFrame(result, index=[0])
    df_infura = pd.concat([df_infura, df_temp])
    
  else: 
    print(f"error code {response.status_code} for transaction {tx_hash}")
    new_row = {'tx_hash or block':  tx_hash, 'where_problem_happened': 'infura'}
    problematic_transactions = pd.concat([problematic_transactions, pd.DataFrame([new_row])], ignore_index=True)

df_infura.head()


Unnamed: 0,accessList,blockHash,blockNumber,chainId,from,gas,gasPrice,hash,input,maxFeePerGas,maxPriorityFeePerGas,nonce,r,s,to,transactionIndex,type,v,value,yParity
0,,0x2c1ca96b35f726a2171d3ebc050e390aca8dd1f83a23...,0x1283433,0x1,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,0x30eb5,0x11cd060113,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0x4a25d94a000000000000000000000000000000000000...,0x187a3c7e80,0x5e69ec0,0x707,0xad8743a8c08357fb122fe099ce4ab1cc5155197bf17d...,0x6ac3aca96164f1bf4582dc771aa945939eccac0c562a...,0x7a250d5630b4cf539739df2c5dacb4c659f2488d,0x2,0x2,0x0,0x0,0x0
0,,0xdd26fb3d61779a2527538b20a6710090ef329f09a3c0...,0x128343e,0x1,0x4ffb89a61a6db0586aff308efcfce39207aed2b2,0x59970,0x1254a6dbb8,0x354e8386267ca643793de913739df3f9680895776f3a...,0x24856bc3000000000000000000000000000000000000...,0x15bb9caac7,0x854d13a5,0x491,0x498ee90a0565829b8b473ede7b76174226bc1d5ed8ad...,0x2289f4cb7afc50f5db6b839681ae3f5d999e1bcf0eff...,0x3fc91a3afd70395cd496c647d5a6cc9d4b2b7fad,0x1,0x2,0x1,0x0,0x1
0,,0x657b57c868eb09df8631cca1ddd349ee8ca921e33082...,0x1283449,0x1,0xbbf46514de004992d8dcbcec19f02f2e772ab51d,0x4ab7a,0x1172aa4101,0xee8fd2c76181afa14ca0da158e0a01bba2d3df8e62c5...,0x3593564c000000000000000000000000000000000000...,0x11bf4b06ba,0xe57e0,0x121,0xa80847747875f34798ef26d97d4ea1002a6ec099cf85...,0x193addaa244d04a85bfc29027fb9d26c6d5e8d1ab757...,0x3fc91a3afd70395cd496c647d5a6cc9d4b2b7fad,0x2,0x2,0x1,0x0,0x1
0,,0x4163d5676b7bd172a054a5d85dda15ae43563d24ac0c...,0x128344b,0x1,0xaed06a6a9737ac56b5c3c7ecdfb233d70d64fc38,0xe6be,0xfdc73a9d9,0x5f3954cc3cb4fbb88803a910d852aab6566af2866acf...,0x095ea7b3000000000000000000000000000000000022...,0x1469ab8236,0x99d399e3,0x0,0x4f715cf4d4321da48de6d3fcdb07cdfca6b0c28366ff...,0xddc690541719b963ec3ab29167402aa86ca7b0b14bdc...,0xdac17f958d2ee523a2206206994597c13d831ec7,0x4,0x2,0x0,0x0,0x0
0,,0x4163d5676b7bd172a054a5d85dda15ae43563d24ac0c...,0x128344b,0x1,0xaed06a6a9737ac56b5c3c7ecdfb233d70d64fc38,0x4b07c,0xfdc73a9d9,0xbd9c86df3327871d630a0921c65cb7bec9b0c199d80c...,0x24856bc3000000000000000000000000000000000000...,0x1469ab8236,0x99d399e3,0x1,0x86b62356a6073de56805fc48478b472a06a3ccbfb5cc...,0x42c6bb462a201bbc2e2621f591c4a2b7cb293206b369...,0x3fc91a3afd70395cd496c647d5a6cc9d4b2b7fad,0x5,0x2,0x0,0x0,0x0


In [12]:
print(f"there were {len(problematic_transactions)} transactions where the infura api call did not work")

there were 0 transactions where the infura api call did not work


#### merge infura response to main

In [13]:
df_main = df_main.merge(df_infura, left_on = 'user_tx', right_on = 'hash', how ='outer')
df_main.columns

Index(['user_tx', 'accessList', 'blockHash', 'blockNumber', 'chainId', 'from',
       'gas', 'gasPrice', 'hash', 'input', 'maxFeePerGas',
       'maxPriorityFeePerGas', 'nonce', 'r', 's', 'to', 'transactionIndex',
       'type', 'v', 'value', 'yParity'],
      dtype='object')

#### save the api results
Store the results in a file so that we do not have to rerun the calls if we need this data again

In [14]:
df_main.to_csv(f'data/intermediary/{name_of_incident}_infura.csv')

In [21]:
# read the file so if we want to rerun the notebook, there is no need to remake the infura calls
df_main = pd.read_csv(f'data/intermediary/{name_of_incident}_infura.csv', index_col=0)

### Etherscan 
We use this API to have the timestamp of the blocks rather than the transactions themselves to reduce the amount of API calls (tx and block time are the same for all tx in the block). We need the timestamp of the transactions because in the Tenderly API, if we do not override the timestamp, then it uses the current time as input variable

In [22]:
#converting the hexadecimal string into a decimal number
def hex_to_decimal(hex_str):
    return int(hex_str, 16)

df_main['block_number'] = df_main['blockNumber'].apply(hex_to_decimal)
df_main.head()

Unnamed: 0,user_tx,accessList,blockHash,blockNumber,chainId,from,gas,gasPrice,hash,input,...,nonce,r,s,to,transactionIndex,type,v,value,yParity,block_number
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,,0x85c6415cc35155eeabd6cd71e6814582f9dee28596a3...,0x12834ae,0x1,0x9ea02f652955b90c0dd4f256003e4e339d3a4184,0x352dd,0x14c7b6ec80,0x012778bb6330737bed53ca488e582500498d81e1db22...,0x3593564c000000000000000000000000000000000000...,...,0x0,0x383371f3a3fba29c35eeb4b29752ed5c7fc85c826c6d...,0xbe9f8b295d3edc205639cd2d7e275abbd9de4f89d567...,0x3fc91a3afd70395cd496c647d5a6cc9d4b2b7fad,0x2,0x0,0x26,0x2c68af0bb140000,,19412142
1,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,,0xfa109ad08c3f05907ff7a33b2d91aa879d90a58108f8...,0x128344e,0x1,0x76ec733f445358232ea24aaf03d4536057439bfc,0x2cf7c,0x1160c85b1a,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,0x3593564c000000000000000000000000000000000000...,...,0xd,0x92c2fc3e1f4913b5a6350b581a783752f52d68550409...,0x5d87ebb02ea6728d51e088b3189cad517f06ec29f256...,0x3fc91a3afd70395cd496c647d5a6cc9d4b2b7fad,0x2,0x2,0x1,0x29a2241af62c0000,0x1,19412046
2,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,,0x38aaa61c38a7d204d23dee4dc36870bf024d67e428da...,0x12834bc,0x1,0x30049fd3dde7a46a6441a0d41dcb1cf5aaa93e77,0x2e24f,0x1339037b82,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,0x24856bc3000000000000000000000000000000000000...,...,0xe,0xada86d5a8be88ba11516d207073bd78ddd217519b75a...,0x1d1c892a228966aab4de1b132639834f304b8e1988f6...,0x3fc91a3afd70395cd496c647d5a6cc9d4b2b7fad,0x4,0x2,0x0,0x0,0x0,19412156
3,0x1bcc89149f51daa2d79355a340d576af22555be34567...,,0xaed11f387b43ccea87b2ad2a65e29e9235a452edd1eb...,0x128345f,0x1,0xc4ba8f21c0dd755cfeff899c4f791c634ca5c6db,0x28a31,0x117a8b7f8d,0x1bcc89149f51daa2d79355a340d576af22555be34567...,0x3593564c000000000000000000000000000000000000...,...,0x37d,0xfd94d547d9253e13977325ad8430ac84663c5520cdac...,0x7739e932e873075d6503aae95c1985825962aeadc9dc...,0x3fc91a3afd70395cd496c647d5a6cc9d4b2b7fad,0x2,0x2,0x1,0x1e87f85809dc0000,0x1,19412063
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,,0x2c1ca96b35f726a2171d3ebc050e390aca8dd1f83a23...,0x1283433,0x1,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,0x30eb5,0x11cd060113,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0x4a25d94a000000000000000000000000000000000000...,...,0x707,0xad8743a8c08357fb122fe099ce4ab1cc5155197bf17d...,0x6ac3aca96164f1bf4582dc771aa945939eccac0c562a...,0x7a250d5630b4cf539739df2c5dacb4c659f2488d,0x2,0x2,0x0,0x0,0x0,19412019


In [26]:
#getting all the block numbers of the transactions we want to analyse
block_number_list = list(set([x for x in df_main['block_number'].to_list() if pd.notnull(x)]))
print(f'there are {len(block_number_list)} different blocks')

there are 30 different blocks


#### Get the ehterscan response for the first block in the list to create a dataframe

In [24]:
url_eth = f"https://api.etherscan.io/api?module=block&action=getblockreward&blockno={block_number_list[0]}&apikey={eth_scan_api_key}"

response_eth = requests.request("POST", url_eth)

result = response_eth.json()['result']
result = {k: None if not v else v for k, v in result.items()} # making sure none of the values are empty
df_eth = pd.DataFrame(result, index=[0])
df_eth.head()

Unnamed: 0,blockNumber,timeStamp,blockMiner,blockReward,uncles,uncleInclusionReward
0,19412096,1710162983,0x95222290dd7278aa3ddd389cc1e1d165cc4bafe5,85616354749290394,,0


#### Get the etherscan responses for the other blocks in the list - !!! This will take a few minutes

In [27]:
for block in block_number_list[1:]:
  url_temp = f"https://api.etherscan.io/api?module=block&action=getblockreward&blockno={block}&apikey={eth_scan_api_key}"
  response_temp = requests.request("POST", url_temp)

  if response_temp.ok:
    dct_temp = response_temp.json()['result']
    dct_temp = {k: None if not v else v for k, v in dct_temp.items()} # making sure none of the values are empty
    df_temp = pd.DataFrame(dct_temp, index=[0])
    df_eth = pd.concat([df_eth, df_temp])
    
  else: 
    print(f"error code {response_temp.status_code} for block {block}")
    new_row = {'tx_hash or block':  block, 'where_problem_happened': 'etherscan. This is a block number'}
    problematic_transactions = pd.concat([problematic_transactions, pd.DataFrame([new_row])], ignore_index=True)

In [28]:
print(f"there were {len(problematic_transactions[problematic_transactions['where_problem_happened'] ==  'etherscan. This is a block number'])} blocks where the etherscan api call did not work")

there were 0 blocks where the etherscan api call did not work


In [29]:
# quick cleaning for later
df_eth['blockNumber'] = df_eth['blockNumber'].astype(int)

#### merge etherscan results to main

In [30]:
df_main = df_main.merge(df_eth, left_on = 'block_number', right_on = 'blockNumber', how ='outer')
df_main.columns

Index(['user_tx', 'accessList', 'blockHash', 'blockNumber_x', 'chainId',
       'from', 'gas', 'gasPrice', 'hash', 'input', 'maxFeePerGas',
       'maxPriorityFeePerGas', 'nonce', 'r', 's', 'to', 'transactionIndex',
       'type', 'v', 'value', 'yParity', 'block_number', 'blockNumber_y',
       'timeStamp', 'blockMiner', 'blockReward', 'uncles',
       'uncleInclusionReward'],
      dtype='object')

#### save the results of etherscan 
Store the results in a file so that we do not have to rerun the calls if we need this data again


In [31]:
df_main.to_csv(f'data/intermediary/{name_of_incident}_etherscan.csv')

In [32]:
df_main = pd.read_csv(f'data/intermediary/{name_of_incident}_etherscan.csv', index_col=0)

### Tenderly !! This will take a few minutes
Here we finally do the simulation. We do it once at the original index position to get the amount of coin transferred originally. Then we do it again at index position 0.

In [33]:
headers = {
    'X-Access-Key': f'{tenderly_access_token}',
    'content-type': 'application/json',
}

#creating an empty DataFrame for the results
columns = ['tx_hash', 'index', 'type', 'raw_amount', 'dollar_value', 'token_contract_address', 'token_name', 'token_dollar_value', 'from', 'to', 'sender', 'timestamp']
df_results = pd.DataFrame(columns = columns)


#iterating over every row of the main dataframe (one row is one transaction)
for index, row in df_main.iterrows():
    tx_index_list = [0]
    tx_index_list.append(int(row['transactionIndex'], 0))
    # for each transaction, simulate twice: once for each index
    for tx_index in tx_index_list:
        json_data = {
        'network_id': int(row['chainId'], 0),
        'from': row['from'],
        'to': row['to'],
        'input': row['input'],
        'block_number': row['block_number'],
        'transaction_index': tx_index,
        'simulation_type': 'quick',
        'gas': int(row['gas'], 0),
        'value': int(row['value'], 0),
        'gas_price': int(row['gasPrice'], 0),
        'l1_timestamp': int(row['timeStamp'])
        }
        
        response = requests.post(
        tenderly_api_url,
        headers=headers,
        json=json_data,
        )

        try:
            for data in response.json()['transaction']['transaction_info']['asset_changes']:
                tx_type = data['type']
                tx_raw_amount = data['raw_amount']
                tx_dollar_value = data['dollar_value']
                sender = response.json()['transaction']['from']

                #sometimes the following values are empty 
                try:
                    contract_address = data['token_info']['contract_address']
                except:
                    contract_address = 'None'

                try:
                    token_name = data['token_info']['name']
                except:
                    token_name = 'None'
                try:
                    token_dollar_value = data['token_info']['dollar_value']
                except:
                    token_dollar_value = 'None'

                try:
                    tx_from = data['from']
                except:
                    tx_from = 'None'

                try:
                    tx_to = data['to']
                except: 
                    tx_to = 'None'

                new_row = {
                    'tx_hash' : row['hash'],
                    'from':tx_from,
                    'to': tx_to,
                    'index' : tx_index, 
                    'type': tx_type, 
                    'raw_amount': tx_raw_amount, 
                    'dollar_value' : tx_dollar_value, 
                    'token_contract_address': contract_address, 
                    'token_name': token_name, 
                    'token_dollar_value': token_dollar_value,
                    'sender': sender, 
                    'timestamp': int(row['timeStamp'])
                    }
                df_results = pd.concat([df_results, pd.DataFrame([new_row])], ignore_index=True)
        except:
            new_row = {'tx_hash or block':  row['hash'], 'where_problem_happened': 'tenderly'}
            problematic_transactions = pd.concat([problematic_transactions, pd.DataFrame([new_row])], ignore_index=True)
            


#### creating a new dataframe with only the transactions where tenderly api returned something.
This meanse that there are 2 index values for these good transactions.

In [34]:
grouped = df_results.groupby('tx_hash')
df_results_good = grouped.filter(lambda x: x['index'].nunique() == 2)
df_results_good.head()

Unnamed: 0,tx_hash,index,type,raw_amount,dollar_value,token_contract_address,token_name,token_dollar_value,from,to,sender,timestamp
0,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0,Transfer,807452531860501402532,,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,,,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,0xe1ebdf64f7f3a31723e767a561345f958233bb7d,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,1710162059
1,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0,Transfer,400000000000000000,1241.72001953125,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,WETH,3104.300048828125,0xe1ebdf64f7f3a31723e767a561345f958233bb7d,0x7a250d5630b4cf539739df2c5dacb4c659f2488d,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,1710162059
2,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0,Burn,400000000000000000,1241.72001953125,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,WETH,3104.300048828125,0x7a250d5630b4cf539739df2c5dacb4c659f2488d,,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,1710162059
3,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0,Transfer,400000000000000000,1241.34404296875,,Ethereum,3103.360107421875,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,0x7a250d5630b4cf539739df2c5dacb4c659f2488d,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,1710162059
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0,Transfer,400000000000000000,1241.34404296875,,Ethereum,3103.360107421875,0x7a250d5630b4cf539739df2c5dacb4c659f2488d,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,1710162059


In [35]:
print(
    'number of transactions for which tenderly worked:', df_results_good.tx_hash.nunique(),
    "\nnumber of transactions for which tenderly did not worked:", problematic_transactions[problematic_transactions['where_problem_happened'] == 'tenderly']['tx_hash or block'].nunique(),
    '\npercentage of properly simulated transactions with tenderly:', format(df_results_good.tx_hash.nunique()/(df_results_good.tx_hash.nunique() +  problematic_transactions[problematic_transactions['where_problem_happened'] == 'tenderly']['tx_hash or block'].nunique()), ".2%")
    )


number of transactions for which tenderly worked: 28 
number of transactions for which tenderly did not worked: 9 
percentage of properly simulated transactions with tenderly: 75.68%


#### Save the tenderly results 

In [36]:
#saving the transactions and it's simulated data where the tenderly api worked
df_results_good.to_csv(f'data/intermediary/{name_of_incident}_tenderly.csv')

#### Save the problematic transaction/block 

In [37]:
problematic_transactions.to_csv(f'data/results/{name_of_incident}_transactions_or_blocks_with_api_problem.csv')

## Data Wrangling

In [38]:
problematic_transactions = pd.read_csv(f'data/results/{name_of_incident}_transactions_or_blocks_with_api_problem.csv', index_col=0)

In [39]:
df_results_good = pd.read_csv(f'data/intermediary/{name_of_incident}_tenderly.csv', index_col=0)
print(f'we are now working with {df_results_good.tx_hash.nunique()} transactions that have been simulated')

we are now working with 28 transactions that have been simulated


### Some Cleanup and new columns
add some new columns and transform the raw amount of certain tokens that have been returned by tenderly in much bigger amounts

In [40]:
df_results_good['raw_amount'] = df_results_good['raw_amount'].astype(float)
df_results_good['sender_is_involved'] = (df_results_good['from'] == df_results_good['sender']) |  (df_results_good['to'] == df_results_good['sender'])
df_results_good['sender_gave_this'] = (df_results_good['from'] == df_results_good['sender'])
df_results_good['token_name'] = df_results_good['token_name'].fillna(df_results_good['token_contract_address'])
df_results_good['token_contract_address'] = df_results_good['token_contract_address'].fillna(df_results_good['token_name'])
df_results_good['dollar_value_net']  = np.where(df_results_good['sender_gave_this'], - df_results_good['dollar_value'], df_results_good['dollar_value'])

df_results_good.loc[df_results_good['token_name'] == 'WETH', 'raw_amount'] /= 1e+18
df_results_good.loc[df_results_good['token_name'] == 'Ethereum', 'raw_amount'] /= 1e+18
df_results_good.loc[df_results_good['token_name'] == 'USDC', 'raw_amount'] /= 1000000
df_results_good.loc[df_results_good['token_name'] == 'Tether', 'raw_amount'] /= 1000000
df_results_good.loc[df_results_good['token_name'] == 'Dai', 'raw_amount'] /= 1e+18
df_results_good.loc[df_results_good['token_name'] == 'Wrapped Bitcoin', 'raw_amount'] /= 1e+8

### get only the part of the transactions where the sender was directly involved

In [41]:
df_results_clean = df_results_good.copy()
df_senders = df_results_clean[df_results_clean['sender_is_involved']]
print(
    'out of the', df_results_clean.tx_hash.nunique(), 'transactions,', 
    'we could identify the sender for ', df_senders.tx_hash.nunique(), 'of them',
    )

out of the 28 transactions, we could identify the sender for  28 of them


#### add the transaction where we can not identify a sender in the problematic transactions df

In [42]:
transaction_with_no_senders = set(df_results_clean.tx_hash.unique()) - set(df_senders.tx_hash.unique())
for transaction in transaction_with_no_senders:
    new_row = {'tx_hash or block':  transaction, 'where_problem_happened': 'could not identify a sender'}
    problematic_transactions = pd.concat([problematic_transactions, pd.DataFrame([new_row])], ignore_index=True)

problematic_transactions.tail()

Unnamed: 0,tx_hash or block,where_problem_happened
8,0x6326451b6cee44291395fd3efa1f7144676392ade4f0...,tenderly
9,0x6326451b6cee44291395fd3efa1f7144676392ade4f0...,tenderly
10,0x8f762515bc5eb815f6683323c4226373eb53922f0007...,tenderly
11,0xe5be76c827fec38b17f44a4adef3a96553c5c88d017e...,tenderly
12,0xe5be76c827fec38b17f44a4adef3a96553c5c88d017e...,tenderly


### For each sender, aggregate the value of the tokens that are swapped

In [43]:
grouped = df_senders.groupby(['tx_hash', 'index', 'token_name', 'token_contract_address', 'sender_is_involved', 'sender_gave_this'])
aggregated_senders = grouped.agg(
    sum_raw_amount=('raw_amount', 'sum'),               
    sum_dollar_value=('dollar_value', 'sum'),            
    token_dollar_value=('token_dollar_value', 'mean')  # all values are the same so first or mean is fine
).reset_index()
aggregated_senders.head() # one row per tx per token per index

Unnamed: 0,tx_hash,index,token_name,token_contract_address,sender_is_involved,sender_gave_this,sum_raw_amount,sum_dollar_value,token_dollar_value
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,0,Ethereum,Ethereum,True,True,0.2,620.672021,3103.36
1,0x012778bb6330737bed53ca488e582500498d81e1db22...,0,KAIJUNO8,0x4fe8d4775b7cb2546b9ee86182081cdf8f77b053,True,False,1.654453e+28,338.104092,2.0436e-08
2,0x012778bb6330737bed53ca488e582500498d81e1db22...,2,Ethereum,Ethereum,True,True,0.2,620.672021,3103.36
3,0x012778bb6330737bed53ca488e582500498d81e1db22...,2,KAIJUNO8,0x4fe8d4775b7cb2546b9ee86182081cdf8f77b053,True,False,1.110371e+28,226.915507,2.0436e-08
4,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,0,Ethereum,Ethereum,True,True,3.0,9310.080322,3103.36


### calculte the net amounts for each sender (negative if the sender paid in this token, positive if the sender received the token)

In [44]:
aggregated_senders['raw_amount_net'] = aggregated_senders.apply(lambda row: row['sum_raw_amount'] if row['sender_gave_this'] else -row['sum_raw_amount'], axis=1)
aggregated_senders['dollar_value_net'] = aggregated_senders.apply(lambda row: row['sum_dollar_value'] if row['sender_gave_this'] else -row['sum_dollar_value'], axis=1)

aggregated_senders.head()


Unnamed: 0,tx_hash,index,token_name,token_contract_address,sender_is_involved,sender_gave_this,sum_raw_amount,sum_dollar_value,token_dollar_value,raw_amount_net,dollar_value_net
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,0,Ethereum,Ethereum,True,True,0.2,620.672021,3103.36,0.2,620.672021
1,0x012778bb6330737bed53ca488e582500498d81e1db22...,0,KAIJUNO8,0x4fe8d4775b7cb2546b9ee86182081cdf8f77b053,True,False,1.654453e+28,338.104092,2.0436e-08,-1.654453e+28,-338.104092
2,0x012778bb6330737bed53ca488e582500498d81e1db22...,2,Ethereum,Ethereum,True,True,0.2,620.672021,3103.36,0.2,620.672021
3,0x012778bb6330737bed53ca488e582500498d81e1db22...,2,KAIJUNO8,0x4fe8d4775b7cb2546b9ee86182081cdf8f77b053,True,False,1.110371e+28,226.915507,2.0436e-08,-1.110371e+28,-226.915507
4,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,0,Ethereum,Ethereum,True,True,3.0,9310.080322,3103.36,3.0,9310.080322


In [45]:
# Group by 'tx_hash', 'index', and 'token_name' and aggregate to get one row per group
grouped_df = aggregated_senders.groupby(['tx_hash', 'index', 'token_name', 'token_dollar_value'], as_index=False, dropna=False).agg({
    'raw_amount_net': 'sum',\
    'dollar_value_net': 'sum'
})
grouped_df.head()

Unnamed: 0,tx_hash,index,token_name,token_dollar_value,raw_amount_net,dollar_value_net
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,0,Ethereum,3103.36,0.2,620.672021
1,0x012778bb6330737bed53ca488e582500498d81e1db22...,0,KAIJUNO8,2.0436e-08,-1.654453e+28,-338.104092
2,0x012778bb6330737bed53ca488e582500498d81e1db22...,2,Ethereum,3103.36,0.2,620.672021
3,0x012778bb6330737bed53ca488e582500498d81e1db22...,2,KAIJUNO8,2.0436e-08,-1.110371e+28,-226.915507
4,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,0,Ethereum,3103.36,3.0,9310.080322


### Add all the tokens where the sender is involved
Only keep the rows where there are exactly 2 tokens

In [46]:
tokens_per_tx = grouped_df.groupby(['tx_hash'])['token_name'].unique().reset_index()
normal_transactions = tokens_per_tx[tokens_per_tx['token_name'].apply(lambda x: len(x) == 2)]
normal_transactions[['token_name_A', 'token_name_B']] = pd.DataFrame(normal_transactions['token_name'].tolist(), index=normal_transactions.index)
normal_transactions = normal_transactions.drop(columns=['token_name'])
tokens_df = pd.merge(normal_transactions, grouped_df, on='tx_hash', how = 'left')
tokens_df.head()

Unnamed: 0,tx_hash,token_name_A,token_name_B,index,token_name,token_dollar_value,raw_amount_net,dollar_value_net
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,Ethereum,KAIJUNO8,0,Ethereum,3103.36,0.2,620.672021
1,0x012778bb6330737bed53ca488e582500498d81e1db22...,Ethereum,KAIJUNO8,0,KAIJUNO8,2.0436e-08,-1.654453e+28,-338.104092
2,0x012778bb6330737bed53ca488e582500498d81e1db22...,Ethereum,KAIJUNO8,2,Ethereum,3103.36,0.2,620.672021
3,0x012778bb6330737bed53ca488e582500498d81e1db22...,Ethereum,KAIJUNO8,2,KAIJUNO8,2.0436e-08,-1.110371e+28,-226.915507
4,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Ethereum,Jim,0,Ethereum,3103.36,3.0,9310.080322


#### Add the transactions where there are not two tokens where the sender is involved to the problematic transactions df

In [47]:
transaction_without_two_tokens = set(df_senders.tx_hash.unique()) - set(tokens_df.tx_hash.unique()) 
for transaction in transaction_without_two_tokens:
    new_row = {'tx_hash or block':  transaction, 'where_problem_happened': 'only one token or more than two'}
    problematic_transactions = pd.concat([problematic_transactions, pd.DataFrame([new_row])], ignore_index=True)

problematic_transactions.tail()

Unnamed: 0,tx_hash or block,where_problem_happened
8,0x6326451b6cee44291395fd3efa1f7144676392ade4f0...,tenderly
9,0x6326451b6cee44291395fd3efa1f7144676392ade4f0...,tenderly
10,0x8f762515bc5eb815f6683323c4226373eb53922f0007...,tenderly
11,0xe5be76c827fec38b17f44a4adef3a96553c5c88d017e...,tenderly
12,0xe5be76c827fec38b17f44a4adef3a96553c5c88d017e...,tenderly


### Calculate the delta in tokens 

In [48]:
#create two dataframes, for the token A and B at index 0
token_A_df_0 = tokens_df[(tokens_df['token_name'] == tokens_df['token_name_A']) & (tokens_df['index'] == 0)][['tx_hash', 'token_dollar_value', 'dollar_value_net', 'raw_amount_net', 'token_name_A']]
token_B_df_0 = tokens_df[(tokens_df['token_name'] == tokens_df['token_name_B']) & (tokens_df['index'] == 0)][['tx_hash', 'token_dollar_value', 'dollar_value_net', 'raw_amount_net', 'token_name_B']]

token_A_df_0.rename(columns = {'token_dollar_value': 'token_A_dollar_value_index_0', 'dollar_value_net' : 'token_A_tx_dollar_value_index_0', 'raw_amount_net' : 'token_A_raw_amount_index_0'}, inplace = True)
token_B_df_0.rename(columns = {'token_dollar_value': 'token_B_dollar_value_index_0', 'dollar_value_net' : 'token_B_tx_dollar_value_index_0', 'raw_amount_net' : 'token_B_raw_amount_index_0'}, inplace = True)

token_A_df_0 = token_A_df_0.drop_duplicates()
token_B_df_0 = token_B_df_0.drop_duplicates()

In [49]:
#create two dataframes, for the token A and B at index other than 0
token_A_df_other = tokens_df[(tokens_df['token_name'] == tokens_df['token_name_A']) & (tokens_df['index'] != 0)][['tx_hash', 'token_dollar_value','dollar_value_net', 'raw_amount_net', 'token_name_A']]
token_B_df_other = tokens_df[(tokens_df['token_name'] == tokens_df['token_name_B']) & (tokens_df['index'] != 0)][['tx_hash', 'token_dollar_value', 'dollar_value_net', 'raw_amount_net',  'token_name_B']]

token_A_df_other.rename(columns = {'token_dollar_value': 'token_A_dollar_value_index_other', 'dollar_value_net' : 'token_A_tx_dollar_value_index_other', 'raw_amount_net' : 'token_A_raw_amount_index_other'}, inplace = True)
token_B_df_other.rename(columns = {'token_dollar_value': 'token_B_dollar_value_index_other', 'dollar_value_net' : 'token_B_tx_dollar_value_index_other', 'raw_amount_net' : 'token_B_raw_amount_index_other'}, inplace = True)

token_A_df_other = token_A_df_other.drop_duplicates()
token_B_df_other = token_B_df_other.drop_duplicates()

In [50]:
# merge the 4 dataframes to have one with all the data of the tx with one row per tx
#token_B_df_other_grouped, token_B_df_0_grouped, token_A_df_other_grouped, token_A_df_0_grouped
merged_df = pd.merge(token_B_df_other, token_B_df_0, on=['tx_hash', 'token_name_B'])
merged_df = pd.merge(merged_df, token_A_df_other, on=['tx_hash'])
merged_df = pd.merge(merged_df, token_A_df_0, on=['tx_hash', 'token_name_A'])
merged_df.columns 


Index(['tx_hash', 'token_B_dollar_value_index_other',
       'token_B_tx_dollar_value_index_other', 'token_B_raw_amount_index_other',
       'token_name_B', 'token_B_dollar_value_index_0',
       'token_B_tx_dollar_value_index_0', 'token_B_raw_amount_index_0',
       'token_A_dollar_value_index_other',
       'token_A_tx_dollar_value_index_other', 'token_A_raw_amount_index_other',
       'token_name_A', 'token_A_dollar_value_index_0',
       'token_A_tx_dollar_value_index_0', 'token_A_raw_amount_index_0'],
      dtype='object')

In [51]:
def calculate_difference(row, column_0, column_other):
    return row[column_0] - row[column_other]

In [52]:
# add new rows with the delta in tokens
merged_df['token_A_delta_raw_amount'] = merged_df.apply(lambda row: calculate_difference(row, 'token_A_raw_amount_index_0', 'token_A_raw_amount_index_other'), axis=1)
merged_df['token_B_delta_raw_amount'] = merged_df.apply(lambda row: calculate_difference(row, 'token_B_raw_amount_index_0', 'token_B_raw_amount_index_other'), axis=1)

merged_df['token_A_delta_dollar'] = merged_df.apply(lambda row: calculate_difference(row, 'token_A_tx_dollar_value_index_0', 'token_A_tx_dollar_value_index_other'), axis=1)
merged_df['token_B_delta_dollar'] = merged_df.apply(lambda row: calculate_difference(row, 'token_B_tx_dollar_value_index_0', 'token_B_tx_dollar_value_index_other'), axis=1)
merged_df.columns

Index(['tx_hash', 'token_B_dollar_value_index_other',
       'token_B_tx_dollar_value_index_other', 'token_B_raw_amount_index_other',
       'token_name_B', 'token_B_dollar_value_index_0',
       'token_B_tx_dollar_value_index_0', 'token_B_raw_amount_index_0',
       'token_A_dollar_value_index_other',
       'token_A_tx_dollar_value_index_other', 'token_A_raw_amount_index_other',
       'token_name_A', 'token_A_dollar_value_index_0',
       'token_A_tx_dollar_value_index_0', 'token_A_raw_amount_index_0',
       'token_A_delta_raw_amount', 'token_B_delta_raw_amount',
       'token_A_delta_dollar', 'token_B_delta_dollar'],
      dtype='object')

In [53]:
nearly_final = merged_df[['tx_hash','token_name_A', 'token_A_delta_raw_amount', 'token_A_delta_dollar', 'token_name_B',  'token_B_delta_raw_amount', 'token_B_delta_dollar']]
nearly_final = nearly_final.rename(columns = {'token_A_delta_dollar' : 'token_A_delta_dollar_tenderly', 'token_B_delta_dollar' : 'token_B_delta_dollar_tenderly'})
nearly_final.head()  # one row per transaction with the delta in raw amounts, and also in tenderly dollars

Unnamed: 0,tx_hash,token_name_A,token_A_delta_raw_amount,token_A_delta_dollar_tenderly,token_name_B,token_B_delta_raw_amount,token_B_delta_dollar_tenderly
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,Ethereum,0.0,0.0,KAIJUNO8,-5.440819e+27,-111.188585
1,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Ethereum,0.0,0.0,Jim,-3.297227e+24,-24.531371
2,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,Ethereum,-0.02469591,-76.640316,enqAI,0.0,0.0
3,0x1bcc89149f51daa2d79355a340d576af22555be34567...,ClosedAI,-1.446769e+21,-0.54575,Ethereum,0.0,0.0
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,-4.175898e+19,0.0,Ethereum,0.0,0.0


### Use the stable coin of the transaction to calculate the difference in dollars 
Tenderly gives us a value for the tokens, but this value is a bit strange for "meme" tokens, or unstable tokens. Therefore, we use the stable token that is present in the transaction (there is always one), calculate the worth of that transaction in dollar based on the amount of this stable token and the dollar value of that stable token, and finally calculate the worth of the unstable token based on the ratio of amount of unstable swapped for stable.

In [54]:
stable_coins = ['WETH', 'Ethereum', 'USDC', 'Tether', 'Wrapped Bitcoin', 'Dai']

In [55]:
# find the stable coin in order of preference of the transaction
def select_stable_coin(group):
    for coin in stable_coins:
        involved_row = group[(group['sender_is_involved'] == True) & (group['token_name'] == coin)]
        if not involved_row.empty:
            return coin
 
    return None  

# Group by 'tx_hash' and apply custom function to each group to find the stable coin for all the transactions
selected_stable_token = df_senders.groupby('tx_hash').apply(select_stable_coin).reset_index(name='selected_stable_coin')
selected_stable_token.head()


  selected_stable_token = df_senders.groupby('tx_hash').apply(select_stable_coin).reset_index(name='selected_stable_coin')


Unnamed: 0,tx_hash,selected_stable_coin
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,Ethereum
1,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Ethereum
2,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,Ethereum
3,0x1bcc89149f51daa2d79355a340d576af22555be34567...,Ethereum
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,Ethereum


In [56]:
# Create two dataframes with the dollar values and raw amounts of the stable coins at index 0 and the original index of the transaction
stable_coins_df_index_0 = df_senders[(df_senders['token_name'].isin(stable_coins)) & (df_senders['index'] == 0) & df_senders['raw_amount'] != 0][['tx_hash', 'token_name', 'dollar_value_net', 'raw_amount', 'sender_is_involved', 'sender_gave_this']]
stable_coins_df_index_0 = stable_coins_df_index_0.groupby(['tx_hash', 'token_name'])[['dollar_value_net', 'raw_amount']].sum().reset_index()


stable_coins_df_index_other = df_senders[(df_senders['token_name'].isin(stable_coins)) & (df_senders['index'] != 0) & df_senders['raw_amount'] != 0][['tx_hash', 'token_name', 'token_dollar_value', 'dollar_value_net', 'raw_amount', 'sender_gave_this']]
stable_coins_df_index_other = stable_coins_df_index_other.groupby(['tx_hash', 'token_name'])[['dollar_value_net', 'raw_amount']].sum().reset_index()
stable_coins_df_index_other.head()

Unnamed: 0,tx_hash,token_name,dollar_value_net,raw_amount
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,Ethereum,-620.672021,0.2
1,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Ethereum,-9310.080322,3.0
2,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,Ethereum,15328.173017,4.939218
3,0x1bcc89149f51daa2d79355a340d576af22555be34567...,Ethereum,-6827.392236,2.2
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,Ethereum,1241.344043,0.4


In [57]:
# two dataframes with one row per tx_hash with the value of ther stable token if it was exchanged by the sender 
selected_stable_coins_df_index_0 = pd.merge(selected_stable_token, stable_coins_df_index_0, on = ['tx_hash'])
selected_stable_coins_df_index_0 = selected_stable_coins_df_index_0.drop('token_name', axis = 1)
selected_stable_coins_df_index_0 = selected_stable_coins_df_index_0.rename(columns = {'dollar_value_net' : 'tx_stable_dollar_value_index_0_net', 'raw_amount' : 'stable_raw_amount_index_0'})


selected_stable_coins_df_index_other = pd.merge(selected_stable_token, stable_coins_df_index_other, on = ['tx_hash'])
selected_stable_coins_df_index_other = selected_stable_coins_df_index_other.drop('token_name', axis = 1)
selected_stable_coins_df_index_other = selected_stable_coins_df_index_other.rename(columns = { 'dollar_value_net' : 'tx_stable_dollar_value_index_other_net', 'raw_amount' : 'stable_raw_amount_index_other'})

selected_stable_coins_df_index_other.head() 

Unnamed: 0,tx_hash,selected_stable_coin,tx_stable_dollar_value_index_other_net,stable_raw_amount_index_other
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,Ethereum,-620.672021,0.2
1,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Ethereum,-9310.080322,3.0
2,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,Ethereum,15328.173017,4.939218
3,0x1bcc89149f51daa2d79355a340d576af22555be34567...,Ethereum,-6827.392236,2.2
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,Ethereum,1241.344043,0.4


In [58]:
# create a dataframe where we merge the dataframes to get two rows per transaction with the amount of tokens

filtered_df_all = df_senders.groupby(['tx_hash', 'index', 'token_name', 'sender_gave_this'])['raw_amount'].sum().reset_index()
df_null_0 = filtered_df_all[(filtered_df_all['index'] == 0 )]

merged_null_0_all = pd.merge(df_null_0, selected_stable_coins_df_index_0, on=['tx_hash'], how = 'left')
merged_null_0_all = merged_null_0_all.drop('index', axis = 1)
merged_null_0_all['other_token_value_index_0'] = merged_null_0_all['tx_stable_dollar_value_index_0_net'].abs() / merged_null_0_all['raw_amount']
merged_null_0_all.head()

Unnamed: 0,tx_hash,token_name,sender_gave_this,raw_amount,selected_stable_coin,tx_stable_dollar_value_index_0_net,stable_raw_amount_index_0,other_token_value_index_0
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,Ethereum,True,0.2,Ethereum,-620.672021,0.2,3103.36
1,0x012778bb6330737bed53ca488e582500498d81e1db22...,KAIJUNO8,False,1.654453e+28,Ethereum,-620.672021,0.2,3.751523e-26
2,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Ethereum,True,3.0,Ethereum,-9310.080322,3.0,3103.36
3,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Jim,False,6.627504e+26,Ethereum,-9310.080322,3.0,1.404764e-23
4,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,Ethereum,False,4.963914,Ethereum,15404.813333,4.963914,3103.36


In [59]:
# calculate the transaction and net transaction dollar value

calculated_dollar_df_all = pd.merge(filtered_df_all, merged_null_0_all.drop('raw_amount', axis = 1), on=['tx_hash', 'token_name', 'sender_gave_this'], how = 'left')
calculated_dollar_df_all['calculated_tx_dollar_value'] = calculated_dollar_df_all['raw_amount']* calculated_dollar_df_all['other_token_value_index_0']
calculated_dollar_df_all['calculated_tx_dollar_value_net']  = np.where(calculated_dollar_df_all['sender_gave_this'], - calculated_dollar_df_all['calculated_tx_dollar_value'], calculated_dollar_df_all['calculated_tx_dollar_value'])
calculated_dollar_df_all.tail()

Unnamed: 0,tx_hash,index,token_name,sender_gave_this,raw_amount,selected_stable_coin,tx_stable_dollar_value_index_0_net,stable_raw_amount_index_0,other_token_value_index_0,calculated_tx_dollar_value,calculated_tx_dollar_value_net
107,0xf575687f780edd7918193dcfe97f5f2d871ea275b9c7...,1,GEKKO,True,4.135947e+27,Ethereum,25282.906167,8.146946,6.112966e-24,25282.906167,-25282.906167
108,0xf8196d4b1341fb7700603dd0abfebc5ee859b279eb9c...,0,Doggensnout Skeptic,False,1.555176e+21,Ethereum,-9310.080322,3.0,5.986513e-18,9310.080322,9310.080322
109,0xf8196d4b1341fb7700603dd0abfebc5ee859b279eb9c...,0,Ethereum,True,3.0,Ethereum,-9310.080322,3.0,3103.36,9310.080322,-9310.080322
110,0xf8196d4b1341fb7700603dd0abfebc5ee859b279eb9c...,4,Doggensnout Skeptic,False,1.547439e+21,Ethereum,-9310.080322,3.0,5.986513e-18,9263.763359,9263.763359
111,0xf8196d4b1341fb7700603dd0abfebc5ee859b279eb9c...,4,Ethereum,True,3.0,Ethereum,-9310.080322,3.0,3103.36,9310.080322,-9310.080322


In [60]:
final_test_all = pd.merge(df_senders, calculated_dollar_df_all, on = ['tx_hash', 'index', 'token_name', 'sender_gave_this'], how = 'left')
final_test_all['dollar_value_net']  = np.where(final_test_all['sender_gave_this'], - final_test_all['dollar_value'], final_test_all['dollar_value'])
final_test_all.head()

Unnamed: 0,tx_hash,index,type,raw_amount_x,dollar_value,token_contract_address,token_name,token_dollar_value,from,to,...,sender_is_involved,sender_gave_this,dollar_value_net,raw_amount_y,selected_stable_coin,tx_stable_dollar_value_index_0_net,stable_raw_amount_index_0,other_token_value_index_0,calculated_tx_dollar_value,calculated_tx_dollar_value_net
0,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0,Transfer,8.074525e+20,,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,0xe1ebdf64f7f3a31723e767a561345f958233bb7d,...,True,True,,8.074525e+20,Ethereum,1241.344043,0.4,1.537359e-18,1241.344043,-1241.344043
1,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0,Transfer,0.4,1241.344043,Ethereum,Ethereum,3103.360107,0x7a250d5630b4cf539739df2c5dacb4c659f2488d,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,...,True,False,1241.344043,0.4,Ethereum,1241.344043,0.4,3103.36,1241.344043,1241.344043
2,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,2,Transfer,8.492115e+20,,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,0xe1ebdf64f7f3a31723e767a561345f958233bb7d,...,True,True,,8.492115e+20,Ethereum,1241.344043,0.4,1.537359e-18,1305.54256,-1305.54256
3,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,2,Transfer,0.4,1241.344043,Ethereum,Ethereum,3103.360107,0x7a250d5630b4cf539739df2c5dacb4c659f2488d,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,...,True,False,1241.344043,0.4,Ethereum,1241.344043,0.4,3103.36,1241.344043,1241.344043
4,0xee8fd2c76181afa14ca0da158e0a01bba2d3df8e62c5...,0,Transfer,35000.47,35000.470928,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,USDC,1.0,0xbbf46514de004992d8dcbcec19f02f2e772ab51d,0x88e6a0c2ddd26feeb64f039a2c41296fcb3f5640,...,True,True,-35000.470928,35000.47,USDC,-35000.470928,35000.470928,1.0,35000.470928,-35000.470928


In [61]:
def prio_calculated(row):
    if not pd.isnull(row['calculated_tx_dollar_value_net']):
        return row['calculated_tx_dollar_value_net']
    else:
        return row['dollar_value_net']

# Apply the function to create the new column

final_test_all['calculated_dollar_value_priority'] = final_test_all.apply(prio_calculated, axis=1)

results = final_test_all.groupby(['tx_hash', 'index', 'token_name', 'sender_gave_this'])[['calculated_dollar_value_priority']].sum().reset_index()
results.head()

Unnamed: 0,tx_hash,index,token_name,sender_gave_this,calculated_dollar_value_priority
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,0,Ethereum,True,-620.672021
1,0x012778bb6330737bed53ca488e582500498d81e1db22...,0,KAIJUNO8,False,620.672021
2,0x012778bb6330737bed53ca488e582500498d81e1db22...,2,Ethereum,True,-620.672021
3,0x012778bb6330737bed53ca488e582500498d81e1db22...,2,KAIJUNO8,False,416.558421
4,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,0,Ethereum,True,-9310.080322


In [62]:
# Define a function to calculate differences in dollars of each coin
def calculate_differences(group):
    if len(group) == 2:
        difference_calculated = group.loc[group['index'] != 0, 'calculated_dollar_value_priority'].values[0] - \
                                group.loc[group['index'] == 0, 'calculated_dollar_value_priority'].values[0]
        return pd.Series({'difference_calculated': difference_calculated})
    else:
        return pd.Series({'difference_calculated': np.nan})

result = results.groupby(['tx_hash', 'token_name']).apply(calculate_differences).reset_index()

# for each tx there is the same amount of rows as tokens where the sender is involved (so 2 in case of normal transactions)
result.head()

  result = results.groupby(['tx_hash', 'token_name']).apply(calculate_differences).reset_index()


Unnamed: 0,tx_hash,token_name,difference_calculated
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,Ethereum,0.0
1,0x012778bb6330737bed53ca488e582500498d81e1db22...,KAIJUNO8,-204.113601
2,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Ethereum,0.0
3,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Jim,-46.318267
4,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,Ethereum,-76.640316


In [63]:
#calcualte the delta in stable coins for the tx 
stable_coins_df_index_0_2 = df_senders[(df_senders['token_name'].isin(stable_coins)) & (df_senders['index'] == 0) & df_senders['raw_amount'] != 0][['tx_hash', 'token_name', 'dollar_value_net', 'raw_amount',  'sender_gave_this']]
stable_coins_df_index_0_2 = stable_coins_df_index_0_2.groupby(['tx_hash', 'token_name', 'sender_gave_this'])[['dollar_value_net', 'raw_amount']].sum().reset_index()
stable_coins_df_index_0_2.rename(columns= {'token_name': 'stable_token_name', 'dollar_value_net': 'stable_dollar_value_net', 'raw_amount': 'stable_raw_amount'}, inplace = True)
stable_coins_df_index_0_2.head()

Unnamed: 0,tx_hash,stable_token_name,sender_gave_this,stable_dollar_value_net,stable_raw_amount
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,Ethereum,True,-620.672021,0.2
1,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Ethereum,True,-9310.080322,3.0
2,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,Ethereum,False,15404.813333,4.963914
3,0x1bcc89149f51daa2d79355a340d576af22555be34567...,Ethereum,True,-6827.392236,2.2
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,Ethereum,False,1241.344043,0.4


In [64]:
#calcualte the delta in other coins for the tx
other_coins_df_index_0_2 = df_senders[(df_senders['index'] == 0) & (~df_senders.token_name.isin(stable_coins))][['tx_hash', 'token_name', 'dollar_value_net', 'raw_amount',  'sender_gave_this']]
other_coins_df_index_0_2 = other_coins_df_index_0_2.groupby(['tx_hash', 'token_name', 'sender_gave_this'])[['dollar_value_net', 'raw_amount']].sum().reset_index()
other_coins_df_index_0_2.rename(columns= {'token_name': 'other_token_name', 'dollar_value_net': 'other_dollar_value_net', 'raw_amount': 'other_raw_amount'}, inplace = True)
other_coins_df_index_0_2.head()


Unnamed: 0,tx_hash,other_token_name,sender_gave_this,other_dollar_value_net,other_raw_amount
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,KAIJUNO8,False,338.104092,1.654453e+28
1,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Jim,False,4930.863232,6.627504e+26
2,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,enqAI,True,-4124.942422,1.6e+23
3,0x1bcc89149f51daa2d79355a340d576af22555be34567...,ClosedAI,False,95.701486,2.53702e+23
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,True,0.0,8.074525e+20


In [65]:
# get the ratios between other token and stable token
token_ratios = pd.merge(other_coins_df_index_0_2, stable_coins_df_index_0_2, on = ['tx_hash'])
token_ratios['ratio_other_to_stable_index_0'] = token_ratios['stable_raw_amount'] / token_ratios['other_raw_amount'] 
token_ratios = token_ratios[['tx_hash', 'other_token_name', 'stable_token_name', 'ratio_other_to_stable_index_0']]
token_ratios.head()

Unnamed: 0,tx_hash,other_token_name,stable_token_name,ratio_other_to_stable_index_0
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,KAIJUNO8,Ethereum,1.2088580000000001e-29
1,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Jim,Ethereum,4.526591e-27
2,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,enqAI,Ethereum,3.1024460000000004e-23
3,0x1bcc89149f51daa2d79355a340d576af22555be34567...,ClosedAI,Ethereum,8.671589999999999e-24
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,Ethereum,4.953852e-22


In [66]:
nearly_final_empty_A = nearly_final[(nearly_final['token_name_A'].isin(stable_coins)) & (nearly_final['token_A_delta_raw_amount'] == 0)]
nearly_final_empty_B = nearly_final[(nearly_final['token_name_B'].isin(stable_coins)) & (nearly_final['token_B_delta_raw_amount'] == 0)]
nearly_final_empty_A = pd.merge(nearly_final_empty_A, token_ratios, on = 'tx_hash')
nearly_final_empty_B = pd.merge(nearly_final_empty_B, token_ratios, on = 'tx_hash')
nearly_final_empty_A['token_A_delta_raw_amount'] = nearly_final_empty_A['token_B_delta_raw_amount'] * nearly_final_empty_A['ratio_other_to_stable_index_0']
nearly_final_empty_B['token_B_delta_raw_amount'] = nearly_final_empty_B['token_A_delta_raw_amount'] * nearly_final_empty_B['ratio_other_to_stable_index_0']

nearly_final_empty_A[['tx_hash', 'token_name_A', 'token_A_delta_raw_amount']].head()


Unnamed: 0,tx_hash,token_name_A,token_A_delta_raw_amount
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,Ethereum,-0.065772
1,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Ethereum,-0.014925
2,0x36453126f7b08f65178da484754a9195f7f044b032bc...,Ethereum,-0.037313
3,0x623bae0e273896d748142353d5cec32276bf7f49b0ec...,Ethereum,-0.054074
4,0xc1a796c24a30a5c110b7cea4123085339445bb1953c0...,Ethereum,-0.232437


In [67]:
# merge them all to get the delata in all coins
delta_stable = pd.merge(nearly_final,nearly_final_empty_A[['tx_hash', 'token_name_A', 'token_name_B', 'token_A_delta_raw_amount']], on = ['tx_hash', 'token_name_A', 'token_name_B'], how = 'left')
delta_stable.loc[delta_stable['token_A_delta_raw_amount_x'] == 0, 'token_A_delta_raw_amount_x'] = delta_stable['token_A_delta_raw_amount_y']
delta_stable_2 =  pd.merge(delta_stable, nearly_final_empty_B[['tx_hash', 'token_name_A', 'token_name_B', 'token_B_delta_raw_amount']], on = ['tx_hash', 'token_name_A', 'token_name_B'], how = 'left')
delta_stable_2.loc[delta_stable_2['token_B_delta_raw_amount_x'] == 0, 'token_B_delta_raw_amount_x'] = delta_stable_2['token_B_delta_raw_amount_y']
delta_stable_2.head()

Unnamed: 0,tx_hash,token_name_A,token_A_delta_raw_amount_x,token_A_delta_dollar_tenderly,token_name_B,token_B_delta_raw_amount_x,token_B_delta_dollar_tenderly,token_A_delta_raw_amount_y,token_B_delta_raw_amount_y
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,Ethereum,-0.06577181,0.0,KAIJUNO8,-5.440819e+27,-111.188585,-0.065772,
1,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Ethereum,-0.0149252,0.0,Jim,-3.297227e+24,-24.531371,-0.014925,
2,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,Ethereum,-0.02469591,-76.640316,enqAI,,0.0,,
3,0x1bcc89149f51daa2d79355a340d576af22555be34567...,ClosedAI,-1.446769e+21,-0.54575,Ethereum,-0.01254579,0.0,,-0.012546
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,-4.175898e+19,0.0,Ethereum,-0.02068678,0.0,,-0.020687


In [68]:
def select_stable_token(row, column_A, column_B):
    if row['token_name_A'] in stable_coins:
        return row[column_A]
    elif row['token_name_B'] in stable_coins:
        return row[column_B]
    else:
        return None  # Return None if neither token is in stable_coins


delta_stable_2['token_name_stable'] = delta_stable_2.apply(lambda row: select_stable_token(row, 'token_name_A', 'token_name_B'), axis=1)
delta_stable_2['token_delta_stable'] = delta_stable_2.apply(lambda row: select_stable_token(row, 'token_A_delta_raw_amount_x', 'token_B_delta_raw_amount_x'), axis=1)

delta_stable_2.head()

Unnamed: 0,tx_hash,token_name_A,token_A_delta_raw_amount_x,token_A_delta_dollar_tenderly,token_name_B,token_B_delta_raw_amount_x,token_B_delta_dollar_tenderly,token_A_delta_raw_amount_y,token_B_delta_raw_amount_y,token_name_stable,token_delta_stable
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,Ethereum,-0.06577181,0.0,KAIJUNO8,-5.440819e+27,-111.188585,-0.065772,,Ethereum,-0.065772
1,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Ethereum,-0.0149252,0.0,Jim,-3.297227e+24,-24.531371,-0.014925,,Ethereum,-0.014925
2,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,Ethereum,-0.02469591,-76.640316,enqAI,,0.0,,,Ethereum,-0.024696
3,0x1bcc89149f51daa2d79355a340d576af22555be34567...,ClosedAI,-1.446769e+21,-0.54575,Ethereum,-0.01254579,0.0,,-0.012546,Ethereum,-0.012546
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,-4.175898e+19,0.0,Ethereum,-0.02068678,0.0,,-0.020687,Ethereum,-0.020687


In [69]:
dollar_diff = result.groupby('tx_hash')[['difference_calculated']].sum().reset_index()
stable_delta = pd.merge(dollar_diff, delta_stable_2, on = 'tx_hash', how = 'left')
stable_delta.head()

Unnamed: 0,tx_hash,difference_calculated,token_name_A,token_A_delta_raw_amount_x,token_A_delta_dollar_tenderly,token_name_B,token_B_delta_raw_amount_x,token_B_delta_dollar_tenderly,token_A_delta_raw_amount_y,token_B_delta_raw_amount_y,token_name_stable,token_delta_stable
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,-204.113601,Ethereum,-0.06577181,0.0,KAIJUNO8,-5.440819e+27,-111.188585,-0.065772,,Ethereum,-0.065772
1,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,-46.318267,Ethereum,-0.0149252,0.0,Jim,-3.297227e+24,-24.531371,-0.014925,,Ethereum,-0.014925
2,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,-76.640316,Ethereum,-0.02469591,-76.640316,enqAI,,0.0,,,Ethereum,-0.024696
3,0x1bcc89149f51daa2d79355a340d576af22555be34567...,-38.934096,ClosedAI,-1.446769e+21,-0.54575,Ethereum,-0.01254579,0.0,,-0.012546,Ethereum,-0.012546
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,-64.198517,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,-4.175898e+19,0.0,Ethereum,-0.02068678,0.0,,-0.020687,Ethereum,-0.020687


### Calculate the value in dollars based on the price of the stable coin

In [70]:
stable_value_index_0 = df_senders[(df_senders['index'] == 0) & (df_senders['token_name'].isin(stable_coins))][['tx_hash', 'token_name', 'token_dollar_value']]
stable_value_index_0.rename(columns = {'token_dollar_value' : 'stable_token_dollar_value_0', 'token_name' : 'token_name_stable'}, inplace = True)
stable_value_index_0.head()

Unnamed: 0,tx_hash,token_name_stable,stable_token_dollar_value_0
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,Ethereum,3103.360107
15,0xee8fd2c76181afa14ca0da158e0a01bba2d3df8e62c5...,USDC,1.0
29,0x77929a2b313bd1f5b15c9dbc61cc598d21e6f3594b30...,Ethereum,3103.360107
39,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,Ethereum,3103.360107
50,0x4f9b4cddc272c3803df798041fff63ae7e439982810f...,Ethereum,3103.360107


In [71]:
stable_delta_with_dollar = pd.merge(stable_delta, stable_value_index_0, on = ['tx_hash', 'token_name_stable'], how = 'left')
def calculate_tx_dollar_loss(row):
    if row['token_name_stable'] is not None:
        return row['token_delta_stable'] * row['stable_token_dollar_value_0']

# Apply custom function to create 'delta_dollar' column
stable_delta_with_dollar['delta_dollar'] = stable_delta_with_dollar.apply(calculate_tx_dollar_loss, axis=1)
stable_delta_with_dollar['delta_dollar'].fillna(stable_delta_with_dollar['difference_calculated'], inplace=True)
stable_delta_with_dollar.head()

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  stable_delta_with_dollar['delta_dollar'].fillna(stable_delta_with_dollar['difference_calculated'], inplace=True)


Unnamed: 0,tx_hash,difference_calculated,token_name_A,token_A_delta_raw_amount_x,token_A_delta_dollar_tenderly,token_name_B,token_B_delta_raw_amount_x,token_B_delta_dollar_tenderly,token_A_delta_raw_amount_y,token_B_delta_raw_amount_y,token_name_stable,token_delta_stable,stable_token_dollar_value_0,delta_dollar
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,-204.113601,Ethereum,-0.06577181,0.0,KAIJUNO8,-5.440819e+27,-111.188585,-0.065772,,Ethereum,-0.065772,3103.360107,-204.113601
1,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,-46.318267,Ethereum,-0.0149252,0.0,Jim,-3.297227e+24,-24.531371,-0.014925,,Ethereum,-0.014925,3103.360107,-46.318267
2,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,-76.640316,Ethereum,-0.02469591,-76.640316,enqAI,,0.0,,,Ethereum,-0.024696,3103.360107,-76.640316
3,0x1bcc89149f51daa2d79355a340d576af22555be34567...,-38.934096,ClosedAI,-1.446769e+21,-0.54575,Ethereum,-0.01254579,0.0,,-0.012546,Ethereum,-0.012546,3103.360107,-38.934096
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,-64.198517,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,-4.175898e+19,0.0,Ethereum,-0.02068678,0.0,,-0.020687,Ethereum,-0.020687,3103.360107,-64.198517


### Add the sender row

In [72]:
only_senders = df_senders[['tx_hash', 'sender']].drop_duplicates().reset_index(drop = True)
only_senders.head()

Unnamed: 0,tx_hash,sender
0,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179
1,0xee8fd2c76181afa14ca0da158e0a01bba2d3df8e62c5...,0xbbf46514de004992d8dcbcec19f02f2e772ab51d
2,0x77929a2b313bd1f5b15c9dbc61cc598d21e6f3594b30...,0xfb93f5128dae38523b8beea93a0f89c32b3f4932
3,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,0x76ec733f445358232ea24aaf03d4536057439bfc
4,0x4f9b4cddc272c3803df798041fff63ae7e439982810f...,0x1518c211f611114decd1842894e08b803afcd27b


In [73]:
stable_delta_with_dollar = pd.merge(stable_delta_with_dollar, only_senders, on = 'tx_hash')

### Add the token_address row

In [74]:
token_name_address = df_senders[['token_contract_address', 'token_name']].drop_duplicates()
token_name_address.head()

Unnamed: 0,token_contract_address,token_name
0,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2
4,Ethereum,Ethereum
15,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,USDC
16,0x0b7f0e51cd1739d6c96982d55ad8fa634dd43a9c,Dream Machine Token
28,0x1db61c337e5216941f53e6a0e41eed9640aec8bb,0x1db61c337e5216941f53e6a0e41eed9640aec8bb


In [75]:
# Merge for token_name_A
stable_delta_with_dollar = stable_delta_with_dollar.merge(
    token_name_address.rename(columns={'token_name': 'token_name_A', 'token_contract_address': 'token_contract_address_A'}),
    on='token_name_A', how='left')

# Merge for token_name_B
stable_delta_with_dollar = stable_delta_with_dollar.merge(
    token_name_address.rename(columns={'token_name': 'token_name_B', 'token_contract_address': 'token_contract_address_B'}),
    on='token_name_B', how='left')

stable_delta_with_dollar.head()

Unnamed: 0,tx_hash,difference_calculated,token_name_A,token_A_delta_raw_amount_x,token_A_delta_dollar_tenderly,token_name_B,token_B_delta_raw_amount_x,token_B_delta_dollar_tenderly,token_A_delta_raw_amount_y,token_B_delta_raw_amount_y,token_name_stable,token_delta_stable,stable_token_dollar_value_0,delta_dollar,sender,token_contract_address_A,token_contract_address_B
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,-204.113601,Ethereum,-0.06577181,0.0,KAIJUNO8,-5.440819e+27,-111.188585,-0.065772,,Ethereum,-0.065772,3103.360107,-204.113601,0x9ea02f652955b90c0dd4f256003e4e339d3a4184,Ethereum,0x4fe8d4775b7cb2546b9ee86182081cdf8f77b053
1,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,-46.318267,Ethereum,-0.0149252,0.0,Jim,-3.297227e+24,-24.531371,-0.014925,,Ethereum,-0.014925,3103.360107,-46.318267,0x76ec733f445358232ea24aaf03d4536057439bfc,Ethereum,0xd807f7e2818db8eda0d28b5be74866338eaedb86
2,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,-76.640316,Ethereum,-0.02469591,-76.640316,enqAI,,0.0,,,Ethereum,-0.024696,3103.360107,-76.640316,0x30049fd3dde7a46a6441a0d41dcb1cf5aaa93e77,Ethereum,0x710287d1d39dcf62094a83ebb3e736e79400068a
3,0x1bcc89149f51daa2d79355a340d576af22555be34567...,-38.934096,ClosedAI,-1.446769e+21,-0.54575,Ethereum,-0.01254579,0.0,,-0.012546,Ethereum,-0.012546,3103.360107,-38.934096,0xc4ba8f21c0dd755cfeff899c4f791c634ca5c6db,0x50b0696468f42cab1ddc76413a1312aff3cabdf6,Ethereum
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,-64.198517,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,-4.175898e+19,0.0,Ethereum,-0.02068678,0.0,,-0.020687,Ethereum,-0.020687,3103.360107,-64.198517,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,Ethereum


###  Get the delta in ether at the same hour

In [76]:
#make sure the final df stable_delta_with_dollar has a timestamp column
timestamps_per_transaction = df_senders[['tx_hash', 'timestamp']].drop_duplicates()
stable_delta_with_dollar = pd.merge(stable_delta_with_dollar, timestamps_per_transaction, on = 'tx_hash')

In [77]:
#value of the weth token at the different timestamps of the dataset
weth_values = df_senders[df_senders['token_name'] == 'WETH'].groupby(['timestamp'])[['token_dollar_value']].mean().reset_index()
weth_values.rename(columns= {'token_dollar_value' : 'weth_dollar_value'}, inplace = True)
weth_values.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 2 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   timestamp          1 non-null      int64  
 1   weth_dollar_value  1 non-null      float64
dtypes: float64(1), int64(1)
memory usage: 148.0 bytes


In [78]:
all_timestamps = pd.DataFrame({'timestamp': df_senders['timestamp'].unique()})
all_timestamps.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 1 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   timestamp  25 non-null     int64
dtypes: int64(1)
memory usage: 332.0 bytes


In [79]:
#one df with all the timestamps and the weth dollar value for the nearest timestamp
weth_values_all = pd.merge_asof(all_timestamps, weth_values, on='timestamp', direction='nearest')
weth_values_all['weth_dollar_value'] = weth_values_all['weth_dollar_value'].astype(float)
weth_values_all.head()

Unnamed: 0,timestamp,weth_dollar_value
0,1710162059,3104.300049
1,1710162323,3104.300049
2,1710162371,3104.300049
3,1710162383,3104.300049
4,1710162587,3104.300049


In [80]:
stable_delta_with_dollar = pd.merge(stable_delta_with_dollar, weth_values_all, on = 'timestamp')
#calculate the value of the tx loss in eth
stable_delta_with_dollar['delta_eth'] = stable_delta_with_dollar['delta_dollar'] / stable_delta_with_dollar['weth_dollar_value']
stable_delta_with_dollar.head()

Unnamed: 0,tx_hash,difference_calculated,token_name_A,token_A_delta_raw_amount_x,token_A_delta_dollar_tenderly,token_name_B,token_B_delta_raw_amount_x,token_B_delta_dollar_tenderly,token_A_delta_raw_amount_y,token_B_delta_raw_amount_y,token_name_stable,token_delta_stable,stable_token_dollar_value_0,delta_dollar,sender,token_contract_address_A,token_contract_address_B,timestamp,weth_dollar_value,delta_eth
0,0x012778bb6330737bed53ca488e582500498d81e1db22...,-204.113601,Ethereum,-0.06577181,0.0,KAIJUNO8,-5.440819e+27,-111.188585,-0.065772,,Ethereum,-0.065772,3103.360107,-204.113601,0x9ea02f652955b90c0dd4f256003e4e339d3a4184,Ethereum,0x4fe8d4775b7cb2546b9ee86182081cdf8f77b053,1710163547,3104.300049,-0.065752
1,0x08e622acdf6b27fe26f24e33815bb1a0789158f2d326...,-46.318267,Ethereum,-0.0149252,0.0,Jim,-3.297227e+24,-24.531371,-0.014925,,Ethereum,-0.014925,3103.360107,-46.318267,0x76ec733f445358232ea24aaf03d4536057439bfc,Ethereum,0xd807f7e2818db8eda0d28b5be74866338eaedb86,1710162383,3104.300049,-0.014921
2,0x1a5eafe643fb4e41e4666f045341584ea22bb73d5857...,-76.640316,Ethereum,-0.02469591,-76.640316,enqAI,,0.0,,,Ethereum,-0.024696,3103.360107,-76.640316,0x30049fd3dde7a46a6441a0d41dcb1cf5aaa93e77,Ethereum,0x710287d1d39dcf62094a83ebb3e736e79400068a,1710163715,3104.300049,-0.024688
3,0x1bcc89149f51daa2d79355a340d576af22555be34567...,-38.934096,ClosedAI,-1.446769e+21,-0.54575,Ethereum,-0.01254579,0.0,,-0.012546,Ethereum,-0.012546,3103.360107,-38.934096,0xc4ba8f21c0dd755cfeff899c4f791c634ca5c6db,0x50b0696468f42cab1ddc76413a1312aff3cabdf6,Ethereum,1710162587,3104.300049,-0.012542
4,0x1ee8d8e23e4a026ce8afbf2dc79c196c6b1d43d5e9f8...,-64.198517,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,-4.175898e+19,0.0,Ethereum,-0.02068678,0.0,,-0.020687,Ethereum,-0.020687,3103.360107,-64.198517,0x36a2ffb33c1b427c46c3d30adac3ca4e8ed36179,0xb0699d63aef20df3f1cffa9ca2bb8670416271d2,Ethereum,1710162059,3104.300049,-0.020681


### Remove the transactions that are not interesting 

In [81]:
problematic_hashes = problematic_transactions['tx_hash or block']
stable_delta_with_dollar = stable_delta_with_dollar[~stable_delta_with_dollar['tx_hash'].isin(problematic_hashes)]
stable_delta_with_dollar = stable_delta_with_dollar.drop_duplicates()

### Cleanup by keeping only the interseting columns before exporting the file

In [82]:
columns_to_keep = ['tx_hash', 'sender', 'delta_eth', 'delta_dollar', 'token_name_A', 'token_contract_address_A', 'token_A_delta_raw_amount_x', 'token_name_B', 'token_contract_address_B', 'token_B_delta_raw_amount_x']
final = stable_delta_with_dollar[columns_to_keep]

In [83]:
final = final.rename(columns = {'token_A_delta_raw_amount_x' : 'delta_token_A', 'token_B_delta_raw_amount_x' : 'delta_token_B'})
final.to_csv(f'data/results/{name_of_incident}_final_results.csv')

In [94]:
df_main = pd.read_csv(csv_file_path)
tx_hash_list = [x for x in df_main['user_tx'].to_list() if pd.notnull(x)]
print("FINAL STATS:",
      "\n - Out of the", df_main['user_tx'].nunique(), "original transactions,",
      "\nwe were able to find deltas in ethereum and dollar by simulating if the transactions had been on top of block",
      "\nfor", final.tx_hash.nunique(), "transactions in total.", 
      "\n - This approach was therefore successful for", '{:.2%}'.format(round(final.tx_hash.nunique()/df_main['user_tx'].nunique(), 4)), "percent of transactions",
      f"\n - The transaction list for which this approach did not work can be found in the folder data/results/{name_of_incident}_transactions_or_blocks_with_api_problem", 
      "\n - The total potential loss in dollars for this incident is", final.delta_dollar.sum())

FINAL STATS: 
 - Out of the 37 original transactions, 
we were able to find deltas in ethereum and dollar by simulating if the transactions had been on top of block 
for 28 transactions in total. 
 - This approach was therefore successful for 75.68% percent of transactions 
 - The transaction list for which this approach did not work can be found in the folder data/results/incident2_transactions_or_blocks_with_api_problem 
 - The total potential loss in dollars for this incident is -6451.374003850512
