# Packages

In [1]:
import pandas as pd
import numpy as np
import os

# Notebook display settings: show every column and never truncate cell text.
pd.set_option('display.max_columns', None, 'max_colwidth', None)

# Functions

In [9]:
def make_identifier(df):
    """Assign one integer id to every distinct combination of values in a row.

    Rows are compared by the string form of each cell, column by column, and
    ids are handed out in order of first appearance starting at 0, so equal
    rows share an id.

    The per-column strings are compared as a tuple rather than joined with
    ``'_'``: a joined key cannot tell ``('a_b', 'c')`` from ``('a', 'b_c')``
    and would give both rows the same id.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing only the columns that form the key.

    Returns
    -------
    numpy.ndarray
        One integer code per row of ``df``, in row order.
    """
    n_rows, n_cols = df.shape
    if n_cols == 0:
        # No key columns: every row carries the same (empty) key.
        return np.zeros(n_rows, dtype=np.intp)

    # Positional access keeps this working even with duplicate column labels.
    key_columns = [map(str, df.iloc[:, pos].tolist()) for pos in range(n_cols)]

    # setdefault returns the stored code for a known key, or stores and returns
    # the next free code (the current dict size) for a new one.
    first_seen = {}
    codes = (first_seen.setdefault(key, len(first_seen)) for key in zip(*key_columns))
    return np.fromiter(codes, dtype=np.intp, count=n_rows)
# Apply like:
# dftestMaster['UID_test'] = make_identifier(dftestMaster[['BatchNr','attack_Id']])

# Set base variables

In [2]:
# Both counters start at the first batch: `counter` picks the input file to
# read, `counter_save` numbers the per-batch output file.
counter = counter_save = 1

# Load new file

In [None]:
# Read the batch selected by `counter`, then advance the counter so the next
# run of this cell picks up the following file.
transactions_path = f"/Volumes/Extreme SSD/02_UniswapV2Transactions/UniswapV2Transactions_{counter}.par"
df1 = pd.read_parquet(transactions_path)
counter += 1


# Convert data

## Convert gas price and value to float

In [None]:
# Derived numeric columns: the gas price divided by 1e9, and the transferred
# value cast to float and divided by 1e18.
gas_price_scale = 10**9
value_scale = 10**18
df1['trans_gasPrice_Gwei'] = df1['trans_gasPrice'].div(gas_price_scale)
df1['value_float'] = df1['value'].astype(float).div(value_scale)

# Implement heuristics

## Heuristics 1,4,6

In [None]:
# Number of rows that share the same (block, pool, value), stored as float.
same_value_keys = ['block_number', 'poolId', 'value']
df1['Count_1'] = df1.groupby(same_value_keys)['value'].transform('size').astype('float')

# Keep each (block, pool, transaction) group whose average Count_1 is above 1,
# i.e. at least one of its rows shares its value with another row.
per_transaction = df1.groupby(['block_number', 'poolId', 'transaction_hash'])
df_filtered = per_transaction.filter(lambda rows: rows['Count_1'].mean() > 1)



## Heuristics 3 & 7

In [None]:
# Count the distinct transaction hashes that share each (block, pool, value).
value_groups = df_filtered.groupby(['block_number', 'poolId', 'value'])
df_filtered['Count_nHash'] = value_groups['transaction_hash'].transform('nunique').astype('float')

# Heuristic 3: keep a transaction only when the Count_nHash of its rows averages
# exactly 1.5 (an earlier version used > 1). Per the original author's note this
# is meant to select cases with exactly two (formerly: at least two) different
# transaction hashes.
df_filtered2 = df_filtered.groupby(['transaction_hash']).filter(
    lambda rows: rows['Count_nHash'].mean() == 1.5
)




## Heuristic 5

In [None]:
# 1-based position of every row inside its transaction, ordered by log_index.
rows_by_log_index = df_filtered2.sort_values(['log_index'], ascending=True)
df_filtered2['within_transaction_order'] = (
    rows_by_log_index.groupby(['transaction_hash']).cumcount().add(1)
)

# Heuristic 5 (opposite transfer in the same block, pool and token): among rows
# whose (block, pool, token, value) occurs more than once, keep the groups whose
# within_transaction_order averages exactly 1.5 (an earlier version used >= 1.5).
match_keys = ['block_number', 'poolId', 'tf_tokenAddress', 'value']
has_match = df_filtered2.duplicated(subset=match_keys, keep=False)
df_filtered3 = df_filtered2[has_match].groupby(match_keys).filter(
    lambda rows: rows['within_transaction_order'].mean() == 1.5
)

# Adding an attack-ID: one number per (block, pool, value) group.
df_filtered3['attack_Id'] = df_filtered3.groupby(['block_number', 'poolId', 'value']).ngroup()





## Add batch number for identification in master file

In [None]:
# Tag every row with the number of the input batch it came from. The load cell
# has already advanced `counter` past the file it read, so the batch that was
# just processed is `counter - 1`. This matches the numbering used by the batch
# loops further down and by the `counter_save` file name.
df_filtered3['BatchNr'] = counter - 1

# Adding to master file

In [None]:
# Persist the current batch and fold it into the master file.
master_path = "/Volumes/Extreme SSD/98_Output/AttacksUniswapV2_Master.par"
batch_path = f"/Volumes/Extreme SSD/98_Output/AttacksUniswapV2_{counter_save}.par"

# Save separate file with counter number (100k blocks)
df_filtered3.to_parquet(batch_path)
counter_save = counter_save + 1

if os.path.exists(master_path):
    # Master already exists: append this batch and rewrite the file.
    dfMaster = pd.read_parquet(master_path)
    dfMaster = pd.concat([dfMaster, df_filtered3], ignore_index=True)
    dfMaster.to_parquet(master_path)
    # Only the temporary master copy is dropped. The filtered frames stay
    # defined in both branches so the cells below can still display them.
    del dfMaster
else:
    # First batch: it becomes the master file as is.
    df_filtered3.to_parquet(master_path)
    
    
    


In [None]:
# Is the master file present on disk? (shown as the cell output)
os.path.exists("/Volumes/Extreme SSD/98_Output/AttacksUniswapV2_Master.par")

In [None]:
# Display the filtered frame built by the heuristic cells above.
df_filtered3

# Build loop (1 observation per transaction)

In [2]:
# Batch loop (one observation per transaction): process every numbered
# transaction file in turn, write one result file per batch and keep a
# cumulative master file up to date after each batch.
#
# NOTE(review): a master file left over from an earlier run is appended to, so
# re-running this cell from batch 1 duplicates its rows. Remove the master file
# first when a fresh result is wanted.
transactions_dir = "/Volumes/Extreme SSD/02_UniswapV2Transactions"
output_dir = "/Volumes/Extreme SSD/98_Output"
master_path = f"{output_dir}/AttacksUniswapV2_Master.par"

counter = 1
counter_save = 1

while True:

    # Add control outputs
    print(counter,
          counter_save)

    # Stop at the first batch number without an input file. Checking for the
    # file explicitly (instead of catching every exception around the read)
    # lets genuine read errors and keyboard interrupts surface rather than
    # being reported as a normal "Finished!".
    transactions_path = f"{transactions_dir}/UniswapV2Transactions_{counter}.par"
    if not os.path.exists(transactions_path):
        print("Finished!")
        break

    df1 = pd.read_parquet(transactions_path)

    # Convert gas price and value to float
    df1['trans_gasPrice_Gwei'] = (df1['trans_gasPrice']/(10**9))
    df1['value_float'] = (df1['value'].astype(float)/(10**18))

    # Heuristics 1,4,6a
    # Rows per (block, pool, value); keep the (block, pool, transaction) groups
    # whose average count is above 1.
    df1['Count_1'] = df1.groupby(['block_number', 'poolId', 'value'], as_index=True)['value'].transform('size').astype('float')
    df_filtered = df1.groupby(['block_number', 'poolId', 'transaction_hash'], as_index=True).filter(lambda x: x['Count_1'].mean() > 1)

    # Heuristics 3 & 7
    # Add column with unique count of transaction_hash per sub-frame
    df_filtered['Count_nHash'] = df_filtered.groupby(['block_number', 'poolId', 'value'], as_index=True)['transaction_hash'].transform('nunique').astype('float')

    # Keep transactions whose Count_nHash averages exactly 1.5 (formerly > 1);
    # per the original note this targets exactly two different hashes.
    df_filtered2 = df_filtered.groupby(['transaction_hash'], as_index=True).filter(lambda x: x['Count_nHash'].mean() == 1.5)

    # Heuristic 5
    # 1-based position of each row within its transaction, ordered by log_index.
    df_filtered2['within_transaction_order'] = df_filtered2.sort_values(['log_index'], ascending=True).groupby(['transaction_hash'], as_index=True).cumcount().add(1)

    # Among rows whose (block, pool, token, value) occurs more than once, keep
    # groups whose within_transaction_order averages exactly 1.5 (formerly >= 1.5).
    match_keys = ['block_number', 'poolId', 'tf_tokenAddress', 'value']
    has_match = df_filtered2.duplicated(subset=match_keys, keep=False)
    df_filtered3 = df_filtered2[has_match].groupby(match_keys, as_index=True).filter(lambda x: x['within_transaction_order'].mean() == 1.5)

    # Adding an attack-ID
    df_filtered3['attack_Id'] = df_filtered3.groupby(['block_number', 'poolId', 'value'], as_index=True).ngroup()

    # Adding batch number
    df_filtered3['BatchNr'] = counter

    # Save separate file with counter number (100k blocks). It is written
    # before Attack_UID is added so every batch file has the same columns.
    df_filtered3.to_parquet(f"{output_dir}/AttacksUniswapV2_{counter_save}.par")

    # Add to the master file, or start it when none exists yet. The copy keeps
    # Attack_UID out of df_filtered3 itself.
    if os.path.exists(master_path):
        dfMaster = pd.concat([pd.read_parquet(master_path), df_filtered3], ignore_index=True)
    else:
        dfMaster = df_filtered3.copy()

    # Add overall attack Id
    dfMaster['Attack_UID'] = dfMaster.groupby(['BatchNr', 'attack_Id']).ngroup()
    dfMaster.to_parquet(master_path)

    # Clean/update variables
    counter = counter + 1
    counter_save = counter_save + 1
    del df1, df_filtered, df_filtered2, df_filtered3, dfMaster
        
        
    
    
    

1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10
11 11
12 12
13 13
14 14
15 15
16 16
17 17
18 18
19 19
20 20
21 21
22 22
23 23
24 24
25 25
26 26
Finished!


# Checks

In [None]:
# Load the per-batch result file number 1 for a spot check.
dftest1 = pd.read_parquet("/Volumes/Extreme SSD/98_Output/AttacksUniswapV2_1.par")

In [None]:
# Load the per-batch result file number 3 for a spot check.
dftest2 = pd.read_parquet("/Volumes/Extreme SSD/98_Output/AttacksUniswapV2_3.par")

In [18]:
# Load the cumulative master file for inspection.
dftestMaster = pd.read_parquet("/Volumes/Extreme SSD/98_Output/AttacksUniswapV2_Master.par")

In [None]:
# Row count of the first loaded batch file.
len(dftest1)

In [None]:
# Row count of the second loaded batch file.
len(dftest2)

In [3]:
# Row count of the master file.
len(dftestMaster)

1310336

In [10]:
# Inspect the master rows that came from batch 18.
dftestMaster.loc[dftestMaster['BatchNr'] == 18]

Unnamed: 0,transaction_hash,block_number,transaction_index,log_index,from_address,to_address,trans_fromAddress,trans_toAddress,poolId,tf_tokenAddress,tokenName,value,trans_gasPrice,gasUsed,effective_gas_price,trans_gasPrice_Gwei,value_float,Count_1,Count_nHash,within_transaction_order,attack_Id,BatchNr
697642,0xa7b4dc472910a1e601fef1fabbf6f232df556f3434ba4c2d50a38ffc4ec7ea83,11793090,128,117,0xacf24f656492992b4107cc850d57484cc72c48a2,0xdf6b861b4fbcfaffb62dd1906fcd3a863955704b,0x13ab8b21ccd2716c8531cf14ff476c6b4726f9d0,0xacf24f656492992b4107cc850d57484cc72c48a2,0xdf6b861b4fbcfaffb62dd1906fcd3a863955704b,0x6e36556b3ee5aa28def2a8ec3dae30ec2b208739,,69950000000000000000,305724000001,78327,305724000001,305.724,69.950000,2.0,2.0,1,0,18
697643,0x48701d0cc3e2cc9fa3c659defa4de81c77865bc9783d871771b40f645fa144de,11793090,262,243,0xdf6b861b4fbcfaffb62dd1906fcd3a863955704b,0xacf24f656492992b4107cc850d57484cc72c48a2,0x13ab8b21ccd2716c8531cf14ff476c6b4726f9d0,0xacf24f656492992b4107cc850d57484cc72c48a2,0xdf6b861b4fbcfaffb62dd1906fcd3a863955704b,0x6e36556b3ee5aa28def2a8ec3dae30ec2b208739,,69950000000000000000,292000000000,71048,292000000000,292.0,69.950000,2.0,2.0,2,0,18
697644,0x55b80ac7a079dc9ed1b6a8892069f15dbb8842b07b73e7e9f1343fc19b53946c,11793091,137,114,0x99dfde431b40321a35deb6aeb55cf338ddd6eccd,0x1d6e8bac6ea3730825bde4b005ed7b2b39a2932d,0x4f69c5b694d5a14a0a595703175c478ec6b2a2fe,0x1d6e8bac6ea3730825bde4b005ed7b2b39a2932d,0x99dfde431b40321a35deb6aeb55cf338ddd6eccd,0x0202be363b8a4820f3f4de7faf5224ff05943ab1,,20979072373299356515710,285101000000,94958,285101000000,285.101,20979.072373,2.0,2.0,2,1,18
697645,0x31d723b7de37af5316675d9dc2b440f42aed9a31290b11a8aa009fa1d07d864c,11793091,142,134,0x1d6e8bac6ea3730825bde4b005ed7b2b39a2932d,0x99dfde431b40321a35deb6aeb55cf338ddd6eccd,0x74b892425a206eb23905a5f5c322e4386460312d,0x1d6e8bac6ea3730825bde4b005ed7b2b39a2932d,0x99dfde431b40321a35deb6aeb55cf338ddd6eccd,0x0202be363b8a4820f3f4de7faf5224ff05943ab1,,20979072373299356515710,284999999999,93248,284999999999,285.0,20979.072373,2.0,2.0,1,1,18
697646,0xda5535433679bc521c0bc6d878d95f114f022abf4984527381e27b7a7fd8d6ea,11793092,41,91,0x0c9c5daf1d7cd8b10e9fc5e7a10762f0a8d1c335,0x7b78eb388fe213037b0f558a4a5935fe27b1e481,0xb6b7cc8c20a25d886f3feff988d15d267f71ac7c,0x7b78eb388fe213037b0f558a4a5935fe27b1e481,0x0c9c5daf1d7cd8b10e9fc5e7a10762f0a8d1c335,0x7d29a64504629172a429e64183d6673b9dacbfce,VectorspaceAI,2476098843206046537035,300712501149,127813,300712501149,300.712501,2476.098843,2.0,2.0,2,2,18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
768445,0x2c40243c05a4d857cdde0d0b766bad25ec25a2bbe952ff0b7b2a68540cc1d208,11893071,116,257,0x0555bdebc3429585b4594285a76f853725a49532,0x659a9a43b32bea6c113c393930a45c7634a242d5,0x8ad7695bcb758c6c090ded08e4cb5240d1875e6b,0x0555bdebc3429585b4594285a76f853725a49532,0x659a9a43b32bea6c113c393930a45c7634a242d5,0x07bac35846e5ed502aa91adf6a9e7aa210f2dcbe,,12651388401879762401307,148000001459,88581,148000001459,148.000001,12651.388402,2.0,2.0,1,35399,18
768446,0xf2a4b983dfda9a917b3c414ee385803ce2ce3c4dc80bd65c01791999759380f2,11893072,116,167,0xe7607a563105f7dba8beb87ad7f5b3a9ec793958,0x1d6e8bac6ea3730825bde4b005ed7b2b39a2932d,0x4f69c5b694d5a14a0a595703175c478ec6b2a2fe,0x1d6e8bac6ea3730825bde4b005ed7b2b39a2932d,0xe7607a563105f7dba8beb87ad7f5b3a9ec793958,0x159751323a9e0415dd3d6d42a1212fe9f4a0848c,,6466037595259215033783,177681231000,82904,177681231000,177.681231,6466.037595,2.0,2.0,2,35400,18
768447,0x9fc3e677481932ecdff82c16da839479038aec8c3dbe0eecc4de18387cf46c71,11893072,179,296,0x1d6e8bac6ea3730825bde4b005ed7b2b39a2932d,0xe7607a563105f7dba8beb87ad7f5b3a9ec793958,0x74b892425a206eb23905a5f5c322e4386460312d,0x1d6e8bac6ea3730825bde4b005ed7b2b39a2932d,0xe7607a563105f7dba8beb87ad7f5b3a9ec793958,0x159751323a9e0415dd3d6d42a1212fe9f4a0848c,,6466037595259215033783,156999999999,93152,156999999999,157.0,6466.037595,2.0,2.0,1,35400,18
768448,0x1294f9e541cb9248552cd04497c7e272c8460f922997738d4e40658a2b002980,11893082,93,70,0xe7607a563105f7dba8beb87ad7f5b3a9ec793958,0x7a08ed5862ba4c2887ef169a17637eadb27beecf,0x979636bfeecd6a3ba720cf6eb8b7135f247e0b4a,0x7a08ed5862ba4c2887ef169a17637eadb27beecf,0xe7607a563105f7dba8beb87ad7f5b3a9ec793958,0x159751323a9e0415dd3d6d42a1212fe9f4a0848c,,13321516127047331277025,222000000056,115432,222000000056,222.0,13321.516127,2.0,2.0,2,35401,18


In [20]:
# Number every (BatchNr, attack_Id) pair so each attack gets one id across the
# whole master frame.
uid_keys = ['BatchNr', 'attack_Id']
dftestMaster['Attack_UID'] = dftestMaster.groupby(uid_keys).ngroup()

In [22]:
# Remove the scratch id columns when they are present. No cell in the notebook
# as saved creates 'UID' or 'UID_test', so on a fresh run they do not exist;
# errors='ignore' keeps the cell runnable instead of raising a KeyError.
dftestMaster = dftestMaster.drop(columns=['UID', 'UID_test'], errors='ignore')

In [24]:
# Write the updated master frame back to its file.
dftestMaster.to_parquet("/Volumes/Extreme SSD/98_Output/AttacksUniswapV2_Master.par")

# Build loop (ALL observations per transaction)

In [3]:
# Batch loop (ALL observations per transaction): process every numbered
# transaction file in turn, write one result file per batch and keep a
# cumulative "full" master file up to date after each batch.
#
# NOTE(review): a master file left over from an earlier run is appended to, so
# re-running this cell from batch 1 duplicates its rows. Remove the master file
# first when a fresh result is wanted.
transactions_dir = "/Volumes/Extreme SSD/02_UniswapV2Transactions"
output_dir = "/Volumes/Extreme SSD/98_Output"
master_path = f"{output_dir}/AttacksUniswapV2_full_Master.par"

counter = 1
counter_save = 1

while True:

    # Add control outputs
    print(counter,
          counter_save)

    # Stop at the first batch number without an input file. Checking for the
    # file explicitly (instead of catching every exception around the read)
    # lets genuine read errors and keyboard interrupts surface rather than
    # being reported as a normal "Finished!".
    transactions_path = f"{transactions_dir}/UniswapV2Transactions_{counter}.par"
    if not os.path.exists(transactions_path):
        print("Finished!")
        break

    df1 = pd.read_parquet(transactions_path)

    # Convert gas price and value to float
    df1['trans_gasPrice_Gwei'] = (df1['trans_gasPrice']/(10**9))
    df1['value_float'] = (df1['value'].astype(float)/(10**18))

    # Heuristics 1,4,6
    # Rows per (block, pool, value); keep the (block, pool, transaction) groups
    # whose average count is above 1.
    df1['Count_1'] = df1.groupby(['block_number', 'poolId', 'value'], as_index=True)['value'].transform('size').astype('float')
    df_filtered = df1.groupby(['block_number', 'poolId', 'transaction_hash'], as_index=True).filter(lambda x: x['Count_1'].mean() > 1)

    # Heuristics 3 & 7
    # Add column with unique count of transaction_hash per sub-frame
    df_filtered['Count_nHash'] = df_filtered.groupby(['block_number', 'poolId', 'value'], as_index=True)['transaction_hash'].transform('nunique').astype('float')

    # Keep transactions whose Count_nHash averages exactly 1.5 (formerly > 1);
    # per the original note this targets exactly two different hashes.
    df_filtered2 = df_filtered.groupby(['transaction_hash'], as_index=True).filter(lambda x: x['Count_nHash'].mean() == 1.5)

    # Heuristic 5
    # 1-based position of each row within its transaction, ordered by log_index.
    df_filtered2['within_transaction_order'] = df_filtered2.sort_values(['log_index'], ascending=True).groupby(['transaction_hash'], as_index=True).cumcount().add(1)

    # Rows passing heuristic 5 (the frame with 1 observation per transaction).
    # Computed once and reused for both the hash list and the attack ids.
    match_keys = ['block_number', 'poolId', 'tf_tokenAddress', 'value']
    has_match = df_filtered2.duplicated(subset=match_keys, keep=False)
    df_attacksSingleObs = df_filtered2[has_match].groupby(match_keys, as_index=True).filter(lambda x: x['within_transaction_order'].mean() == 1.5)

    # All rows of every transaction that appears among those matches.
    AttackHashes = df_attacksSingleObs['transaction_hash'].unique()
    df_filtered3 = df_filtered2[df_filtered2['transaction_hash'].isin(AttackHashes)]

    ##### Adding an attack-ID
    # Adding an attack-ID to df with 1 observation per transaction
    df_attacksSingleObs['attack_Id'] = df_attacksSingleObs.groupby(['block_number', 'poolId', 'value'], as_index=True).ngroup()

    # Merge with full df; drop_duplicates removes rows repeated by the join.
    df_filtered3 = pd.merge(df_filtered3, df_attacksSingleObs[['transaction_hash', 'attack_Id']], on='transaction_hash', how='left').drop_duplicates()

    # Adding batch number
    df_filtered3['BatchNr'] = counter

    # Save separate file with counter number (100k blocks). It is written
    # before Attack_UID is added so every batch file has the same columns.
    df_filtered3.to_parquet(f"{output_dir}/AttacksUniswapV2_full_{counter_save}.par")

    # Add to the master file, or start it when none exists yet. The copy keeps
    # Attack_UID out of df_filtered3 itself.
    if os.path.exists(master_path):
        dfMaster = pd.concat([pd.read_parquet(master_path), df_filtered3], ignore_index=True)
    else:
        dfMaster = df_filtered3.copy()

    # Add overall attack Id
    dfMaster['Attack_UID'] = dfMaster.groupby(['BatchNr', 'attack_Id']).ngroup()
    dfMaster.to_parquet(master_path)

    # Clean/update variables
    counter = counter + 1
    counter_save = counter_save + 1
    del df1, df_filtered, df_filtered2, df_filtered3, dfMaster
    del AttackHashes, df_attacksSingleObs
        
        
    
    
    

1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10
11 11
12 12
13 13
14 14
15 15
16 16
17 17
18 18
19 19
20 20
21 21
22 22
23 23
24 24
25 25
26 26
Finished!
