### Packages

In [2]:
import os 
import pandas as pd 
import numpy as np 
import glob
import multiprocessing
import collections
from time import time
from tqdm import tqdm

# Working Directory
os.chdir("../../data")

# POur les opcodes 

from pyevmasm import instruction_tables, disassemble_hex, disassemble_all, assemble_hex
import binascii


# Import de Bytecode de Smart Contracts

In [3]:
def aggregate_contract_csv():
    """
    Concatene les csv présents dans le répertoire courant, 
    Renvoie un pandas DF résultant de la concaténation
    
    """
    extension = 'csv'
    all_filenames = [i for i in glob.glob('contracts*.{}'.format(extension))]
    combined_csv = pd.concat([pd.read_csv(f) for f in all_filenames ])
    combined_csv.to_csv( "combined_csv.csv", index=False, encoding='utf-8-sig')
    return(combined_csv)

In [4]:
smc_data = aggregate_contract_csv()

In [5]:
smc_data.head()

Unnamed: 0,address,bytecode,function_sighashes,is_erc20,is_erc721
0,0xb260ca22980dd31a5d2289c52cbb16e6235986bc,0x,,False,False
1,0xc7c7077456a7fd30e49c205049d35ad5bce32501,0x,,False,False
2,0x7d3ae940eb73dc9131758ad2e326c7d863b0916a,0x606060405236156100615760e060020a600035046313...,"0x13af4035,0x35c1d349,0x8da5cb5b,0x9003adfe,0x...",False,False
3,0xd9c324fa5596d188831afa9716ef78d0c2339afc,0x606060405236156100825760e060020a600035046306...,"0x06fdde03,0x18160ddd,0x23b872dd,0x313ce567,0x...",False,False
4,0xa0dcc980a9ed1fbeb6641b098929ee7caad5c5a6,0x606060405236156100825760e060020a600035046306...,"0x06fdde03,0x18160ddd,0x23b872dd,0x313ce567,0x...",False,False


# Levenshtein Distance

In [6]:
def distance_leven(mot1, mot2):
    #print(os.getpid())
    dist = { (-1,-1): 0 }
    for i,c in enumerate(mot1) :
        dist[i,-1] = dist[i-1,-1] + 1
        dist[-1,i] = dist[-1,i-1] + 1
        for j,d in enumerate(mot2) :
            opt = [ ]
            if (i-1,j) in dist : 
                x = dist[i-1,j] + 1
                opt.append(x)
            if (i,j-1) in dist : 
                x = dist[i,j-1] + 1
                opt.append(x)
            if (i-1,j-1) in dist :
                x = dist[i-1,j-1] + (1 if c != d else 0)
                opt.append(x)
            dist[i,j] = min(opt)
    #print('done')
    return dist[len(mot1)-1,len(mot2)-1]/(len(mot1)+len(mot2))

In [7]:
m1 = smc_data.bytecode.values[0]
m2 = smc_data.bytecode.values[14]

In [8]:
distance_leven(m1,m2)

0.9996945632254124

In [9]:
distance_leven(m1,m2)/max(len(m1),len(m2))

7.634753041281597e-05

In [10]:
distance_leven(m1,m2)/(len(m1)+len(m2))

7.633587074109746e-05

# Ponzi Schemes Bytecodes and Opcodes

In [11]:
arrayponzi = {"object": "0x6060604052600060016000505560006003600050555b33600260006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908302179055505b6103d38061004f6000396000f36060604052361561005e576000357c010000000000000000000000000000000000000000000000000000000090048063365b98b21461027c57806361027f78146102c55780638da5cb5b146102e8578063bff1f9e1146103215761005e565b61027a5b670de0b6b3a764000034101561007757610002565b604060405190810160405280338152602001348152602001506000600050600060005080549050815481101561000257906000526020600020906002020160005b5060008201518160000160006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908302179055506020820151816001016000505590505060016003600082828250540192505081905550600260009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff166000600a3404604051809050600060405180830381858888f19350505050505b60026000600050600160005054815481101561000257906000526020600020906002020160005b5060010160005054023073ffffffffffffffffffffffffffffffffffffffff16311115610277576000600050600160005054815481101561000257906000526020600020906002020160005b5060000160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16600060026000600050600160005054815481101561000257906000526020600020906002020160005b506001016000505402604051809050600060405180830381858888f193505050505060016001600082828250540192505081905550610166565b5b565b005b6102926004808035906020019091905050610344565b604051808373ffffffffffffffffffffffffffffffffffffffff1681526020018281526020019250505060405180910390f35b6102d2600480505061039b565b6040518082815260200191505060405180910390f35b6102f560048050506103a4565b604051808273ffffffffffffffffffffffffffffffffffffffff16815260200191505060405180910390f35b61032e60048050506103ca565b6040518082815260200191505060405180910390f35b600060005081815481101561000257906000526020600020906002020160005b915090508060000160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff16908060010160005054905082565b60016000505481565b600260009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1681565b6003600050548156","opcodes": "PUSH1 0x60 PUSH1 0x40 MSTORE PUSH1 0x0 PUSH1 0x1 PUSH1 0x0 POP SSTORE PUSH1 0x0 PUSH1 0x3 PUSH1 0x0 POP SSTORE JUMPDEST CALLER PUSH1 0x2 PUSH1 0x0 PUSH2 0x100 EXP DUP2 SLOAD DUP2 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MUL NOT AND SWAP1 DUP4 MUL OR SWAP1 SSTORE POP JUMPDEST PUSH2 0x3D3 DUP1 PUSH2 0x4F PUSH1 0x0 CODECOPY PUSH1 0x0 RETURN PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZERO PUSH2 0x5E JUMPI PUSH1 0x0 CALLDATALOAD PUSH29 0x100000000000000000000000000000000000000000000000000000000 SWAP1 DIV DUP1 PUSH4 0x365B98B2 EQ PUSH2 0x27C JUMPI DUP1 PUSH4 0x61027F78 EQ PUSH2 0x2C5 JUMPI DUP1 PUSH4 0x8DA5CB5B EQ PUSH2 0x2E8 JUMPI DUP1 PUSH4 0xBFF1F9E1 EQ PUSH2 0x321 JUMPI PUSH2 0x5E JUMP JUMPDEST PUSH2 0x27A JUMPDEST PUSH8 0xDE0B6B3A7640000 CALLVALUE LT ISZERO PUSH2 0x77 JUMPI PUSH2 0x2 JUMP JUMPDEST PUSH1 0x40 PUSH1 0x40 MLOAD SWAP1 DUP2 ADD PUSH1 0x40 MSTORE DUP1 CALLER DUP2 MSTORE PUSH1 0x20 ADD CALLVALUE DUP2 MSTORE PUSH1 0x20 ADD POP PUSH1 0x0 PUSH1 0x0 POP PUSH1 0x0 PUSH1 0x0 POP DUP1 SLOAD SWAP1 POP DUP2 SLOAD DUP2 LT ISZERO PUSH2 0x2 JUMPI SWAP1 PUSH1 0x0 MSTORE PUSH1 0x20 PUSH1 0x0 SHA3 SWAP1 PUSH1 0x2 MUL ADD PUSH1 0x0 JUMPDEST POP PUSH1 0x0 DUP3 ADD MLOAD DUP2 PUSH1 0x0 ADD PUSH1 0x0 PUSH2 0x100 EXP DUP2 SLOAD DUP2 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MUL NOT AND SWAP1 DUP4 MUL OR SWAP1 SSTORE POP PUSH1 0x20 DUP3 ADD MLOAD DUP2 PUSH1 0x1 ADD PUSH1 0x0 POP SSTORE SWAP1 POP POP PUSH1 0x1 PUSH1 0x3 PUSH1 0x0 DUP3 DUP3 DUP3 POP SLOAD ADD SWAP3 POP POP DUP2 SWAP1 SSTORE POP PUSH1 0x2 PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH1 0x0 PUSH1 0xA CALLVALUE DIV PUSH1 0x40 MLOAD DUP1 SWAP1 POP PUSH1 0x0 PUSH1 0x40 MLOAD DUP1 DUP4 SUB DUP2 DUP6 DUP9 DUP9 CALL SWAP4 POP POP POP POP POP JUMPDEST PUSH1 0x2 PUSH1 0x0 PUSH1 0x0 POP PUSH1 0x1 PUSH1 0x0 POP SLOAD DUP2 SLOAD DUP2 LT ISZERO PUSH2 0x2 JUMPI SWAP1 PUSH1 0x0 MSTORE PUSH1 0x20 PUSH1 0x0 SHA3 SWAP1 PUSH1 0x2 MUL ADD PUSH1 0x0 JUMPDEST POP PUSH1 0x1 ADD PUSH1 0x0 POP SLOAD MUL ADDRESS PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND BALANCE GT ISZERO PUSH2 0x277 JUMPI PUSH1 0x0 PUSH1 0x0 POP PUSH1 0x1 PUSH1 0x0 POP SLOAD DUP2 SLOAD DUP2 LT ISZERO PUSH2 0x2 JUMPI SWAP1 PUSH1 0x0 MSTORE PUSH1 0x20 PUSH1 0x0 SHA3 SWAP1 PUSH1 0x2 MUL ADD PUSH1 0x0 JUMPDEST POP PUSH1 0x0 ADD PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH1 0x0 PUSH1 0x2 PUSH1 0x0 PUSH1 0x0 POP PUSH1 0x1 PUSH1 0x0 POP SLOAD DUP2 SLOAD DUP2 LT ISZERO PUSH2 0x2 JUMPI SWAP1 PUSH1 0x0 MSTORE PUSH1 0x20 PUSH1 0x0 SHA3 SWAP1 PUSH1 0x2 MUL ADD PUSH1 0x0 JUMPDEST POP PUSH1 0x1 ADD PUSH1 0x0 POP SLOAD MUL PUSH1 0x40 MLOAD DUP1 SWAP1 POP PUSH1 0x0 PUSH1 0x40 MLOAD DUP1 DUP4 SUB DUP2 DUP6 DUP9 DUP9 CALL SWAP4 POP POP POP POP POP PUSH1 0x1 PUSH1 0x1 PUSH1 0x0 DUP3 DUP3 DUP3 POP SLOAD ADD SWAP3 POP POP DUP2 SWAP1 SSTORE POP PUSH2 0x166 JUMP JUMPDEST JUMPDEST JUMP JUMPDEST STOP JUMPDEST PUSH2 0x292 PUSH1 0x4 DUP1 DUP1 CALLDATALOAD SWAP1 PUSH1 0x20 ADD SWAP1 SWAP2 SWAP1 POP POP PUSH2 0x344 JUMP JUMPDEST PUSH1 0x40 MLOAD DUP1 DUP4 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND DUP2 MSTORE PUSH1 0x20 ADD DUP3 DUP2 MSTORE PUSH1 0x20 ADD SWAP3 POP POP POP PUSH1 0x40 MLOAD DUP1 SWAP2 SUB SWAP1 RETURN JUMPDEST PUSH2 0x2D2 PUSH1 0x4 DUP1 POP POP PUSH2 0x39B JUMP JUMPDEST PUSH1 0x40 MLOAD DUP1 DUP3 DUP2 MSTORE PUSH1 0x20 ADD SWAP2 POP POP PUSH1 0x40 MLOAD DUP1 SWAP2 SUB SWAP1 RETURN JUMPDEST PUSH2 0x2F5 PUSH1 0x4 DUP1 POP POP PUSH2 0x3A4 JUMP JUMPDEST PUSH1 0x40 MLOAD DUP1 DUP3 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND DUP2 MSTORE PUSH1 0x20 ADD SWAP2 POP POP PUSH1 0x40 MLOAD DUP1 SWAP2 SUB SWAP1 RETURN JUMPDEST PUSH2 0x32E PUSH1 0x4 DUP1 POP POP PUSH2 0x3CA JUMP JUMPDEST PUSH1 0x40 MLOAD DUP1 DUP3 DUP2 MSTORE PUSH1 0x20 ADD SWAP2 POP POP PUSH1 0x40 MLOAD DUP1 SWAP2 SUB SWAP1 RETURN JUMPDEST PUSH1 0x0 PUSH1 0x0 POP DUP2 DUP2 SLOAD DUP2 LT ISZERO PUSH2 0x2 JUMPI SWAP1 PUSH1 0x0 MSTORE PUSH1 0x20 PUSH1 0x0 SHA3 SWAP1 PUSH1 0x2 MUL ADD PUSH1 0x0 JUMPDEST SWAP2 POP SWAP1 POP DUP1 PUSH1 0x0 ADD PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND SWAP1 DUP1 PUSH1 0x1 ADD PUSH1 0x0 POP SLOAD SWAP1 POP DUP3 JUMP JUMPDEST PUSH1 0x1 PUSH1 0x0 POP SLOAD DUP2 JUMP JUMPDEST PUSH1 0x2 PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND DUP2 JUMP JUMPDEST PUSH1 0x3 PUSH1 0x0 POP SLOAD DUP2 JUMP "}

contract ArrayPonzi{
    struct User{
        address addr;
        uint amount;
    }
    User[] public users;
    uint public paying = 0;
    address public owner;
    uint public totalUsers=0;
    
    function ArrayPonzi(){
        owner = msg.sender;
    }
    function()  {
        if (msg.value < 1 ether) throw;
        
        users[users.length] = User({addr:msg.sender,amount:msg.value});
        totalUsers += 1 ;
        owner.send(msg.value/10);
        
        while (this.balance > users[paying].amount*2){
            users[paying].addr.send(users[paying].amount*2);
            paying += 1;
        }
        
    }
}

In [12]:
treeponzi = {
"object": "0x60606040525b60406040519081016040528033815260200133815260200150600060005060003373ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060005060008201518160000160006101000a81548173ffffffffffffffffffffffffffffffffffffffff0219169083021790555060208201518160010160006101000a81548173ffffffffffffffffffffffffffffffffffffffff0219169083021790555090505033600160006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908302179055505b61032c806100ec6000396000f360606040526000357c010000000000000000000000000000000000000000000000000000000090048063d014c01f1461003957610037565b005b61004f6004808035906020019091905050610051565b005b60006000670de0b6b3a76400003410806100d457506000600060005060003373ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060005060000160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1614155b8061014757506000600060005060008573ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060005060000160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16145b1561015157610002565b60406040519081016040528084815260200133815260200150600060005060003373ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060005060008201518160000160006101000a81548173ffffffffffffffffffffffffffffffffffffffff0219169083021790555060208201518160010160006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908302179055509050508291503490505b600160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168273ffffffffffffffffffffffffffffffffffffffff161415156102f35760028104905080508173ffffffffffffffffffffffffffffffffffffffff16600082604051809050600060405180830381858888f1935050505050600060005060008373ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060005060000160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1691508150610205565b8173ffffffffffffffffffffffffffffffffffffffff16600082604051809050600060405180830381858888f19350505050505b50505056",
"opcodes": "PUSH1 0x60 PUSH1 0x40 MSTORE JUMPDEST PUSH1 0x40 PUSH1 0x40 MLOAD SWAP1 DUP2 ADD PUSH1 0x40 MSTORE DUP1 CALLER DUP2 MSTORE PUSH1 0x20 ADD CALLER DUP2 MSTORE PUSH1 0x20 ADD POP PUSH1 0x0 PUSH1 0x0 POP PUSH1 0x0 CALLER PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND DUP2 MSTORE PUSH1 0x20 ADD SWAP1 DUP2 MSTORE PUSH1 0x20 ADD PUSH1 0x0 SHA3 PUSH1 0x0 POP PUSH1 0x0 DUP3 ADD MLOAD DUP2 PUSH1 0x0 ADD PUSH1 0x0 PUSH2 0x100 EXP DUP2 SLOAD DUP2 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MUL NOT AND SWAP1 DUP4 MUL OR SWAP1 SSTORE POP PUSH1 0x20 DUP3 ADD MLOAD DUP2 PUSH1 0x1 ADD PUSH1 0x0 PUSH2 0x100 EXP DUP2 SLOAD DUP2 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MUL NOT AND SWAP1 DUP4 MUL OR SWAP1 SSTORE POP SWAP1 POP POP CALLER PUSH1 0x1 PUSH1 0x0 PUSH2 0x100 EXP DUP2 SLOAD DUP2 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MUL NOT AND SWAP1 DUP4 MUL OR SWAP1 SSTORE POP JUMPDEST PUSH2 0x32C DUP1 PUSH2 0xEC PUSH1 0x0 CODECOPY PUSH1 0x0 RETURN PUSH1 0x60 PUSH1 0x40 MSTORE PUSH1 0x0 CALLDATALOAD PUSH29 0x100000000000000000000000000000000000000000000000000000000 SWAP1 DIV DUP1 PUSH4 0xD014C01F EQ PUSH2 0x39 JUMPI PUSH2 0x37 JUMP JUMPDEST STOP JUMPDEST PUSH2 0x4F PUSH1 0x4 DUP1 DUP1 CALLDATALOAD SWAP1 PUSH1 0x20 ADD SWAP1 SWAP2 SWAP1 POP POP PUSH2 0x51 JUMP JUMPDEST STOP JUMPDEST PUSH1 0x0 PUSH1 0x0 PUSH8 0xDE0B6B3A7640000 CALLVALUE LT DUP1 PUSH2 0xD4 JUMPI POP PUSH1 0x0 PUSH1 0x0 PUSH1 0x0 POP PUSH1 0x0 CALLER PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND DUP2 MSTORE PUSH1 0x20 ADD SWAP1 DUP2 MSTORE PUSH1 0x20 ADD PUSH1 0x0 SHA3 PUSH1 0x0 POP PUSH1 0x0 ADD PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND EQ ISZERO JUMPDEST DUP1 PUSH2 0x147 JUMPI POP PUSH1 0x0 PUSH1 0x0 PUSH1 0x0 POP PUSH1 0x0 DUP6 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND DUP2 MSTORE PUSH1 0x20 ADD SWAP1 DUP2 MSTORE PUSH1 0x20 ADD PUSH1 0x0 SHA3 PUSH1 0x0 POP PUSH1 0x0 ADD PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND EQ JUMPDEST ISZERO PUSH2 0x151 JUMPI PUSH2 0x2 JUMP JUMPDEST PUSH1 0x40 PUSH1 0x40 MLOAD SWAP1 DUP2 ADD PUSH1 0x40 MSTORE DUP1 DUP5 DUP2 MSTORE PUSH1 0x20 ADD CALLER DUP2 MSTORE PUSH1 0x20 ADD POP PUSH1 0x0 PUSH1 0x0 POP PUSH1 0x0 CALLER PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND DUP2 MSTORE PUSH1 0x20 ADD SWAP1 DUP2 MSTORE PUSH1 0x20 ADD PUSH1 0x0 SHA3 PUSH1 0x0 POP PUSH1 0x0 DUP3 ADD MLOAD DUP2 PUSH1 0x0 ADD PUSH1 0x0 PUSH2 0x100 EXP DUP2 SLOAD DUP2 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MUL NOT AND SWAP1 DUP4 MUL OR SWAP1 SSTORE POP PUSH1 0x20 DUP3 ADD MLOAD DUP2 PUSH1 0x1 ADD PUSH1 0x0 PUSH2 0x100 EXP DUP2 SLOAD DUP2 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MUL NOT AND SWAP1 DUP4 MUL OR SWAP1 SSTORE POP SWAP1 POP POP DUP3 SWAP2 POP CALLVALUE SWAP1 POP JUMPDEST PUSH1 0x1 PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND DUP3 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND EQ ISZERO ISZERO PUSH2 0x2F3 JUMPI PUSH1 0x2 DUP2 DIV SWAP1 POP DUP1 POP DUP2 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH1 0x0 DUP3 PUSH1 0x40 MLOAD DUP1 SWAP1 POP PUSH1 0x0 PUSH1 0x40 MLOAD DUP1 DUP4 SUB DUP2 DUP6 DUP9 DUP9 CALL SWAP4 POP POP POP POP POP PUSH1 0x0 PUSH1 0x0 POP PUSH1 0x0 DUP4 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND DUP2 MSTORE PUSH1 0x20 ADD SWAP1 DUP2 MSTORE PUSH1 0x20 ADD PUSH1 0x0 SHA3 PUSH1 0x0 POP PUSH1 0x0 ADD PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND SWAP2 POP DUP2 POP PUSH2 0x205 JUMP JUMPDEST DUP2 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH1 0x0 DUP3 PUSH1 0x40 MLOAD DUP1 SWAP1 POP PUSH1 0x0 PUSH1 0x40 MLOAD DUP1 DUP4 SUB DUP2 DUP6 DUP9 DUP9 CALL SWAP4 POP POP POP POP POP JUMPDEST POP POP POP JUMP "
}

contract TreePonzi {
    struct User {
        address inviter;
        address itself;
    }
    mapping (address =>User) tree;
    address top;
    
    function TreePonzi(){
        tree[msg.sender] = User({itself:msg.sender,inviter:msg.sender});
        top = msg.sender;
    }
    
    function enter(address inviter) public {
        if ((msg.value < 1 ether) || (tree[msg.sender].inviter != 0x0) || (tree[inviter].inviter == 0x0)) throw;
        tree[msg.sender] = User({itself:msg.sender,inviter:inviter});
        address current = inviter;
        uint amount = msg.value;
        while(current != top){
            amount = amount/2;
            current.send(amount);
            current = tree[current].inviter;
        }
        current.send(amount);
}
    
}

In [13]:
handoverponzi = {
"object": "0x606060405267016345785d8a00006002600050555b33600060006101000a81548173ffffffffffffffffffffffffffffffffffffffff0219169083021790555033600160006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908302179055505b61025c806100796000396000f360606040523615610053576000357c0100000000000000000000000000000000000000000000000000000000900480630eb3f5a0146101095780634f8632ba14610121578063a035b1fe1461015a57610053565b6101075b60026000505434101561006957610002565b600160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff166000600a6009340204604051809050600060405180830381858888f193505050505033600160006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908302179055506002600360026000505402046002600050819055505b565b005b61011f600480803590602001909190505061017d565b005b61012e600480505061022d565b604051808273ffffffffffffffffffffffffffffffffffffffff16815260200191505060405180910390f35b6101676004805050610253565b6040518082815260200191505060405180910390f35b600060009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff163373ffffffffffffffffffffffffffffffffffffffff16141561022957600060009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16600082604051809050600060405180830381858888f19350505050505b5b50565b600160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1681565b6002600050548156",
"opcodes": "PUSH1 0x60 PUSH1 0x40 MSTORE PUSH8 0x16345785D8A0000 PUSH1 0x2 PUSH1 0x0 POP SSTORE JUMPDEST CALLER PUSH1 0x0 PUSH1 0x0 PUSH2 0x100 EXP DUP2 SLOAD DUP2 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MUL NOT AND SWAP1 DUP4 MUL OR SWAP1 SSTORE POP CALLER PUSH1 0x1 PUSH1 0x0 PUSH2 0x100 EXP DUP2 SLOAD DUP2 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MUL NOT AND SWAP1 DUP4 MUL OR SWAP1 SSTORE POP JUMPDEST PUSH2 0x25C DUP1 PUSH2 0x79 PUSH1 0x0 CODECOPY PUSH1 0x0 RETURN PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZERO PUSH2 0x53 JUMPI PUSH1 0x0 CALLDATALOAD PUSH29 0x100000000000000000000000000000000000000000000000000000000 SWAP1 DIV DUP1 PUSH4 0xEB3F5A0 EQ PUSH2 0x109 JUMPI DUP1 PUSH4 0x4F8632BA EQ PUSH2 0x121 JUMPI DUP1 PUSH4 0xA035B1FE EQ PUSH2 0x15A JUMPI PUSH2 0x53 JUMP JUMPDEST PUSH2 0x107 JUMPDEST PUSH1 0x2 PUSH1 0x0 POP SLOAD CALLVALUE LT ISZERO PUSH2 0x69 JUMPI PUSH2 0x2 JUMP JUMPDEST PUSH1 0x1 PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH1 0x0 PUSH1 0xA PUSH1 0x9 CALLVALUE MUL DIV PUSH1 0x40 MLOAD DUP1 SWAP1 POP PUSH1 0x0 PUSH1 0x40 MLOAD DUP1 DUP4 SUB DUP2 DUP6 DUP9 DUP9 CALL SWAP4 POP POP POP POP POP CALLER PUSH1 0x1 PUSH1 0x0 PUSH2 0x100 EXP DUP2 SLOAD DUP2 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MUL NOT AND SWAP1 DUP4 MUL OR SWAP1 SSTORE POP PUSH1 0x2 PUSH1 0x3 PUSH1 0x2 PUSH1 0x0 POP SLOAD MUL DIV PUSH1 0x2 PUSH1 0x0 POP DUP2 SWAP1 SSTORE POP JUMPDEST JUMP JUMPDEST STOP JUMPDEST PUSH2 0x11F PUSH1 0x4 DUP1 DUP1 CALLDATALOAD SWAP1 PUSH1 0x20 ADD SWAP1 SWAP2 SWAP1 POP POP PUSH2 0x17D JUMP JUMPDEST STOP JUMPDEST PUSH2 0x12E PUSH1 0x4 DUP1 POP POP PUSH2 0x22D JUMP JUMPDEST PUSH1 0x40 MLOAD DUP1 DUP3 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND DUP2 MSTORE PUSH1 0x20 ADD SWAP2 POP POP PUSH1 0x40 MLOAD DUP1 SWAP2 SUB SWAP1 RETURN JUMPDEST PUSH2 0x167 PUSH1 0x4 DUP1 POP POP PUSH2 0x253 JUMP JUMPDEST PUSH1 0x40 MLOAD DUP1 DUP3 DUP2 MSTORE PUSH1 0x20 ADD SWAP2 POP POP PUSH1 0x40 MLOAD DUP1 SWAP2 SUB SWAP1 RETURN JUMPDEST PUSH1 0x0 PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND CALLER PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND EQ ISZERO PUSH2 0x229 JUMPI PUSH1 0x0 PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH1 0x0 DUP3 PUSH1 0x40 MLOAD DUP1 SWAP1 POP PUSH1 0x0 PUSH1 0x40 MLOAD DUP1 DUP4 SUB DUP2 DUP6 DUP9 DUP9 CALL SWAP4 POP POP POP POP POP JUMPDEST JUMPDEST POP JUMP JUMPDEST PUSH1 0x1 PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND DUP2 JUMP JUMPDEST PUSH1 0x2 PUSH1 0x0 POP SLOAD DUP2 JUMP "
}

contract HandoverPonzi{
    address owner;
    address public user;
    uint public price = 100 finney;
    
    function HandoverPonzi(){
        owner = msg.sender;
        user = msg.sender;
    }
    
    function(){
        if (msg.value < price) throw;
        user.send(msg.value * 9/10);
        user = msg.sender ;
        price = price *3/2;
        
    }
    
    function sweepCommission(uint amount){
        if (msg.sender == owner) owner.send(amount);
        
    }
}

In [14]:
waterfallponzi = {
"object": "0x60606040526000600160005055600060026000505560006004600050555b33600360006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908302179055505b610406806100576000396000f36060604052361561005e576000357c010000000000000000000000000000000000000000000000000000000090048063365b98b2146102af5780638da5cb5b146102f85780639af1d35a14610331578063bff1f9e1146103545761005e565b6102ad5b670de0b6b3a764000034101561007757610002565b604060405190810160405280338152602001348152602001506000600050600260005054815481101561000257906000526020600020906002020160005b5060008201518160000160006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908302179055506020820151816001016000505590505060016002600082828250540192505081905550600a3404600460005081905550600360009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff166000600460005054604051809050600060405180830381858888f193505050505060006001600050819055505b606460066000600050600160005054815481101561000257906000526020600020906002020160005b506001016000505402043073ffffffffffffffffffffffffffffffffffffffff1631101580156101df5750600260005054600160005054105b156102aa576000600050600160005054815481101561000257906000526020600020906002020160005b5060000160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff166000606460066000600050600160005054815481101561000257906000526020600020906002020160005b50600101600050540204604051809050600060405180830381858888f19350505050506001600160008282825054019250508190555061017d565b5b565b005b6102c56004808035906020019091905050610380565b604051808373ffffffffffffffffffffffffffffffffffffffff1681526020018281526020019250505060405180910390f35b61030560048050506103e0565b604051808273ffffffffffffffffffffffffffffffffffffffff16815260200191505060405180910390f35b61033e6004805050610377565b6040518082815260200191505060405180910390f35b61036160048050506103d7565b6040518082815260200191505060405180910390f35b60046000505481565b600060005081815481101561000257906000526020600020906002020160005b915090508060000160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff16908060010160005054905082565b60026000505481565b600360009054906101000a900473ffffffffffffffffffffffffffffffffffffffff168156",
"opcodes": "PUSH1 0x60 PUSH1 0x40 MSTORE PUSH1 0x0 PUSH1 0x1 PUSH1 0x0 POP SSTORE PUSH1 0x0 PUSH1 0x2 PUSH1 0x0 POP SSTORE PUSH1 0x0 PUSH1 0x4 PUSH1 0x0 POP SSTORE JUMPDEST CALLER PUSH1 0x3 PUSH1 0x0 PUSH2 0x100 EXP DUP2 SLOAD DUP2 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MUL NOT AND SWAP1 DUP4 MUL OR SWAP1 SSTORE POP JUMPDEST PUSH2 0x406 DUP1 PUSH2 0x57 PUSH1 0x0 CODECOPY PUSH1 0x0 RETURN PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZERO PUSH2 0x5E JUMPI PUSH1 0x0 CALLDATALOAD PUSH29 0x100000000000000000000000000000000000000000000000000000000 SWAP1 DIV DUP1 PUSH4 0x365B98B2 EQ PUSH2 0x2AF JUMPI DUP1 PUSH4 0x8DA5CB5B EQ PUSH2 0x2F8 JUMPI DUP1 PUSH4 0x9AF1D35A EQ PUSH2 0x331 JUMPI DUP1 PUSH4 0xBFF1F9E1 EQ PUSH2 0x354 JUMPI PUSH2 0x5E JUMP JUMPDEST PUSH2 0x2AD JUMPDEST PUSH8 0xDE0B6B3A7640000 CALLVALUE LT ISZERO PUSH2 0x77 JUMPI PUSH2 0x2 JUMP JUMPDEST PUSH1 0x40 PUSH1 0x40 MLOAD SWAP1 DUP2 ADD PUSH1 0x40 MSTORE DUP1 CALLER DUP2 MSTORE PUSH1 0x20 ADD CALLVALUE DUP2 MSTORE PUSH1 0x20 ADD POP PUSH1 0x0 PUSH1 0x0 POP PUSH1 0x2 PUSH1 0x0 POP SLOAD DUP2 SLOAD DUP2 LT ISZERO PUSH2 0x2 JUMPI SWAP1 PUSH1 0x0 MSTORE PUSH1 0x20 PUSH1 0x0 SHA3 SWAP1 PUSH1 0x2 MUL ADD PUSH1 0x0 JUMPDEST POP PUSH1 0x0 DUP3 ADD MLOAD DUP2 PUSH1 0x0 ADD PUSH1 0x0 PUSH2 0x100 EXP DUP2 SLOAD DUP2 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF MUL NOT AND SWAP1 DUP4 MUL OR SWAP1 SSTORE POP PUSH1 0x20 DUP3 ADD MLOAD DUP2 PUSH1 0x1 ADD PUSH1 0x0 POP SSTORE SWAP1 POP POP PUSH1 0x1 PUSH1 0x2 PUSH1 0x0 DUP3 DUP3 DUP3 POP SLOAD ADD SWAP3 POP POP DUP2 SWAP1 SSTORE POP PUSH1 0xA CALLVALUE DIV PUSH1 0x4 PUSH1 0x0 POP DUP2 SWAP1 SSTORE POP PUSH1 0x3 PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH1 0x0 PUSH1 0x4 PUSH1 0x0 POP SLOAD PUSH1 0x40 MLOAD DUP1 SWAP1 POP PUSH1 0x0 PUSH1 0x40 MLOAD DUP1 DUP4 SUB DUP2 DUP6 DUP9 DUP9 CALL SWAP4 POP POP POP POP POP PUSH1 0x0 PUSH1 0x1 PUSH1 0x0 POP DUP2 SWAP1 SSTORE POP JUMPDEST PUSH1 0x64 PUSH1 0x6 PUSH1 0x0 PUSH1 0x0 POP PUSH1 0x1 PUSH1 0x0 POP SLOAD DUP2 SLOAD DUP2 LT ISZERO PUSH2 0x2 JUMPI SWAP1 PUSH1 0x0 MSTORE PUSH1 0x20 PUSH1 0x0 SHA3 SWAP1 PUSH1 0x2 MUL ADD PUSH1 0x0 JUMPDEST POP PUSH1 0x1 ADD PUSH1 0x0 POP SLOAD MUL DIV ADDRESS PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND BALANCE LT ISZERO DUP1 ISZERO PUSH2 0x1DF JUMPI POP PUSH1 0x2 PUSH1 0x0 POP SLOAD PUSH1 0x1 PUSH1 0x0 POP SLOAD LT JUMPDEST ISZERO PUSH2 0x2AA JUMPI PUSH1 0x0 PUSH1 0x0 POP PUSH1 0x1 PUSH1 0x0 POP SLOAD DUP2 SLOAD DUP2 LT ISZERO PUSH2 0x2 JUMPI SWAP1 PUSH1 0x0 MSTORE PUSH1 0x20 PUSH1 0x0 SHA3 SWAP1 PUSH1 0x2 MUL ADD PUSH1 0x0 JUMPDEST POP PUSH1 0x0 ADD PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND PUSH1 0x0 PUSH1 0x64 PUSH1 0x6 PUSH1 0x0 PUSH1 0x0 POP PUSH1 0x1 PUSH1 0x0 POP SLOAD DUP2 SLOAD DUP2 LT ISZERO PUSH2 0x2 JUMPI SWAP1 PUSH1 0x0 MSTORE PUSH1 0x20 PUSH1 0x0 SHA3 SWAP1 PUSH1 0x2 MUL ADD PUSH1 0x0 JUMPDEST POP PUSH1 0x1 ADD PUSH1 0x0 POP SLOAD MUL DIV PUSH1 0x40 MLOAD DUP1 SWAP1 POP PUSH1 0x0 PUSH1 0x40 MLOAD DUP1 DUP4 SUB DUP2 DUP6 DUP9 DUP9 CALL SWAP4 POP POP POP POP POP PUSH1 0x1 PUSH1 0x1 PUSH1 0x0 DUP3 DUP3 DUP3 POP SLOAD ADD SWAP3 POP POP DUP2 SWAP1 SSTORE POP PUSH2 0x17D JUMP JUMPDEST JUMPDEST JUMP JUMPDEST STOP JUMPDEST PUSH2 0x2C5 PUSH1 0x4 DUP1 DUP1 CALLDATALOAD SWAP1 PUSH1 0x20 ADD SWAP1 SWAP2 SWAP1 POP POP PUSH2 0x380 JUMP JUMPDEST PUSH1 0x40 MLOAD DUP1 DUP4 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND DUP2 MSTORE PUSH1 0x20 ADD DUP3 DUP2 MSTORE PUSH1 0x20 ADD SWAP3 POP POP POP PUSH1 0x40 MLOAD DUP1 SWAP2 SUB SWAP1 RETURN JUMPDEST PUSH2 0x305 PUSH1 0x4 DUP1 POP POP PUSH2 0x3E0 JUMP JUMPDEST PUSH1 0x40 MLOAD DUP1 DUP3 PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND DUP2 MSTORE PUSH1 0x20 ADD SWAP2 POP POP PUSH1 0x40 MLOAD DUP1 SWAP2 SUB SWAP1 RETURN JUMPDEST PUSH2 0x33E PUSH1 0x4 DUP1 POP POP PUSH2 0x377 JUMP JUMPDEST PUSH1 0x40 MLOAD DUP1 DUP3 DUP2 MSTORE PUSH1 0x20 ADD SWAP2 POP POP PUSH1 0x40 MLOAD DUP1 SWAP2 SUB SWAP1 RETURN JUMPDEST PUSH2 0x361 PUSH1 0x4 DUP1 POP POP PUSH2 0x3D7 JUMP JUMPDEST PUSH1 0x40 MLOAD DUP1 DUP3 DUP2 MSTORE PUSH1 0x20 ADD SWAP2 POP POP PUSH1 0x40 MLOAD DUP1 SWAP2 SUB SWAP1 RETURN JUMPDEST PUSH1 0x4 PUSH1 0x0 POP SLOAD DUP2 JUMP JUMPDEST PUSH1 0x0 PUSH1 0x0 POP DUP2 DUP2 SLOAD DUP2 LT ISZERO PUSH2 0x2 JUMPI SWAP1 PUSH1 0x0 MSTORE PUSH1 0x20 PUSH1 0x0 SHA3 SWAP1 PUSH1 0x2 MUL ADD PUSH1 0x0 JUMPDEST SWAP2 POP SWAP1 POP DUP1 PUSH1 0x0 ADD PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND SWAP1 DUP1 PUSH1 0x1 ADD PUSH1 0x0 POP SLOAD SWAP1 POP DUP3 JUMP JUMPDEST PUSH1 0x2 PUSH1 0x0 POP SLOAD DUP2 JUMP JUMPDEST PUSH1 0x3 PUSH1 0x0 SWAP1 SLOAD SWAP1 PUSH2 0x100 EXP SWAP1 DIV PUSH20 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF AND DUP2 JUMP "
}

contract WaterfallPonzi{
    struct User {
        address addr;
        uint amount;
    }
    
    User[] public users;
    uint pos = 0;
    uint public totalUsers=0;
    address public owner;
    uint public fees = 0;
    
    function WaterfallPonzi(){
        owner = msg.sender;
    }
    
    function(){
        if (msg.value < 1 ether) throw;
        
        users[totalUsers] = User({addr:msg.sender,amount:msg.value});
        
        totalUsers += 1;
        fees = msg.value/10;
        owner.send(fees);
        
        pos = 0;
        while (this.balance >= users[pos].amount * 6/100 && pos < totalUsers){
            users[pos].addr.send(users[pos].amount * 6/100);
            pos += 1;
        }
    }
}

# Création de la distance 

In [15]:
def add_dist(df):
    """
    Version non parallélisée
    A partir d'un DF pandas, ajoute la distance de levenshtein des bytecodes des contrats aux bytecodes de référence
    
    """
    
    pool = multiprocessing.Pool()
    
    arr_ponz = []
    tree_ponz = []
    handov_ponz = []
    water_ponz = []
    
    arponzstr = arrayponzi['object']
    treeponzstr = treeponzi['object']
    handoverponzstr = handoverponzi['object']
    waterponzstr = waterfallponzi['object']
    
    c = 0
    for by in df.bytecode.values:
        
        arr_ponz.append(distance_leven(by,arponzstr)/(len(by)+len(arponzstr)))
        print('done arrponz' + ' ' + str(c))
        tree_ponz.append(distance_leven(by,treeponzstr)/(len(by)+len(treeponzstr)))
        print('done treeponz' + ' ' + str(c))
        handov_ponz.append(distance_leven(by,handoverponzstr)/(len(by)+len(handoverponzstr)))
        print('done handoverponz' + ' ' + str(c))
        water_ponz.append(distance_leven(by,waterponzstr)/(len(by)+len(waterponzstr)))
        print('done waterponz' + ' ' + str(c))
        c += 1 
        
    df['dist arr'] = arr_ponz
    df['dist tree'] = tree_ponz
    df['dist handov'] = handov_ponz
    df['dist water'] = water_ponz
    
    return(df)

In [16]:
#df = add_dist(smc_data)

In [17]:
#df.to_csv('smc_data_dist.csv')

In [18]:
#df[['dist arr','dist tree','dist handov','dist water']]

In [19]:
def add_dist_parr(df):
    
    """
    Version parallélisée
    A partir d'un DF pandas, ajoute la distance de levenshtein des bytecodes des contrats aux bytecodes de référence
    
    """
    # Liste des bytecode de référence
    
    arr = arrayponzi['object']
    tree = treeponzi['object']
    handover = handoverponzi['object']
    water = waterfallponzi['object']
    
    
    ponzi_list = [arr,tree,handover,water]
    time_elapsed = 0
    result_list = []
    
    for ponz_type in ponzi_list:
        
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
        start = time()
        
        results = [pool.apply(distance_leven,args=(m1,ponz_type)) for m1 in df['bytecode']]
        
        end = time()
        time_elapsed += (end-start)
        
        result_list.append(results)
        
        pool.close()
    
    df['byte dist arr'] = result_list[0]
    df['byte dist tree'] = result_list[1]
    df['byte dist handover'] = result_list[2]
    df['byte dist water'] = result_list[3]
    
    
    
    
    return(df,time_elapsed)

In [20]:
def add_dist_parr2(df):
    
    """
    Version parallélisée
    A partir d'un DF pandas, ajoute la distance de levenshtein des bytecodes des contrats aux bytecodes de référence
    
    """
    # Liste des bytecode de référence
    
    arr = arrayponzi['object']
    tree = treeponzi['object']
    handover = handoverponzi['object']
    water = waterfallponzi['object']
    
    
    ponzi_list = [arr,tree,handover,water]
    time_elapsed1 = 0
    result_list = []
    
    for ponz_type in tqdm(ponzi_list):
        
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
        start = time()
        
        results = [pool.apply(distance_leven,args=(m1,ponz_type)) for m1 in df['bytecode']]
        
        end = time()
        time_elapsed1 += (end-start)
        
        result_list.append(results)
        
        pool.close()
    
    df['byte dist arr'] = result_list[0]
    df['byte dist tree'] = result_list[1]
    df['byte dist handover'] = result_list[2]
    df['byte dist water'] = result_list[3]
    
    # Comparaison des OPCODES 
    
    arr = arrayponzi['opcodes']
    tree = treeponzi['opcodes']
    handover = handoverponzi['opcodes']
    water = waterfallponzi['opcodes']
    
    
    ponzi_list = [arr,tree,handover,water]
    time_elapsed2 = 0
    result_list = []
    
    for ponz_type in tqdm(ponzi_list):
        
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
        start = time()
        
        results = [pool.apply(distance_leven,args=(m1,ponz_type)) for m1 in df['opcode']]
        
        end = time()
        time_elapsed2 += (end-start)
        
        result_list.append(results)
        
        pool.close()
    
    df['op dist arr'] = result_list[0]
    df['op dist tree'] = result_list[1]
    df['op dist handover'] = result_list[2]
    df['op dist water'] = result_list[3]    
    
    return(df,time_elapsed1+time_elapsed2)

In [21]:
def add_dist_parr3(df):
    
    """
    Version parallélisée
    A partir d'un DF pandas, ajoute la distance de levenshtein des bytecodes des contrats aux bytecodes de référence
    
    """
    # Liste des bytecode de référence
    
    arr = arrayponzi['object']
    tree = treeponzi['object']
    handover = handoverponzi['object']
    water = waterfallponzi['object']
    
    
    ponzi_list = [arr,tree,handover,water]
    time_elapsed1 = 0
    result_list = []
    
    for ponz_type in ponzi_list:
        
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
        start = time()
        results = []
        
        for m1 in tqdm(df['bytecode']):
            results.append(pool.apply(distance_leven,args=(m1,ponz_type)))
        
        
        end = time()
        time_elapsed1 += (end-start)
        
        result_list.append(results)
        
        pool.close()
    
    df['byte dist arr'] = result_list[0]
    df['byte dist tree'] = result_list[1]
    df['byte dist handover'] = result_list[2]
    df['byte dist water'] = result_list[3]
    
    # Comparaison des OPCODES 
    
    arr = arrayponzi['opcodes']
    tree = treeponzi['opcodes']
    handover = handoverponzi['opcodes']
    water = waterfallponzi['opcodes']
    
    
    ponzi_list = [arr,tree,handover,water]
    time_elapsed2 = 0
    result_list = []
    
    for ponz_type in ponzi_list:
        
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
        start = time()
        results = []
        
        for m1 in tqdm(df['opcode']):
            results.append(pool.apply(distance_leven,args=(m1,ponz_type)))
        
        
        end = time()
        time_elapsed2 += (end-start)
        
        result_list.append(results)
        
        pool.close()
    
    df['op dist arr'] = result_list[0]
    df['op dist tree'] = result_list[1]
    df['op dist handover'] = result_list[2]
    df['op dist water'] = result_list[3]    
    
    return(df,time_elapsed1+time_elapsed2)

In [22]:
def add_dist_op(df):
    
    arr = arrayponzi['opcodes']
    tree = treeponzi['opcodes']
    handover = handoverponzi['opcodes']
    water = waterfallponzi['opcodes']
    
    
    ponzi_list = [arr,tree,handover,water]
    time_elapsed2 = 0
    result_list = []
    
    for ponz_type in ponzi_list:
        
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
        start = time()
        
        results = [pool.apply(distance_leven,args=(m1,ponz_type)) for m1 in df['opcode']]
        
        end = time()
        time_elapsed2 += (end-start)
        
        result_list.append(results)
        
        pool.close()
    
    df['op dist arr'] = result_list[0]
    df['op dist tree'] = result_list[1]
    df['op dist handover'] = result_list[2]
    df['op dist water'] = result_list[3]    
    
    return(df,time_elapsed2)

# Enrichissement des données sur les Smart Contract

## Rajout des OPCODES

In [67]:
#x = df.iloc[3].bytecode

In [68]:
x = "0x60606040525b60406040519081016040528033815260200133815260200150600060005060003373ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060005060008201518160000160006101000a81548173ffffffffffffffffffffffffffffffffffffffff0219169083021790555060208201518160010160006101000a81548173ffffffffffffffffffffffffffffffffffffffff0219169083021790555090505033600160006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908302179055505b61032c806100ec6000396000f360606040526000357c010000000000000000000000000000000000000000000000000000000090048063d014c01f1461003957610037565b005b61004f6004808035906020019091905050610051565b005b60006000670de0b6b3a76400003410806100d457506000600060005060003373ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060005060000160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff1614155b8061014757506000600060005060008573ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060005060000160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff16145b1561015157610002565b60406040519081016040528084815260200133815260200150600060005060003373ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060005060008201518160000160006101000a81548173ffffffffffffffffffffffffffffffffffffffff0219169083021790555060208201518160010160006101000a81548173ffffffffffffffffffffffffffffffffffffffff021916908302179055509050508291503490505b600160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1673ffffffffffffffffffffffffffffffffffffffff168273ffffffffffffffffffffffffffffffffffffffff161415156102f35760028104905080508173ffffffffffffffffffffffffffffffffffffffff16600082604051809050600060405180830381858888f1935050505050600060005060008373ffffffffffffffffffffffffffffffffffffffff16815260200190815260200160002060005060000160009054906101000a900473ffffffffffffffffffffffffffffffffffffffff1691508150610205565b8173ffffffffffffffffffffffffffffffffffffffff16600082604051809050600060405180830381858888f19350505050505b50505056"

In [2]:
x = smc_data['opcode'].iloc[7]

NameError: name 'smc_data' is not defined

In [95]:
x

'PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZERO PUSH2 0x61 JUMPI PUSH1 0xe0 PUSH1 0x2 EXP PUSH1 0x0 CALLDATALOAD DIV PUSH4 0x8513af8d DUP2 EQ PUSH2 0x63 JUMPI DUP1 PUSH4 0x8e86755c EQ PUSH2 0x21d JUMPI DUP1 PUSH4 0xcb135a68 EQ PUSH2 0x337 JUMPI DUP1 PUSH4 0xccc65845 EQ PUSH2 0x451 JUMPI DUP1 PUSH4 0xd68629c1 EQ PUSH2 0x5b8 JUMPI DUP1 PUSH4 0xd939f910 EQ PUSH2 0x7d2 JUMPI DUP1 PUSH4 0xef311f20 EQ PUSH2 0x938 JUMPI JUMPDEST STOP JUMPDEST PUSH2 0x61 PUSH1 0x4 DUP1 DUP1 CALLDATALOAD SWAP1 PUSH1 0x20 ADD SWAP1 DUP3 ADD DUP1 CALLDATALOAD SWAP1 PUSH1 0x20 ADD SWAP2 SWAP2 SWAP1 DUP1 DUP1 PUSH1 0x1f ADD PUSH1 0x20 DUP1 SWAP2 DIV MUL PUSH1 0x20 ADD PUSH1 0x40 MLOAD SWAP1 DUP2 ADD PUSH1 0x40 MSTORE DUP1 SWAP4 SWAP3 SWAP2 SWAP1 DUP2 DUP2 MSTORE PUSH1 0x20 ADD DUP4 DUP4 DUP1 DUP3 DUP5 CALLDATACOPY POP POP PUSH1 0x40 DUP1 MLOAD PUSH1 0x20 DUP9 CALLDATALOAD DUP1 DUP12 ADD CALLDATALOAD PUSH1 0x1f DUP2 ADD DUP4 SWAP1 DIV DUP4 MUL DUP5 ADD DUP4 ADD SWAP1 SWAP5 MSTORE DUP4 DUP4 MSTORE SWAP8 SWAP10 SWAP

In [94]:
instrs = list(disassemble_all(binascii.unhexlify(x[2:])))

Error: Odd-length string

In [70]:
a = assemble_hex(instrs)

In [71]:
print(disassemble_hex(a))

PUSH1 0x60
PUSH1 0x40
MSTORE
JUMPDEST
PUSH1 0x40
PUSH1 0x40
MLOAD
SWAP1
DUP2
ADD
PUSH1 0x40
MSTORE
DUP1
CALLER
DUP2
MSTORE
PUSH1 0x20
ADD
CALLER
DUP2
MSTORE
PUSH1 0x20
ADD
POP
PUSH1 0x0
PUSH1 0x0
POP
PUSH1 0x0
CALLER
PUSH20 0xffffffffffffffffffffffffffffffffffffffff
AND
DUP2
MSTORE
PUSH1 0x20
ADD
SWAP1
DUP2
MSTORE
PUSH1 0x20
ADD
PUSH1 0x0
SHA3
PUSH1 0x0
POP
PUSH1 0x0
DUP3
ADD
MLOAD
DUP2
PUSH1 0x0
ADD
PUSH1 0x0
PUSH2 0x100
EXP
DUP2
SLOAD
DUP2
PUSH20 0xffffffffffffffffffffffffffffffffffffffff
MUL
NOT
AND
SWAP1
DUP4
MUL
OR
SWAP1
SSTORE
POP
PUSH1 0x20
DUP3
ADD
MLOAD
DUP2
PUSH1 0x1
ADD
PUSH1 0x0
PUSH2 0x100
EXP
DUP2
SLOAD
DUP2
PUSH20 0xffffffffffffffffffffffffffffffffffffffff
MUL
NOT
AND
SWAP1
DUP4
MUL
OR
SWAP1
SSTORE
POP
SWAP1
POP
POP
CALLER
PUSH1 0x1
PUSH1 0x0
PUSH2 0x100
EXP
DUP2
SLOAD
DUP2
PUSH20 0xffffffffffffffffffffffffffffffffffffffff
MUL
NOT
AND
SWAP1
DUP4
MUL
OR
SWAP1
SSTORE
POP
JUMPDEST
PUSH2 0x32c
DUP1
PUSH2 0xec
PUSH1 0x0
CODECOPY
PUSH1 0x0
RETURN
PUSH1 0x60
PUSH1 0x40
MSTORE
P

In [72]:
str(disassemble_hex(a))

'PUSH1 0x60\nPUSH1 0x40\nMSTORE\nJUMPDEST\nPUSH1 0x40\nPUSH1 0x40\nMLOAD\nSWAP1\nDUP2\nADD\nPUSH1 0x40\nMSTORE\nDUP1\nCALLER\nDUP2\nMSTORE\nPUSH1 0x20\nADD\nCALLER\nDUP2\nMSTORE\nPUSH1 0x20\nADD\nPOP\nPUSH1 0x0\nPUSH1 0x0\nPOP\nPUSH1 0x0\nCALLER\nPUSH20 0xffffffffffffffffffffffffffffffffffffffff\nAND\nDUP2\nMSTORE\nPUSH1 0x20\nADD\nSWAP1\nDUP2\nMSTORE\nPUSH1 0x20\nADD\nPUSH1 0x0\nSHA3\nPUSH1 0x0\nPOP\nPUSH1 0x0\nDUP3\nADD\nMLOAD\nDUP2\nPUSH1 0x0\nADD\nPUSH1 0x0\nPUSH2 0x100\nEXP\nDUP2\nSLOAD\nDUP2\nPUSH20 0xffffffffffffffffffffffffffffffffffffffff\nMUL\nNOT\nAND\nSWAP1\nDUP4\nMUL\nOR\nSWAP1\nSSTORE\nPOP\nPUSH1 0x20\nDUP3\nADD\nMLOAD\nDUP2\nPUSH1 0x1\nADD\nPUSH1 0x0\nPUSH2 0x100\nEXP\nDUP2\nSLOAD\nDUP2\nPUSH20 0xffffffffffffffffffffffffffffffffffffffff\nMUL\nNOT\nAND\nSWAP1\nDUP4\nMUL\nOR\nSWAP1\nSSTORE\nPOP\nSWAP1\nPOP\nPOP\nCALLER\nPUSH1 0x1\nPUSH1 0x0\nPUSH2 0x100\nEXP\nDUP2\nSLOAD\nDUP2\nPUSH20 0xffffffffffffffffffffffffffffffffffffffff\nMUL\nNOT\nAND\nSWAP1\nDUP4\nMUL\nOR\nSWAP1\nS

In [73]:
str(disassemble_hex(a)).replace("\n"," ")

'PUSH1 0x60 PUSH1 0x40 MSTORE JUMPDEST PUSH1 0x40 PUSH1 0x40 MLOAD SWAP1 DUP2 ADD PUSH1 0x40 MSTORE DUP1 CALLER DUP2 MSTORE PUSH1 0x20 ADD CALLER DUP2 MSTORE PUSH1 0x20 ADD POP PUSH1 0x0 PUSH1 0x0 POP PUSH1 0x0 CALLER PUSH20 0xffffffffffffffffffffffffffffffffffffffff AND DUP2 MSTORE PUSH1 0x20 ADD SWAP1 DUP2 MSTORE PUSH1 0x20 ADD PUSH1 0x0 SHA3 PUSH1 0x0 POP PUSH1 0x0 DUP3 ADD MLOAD DUP2 PUSH1 0x0 ADD PUSH1 0x0 PUSH2 0x100 EXP DUP2 SLOAD DUP2 PUSH20 0xffffffffffffffffffffffffffffffffffffffff MUL NOT AND SWAP1 DUP4 MUL OR SWAP1 SSTORE POP PUSH1 0x20 DUP3 ADD MLOAD DUP2 PUSH1 0x1 ADD PUSH1 0x0 PUSH2 0x100 EXP DUP2 SLOAD DUP2 PUSH20 0xffffffffffffffffffffffffffffffffffffffff MUL NOT AND SWAP1 DUP4 MUL OR SWAP1 SSTORE POP SWAP1 POP POP CALLER PUSH1 0x1 PUSH1 0x0 PUSH2 0x100 EXP DUP2 SLOAD DUP2 PUSH20 0xffffffffffffffffffffffffffffffffffffffff MUL NOT AND SWAP1 DUP4 MUL OR SWAP1 SSTORE POP JUMPDEST PUSH2 0x32c DUP1 PUSH2 0xec PUSH1 0x0 CODECOPY PUSH1 0x0 RETURN PUSH1 0x60 PUSH1 0x40 MSTORE 

In [75]:
distance_leven(str(disassemble_hex(a)).replace("\n"," "),arrayponzi['opcodes'])

0.3389265601654982

In [96]:
distance_leven(arrayponzi['opcodes'],x)

MemoryError: 

In [97]:
len(x)

21983

In [98]:
len(arrayponzi['opcodes'])

4644

In [100]:
l = []
for k in test_data['opcode']:
    l.append(len(k))


In [105]:
np.argwhere(np.array(l) > 5000)[:,0]

array([  3,   4,   6,   7,   8,   9,  12,  14,  15,  25,  27,  32,  36,
        41,  43,  44,  45,  54,  55,  61,  66,  67,  68,  69,  70,  71,
        72,  73,  76,  77,  78,  79,  80,  81,  89,  92, 101, 102, 105,
       107, 108, 109, 110, 112, 113, 115, 116, 117, 118, 119, 120, 121,
       122, 123, 126, 127, 133, 134, 136, 138, 139, 141, 142, 143, 144,
       147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
       160, 161, 162, 163, 167, 168, 170, 171, 172, 174, 175, 176, 177,
       178, 182, 184, 185, 186, 187, 188, 189, 191, 193, 194, 195, 197,
       198, 200, 203, 204, 208, 209, 210, 214, 215, 216, 220, 221, 222,
       223, 225, 226, 228, 231, 232, 234, 235, 237, 240, 243, 245, 246,
       247, 248, 249, 251, 253, 256, 257, 258, 259, 262, 263, 265, 266,
       273, 276, 279, 281, 285, 288, 289, 291, 292, 293, 296, 297, 298,
       299, 303, 304, 305, 306, 307, 308, 312, 313, 316, 317, 318, 320,
       321, 322, 325, 326, 327, 328, 329, 330, 331, 336, 337, 33

In [106]:
test_data.iloc[np.argwhere(np.array(l) > 5000)[:,0]]

Unnamed: 0,address,bytecode,function_sighashes,is_erc20,is_erc721,opcode,byte dist arr,byte dist tree,byte dist handover,byte dist water
3,0xd9c324fa5596d188831afa9716ef78d0c2339afc,0x606060405236156100825760e060020a600035046306...,"0x06fdde03,0x18160ddd,0x23b872dd,0x313ce567,0x...",False,False,PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZE...,0.410401,0.445055,0.534275,0.396749
4,0xa0dcc980a9ed1fbeb6641b098929ee7caad5c5a6,0x606060405236156100825760e060020a600035046306...,"0x06fdde03,0x18160ddd,0x23b872dd,0x313ce567,0x...",False,False,PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZE...,0.410401,0.445055,0.534275,0.396749
6,0x204d5d045d5320e478924d76584cf3437a495b10,0x606060405236156100f8576000357c01000000000000...,"0x173825d9,0x2f54bf6e,0x4123cb6b,0x52375093,0x...",False,False,PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZE...,0.728898,0.728410,0.802214,0.716699
7,0xab17397185f193cf14856029e95b34c17f47029e,0x606060405236156100615760e060020a600035046385...,"0x8513af8d,0x8e86755c,0xcb135a68,0xccc65845,0x...",False,False,PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZE...,0.647432,0.677202,0.751028,0.633130
8,0xce8d2a633dcb20c0d5644967e3cd1a6a8ab3a651,0x606060405236156100f8576000357c01000000000000...,"0x173825d9,0x2f54bf6e,0x4123cb6b,0x52375093,0x...",False,False,PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZE...,0.728898,0.728410,0.802214,0.716699
9,0x9b95c8e6b2383b7536c220843cbcbd2e360d3f5f,0x606060405236156100f8576000357c01000000000000...,"0x173825d9,0x2f54bf6e,0x4123cb6b,0x52375093,0x...",False,False,PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZE...,0.728898,0.728410,0.802214,0.716699
12,0xc8e13874f74fd7e624678ae949bf35741d94b7ca,0x606060405236156100f8576000357c01000000000000...,"0x173825d9,0x2f54bf6e,0x4123cb6b,0x52375093,0x...",False,False,PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZE...,0.728898,0.728410,0.802214,0.716699
14,0xf9407829e838e20a5063f496bf622fb37bda4a90,0x606060405236156100f8576000357c01000000000000...,"0x173825d9,0x2f54bf6e,0x4123cb6b,0x52375093,0x...",False,False,PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZE...,0.728898,0.728410,0.802214,0.716699
15,0x3dcc102b93e33d1ed617674f59ae9726358b17eb,0x606060405236156100ab576000357c01000000000000...,"0x08551a53,0x1a8315d5,0x1ae2379c,0x2d606843,0x...",False,False,PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZE...,0.439338,0.422384,0.528295,0.422188
25,0xf7b032045769d18205eca7833e2d19ec99fa2870,0x606060405236156100f8576000357c01000000000000...,"0x173825d9,0x2f54bf6e,0x4123cb6b,0x52375093,0x...",False,False,PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZE...,0.728898,0.728410,0.802214,0.716699


In [30]:
def add_opcodes(df):
    """
    Ajoute à un DF Pandas une colonne avec les opcodes déduits du bytecode associé à chaque contrat
    
    """
    opcodes = []
    start = time()
    for bytecode in df['bytecode']:
        if bytecode != '0x':
            
            instrs = list(disassemble_all(binascii.unhexlify(bytecode[2:])))
            a = assemble_hex(instrs)
        
            res = str(disassemble_hex(a)).replace("\n"," ")
            opcodes.append(res)
        else:
            opcodes.append(" ")
        
    df['opcode'] = opcodes
    end = time()
    
    print("Time spent on adding opcodes:  " + str(end-start))
    
    return(df)

In [31]:
smc_data[0:5]

Unnamed: 0,address,bytecode,function_sighashes,is_erc20,is_erc721
0,0xb260ca22980dd31a5d2289c52cbb16e6235986bc,0x,,False,False
1,0xc7c7077456a7fd30e49c205049d35ad5bce32501,0x,,False,False
2,0x7d3ae940eb73dc9131758ad2e326c7d863b0916a,0x606060405236156100615760e060020a600035046313...,"0x13af4035,0x35c1d349,0x8da5cb5b,0x9003adfe,0x...",False,False
3,0xd9c324fa5596d188831afa9716ef78d0c2339afc,0x606060405236156100825760e060020a600035046306...,"0x06fdde03,0x18160ddd,0x23b872dd,0x313ce567,0x...",False,False
4,0xa0dcc980a9ed1fbeb6641b098929ee7caad5c5a6,0x606060405236156100825760e060020a600035046306...,"0x06fdde03,0x18160ddd,0x23b872dd,0x313ce567,0x...",False,False


In [32]:
test_data = smc_data.copy()
test_data = add_opcodes(test_data)

Time spent on adding opcodes:  9.72165060043335


In [33]:
df,t_spent = add_dist_parr3(test_data)

100%|██████████| 391/391 [1:55:29<00:00, 12.90s/it]  
100%|██████████| 391/391 [1:52:12<00:00, 12.79s/it]  
100%|██████████| 391/391 [1:14:40<00:00,  8.76s/it]
100%|██████████| 391/391 [2:00:09<00:00, 13.82s/it]  
  2%|▏         | 6/391 [03:23<4:06:44, 38.45s/it]

MemoryError: 

In [None]:
df

In [None]:
t_spent

In [None]:
df.to_csv('smc_completed.csv')

In [108]:
test_data.iloc[32]

address                      0x58da5311898b508d8b88465daef127f88a97df5b
bytecode              0x606060405236156100f8576000357c01000000000000...
function_sighashes    0x173825d9,0x2f54bf6e,0x4123cb6b,0x52375093,0x...
is_erc20                                                          False
is_erc721                                                         False
opcode                PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZE...
byte dist arr                                                  0.728898
byte dist tree                                                  0.72841
byte dist handover                                             0.802214
byte dist water                                                0.716699
Name: 32, dtype: object

In [112]:
l = []
for k in test_data[test_data['byte dist arr'] < 0.35]['opcode']:
    l.append(len(k))

In [113]:
l

[5504, 5504, 5504, 5504, 5504, 5504, 5709, 5709]

In [115]:
np.argwhere(test_data['byte dist arr'] < 0.35)[:,0]

array([ 76,  77,  78,  79,  80,  81, 266, 305])

In [None]:
d = []
for k in test_data.iloc[np.argwhere(test_data['byte dist arr'] < 0.35)[:,0]]['opcode']:
    d.append(distance_leven(k,arrayponzi['opcodes']))

In [None]:
d

In [55]:
distance_leven(test_data.opcode[7],arrayponzi['opcodes'])

0.9995695221696083

In [59]:
distance_leven(test_data.opcode[10],arrayponzi['opcodes'])

0.9995695221696083

In [65]:
test_data['opcode'][3]

3    PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZE...
3    PUSH1 0x60 PUSH1 0x40 MSTORE CALLDATASIZE ISZE...
Name: opcode, dtype: object

In [None]:
def add_dist_parr4(df):
    
    """
    Version parallélisée
    A partir d'un DF pandas, ajoute la distance de levenshtein des bytecodes des contrats aux bytecodes de référence
    
    """
    # Liste des bytecode de référence
    
    arr = arrayponzi['object']
    tree = treeponzi['object']
    handover = handoverponzi['object']
    water = waterfallponzi['object']
    
    
    ponzi_list = [arr,tree,handover,water]
    time_elapsed1 = 0
    result_list = []
    
    for ponz_type in ponzi_list:
        
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
        start = time()
        results = []
        
        for m1 in tqdm(df['bytecode']):
            results.append(pool.apply(distance_leven,args=(m1,ponz_type)))
        
        
        end = time()
        time_elapsed1 += (end-start)
        
        result_list.append(results)
        
        pool.close()
    
    df['byte dist arr'] = result_list[0]
    df['byte dist tree'] = result_list[1]
    df['byte dist handover'] = result_list[2]
    df['byte dist water'] = result_list[3]
    
    # Comparaison des OPCODES 
    
    arr = arrayponzi['opcodes']
    tree = treeponzi['opcodes']
    handover = handoverponzi['opcodes']
    water = waterfallponzi['opcodes']
    
    
    ponzi_list = [arr,tree,handover,water]
    time_elapsed2 = 0
    result_list = []
    
    for ponz_type in ponzi_list:
        
        pool = multiprocessing.Pool(processes=multiprocessing.cpu_count())
        start = time()
        results = []
        
        for m1 in tqdm(df['opcode']):
            results.append(pool.apply(distance_leven,args=(m1,ponz_type)))
        
        
        end = time()
        time_elapsed2 += (end-start)
        
        result_list.append(results)
        
        pool.close()
    
    df['op dist arr'] = result_list[0]
    df['op dist tree'] = result_list[1]
    df['op dist handover'] = result_list[2]
    df['op dist water'] = result_list[3]    
    
    return(df,time_elapsed1+time_elapsed2)