In [None]:
import numpy as np
import pandas as pd
import networkx as nx


# CSV file holding the transactions analysed in this notebook.
# NOTE(review): the path is relative to the kernel's working directory -- confirm where the notebook is launched from.
transaction_dataset_path = './Documents/UBA/Tesis/Netder_experiment/dataset/0to999999_NormalTransaction.csv'
# Load the whole CSV into memory as a DataFrame.
transaction_dataframe = pd.read_csv(filepath_or_buffer=transaction_dataset_path)
# block_dataframe = pd.read_csv(block_dataset_path)

In [None]:
# Drop the columns that the analysis below does not use.
# errors='ignore' keeps this cell idempotent: re-running it after the columns
# are already gone no longer raises KeyError.
transaction_dataframe = transaction_dataframe.drop(columns=['timestamp', 'status'], errors='ignore')

In [None]:
# Show the DataFrame as the cell output (bare last expression -> notebook display).
transaction_dataframe

In [None]:
# blocks(address) : Conj<blockNumber>
# inDegree(address, blockNumber) : num
# outDegree(address, blockNumber) : num
# transacciones(address) : Conj<(Transacciones, blockNumber)>
# transacciones(address, blockNumber) : Conj<Transacciones>
# gasPrice(tx, address, blockNumber) : num
# numTransacciones(address, blockNumber) : num
# balance(address, blockNumber) : num
# vecinos(address, blockNumber) : Conj<nodo>
# contratosCreados(address) : Conj<address>
# invocacionesDe(address) : Conj<tx> (data != null)
# invocacionesA(address) : Conj<tx> (data != null)
# esInvocacion(tx) : bool
# esTransferenciaDeValor(tx) : bool
# esCreacionDeContrato(tx) : bool
# value(tx) : num
# esContrato(address) : bool
# esEOA(address) : bool
# diferenciaDeTransacciones(address, blockNumber, blockNumber) : num


## Auxiliary functions

def out_transactions(df, address):
    """Return the transactions sent FROM ``address``.

    Args:
        df: transactions DataFrame; must contain a 'from' column.
        address: account address string; must be exactly 42 characters long
            (presumably '0x' followed by 40 hex digits -- TODO confirm).

    Returns:
        The rows of ``df`` whose 'from' value equals ``address``.

    Raises:
        AssertionError: if ``address`` is not 42 characters long.
    """
    assert len(address) == 42, 'address provided is incorrect'
    sent_by_address = df['from'] == address
    return df.loc[sent_by_address]

def in_transactions(df, address):
    """Return the transactions sent TO ``address``.

    Args:
        df: transactions DataFrame; must contain a 'to' column.
        address: account address string; must be exactly 42 characters long.

    Returns:
        The rows of ``df`` whose 'to' value equals ``address``.

    Raises:
        AssertionError: if ``address`` is not 42 characters long.
    """
    assert len(address) == 42, 'address provided is incorrect'
    received_by_address = df['to'] == address
    return df.loc[received_by_address]

def in_transaction_per_block(df, address, blockNumber):
    """Return the transactions sent TO ``address`` in block ``blockNumber``.

    Args:
        df: transactions DataFrame with 'to' and 'blockNumber' columns.
        address: account address string; must be exactly 42 characters long.
        blockNumber: block number the transactions must belong to.

    Returns:
        The rows received by ``address`` whose 'blockNumber' equals ``blockNumber``.

    Raises:
        AssertionError: if ``address`` is not 42 characters long.
    """
    assert len(address) == 42, 'address provided is incorrect'
    received = in_transactions(df, address)
    in_block = received['blockNumber'] == blockNumber
    return received[in_block]

def out_transaction_per_block(df, address, blockNumber):
    """Return the transactions sent FROM ``address`` in block ``blockNumber``.

    Args:
        df: transactions DataFrame with 'from' and 'blockNumber' columns.
        address: account address string; must be exactly 42 characters long.
        blockNumber: block number the transactions must belong to.

    Returns:
        The rows sent by ``address`` whose 'blockNumber' equals ``blockNumber``.

    Raises:
        AssertionError: if ``address`` is not 42 characters long.
    """
    assert len(address) == 42, 'address provided is incorrect'
    sent = out_transactions(df, address)
    in_block = sent['blockNumber'] == blockNumber
    return sent[in_block]

def transaction_per_block(df, address, blockNumber):
    """Return the transactions sent FROM or TO ``address`` in block ``blockNumber``.

    Args:
        df: transactions DataFrame with 'from', 'to' and 'blockNumber' columns.
        address: account address string; must be exactly 42 characters long.
        blockNumber: block number the transactions must belong to.

    Returns:
        The rows of ``df`` in that block where ``address`` is the sender or the
        receiver (a row where it is both appears once).

    Raises:
        AssertionError: if ``address`` is not 42 characters long.
    """
    assert len(address) == 42, 'address provided is incorrect'
    involves_address = (df['to'] == address) | (df['from'] == address)
    in_block = df['blockNumber'] == blockNumber
    return df[involves_address & in_block]
    

# R1 - Temporal Burst: Si la diferencia de transacciones en un nodo en los tiempos t1 y t2 es mayor que \theta, ese nodo es sospechoso 

# diferenciaDeTransacciones(A, B1, B2, DIF) & (DIF > t_tx) & (B1 < B2) --> hyp_malicioso(A)  


def in_transactionsDifferenceBetweenBlocks(df, address, blockFrom, blockTo):
    """Difference in the number of transactions sent TO ``address`` between two blocks.

    Args:
        df: transactions DataFrame.
        address: account address string; must be exactly 42 characters long.
        blockFrom: earlier block number; must be strictly lower than ``blockTo``.
        blockTo: later block number.

    Returns:
        in-degree of ``address`` in ``blockFrom`` minus its in-degree in ``blockTo``.

    Raises:
        AssertionError: if ``blockFrom >= blockTo`` or ``address`` is not 42
            characters long.
    """
    assert blockFrom < blockTo, 'blockFrom should be lower'
    assert len(address) == 42, 'address provided is incorrect'
    # in_degree already restricts df to (address, block), so pre-filtering the
    # frame before calling it only repeated the same scan.
    return in_degree(df, address, blockFrom) - in_degree(df, address, blockTo)

def out_transactionsDifferenceBetweenBlocks(df, address, blockFrom, blockTo):
    """Difference in the number of transactions sent FROM ``address`` between two blocks.

    Args:
        df: transactions DataFrame.
        address: account address string; must be exactly 42 characters long.
        blockFrom: earlier block number; must be strictly lower than ``blockTo``.
        blockTo: later block number.

    Returns:
        out-degree of ``address`` in ``blockFrom`` minus its out-degree in ``blockTo``.

    Raises:
        AssertionError: if ``blockFrom >= blockTo`` or ``address`` is not 42
            characters long.
    """
    assert blockFrom < blockTo, 'blockFrom should be lower'
    assert len(address) == 42, 'address provided is incorrect'
    # out_degree already restricts df to (address, block), so pre-filtering the
    # frame before calling it only repeated the same scan.
    return out_degree(df, address, blockFrom) - out_degree(df, address, blockTo)

def transactionsDifferenceBetweenBlocks(df, address, blockFrom, blockTo):
    """Difference in the number of transactions sent FROM or TO ``address`` between two blocks.

    Args:
        df: transactions DataFrame.
        address: account address string; must be exactly 42 characters long.
        blockFrom: earlier block number; must be strictly lower than ``blockTo``.
        blockTo: later block number.

    Returns:
        degree of ``address`` in ``blockFrom`` minus its degree in ``blockTo``.

    Raises:
        AssertionError: if ``blockFrom >= blockTo`` or ``address`` is not 42
            characters long.
    """
    assert blockFrom < blockTo, 'blockFrom should be lower'
    assert len(address) == 42, 'address provided is incorrect'
    # degree already restricts df to (address, block), so pre-filtering the
    # frame before calling it only repeated the same scan.
    return degree(df, address, blockFrom) - degree(df, address, blockTo)

# R2 - Degree Burst: Si el in/out degree de un nodo i es mayor que \theta, ese nodo es sospechoso 

# R2.1: grado(A, B, G_in_out) & (G_in_out > t_gr_in+t_gr_out) --> hyp_malicioso(A)  
    
def degree(df, address, blockNumber):
    """Return the number of transactions sent TO or FROM ``address`` in block ``blockNumber``.

    Args:
        df: transactions DataFrame with 'from', 'to' and 'blockNumber' columns.
        address: account address string; must be exactly 42 characters long.
        blockNumber: block number the transactions must belong to.

    Returns:
        int: number of matching rows. A row where ``address`` is both sender and
        receiver is counted once.

    Raises:
        AssertionError: if ``address`` is not 42 characters long.
    """
    assert len(address) == 42, 'address provided is incorrect'
    involves_address = (df['to'] == address) | (df['from'] == address)
    in_block = df['blockNumber'] == blockNumber
    # Count matching rows rather than non-null 'to' values: a transaction sent
    # by the address with a missing 'to' (the notebook's notes describe
    # tx.to = null as a contract creation) must still count.
    return int((involves_address & in_block).sum())

# R2.2: grado_In(A, B, G_in) & (G_in > t_gr_in) --> hyp_malicioso(A)

def in_degree(df, address, blockNumber):
    """Return the number of transactions sent TO ``address`` in block ``blockNumber``.

    Args:
        df: transactions DataFrame.
        address: account address string; must be exactly 42 characters long.
        blockNumber: block number the transactions must belong to.

    Returns:
        Count of non-null 'from' values among the transactions received by
        ``address`` in that block.

    Raises:
        AssertionError: if ``address`` is not 42 characters long.
    """
    assert len(address) == 42, 'address provided is incorrect'
    received_in_block = in_transaction_per_block(df, address, blockNumber)
    return received_in_block['from'].count()
    
# R2.3: grado_out(A, B, G_out) & (G_out > t_gr_out) --> hyp_malicioso(A)  

def out_degree(df, address, blockNumber):
    """Return the number of transactions sent FROM ``address`` in block ``blockNumber``.

    Args:
        df: transactions DataFrame.
        address: account address string; must be exactly 42 characters long.
        blockNumber: block number the transactions must belong to.

    Returns:
        Count of non-null 'from' values among the transactions sent by
        ``address`` in that block.

    Raises:
        AssertionError: if ``address`` is not 42 characters long.
    """
    assert len(address) == 42, 'address provided is incorrect'
    sent_in_block = out_transaction_per_block(df, address, blockNumber)
    return sent_in_block['from'].count()

# R3 - Balance Burst:  El balance que envía/recibe una cuenta A es mayor que \theta, ese nodo es sospechoso 

# R3.1: balance(A, B, Bal) & (Bal > t_bal) --> hyp_malicioso(A)   (in/out)

def balance(df, address, blockNumber):
    """Return the values ``address`` SENT or RECEIVED in block ``blockNumber``.

    Args:
        df: transactions DataFrame with a 'value' column.
        address: account address string; must be exactly 42 characters long.
        blockNumber: block number the transactions must belong to.

    Returns:
        The 'value' column of the transactions involving ``address`` in that
        block, one entry per transaction (the values are not summed).

    Raises:
        AssertionError: if ``address`` is not 42 characters long.
    """
    assert len(address) == 42, 'address provided is incorrect'
    return transaction_per_block(df, address, blockNumber)['value']
    
# R3.2: in_balance(A, B, in_Bal) & (in_Bal > t_bal) --> hyp_malicioso(A)

def in_balance(df, address, blockNumber):
    """Return the values ``address`` RECEIVED in block ``blockNumber``.

    Args:
        df: transactions DataFrame with a 'value' column.
        address: account address string; must be exactly 42 characters long.
        blockNumber: block number the transactions must belong to.

    Returns:
        The 'value' column of the transactions received by ``address`` in that
        block, one entry per transaction (the values are not summed).

    Raises:
        AssertionError: if ``address`` is not 42 characters long.
    """
    assert len(address) == 42, 'address provided is incorrect'
    return in_transaction_per_block(df, address, blockNumber)['value']

# R3.3: out_balance(A, B, out_Bal) & (out_Bal > t_bal) --> hyp_malicioso(A)

def out_balance(df, address, blockNumber):
    """Return the values ``address`` SENT in block ``blockNumber``.

    Args:
        df: transactions DataFrame with a 'value' column.
        address: account address string; must be exactly 42 characters long.
        blockNumber: block number the transactions must belong to.

    Returns:
        The 'value' column of the transactions sent by ``address`` in that
        block, one entry per transaction (the values are not summed).

    Raises:
        AssertionError: if ``address`` is not 42 characters long.
    """
    assert len(address) == 42, 'address provided is incorrect'
    return out_transaction_per_block(df, address, blockNumber)['value']

# R4 - Gas Price Burst : Si el gas price que pone una cuenta en una tx es mayor que \theta, ese nodo es sospechoso 
# gasPrice(A, B, GP) & (GP > t_gp) --> hyp_malicioso(A) 

def in_gasPrice(df, address, blockNumber):
    """Return the gas prices of the transactions RECEIVED by ``address`` in block ``blockNumber``.

    Args:
        df: transactions DataFrame with a 'gasPrice' column.
        address: account address string; must be exactly 42 characters long.
        blockNumber: block number the transactions must belong to.

    Returns:
        The 'gasPrice' column of the transactions received by ``address`` in
        that block, one entry per transaction.

    Raises:
        AssertionError: if ``address`` is not 42 characters long.
    """
    assert len(address) == 42, 'address provided is incorrect'
    return in_transaction_per_block(df, address, blockNumber)['gasPrice']


# R5 - Attractiveness: Si el nodo tiene un valor de attractiveness alto entonces ese nodo es sospechoso
# attractiveness(A,B1,ATT) & ATT > t_att --> hyp_malicioso(A)  

# An account N is considered a neighbor of an account A in the graph if N SENT a transaction to A

def neighborhood(df, blockNumber, address):
    """Return the neighbors of ``address`` in block ``blockNumber``.

    A neighbor is an account that sent a transaction to ``address``.
    Note the argument order: the block number comes BEFORE the address here,
    unlike the other per-block helpers in this notebook.

    Args:
        df: transactions DataFrame.
        blockNumber: block number to inspect.
        address: account address string; must be exactly 42 characters long.

    Returns:
        Array with the distinct 'from' values of the transactions received by
        ``address`` in that block.

    Raises:
        AssertionError: if ``address`` is not 42 characters long.
    """
    assert len(address) == 42, 'address provided is incorrect'
    senders = in_transaction_per_block(df, address, blockNumber)["from"]
    return senders.unique()

def neighborhood_from_to(df, fromBlockNumber, toBlocknumber, address):
    """Return the neighbors of ``address`` over a range of blocks.

    The range is half-open: blocks with
    ``fromBlockNumber <= blockNumber < toBlocknumber`` are considered, so
    ``toBlocknumber`` itself is excluded.

    Args:
        df: transactions DataFrame with 'from', 'to' and 'blockNumber' columns.
        fromBlockNumber: first block of the range (inclusive).
        toBlocknumber: end of the range (exclusive); must be >= ``fromBlockNumber``.
        address: account address string; must be exactly 42 characters long.

    Returns:
        Array with the distinct 'from' values of the transactions received by
        ``address`` within the range.

    Raises:
        AssertionError: if the range is reversed or ``address`` is not 42
            characters long.
    """
    assert fromBlockNumber <= toBlocknumber, 'fromBlockNumber should be leq to toBlocknumber'
    assert len(address) == 42, 'address provided is incorrect'
    received = in_transactions(df, address)
    block_numbers = received['blockNumber']
    within_range = (block_numbers >= fromBlockNumber) & (block_numbers < toBlocknumber)
    return received.loc[within_range, "from"].unique()

def attractiveness(df, fromBlockNumber, window, address, verbose=True):
    """Return the attractiveness of ``address`` at block ``fromBlockNumber``.

    With "neighbor" meaning an account that sent a transaction to ``address``:

        N_t    = neighbors in block ``fromBlockNumber``
        N_past = neighbors in blocks [fromBlockNumber - window, fromBlockNumber)
        N_all  = neighbors in blocks [fromBlockNumber - window, fromBlockNumber]

        attractiveness = 1 - |N_t intersect N_past| / |N_all|

    Args:
        df: transactions DataFrame.
        fromBlockNumber: block at which the attractiveness is evaluated; >= 0.
        window: number of previous blocks to look back; must not exceed
            ``fromBlockNumber`` so the window never starts before block 0.
        address: account address string; must be exactly 42 characters long.
        verbose: when True (default, the original behaviour) print the
            intermediate neighbor sets and their sizes.

    Returns:
        0 when ``address`` received no transaction in ``fromBlockNumber``;
        otherwise the ratio described above as a float.

    Raises:
        AssertionError: if any argument violates the constraints above.
    """
    assert fromBlockNumber >= 0, 'invalid fromBlockNumber'
    # The condition guards against a window reaching before block 0, so the
    # message must complain about a window that is too large.
    assert fromBlockNumber >= window, 'window is greater than fromBlockNumber'
    assert len(address) == 42, 'address provided is incorrect'

    def _report(label, neighbors):
        # Debug output for one neighbor array: label, contents and size.
        if verbose:
            print(label)
            print(neighbors)
            print('SIZE: ' + str(neighbors.size))

    neighbors_in_t = neighborhood(df, fromBlockNumber, address)
    _report("neighbors_in_t: ", neighbors_in_t)
    if neighbors_in_t.size == 0:
        return 0

    window_start = fromBlockNumber - window
    # neighborhood_from_to uses a half-open range, so this excludes block t ...
    neighborhood_T_not_t = neighborhood_from_to(df, window_start, fromBlockNumber, address)
    _report("neighborhood_T_not_t: ", neighborhood_T_not_t)
    # ... and this one includes it. It is never empty here because block t
    # itself has at least one neighbor, so the division below is safe.
    neighborhood_T_with_t = neighborhood_from_to(df, window_start, fromBlockNumber + 1, address)
    _report("neighborhood_T_with_t: ", neighborhood_T_with_t)

    not_changed_neighbors = np.intersect1d(neighbors_in_t, neighborhood_T_not_t).size
    if verbose:
        # not_changed_neighbors is already a count; the original followed it with
        # a 'SIZE' line that repeated neighborhood_T_with_t's size by mistake.
        print("not_changed_neighbors: ")
        print(not_changed_neighbors)
    return 1 - (not_changed_neighbors / neighborhood_T_with_t.size)
    
    
def blocks(df, address):
    """Return the block numbers of the transactions sent FROM ``address``.

    Args:
        df: transactions DataFrame with 'from' and 'blockNumber' columns.
        address: account address string (its length is not validated here).

    Returns:
        The 'blockNumber' column of the rows sent by ``address``; a block
        appears once per transaction, so duplicates are possible.
    """
    sent_by_address = df['from'] == address
    return df.loc[sent_by_address, 'blockNumber']
    
def gas_price(df, address, blockNumber, transactionHash):
    """Return the gas price of a transaction sent FROM ``address``.

    Args:
        df: transactions DataFrame with 'from', 'transactionHash',
            'blockNumber' and 'gasPrice' columns.
        address: sender address of the transaction.
        blockNumber: block number the transaction belongs to.
        transactionHash: hash identifying the transaction.

    Returns:
        The 'gasPrice' column of the rows matching all three criteria (empty
        when nothing matches).
    """
    # Element-wise '&' is required here: Python's 'and' asks for the truth value
    # of a whole Series and raises ValueError.
    matches = (
        (df['from'] == address)
        & (df['transactionHash'] == transactionHash)
        & (df['blockNumber'] == blockNumber)
    )
    return df.loc[matches, 'gasPrice']




In [None]:
# Build the transaction graph: one edge per transaction, pointing from the
# sender to the receiver, carrying the transaction's attributes.
# A MultiDiGraph is used because transactions are directed and two accounts can
# exchange several of them. The previous code created an empty MultiDiGraph and
# then replaced it with an undirected MultiGraph, losing the edge direction.
MG = nx.from_pandas_edgelist(
    transaction_dataframe,
    source='from',
    target='to',
    edge_attr=['blockNumber', 'transactionHash', 'value', 'gasPrice'],
    create_using=nx.MultiDiGraph(),
)

In [None]:
# # in_transaction_per_block(transaction_dataframe, '0x32be343b94f860124dc4fee278fdcbd38c102d88', 999998)
# np.intersect1d(neighborhood(transaction_dataframe, '0x32be343b94f860124dc4fee278fdcbd38c102d88', 999998), neighborhood(transaction_dataframe, '0x32be343b94f860124dc4fee278fdcbd38c102d88', 999998))
# in_transaction_per_block(transaction_dataframe, '0x32be343b94f860124dc4fee278fdcbd38c102d88', 999998)
# print('0x32be343b94f860124dc4fee278fdcbd38c102d88')
# attractiveness(transaction_dataframe, 999999, 999998, '0x32be343b94f860124dc4fee278fdcbd38c102d88')

# print('0x7de5aba7de728950c92c57d08e20d4077161f12f')
# Attractiveness of one account at block 835037, looking back 835036 blocks.
attractiveness(
    df=transaction_dataframe,
    fromBlockNumber=835037,
    window=835036,
    address='0x7bd4583297fced654fd5ad77fd1e99f648ef70c4',
)


# neighborhood(transaction_dataframe, 999999, '0x32be343b94f860124dc4fee278fdcbd38c102d88')
# def neighborhood(df, address, blockNumber):
#     df_aux = in_transaction_per_block(df, address, blockNumber)
#     return df_aux["from"].unique()
# len('0xB8c77482e45F1F44dE1745F52C74426C631bDD52')

# df = transaction_dataframe.groupby('to').count().sort_values(by='blockNumber')
# df = transaction_dataframe[transaction_dataframe['to'] == '0x7bd4583297fced654fd5ad77fd1e99f648ef70c4']
# df

In [None]:
# import infura


# ifr = infura.Client(
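#     # Credentials must not be hard-coded in the notebook: read them from the
#     # environment (requires `import os`). The values that used to be written
#     # here should be treated as leaked and rotated.
#     project_id=os.environ['INFURA_PROJECT_ID'],
#     project_secret=os.environ['INFURA_PROJECT_SECRET'],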
#     network='mainnet',
#     cache_expire_after=5,
# )


# block_number = ifr.eth_get_block_number()

# block = ifr.eth_get_block_by_number(block_number, show_transaction_details = True)
# block


# blocks(address) : Conj<blockNumber>
# inDegree(address, blockNumber) : num
# outDegree(address, blockNumber) : num
# transacciones(address) : Conj<(Transacciones, blockNumber)>
# transacciones(address, blockNumber) : Conj<Transacciones>
# gasPrice(tx, address, blockNumber) : num
# numTransacciones(address, blockNumber) : num
# balance(address, blockNumber) : num
# vecinos(address, blockNumber) : Conj<nodo>
# contratosCreados(address) : Conj<address>
# invocacionesDe(address) : Conj<tx> (data != null)
# invocacionesA(address) : Conj<tx> (data != null)
# esInvocacion(tx) : bool
# esTransferenciaDeValor(tx) : bool
# esCreacionDeContrato(tx) : bool
# value(tx) : num
# esContrato(address) : bool
# esEOA(address) : bool
# diferenciaDeTransacciones(address, blockNumber, blockNumber) : num


# Nodo: {address, balance, nonce, data}
# Eje: {from: address, to: address, value, nonce, data} (info relevante de la tx)
# Del bloque Bk..Bn (k < n)
# Por cada transacción tx en Bi:
# Se crea un eje e dirigido
# Si no existen se crean los nodos nf, nt :=  tx.from, tx.to 
# (habría que ver los casos bordes donde tx.from.balance es 0 ya que lo creamos)
# e.from = tx.from, e.to = tx.to, e.blockNum = Bi.number
# 	Si el tx.to = null es una creación de contrato. 
# 		e.to := tx.receipt.contractAddress
# 		nt := tx.receipt.contractAddress
# 	Si data != vacio y tx.to != null es una invocación eje.data =  tx.data
# 	Si value > 0 es una transferencia de valor 
# e.value = tx.value, Nt.balance += e.value

In [None]:
# Edges from address: indexing the graph by a node shows its neighbors together
# with the edges (and edge attributes) that connect them.
# NOTE(review): only outgoing edges are listed if MG is a directed graph -- confirm how MG was built.
MG['0x32be343b94f860124dc4fee278fdcbd38c102d88']

In [None]:
# Rows of the raw table sent by the same address, to compare with the graph view.
sent_by_address = transaction_dataframe['from'].eq('0x32be343b94f860124dc4fee278fdcbd38c102d88')
transaction_dataframe.loc[sent_by_address]

In [None]:
# Draw the whole transaction graph with networkx's default layout and styling.
# NOTE(review): presumably slow and unreadable if MG holds the full dataset -- consider drawing a subgraph.
nx.draw(MG)