# Miner transaction transformations - 2

This notebook extends the data transformations performed in miner_transaction transformation 1 so that a graph of miner transactions can be defined efficiently.

In [2]:
# Import relevant libraries
import pandas as pd
import numpy as np

### Aggregate miner data

In [26]:
# Read the per-miner summary table from disk and preview its first rows
miners_summary_path='/Users/dsrincon/Dropbox/Personal/EDU/Posgrado/masters_usa/MIMS/research/miner_network/data/miners_summary.csv'
miners=pd.read_csv(miners_summary_path)
miners.head()

Unnamed: 0,miner,count,sum,nunique,max
0,0xea674fdde714fd979de3edf0f56aa9716b898ec8,1677231.0,158881295.0,137.0,×\nGethgo1.5.1linux
1,0x52bc44d5378309ee2abf1539bf71de1b7d7be3b5,961958.0,66720193.0,33.0,Ü
2,0x2a65aca4d5fc5b5c859090a6c34d164135398226,933307.0,19097355.0,24.0,×Gethgo1.6.2linux
3,0x5a0b54d5dc17e0aadc383d2db43b0a0d3e029c4c,861883.0,96817057.0,116.0,ÞParity-Ethereum1.36.0li
4,0x829bd824b016326a401d083b33d092293333a830,799757.0,89240980.0,901.0,ä¸å½©ç¥ä»é±¼ÿû®


In [27]:
# Add miner id and change index
# Start from a positional index so this cell can be re-run safely: after a first run
# `miners` is indexed by address, and reading `miners.index` at that point would
# overwrite the numeric ids with address strings (and save them to miners.csv).
miners=miners.reset_index(drop=True)
miners['miner_id']=miners.index #Row position in the loaded table is used as miner_id (presumably a ranking, the table looks sorted by block count)
miners=miners.rename(columns={"count": "no. blocks", "sum": "no. transactions","nunique": "unique tags",'max':'miner_tag'})
miners.to_csv(r'/Users/dsrincon/Dropbox/Personal/EDU/Posgrado/masters_usa/MIMS/research/miner_network/data/miners.csv')

# Additional transformations for edge calculations
# Index by address so later cells can look up miner_id by address;
# drop=False keeps the address available as a regular 'miner' column as well
miners=miners.set_index('miner',drop=False)
miners.head()

Unnamed: 0_level_0,no. blocks,no. transactions,unique tags,miner_tag,miner_id,miner
miner,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0xea674fdde714fd979de3edf0f56aa9716b898ec8,1677231.0,158881295.0,137.0,×\nGethgo1.5.1linux,0,0xea674fdde714fd979de3edf0f56aa9716b898ec8
0x52bc44d5378309ee2abf1539bf71de1b7d7be3b5,961958.0,66720193.0,33.0,Ü,1,0x52bc44d5378309ee2abf1539bf71de1b7d7be3b5
0x2a65aca4d5fc5b5c859090a6c34d164135398226,933307.0,19097355.0,24.0,×Gethgo1.6.2linux,2,0x2a65aca4d5fc5b5c859090a6c34d164135398226
0x5a0b54d5dc17e0aadc383d2db43b0a0d3e029c4c,861883.0,96817057.0,116.0,ÞParity-Ethereum1.36.0li,3,0x5a0b54d5dc17e0aadc383d2db43b0a0d3e029c4c
0x829bd824b016326a401d083b33d092293333a830,799757.0,89240980.0,901.0,ä¸å½©ç¥ä»é±¼ÿû®,4,0x829bd824b016326a401d083b33d092293333a830


## Miner transactions 

In [5]:
# Load the miner transaction snapshots: a NumPy array with one entry per monthly
# transaction cut-off since the start of the network
# NOTE: allow_pickle=True unpickles arbitrary Python objects; only load files from a trusted source
miners_evolution_path='/Users/dsrincon/Dropbox/Personal/EDU/Posgrado/masters_usa/MIMS/research/miner_network/data/miners_evolution.npy'
miners_evolution=np.load(miners_evolution_path,allow_pickle=True)

n_cutoffs=len(miners_evolution)
print('Number of cut-offs: {}'.format(n_cutoffs))

# Show a single row (row 10 of cut-off 50) to illustrate the layout of a cut-off
example_row=miners_evolution[50][10]
print('Example of row for each cut-off: {}'.format(example_row))

Number of cut-offs: 51
Example of row for each cut-off: ['0x002e08000acbbae2155fab7ac01929564949070d0x12eb9bce34341d1163814843f8dca44dfebe913c'
 4.9914077324600004e+20]


Each cut-off contains the concatenation of two Ethereum addresses ('sender' + 'recipient') and the total value transacted up to that block cut-off.

**Script to generate undirected graph structure**

In [58]:
def build_undirected_edges(cutoff, miner_ids):
    """Turn one cut-off snapshot into a list of weighted undirected edges.

    Parameters
    ----------
    cutoff : 2-D array
        Column 0 holds the concatenated 'sender'+'recipient' address strings,
        column 1 the value transacted for that pair.
    miner_ids : pd.Series
        Numeric miner ids indexed by miner address.

    Returns
    -------
    list of (int, int, dict)
        One ``(id_a, id_b, {'value': total})`` tuple per unordered pair of miners,
        with ``id_a <= id_b`` and ``total`` the sum of the values of both directions.
        Edges are ordered by ``(id_a, id_b)``.

    Raises
    ------
    KeyError
        If an address is not present in the index of ``miner_ids``.
    """
    cutoff = np.asarray(cutoff, dtype=object)
    if cutoff.size == 0:
        return []  # nothing transacted up to this cut-off

    # Each string is two '0x...' addresses back to back, so the last '0x' marks
    # where the recipient starts. rpartition yields (sender, '0x', rest) per row.
    pairs = cutoff[:, 0].astype(str)
    parts = np.char.rpartition(pairs, '0x')
    senders = parts[:, 0]
    recipients = np.char.add(parts[:, 1], parts[:, 2])

    # Column slicing (instead of np.squeeze) keeps these 1-D even for a single row
    sender_ids = miner_ids.loc[senders].to_numpy()
    recipient_ids = miner_ids.loc[recipients].to_numpy()

    # Order each pair as (low, high) so that a->b and b->a fall in the same group
    undirected = pd.DataFrame({
        'low': np.minimum(sender_ids, recipient_ids),
        'high': np.maximum(sender_ids, recipient_ids),
        'value': cutoff[:, 1].astype(float),
    })
    # Edge weight is the sum of the transacted values over both directions
    totals = undirected.groupby(['low', 'high'])['value'].sum()
    return [(int(a), int(b), {'value': float(total)}) for (a, b), total in totals.items()]


miner_ids = miners['miner_id']
edges_per_cutoff = [build_undirected_edges(cutoff, miner_ids) for cutoff in miners_evolution]

# Aggregate all arrays
# Fill a 1-D object array element by element: np.array() on lists of different
# lengths raises in recent NumPy, and would build an N-d array if lengths matched.
edges_array = np.empty(len(edges_per_cutoff), dtype=object)
for i, edges in enumerate(edges_per_cutoff):
    edges_array[i] = edges


In [59]:
# Persist the per-cut-off undirected edge lists to disk
edges_path='/Users/dsrincon/Dropbox/Personal/EDU/Posgrado/masters_usa/MIMS/research/miner_network/data/miner_trans_edges.npy'
np.save(edges_path,edges_array)

In [60]:
# Round-trip check: reload the saved edge lists and print a slice of one cut-off
# NOTE: allow_pickle=True unpickles arbitrary Python objects; only load files from a trusted source
saved_edges_path='/Users/dsrincon/Dropbox/Personal/EDU/Posgrado/masters_usa/MIMS/research/miner_network/data/miner_trans_edges.npy'
m=np.load(saved_edges_path,allow_pickle=True)
# Rebuild the (node, node, attributes) tuples of cut-off 40
l=[(node_a,node_b,attrs) for node_a,node_b,attrs in m[40]]
print(l[900:1000])

**Script to generate directed graph structure**

In [8]:
def build_directed_edges(cutoff, miner_ids):
    """Turn one cut-off snapshot into a list of weighted directed edges.

    Parameters
    ----------
    cutoff : 2-D array
        Column 0 holds the concatenated 'sender'+'recipient' address strings,
        column 1 the value transacted for that pair.
    miner_ids : pd.Series
        Numeric miner ids indexed by miner address.

    Returns
    -------
    list of (int, int, dict)
        One ``(sender_id, recipient_id, {'value': value})`` tuple per row of
        ``cutoff``, in the same order as the rows.

    Raises
    ------
    KeyError
        If an address is not present in the index of ``miner_ids``.
    """
    cutoff = np.asarray(cutoff, dtype=object)
    if cutoff.size == 0:
        return []  # nothing transacted up to this cut-off

    # Each string is two '0x...' addresses back to back, so the last '0x' marks
    # where the recipient starts. rpartition yields (sender, '0x', rest) per row.
    pairs = cutoff[:, 0].astype(str)
    parts = np.char.rpartition(pairs, '0x')
    senders = parts[:, 0]
    recipients = np.char.add(parts[:, 1], parts[:, 2])

    # Column slicing (instead of np.squeeze) keeps these 1-D even for a single row
    sender_ids = miner_ids.loc[senders].to_numpy()
    recipient_ids = miner_ids.loc[recipients].to_numpy()
    values = cutoff[:, 1].astype(float)

    # Direction is kept as sender -> recipient; rows are not merged or re-ordered
    return [
        (int(sender), int(recipient), {'value': float(value)})
        for sender, recipient, value in zip(sender_ids, recipient_ids, values)
    ]


miner_ids = miners['miner_id']
dir_edges_per_cutoff = [build_directed_edges(cutoff, miner_ids) for cutoff in miners_evolution]

# Aggregate all arrays
# Fill a 1-D object array element by element: np.array() on lists of different
# lengths raises in recent NumPy, and would build an N-d array if lengths matched.
dir_edges_array = np.empty(len(dir_edges_per_cutoff), dtype=object)
for i, edges in enumerate(dir_edges_per_cutoff):
    dir_edges_array[i] = edges


In [10]:
# Persist the per-cut-off directed edge lists to disk
dir_edges_path='/Users/dsrincon/Dropbox/Personal/EDU/Posgrado/masters_usa/MIMS/research/miner_network/data/dir_miner_trans_edges.npy'
np.save(dir_edges_path,dir_edges_array)