Collect data from different sources for a set of addresses (we can provide you with sample addresses, but feel free to find interesting groups of wallets
that are publicly available).

The data to be collected for the list of wallets will
include transactions, token transfers, and balances / token balances.

In [1]:
## import libraries
import numpy as np
import os
import pandas as pd
import requests
import sys
import time

In [2]:
## import configuration
# Add the parent of the working directory to the module search path so the
# `config` package that lives there can be imported from this notebook.
sys.path.append('..')
# `addresses` is consumed with .items() as (name, address) pairs.
from config.addresses import addresses
# `tokens` is consumed with .items() as (token symbol, contract address) pairs.
from config.tokens import tokens

In [3]:
## etherscan API
# Base endpoint; every request template is appended to it as a query string.
etherscan_api_url = 'https://api.etherscan.io/api'

# The API key is read from a local file instead of being hard-coded here.
# Surrounding whitespace is stripped: text files normally end with a newline,
# which would otherwise be embedded in the `apikey` parameter of every URL.
with open('../config/etherscan_api.tkn') as tkn_file:
    etherscan_token = tkn_file.read().strip()
    
# Query-string templates for the `account` module of the API. Each template is
# appended to `etherscan_api_url` and completed with str.format(); the
# placeholders are {address}, {contract_address}, {transaction_hash} and
# {etherscan_token}.
#
# NOTE(review): the txlist/txlistinternal templates request sort=desc while the
# tokentx/tokennfttx templates request sort=asc, so with page=1&offset=10000 the
# two groups may return opposite ends of an account's history - confirm that
# this is intended.

# Balance of one address (tag=latest).
balance_request = (
    '?module=account'
    '&action=balance'
    '&address={address}'
    '&tag=latest'
    '&apikey={etherscan_token}'
)

# Normal transactions of one address: first page, up to 10,000 rows, descending.
normal_transaction_request = (
    '?module=account'
    '&action=txlist'
    '&address={address}'
    '&startblock=0'
    '&endblock=99999999'
    '&page=1'
    '&offset=10000'
    '&sort=desc'
    '&apikey={etherscan_token}'
)

# Internal transactions of one address: same paging and ordering as above.
internal_transaction_request = (
    '?module=account'
    '&action=txlistinternal'
    '&address={address}'
    '&startblock=0'
    '&endblock=99999999'
    '&page=1'
    '&offset=10000'
    '&sort=desc'
    '&apikey={etherscan_token}'
)

# Token transfers of one address, restricted to one token contract; ascending.
token_transaction_request = (
    '?module=account'
    '&action=tokentx'
    '&contractaddress={contract_address}'
    '&address={address}'
    '&page=1'
    '&offset=10000'
    '&sort=asc'
    '&apikey={etherscan_token}'
)

# Same as token_transaction_request but with a fixed contract address
# (presumably the LINK token, going by the variable name - confirm).
link_transaction_request = (
    '?module=account'
    '&action=tokentx'
    '&contractaddress=0x514910771af9ca656af840dff83e8264ecf986ca'
    '&address={address}'
    '&page=1'
    '&offset=10000'
    '&sort=asc'
    '&apikey={etherscan_token}'
)

# NFT transfers of one address across all contracts; ascending.
NFT_transaction_request = (
    '?module=account'
    '&action=tokennfttx'
    '&address={address}'
    '&page=1'
    '&offset=10000'
    '&sort=asc'
    '&apikey={etherscan_token}'
)

# Blocks mined by one address (blocktype=blocks).
mined_blocks_request = (
    '?module=account'
    '&action=getminedblocks'
    '&address={address}'
    '&blocktype=blocks'
    '&page=1'
    '&offset=10000'
    '&apikey={etherscan_token}'
)

# Balance of one token contract for one address (tag=latest).
token_balance_request = (
    '?module=account'
    '&action=tokenbalance'
    '&address={address}'
    '&contractaddress={contract_address}'
    '&tag=latest'
    '&apikey={etherscan_token}'
)

# Lookup by transaction hash.
# NOTE(review): an earlier revision used action=txlistinternal here; check
# against the API documentation which action actually accepts `txhash`.
transfers_in_transaction_request = (
    '?module=account'
    '&action=txlist'
    '&txhash={transaction_hash}'
    '&apikey={etherscan_token}'
)

In [4]:
## accounts balances dataframe
# One row per configured address with its current balance. Rows are gathered in
# a list and converted to a DataFrame once: DataFrame.append no longer exists in
# pandas >= 2.0 and copied the whole frame on every call.
balance_rows = []

# current balance
for name, address in addresses.items():
    response = requests.get(
        etherscan_api_url + balance_request.format(address=address, etherscan_token=etherscan_token),
        timeout=60)  # never hang the notebook on a stalled connection
    response.raise_for_status()
    current_balance = response.json()['result']
    try:
        current_balance_wei = int(current_balance)
    except (TypeError, ValueError):
        # A non-numeric result is an error text from the API (bad key, rate limit, ...).
        raise RuntimeError(
            'Etherscan balance request failed for {}: {!r}'.format(address, current_balance))
    balance_rows.append({
        'Name': name,
        'Address': address,
        # Fills the column that was declared but previously left empty (1 ETH = 10**18 wei).
        'CurrentBalanceEth': current_balance_wei / 10 ** 18,
        'CurrentBalanceWei': current_balance_wei,
        # NOTE(review): key is spelled 'lastedUpdated'; kept as-is because it is
        # written to balances.csv and may be read by name elsewhere.
        'lastedUpdated': int(time.time()),
        })
    time.sleep(0.5)

# Explicit columns keep the layout stable even when `addresses` is empty.
balances = pd.DataFrame(
    balance_rows,
    columns=['Name', 'Address', 'CurrentBalanceEth', 'CurrentBalanceWei', 'lastedUpdated'])

In [5]:
# Create the output folder if needed so the save cannot fail on a fresh checkout.
os.makedirs('../data', exist_ok=True)
balances.to_csv('../data/balances.csv', index=False, header=True)

In [6]:
# Single-address probe of the txlist endpoint. The address is selected
# explicitly (last configured entry) instead of relying on the loop variable
# that happens to be left over from the balances cell; it is the same address,
# but the cell no longer depends on hidden kernel state.
address = list(addresses.values())[-1]
response = requests.get(
    etherscan_api_url
    + normal_transaction_request.format(address=address, etherscan_token=etherscan_token),
    timeout=60)
response.raise_for_status()
normal_transactions = response.json()['result']
# Anything other than a list of transactions is an error text from the API.
if not isinstance(normal_transactions, list):
    raise RuntimeError(
        'Etherscan txlist request failed for {}: {!r}'.format(address, normal_transactions))

In [7]:
## get normal transactions (API limits to 10,000 results)
# Rows are accumulated in a list and the DataFrame is built once at the end.
# DataFrame.append is gone in pandas >= 2.0, and calling it per row re-copied
# the whole frame each time (quadratic in the number of transactions).
normal_transaction_columns = [
    'Name', 'Address', 'From', 'To', 'TransactionHash', 'Timestamp',
    'Value', 'ContractAddress', 'TransactionFeeEth',
]
normal_transaction_rows = []
start_time = time.time()
for name, address in addresses.items():
    response = requests.get(
        etherscan_api_url
        + normal_transaction_request.format(address=address, etherscan_token=etherscan_token),
        timeout=60)  # never hang the notebook on a stalled connection
    response.raise_for_status()
    normal_transactions = response.json()['result']
    # Anything other than a list of transactions is an error text from the API
    # (bad key, rate limit, ...); fail with that text instead of a TypeError below.
    if not isinstance(normal_transactions, list):
        raise RuntimeError(
            'Etherscan txlist request failed for {}: {!r}'.format(address, normal_transactions))
    for transaction in normal_transactions:
        normal_transaction_rows.append({
            'Name': name,
            'Address': address,
            'From': transaction['from'],
            'To': transaction['to'],
            'TransactionHash': transaction['hash'],
            'Timestamp': transaction['timeStamp'],
            'Value': transaction['value'],
            'ContractAddress': transaction['contractAddress'],
            # NOTE(review): gasPrice * gasUsed is presumably in wei despite the
            # column name - confirm the unit before using it as ETH.
            'TransactionFeeEth': int(transaction['gasPrice'])*int(transaction['gasUsed'])
            })
    time.sleep(1)
# Explicit columns keep the header present even when nothing was returned.
normal_transactions_df = pd.DataFrame(normal_transaction_rows, columns=normal_transaction_columns)
end_time = time.time()

In [8]:
# Report the wall-clock duration of the normal-transaction download, in minutes.
print(
    "Time to obtain last 10,000 normal transactions for each account: {:0.2f} minutes"
    .format((end_time - start_time) / 60)
)

Time to obtain last 10,000 normal transactions for each account: 3.95 minutes


In [9]:
# Create the output folder if needed so a long download is not lost to a missing directory.
os.makedirs('../data', exist_ok=True)
normal_transactions_df.to_csv('../data/normal_transactions.csv', index=False, header=True)

In [10]:
## get internal transactions (API limits to 10,000 results)
# Rows are accumulated in a list and the DataFrame is built once at the end.
# DataFrame.append is gone in pandas >= 2.0, and calling it per row re-copied
# the whole frame each time.
internal_transaction_columns = [
    'Name', 'Address', 'From', 'To', 'TransactionHash', 'Timestamp',
    'Value', 'ContractAddress',
]
internal_transaction_rows = []
start_time = time.time()
for name, address in addresses.items():
    response = requests.get(
        etherscan_api_url
        + internal_transaction_request.format(address=address, etherscan_token=etherscan_token),
        timeout=60)  # never hang the notebook on a stalled connection
    response.raise_for_status()
    internal_transactions = response.json()['result']
    # Anything other than a list of transactions is an error text from the API.
    if not isinstance(internal_transactions, list):
        raise RuntimeError(
            'Etherscan txlistinternal request failed for {}: {!r}'.format(address, internal_transactions))
    for transaction in internal_transactions:
        internal_transaction_rows.append({
            'Name': name,
            'Address': address,
            'From': transaction['from'],
            'To': transaction['to'],
            'TransactionHash': transaction['hash'],
            'Timestamp': transaction['timeStamp'],
            'Value': transaction['value'],
            'ContractAddress': transaction['contractAddress']
            })
    time.sleep(0.5)
# Explicit columns keep the header present even when nothing was returned.
internal_transactions_df = pd.DataFrame(internal_transaction_rows, columns=internal_transaction_columns)
end_time = time.time()

In [11]:
# Report the wall-clock duration of the internal-transaction download, in minutes.
print(
    "Time to obtain last 10,000 internal transactions for each account: {:0.2f} minutes"
    .format((end_time - start_time) / 60)
)

Time to obtain last 10,000 internal transactions for each account: 0.27 minutes


In [12]:
# Create the output folder if needed so the save cannot fail on a fresh checkout.
os.makedirs('../data', exist_ok=True)
internal_transactions_df.to_csv('../data/internal_transactions.csv', index=False, header=True)

In [13]:
## get token transactions (API limits to 10,000 results per request)
# One request per (address, token) pair. Rows are accumulated in a list and the
# DataFrame is built once at the end: DataFrame.append is gone in pandas >= 2.0
# and re-copied the whole frame on every call.
token_transaction_columns = [
    'Name', 'Address', 'From', 'To', 'TransactionHash', 'TransactionIndex', 'Timestamp',
    'Value', 'TransactionFeeEth', 'ContractAddress', 'TokenName', 'TokenSymbol',
]
token_transaction_rows = []
start_time = time.time()
for name, address in addresses.items():
    for token_symbol, token_contract_address in tokens.items():

        response = requests.get(
            etherscan_api_url
            + token_transaction_request.format(
                address=address, contract_address=token_contract_address, etherscan_token=etherscan_token),
            timeout=60)  # never hang the notebook on a stalled connection
        response.raise_for_status()
        token_transactions = response.json()['result']
        # Anything other than a list of transfers is an error text from the API.
        if not isinstance(token_transactions, list):
            raise RuntimeError(
                'Etherscan tokentx request failed for {} / {}: {!r}'.format(
                    address, token_symbol, token_transactions))
        for transaction in token_transactions:
            token_transaction_rows.append({
                'Name': name,
                'Address': address,
                'From': transaction['from'],
                'To': transaction['to'],
                'TransactionHash': transaction['hash'],
                'TransactionIndex': int(transaction['transactionIndex']),
                'Timestamp': transaction['timeStamp'],
                'Value': transaction['value'],
                # NOTE(review): gasPrice * gasUsed is presumably in wei despite
                # the column name - confirm the unit before using it as ETH.
                'TransactionFeeEth': int(transaction['gasPrice'])*int(transaction['gasUsed']),
                'ContractAddress': transaction['contractAddress'],
                'TokenName': transaction['tokenName'],
                'TokenSymbol': transaction['tokenSymbol'],
                })
        # Throttle after every request. The pause used to sit on the address
        # loop only, so all per-token requests of one address were sent back to
        # back. 0.25 s assumes a limit of about 5 calls/second - confirm for your plan.
        time.sleep(0.25)
# Explicit columns keep the header present even when nothing was returned.
token_transactions_df = pd.DataFrame(token_transaction_rows, columns=token_transaction_columns)
end_time = time.time()

In [14]:
# Report the wall-clock duration of the token-transfer download, in minutes.
# NOTE(review): token_transaction_request asks for sort=asc, so "last 10,000"
# in this message may really be the earliest 10,000 transfers - confirm.
print(
    "Time to obtain last 10,000 token transactions for each account: {:0.2f} minutes"
    .format((end_time - start_time) / 60)
)

Time to obtain last 10,000 token transactions for each account: 0.58 minutes


In [15]:
# Create the output folder if needed so the save cannot fail on a fresh checkout.
os.makedirs('../data', exist_ok=True)
token_transactions_df.to_csv('../data/token_transactions.csv', index=False, header=True)

In [16]:
## get NFT transactions (API limits to 10,000 results)
# Rows are accumulated in a list and the DataFrame is built once at the end.
# DataFrame.append is gone in pandas >= 2.0, and calling it per row re-copied
# the whole frame each time.
NFT_transaction_columns = [
    'Name', 'Address', 'From', 'To', 'TransactionHash', 'Timestamp',
    'TokenID', 'ContractAddress', 'TokenName', 'TokenSymbol',
]
NFT_transaction_rows = []
start_time = time.time()
for name, address in addresses.items():
    response = requests.get(
        etherscan_api_url
        + NFT_transaction_request.format(address=address, etherscan_token=etherscan_token),
        timeout=60)  # never hang the notebook on a stalled connection
    response.raise_for_status()
    NFT_transactions = response.json()['result']
    # Anything other than a list of transfers is an error text from the API.
    if not isinstance(NFT_transactions, list):
        raise RuntimeError(
            'Etherscan tokennfttx request failed for {}: {!r}'.format(address, NFT_transactions))
    for transaction in NFT_transactions:
        NFT_transaction_rows.append({
            'Name': name,
            'Address': address,
            'From': transaction['from'],
            'To': transaction['to'],
            'TransactionHash': transaction['hash'],
            'Timestamp': transaction['timeStamp'],
            'TokenID': transaction['tokenID'],
            'ContractAddress': transaction['contractAddress'],
            'TokenName': transaction['tokenName'],
            'TokenSymbol': transaction['tokenSymbol'],
            })
    time.sleep(1)
# Explicit columns keep the header present even when nothing was returned.
NFT_transactions_df = pd.DataFrame(NFT_transaction_rows, columns=NFT_transaction_columns)
end_time = time.time()

In [17]:
# Report the wall-clock duration of the NFT-transfer download, in minutes.
# NOTE(review): NFT_transaction_request asks for sort=asc, so "last 10,000"
# in this message may really be the earliest 10,000 transfers - confirm.
print(
    "Time to obtain last 10,000 NFT transactions for each account: {:0.2f} minutes"
    .format((end_time - start_time) / 60)
)

Time to obtain last 10,000 NFT transactions for each account: 0.58 minutes


In [18]:
# Create the output folder if needed so the save cannot fail on a fresh checkout.
os.makedirs('../data', exist_ok=True)
NFT_transactions_df.to_csv('../data/NFT_transactions.csv', index=False, header=True)

In [19]:
## get mined blocks (API limits to 10,000 results)
# Rows are accumulated in a list and the DataFrame is built once at the end.
# DataFrame.append is gone in pandas >= 2.0, and calling it per row re-copied
# the whole frame each time.
# NOTE(review): unlike the other frames, mined_blocks_df is not written to
# ../data by any cell in this notebook - confirm whether that is intended.
mined_block_columns = ['Name', 'Address', 'BlockNumber', 'Timestamp', 'BlockReward']
mined_block_rows = []
start_time = time.time()
for name, address in addresses.items():
    response = requests.get(
        etherscan_api_url
        + mined_blocks_request.format(address=address, etherscan_token=etherscan_token),
        timeout=60)  # never hang the notebook on a stalled connection
    response.raise_for_status()
    mined_blocks = response.json()['result']
    # Anything other than a list of blocks is an error text from the API.
    if not isinstance(mined_blocks, list):
        raise RuntimeError(
            'Etherscan getminedblocks request failed for {}: {!r}'.format(address, mined_blocks))
    for mined_block in mined_blocks:
        mined_block_rows.append({
            'Name': name,
            'Address': address,
            'BlockNumber': mined_block['blockNumber'],
            # Column was misspelled 'TimeSatamp'; now matches the 'Timestamp'
            # column used by every other frame in this notebook.
            'Timestamp': mined_block['timeStamp'],
            'BlockReward': mined_block['blockReward'],
            })
    time.sleep(1)
# Explicit columns keep the layout stable even when nothing was returned.
mined_blocks_df = pd.DataFrame(mined_block_rows, columns=mined_block_columns)
end_time = time.time()

In [20]:
## get token balances (one request per address/token pair)
# Rows are accumulated in a list and the DataFrame is built once at the end:
# DataFrame.append is gone in pandas >= 2.0. Balances are stored as Python ints
# from the start, so the former `.apply(int)` repair pass (which raised KeyError
# on an empty frame) is no longer needed.
token_balance_rows = []
start_time = time.time()
for name, address in addresses.items():
    for token_symbol, token_contract_address in tokens.items():

        response = requests.get(
            etherscan_api_url
            + token_balance_request.format(
                address=address, contract_address=token_contract_address, etherscan_token=etherscan_token),
            timeout=60)  # never hang the notebook on a stalled connection
        response.raise_for_status()
        token_balance = response.json()['result']
        try:
            token_balance_value = int(token_balance)
        except (TypeError, ValueError):
            # A non-numeric result is an error text from the API (bad key, rate limit, ...).
            raise RuntimeError(
                'Etherscan tokenbalance request failed for {} / {}: {!r}'.format(
                    address, token_symbol, token_balance))

        token_balance_rows.append({
            'Name': name,
            'Address': address,
            'TokenSymbol': token_symbol,
            'TokenBalance': token_balance_value,
            })
        # Throttle after every request. The pause used to sit on the address
        # loop only, so all per-token requests of one address were sent back to
        # back. 0.25 s assumes a limit of about 5 calls/second - confirm for your plan.
        time.sleep(0.25)

# Explicit columns keep the layout stable even when there is nothing to fetch.
token_balances_df = pd.DataFrame(
    token_balance_rows, columns=['Name', 'Address', 'TokenSymbol', 'TokenBalance'])
end_time = time.time()

In [21]:
# Report the wall-clock duration of the token-balance download, in minutes.
print(
    "Time to obtain token balances for each account: {:0.2f} minutes"
    .format((end_time - start_time) / 60)
)

Time to obtain token balances for each account: 0.37 minutes


In [22]:
# Create the output folder if needed so the save cannot fail on a fresh checkout.
os.makedirs('../data', exist_ok=True)
token_balances_df.to_csv('../data/token_balances.csv', index=False, header=True)