# Etherscan API Data Retrieval

In [1]:
import csv
import datetime
import math
import os
import time

import pandas as pd
import requests

In [2]:
# Etherscan API key. It is read from the environment so that the secret is
# never stored in (or committed with) the notebook.
# NOTE(review): a key was previously hard-coded in this cell; treat that key as
# compromised and rotate it in the Etherscan dashboard.
etherscan_token = os.environ.get('ETHERSCAN_API_KEY', '').strip()
if not etherscan_token:
    raise RuntimeError(
        'Set the ETHERSCAN_API_KEY environment variable to your Etherscan API key '
        'before running this notebook.'
    )

# Ethereum block range
startblock = 0 # not needed: the request URLs built in this notebook section never send it
endblock = 17670000  # upper block bound passed as `endblock` in every request below

# Session object: one shared HTTP session, reused by every download cell
session = requests.Session()

# All addresses: keep only the rows flagged with Ethereum == 1
df = pd.read_csv('../data/subsets.csv')
ethereum_addresses = df.loc[df['Ethereum'] == 1, 'Address'].tolist()
print(f'Number of addresses recorded on Ethereum: {len(ethereum_addresses)}')

Number of addresses recorded on Ethereum: 59651


### ERC20 Transfers

In [4]:
# Download the ERC-20 transfer history (Etherscan `tokentx` action) of the
# addresses in `ethereum_addresses` and write one CSV file per batch.

# Variables
# NOTE(review): start = 20001 together with the iteration counter starting at 2
# looks like a manual resume of an earlier run -- confirm that the addresses
# below index 20001 were already exported before relying on this cell.
start = 20001
batch_size = 10000
last_index = len(ethereum_addresses) - 1
# `end` is the INCLUSIVE index of the last address in the current batch; it is
# also used verbatim in the CSV file name.
end = min(start + batch_size - 1, last_index)
iteration = math.ceil(len(ethereum_addresses) / batch_size)
print(iteration)
errors = []  # addresses whose request raised an exception

for j in range(2,iteration):
    if start > last_index:
        # Every address has been covered; do not write an empty trailing file.
        break
    print(f'Iteration {j} started')
    data = [["timeStamp", "hash", "from", "to", "contractAddress", "value", "nonce", "tokenName", "tokenDecimal", "gasPrice", "gasUsed", "chainName", "tokenType"]]
    # range() excludes its upper bound, so `end + 1` is required to include the
    # address at index `end` (otherwise one address per batch is skipped).
    for i in range(start, end + 1):
        try:
            api_url = f'https://api.etherscan.io/api?module=account&action=tokentx&address={ethereum_addresses[i]}&endblock={endblock}&apikey={etherscan_token}'
            # make the API request and extract the transfer events;
            # the timeout keeps a stalled connection from blocking the whole run
            response = session.get(api_url, timeout=30)
            response.raise_for_status()
            payload = response.json()
            transfers = payload['result']
            if not isinstance(transfers, list):
                # A non-list `result` is treated as an API-level failure
                # (presumably e.g. rate limiting -- see the Etherscan docs).
                raise ValueError(f"unexpected API response: {payload.get('message')} - {transfers}")
            token_transfer_data = [(tx['timeStamp'], tx['hash'], tx['from'], tx['to'], tx['contractAddress'], tx['value'], tx['nonce'],tx['tokenName'], tx['tokenDecimal'], tx['gasPrice'], tx['gasUsed'], 'Ethereum', '20') for tx in transfers]
            data.extend(token_transfer_data)
            time.sleep(0.02)
            if i%1000==0:
                current_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                print(f'{current_time} Addresses done: {i} / {len(ethereum_addresses)} ({i/len(ethereum_addresses)*100} %)')

        except Exception as e:
            # Exception text from the HTTP layer can embed the request URL,
            # which contains the API key -- redact it before printing.
            message = str(e)
            if etherscan_token:
                message = message.replace(etherscan_token, '***')
            print(f"Error in {ethereum_addresses[i]}: {message}")
            errors.append(ethereum_addresses[i])
            continue

    # Write csv file (explicit UTF-8 so non-ASCII token names cannot fail on
    # platforms whose default encoding is not UTF-8)
    with open(f'../data/ethereum_erc20_{start}-{end}.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(data)
    print('CSV successfully written')

    # Advance to the next batch, clamping to the last valid index.
    start = end + 1
    end = min(end + batch_size, last_index)

6
Iteration 2 started
2023-07-12 17:17:52 Addresses done: 21000 / 59651 (35.20477443798092 %)
2023-07-12 17:21:32 Addresses done: 22000 / 59651 (36.88119226836097 %)
2023-07-12 17:25:33 Addresses done: 23000 / 59651 (38.55761009874101 %)
2023-07-12 17:29:34 Addresses done: 24000 / 59651 (40.234027929121055 %)
2023-07-12 17:33:19 Addresses done: 25000 / 59651 (41.910445759501094 %)
2023-07-12 17:37:03 Addresses done: 26000 / 59651 (43.58686358988114 %)
2023-07-12 17:40:55 Addresses done: 27000 / 59651 (45.26328142026119 %)
2023-07-12 17:44:49 Addresses done: 28000 / 59651 (46.93969925064123 %)
2023-07-12 17:48:56 Addresses done: 29000 / 59651 (48.61611708102128 %)
CSV successfully written
Iteration 3 started
2023-07-12 17:56:29 Addresses done: 31000 / 59651 (51.96895274178136 %)
2023-07-12 18:00:17 Addresses done: 32000 / 59651 (53.64537057216141 %)
2023-07-12 18:04:07 Addresses done: 33000 / 59651 (55.32178840254145 %)
2023-07-12 18:07:52 Addresses done: 34000 / 59651 (56.9982062329214

### ERC721 Transfers

In [6]:
# Download the ERC-721 transfer history (Etherscan `tokennfttx` action) of
# every address in `ethereum_addresses` and write one CSV file per batch.

# Variables
start = 0
batch_size = 20000
last_index = len(ethereum_addresses) - 1
# `end` is the INCLUSIVE index of the last address in the current batch; it is
# also used verbatim in the CSV file name. The first batch therefore holds
# batch_size + 1 addresses (indices 0..batch_size).
end = min(start + batch_size, last_index)
iteration = math.ceil(len(ethereum_addresses) / batch_size)
errors = []  # addresses whose request raised an exception

for j in range(0,iteration):
    if start > last_index:
        # Every address has been covered; do not write an empty trailing file.
        break
    print(f'Iteration {j} started')
    data = [["timeStamp", "hash", "from", "to","contractAddress", "tokenID", "nonce", "tokenName", "gasPrice", "gasUsed", "chainName", "tokenType"]]
    # range() excludes its upper bound, so `end + 1` is required to include the
    # address at index `end` (otherwise one address per batch is skipped).
    for i in range(start, end + 1):
        try:
            api_url = f'https://api.etherscan.io/api?module=account&action=tokennfttx&address={ethereum_addresses[i]}&endblock={endblock}&apikey={etherscan_token}'
            # make the API request and extract the transfer events;
            # the timeout keeps a stalled connection from blocking the whole run
            response = session.get(api_url, timeout=30)
            response.raise_for_status()
            payload = response.json()
            transfers = payload['result']
            if not isinstance(transfers, list):
                # A non-list `result` is treated as an API-level failure
                # (presumably e.g. rate limiting -- see the Etherscan docs).
                raise ValueError(f"unexpected API response: {payload.get('message')} - {transfers}")
            token_transfer_data = [(tx['timeStamp'], tx['hash'], tx['from'], tx['to'], tx['contractAddress'], tx['tokenID'], tx['nonce'], tx['tokenName'], tx['gasPrice'],tx['gasUsed'], 'Ethereum', '721') for tx in transfers]
            data.extend(token_transfer_data)
            time.sleep(0.02)
            if i%1000==0:
                current_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                print(f'{current_time} Addresses done: {i} / {len(ethereum_addresses)} ({i/len(ethereum_addresses)*100} %)')

        except Exception as e:
            # Exception text from the HTTP layer can embed the request URL,
            # which contains the API key -- redact it before printing.
            message = str(e)
            if etherscan_token:
                message = message.replace(etherscan_token, '***')
            print(f"Error in {ethereum_addresses[i]}: {message}")
            errors.append(ethereum_addresses[i])
            continue

    # Write csv file (explicit UTF-8 so non-ASCII token names cannot fail on
    # platforms whose default encoding is not UTF-8)
    with open(f'../data/ethereum_erc721_{start}-{end}.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(data)
    print('CSV successfully written')

    # Advance to the next batch, clamping to the last valid index.
    start = end + 1
    end = min(end + batch_size, last_index)

Iteration 0 started
2023-07-12 22:12:49 Addresses done: 0 / 59651 (0.0 %)
2023-07-12 22:16:55 Addresses done: 1000 / 59651 (1.676417830380044 %)
2023-07-12 22:21:56 Addresses done: 2000 / 59651 (3.352835660760088 %)
2023-07-12 22:25:59 Addresses done: 3000 / 59651 (5.029253491140132 %)
2023-07-12 22:29:45 Addresses done: 4000 / 59651 (6.705671321520176 %)
2023-07-12 22:33:58 Addresses done: 5000 / 59651 (8.38208915190022 %)
2023-07-12 22:37:58 Addresses done: 6000 / 59651 (10.058506982280264 %)
2023-07-12 22:41:49 Addresses done: 7000 / 59651 (11.734924812660308 %)
2023-07-12 22:45:59 Addresses done: 8000 / 59651 (13.411342643040353 %)
2023-07-12 22:49:52 Addresses done: 9000 / 59651 (15.087760473420394 %)
2023-07-12 22:53:49 Addresses done: 10000 / 59651 (16.76417830380044 %)
2023-07-12 22:58:04 Addresses done: 11000 / 59651 (18.440596134180485 %)
2023-07-12 23:02:34 Addresses done: 12000 / 59651 (20.117013964560527 %)
2023-07-12 23:07:02 Addresses done: 13000 / 59651 (21.793431794940

### ERC1155 Transfers

In [7]:
# Download the ERC-1155 transfer history (Etherscan `token1155tx` action) of
# every address in `ethereum_addresses` and write one CSV file per batch.

# Variables
start = 0
batch_size = 20000
last_index = len(ethereum_addresses) - 1
# `end` is the INCLUSIVE index of the last address in the current batch; it is
# also used verbatim in the CSV file name. The first batch therefore holds
# batch_size + 1 addresses (indices 0..batch_size).
end = min(start + batch_size, last_index)
iteration = math.ceil(len(ethereum_addresses) / batch_size)
errors = []  # addresses whose request raised an exception

for j in range(0,iteration):
    if start > last_index:
        # Every address has been covered; do not write an empty trailing file.
        break
    print(f'Iteration {j} started')
    data = [["timeStamp", "hash", "from", "to", "contractAddress", "tokenID", "tokenValue", "tokenName", "nonce", "gasPrice", "gasUsed", "chainName", "tokenType"]]
    # range() excludes its upper bound, so `end + 1` is required to include the
    # address at index `end` (otherwise one address per batch is skipped).
    for i in range(start, end + 1):
        try:
            api_url = f'https://api.etherscan.io/api?module=account&action=token1155tx&address={ethereum_addresses[i]}&endblock={endblock}&apikey={etherscan_token}'
            # make the API request and extract the transaction data;
            # the timeout keeps a stalled connection from blocking the whole run
            response = session.get(api_url, timeout=30)
            response.raise_for_status()
            payload = response.json()
            transfers = payload['result']
            if not isinstance(transfers, list):
                # A non-list `result` is treated as an API-level failure
                # (presumably e.g. rate limiting -- see the Etherscan docs).
                raise ValueError(f"unexpected API response: {payload.get('message')} - {transfers}")
            token_transfer_data = [(tx['timeStamp'], tx['hash'], tx['from'], tx['to'], tx['contractAddress'], tx['tokenID'], tx['tokenValue'], tx['tokenName'], tx['nonce'], tx['gasPrice'], tx['gasUsed'], "Ethereum", "1155" ) for tx in transfers]
            data.extend(token_transfer_data)
            time.sleep(0.02)
            if i%1000==0:
                current_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                print(f'{current_time} Addresses done: {i} / {len(ethereum_addresses)} ({i/len(ethereum_addresses)*100} %)')

        except Exception as e:
            # Exception text from the HTTP layer can embed the request URL,
            # which contains the API key -- redact it before printing.
            message = str(e)
            if etherscan_token:
                message = message.replace(etherscan_token, '***')
            print(f"Error in {ethereum_addresses[i]}: {message}")
            errors.append(ethereum_addresses[i])
            continue

    # Write csv file (explicit UTF-8 so non-ASCII token names cannot fail on
    # platforms whose default encoding is not UTF-8)
    with open(f'../data/ethereum_erc1155_{start}-{end}.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(data)
    print('CSV successfully written')

    # Advance to the next batch, clamping to the last valid index.
    start = end + 1
    end = min(end + batch_size, last_index)

Iteration 0 started
2023-07-13 02:26:34 Addresses done: 0 / 59651 (0.0 %)
2023-07-13 02:30:01 Addresses done: 1000 / 59651 (1.676417830380044 %)
2023-07-13 02:33:43 Addresses done: 2000 / 59651 (3.352835660760088 %)
2023-07-13 02:38:10 Addresses done: 3000 / 59651 (5.029253491140132 %)
2023-07-13 02:41:35 Addresses done: 4000 / 59651 (6.705671321520176 %)
2023-07-13 02:45:15 Addresses done: 5000 / 59651 (8.38208915190022 %)
2023-07-13 02:48:47 Addresses done: 6000 / 59651 (10.058506982280264 %)
2023-07-13 02:52:22 Addresses done: 7000 / 59651 (11.734924812660308 %)
2023-07-13 02:56:12 Addresses done: 8000 / 59651 (13.411342643040353 %)
2023-07-13 02:59:38 Addresses done: 9000 / 59651 (15.087760473420394 %)
2023-07-13 03:03:37 Addresses done: 10000 / 59651 (16.76417830380044 %)
2023-07-13 03:07:23 Addresses done: 11000 / 59651 (18.440596134180485 %)
2023-07-13 03:11:07 Addresses done: 12000 / 59651 (20.117013964560527 %)
2023-07-13 03:14:58 Addresses done: 13000 / 59651 (21.793431794940

### Transactions

In [8]:
# Download the normal transaction history (Etherscan `txlist` action) of every
# address in `ethereum_addresses` and write one CSV file per batch.

start = 0
batch_size = 20000
last_index = len(ethereum_addresses) - 1
# `end` is the INCLUSIVE index of the last address in the current batch; it is
# also used verbatim in the CSV file name. The first batch therefore holds
# batch_size + 1 addresses (indices 0..batch_size).
end = min(start + batch_size, last_index)
iteration = math.ceil(len(ethereum_addresses) / batch_size)
errors = []  # addresses whose request raised an exception

for j in range(0,iteration):
    if start > last_index:
        # Every address has been covered; do not write an empty trailing file.
        break
    print(f'Iteration {j} started')
    data = [["hash", "from", "to", "timeStamp", "nonce", "value", "gasPrice", "input", "gasUsed", "functionName", "chainName"]]
    # range() excludes its upper bound, so `end + 1` is required to include the
    # address at index `end` (otherwise one address per batch is skipped).
    for i in range(start, end + 1):
        try:
            api_url = f'https://api.etherscan.io/api?module=account&action=txlist&address={ethereum_addresses[i]}&endblock={endblock}&apikey={etherscan_token}'
            # make the API request and extract the transaction data;
            # the timeout keeps a stalled connection from blocking the whole run
            response = session.get(api_url, timeout=30)
            response.raise_for_status()
            payload = response.json()
            transactions = payload['result']
            if not isinstance(transactions, list):
                # A non-list `result` is treated as an API-level failure
                # (presumably e.g. rate limiting -- see the Etherscan docs).
                raise ValueError(f"unexpected API response: {payload.get('message')} - {transactions}")
            tx_data = [(tx['hash'], tx['from'], tx['to'], tx['timeStamp'], tx['nonce'], tx['value'], tx['gasPrice'], tx['input'], tx['gasUsed'], tx['functionName'], "Ethereum") for tx in transactions]
            data.extend(tx_data)
            time.sleep(0.02)
            if i%1000==0:
                current_time = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                print(f'{current_time} Addresses done: {i} / {len(ethereum_addresses)} ({i/len(ethereum_addresses)*100} %)')
        except Exception as e:
            # Exception text from the HTTP layer can embed the request URL,
            # which contains the API key -- redact it before printing.
            message = str(e)
            if etherscan_token:
                message = message.replace(etherscan_token, '***')
            print(f"Error in {ethereum_addresses[i]}: {message}")
            errors.append(ethereum_addresses[i])
            continue

    # Write csv file (explicit UTF-8 so non-ASCII text cannot fail on
    # platforms whose default encoding is not UTF-8)
    with open(f'../data/transactions_ethereum_{start}-{end}.csv', 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerows(data)
    print('CSV successfully written')

    # Advance to the next batch, clamping to the last valid index.
    start = end + 1
    end = min(end + batch_size, last_index)

Iteration 0 started
2023-07-13 12:16:01 Addresses done: 0 / 59651 (0.0 %)
2023-07-13 12:20:23 Addresses done: 1000 / 59651 (1.676417830380044 %)
2023-07-13 12:24:53 Addresses done: 2000 / 59651 (3.352835660760088 %)
2023-07-13 12:29:17 Addresses done: 3000 / 59651 (5.029253491140132 %)
2023-07-13 12:33:43 Addresses done: 4000 / 59651 (6.705671321520176 %)
2023-07-13 12:38:19 Addresses done: 5000 / 59651 (8.38208915190022 %)
2023-07-13 12:42:37 Addresses done: 6000 / 59651 (10.058506982280264 %)
2023-07-13 12:47:07 Addresses done: 7000 / 59651 (11.734924812660308 %)
2023-07-13 12:51:45 Addresses done: 8000 / 59651 (13.411342643040353 %)
2023-07-13 12:56:04 Addresses done: 9000 / 59651 (15.087760473420394 %)
2023-07-13 13:00:21 Addresses done: 10000 / 59651 (16.76417830380044 %)
2023-07-13 13:05:36 Addresses done: 11000 / 59651 (18.440596134180485 %)
2023-07-13 13:10:37 Addresses done: 12000 / 59651 (20.117013964560527 %)
Error in 0x36092aa71ee2c420cecf62cafeb7e657dde92ec7: HTTPSConnecti