## Arbitrum Data Collection
**[Johnnatan Messias](https://johnnatan-messias.github.io/), March 2025**

This notebook retrieves Arbitrum event logs from the [Nansen Query API](https://query.nansen.ai) (the BigQuery table `nansen-query.raw_arbitrum.logs`) and saves them locally as gzip-compressed CSV files.

In [None]:
import pandas as pd
from tqdm.notebook import tqdm
import pandas_gbq
from google.cloud import bigquery
%load_ext google.cloud.bigquery


In [None]:
# Passed as `project_id` to every pandas_gbq.read_gbq() call in this notebook.
project_id = "proof-time-prediction"

# Passed as `progress_bar_type` to pandas_gbq.read_gbq() for download progress.
progress_bar_type = "tqdm_notebook"

In [None]:
import sys
import os

# Output directory for the downloaded logs: <cwd>/../data/arbitrum, resolved to
# an absolute path. The trailing separator lets the download cells build file
# paths with plain string concatenation (data_dir + filename).
data_dir = os.path.realpath(os.path.join(
    os.getcwd(), "..", "data", "arbitrum")) + os.sep

# Create the directory up front so that DataFrame.to_csv() in the download
# cells does not fail with a missing-directory error on a fresh checkout.
os.makedirs(data_dir, exist_ok=True)

print(data_dir)

In [None]:
# ARB token contract.
# There are 65,674,747 logs for this contract.
arb_token_contract_address = "0x912ce59144191c1204e64559fe8253a0e49e6548"

# Arbitrum airdrop contract.
# There are 1,208,286 logs for this contract.
arbitrum_airdrop_contract_address = "0x67a24ce4321ab3af51c2d0a4801c3e111d88c9d9"

In [None]:
# Query template: all log rows for a single contract address.
# Fill in with sql.format(contract_address=...).
sql = (
    "\n"
    "SELECT *\n"
    "FROM `nansen-query.raw_arbitrum.logs`\n"
    "WHERE address = '{contract_address}'\n"
)

In [None]:
# Query template: log rows for a single contract address restricted to the
# half-open time window [start_date, end_date).
# Fill in with sql_2.format(contract_address=..., start_date=..., end_date=...).
sql_2 = (
    "\n"
    "SELECT *\n"
    "FROM `nansen-query.raw_arbitrum.logs`\n"
    "WHERE address = '{contract_address}'"
    " AND block_timestamp >= '{start_date}'"
    " AND block_timestamp < '{end_date}'\n"
)

In [None]:
# Fetch every log row whose address matches the airdrop contract in a single
# query, then write the result to data_dir as one gzip-compressed CSV.
filename = 'arbitrum_logs.csv.gz'
query = sql.format(contract_address=arbitrum_airdrop_contract_address)
df = pandas_gbq.read_gbq(
    query,
    project_id=project_id,
    progress_bar_type=progress_bar_type,
)
df.to_csv(data_dir + filename, compression='gzip', index=False)

In [None]:
# Download the ARB token contract logs in consecutive 7-day windows, writing
# one gzip-compressed CSV per window (presumably to keep each query result at
# a manageable size).
start_date, end_date = '2023-03-01', 'today'

# Window boundaries: every 7th day starting at start_date, up to end_date.
dates = list(map(lambda x: x.date(), pd.date_range(
    start=start_date, end=end_date, freq='7D')))

# The 7-day grid usually stops a few days short of end_date, which would leave
# the most recent days unfetched. Add the end of the range as a final boundary
# so the last window (shorter than 7 days) covers them.
# Note: the last window ends at the run date, so its file name differs
# between runs made on different days.
range_end = pd.Timestamp(end_date).date()
if dates and dates[-1] < range_end:
    dates.append(range_end)

# Each window is half-open, [start_date, end_date), matching the `>=` / `<`
# comparison in sql_2, so consecutive windows do not overlap.
# start_date and end_date are rebound on every iteration as the window advances.
for end_date in dates[1:]:
    print("Fetching logs for the contract {} between {} and {}".format(
        arb_token_contract_address, str(start_date), str(end_date))
    )
    query = sql_2.format(
        contract_address=arb_token_contract_address,
        start_date=str(start_date),
        end_date=str(end_date)
    )
    df = pandas_gbq.read_gbq(query, project_id=project_id,
                             progress_bar_type=progress_bar_type)
    print("There are {} logs for the contract {} between {} and {}".format(
        df.shape[0], arb_token_contract_address, str(start_date), str(end_date))
    )
    # One file per window, named after the window's start and end dates.
    filename = "arb_{}_{}.csv.gz".format(str(start_date), str(end_date))
    df.to_csv(data_dir+filename, index=False, compression='gzip')
    # The end of this window is the start of the next one.
    start_date = end_date
    print("Saved logs to {}".format(data_dir+filename))