## Flipside

In [1]:
from flipside import Flipside
import os

# The API key is read from the environment so it is never stored in the notebook.
API_KEY = os.getenv("FLIPSIDE_API_KEY", "")
API_URL = "https://api-v2.flipsidecrypto.xyz"

# Stop here with an actionable message rather than building a client around an
# empty key (presumably the API would reject it on the first query — confirm).
if not API_KEY:
    raise RuntimeError(
        "FLIPSIDE_API_KEY is not set; export it before running this notebook"
    )

# Client used by the query cells below.
flipside = Flipside(API_KEY, API_URL)

In [2]:
import polars as pl

# Load the processed address list from CSV and report how many rows it has.
addresses = pl.read_csv("../data/processed/addresses.csv")

address_count = addresses.height
print(f"Processing {address_count} addresses")

Processing 852475 addresses


In [3]:
# Look up rows of ethereum.core.dim_labels for every address, one SQL query per
# batch of `batch_size` addresses, and gather the matches in `all_results`.
all_results = pl.DataFrame()

batch_size = 100000
total_addresses = len(addresses)
total_batches = (total_addresses + batch_size - 1) // batch_size  # ceiling division

# Per-batch frames are collected here and concatenated once after the loop,
# instead of re-copying the accumulated frame on every iteration.
label_frames = []

for batch_number, i in enumerate(range(0, total_addresses, batch_size), start=1):
    # Get the current batch of addresses
    end_idx = min(i + batch_size, total_addresses)
    addresses_batch = addresses.slice(i, end_idx - i)["address"].to_list()

    # Format addresses for SQL query - each address needs to be in single quotes.
    # Null entries are dropped because they cannot be quoted.
    formatted_addresses = [
        "'" + addr + "'" for addr in addresses_batch if addr is not None
    ]

    print(
        f"Processing batch {batch_number}/{total_batches}: {len(addresses_batch)} addresses"
    )

    # A batch with no usable addresses would yield `IN ()`, which is not valid
    # SQL, so skip the round trip entirely.
    if not formatted_addresses:
        print("No non-null addresses in this batch; skipping query")
        continue

    # Join the formatted addresses with commas for SQL IN clause
    addresses_sql_string = ", ".join(formatted_addresses)

    sql = f"""
    select * from ethereum.core.dim_labels where address in ({addresses_sql_string})
    """

    # This sends the SQL to Flipside and returns a QueryResultSet object
    # NOTE(review): if the SDK paginates results, `.records` may hold only the
    # first page — confirm a batch's matches always fit in one page.
    query_result_set = flipside.query(sql)

    # Convert results to DataFrame and keep it for the final concatenation
    if query_result_set.records:
        batch_results = pl.DataFrame(query_result_set.records)
        label_frames.append(batch_results)
        print(f"Added {len(batch_results)} results from batch")
    else:
        print("No results found in this batch")

# "vertical_relaxed" casts mismatched columns to their common supertype, so a
# dtype inferred differently in one batch does not abort the whole run.
if label_frames:
    all_results = pl.concat(label_frames, how="vertical_relaxed")

print(f"Total results collected: {len(all_results)}")

Processing batch 1/9: 100000 addresses
Added 19057 results from batch
Processing batch 2/9: 100000 addresses
Added 19146 results from batch
Processing batch 3/9: 100000 addresses
Added 18717 results from batch
Processing batch 4/9: 100000 addresses
Added 18890 results from batch
Processing batch 5/9: 100000 addresses
Added 19014 results from batch
Processing batch 6/9: 100000 addresses
Added 19024 results from batch
Processing batch 7/9: 100000 addresses
Added 19103 results from batch
Processing batch 8/9: 100000 addresses
Added 18998 results from batch
Processing batch 9/9: 52475 addresses
Added 9919 results from batch
Total results collected: 161868


In [6]:
# Keep the address plus the three label columns, renaming the label columns with
# a `flipside_` prefix, then write the result as zstd-compressed Parquet.
flipside_labels = all_results.select(
    "address",
    flipside_address_name=pl.col("address_name"),
    flipside_label_type=pl.col("label_type"),
    flipside_label=pl.col("label"),
)
flipside_labels.write_parquet(
    "../data/external/flipside_address_labels.parquet", compression="zstd"
)

In [7]:
# Look up rows of ethereum.core.dim_contracts for every address, one SQL query
# per batch of `batch_size` addresses, and gather the matches in
# `contracts_results`.
contracts_results = pl.DataFrame()

batch_size = 100000
total_addresses = len(addresses)
total_batches = (total_addresses + batch_size - 1) // batch_size  # ceiling division

# Per-batch frames are collected here and concatenated once after the loop,
# instead of re-copying the accumulated frame on every iteration.
contract_frames = []

for batch_number, i in enumerate(range(0, total_addresses, batch_size), start=1):
    # Get the current batch of addresses
    end_idx = min(i + batch_size, total_addresses)
    addresses_batch = addresses.slice(i, end_idx - i)["address"].to_list()

    # Format addresses for SQL query - each address needs to be in single quotes.
    # Null entries are dropped because they cannot be quoted.
    formatted_addresses = [
        "'" + addr + "'" for addr in addresses_batch if addr is not None
    ]

    print(
        f"Processing batch {batch_number}/{total_batches}: {len(addresses_batch)} addresses"
    )

    # A batch with no usable addresses would yield `IN ()`, which is not valid
    # SQL, so skip the round trip entirely.
    if not formatted_addresses:
        print("No non-null addresses in this batch; skipping query")
        continue

    # Join the formatted addresses with commas for SQL IN clause
    addresses_sql_string = ", ".join(formatted_addresses)

    sql = f"""
    select address, created_block_number, creator_address from ethereum.core.dim_contracts where address in ({addresses_sql_string})
    """

    # This sends the SQL to Flipside and returns a QueryResultSet object
    # NOTE(review): if the SDK paginates results, `.records` may hold only the
    # first page — confirm a batch's matches always fit in one page.
    query_result_set = flipside.query(sql)

    # Convert results to DataFrame and keep it for the final concatenation
    if query_result_set.records:
        batch_results = pl.DataFrame(query_result_set.records)
        contract_frames.append(batch_results)
        print(f"Added {len(batch_results)} results from batch")
    else:
        print("No results found in this batch")

# "vertical_relaxed" casts mismatched columns to their common supertype, so a
# dtype inferred differently in one batch does not abort the whole run.
if contract_frames:
    contracts_results = pl.concat(contract_frames, how="vertical_relaxed")

print(f"Total results collected: {len(contracts_results)}")

Processing batch 1/9: 100000 addresses
Added 14099 results from batch
Processing batch 2/9: 100000 addresses
Added 14065 results from batch
Processing batch 3/9: 100000 addresses
Added 13914 results from batch
Processing batch 4/9: 100000 addresses
Added 14020 results from batch
Processing batch 5/9: 100000 addresses
Added 13897 results from batch
Processing batch 6/9: 100000 addresses
Added 14071 results from batch
Processing batch 7/9: 100000 addresses
Added 14049 results from batch
Processing batch 8/9: 100000 addresses
Added 14065 results from batch
Processing batch 9/9: 52475 addresses
Added 7414 results from batch
Total results collected: 119594


In [8]:
# Keep the three contract columns under their existing names and write them as
# zstd-compressed Parquet.
contract_columns = ["address", "created_block_number", "creator_address"]
contracts_results.select(contract_columns).write_parquet(
    "../data/external/flipside_contracts_data.parquet", compression="zstd"
)
