In [None]:
import os

import duckdb
import httpx
import pandas as pd
from dotenv import load_dotenv

# Load variables from a local .env file into the process environment;
# RESERVOIR_API_KEY is read from the environment via os.getenv further down.
load_dotenv()

### 데이터 다운로드

In [None]:
def get_httpx_client():
    """Build an ``httpx.Client`` whose transport is created with ``retries=3``.

    Returns:
        httpx.Client: A new client. This function does not close it, so the
        caller owns its lifetime.
    """
    return httpx.Client(transport=httpx.HTTPTransport(retries=3))


def get_collection_list_from_reservoir(network, client: "httpx.Client"):
    """Download collections from the Reservoir ``collections/v7`` endpoint.

    Pages are requested sorted by 1-day volume with spam and NSFW entries
    excluded. Collections whose 30-day volume equals 0 are dropped. Paging
    stops as soon as a page yields fewer than ``page_size`` kept collections
    or the API no longer returns a continuation token.

    Args:
        network: One of ``"ethereum"``, ``"polygon"`` or ``"bnb"``.
        client: HTTP client used for the requests. Only its
            ``get(url=..., params=..., headers=..., timeout=...)`` method is
            called.

    Returns:
        list: The kept collection dicts, in the order the API returned them.

    Raises:
        AssertionError: If ``network`` is not one of the supported networks.
        Exception: Whatever ``response.raise_for_status()`` raises for an
            error status (``httpx.HTTPStatusError`` for an httpx client).
    """
    # Single source of truth for the page size: it is both sent to the API
    # and used to detect the last page.
    page_size = 20

    network_domains = {"ethereum": "api", "polygon": "api-polygon", "bnb": "api-bsc"}

    assert network in network_domains, f"ambigious network: {network}"

    headers = {"accept": "*/*"}
    api_key = os.getenv("RESERVOIR_API_KEY")
    if api_key:
        # Only send the key header when a key is configured; a None header
        # value is not accepted by httpx.
        headers["x-api-key"] = api_key

    params = {
        "limit": page_size,
        "sortBy": "1DayVolume",
        "excludeSpam": True,
        "excludeNsfw": True,
    }

    url = f"https://{network_domains[network]}.reservoir.tools/collections/v7"

    result_data = []
    continuation = None
    while True:
        if continuation is not None:
            params["continuation"] = continuation

        response = client.get(url=url, params=params, headers=headers, timeout=30)
        # Surface API errors instead of trying to parse an error payload.
        response.raise_for_status()
        payload = response.json()

        # Tolerate a payload without a "collections" list.
        page = payload.get("collections") or []
        kept = [
            collection for collection in page if collection["volume"]["30day"] != 0
        ]
        result_data.extend(kept)

        continuation = payload.get("continuation")
        # Stop on a short (post-filter) page, as before, and also when there
        # is no token for a next page, so a missing token is never re-sent.
        if len(kept) != page_size or not continuation:
            break

    return result_data


def write_parquet(data, filename):
    """Write ``data`` to ``filename`` as a gzip-compressed Parquet file.

    Args:
        data: Anything accepted by the ``pd.DataFrame`` constructor.
        filename: Destination passed straight to ``DataFrame.to_parquet``.

    The DataFrame index is not written to the file.
    """
    frame = pd.DataFrame(data)
    frame.to_parquet(filename, compression="gzip", index=False)

In [None]:
network = "ethereum"

# Use the client as a context manager so it is closed once the download is
# finished instead of being left open for the rest of the kernel session.
with get_httpx_client() as client:
    resp = get_collection_list_from_reservoir(network, client)
resp

In [23]:
# DataFrame.to_parquet returns None when it is given a path, so build the
# frame first and write it in a separate step; `df` then holds the data.
df = pd.DataFrame(resp)
df.to_parquet("./data/collections_df.parquet")

In [None]:
network = "ethereum"
df = pd.read_json(f"./data/{network}_collections.json")
# Reuse the notebook's write_parquet helper (gzip compression, no index)
# instead of repeating the same to_parquet arguments here.
write_parquet(df, f"./data/{network}_collections_pd.parquet")

In [None]:
# Read back the Parquet file produced from the JSON export to inspect it.
# Relies on `network` having been set by an earlier cell.
df = pd.read_parquet(f"./data/{network}_collections_pd.parquet")
df

In [None]:
# NOTE(review): no earlier visible cell writes "./{network}_collections.parquet".
# The only visible write of "ethereum_collections.parquet" to the working
# directory happens in a later cell, so this read probably fails on a fresh
# Restart & Run All — confirm the intended path or cell order.
df = pd.read_parquet(f"./{network}_collections.parquet")
df

### 데이터 가공

In [None]:
# Load the raw JSON export with DuckDB instead of pandas.
# NOTE(review): from here on `df` is whatever duckdb.read_json returns
# (presumably a DuckDB relation — confirm), not a pandas DataFrame as above.
df = duckdb.read_json("./data/ethereum_collections.json")
df

In [None]:
# Write `df` (expected to be the object loaded with duckdb.read_json in the
# previous cell) to a gzip-compressed Parquet file. The path is relative to
# the working directory, not ./data/ like the pandas outputs.
df.to_parquet("ethereum_collections.parquet", compression="gzip")

In [None]:
df = duckdb.read_parquet("ethereum_collections.parquet")

# `schema` is a property of the Arrow table, not a method; calling it raised
# TypeError, so access it without parentheses.
df.to_arrow_table().schema