In [1]:
import shutil
import time
from datetime import datetime
from decimal import Decimal
from enum import Enum
from os import path, mkdir
from typing import Optional

import pandas as pd
import requests
from pydantic import ValidationError
from pydantic.dataclasses import dataclass

In [16]:
class Timeframe(Enum):
    """Candle intervals that can be requested from the klines endpoint.

    Each member's value is the string sent as the `interval` query parameter
    by `Binance.kline`.
    """
    HOURLY = "1h"
    DAILY = "1d"
    WEEKLY = "1w"
    # Upper-case "M"; presumably distinguishes months from minutes - confirm against the API docs.
    MONTHLY = "1M"

@dataclass
class Candle:
    """One candlestick: a timestamp plus open/high/low/close prices and volume.

    `Binance.kline` builds instances positionally from the first six elements
    of each kline row, so the field order below must match the row order.
    The decorator is pydantic's `dataclass`, so values are validated against
    the annotated types on construction.
    """
    # First element of the kline row; presumably the candle's open time - confirm against the API docs.
    time: datetime
    open: Decimal
    high: Decimal
    low: Decimal
    close: Decimal
    volume: Decimal

@dataclass
class Ticker:
    """Snapshot of one trading symbol as returned by `Binance.markets`.

    The decorator is pydantic's `dataclass`, so values are validated against
    the annotated types on construction.
    """
    # Taken from the "symbol" field of the ticker response, e.g. a name ending in "USDT" or "BTC".
    symbol: str
    # Taken from the "lastPrice" field of the ticker response.
    price: Decimal
    # Taken from the "volume" field of the ticker response.
    volume: Decimal


def filter_symbols(market: str, tickers: list[Ticker], blacklist: Optional[list[str]] = None) -> list[Ticker]:
    """Return the tickers that belong to `market` and are not blacklisted.

    Args:
        market: Suffix a symbol must end with to be kept (e.g. "USDT").
        tickers: Tickers to filter; the input list is not modified.
        blacklist: Patterns passed to `blacklisted`; a symbol that starts or
            ends with any of them is dropped. `None` (the default) means no
            blacklist.

    Returns:
        A new list with the matching tickers in their original order.
    """
    # A `None` default avoids sharing one mutable list object between calls.
    blocked = blacklist if blacklist is not None else []
    return [
        ticker
        for ticker in tickers
        if ticker.symbol.endswith(market) and not blacklisted(ticker.symbol, blocked)
    ]

def blacklisted(symbol: str, blacklist: list[str]) -> bool:
    """Report whether `symbol` starts or ends with any entry of `blacklist`.

    An empty blacklist never matches, so the result is False in that case.
    """
    return any(
        symbol.startswith(entry) or symbol.endswith(entry)
        for entry in blacklist
    )

class Binance():
    """Thin client for the Binance REST endpoints used in this notebook.

    Requests are plain GETs without an API key; JSON responses are converted
    into `Ticker` and `Candle` objects.
    """

    BASEURL = "https://api.binance.com"
    ENDPOINTS = {
        "ticker": "/api/v3/ticker/24hr",
        "price": "/api/v3/ticker/price",
        "kline": "/api/v3/klines"
    }
    # Seconds to wait for the server. Without a timeout `requests` can block
    # indefinitely on a stalled connection and hang the notebook.
    DEFAULT_TIMEOUT = 10

    def __init__(self, timeout: float = DEFAULT_TIMEOUT) -> None:
        """Create a client.

        Args:
            timeout: Value passed as `timeout` to every `requests.get` call.
        """
        self.timeout = timeout

    def _get(self, endpoint: str, params=None):
        """GET one of `ENDPOINTS` and return the decoded JSON body.

        Args:
            endpoint: Key into `ENDPOINTS`.
            params: Optional dict of query parameters.

        Raises:
            requests.exceptions.HTTPError: If the status code is not 200. The
                first argument is the decoded JSON error body, or the raw
                response text when the body is not valid JSON.
            requests.exceptions.Timeout: If the request exceeds `self.timeout`.
        """
        url = Binance.BASEURL + Binance.ENDPOINTS[endpoint]
        r = requests.get(url, params=params, timeout=self.timeout)

        if r.status_code != 200:
            # Error bodies are not guaranteed to be JSON (e.g. an HTML page
            # from a proxy); fall back to the text so the HTTPError is still
            # raised instead of a decode error that hides the status.
            try:
                detail = r.json()
            except ValueError:
                detail = r.text
            raise requests.exceptions.HTTPError(detail, response=r)

        return r.json()

    def markets(self) -> list[Ticker]:
        """Fetch the 24hr ticker list and return one `Ticker` per entry.

        Raises:
            requests.exceptions.HTTPError: If the status code is not 200.
        """
        return [
            Ticker(entry["symbol"], entry["lastPrice"], entry["volume"])
            for entry in self._get("ticker")
        ]

    def kline(self, symbol: str, interval: Timeframe) -> list[Candle]:
        """Fetch candles for `symbol` at the given `interval`.

        Args:
            symbol: Market symbol sent as the `symbol` query parameter.
            interval: Timeframe whose value is sent as `interval`.

        Returns:
            One `Candle` per row of the response, in response order.

        Raises:
            requests.exceptions.HTTPError: If the status code is not 200.
        """
        payload = {"symbol": symbol, "interval": interval.value}
        # Only the first six elements of each row map onto Candle's fields
        # (time, open, high, low, close, volume); the rest are ignored.
        return [Candle(*row[:6]) for row in self._get("kline", payload)]

**Data Selection**

In [8]:
# Patterns that `blacklisted` matches against the start or end of a symbol.
blacklist = [
    "UPUSDT",
    "DOWNUSDT",
    "BEARUSDT",
    "BULLUSDT",
]
stablecoins = [
    "TUSD", "BUSD", "USDC", "PAX", "USDP", "DAI",
    "GUSD", "USDD", "USTC", "UST", "USDS",
]

# One Ticker per symbol, fetched from the 24hr ticker endpoint.
binance = Binance()
markets = binance.markets()

In [None]:
# Symbols ending in "USDT", minus anything matching the blacklist or a stablecoin name.
usdt_markets = filter_symbols(market="USDT", tickers=markets, blacklist=[*blacklist, *stablecoins])
# Symbols ending in "BTC", with no blacklist applied.
btc_markets = filter_symbols(market="BTC", tickers=markets)

**Data Download**

The download could be much faster with async requests and without the sleep, but since it is not time-critical, staying under the API rate limits matters more.

In [3]:
print(f"Downloading {len(usdt_markets)} USDT markets...")
market_data = {}
tf = Timeframe.DAILY
for i, ticker in enumerate(usdt_markets):
    # Pause 5 s before every 20th request (including the first) to stay
    # clear of the API rate limits.
    if i % 20 == 0:
        time.sleep(5)
    # Request the timeframe held in `tf`: the CSV file names are derived from
    # `tf` too, so a hard-coded interval here could mislabel the saved data.
    market_data[ticker.symbol] = binance.kline(ticker.symbol, tf)
print("Finished downloading.")

Downloading 387 markets...
Finished downloading.


**Data Persistence**

Run this cell to save the downloaded kline data to one CSV file per market. Note that it deletes and recreates the `../data` directory first, so any files already in it are lost.

In [4]:
data_path = path.join("..", "data")

# Check for data BEFORE deleting anything: this cell resets `market_data` to
# None at the end, so re-running it without a fresh download would otherwise
# wipe the existing export and then fail.
if not market_data:
    raise RuntimeError("No market data in memory - run the download cell before saving.")

# Start from an empty directory; this removes everything currently in it.
if path.exists(data_path):
    shutil.rmtree(data_path)

mkdir(data_path)

for symbol, data in market_data.items():
    if not data:
        # No candles for this market: the frame would have no "time" column
        # and set_index would raise, aborting the export half-way.
        continue
    # `data` is a list of Candle dataclass instances, which the DataFrame
    # constructor turns into one column per field.
    df = pd.DataFrame(data).set_index("time")
    df.to_csv(path.join(data_path, f"{symbol}_{tf.name.lower()}.csv"))

# Remove data from memory
df = None
market_data = None