In [4]:
import time
import shutil
import requests
import pandas as pd

from enum import Enum
from os import path, mkdir
from decimal import Decimal
from datetime import datetime
from pydantic import ValidationError
from pydantic.dataclasses import dataclass

In [5]:
class Timeframe(Enum):
    """Candle intervals that can be requested.

    Each member's value is the interval string sent as the ``interval``
    request parameter by ``Binance.kline``; the lower-cased member name is
    used in the names of the exported CSV files.
    """
    HOURLY = "1h"
    DAILY = "1d"
    WEEKLY = "1w"
    MONTHLY = "1M"

@dataclass
class Candle:
    """One kline (candlestick) entry.

    ``Binance.kline`` builds instances positionally from the first six
    elements of each raw kline row, so the field order below must match the
    order of the API response. ``dataclass`` is pydantic's, so the raw values
    are validated against the annotated types on construction.
    """
    # Presumably the candle's open time (confirm against the API docs);
    # used as the DataFrame index downstream.
    time: datetime
    open: Decimal
    high: Decimal
    low: Decimal
    close: Decimal
    volume: Decimal

@dataclass
class Ticker:
    """Summary of one market as returned by the 24hr ticker endpoint.

    ``Binance.markets`` fills the fields from the ``symbol``, ``lastPrice``
    and ``volume`` keys of each response item. ``dataclass`` is pydantic's, so
    the raw values are validated against the annotated types on construction.
    """
    symbol: str
    price: Decimal
    volume: Decimal

def highest_volume(tickers: list[Ticker]) -> list[Ticker]:
    """Rank tickers from highest to lowest ``price * volume``.

    Tickers whose price or volume is not strictly positive are dropped before
    ranking. Tickers with an equal product keep their original relative order.
    """
    def turnover(ticker):
        return ticker.price * ticker.volume

    active = [t for t in tickers if t.price > 0 and t.volume > 0]
    active.sort(key=turnover, reverse=True)
    return active

def filter_symbols(market: str, tickers: list[Ticker], blacklist: list[str] = []) -> list[Ticker]:
    """Select the tickers quoted in ``market`` that are not blacklisted.

    A ticker is kept when its symbol ends with ``market`` and ``blacklisted``
    reports False for it. The input order is preserved.
    """
    # The shared default list is only ever read, never mutated, in this function.
    selected = []
    for ticker in tickers:
        if not ticker.symbol.endswith(market):
            continue
        if blacklisted(ticker.symbol, blacklist):
            continue
        selected.append(ticker)
    return selected

def blacklisted(symbol: str, blacklist: list[str]) -> bool:
    """Report whether ``symbol`` starts or ends with any entry of ``blacklist``.

    Returns False for an empty blacklist.
    """
    return any(
        symbol.startswith(entry) or symbol.endswith(entry)
        for entry in blacklist
    )

class Binance():
    """Minimal client for the public Binance REST market-data endpoints."""

    BASEURL = "https://api.binance.com"
    ENDPOINTS = {
        "ticker": "/api/v3/ticker/24hr",
        "price": "/api/v3/ticker/price",
        "kline": "/api/v3/klines"
    }
    # Seconds to wait for the server. Without a timeout, requests blocks
    # indefinitely when the connection stalls.
    TIMEOUT = 10

    def __init__(self) -> None:
        pass

    def _get(self, endpoint: str, params=None):
        """Fetch ``ENDPOINTS[endpoint]`` and return the decoded JSON body.

        Raises:
            requests.exceptions.HTTPError: if the status code is not 200.
            requests.exceptions.RequestException: on connection problems or
                when ``TIMEOUT`` is exceeded.
        """
        url = Binance.BASEURL + Binance.ENDPOINTS[endpoint]
        response = requests.get(url, params=params, timeout=Binance.TIMEOUT)

        if response.status_code != 200:
            # Error bodies are not guaranteed to be JSON (e.g. a proxy's HTML
            # error page); fall back to the raw text so the HTTPError is not
            # masked by a decoding error.
            try:
                detail = response.json()
            except ValueError:
                detail = response.text
            raise requests.exceptions.HTTPError(detail, response=response)

        return response.json()

    def markets(self) -> list[Ticker]:
        """Return one ``Ticker`` per item of the 24hr ticker endpoint.

        Raises:
            requests.exceptions.HTTPError: if the status code is not 200.
        """
        return [
            Ticker(item["symbol"], item["lastPrice"], item["volume"])
            for item in self._get("ticker")
        ]

    def kline(self, symbol: str, interval: Timeframe) -> list[Candle]:
        """Return the candles of ``symbol`` for the given ``interval``.

        Each ``Candle`` is built from the first six elements of a raw kline
        row; any further elements are ignored.

        Raises:
            requests.exceptions.HTTPError: if the status code is not 200.
        """
        payload = { "symbol": symbol, "interval": interval.value }
        return [Candle(*row[:6]) for row in self._get("kline", payload)]

**Data Selection**

In [6]:
# Fetch the 24hr ticker of every market (network call).
binance = Binance()
markets = binance.markets()

# Entries are matched as prefix OR suffix of the symbol (see `blacklisted`).
# NOTE(review): the *USDT suffixes presumably target leveraged tokens - confirm.
blacklist = ["UPUSDT", "DOWNUSDT", "BEARUSDT", "BULLUSDT"]
stablecoins = ["TUSD", "BUSD", "USDC", "PAX", "USDP", "DAI", "GUSD", "USDD", "USTC", "UST", "USDS"]

# Keep symbols ending in "USDT" that match neither list, then rank them by
# price * volume, highest first.
usdt_markets = filter_symbols("USDT", markets, blacklist + stablecoins)
usdt_volume = highest_volume(usdt_markets)

**Data Download**

Downloading could be a lot faster with async requests and without sleeping between calls, but since the download is not time-critical, it is more important to stay within the API rate limits.

In [7]:
tf = Timeframe.HOURLY
download = usdt_volume[:50]
# Pause between consecutive requests (seconds) to stay clear of the API rate
# limits; speed is not important for this download.
REQUEST_DELAY = 0.25

print(f"Downloading {len(download)} USDT markets...")
usdt_data = {}
for ticker in download:
    usdt_data[ticker.symbol] = binance.kline(ticker.symbol, tf)
    time.sleep(REQUEST_DELAY)
print("Finished downloading.")

Downloading 50 USDT markets...
Finished downloading.


In [8]:
import plotly.express as px

def relative(symbol: str, df: pd.DataFrame, benchmark: pd.DataFrame) -> pd.DataFrame:
    """Add a column with the performance of ``df`` relative to ``benchmark``.

    ``df`` is first aligned to the benchmark's index (gaps are back-filled
    from the next available row). The new column, named ``symbol``, is
    ``close / (benchmark_close * first_close) * first_benchmark_close``.

    Args:
        symbol: Name of the column to add.
        df: Frame with a ``close`` column.
        benchmark: Frame with a ``close`` column; its index defines the rows
            of the result.

    Returns:
        The re-indexed copy of ``df`` with the extra ``symbol`` column; the
        ``df`` passed in is left unchanged.
    """
    rel = df.reindex(benchmark.index, method="bfill")
    # Take the first row by position with .iloc: `series[0]` on a non-integer
    # index is a deprecated positional fallback in pandas.
    first_close = rel["close"].iloc[0]
    first_benchmark_close = benchmark["close"].iloc[0]
    rel[symbol] = rel["close"].div(benchmark["close"] * first_close) * first_benchmark_close
    return rel

# BTCUSDT is the benchmark every downloaded market is compared against.
benchmark = pd.DataFrame.from_dict(usdt_data["BTCUSDT"]).set_index("time")
relative_performance = pd.DataFrame(index = benchmark.index)

for symbol, data in usdt_data.items():
    try:
        df = pd.DataFrame.from_dict(data).set_index("time")
        df = relative(symbol, df, benchmark)
        # Each symbol occurs once, so this appends a new right-most column.
        relative_performance[symbol] = df[symbol]
    except TypeError:
        print(f"Error while processing {symbol}")

# One line per market, plotted over the benchmark's time index.
fig = px.line(
    relative_performance,
    x = relative_performance.index,
    y = relative_performance.columns,
)
fig.show()

**Data Persistence**

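Execute to save the downloaded kline data as one CSV file per downloaded market. Note that the existing `../data` directory is deleted first and the downloaded data is removed from memory afterwards, so re-run the download cell before executing this cell again.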

In [4]:
data_path = path.join("..", "data")

# Validate before touching the disk. This cell clears `usdt_data` when it
# finishes, so running it a second time would otherwise delete every saved CSV
# and then crash without writing anything back.
if not usdt_data:
    raise RuntimeError("No kline data in memory; run the download cell again before saving.")

# Start from an empty directory so files from earlier runs cannot linger.
if path.exists(data_path):
    shutil.rmtree(data_path)

mkdir(data_path)

# One CSV per market, named "<symbol>_<timeframe name in lower case>.csv".
for symbol, data in usdt_data.items():
    df = pd.DataFrame.from_dict(data).set_index("time")
    df.to_csv(path.join(data_path, f"{symbol}_{tf.name.lower()}.csv"))

# Remove data from memory
df = None
usdt_data = None