In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 
import requests

In [2]:
from IPython.display import display, HTML
# Inject CSS so the notebook's .container element may use the full page width.
display(HTML("<style>.container { max-width:100% !important; }</style>"))

In [3]:
# Root URL; each request function appends its endpoint path to this string.
base_url = 'https://api.binance.com'
def get_base_currencies():
    """Fetch exchange metadata and collect the spot-tradeable symbols.

    Requests ``/api/v3/exchangeInfo`` below ``base_url`` and keeps every
    symbol whose ``status`` equals ``'TRADING'`` and whose
    ``isSpotTradingAllowed`` flag is truthy.

    Returns:
        tuple: ``(base_currencies, symbols, tradeable)`` where

        * ``base_currencies`` is the sorted list of distinct ``baseAsset``
          values of the kept symbols, or ``None`` on a non-200 response;
        * ``symbols`` is the complete ``symbols`` list of the response
          (an empty list on a non-200 response);
        * ``tradeable`` is the list of kept symbol dicts (an empty list on a
          non-200 response).
    """
    endpoint = '/api/v3/exchangeInfo'
    url = base_url + endpoint
    # Bound the wait so a stalled connection cannot hang the notebook forever.
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print(f'Error: {response.status_code}')
        # The two lists are only built on success; hand back empty ones so the
        # caller can still unpack three values and iterate over them.
        return None, [], []

    symbols = response.json()['symbols']
    tradeable = [
        symbol for symbol in symbols
        if symbol["status"] == 'TRADING' and symbol["isSpotTradingAllowed"]
    ]
    # A set removes the duplicates produced by one asset trading in many pairs.
    base_currencies = {symbol['baseAsset'] for symbol in tradeable}
    return sorted(base_currencies), symbols, tradeable

# Query the exchange metadata once; later cells read these notebook globals.
base_currencies,symbols,tradeable = get_base_currencies()
#print(base_currencies)
# Just the symbol strings of the spot-tradeable entries.
tradeable_tickers = [tt["symbol"] for tt in tradeable]
# Last expression of the cell, so the count is shown as the cell output.
len(tradeable_tickers)

1403

In [4]:
# Sanity check: number of tradeable tickers and the first ten symbols.
print(len(tradeable_tickers),tradeable_tickers[:10])

1403 ['ETHBTC', 'LTCBTC', 'BNBBTC', 'NEOBTC', 'QTUMETH', 'EOSETH', 'SNTETH', 'BNTETH', 'GASBTC', 'BNBETH']


In [5]:

# Despite the name, get_high_volume_trading_pairs matches these as symbol
# suffixes (str.endswith), i.e. a pair is kept when its symbol ends with one.
basetickers = ["USDT","ETH","BTC","BNB","LTC"]
def get_high_volume_trading_pairs():
    """Rank the spot-tradeable pairs by their 24h quote volume.

    Requests ``/api/v3/ticker/24hr`` below ``base_url``, sorts the entries by
    ``quoteVolume`` in descending order, then keeps the entries whose symbol
    is listed in ``tradeable_tickers`` and ends with one of ``basetickers``.
    The size of the list after each step is printed.

    Returns:
        tuple: ``(high_volume_pairs, trading_pairs)`` where
        ``high_volume_pairs`` holds ``{'symbol', 'quote_volume'}`` dicts for
        the first ten kept entries and ``trading_pairs`` is the complete
        filtered, sorted list. On a non-200 response the result is
        ``(None, [])``.
    """
    endpoint = '/api/v3/ticker/24hr'
    url = base_url + endpoint
    # Bound the wait so a stalled connection cannot hang the notebook forever.
    response = requests.get(url, timeout=30)

    if response.status_code != 200:
        print(f'Error: {response.status_code}')
        # Nothing was fetched, so there is no pair list to hand back; an empty
        # list keeps the two-value return shape usable by the caller.
        return None, []

    trading_pairs = sorted(response.json(), key=lambda x: float(x['quoteVolume']), reverse=True)
    print("all pairs",len(trading_pairs))

    # Set membership is O(1); testing against the list made this step quadratic.
    tradeable_lookup = set(tradeable_tickers)
    trading_pairs = [sp for sp in trading_pairs if sp["symbol"] in tradeable_lookup]
    print("spot tradeable pairs",len(trading_pairs))

    # str.endswith accepts a tuple and returns True if any suffix matches.
    suffixes = tuple(basetickers)
    trading_pairs = [sp for sp in trading_pairs if sp["symbol"].endswith(suffixes)]
    print("with base pairs",len(trading_pairs))

    high_volume_pairs = [
        {'symbol': pair['symbol'], 'quote_volume': float(pair['quoteVolume'])}
        for pair in trading_pairs[:10]
    ]
    return high_volume_pairs, trading_pairs

# Run the ranking once; `trading_pairs` is reused by the download cells below.
high_volume_trading_pairs,trading_pairs = get_high_volume_trading_pairs()

all pairs 2203
spot tradeable pairs 1403
with base pairs 819


# saving the list of trading pairs

In [30]:
import pickle
with open('trading_pairs.pkl', 'wb') as f:  # binary mode: pickle writes bytes, not text
    pickle.dump(trading_pairs, f)
# Uncomment to restore the saved list instead of querying the API again.
# NOTE: only unpickle files you created yourself; pickle.load can run arbitrary code.
# with open('trading_pairs.pkl', 'rb') as f:
#     trading_pairs = pickle.load(f)

In [88]:
import requests
import csv
from datetime import datetime
from pathlib import Path
from os.path import exists
import time
# Re-assigns the same root URL that the first API cell already defined.
base_url = 'https://api.binance.com'
def get_klines(symbol, interval='1h', start_time=None, end_time=None, limit=500):
    """Download one page of klines for ``symbol`` from ``/api/v3/klines``.

    Args:
        symbol: Value sent as the ``symbol`` query parameter.
        interval: Value sent as the ``interval`` query parameter.
        start_time: Optional ``startTime`` query value; left out when ``None``.
            (download_klines_yearly passes epoch milliseconds here.)
        end_time: Optional ``endTime`` query value; left out when ``None``.
        limit: Value sent as the ``limit`` query parameter.

    Returns:
        list[dict] | None: One dict per returned row, keyed by field name, with
        the price and volume fields converted to ``float``. ``None`` when the
        response status is not 200 (the status code is printed).
    """
    endpoint = '/api/v3/klines'
    url = base_url + endpoint
    params = {'symbol': symbol,'interval': interval,'limit': limit}
    # Compare against None: a plain truthiness test would silently drop a
    # legitimate bound of 0 and thereby change which rows are requested.
    if start_time is not None:
        params['startTime'] = start_time
    if end_time is not None:
        params['endTime'] = end_time
    # Bound the wait so a stalled connection cannot hang a long download run.
    response = requests.get(url, params=params, timeout=30)
    if response.status_code != 200:
        print(f'Error: {response.status_code}',end="")
        return None

    # Each row is a positional list; map the positions onto named fields.
    return [
        {
            'open_time': kline[0],
            'open': float(kline[1]),
            'high': float(kline[2]),
            'low': float(kline[3]),
            'close': float(kline[4]),
            'volume': float(kline[5]),
            'close_time': kline[6],
            'quote_asset_volume': float(kline[7]),
            'number_of_trades': kline[8],
            'taker_buy_base_asset_volume': float(kline[9]),
            'taker_buy_quote_asset_volume': float(kline[10]),
            'ignore': kline[11]
        }
        for kline in response.json()
    ]

def save_klines_to_csv(klines, filename,data_folder):
    """Write kline dicts to ``data_folder + filename`` as a CSV with a header row.

    ``data_folder`` is created (including parents) when missing. The target
    path is built by plain string concatenation, so ``data_folder`` has to end
    with a path separator. An existing file is overwritten.

    Args:
        klines: Iterable of dicts whose keys are the column names listed below.
        filename: File name appended to ``data_folder``.
        data_folder: Directory prefix for the output file.
    """
    columns = [
        'open_time', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_asset_volume', 'number_of_trades',
        'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'
    ]
    Path(data_folder).mkdir(parents=True, exist_ok=True)
    target = data_folder + filename

    # newline='' lets the csv module control the line endings itself.
    with open(target, mode='w', newline='') as out_file:
        csv_out = csv.DictWriter(out_file, fieldnames=columns)
        csv_out.writeheader()
        csv_out.writerows(klines)
def doesFileExist(filename,data_folder):
    """Return whether the path ``data_folder + filename`` exists.

    The two parts are joined by plain string concatenation, the same way
    save_klines_to_csv builds its output path.
    """
    candidate = data_folder + filename
    return exists(candidate)
def download_klines_yearly(symbol, interval='1h', start_year=2018,data_folder=None):
    """Download klines for ``symbol`` year by year, writing one CSV per year.

    For every year from ``start_year`` through the current year the klines are
    fetched page by page with get_klines and stored via save_klines_to_csv as
    ``{symbol}_{year}_{interval}.csv``. A past year whose file already exists
    is skipped; the current year is always downloaded again.

    Args:
        symbol: Trading pair symbol passed on to get_klines.
        interval: Kline interval passed on to get_klines.
        start_year: First calendar year to download.
        data_folder: Output directory prefix (must end with a path separator,
            because the file helpers concatenate strings). Defaults to
            ``kline_data/{symbol}/`` when ``None``.

    Side effects:
        Writes CSV files, prints progress markers, and sleeps 2 seconds after
        every 15th request and after each saved file.
    """
    if data_folder is None:
        # Same layout the notebook cells pass explicitly; the former default
        # of None failed as soon as it was concatenated with the file name.
        data_folder = f"kline_data/{symbol}/"

    current_year = datetime.now().year
    for year in range(start_year, current_year + 1):
        filename = f'{symbol}_{year}_{interval}.csv'
        if current_year != year and doesFileExist(filename, data_folder):
            continue

        # NOTE(review): naive datetimes, so the year boundaries follow the
        # machine's local timezone - confirm that this is intended.
        start_time = int(datetime(year, 1, 1).timestamp() * 1000)
        # Use the real start of the next year so leap years keep their last day.
        end_time = int(datetime(year + 1, 1, 1).timestamp() * 1000)

        klines = []
        cursor = start_time
        counts = 0
        failed = False
        while cursor < end_time:
            # Bound the page at both ends: without an end bound a page can
            # spill into later years (or, for a symbol with no data in this
            # year, consist only of later data). endTime is assumed to be
            # inclusive, hence the -1.
            fetched_klines = get_klines(symbol, interval, start_time=cursor,
                                        end_time=end_time - 1, limit=1000)
            if counts%50==0 and counts//50>0:
                print(counts//50,end="")
            if counts%15==0 and counts//15>0:
                time.sleep(2)
            counts+=1
            if fetched_klines is None:
                # Request failed: what was collected so far is incomplete.
                failed = True
                break
            if len(fetched_klines)==0:
                break
            klines.extend(fetched_klines)
            # Resume just after the last row received so that it is not
            # downloaded (and stored) a second time by the next page.
            next_cursor = fetched_klines[-1]['open_time'] + 1
            if next_cursor <= cursor:
                break
            cursor = next_cursor

        # Do not persist a partial year: the file would be treated as a
        # complete cache entry and never be refreshed once the year is over.
        if len(klines)>0 and not failed:
            save_klines_to_csv(klines, filename ,data_folder)
            time.sleep(2)
            print("",end=" ")

## sample run

In [60]:
symbol = 'BTCUSDT'
# One folder per symbol. The trailing slash is required because the file
# helpers build paths with plain string concatenation (data_folder+filename).
data_folder=f"kline_data/{symbol}/"
interval = '1h'
download_klines_yearly(symbol, interval,data_folder=data_folder)

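## large run: downloads every interval listed in `intervals` below, largest first (trim the list, e.g. to intervals >= 2h, for a shorter run)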

In [90]:
# Intervals processed by the download loop; the commented-out lines are
# alternative subsets that can be swapped in for a partial run.
intervals=['1m','3m','5m','15m','30m','1h','2h','4h','6h','8h','12h','1d','3d','1w','1M']
#intervals=['12h','1d','3d','1w','1M']
#intervals=['5m','15m','30m','1h','2h','4h','6h','8h','12h','1d','3d','1w','1M']
#intervals=['5m']
#intervals=['1m','3m']

In [87]:
%%time
for interval in intervals[::-1]:
    print(interval)
    for i,sp in list(enumerate(trading_pairs[50:250])):
        symbol = sp["symbol"]
        data_folder=f"kline_data/{symbol}/"
        print(f" {i}/{len(trading_pairs)}{symbol}",end=" ")
        download_klines_yearly(symbol, interval,data_folder=data_folder)
    print("")

3m
 0/819BTCUSDT 123 123 123 123 123 1  1/819EDUUSDT        2/819ETHUSDT 123 123 123 123 123 1  3/819USDCUSDT  123 123 123 12   4/819BUSDUSDT   123 123 123 1  5/819XRPUSDT 12 123 123 123 123 1  6/819SOLUSDT   1 123 123 1  7/819BNBUSDT 123 123 123 123 123 1  8/819ARBUSDT        9/819ALPHAUSDT    123 123 1  10/819IDUSDT        11/819CFXUSDT    12 123 1  12/819TUSDUSDT 12 123 123 123 12   13/819RNDRUSDT     123 1  14/819APTUSDT      1  15/819INJUSDT    123 123 1  16/819DOGEUSDT  1 123 123 123 1  17/819MATICUSDT  12 123 123 123 1  18/819LTCUSDT 123 123 123 123 123 1  19/819RADUSDT     123 1  20/819ICPUSDT    12 123 1  21/819EURUSDT   123 123 123 1  22/819FILUSDT    123 123 1  23/819LINAUSDT    12 123 1  24/819LINKUSDT  123 123 123 123 1  25/819FTMUSDT  1 123 123 123 1  26/819DYDXUSDT    1 123 1  27/819ADAUSDT 12 123 123 123 123 1  28/819WOOUSDT     123 1  29/819TRXUSDT 1 123 123 123 123 1  30/819ATOMUSDT  12 123 123 123 1  31/819HOOKUSDT      1  32/819OGUSDT    123 123 1  33/819RDNTUSDT   