In [1]:
import pandas as pd
from dateutil import parser
from datetime import datetime as dt
import datetime
import time 

from binance.client import Client
import os
from dotenv import load_dotenv
load_dotenv("../../constants/.env")

True

In [2]:
api_key = os.environ.get('BINANCE_KEY')
secret_key = os.environ.get('BINANCE_SECRET')

In [3]:
client = Client(api_key = api_key, api_secret = secret_key, tld = "com")

valid intervals - 1m, 3m, 5m, 15m, 30m, 1h, 2h, 4h, 6h, 8h, 12h, 1d, 3d, 1w, 1M

In [4]:
CANDLE_COUNT = 5000
THROTTLE_TIME = 0.1
SLEEP = 0.3
INCREMENTS = {
    'M1': 1 * CANDLE_COUNT,
    '1m': 1 * CANDLE_COUNT,
    '15m': 15 * CANDLE_COUNT,
    '5m' : 5 * CANDLE_COUNT,
    'H1' : 60 * CANDLE_COUNT,
    'H4' : 240 * CANDLE_COUNT,
    'D1' : 1440 * CANDLE_COUNT
}

In [5]:
last_req_time = dt.now()
def throttle():
    global last_req_time
    el_s = (dt.now() - last_req_time).total_seconds()
    if el_s < THROTTLE_TIME:
        time.sleep(THROTTLE_TIME - el_s)
    last_req_time = dt.now()

In [6]:
def get_history(symbol, interval, start, end, api):

    throttle()
    
    bars = api.get_historical_klines(symbol = symbol, interval = interval,
                                        start_str = start, end_str = end, limit = 1000)
    df = pd.DataFrame(bars)
    df["Date"] = pd.to_datetime(df.iloc[:,0], unit = "ms")
    df.columns = ["Open Time", "Open", "High", "Low", "Close", "Volume",
                  "Clos Time", "Quote Asset Volume", "Number of Trades",
                  "Taker Buy Base Asset Volume", "Taker Buy Quote Asset Volume", "Ignore", "Time"]
    df = df[["Time", "Open", "High", "Low", "Close", "Volume"]].copy()
    # df.set_index("Time", inplace = True)
    # print(df["Time"])
    for column in ["Open", "High", "Low", "Close", "Volume"]:
        df[column] = pd.to_numeric(df[column], errors = "coerce")
    
    return df

In [7]:
def save_file(final_df: pd.DataFrame, file_prefix, granularity, pair):
    filename = f"{file_prefix}{pair}_{granularity}.pkl"

    final_df.drop_duplicates(subset=['Time'], inplace=True)
    final_df.sort_values(by='Time', inplace=True)
    final_df.reset_index(inplace=True, drop=True)
    final_df.to_pickle(filename)

    print(f"**** {pair} {granularity}, {final_df.Time.min()} {final_df.Time.max()} --> {final_df.shape}")


def fetch_candles(pair, granularity, date_f: str, date_t: str, api: Client):
    
    attempts = 0

    while attempts < 3:
        
        candles_df = get_history(
            symbol=pair,
            interval=granularity,
            start=date_f,
            end=date_t,
            api=api
        )

        if candles_df is not None:
            break

        attempts += 1

    if candles_df is not None and candles_df.empty == False:
        return candles_df
    else:
        return None

In [8]:
def collect_data(pair, granularity, date_f, date_t, file_prefix, api: Client):
    
    time_step = INCREMENTS[granularity]

    from_date = parser.parse(date_f)
    end_date = parser.parse(date_t)
    
    candle_dfs = []

    to_date = from_date
    while to_date < end_date:

        to_date = from_date + datetime.timedelta(minutes=time_step)
        if to_date > end_date:
            to_date = end_date

        to_date_str = to_date.strftime("%Y-%m-%d %H:%M:%S")
        from_date_str = from_date.strftime("%Y-%m-%d %H:%M:%S")
        candles = fetch_candles(
            pair,
            granularity,
            from_date_str,
            to_date_str,
            api
        )
        if candles is not None and candles.empty == False:
            print(f"{pair} {granularity}, {from_date} {to_date_str}  | {candles.Time.min()} {candles.Time.max()} --> {candles.shape[0]} candles")
            candle_dfs.append(candles)
            if candles.Time.max() > to_date:
                from_date = candles.Time.max()
            else:
                from_date = to_date

        else:
            print(f"{pair} {granularity}, {from_date} {to_date} --> NO CANDLES")
            from_date = to_date

    time.sleep(SLEEP)

    if len(candle_dfs) > 0:
        final_df = pd.concat(candle_dfs)
        save_file(final_df, file_prefix, granularity, pair)
    else:
        print(f"{pair} {granularity}, {from_date} {to_date} --> NO DATA SAVED")



In [11]:
# pair = ['ADAUSDT','BTCUSDT','ETHUSDT','BNBUSDT','XRPUSDT','LTCUSDT']
pair = ['LINKUSDT','DOTUSDT','DOGEUSDT','ADAUSDT','XRPUSDT','LTCUSDT','TRXUSDT','XLMUSDT','AVAXUSDT']
for p in pair:
    print(str(p))
    collect_data(
        p,
        '1m',
        "2022-01-01 00:00:00",
        "2024-11-01 00:00:00",
        "./",
        client
    )

LINKUSDT
LINKUSDT 1m, 2022-01-01 00:00:00 2022-01-04 11:20:00  | 2022-01-01 00:00:00 2022-01-04 11:19:00 --> 5000 candles
LINKUSDT 1m, 2022-01-04 11:20:00 2022-01-07 22:40:00  | 2022-01-04 11:20:00 2022-01-07 22:39:00 --> 5000 candles
LINKUSDT 1m, 2022-01-07 22:40:00 2022-01-11 10:00:00  | 2022-01-07 22:40:00 2022-01-11 09:59:00 --> 5000 candles
LINKUSDT 1m, 2022-01-11 10:00:00 2022-01-14 21:20:00  | 2022-01-11 10:00:00 2022-01-14 21:19:00 --> 5000 candles
LINKUSDT 1m, 2022-01-14 21:20:00 2022-01-18 08:40:00  | 2022-01-14 21:20:00 2022-01-18 08:39:00 --> 5000 candles
LINKUSDT 1m, 2022-01-18 08:40:00 2022-01-21 20:00:00  | 2022-01-18 08:40:00 2022-01-21 19:59:00 --> 5000 candles
LINKUSDT 1m, 2022-01-21 20:00:00 2022-01-25 07:20:00  | 2022-01-21 20:00:00 2022-01-25 07:19:00 --> 5000 candles
LINKUSDT 1m, 2022-01-25 07:20:00 2022-01-28 18:40:00  | 2022-01-25 07:20:00 2022-01-28 18:39:00 --> 5000 candles
LINKUSDT 1m, 2022-01-28 18:40:00 2022-02-01 06:00:00  | 2022-01-28 18:40:00 2022-02-01 

In [24]:
df = get_history(symbol = "ADAUSDT", interval = "1m",
                 start = "2019-01-01T00:00:00", end = "2019-01-02T00:00:00")
df

TypeError: get_history() missing 1 required positional argument: 'api'

In [None]:
tt = last_req_time.strftime("%Y-%m-%dT%H:%M:%S")

In [None]:
tt

In [None]:
last_req_time