In [1]:
import ccxt
import pandas as pd
import polars as pl
import os

# GLOBAL VARIABLES
# Initialize the specific exchange 
EXCHANGE_NAME = "binance"
EXCHANGE = getattr(ccxt, EXCHANGE_NAME)()  # i.e. ccxt.binance()
markets = EXCHANGE.load_markets() 
TICKER_DATA_PATH = r"C:\Users\Damja\CODING_LOCAL\trading\data\ticker_specific_data_BINANCE"

def fetch_ohlcv_data_per_symbol(symbol, timeframe='1h', **kwargs):
    try:
        # Fetch OHLCV data for the current pairs
        ohlcv = EXCHANGE.fetch_ohlcv(symbol, timeframe=timeframe, **kwargs)
        # Convert to a DataFrame
        df = pd.DataFrame(ohlcv, columns=['Date', 'open', 'high', 'low', 'close', 'volume'])
        df['Date'] = pd.to_datetime(df['Date'], unit='ms')
        df.set_index('Date', inplace=True)
        df['usd_volume'] = df['close'] * df['volume']
        return df
    except Exception as e:
        print(f"Error fetching data for {symbol}: {e}")


def update_ohlcv_data_per_symbol(symbol, timeframe='1h', timestamp=None, **kwargs):
    '''
    Update data from last 
    arguments:
        symbol: str (as given by exchange, i.e. 'BTC/USD', needs to be converted to 'BTCUSD' for data folder)
        timeframe: str
        timestamp: datetime
        kwargs: other arguments for fetch_ohlcv_data_per_symbol
    '''
    assert timestamp is not None, "Timestamp must be provided"
    data_folder_symbol = symbol.replace('/', '')
    if timeframe == '1h':
        data_folder_symbol = data_folder_symbol + '_60'
    else:
        raise Exception("Timeframe not supported")

    df = load_ohlcv_data_per_symbol(data_folder_symbol)
    df_since_timestamp = fetch_ohlcv_data_per_symbol(symbol=symbol, timeframe='1h', since=int(timestamp.timestamp()*1000))
    df_new = pd.concat([df, df_since_timestamp]).drop_duplicates(keep='first').reset_index(drop=True)    
    df.index = pd.to_datetime(df.index)
    return df_new
        

def save_ohlcv_data_per_symbol(df, symbol):
    df.to_csv(TICKER_DATA_PATH + f"/{symbol}.csv", index=False)


def load_ohlcv_data_per_symbol(symbol):
    pd.read_csv(TICKER_DATA_PATH + f"/{symbol}.csv")


In [4]:
1+1

2

In this notebook, we load the historical data from Binance for some pairs of interest and save them in a csv file.
We start with data from 2022-01-01.

In [2]:
pairs = pd.read_csv("pairs.csv")
NUM_PAIRS_TO_LOAD = 100
pairs = pairs.iloc[:NUM_PAIRS_TO_LOAD, 0].values
pairs = [pair.replace("USD", "USDT") for pair in pairs]

The below code the history completely from a given start date for a given time-frame.
- To-Do: write another script that updates the history

In [7]:
TICKER_DATA_PATH

'C:\\Users\\Damja\\CODING_LOCAL\\trading\\data\\ticker_specific_data_BINANCE'

In [5]:
#pair = 'PEPE/USDT'
INTERVAL = '6h'
START_DATE = pd.to_datetime('2020-01-01')
START_DATE_in_ms = int(START_DATE.timestamp() * 1000)
TIMESTAMP_TODAY = pd.Timestamp.today()
TIMESTAMP_TODAY = pd.Timestamp(year=TIMESTAMP_TODAY.year, month=TIMESTAMP_TODAY.month, day=TIMESTAMP_TODAY.day)

for pair in pairs[0:150]:
    df_list = []
    START_DATE_in_ms = int(START_DATE.timestamp() * 1000)
    while True:
        if pair in markets:
            df = fetch_ohlcv_data_per_symbol(symbol=pair, timeframe=INTERVAL, since=START_DATE_in_ms)
        else:
            print(f'error for pair {pair}; Not in Markets!')
            break
        df_list.append(df)
        if df.index[-1].year == TIMESTAMP_TODAY.year and df.index[-1].month == TIMESTAMP_TODAY.month and df.index[-1].day == TIMESTAMP_TODAY.day:
            print(f'done for pair {pair}')
            out_dataframe = pl.concat([pl.from_pandas(df.reset_index()) for df in df_list])
            out_dataframe.write_parquet(f'{TICKER_DATA_PATH}/{pair.replace("/", "")}.parquet')
            del df
            break

        new_start_date = int(df.index[-1].timestamp() * 1000)
        if new_start_date == START_DATE_in_ms:
            print(f'done for pair {pair}, max date reached: {df.index[-1].timestamp()}') 
            out_dataframe = pl.concat([pl.from_pandas(df.reset_index()) for df in df_list])
            out_dataframe.write_parquet(f'{TICKER_DATA_PATH}/{pair.replace("/", "")}.parquet')
            del df
            break
        else:
            START_DATE_in_ms = new_start_date



done for pair BTC/USDT
done for pair PEPE/USDT
done for pair XRP/USDT
done for pair WIF/USDT
error for pair USDTT/USDT; Not in Markets!
done for pair DOGE/USDT
done for pair SOL/USDT
done for pair EUR/USDT
done for pair TURBO/USDT
done for pair ETH/USDT
done for pair SUI/USDT
done for pair XLM/USDT
done for pair WOO/USDT
done for pair NEAR/USDT
done for pair FTM/USDT, max date reached: 1736726400.0


In [9]:
pl.read_parquet(f'{TICKER_DATA_PATH}/{pair.replace("/", "")}.parquet')

Date,open,high,low,close,volume,usd_volume
datetime[ns],f64,f64,f64,f64,f64,f64
2020-01-01 00:00:00,0.1808,0.1846,0.18,0.1835,802584.77,147274.305295
2020-01-01 06:00:00,0.184,0.1924,0.1831,0.1908,2.2540e6,430067.462472
2020-01-01 12:00:00,0.1908,0.1921,0.1878,0.1908,1.6952e6,323438.218488
2020-01-01 18:00:00,0.1908,0.1984,0.1908,0.1962,4.4766e6,878303.255706
2020-01-02 00:00:00,0.196,0.1961,0.1882,0.1913,2.6819e6,513047.758863
…,…,…,…,…,…,…
2025-03-11 06:00:00,0.1424,0.1446,0.141,0.1442,1.544038e6,222650.2796
2025-03-11 12:00:00,0.1442,0.1473,0.14,0.1441,3.235793e6,466277.7713
2025-03-11 18:00:00,0.1444,0.15,0.1444,0.1462,1.790106e6,261713.4972
2025-03-12 00:00:00,0.1462,0.1473,0.1409,0.1422,1.218593e6,173283.9246
