In [2]:
import ccxt
import pandas as pd
import time
from datetime import datetime, timedelta
from tqdm import tqdm

In [3]:
def fetch_ohlcv_data(symbol, timeframe, since, limit=1000, max_retries=5):
    """
    Fetches OHLCV data from Binance for a given symbol and timeframe starting from 'since',
    requesting page after page until the exchange returns an empty batch.

    The download is best-effort: on an exchange error, an unexpected error, or too many
    consecutive network errors, the loop stops and whatever was collected so far is returned.

    Parameters:
    - symbol (str): Trading pair symbol (e.g., 'BTC/USDT').
    - timeframe (str): Timeframe for OHLCV data (e.g., '1h', '4h', '1d').
    - since (int): Timestamp in milliseconds to start fetching data from.
    - limit (int): Number of data points per request (max 1000 for Binance).
    - max_retries (int): Maximum number of consecutive retries after a network error
      before giving up. The counter resets after every successful request.

    Returns:
    - all_ohlcv (list of lists): Retrieved OHLCV rows in the order they were received
      (possibly partial or empty if an error stopped the download).
    """
    binance = ccxt.binance({
        # In ccxt, 'rateLimit' is the minimum delay between requests in milliseconds
        # (not a requests-per-minute quota).
        'rateLimit': 1200,
        # The built-in limiter enforces that delay before each request, so no manual
        # sleep is needed between successful pages.
        'enableRateLimit': True,
    })

    all_ohlcv = []
    consecutive_failures = 0
    while True:
        try:
            # Fetch one page of OHLCV data
            ohlcv = binance.fetch_ohlcv(symbol, timeframe=timeframe, since=since, limit=limit)
            consecutive_failures = 0

            if not ohlcv:
                print("No more data to fetch.")
                break

            last_timestamp = ohlcv[-1][0]
            # Forward-progress guard: if the newest row is older than what was asked for,
            # the exchange is not advancing; stop instead of looping forever and
            # accumulating duplicates.
            if since is not None and last_timestamp < since:
                print("Exchange returned no data newer than the requested start; stopping.")
                break

            all_ohlcv.extend(ohlcv)
            print(f"Fetched {len(ohlcv)} rows of data.")

            # Update 'since' to the last timestamp + 1 millisecond to avoid duplication
            since = last_timestamp + 1

        except ccxt.NetworkError as e:
            consecutive_failures += 1
            if consecutive_failures > max_retries:
                print(f"Network error: {e}. Giving up after {max_retries} retries.")
                break
            print(f"Network error: {e}. Retrying in 10 seconds...")
            time.sleep(10)
        except ccxt.ExchangeError as e:
            print(f"Exchange error: {e}.")
            break
        except Exception as e:
            print(f"An unexpected error occurred: {e}.")
            break

    return all_ohlcv

In [4]:
def main():
    """
    Download historical OHLCV candles for the configured symbol and save them to a CSV file.

    The configuration constants below select the trading pair, the candle timeframe,
    the start date and the output file. The start date is interpreted as midnight UTC.
    If nothing is fetched, no file is written.
    """
    # ------------------------ Configuration ------------------------ #
    SYMBOL = 'BTC/USDT'       # Trading pair symbol
    TIMEFRAME = '1h'          # Timeframe: '1h', '4h', '1d', etc.
    START_DATE = '2018-01-01' # Start date in 'YYYY-MM-DD' format (interpreted as UTC)
    OUTPUT_FILE = 'BTC_USDT_1h_data.csv'  # Output CSV file name
    # ---------------------------------------------------------------- #

    # Convert start date to a milliseconds timestamp, explicitly in UTC.
    # A naive datetime's .timestamp() would use the machine's local timezone and shift
    # the start by the local UTC offset.
    start_timestamp = int(pd.Timestamp(START_DATE, tz='UTC').timestamp() * 1000)

    print(f"Starting data fetch for {SYMBOL} from {START_DATE} with timeframe {TIMEFRAME}.")

    # Fetch data in batches
    all_ohlcv = fetch_ohlcv_data(SYMBOL, TIMEFRAME, since=start_timestamp, limit=1000)

    if not all_ohlcv:
        print("No data fetched.")
        return

    # Convert to DataFrame
    df = pd.DataFrame(all_ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
    # Convert the millisecond epoch timestamps to datetimes
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    # Set timestamp as index (non-mutating form instead of inplace=True)
    df = df.set_index('timestamp')
    # Save to CSV
    df.to_csv(OUTPUT_FILE)
    print(f"Data saved to {OUTPUT_FILE}.")


In [5]:
# Run the full download-and-save pipeline defined in main().
# NOTE(review): the saved output below reports a start date of 2024-01-01, while main()
# is configured with START_DATE = '2018-01-01' — re-run this cell to refresh the output.
main()

Starting data fetch for BTC/USDT from 2024-01-01 with timeframe 1h.
Fetched 1000 rows of data.
Fetched 1000 rows of data.
Fetched 1000 rows of data.
Fetched 1000 rows of data.
Fetched 1000 rows of data.
Fetched 1000 rows of data.
Fetched 1000 rows of data.
Fetched 1000 rows of data.
Fetched 344 rows of data.
No more data to fetch.
Data saved to BTC_USDT_1h_data.csv.
