In [1]:
import ccxt
import pandas as pd
import numpy as np
import os
from dotenv import load_dotenv
from datetime import datetime, timedelta

In [2]:
# Load the environment variables
load_dotenv()

True

# Get Candle Stick Data from Binance

In [6]:
# Binance API credentials
BINANCE_API_KEY = os.getenv("BINANCE_API_KEY")
BINANCE_SECRET_KEY = os.getenv("BINANCE_SECRET_KEY")

# Initialize Binance Exchange
exchange = ccxt.binance({
    'apiKey': BINANCE_API_KEY,
    'secret': BINANCE_SECRET_KEY,
    'rateLimit': 1200,
    'enableRateLimit': True,
    'options': {
        'defaultType': 'future'  # Change to 'spot' for spot markets
    }
})

In [55]:
#Fetch the historical candle stick open, high, low, close and volume data
def fetch_ohlcv(symbol, timeframe, since, retries=3, progress_days=30):
    ohlcv = []  # Store all fetched data
    attempts = 0  # Track retries
    last_progress_message = since  # Track progress reporting

    while True:
        try:
            # Fetch 500 candles
            ohlcv_data = exchange.fetch_ohlcv(symbol, timeframe, since, limit=500)
            
            if ohlcv_data:
                ohlcv.extend(ohlcv_data)  # Add the new data to the list
                since = ohlcv_data[-1][0]  # Update the `since` for the next fetch
                
                # Check if progress message should be printed
                if since - last_progress_message >= progress_days * 24 * 60 * 60 * 1000:  # Progress every N days
                    print(f"Fetched up to {datetime.utcfromtimestamp(since / 1000)}")
                    last_progress_message = since

                # Stop if fewer than 500 candles are returned
                if len(ohlcv_data) < 500:
                    print(f"Fetching completed up to {datetime.utcfromtimestamp(since / 1000)}.")
                    break
            else:
                print(f"No more data to fetch at {datetime.utcfromtimestamp(since / 1000)}.")
                break
        except ccxt.NetworkError as e:
            print(f"Network error: {e}, retrying...")
            attempts += 1
            if attempts >= retries:
                print("Max retries reached. Exiting.")
                break
        except ccxt.ExchangeError as e:
            print(f"Exchange error: {e}, retrying...")
            attempts += 1
            if attempts >= retries:
                print("Max retries reached. Exiting.")
                break
        except Exception as e:
            print(f"Unexpected error: {e}, retrying...")
            attempts += 1
            if attempts >= retries:
                print("Max retries reached. Exiting.")
                break

    return ohlcv

In [61]:
# Symbol and timeframe
symbol = 'BTC/USDT'
timeframe = '1h'  # 1-hour timeframe
since = exchange.parse8601('2015-01-01T00:00:00Z')  # Start from Jan 1, 2015

In [63]:
# Fetch historical data
ohlcv = fetch_ohlcv(symbol, timeframe, since)

Network error: binance GET https://fapi.binance.com/fapi/v1/klines?interval=1h&limit=500&symbol=BTCUSDT&startTime=1420070400000, retrying...


  print(f"Fetched up to {datetime.utcfromtimestamp(since / 1000)}")


Fetched up to 2019-09-29 12:00:00
Fetched up to 2019-11-10 02:00:00
Fetched up to 2019-12-21 16:00:00
Fetched up to 2020-02-01 06:00:00
Fetched up to 2020-03-13 20:00:00
Fetched up to 2020-04-24 10:00:00
Fetched up to 2020-06-05 00:00:00
Fetched up to 2020-07-16 14:00:00
Fetched up to 2020-08-27 04:00:00
Fetched up to 2020-10-07 18:00:00
Fetched up to 2020-11-18 08:00:00
Fetched up to 2020-12-29 22:00:00
Fetched up to 2021-02-09 12:00:00
Fetched up to 2021-03-23 02:00:00
Fetched up to 2021-05-03 16:00:00
Fetched up to 2021-06-14 06:00:00
Fetched up to 2021-07-25 20:00:00
Fetched up to 2021-09-05 10:00:00
Fetched up to 2021-10-17 00:00:00
Fetched up to 2021-11-27 14:00:00
Fetched up to 2022-01-08 04:00:00
Fetched up to 2022-02-18 18:00:00
Fetched up to 2022-04-01 08:00:00
Fetched up to 2022-05-12 22:00:00
Fetched up to 2022-06-23 12:00:00
Fetched up to 2022-08-04 02:00:00
Fetched up to 2022-09-14 16:00:00
Fetched up to 2022-10-26 06:00:00
Fetched up to 2022-12-06 20:00:00
Fetched up to 

  print(f"Fetching completed up to {datetime.utcfromtimestamp(since / 1000)}.")


In [65]:
# Convert to DataFrame
df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])

# Convert timestamp to readable format
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

# Display the first few rows of the data
print(df.head())

# Save the data to a CSV file
df.to_csv('btc_usdt_ohlcv_1h_5years.csv', index=False)

            timestamp      open      high       low     close   volume
0 2019-09-08 17:00:00  10000.00  10000.00  10000.00  10000.00    0.002
1 2019-09-08 18:00:00  10000.00  10000.00  10000.00  10000.00    0.000
2 2019-09-08 19:00:00  10344.77  10357.53  10337.43  10340.12  471.659
3 2019-09-08 20:00:00  10340.12  10368.64  10334.54  10351.42  583.271
4 2019-09-08 21:00:00  10351.42  10391.90  10324.77  10391.90  689.759


In [67]:
df.shape

(46460, 6)