In [1]:
!pip install ccxt pandas



In [8]:
import pandas as pd
import time
import ccxt
from datetime import datetime, timedelta

In [10]:
# Inicializa el exchange
exchange = ccxt.binance({
    'enableRateLimit': True
})

# Parámetros de la colección de datos
symbol = 'BTC/USDT'
timeframe = '5m'  # Opciones: 1m, 5m, 15m, 1h, etc.

# Control de fechas para rango de descarga
fecha_inicio = '2024-01-01 00:00:00'  # Cambia la fecha de inicio aquí
fecha_fin = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')  # Hasta el momento actual


In [9]:
# Fetch OHLCV (Open, High, Low, Close, Volume)
ohlcv = exchange.fetch_ohlcv(symbol, timeframe=timeframe, limit=limit)

# Convertimos a DataFrame
df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])

# Convertimos el timestamp de milisegundos a fecha legible
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

# Reordenamos columnas (opcional)
df = df[['timestamp', 'open', 'high', 'low', 'close', 'volume']]

df.tail()


Unnamed: 0,timestamp,open,high,low,close,volume
995,2025-03-20 04:10:00,85850.75,85857.35,85801.44,85825.55,23.85461
996,2025-03-20 04:15:00,85825.55,85825.56,85756.93,85782.61,21.77889
997,2025-03-20 04:20:00,85782.61,85834.81,85734.89,85834.8,37.91304
998,2025-03-20 04:25:00,85834.81,85857.34,85716.45,85747.08,66.77192
999,2025-03-20 04:30:00,85747.08,85771.43,85747.08,85771.42,1.66892


In [12]:
def fetch_ohlcv_with_retry(symbol, timeframe, since=None, limit=1000, max_retries=5):
    for attempt in range(max_retries):
        try:
            data = exchange.fetch_ohlcv(symbol, timeframe=timeframe, since=since, limit=limit)
            return data
        except Exception as e:
            print(f"Error en intento {attempt+1}/{max_retries}: {str(e)}")
            time.sleep(exchange.rateLimit / 1000)
    return []

In [14]:
def date_to_milliseconds(date_str):
    dt = datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S')
    return int(dt.timestamp() * 1000)

since_timestamp = date_to_milliseconds(fecha_inicio)
until_timestamp = date_to_milliseconds(fecha_fin)


In [16]:
def download_ohlcv_range(symbol, timeframe, since, until, limit=1000):
    all_data = []
    current_since = since

    while current_since < until:
        print(f"Descargando desde: {exchange.iso8601(current_since)}")
        data = fetch_ohlcv_with_retry(symbol, timeframe, since=current_since, limit=limit)

        if not data:
            print("No hay más datos o error. Saliendo.")
            break

        all_data.extend(data)

        # Actualiza el timestamp para la siguiente descarga
        last_timestamp = data[-1][0]
        if current_since == last_timestamp:
            # Evita un loop infinito
            break
        current_since = last_timestamp + 1

        # Control de rate limit
        time.sleep(exchange.rateLimit / 1000)

    return all_data

# Ejecutar la descarga
ohlcv_data = download_ohlcv_range(symbol, timeframe, since_timestamp, until_timestamp)

Descargando desde: 2024-01-01T06:00:00.000Z
Descargando desde: 2024-01-04T17:15:00.001Z
Descargando desde: 2024-01-08T04:35:00.001Z
Descargando desde: 2024-01-11T15:55:00.001Z
Descargando desde: 2024-01-15T03:15:00.001Z
Descargando desde: 2024-01-18T14:35:00.001Z
Descargando desde: 2024-01-22T01:55:00.001Z
Descargando desde: 2024-01-25T13:15:00.001Z
Descargando desde: 2024-01-29T00:35:00.001Z
Descargando desde: 2024-02-01T11:55:00.001Z
Descargando desde: 2024-02-04T23:15:00.001Z
Descargando desde: 2024-02-08T10:35:00.001Z
Descargando desde: 2024-02-11T21:55:00.001Z
Descargando desde: 2024-02-15T09:15:00.001Z
Descargando desde: 2024-02-18T20:35:00.001Z
Descargando desde: 2024-02-22T07:55:00.001Z
Descargando desde: 2024-02-25T19:15:00.001Z
Descargando desde: 2024-02-29T06:35:00.001Z
Descargando desde: 2024-03-03T17:55:00.001Z
Descargando desde: 2024-03-07T05:15:00.001Z
Descargando desde: 2024-03-10T16:35:00.001Z
Descargando desde: 2024-03-14T03:55:00.001Z
Descargando desde: 2024-03-17T15

In [18]:
# Convertimos los datos a DataFrame
columns = ['timestamp', 'open', 'high', 'low', 'close', 'volume']
df = pd.DataFrame(ohlcv_data, columns=columns)

# Convertimos timestamp a datetime legible
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

# Guardamos en un archivo CSV
file_path = f'data/BTC_{timeframe}_ohlcv.csv'
df.to_csv(file_path, index=False)

print(f"✅ Datos guardados correctamente en {file_path}")
df.tail()


✅ Datos guardados correctamente en data/BTC_5m_ohlcv.csv


Unnamed: 0,timestamp,open,high,low,close,volume
128115,2025-03-21 02:15:00,84653.06,84653.06,84566.39,84574.53,32.835
128116,2025-03-21 02:20:00,84574.53,84579.32,84520.67,84547.56,16.44097
128117,2025-03-21 02:25:00,84547.57,84666.61,84547.56,84630.43,69.40393
128118,2025-03-21 02:30:00,84630.44,84644.0,84572.54,84580.06,26.96325
128119,2025-03-21 02:35:00,84580.06,84629.99,84580.06,84601.62,7.0948
