In [8]:
import ccxt
import time
import pandas as pd
from datetime import datetime, timezone
from tqdm import tqdm
import os

In [2]:
# Column labels applied to the kline rows when they are loaded into a DataFrame.
SPOT_COLUMNS = [
    'Open time',
    'Open',
    'High',
    'Low',
    'Close',
    'Volume',
    'Close time',
    'Quote asset volume',
    'Number of trades',
    'Taker buy base asset volume',
    'Taker buy quote asset volume',
    'Ignore',
]

# Shared ccxt client used by every download in this notebook.
# NOTE(review): despite the variable name this is the Binance.US client (ccxt.binanceus).
# The timeout is presumably in milliseconds (ccxt convention) -- confirm.
# An 'options': {'defaultType': 'future'} entry was left disabled in the original cell.
binance_exchange = ccxt.binanceus({
    'timeout': 15000,
    'enableRateLimit': True,
})

In [3]:
def get_spot(exchange, symbol, interval = '1h', 
                    startTime = None,
                    endTime = None, 
                    limit = 1000):
    '''
    Request klines for one symbol through ``exchange.publicGetKlines``.

    Parameters
    ----------
    exchange : object exposing a ``publicGetKlines(params)`` method.
    symbol : market symbol, forwarded unchanged (e.g. 'BTCUSDT').
    interval : kline interval string, forwarded unchanged.
    startTime, endTime : optional bounds; each one is only included in the
        request when it is not None (so a value of 0 is still sent).
    limit : maximum number of rows requested.

    Returns
    -------
    Whatever ``exchange.publicGetKlines`` returns for the assembled parameters.
    '''
    # Keys are inserted in the same order as before:
    # symbol, interval, [startTime], [endTime], limit.
    params = {'symbol': symbol, 'interval': interval}
    if startTime is not None:
        params['startTime'] = startTime
    if endTime is not None:
        params['endTime'] = endTime
    params['limit'] = limit

    return exchange.publicGetKlines(params)

def convert_to_seconds(time_input):
    '''
    Convert an interval string such as '15m' or '1h' into a number of seconds.

    The last character is the unit and everything before it is parsed as an
    integer count. Supported units: 's' (seconds), 'm' (minutes), 'h' (hours),
    'd' (days) and 'w' (weeks). Units are case-sensitive, so 'M' is NOT treated
    as minutes; it is rejected because a month has no fixed length.

    Raises
    ------
    ValueError
        If the count is not an integer or the unit is not supported.
    '''
    seconds_per_unit = {
        's': 1,
        'm': 60,
        'h': 3600,
        'd': 86400,
        'w': 604800,
    }

    number = int(time_input[:-1])
    unit = time_input[-1]

    if unit not in seconds_per_unit:
        raise ValueError("Unsupported time unit")
    return number * seconds_per_unit[unit]
    
def transform_timestamp(timestamp_integer):
    '''
    Turn an epoch value expressed in milliseconds into a UTC-aware pandas timestamp.

    The input is divided by 1000 and truncated to an integer number of seconds,
    so any sub-second component is discarded.
    '''
    whole_seconds = int(timestamp_integer / 1000)
    return pd.to_datetime(whole_seconds, unit='s', utc=True)

def transform_to_timestamp_integer(datetime_object):
    '''
    Convert an object exposing ``.timestamp()`` into integer epoch milliseconds.

    ``.timestamp()`` yields seconds; the value is scaled by 1000 and truncated
    with ``int()``.
    '''
    epoch_seconds = datetime_object.timestamp()
    return int(epoch_seconds * 1000)

def obtain_full_spotdata(start_timestamp, 
                         end_timestamp,
                         exchange, symbol, interval = '1h', 
                         limit = 1000):
    '''
    Download all klines between two epoch-millisecond timestamps by paging
    forward through contiguous, non-overlapping time windows.

    Each request is bounded on both sides (startTime and endTime), and each
    window spans exactly ``limit`` intervals, so the windows tile the range
    [start_timestamp, end_timestamp] with no gap between consecutive requests.

    Parameters
    ----------
    start_timestamp, end_timestamp : range bounds in epoch milliseconds.
        Both are passed to the endpoint as-is.
        NOTE(review): assumes the endpoint filters on kline open time and
        treats both bounds as inclusive -- confirm against the API docs.
    exchange : client object forwarded to ``get_spot``.
    symbol : market symbol forwarded to ``get_spot``.
    interval : interval string understood by ``convert_to_seconds``.
    limit : maximum rows per request; also determines the window width.

    Returns
    -------
    list
        Rows from every request, concatenated in request (chronological) order.
        Empty when start_timestamp is greater than end_timestamp.

    Raises
    ------
    ValueError
        If interval/limit yield a non-positive window, which would otherwise
        prevent the paging loop from advancing.
    '''
    interval_ms = convert_to_seconds(interval) * 1000
    window_ms = int(interval_ms * limit)
    if window_ms <= 0:
        raise ValueError("interval and limit must both be positive")

    full_data_list = []

    window_start = start_timestamp
    while window_start <= end_timestamp:
        # A closed window of (window_ms - 1) ms can hold at most `limit` interval
        # boundaries, so one request is never truncated by `limit`, and the next
        # window begins on the very next millisecond.
        window_end = min(window_start + window_ms - 1, end_timestamp)

        data_list = get_spot(exchange = exchange, symbol = symbol, interval = interval,
                             startTime = window_start,
                             endTime = window_end,
                             limit = limit)
        # extend() avoids rebuilding the accumulated list on every page.
        full_data_list.extend(data_list)

        window_start += window_ms
        if window_start <= end_timestamp:
            # Short pause between consecutive requests (not after the last one).
            time.sleep(0.2)

    return full_data_list

In [6]:
# Symbols to download; the loop below processes them one at a time.
coin_tickers = [
    'BTCUSDT',
    'ETHUSDT',
    'SOLUSDT',
    'BNBUSDT',
    'XRPUSDT',
    'DOGEUSDT',
]

# Date range (parsed as UTC by fetch_and_store_data) and kline interval.
start_date, end_date = "2020-01-01", "2025-01-01"
interval = "1h"

In [9]:
def fetch_and_store_data(ticker_symbol, start_date, end_date, interval):
    '''
    Download klines for one symbol over a date range and save them as a CSV.

    Parameters
    ----------
    ticker_symbol : market symbol passed to the downloader (e.g. 'BTCUSDT').
    start_date, end_date : timezone-naive date values accepted by
        ``pd.to_datetime``; they are localized to UTC before conversion to
        epoch milliseconds.
    interval : kline interval string; used both for the request and in the
        output file name.

    Returns
    -------
    pandas.DataFrame
        The downloaded rows labelled with ``SPOT_COLUMNS``, with 'Open time'
        converted by ``transform_timestamp`` and duplicate 'Open time' rows
        removed (first occurrence kept).

    Side effects
    ------------
    Writes ``./{ticker_symbol}_{start_date}_{end_date}_{interval}.csv`` (the
    DataFrame index is included, which is the ``to_csv`` default) and prints a
    confirmation line.
    '''
    dataset_name = f"{ticker_symbol}_{start_date}_{end_date}_{interval}"

    start_timestamp = int(((pd.to_datetime(start_date)).tz_localize('UTC')).timestamp() * 1000)
    end_timestamp = int(((pd.to_datetime(end_date)).tz_localize('UTC')).timestamp() * 1000)

    # Forward the requested interval explicitly; otherwise the downloader falls
    # back to its '1h' default and the data would not match the file name.
    full_data_list = obtain_full_spotdata(start_timestamp, end_timestamp, 
                                        binance_exchange, ticker_symbol,
                                        interval = interval)

    data = pd.DataFrame(full_data_list, columns = SPOT_COLUMNS)
    data['Open time'] = data['Open time'].apply(lambda x: transform_timestamp(int(x)))
    # Assignment instead of inplace=True: same result, no hidden mutation.
    data = data.drop_duplicates('Open time', keep='first')
    
    data.to_csv(f'./{dataset_name}.csv')
    print(f"Data for {ticker_symbol} stored successfully.")
    return data

In [10]:
# Download and store one CSV per configured symbol, with a progress bar.
for ticker in tqdm(coin_tickers, desc="Fetching crypto data"):
    fetch_and_store_data(
        ticker_symbol=ticker,
        start_date=start_date,
        end_date=end_date,
        interval=interval,
    )

Fetching crypto data:  17%|█▋        | 1/6 [00:14<01:14, 14.84s/it]

Data for BTCUSDT stored successfully.


Fetching crypto data:  33%|███▎      | 2/6 [00:29<00:58, 14.68s/it]

Data for ETHUSDT stored successfully.


Fetching crypto data:  50%|█████     | 3/6 [00:43<00:43, 14.40s/it]

Data for SOLUSDT stored successfully.


Fetching crypto data:  67%|██████▋   | 4/6 [00:58<00:29, 14.58s/it]

Data for BNBUSDT stored successfully.


Fetching crypto data:  83%|████████▎ | 5/6 [01:12<00:14, 14.53s/it]

Data for XRPUSDT stored successfully.


Fetching crypto data: 100%|██████████| 6/6 [01:27<00:00, 14.55s/it]

Data for DOGEUSDT stored successfully.





### Memecoins

In [15]:
# Re-point the shared settings at the memecoin symbol(s) and their date range.
# Note: this overwrites the values set for the earlier download cell.
coin_tickers = [
    'TRUMPUSDT',
]

start_date, end_date = "2023-01-01", "2025-02-20"
interval = "1h"

In [16]:
# Same download loop as above, now driven by the memecoin settings.
for ticker in tqdm(coin_tickers, desc="Fetching crypto data"):
    fetch_and_store_data(
        ticker_symbol=ticker,
        start_date=start_date,
        end_date=end_date,
        interval=interval,
    )

Fetching crypto data: 100%|██████████| 1/1 [00:04<00:00,  4.28s/it]

Data for TRUMPUSDT stored successfully.



