In [26]:
import requests
import pandas as pd
import datetime as dt
from datetime import timedelta
import time

In [27]:
# Fetching Methods
def fetch_binance_klines(symbol, interval, start_time, end_time, limit=1000): 
    params = {
        'symbol': symbol,
        'interval': interval,
        'startTime': int(start_time),
        'endTime': int(end_time),
        'limit': limit
    }
    
    try:
        response = requests.get("https://api.binance.com/api/v3/klines", params=params)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error fetching data: {e}")
        return None

def datetime_to_timestamp(dt_obj):
    return int(dt_obj.timestamp() * 1000)

def fetch_all_data(symbol, interval, start_time, end_time):
    all_data = []
    current_start = start_time
    
    start_timestamp = datetime_to_timestamp(current_start)
    end_timestamp = datetime_to_timestamp(end_time)
    
    total_hours = int((end_timestamp - start_timestamp) / (1000 * 60 * 60))
    print(f"Total hours to fetch: {total_hours}")
    
    batch_count = 0
    
    while current_start < end_time:
        batch_end = current_start + timedelta(hours=999)
        if batch_end > end_time:
            batch_end = end_time
            
        start_ts = datetime_to_timestamp(current_start)
        end_ts = datetime_to_timestamp(batch_end)
        
        print(f"Fetching batch {batch_count + 1}: {current_start.strftime('%Y-%m-%d %H:%M')} to {batch_end.strftime('%Y-%m-%d %H:%M')}")
        
        batch_data = fetch_binance_klines(symbol, interval, start_ts, end_ts)
        
        if batch_data:
            all_data.extend(batch_data)
            print(f"  → Fetched {len(batch_data)} records")
        else:
            print("  → Failed to fetch data for this batch")
            break
            
        # Move to next batch
        current_start = batch_end + timedelta(hours=1)
        batch_count += 1
        
        time.sleep(0.1)
    
    print(f"Total records fetched: {len(all_data)}")
    return all_data

In [28]:
SYMBOL = "ETHUSDC"
INTERVAL = "1h"

# Previous default: end_time = (dt.datetime.now().replace(day=1) - timedelta(days=1)).replace(hour=0, minute=0, second=0, microsecond=0)

# Set end_time to a specific month and year, such as September 2025:
end_time = dt.datetime(year=2025, month=7, day=1, hour=0, minute=0, second=0, microsecond=0)
start_time = end_time - timedelta(days=365)

crypto_data = fetch_all_data(SYMBOL, INTERVAL, start_time, end_time)

if crypto_data:
    print("Data fetch completed successfully!")
else:
    print("Failed to fetch data. Please check your internet connection and try again.")

Total hours to fetch: 8760
Fetching batch 1: 2024-07-01 00:00 to 2024-08-11 15:00
  → Fetched 1000 records
Fetching batch 2: 2024-08-11 16:00 to 2024-09-22 07:00
  → Fetched 1000 records
Fetching batch 3: 2024-09-22 08:00 to 2024-11-02 23:00
  → Fetched 1000 records
Fetching batch 4: 2024-11-03 00:00 to 2024-12-14 15:00
  → Fetched 1000 records
Fetching batch 5: 2024-12-14 16:00 to 2025-01-25 07:00
  → Fetched 1000 records
Fetching batch 6: 2025-01-25 08:00 to 2025-03-07 23:00
  → Fetched 1000 records
Fetching batch 7: 2025-03-08 00:00 to 2025-04-18 15:00
  → Fetched 1000 records
Fetching batch 8: 2025-04-18 16:00 to 2025-05-30 07:00
  → Fetched 1000 records
Fetching batch 9: 2025-05-30 08:00 to 2025-07-01 00:00
  → Fetched 761 records
Total records fetched: 8761
Data fetch completed successfully!


In [29]:
# Process and save the data to CSV
if not crypto_data:
    exit()

columns = [
    'open_time', 'open', 'high', 'low', 'close', 'volume',
    'close_time', 'quote_asset_volume', 'number_of_trades',
    'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume', 'ignore'
]
df = pd.DataFrame(crypto_data, columns=columns)
df['open_time'] = pd.to_datetime(df['open_time'], unit='ms')
df['close_time'] = pd.to_datetime(df['close_time'], unit='ms')
price_columns = [
    'open', 'high', 'low', 'close', 'volume',
    'quote_asset_volume', 'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume'
]
for col in price_columns:
    df[col] = df[col].astype(float)
df = df.sort_values('open_time').reset_index(drop=True)
df = df.drop('ignore', axis=1)
filename = f"./research/simulation/data/{SYMBOL}_hourly_data_{start_time.strftime('%Y%m%d')}_{end_time.strftime('%Y%m%d')}.csv"
df.to_csv(filename, index=False)
