In [None]:
# Directory that holds every exported CSV.
data_folder = 'data/exports'

# Candle intervals processed by this notebook, shortest first.
# The monthly interval ('1M') is intentionally left out.
timeframe_array = ['1m', '15m', '1h', '4h', '1d', '1w']

# Interval label -> string accepted by pd.Timedelta / pd.date_range(freq=...).
timeframe_mapping = {
    '1m': '1min',
    '15m': '15min',
    '1h': '1h',
    '4h': '4h',
    '1d': '1D',
    '1w': '7D',
}

# Trading pairs to download.
symbol_array = ['BTCUSDT', 'ETHUSDT', 'SOLUSDT', 'ICPUSDT', 'AVAXUSDT']

# Building blocks for the interval lengths below, in milliseconds.
_MINUTE_MS = 60_000
_HOUR_MS = 60 * _MINUTE_MS
_DAY_MS = 24 * _HOUR_MS

# Interval label -> candle length in milliseconds.
interval_to_ms = {
    "1m": _MINUTE_MS,
    "3m": 3 * _MINUTE_MS,
    "5m": 5 * _MINUTE_MS,
    "15m": 15 * _MINUTE_MS,
    "30m": 30 * _MINUTE_MS,
    "1h": _HOUR_MS,
    "2h": 2 * _HOUR_MS,
    "4h": 4 * _HOUR_MS,
    "6h": 6 * _HOUR_MS,
    "8h": 8 * _HOUR_MS,
    "12h": 12 * _HOUR_MS,
    "1d": _DAY_MS,
    "3d": 3 * _DAY_MS,
    "1w": 7 * _DAY_MS,
    "1M": 30 * _DAY_MS,  # approximate month (30 days)
}

# Init

In [None]:


import pandas as pd
import time
import os
import data.datasource.binance_api as ba 

import business.utils.trading_signals as ts
import business.utils.trading_indicators as ti
import presentation.plotter as pl


# Earliest candle requested on a from-scratch download, as epoch milliseconds.
# The 1-minute series starts in 2019; every other interval starts in 2015.
base_start_time_1m = int(pd.Timestamp(year=2019, month=1, day=1).timestamp() * 1000)
base_start_time_other = int(pd.Timestamp(year=2015, month=1, day=1).timestamp() * 1000)



In [None]:
def get_existing_data_path(symbol, interval):
    """Locate the export CSV for a symbol/interval pair.

    The interval is inserted into the filename verbatim, so "1m" and "1M"
    name different files.

    Returns:
        The relative path ``data/exports/{symbol}_{interval}_data.csv`` when
        that file exists, otherwise None.
    """
    candidate = f"data/exports/{symbol}_{interval}_data.csv"
    if os.path.exists(candidate):
        return candidate
    return None

In [None]:
def get_last_timestamp_from_csv(filepath):
    """Return the timestamp of the final row of a CSV as epoch milliseconds.

    Only the "timestamp" column is read. The value comes from the last row in
    file order (not the maximum).

    Returns:
        An int number of milliseconds, or None when the file has no data rows.
    """
    stamps = pd.read_csv(filepath, usecols=["timestamp"])["timestamp"]
    if stamps.empty:
        return None
    newest = pd.Timestamp(stamps.iloc[-1])
    return int(newest.timestamp() * 1000)

In [None]:

def export_historical_data(symbol, interval, start_time):
    """Download klines from ``start_time`` onward and write them to a fresh CSV.

    Batches are requested through ``ba.get_binance_klines`` until an empty or
    short batch comes back. The result is written to
    ``data/exports/{symbol}_{interval}_data.csv``, replacing any existing file.

    Args:
        symbol: Trading pair passed through to the klines call, e.g. "BTCUSDT".
        interval: Interval label; must be a key of ``interval_to_ms``.
        start_time: Open time of the first candle to request, in epoch
            milliseconds.

    Raises:
        ValueError: If ``interval`` is not a key of ``interval_to_ms``.
    """
    if interval not in interval_to_ms:
        raise ValueError(f"Interval {interval} not supported.")

    batch_limit = 1000  # rows requested per call; a shorter batch means "end of data"
    delta = interval_to_ms[interval]
    output_path = f'data/exports/{symbol}_{interval}_data.csv'

    # Create the export directory before downloading: DataFrame.to_csv does not
    # create missing parent directories, and failing only after the whole
    # download has finished would throw the fetched data away.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    all_data = []
    start_overall = time.time()
    iteration_count = 0

    while True:
        iteration_count += 1
        klines = ba.get_binance_klines(symbol, interval, start_time, limit=batch_limit)
        if not klines:
            print("No more data returned from Binance.")
            break

        all_data += klines
        # The next request starts one interval after the last candle received.
        start_time = klines[-1][0] + delta

        elapsed = time.time() - start_overall
        latest_ts = klines[-1][0]
        latest_dt = pd.to_datetime(latest_ts, unit='ms')
        print(f"Iteration {iteration_count}: Latest timestamp: {latest_ts} ({latest_dt}), "
              f"Total records: {len(all_data)}, Elapsed time: {elapsed:.2f} sec")

        # If fewer than 'limit' records are returned, assume we've reached the end.
        if len(klines) < batch_limit:
            print(f"Iteration {iteration_count}: Last batch retrieved with {len(klines)} records. Ending extraction.")
            break

        # Respect Binance rate limits.
        time.sleep(0.5)

    # Convert collected data into a DataFrame indexed by candle open time.
    df = pd.DataFrame(all_data, columns=[
        "timestamp", "open", "high", "low", "close", "volume",
        "close_time", "quote_asset_volume", "number_of_trades",
        "taker_buy_base_asset_volume", "taker_buy_quote_asset_volume", "ignore"
    ])
    df["timestamp"] = pd.to_datetime(df["timestamp"], unit='ms')
    df = df.set_index("timestamp")

    print("Data extraction complete. Sample data:")
    print(df.head())

    # Export to CSV.
    df.to_csv(output_path, index=True)

In [None]:
def append_new_data(symbol, interval, start_time, csv_path):
    """Fetch klines from ``start_time`` onward and merge them into ``csv_path``.

    Batches are requested through ``ba.get_binance_klines`` until an empty or
    short batch comes back. The new rows are concatenated with the rows already
    in the CSV, de-duplicated on "timestamp", sorted, and written back to the
    same file. Nothing is written when no rows were fetched.

    Args:
        symbol: Trading pair passed through to the klines call.
        interval: Interval label; must be a key of ``interval_to_ms``.
        start_time: Open time of the first candle to request, in epoch
            milliseconds.
        csv_path: Path of the existing export to update in place.

    Raises:
        KeyError: If ``interval`` is not a key of ``interval_to_ms``.
    """
    batch_limit = 1000  # rows requested per call; a shorter batch means "end of data"
    delta = interval_to_ms[interval]  # looked up once instead of on every batch

    # Fetch new data from Binance
    new_data = []
    iteration_count = 0
    start_overall = time.time()

    while True:
        iteration_count += 1
        klines = ba.get_binance_klines(symbol, interval, start_time, limit=batch_limit)
        if not klines:
            print(f"{symbol} {interval}: No more data from Binance.")
            break

        new_data += klines
        # The next request starts one interval after the last candle received.
        start_time = klines[-1][0] + delta

        elapsed = time.time() - start_overall
        latest_ts = klines[-1][0]
        latest_dt = pd.to_datetime(latest_ts, unit='ms')
        print(f"{symbol} {interval} | Iteration {iteration_count}: Latest timestamp: {latest_dt}, "
              f"Total new records: {len(new_data)}, Elapsed: {elapsed:.2f}s")

        if len(klines) < batch_limit:
            print(f"{symbol} {interval}: Last batch retrieved with {len(klines)} records. Reached end.")
            break

        time.sleep(0.5)  # Respect Binance rate limits

    if not new_data:
        print(f"{symbol} {interval}: No new data to append.")
        return

    # Convert collected data into a DataFrame
    df_new = pd.DataFrame(new_data, columns=[
        "timestamp", "open", "high", "low", "close", "volume",
        "close_time", "quote_asset_volume", "number_of_trades",
        "taker_buy_base_asset_volume", "taker_buy_quote_asset_volume", "ignore"
    ])
    df_new["timestamp"] = pd.to_datetime(df_new["timestamp"], unit='ms')

    # Load existing data
    df_existing = pd.read_csv(csv_path)
    df_existing["timestamp"] = pd.to_datetime(df_existing["timestamp"])

    # Combine and remove duplicates. When a timestamp exists in both frames the
    # freshly downloaded row wins (keep="last"): export_or_update restarts the
    # fetch at the last stored timestamp, so that candle is always re-downloaded.
    # NOTE(review): presumably the stored copy was saved while the candle was
    # still open and therefore holds partial values - confirm against the
    # klines endpoint behaviour.
    df_combined = (
        pd.concat([df_existing, df_new])
        .drop_duplicates(subset=["timestamp"], keep="last")
        .sort_values(by="timestamp")
    )

    # Save updated CSV
    df_combined.to_csv(csv_path, index=False)
    print(f"{symbol} {interval}: Data appended and saved. Total records: {len(df_combined)}")


In [None]:
def export_or_update(symbol, interval):
    """Bring the export for ``symbol``/``interval`` up to date.

    If a CSV already exists and contains rows, only data from its last
    timestamp onward is fetched and merged in. Otherwise (no file, or a file
    without rows) the full history is downloaded, starting at
    ``base_start_time_1m`` for the "1m" interval and ``base_start_time_other``
    for every other interval.
    """
    csv_path = get_existing_data_path(symbol, interval)

    if csv_path:
        print(f"✅ {symbol} {interval}: Existing CSV found. Checking for updates...")
        resume_from = get_last_timestamp_from_csv(csv_path)
        if resume_from:
            print(f"📅 Last recorded timestamp: {pd.to_datetime(resume_from, unit='ms')}")
            append_new_data(symbol, interval, resume_from, csv_path)
            return

    # From here on a full download is needed.
    if interval == "1m":
        first_candle = base_start_time_1m
    else:
        first_candle = base_start_time_other
    first_candle_dt = pd.to_datetime(first_candle, unit='ms')

    if csv_path:
        print(f"⚠️ {symbol} {interval}: CSV is empty, starting from {first_candle_dt}...")
    else:
        print(f"🚀 {symbol} {interval}: No existing data. Starting from {first_candle_dt}...")
    export_historical_data(symbol, interval, first_candle)

In [None]:
def merge_timeframes():
    """Add higher-timeframe OHLC columns to each export and rewrite the CSVs.

    For every loaded (symbol, timeframe) frame, each longer timeframe that is
    also loaded contributes ``{ht}_open``, ``{ht}_high`` and ``{ht}_low``
    (forward-filled onto this frame's timestamps) plus ``{ht}_close``, which is
    a copy of this frame's own ``close``. Each frame is then written back over
    the CSV it was read from.
    """
    # Only BTCUSDT is merged for now; ETHUSDT, SOLUSDT, ICPUSDT and AVAXUSDT
    # are switched off.
    symbols_to_merge = ['BTCUSDT']

    # Load every available export, keyed by (symbol, timeframe).
    frames = {}
    for symbol in symbols_to_merge:
        for timeframe in timeframe_array:
            csv_file = os.path.join(data_folder, f'{symbol}_{timeframe}_data.csv')
            if not os.path.exists(csv_file):
                continue
            loaded = pd.read_csv(csv_file, parse_dates=['timestamp'])
            frames[(symbol, timeframe)] = loaded.set_index('timestamp')

    for (symbol, timeframe), frame in frames.items():
        own_span = pd.Timedelta(timeframe_mapping[timeframe])

        for ht in timeframe_array:
            # Skip timeframes that are not strictly longer than this one.
            if pd.Timedelta(timeframe_mapping[ht]) <= own_span:
                continue
            if (symbol, ht) not in frames:
                continue

            aligned = frames[(symbol, ht)].reindex(frame.index, method='ffill')
            for field in ('open', 'high', 'low'):
                frame[f'{ht}_{field}'] = aligned[field]
            # Use the current close from the lower timeframe (since we don't know the higher timeframe's final close)
            frame[f'{ht}_close'] = frame['close']

        output_path = os.path.join(data_folder, f'{symbol}_{timeframe}_data.csv')
        frame.reset_index().to_csv(output_path, index=False)
        print(f'Saved merged file: {output_path}')
def check_missing_intervals():
    """Report duplicate and missing timestamps for every existing export CSV.

    Walks every timeframe in ``timeframe_mapping`` and every symbol in
    ``symbol_array``. Files that do not exist are skipped silently. For each
    file found, prints the number of duplicated index values (if any) and the
    number of timestamps absent from a regular grid spanning the file's first
    to last timestamp.
    """
    for timeframe, freq in timeframe_mapping.items():
        for symbol in symbol_array:
            file_path = os.path.join(data_folder, f'{symbol}_{timeframe}_data.csv')
            if not os.path.exists(file_path):
                continue

            df = pd.read_csv(file_path, parse_dates=['timestamp']).set_index('timestamp')

            # A header-only file has NaT as min/max, which pd.date_range
            # rejects; report it and carry on with the remaining files.
            if df.index.empty:
                print(f'{symbol}_{timeframe}: file has no rows, skipping.')
                continue

            # Check for duplicates
            duplicates = df.index.duplicated().sum()
            if duplicates > 0:
                print(f'{symbol}_{timeframe}: {duplicates} duplicate timestamps found.')

            # Check for missing intervals
            all_times = pd.date_range(start=df.index.min(), end=df.index.max(), freq=freq)
            missing_times = all_times.difference(df.index)
            if not missing_times.empty:
                print(f'{symbol}_{timeframe}: {len(missing_times)} missing timestamps.')
            else:
                print(f'{symbol}_{timeframe}: No missing timestamps.')
def find_missing_timestamps(df_name):
    """List the timestamps missing from one export CSV.

    Args:
        df_name: File stem such as "BTCUSDT_1m_data". The timeframe is the
            part after the last underscore once "_data" has been removed, and
            the file read is ``data/exports/{df_name}.csv``.

    Returns:
        A DataFrame with a single "timestamp" column holding every grid point
        between the file's first and last timestamp that is absent from the
        file (empty when nothing is missing or the file has no rows), or None
        when the file does not exist or the timeframe is not a key of
        ``timeframe_mapping``.
    """
    # Extract the timeframe from the name (e.g. 'BTCUSDT_1m_data' -> '1m').
    # rsplit keeps working when the symbol part itself contains an underscore.
    symbol_timeframe = df_name.replace('_data', '').strip()  # Clean suffix if any
    _symbol, timeframe = symbol_timeframe.rsplit('_', 1)

    file_path = f'data/exports/{df_name}.csv'
    if not os.path.exists(file_path):
        print(f"File {file_path} not found.")
        return None

    # Validate the timeframe before reading so a bad name does not cost a
    # full CSV load.
    if timeframe not in timeframe_mapping:
        print(f"Timeframe {timeframe} not recognized.")
        return None

    df = pd.read_csv(file_path, parse_dates=['timestamp']).set_index('timestamp')

    # A header-only file has NaT as min/max, which pd.date_range rejects.
    if df.index.empty:
        print(f"No data rows in {file_path}; nothing to check.")
        return pd.DataFrame({'timestamp': pd.DatetimeIndex([])})

    # Create a full range of timestamps for the timeframe
    freq = timeframe_mapping[timeframe]
    full_range = pd.date_range(start=df.index.min(), end=df.index.max(), freq=freq)

    # Find missing timestamps
    missing_timestamps = full_range.difference(df.index)

    # Create a dataframe for the missing timestamps
    missing_df = pd.DataFrame({'timestamp': missing_timestamps})

    if missing_df.empty:
        print(f"No missing timestamps for {df_name}.")
    else:
        print(f"Found {len(missing_df)} missing timestamps for {df_name}.")

    return missing_df

def insert_indicator_values(df):
    """Add indicator columns to ``df`` and return the resulting frame.

    Adds RSI, RSI_MA, MACD, Signal, MACD_Hist, BB_Mid, BB_Upper, BB_Lower,
    Stoch_K and Stoch_D to ``df`` in place, then passes the frame through
    ``ti.calculate_fibonacci_from_swings``.

    Args:
        df: Frame with at least a "close" column.
            NOTE(review): the stochastic and Fibonacci helpers receive the
            whole frame and presumably need high/low as well - confirm.

    Returns:
        The frame returned by ``ti.calculate_fibonacci_from_swings``.
    """
    df['RSI'], df['RSI_MA'] = ti.calculate_rsi_with_ma(df['close'], rsi_period=14, ma_type="SMA", ma_length=14)
    df['MACD'], df['Signal'], df['MACD_Hist'] = ti.calculate_macd(df['close'], fast_period=12, slow_period=26, signal_period=9)
    df['BB_Mid'], df['BB_Upper'], df['BB_Lower'] = ti.calculate_bollinger_bands(df['close'], window=20, num_std=2)
    df['Stoch_K'], df['Stoch_D'] = ti.calculate_stochastic(df, k_period=14, d_period=3)
    # Return the helper's result: it was previously bound to a local name and
    # dropped, so callers always received None.
    df = ti.calculate_fibonacci_from_swings(df=df, suffix='_val')
    return df

# Extract

In [None]:
# Download or refresh every symbol/interval pair, pausing two seconds between pairs.
for symbol in symbol_array:
    for interval in timeframe_array:
        print(f"📊 Processing {symbol} {interval}")
        export_or_update(symbol, interval)
        time.sleep(2)

# Insert higher timeframes

In [None]:
# The higher-timeframe merge is currently switched off; only the
# duplicate/gap report runs in this cell.
# merge_timeframes()
check_missing_intervals()

In [None]:
# List the gaps in the BTCUSDT 1-minute export and, if there are any, save
# them to a CSV in the current working directory.
missing_df = find_missing_timestamps('BTCUSDT_1m_data')
if missing_df is not None and not missing_df.empty:
    missing_df.to_csv('BTCUSDT_1m_missing_timestamps.csv', index=False)

In [None]:
# Load the BTCUSDT 1-minute export with the timestamp column parsed as datetimes.
df = pd.read_csv('data/exports/BTCUSDT_1m_data.csv', parse_dates=['timestamp'])

In [None]:
# Load the BTCUSDT daily export; no parse_dates here, so the timestamp column
# is not converted to datetimes.
df_existing = pd.read_csv('data/exports/BTCUSDT_1d_data.csv')

In [None]:
# Preview the first rows of the daily export.
df_existing.head()

In [None]:
# Run the Fibonacci helper on the daily frame.
# NOTE(review): presumably the added columns carry the '_val' suffix - confirm in trading_indicators.
df_with_fib = ti.calculate_fibonacci_from_swings(df = df_existing, suffix='_val')

In [None]:
# Inspect the last five rows of the Fibonacci result.
df_with_fib.tail(5)

In [None]:
# Plot the Fibonacci chart with the last row of the frame as the selected index.
pl.plot_fibonacci_chart(df_with_fib, selected_index=df_with_fib.index[-1], title="Fibonacci Retracement - 1D", suffix="_val")

# Archive

In [None]:
# Archived one-off export calls, kept inside a string literal so none of them execute.
"""
EXPORT HISTORY

symbol = "BTCUSDT"
interval = "1m"
start_time = int(pd.Timestamp("2019-01-01").timestamp() * 1000)
export_historical_data(symbol, interval, start_time)

symbol = "ETHUSDT"
interval = "1m"
start_time = int(pd.Timestamp("2019-01-01").timestamp() * 1000)
export_historical_data(symbol, interval, start_time)

symbol = "SOLUSDT"
interval = "1m"
start_time = int(pd.Timestamp("2019-01-01").timestamp() * 1000)
export_historical_data(symbol, interval, start_time)

symbol = "ICPUSDT"
interval = "1m"
start_time = int(pd.Timestamp("2019-01-01").timestamp() * 1000)
export_historical_data(symbol, interval, start_time)

symbol = "AVAXUSDT"
interval = "1m"
start_time = int(pd.Timestamp("2019-01-01").timestamp() * 1000)
export_historical_data(symbol, interval, start_time)
"""