In [14]:
import pandas as pd
import vectorbtpro as vbt
import numpy as np
import matplotlib.pyplot as plt


In [15]:
vbt.settings.plotting["layout"]["template"] = "vbt_dark"
vbt.settings.plotting["layout"]["width"] = 1500
vbt.settings.plotting['layout']['height'] = 600
vbt.settings.wrapping["freq"] = "1m"
# vbt.settings.portfolio['size_granularity'] = 0.001
vbt.settings.portfolio['init_cash'] = 10000

# Wherever you saved the pickle file
spot_data_path = '/Users/ericervin/Documents/Coding/data-repository/data/BTCUSDT_1m.pkl'
futures_data_path = '/Users/ericervin/Documents/Coding/data-repository/data/BTCUSDT_1m_futures.pkl'

In [16]:
# Read in the 1m BTC data from pickle

# spot_data = vbt.BinanceData.load(spot_data_path)
# spot_btc_1m = spot_data.get()

futures_data = vbt.BinanceData.load(futures_data_path)
futures_1m = futures_data.get()

In [None]:
futures_1m['2021-01-01':'2021-01-05'].vbt.ohlcv.plot()

In [18]:
day = '2023-01-08'
futures_1m.loc[day:'2023-01-09']['Close'].vbt.plot().show()

In [19]:
def add_support_resistance(df: pd.DataFrame, 
                           Close_col: str = 'Close', 
                           stdev_lbw: str = '30D', 
                           low_sigma: int = 1, 
                           high_sigma: int = 6, 
                           sigma_step: int = 1) -> pd.DataFrame:
    
    # Calculate rolling 1D standard deviation of Close prices
    std_dev = df[Close_col].rolling('1D').std()
    # Take the stdev_lbw-day rolling average of the 1D standard deviations
    df['avg_rolling_stddev'] = std_dev.rolling(stdev_lbw).mean()
    df['fixed_daily_stddev'] = df['avg_rolling_stddev'].at_time('00:00')
    df['fixed_daily_stddev'] = df['fixed_daily_stddev'].ffill()
    df['open_line'] = df[Close_col].at_time('00:00')
    df['open_line'] = df['open_line'].ffill()

    for sigma in range(low_sigma, high_sigma+1, sigma_step):
        df[f'daily_support_{sigma}'] = df['open_line'] - (df['fixed_daily_stddev'] * sigma)
        df[f'daily_support_{sigma}'] = df[f'daily_support_{sigma}'].ffill()

        df[f'daily_resistance_{sigma}'] = df['open_line'] + (df['fixed_daily_stddev'] * sigma)
        df[f'daily_resistance_{sigma}'] = df[f'daily_resistance_{sigma}'].ffill()
        
    return df

In [20]:
futures_w_sigmas = add_support_resistance(futures_1m, Close_col='Close', stdev_lbw='30D', low_sigma=1, high_sigma=6, sigma_step=1)

In [21]:
futures_w_sigmas.loc[day:'2023-01-09'][['Close','open_line', 'daily_support_1', 'daily_resistance_1', 'daily_support_3', 'daily_resistance_3']].vbt.plot().show()

## Create Dollar Bar Functions 

The the next functions create dollar bars so you can manipulate and work with them and then the last function reverses that so you can merge the dollar bars back into a different time resolution.

In [22]:


def dollar_bar_func(ohlc_df, dollar_bar_size):
    # Calculate dollar value traded for each row
    ohlc_df['DollarValue'] = ohlc_df['Close'] * ohlc_df['Volume']
    
    # Calculate cumulative dollar value
    ohlc_df['CumulativeDollarValue'] = ohlc_df['DollarValue'].cumsum()
    
    # Determine the number of dollar bars
    num_bars = int(ohlc_df['CumulativeDollarValue'].iloc[-1] / dollar_bar_size)
    
    # Generate index positions for dollar bars
    bar_indices = [0]
    cumulative_value = 0
    for i in range(1, len(ohlc_df)):
        cumulative_value += ohlc_df['DollarValue'].iloc[i]
        if cumulative_value >= dollar_bar_size:
            bar_indices.append(i)
            cumulative_value = 0
    
    # Create a new dataframe with dollar bars
    dollar_bars = []
    for i in range(len(bar_indices) - 1):
        start_idx = bar_indices[i]
        end_idx = bar_indices[i + 1]
        # TODO: allow the original dataframe to be dynamic to retain all of the original columns and data so the user can pass a dictionary for how to aggregate each column
        dollar_bar = {
            'Open': ohlc_df['Open'].iloc[start_idx],
            'High': ohlc_df['High'].iloc[start_idx:end_idx].max(),
            'Low': ohlc_df['Low'].iloc[start_idx:end_idx].min(),
            'Close': ohlc_df['Close'].iloc[end_idx],
            'Volume': ohlc_df['Volume'].iloc[start_idx:end_idx].sum(),
            'Quote volume': ohlc_df['Quote volume'].iloc[start_idx:end_idx].sum(),
            'Trade count': ohlc_df['Trade count'].iloc[start_idx:end_idx].sum(),
            'Taker base volume': ohlc_df['Taker base volume'].iloc[start_idx:end_idx].sum(),
            'Taker quote volume': ohlc_df['Taker quote volume'].iloc[start_idx:end_idx].sum()
        }
        
        if isinstance(ohlc_df.index, pd.DatetimeIndex):
            dollar_bar['Open Time'] = ohlc_df.index[start_idx]
            dollar_bar['Close Time'] = ohlc_df.index[end_idx] - pd.Timedelta(milliseconds=1)
        elif 'Open Time' in ohlc_df.columns:
            dollar_bar['Open Time'] = ohlc_df['Open Time'].iloc[start_idx]
            dollar_bar['Close Time'] = ohlc_df['Open Time'].iloc[end_idx] - pd.Timedelta(milliseconds=1)
        
        dollar_bars.append(dollar_bar)
    
    dollar_bars_df = pd.concat([pd.DataFrame([bar]) for bar in dollar_bars], ignore_index=True)
    # Set the index to be the Open Time
    dollar_bars_df.set_index('Open Time', inplace=True)
    return dollar_bars_df

# Create a simple function to simplify the number so we can use it in our column names
def simplify_number(num):
    """
    Simplifies a large number by converting it to a shorter representation with a suffix (K, M, B).
    simplify_number(1000) -> 1K
    """
    suffixes = ['', 'K', 'M', 'B']
    suffix_index = 0
    while abs(num) >= 1000 and suffix_index < len(suffixes) - 1:
        num /= 1000.0
        suffix_index += 1
    suffix = suffixes[suffix_index] if suffix_index > 0 else ''
    simplified_num = f'{int(num)}{suffix}'
    return simplified_num

def merge_and_fill_dollar_bars(original_df, dollar_bars_df, dollar_bar_size):
    """
    Merges the original dataframe with the dollar bars dataframe and fills the NaN values.
    """
    # Add prefix to column names in dollar bars dataframe
    # Reset the index
    dollar_bars_df.reset_index(inplace=True) # in case open time is the index, we need to reset it to a column so we can rename it  
    dollar_bar_prefix = f'db_{simplify_number(dollar_bar_size)}_' # prefix for dollar bar columns eg 'db_90M_Open Time'
    dollar_bars_df_renamed = dollar_bars_df.add_prefix(dollar_bar_prefix)

    # Convert 'Open Time' columns to pandas datetime format and set them as index
    dollar_bars_df_renamed.index = pd.to_datetime(dollar_bars_df_renamed[dollar_bar_prefix + 'Open Time'])

    # Merge the dataframes on the index
    merged_df = original_df.merge(dollar_bars_df_renamed, how='left', left_index=True, right_index=True)

    # Set the flag for a new dollar bar with prefix
    merged_df[dollar_bar_prefix + 'NewDBFlag'] = ~merged_df[dollar_bar_prefix + 'Close'].isna()

    # Forward fill the NaN values for all columns except the new dollar bar flag
    columns_to_ffill = [col for col in merged_df.columns if col != dollar_bar_prefix + 'NewDBFlag']
    merged_df[columns_to_ffill] = merged_df[columns_to_ffill].fillna(method='ffill')

    # Fill the remaining NaN values in the new dollar bar flag column with False
    merged_df[dollar_bar_prefix + 'NewDBFlag'] = merged_df[dollar_bar_prefix + 'NewDBFlag'].fillna(False)
    
    # Assign the renamed 'Open Time' column back to the dataframe
    merged_df[dollar_bar_prefix + 'Open Time'] = merged_df[dollar_bar_prefix + 'Open Time']

    return merged_df


# Create dollar bars

In [23]:
dollar_bar_size = 90_000_000

In [24]:
dollar_bars = dollar_bar_func(futures_1m, dollar_bar_size=dollar_bar_size)
dollar_bars.shape

(124942, 10)

In [None]:
dollar_bars.loc['2023-01-09'].shape

(71, 11)

Now recreate the original dataframe with 1 minute candles if you have added any columns to the dollarbar dataframe for analysis, the values will be forward filled to prevent any data leakage. All of the columns from the dollarbars will be renamed with a prefix `db_90M_` in this case the dollar size was 90million

In [None]:
# now merge the two dataframes
dollar_bars_merged = merge_and_fill_dollar_bars(futures_1m, dollar_bars, dollar_bar_size=dollar_bar_size)
dollar_bars_merged.columns

Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Quote volume', 'Trade count',
       'Taker base volume', 'Taker quote volume', 'avg_rolling_stddev',
       'fixed_daily_stddev', 'open_line', 'daily_support_1',
       'daily_resistance_1', 'daily_support_2', 'daily_resistance_2',
       'daily_support_3', 'daily_resistance_3', 'daily_support_4',
       'daily_resistance_4', 'daily_support_5', 'daily_resistance_5',
       'daily_support_6', 'daily_resistance_6', 'DollarValue',
       'CumulativeDollarValue', 'db_90M_Open Time', 'db_90M_Open',
       'db_90M_High', 'db_90M_Low', 'db_90M_Close', 'db_90M_Volume',
       'db_90M_Quote volume', 'db_90M_Trade count', 'db_90M_Taker base volume',
       'db_90M_Taker quote volume', 'db_90M_Close Time', 'db_90M_NewDBFlag'],
      dtype='object')

Check the shape of the frames to make sure they make sense

In [None]:


print(futures_1m.shape) # Original 1 minute price dataframe
print(dollar_bars.shape) # Dollar bars dataframe
print(dollar_bars_merged.shape) # Merged dataframe
print(dollar_bars_merged['db_90M_NewDBFlag'].sum()) # Number of dollar bars in merged dataframe

(1272913, 26)
(124942, 11)
(1272913, 38)
124942


In [None]:
with_sigmas = add_support_resistance(dollar_bars_merged, Close_col='Close', stdev_lbw='30D', low_sigma=1, high_sigma=6, sigma_step=1)

In [None]:
with_sigmas.loc['2023-01-08':'2023-01-09'][[
    'Close',
    'db_90M_Open',
    'open_line', 
    'daily_support_1',
    'daily_support_2',
    'daily_support_3', 
    'daily_resistance_1', 
    'daily_resistance_2', 
    'daily_resistance_3']].vbt.plot().show()