In [1]:
import pandas as pd

In [2]:
def macd(price_series, short_window=5, long_window=34, signal_window=5):
    # 1. Calculate EMAs
    short_ema = price_series.ewm(span=short_window, adjust=False).mean()
    long_ema = price_series.ewm(span=long_window, adjust=False).mean()

    # 2. MACD Line
    macd_line = short_ema - long_ema

    # 3. Signal Line (EMA of MACD)
    signal_line = macd_line.ewm(span=signal_window, adjust=False).mean()

    # 4. Histogram (MACD - Signal)
    macd_hist = macd_line - signal_line

    # Return as a DataFrame
    return pd.DataFrame({
        "MACD": macd_line,
        "Signal Line": signal_line,
        "Histogram": macd_hist
    })

In [4]:
def weighted_moving_average(series, period):
    weights = np.arange(1, period + 1)
    return series.rolling(period).apply(lambda prices: np.dot(prices, weights)/weights.sum(), raw=True)

def hull_moving_average(series, period):
    half_length = int(period / 2)
    sqrt_length = int(np.sqrt(period))

    wma_half = weighted_moving_average(series, half_length)
    wma_full = weighted_moving_average(series, period)

    hull_series = 2 * wma_half - wma_full
    hma = weighted_moving_average(hull_series, sqrt_length)

    return hma

In [5]:
def process_stock_data(csv_url):
    # Load and prepare data
    df = pd.read_csv(csv_url)
    df['Open time'] = pd.to_datetime(df['Open time'])  # Ensure datetime format
    df.set_index('Open time', inplace=True)

    df['Hour'] = df.index.hour

    df['15minReturn'] = df['Close'] / df['Close'].shift(1) - 1
    df['15minrange'] = df['High'] - df['Low']
    df['Volchange'] = df['Volume'] / (df['Volume'].shift(1) + 1e-8) - 1

    # Calculate MACD and join to original DataFrame
    macd_r = macd(df["15minReturn"]).add_prefix("Ret_")
    df = df.join(macd_r)
    
    macd_v = macd(df["Volchange"]).add_prefix("Vol_")
    df = df.join(macd_v)

    df['HMA_100_Volume'] = hull_moving_average(df['Volume'], 100)
    df['HMA_200_Volume'] = hull_moving_average(df['Volume'], 200)

    # 5-day rolling volatility (daily returns)
    df['Volatility_5'] = df['15minReturn'].rolling(window=5).std()

    # 10-day rolling volatility
    df['Volatility_10'] = df['15minReturn'].rolling(window=10).std()

    # 20-day rolling volatility
    df['Volatility_20'] = df['15minReturn'].rolling(window=20).std()
    df['Volatility_35'] = df['15minReturn'].rolling(window=35).std()
    # Volume Z-score over past 20 days
    df['Volume_Z'] = (df['Volume'] - df['Volume'].rolling(20).mean()) / df['Volume'].rolling(20).std()

    # Price Z-score (on Close) over past 20 days
    df['Price_Z'] = (df['Close'] - df['Close'].rolling(20).mean()) / df['Close'].rolling(20).std()

    # Return Z-score over past 20 days
    df['Return_Z'] = (df['15minReturn'] - df['15minReturn'].rolling(20).mean()) / df['15minReturn'].rolling(20).std()

    ma_20 = df['Close'].rolling(20).mean()
    ma_std_20 = df['Close'].rolling(20).std()
    df['Z_Close_MA_20'] = (df['Close'] - ma_20) / ma_std_20

    df.dropna(inplace=True)
    
    return df

In [6]:
import numpy as np

def generate_volatility_labels(df, lookahead=12, n_classes=3):
    df = df.copy()

    # Compute log returns
    df['log_return'] = np.log(df['Close'] / df['Close'].shift(1))

    # Calculate rolling future volatility (std of log returns)
    df['future_vol'] = (
        df['log_return']
        .shift(-lookahead + 1)  # Shift so we're looking forward
        .rolling(window=lookahead)
        .std()
    )

    # Drop rows with NaNs due to rolling and shifting
    df = df.iloc[:-lookahead]

    # Quantile binning to create classes
    df['volatility_class'] = pd.qcut(
        df['future_vol'],
        q=n_classes,
        labels=range(n_classes)
    )

    return df


In [7]:
df = process_stock_data("/Users/rong/Desktop/btc_15m_data_2018_to_2025.csv")
df = generate_volatility_labels(df, lookahead=12, n_classes=3)

In [8]:
df.dropna()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Close time,Quote asset volume,Number of trades,Taker buy base asset volume,Taker buy quote asset volume,...,Volatility_10,Volatility_20,Volatility_35,Volume_Z,Price_Z,Return_Z,Z_Close_MA_20,log_return,future_vol,volatility_class
Open time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-01-03 07:45:00,15158.57,15231.51,15135.90,15170.00,172.989883,2018-01-03 07:59:59.999,2.626280e+06,1515,89.399478,1.357937e+06,...,0.006533,0.006506,0.005784,-0.815326,0.940688,0.015806,0.940688,0.000659,0.009372,2
2018-01-03 08:00:00,15170.00,15172.99,14910.00,14975.00,257.622957,2018-01-03 08:14:59.999,3.875107e+06,2153,160.337093,2.412682e+06,...,0.007777,0.006694,0.006233,0.880580,-0.572263,-1.826825,-0.572263,-0.012938,0.009560,2
2018-01-03 08:15:00,14974.98,15050.00,14900.00,14950.00,241.666106,2018-01-03 08:29:59.999,3.621941e+06,1927,143.733039,2.154657e+06,...,0.007772,0.006619,0.006243,0.603070,-0.712309,-0.183102,-0.712309,-0.001671,0.009695,2
2018-01-03 08:30:00,14968.02,15113.49,14950.00,15086.00,201.905635,2018-01-03 08:44:59.999,3.039620e+06,2006,129.801492,1.954690e+06,...,0.008272,0.006802,0.006359,-0.167519,0.363250,1.292182,0.363250,0.009056,0.009696,2
2018-01-03 08:45:00,15086.00,15113.49,15000.00,15002.59,194.740060,2018-01-03 08:59:59.999,2.934303e+06,1963,119.701303,1.804361e+06,...,0.008311,0.006312,0.006402,-0.247504,-0.432173,-0.976801,-0.432173,-0.005544,0.010265,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-06-16 20:00:00,108652.01,108693.21,108428.00,108646.31,241.133820,2025-06-16 20:14:59.999,2.617901e+07,38180,130.196620,1.413418e+07,...,0.001560,0.001349,0.001529,0.935354,1.506242,-0.398364,1.506242,-0.000052,0.003530,2
2025-06-16 20:15:00,108646.30,108648.68,108509.66,108626.77,93.391940,2025-06-16 20:29:59.999,1.013982e+07,19332,39.826400,4.323606e+06,...,0.001515,0.001309,0.001493,-0.924183,1.316000,-0.541088,1.316000,-0.000180,0.003586,2
2025-06-16 20:30:00,108626.77,108757.32,108559.89,108737.31,87.088030,2025-06-16 20:44:59.999,9.461555e+06,14678,53.431900,5.805273e+06,...,0.001473,0.001314,0.001484,-0.970460,1.408247,0.356865,1.408247,0.001017,0.003632,2
2025-06-16 20:45:00,108737.31,108849.77,108558.13,108782.68,144.274680,2025-06-16 20:59:59.999,1.568019e+07,30030,68.689790,7.464797e+06,...,0.001468,0.001250,0.001459,-0.267263,1.381658,-0.169363,1.381658,0.000417,0.003690,2


In [9]:
df.columns

Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Close time',
       'Quote asset volume', 'Number of trades', 'Taker buy base asset volume',
       'Taker buy quote asset volume', 'Ignore', 'Hour', '15minReturn',
       '15minrange', 'Volchange', 'Ret_MACD', 'Ret_Signal Line',
       'Ret_Histogram', 'Vol_MACD', 'Vol_Signal Line', 'Vol_Histogram',
       'HMA_100_Volume', 'HMA_200_Volume', 'Volatility_5', 'Volatility_10',
       'Volatility_20', 'Volatility_35', 'Volume_Z', 'Price_Z', 'Return_Z',
       'Z_Close_MA_20', 'log_return', 'future_vol', 'volatility_class'],
      dtype='object')

In [10]:
from fastai.imports import *
np.set_printoptions(linewidth=130)

In [12]:
df['Hour'] = df['Hour'].astype('category')

df['Hour'] = pd.Categorical(df['Hour'], categories=range(24), ordered=True)


In [14]:
cap_value = 155.31
df['Volchange'] = df['Volchange'].clip(upper=cap_value)


In [30]:
trn_df = df.iloc[:200000].copy().reset_index(drop=True)
val_df = df.iloc[200000:230000].copy().reset_index(drop=True)
test_df = df.iloc[230000:].copy().reset_index(drop=True)


In [17]:
from fastai.tabular.all import *

pd.options.display.float_format = '{:.2f}'.format

In [31]:
procs = [Categorify, FillMissing, Normalize]
cats=['Hour']
conts=['15minReturn', '15minrange', 'Volchange', 'Volatility_5', 'Volatility_10', 'Volatility_20','Volatility_35','Volume_Z', 'Price_Z', 'Return_Z', 'Z_Close_MA_20','Vol_Histogram','Ret_Histogram', 'HMA_100_Volume', 'HMA_200_Volume']
y = "volatility_class"


In [42]:
to_train = TabularPandas(
    trn_df,
    procs=procs,
    cat_names=cats,
    cont_names=conts,
    y_names=y,
    y_block=CategoryBlock()
)

to_valid = TabularPandas(
    val_df,
    procs=procs,
    cat_names=cats,
    cont_names=conts,
    y_names=y,
    y_block=CategoryBlock()
)

to_test = TabularPandas(
    test_df,
    procs=procs,
    cat_names=cats,
    cont_names=conts,
    y_names=y,
    y_block=CategoryBlock()
)


In [43]:
dls = DataLoaders.from_dsets(to_train, to_valid, bs=64)

In [44]:
learn = tabular_learner(dls, metrics=accuracy, layers=[10, 10])

In [54]:
df['volatility_class'].info

<bound method Series.info of 0         NaN
1         NaN
2         NaN
3         NaN
4         NaN
         ... 
260744      2
260745      2
260746      2
260747      2
260748      2
Name: volatility_class, Length: 260749, dtype: category
Categories (3, int64): [0 < 1 < 2]>

In [45]:
learn.fit_one_cycle(5)  # 5 epochs

epoch,train_loss,valid_loss,accuracy,time


KeyError: 55595

In [38]:
# Reset index
trn_df = df.iloc[:200000].copy().reset_index(drop=True)
val_df = df.iloc[200000:230000].copy().reset_index(drop=True)

# Create TabularPandas
to_train = TabularPandas(trn_df, procs=procs, cat_names=cats, cont_names=conts, y_names=y, y_block=CategoryBlock())
to_valid = TabularPandas(val_df, procs=procs, cat_names=cats, cont_names=conts, y_names=y, y_block=CategoryBlock())

# Create DataLoaders
dls = DataLoaders.from_dsets(to_train, to_valid, bs=64)

# Train
learn = tabular_learner(dls, metrics=accuracy, layers=[10, 10])
learn.fit_one_cycle(5)


epoch,train_loss,valid_loss,accuracy,time


KeyError: 52656

In [59]:
from fastai.tabular.all import *

# 1. Make sure your original df is fully reset
df = df.reset_index(drop=True)

# 2. Slice and reset
trn_df = df.iloc[:200000].copy().reset_index(drop=True)
val_df = df.iloc[200000:230000].copy().reset_index(drop=True)

# 3. Define config
procs = [Categorify, FillMissing, Normalize]
cats = ['Hour']
conts = ['Volchange', 'Volatility_5', 'Vol_Histogram']  # Just 2â€“3 for now
y = "volatility_class"

# 4. Create TabularPandas
to_train = TabularPandas(trn_df, procs=procs, cat_names=cats, cont_names=conts,
                         y_names=y, y_block=CategoryBlock())

to_valid = TabularPandas(val_df, procs=procs, cat_names=cats, cont_names=conts,
                         y_names=y, y_block=CategoryBlock())

# âœ… Check indexes
print("Train index:", to_train.items.index[:5])  # Should be 0,1,2,3...
print("Valid index:", to_valid.items.index[:5])  # Should be 0,1,2,3...

# 5. Build DataLoaders
dls = DataLoaders.from_dsets(to_train, to_valid, bs=64)

# 6. Train
learn = tabular_learner(dls, metrics=accuracy, layers=[10, 10])
learn.fit_one_cycle(3)  # Should work now!


Train index: RangeIndex(start=0, stop=5, step=1)
Valid index: RangeIndex(start=0, stop=5, step=1)


epoch,train_loss,valid_loss,accuracy,time


KeyError: 133254