## Load data

In [None]:
import pandas as pd

Unnamed: 0,timestamp,close,volume,id,return
118053,2025-05-16 21:00:00,103643.59,36962480.0,118053,-0.00089
118054,2025-05-16 22:00:00,103551.32,101260900.0,118054,-0.000499
118055,2025-05-16 23:00:00,103499.6,32355070.0,118055,-0.001078
118056,2025-05-17 00:00:00,103388.02,50440670.0,118056,-0.005115
118057,2025-05-17 01:00:00,102859.21,68915260.0,118057,0.004354


In [54]:
def process_data_for_model(data_path):
    """Load a price CSV and shape it into the frame the forecasting model expects.

    Parameters
    ----------
    data_path : str or path-like
        CSV file that contains at least the columns ``time``, ``close`` and
        ``volumeto``. Any other columns are discarded.

    Returns
    -------
    pandas.DataFrame
        Columns, in order: ``time`` (timezone-naive datetime), ``close``,
        ``volume`` (renamed from ``volumeto``), ``id`` (constant ``'BTC'``)
        and ``return``. The original row index is kept; rows whose ``return``
        is NaN (at minimum the last row) are dropped.

    Notes
    -----
    ``return`` on row *t* is the percentage change from ``close[t]`` to
    ``close[t + 1]``, i.e. it is a *forward-looking* value that is not
    observable at time *t*. Consumers must account for that.

    A summary of the resulting frame is printed via ``DataFrame.info()``.
    """
    df = pd.read_csv(data_path)

    # Keep only the model inputs; ``rename`` returns a new frame, so the
    # column assignments below never touch the frame read from disk.
    df = df[['time', 'close', 'volumeto']].rename(columns={'volumeto': 'volume'})
    df['id'] = 'BTC'
    # Strip any timezone so timestamps are plain naive datetimes.
    df['time'] = pd.to_datetime(df['time']).dt.tz_localize(None)
    # pct_change() is the return *into* row t; shift(-1) turns it into the
    # 1-step-ahead return *out of* row t.
    df['return'] = df['close'].pct_change().shift(-1)
    # The shift leaves the final row without a target.
    df = df.dropna(subset=['return'])
    df.info()

    return df

## AutoMLStrategy

In [55]:
import sys
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

sys.path.append("../src")

from strategy import Strategy
from backtest import run_backtest

class AutoMLStrategy(Strategy):
    """Strategy that trades on an AutoGluon probabilistic 1-step return forecast.

    A ``TimeSeriesPredictor`` is fitted once, at construction time, on the
    supplied training frame. During the backtest every incoming bar is added
    to a rolling window of at most ``lookback`` rows per ``id``; the window is
    passed to the predictor and the resulting quantile forecast is turned into
    a ``"buy"`` / ``"sell"`` / ``"hold"`` signal.

    NOTE(review): ``process_data_for_model`` defines ``return`` as the
    *next-bar* return (``shift(-1)``). If the bars handed to ``process_bar``
    carry that column, the newest row of the window holds a value that is not
    yet observable at decision time -- confirm against the backtest engine.
    """

    def __init__(self, initial_capital=10000, df=None, lookback=1000, thresh_factor=1.0):
        """Fit the forecasting model and initialise the trading state.

        Parameters
        ----------
        initial_capital : number
            Forwarded unchanged to ``Strategy.__init__``.
        df : pandas.DataFrame
            Training data with ``time``, ``id`` and ``return`` columns.
            Required despite the ``None`` default.
        lookback : int
            Maximum number of rows kept per ``id`` in the rolling window, and
            the amount of history required before any non-hold signal.
        thresh_factor : float
            The ``k`` in the ``mu > k * sigma`` entry/exit rule (tunable).

        Raises
        ------
        ValueError
            If ``df`` is not provided.
        """
        super().__init__(initial_capital)
        if df is None:
            raise ValueError("AutoMLStrategy requires a training DataFrame `df`")

        self.has_bought = False              # not read by this class; kept for compatibility
        self.has_position = False            # True while a long position is open
        self.lookback = lookback             # how many bars to keep
        self.thresh_factor = thresh_factor   # k in the k*sigma threshold
        self.window = pd.DataFrame()         # starts empty; filled bar by bar
        self.returns_hist = pd.Series(dtype=float)  # 'return' column of the window
        self.fcst = None                     # latest forecast row; None until the first bar

        train_df = TimeSeriesDataFrame.from_data_frame(
            df,
            timestamp_column='time',
            id_column='id',
            static_features_df=None
        )
        self.model = TimeSeriesPredictor(
            target='return',
            prediction_length=1,
            eval_metric='rmse',
            verbosity=4,
            quantile_levels=[0.05, 0.25, 0.5, 0.75, 0.95],  # gives you a distribution
            freq='h'              # hourly; 'h' replaces the deprecated 'H' alias -- match your data
            # presets='medium_quality'
        )
        self.model.fit(train_df)

    # -----------------------------------------------------------------
    # called once per new bar by the back-testing engine
    # -----------------------------------------------------------------
    def process_bar(self, bar):
        """
        Append the new bar to our rolling window and feed the
        *window* to AutoGluon, not just the latest row.

        ``bar`` must be convertible to a single DataFrame row and provide the
        ``id``, ``time`` and ``return`` fields. Updates ``self.window``,
        ``self.returns_hist`` and ``self.fcst``.
        """
        # 1) Keep the last `lookback` rows per id. The very first bar is used
        #    as-is so an empty frame is never handed to pd.concat.
        new_row = pd.DataFrame([bar])
        combined = new_row if self.window.empty else pd.concat([self.window, new_row])
        self.window = (
            combined
              .groupby('id')
              .tail(self.lookback)
              .reset_index(drop=True)
        )
        # Realised-return history used by get_signal() to estimate sigma.
        self.returns_hist = self.window['return']

        # 2) Convert to TimeSeriesDataFrame
        latest_tsd = TimeSeriesDataFrame.from_data_frame(
            self.window,
            timestamp_column='time',
            id_column='id',
        )

        # 3) Forecast 1-step-ahead return distribution (first forecast row only)
        self.fcst = self.model.predict(latest_tsd).iloc[0]


    # -----------------------------------------------------------------
    # turn forecast + risk estimate into trading instruction
    # -----------------------------------------------------------------
    def get_signal(self) -> str:
        """
        BUY  :  mu > k*sigma,  5 % quantile is positive, and no position is open
        SELL :  mu < -k*sigma, 95 % quantile is negative, and a position is open
        HOLD :  otherwise

        Returns the lowercase string ``"buy"``, ``"sell"`` or ``"hold"`` and
        flips ``self.has_position`` on buy/sell.
        """

        # Need a forecast and enough history to compute sigma -- otherwise stay flat
        if self.fcst is None or len(self.returns_hist) < self.lookback:
            return "hold"

        # Population standard deviation (ddof=0) of the windowed returns.
        sigma  = self.returns_hist.std(ddof=0)
        k_sigma = self.thresh_factor * sigma

        mu   = self.fcst["0.5"]   # median forecast
        q05  = self.fcst["0.05"]
        q95  = self.fcst["0.95"]

        # --- decision logic ------------------------------------------
        if (mu > k_sigma) and (q05 > 0) and not self.has_position:
            self.has_position = True
            return "buy"

        if (mu < -k_sigma) and (q95 < 0) and self.has_position:
            self.has_position = False
            return "sell"

        return "hold"

In [None]:
# Load the hourly BTC bars and derive the modelling columns.
df = process_data_for_model('../data/btc_hour.csv')

# The first 2000 rows fit the predictor; the last 10000 rows are replayed
# bar by bar in the backtest. Both are independent copies of `df`.
training_df = df.iloc[:2000].copy()
test_df = df.iloc[-10000:].copy()

# The backtester receives a factory so that it controls the starting capital.
run_backtest(
    lambda initial_capital: AutoMLStrategy(initial_capital, training_df),
    test_df,
    initial_capital=10000,
)

  offset = pd.tseries.frequencies.to_offset(self.freq)
Frequency '1H' stored as 'h'
Beginning AutoGluon training...
AutoGluon will save models to '/Users/jiro/play/nstrade/notebooks/AutogluonModels/ag-20250517_083050'
AutoGluon Version:  1.3.0
Python Version:     3.12.8
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 23.6.0: Thu Mar  6 22:08:50 PST 2025; root:xnu-10063.141.1.704.6~1/RELEASE_ARM64_T8112
CPU Count:          8
GPU Count:          0
Memory Avail:       5.68 GB / 16.00 GB (35.5%)
Disk Space Avail:   5.63 GB / 228.27 GB (2.5%)
	We recommend a minimum available disk space of 10 GB, and large datasets may require more.

Fitting with arguments:
{'enable_ensemble': True,
 'eval_metric': RMSE,
 'freq': 'h',
 'hyperparameters': 'default',
 'known_covariates_names': [],
 'num_val_windows': 1,
 'prediction_length': 1,
 'quantile_levels': [0.05, 0.25, 0.5, 0.75, 0.95],
 'random_seed': 123,
 'refit_every_n_windows': 1,
 'refit_full': Fals

<class 'pandas.core.frame.DataFrame'>
Index: 118058 entries, 0 to 118057
Data columns (total 5 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   time    118058 non-null  datetime64[ns]
 1   close   118058 non-null  float64       
 2   volume  118058 non-null  float64       
 3   id      118058 non-null  object        
 4   return  118058 non-null  float64       
dtypes: datetime64[ns](1), float64(3), object(1)
memory usage: 5.4+ MB


Models that will be trained: ['SeasonalNaive', 'RecursiveTabular', 'DirectTabular', 'NPTS', 'DynamicOptimizedTheta', 'AutoETS', 'ChronosZeroShot[bolt_base]', 'ChronosFineTuned[bolt_small]', 'TemporalFusionTransformer', 'DeepAR', 'PatchTST', 'TiDE']
Training timeseries model SeasonalNaive. 
	Window 0
Shortening all time series to at most 2500
		-0.0033      = Validation score (-RMSE)
		0.002   s    = Training runtime
		1.141   s    = Prediction runtime
	-0.0033       = Validation score (-RMSE)
	0.01    s     = Training runtime
	1.14    s     = Validation (prediction) runtime
Training timeseries model RecursiveTabular. 
	Window 0
Shortening all series to at most 1000025
train_df shape: (1974, 47), val_df shape: (1, 47)
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.0
Python Version:     3.12.8
Operating System:   Darwin
Platform Machine:   arm64
Platform Version:   Darwin Kernel Version 23.6.0: Thu Mar  6 22:08:50 PST 2025; root:xnu-10063.141.1.704.6~1/RELEASE_ARM64_T8112
CPU Co

## Development // Drafts // WIP

In [20]:
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor

# Draft: work on the first 1000 rows only.
# NOTE(review): this rebinds `df` (later cells see the truncated frame) and
# expects a 'timestamp' column, unlike process_data_for_model which emits
# 'time' -- presumably written against an earlier version of the data.
df = df.iloc[:1000]

train_df = TimeSeriesDataFrame.from_data_frame(
    df,
    id_column='id',
    timestamp_column='timestamp',
    static_features_df=None,
)

# Predictor is only configured here; this cell does not fit it.
model = TimeSeriesPredictor(
    prediction_length=1,
    target='return',
    eval_metric='rmse',
    verbosity=4,
)


In [2]:
# NOTE(review): the captured run of this cell (In [2]) raised NameError because
# `run_backtest` had not been imported in that kernel session; it is imported
# from `backtest` in the AutoMLStrategy section. The class itself is passed as
# the strategy factory here, so the strategy is built with its default arguments
# apart from whatever the backtester supplies -- confirm that is intended.
run_backtest(AutoMLStrategy, df)

NameError: name 'run_backtest' is not defined

In [None]:
# create a new strategy
class AutoMLStrategy(Strategy):
    def __init__(self, initial_capital=10000):
        super().__init__(initial_capital)
        self.has_bought = False
        self.model = model

    def process_bar(self, bar):
        self.current_bar = bar
        self.model.predict(bar)

    def get_signal(self):
        if not self.has_bought:
            self.has_bought = True