In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler

In [2]:
btc_df = pd.read_csv('BTCUSDC-1m-2years.csv') 
btc_df['timestamp'] = pd.to_datetime(btc_df['timestamp'],dayfirst=True )
btc_df.set_index('timestamp', inplace=True)
btc_df = btc_df.sort_index()

In [3]:
import numpy as np
from sklearn.linear_model import RidgeClassifier

class RandomizedSignatureDrift:
    def __init__(self, rd=50, rm=0.05, rv=0.03, alpha=1e-3, tw=120):
        self.rd = rd
        self.rm = rm
        self.rv = rv
        self.alpha = alpha
        self.tw = tw
        self.activation = np.tanh 

    def _generate_reservoir_system(self, input_dim):
        self.A = np.random.normal(loc=self.rm, scale=np.sqrt(self.rv), 
                                  size=(input_dim, self.rd, self.rd))
        
        self.b = np.random.normal(loc=0, scale=np.sqrt(self.rv), 
                                  size=(input_dim, self.rd))

    def compute_signatures(self, X_augmented):
        T, input_dim = X_augmented.shape
        print('X_augmented shape: ', T, input_dim)
        R = np.zeros((T, self.rd))
        R[0] = np.random.normal(0, 1, self.rd)
        
        Y = []
        for k in tqdm(range(self.tw, T)):
            for t in range(self.tw):
                update_sum = np.zeros(self.rd)
                for i in range(input_dim):
                    proj = np.dot(self.A[i], R[t]) + self.b[i]
                    update_sum += self.activation(proj) * (X_augmented[k-self.tw+t+1, i]-X_augmented[k-self.tw +t, i])
                    
                R[t+1] = R[t] + update_sum
            #if k < self.tw+10:
                #print(R[self.tw])
            Y.append(R[self.tw].copy())
        return Y

    def predict(self, prices):
        vol = np.array(prices)[1:,4]
        prices = np.array(prices)[:,0:4]
        log_prices = np.log(prices / prices[0]) 
        X_log_returns = np.diff(log_prices, axis=0) 
        #X_log_returns_lag1 = np.zeros_like(X_log_returns)
        #X_log_returns_lag1[0] = X_log_returns[0]
        #X_log_returns_lag1[1:] = X_log_returns[0:-1]
        y_returns = np.where(X_log_returns[:,3]>0, 1,-1)
        T_len, _ = X_log_returns.shape
        print(y_returns)
        time_index = np.arange(T_len).reshape(-1, 1) * (1.0/T_len) # Normalized time
        X_augmented = np.hstack([time_index, X_log_returns])
        
        self._generate_reservoir_system(input_dim=X_augmented.shape[1])
        
        Y = self.compute_signatures(X_augmented)
        Y_len = len(Y)
        
        t_s = int(Y_len * 9 / 10)
                
        print(f"Starting training loop from t={t_s} to {T_len-1}...")

        X_train = Y[0 : t_s-120]
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        print(X_train[0:10])
        
        y_train = y_returns[self.tw + 1 : self.tw + 1 + t_s - 120]
        
        ridge = RidgeClassifier(alpha=self.alpha, fit_intercept=True)
        ridge.fit(X_train, y_train)
        
        X_test = Y[t_s : Y_len-1]
        X_test = scaler.transform(X_test)

        predictions = ridge.predict(X_test)
            
        y_true = y_returns[self.tw + t_s + 1 : ]
        ascore = accuracy_score(y_true, predictions)
        print('percentage of 1: ', sum(predictions>0)/len(predictions))
        print('percentage of 1 in y true: ', sum(y_true>0)/len(y_true))

        return ascore, y_true, predictions

In [4]:
btc_df.iloc[0:20]

Unnamed: 0_level_0,open_time,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_volume,taker_buy_quote_volume,ignore
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2024-01-01 00:00:00,1704067200000,42274.27,42291.22,42255.49,42286.97,0.42926,1704067259999,18143.726615,20,0.41173,17402.724821,0
2024-01-01 00:01:00,1704067260000,42293.21,42314.41,42293.21,42314.41,0.07881,1704067319999,3333.99156,15,0.06083,2573.427464,0
2024-01-01 00:02:00,1704067320000,42311.52,42324.75,42310.81,42313.19,0.75971,1704067379999,32149.032673,22,0.63288,26781.9464,0
2024-01-01 00:03:00,1704067380000,42321.14,42357.01,42321.14,42357.01,0.35064,1704067439999,14846.349183,29,0.28116,11904.935498,0
2024-01-01 00:04:00,1704067440000,42357.02,42390.63,42357.02,42390.63,1.99094,1704067499999,84362.39614,37,1.95944,83027.392961,0
2024-01-01 00:05:00,1704067500000,42384.03,42396.04,42376.03,42396.04,1.09819,1704067559999,46550.617399,18,0.729,30901.673504,0
2024-01-01 00:06:00,1704067560000,42396.15,42415.86,42396.15,42410.39,0.73764,1704067619999,31277.631167,20,0.6625,28091.279628,0
2024-01-01 00:07:00,1704067620000,42410.85,42419.22,42401.12,42401.12,0.73981,1704067679999,31375.22953,22,0.18913,8021.134653,0
2024-01-01 00:08:00,1704067680000,42402.26,42408.1,42392.22,42408.1,0.15746,1704067739999,6676.518581,8,0.01451,615.298501,0
2024-01-01 00:09:00,1704067740000,42407.47,42410.82,42392.1,42401.17,1.73395,1704067799999,73527.227527,28,0.50177,21275.444516,0


In [70]:
RESAMPLE_FREQ = '15min'
prices = pd.DataFrame()
prices['close'] = btc_df.close.resample(RESAMPLE_FREQ).last()
prices['open'] = btc_df.open.resample(RESAMPLE_FREQ).first()
prices['high'] = btc_df.high.resample(RESAMPLE_FREQ).max()
prices['low'] = btc_df.low.resample(RESAMPLE_FREQ).min()
prices['volume'] = btc_df.volume.resample('15min').sum()
prices

Unnamed: 0_level_0,close,open,high,low,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01 00:00:00,42476.73,42274.27,42476.73,42255.49,12.31433
2024-01-01 00:15:00,42409.47,42476.72,42538.32,42401.10,9.55929
2024-01-01 00:30:00,42426.50,42397.18,42434.35,42344.56,6.02744
2024-01-01 00:45:00,42465.40,42430.75,42481.42,42408.39,3.41888
2024-01-01 01:00:00,42458.23,42455.15,42460.30,42418.66,4.66731
...,...,...,...,...,...
2025-09-30 22:45:00,113992.02,113968.08,114080.27,113961.01,18.02007
2025-09-30 23:00:00,113883.76,113990.80,114000.39,113766.63,51.76906
2025-09-30 23:15:00,114013.00,113886.31,114063.79,113817.89,22.08340
2025-09-30 23:30:00,114012.54,114013.00,114031.43,113898.42,13.40764


In [5]:
prices = pd.DataFrame()
prices['close'] = btc_df.close
prices['open'] = btc_df.open
prices['high'] = btc_df.high
prices['low'] = btc_df.low
prices['volume'] = btc_df.volume
prices

Unnamed: 0_level_0,close,open,high,low,volume
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-01-01 00:00:00,42286.97,42274.27,42291.22,42255.49,0.42926
2024-01-01 00:01:00,42314.41,42293.21,42314.41,42293.21,0.07881
2024-01-01 00:02:00,42313.19,42311.52,42324.75,42310.81,0.75971
2024-01-01 00:03:00,42357.01,42321.14,42357.01,42321.14,0.35064
2024-01-01 00:04:00,42390.63,42357.02,42390.63,42357.02,1.99094
...,...,...,...,...,...
2025-09-30 23:55:00,114073.40,114073.37,114073.40,114073.36,0.82091
2025-09-30 23:56:00,114077.15,114073.40,114077.15,114073.40,0.01677
2025-09-30 23:57:00,114086.00,114077.15,114086.00,114077.15,0.87074
2025-09-30 23:58:00,114086.00,114086.00,114086.00,114085.99,0.13571


In [6]:
model = RandomizedSignatureDrift(rd=50, rm=0.00, rv=0.7, tw=60, alpha=0.1)
predicted_returns, _,_ = model.predict(prices)
print(predicted_returns)

[1 1 1 ... 1 1 1]
X_augmented shape:  920159 5


100%|██████████| 920099/920099 [09:27<00:00, 1619.92it/s]


Starting training loop from t=828089 to 920158...
[[ 1.48607550e+00  1.54218380e+00 -4.24180995e-01  9.48514709e-01
  -1.56504047e+00 -1.53293255e+00  1.26530020e+00  1.27857377e+00
   1.28985224e+00  1.25709763e+00  8.99427353e-01 -4.96626609e-01
   1.29046785e+00  7.66371204e-01  8.51438271e-01  8.83652474e-02
  -4.36300828e-01 -2.15518174e-02  1.28961773e+00  1.14035570e+00
   3.49380227e-01  1.16055773e+00 -1.28962391e+00  8.86396961e-01
   4.47588879e-01  3.72077708e-01 -1.54034030e+00 -8.72084649e-01
   1.30325195e+00  5.92822406e-01  1.52043515e+00 -7.93695730e-01
   9.53361513e-01 -1.19724797e-01  4.58771933e-01  7.51391087e-01
  -9.26709296e-02  4.73415915e-01  9.39575278e-02 -1.28957310e+00
   2.10570045e-01 -5.91402027e-01  1.53786451e+00  1.52438532e+00
  -7.08390271e-02  4.18704804e-01  7.00236151e-01 -1.54420660e+00
  -1.42566977e+00 -6.08637121e-01]
 [ 5.10031785e-01  2.55334705e-01 -3.27692830e-01  7.60000629e-01
  -3.33359396e-01 -1.93306578e-01 -3.65812165e-01  2.0449

In [13]:
prices = np.array(prices)
log_prices = np.log(prices / prices[0]) 
X_log_returns = np.diff(log_prices, axis=0) 

In [15]:
prices

array([ 42286.97,  42314.41,  42313.19, ..., 114086.  , 114086.  ,
       114090.  ])