In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler

In [2]:
btc_df = pd.read_csv('BTCUSDC-1m-2years.csv') 
btc_df['timestamp'] = pd.to_datetime(btc_df['timestamp'],dayfirst=True )
btc_df.set_index('timestamp', inplace=True)

In [76]:
import numpy as np
from sklearn.linear_model import RidgeClassifier

class RandomizedSignatureDrift:
    def __init__(self, rd=50, rm=0.05, rv=0.03, alpha=1e-3, tw=120):
        self.rd = rd
        self.rm = rm
        self.rv = rv
        self.alpha = alpha
        self.tw = tw
        self.activation = np.tanh 

    def _generate_reservoir_system(self, input_dim):
        self.A = np.random.normal(loc=self.rm, scale=np.sqrt(self.rv), 
                                  size=(input_dim, self.rd, self.rd))
        
        self.b = np.random.normal(loc=0, scale=np.sqrt(self.rv), 
                                  size=(input_dim, self.rd))

    def compute_signatures(self, X_augmented):
        T, input_dim = X_augmented.shape
        print('X_augmented shape: ', T, input_dim)
        R = np.zeros((T, self.rd))
        R[0] = np.random.normal(0, 1, self.rd)
        
        Y = []
        for k in tqdm(range(self.tw, T)):
            for t in range(self.tw):
                update_sum = np.zeros(self.rd)
                for i in range(input_dim):
                    proj = np.dot(self.A[i], R[t]) + self.b[i]
                    update_sum += self.activation(proj) * (X_augmented[k-self.tw+t+1, i]-X_augmented[k-self.tw +t, i])
                    
                R[t+1] = R[t] + update_sum
            #if k < self.tw+10:
                #print(R[self.tw])
            Y.append(R[self.tw].copy())
        return Y

    def predict(self, prices):
        prices = np.array(prices)
        log_prices = np.log(prices / prices[0]) 
        X_log_returns = np.diff(log_prices, axis=0) 
        X_log_returns_lag1 = np.zeros_like(X_log_returns)
        X_log_returns_lag1[0] = X_log_returns[0]
        X_log_returns_lag1[1:] = X_log_returns[0:-1]
        y_returns = np.where(X_log_returns>0, 1,-1)
        T_len, _ = X_log_returns.shape
        print(y_returns)
        time_index = np.arange(T_len).reshape(-1, 1) #* (1.0/T_len) # Normalized time
        X_augmented = np.hstack([time_index, X_log_returns])
        
        self._generate_reservoir_system(input_dim=X_augmented.shape[1])
        
        Y = self.compute_signatures(X_augmented)
        Y_len = len(Y)
        
        t_s = int(Y_len * 9 / 10)
                
        print(f"Starting training loop from t={t_s} to {T_len-1}...")

        X_train = Y[0 : t_s-120]
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        print(X_train[0:10])
        
        y_train = y_returns[self.tw + 1 : self.tw + 1 + t_s - 120]
        
        ridge = RidgeClassifier(alpha=self.alpha, fit_intercept=True)
        ridge.fit(X_train, y_train)
        
        X_test = Y[t_s : Y_len-1]
        X_test = scaler.transform(X_test)

        predictions = ridge.predict(X_test)
            
        y_true = y_returns[self.tw + t_s + 1 : ]
        ascore = accuracy_score(y_true, predictions)
        print('percentage of 1: ', sum(predictions>0)/len(predictions))
        print('percentage of 1 in y true: ', sum(y_true>0)/len(y_true))

        return ascore, y_true, predictions

In [None]:
prices = btc_df.close.values[0:50000]
model = RandomizedSignatureDrift(rd=50, rm=0.00, rv=0.3, tw=120, alpha=0.1)
predicted_returns, _,_ = model.predict(prices[:,np.newaxis])
print(predicted_returns)

[[ 1]
 [-1]
 [ 1]
 ...
 [-1]
 [-1]
 [-1]]
X_augmented shape:  49999 2


100%|██████████| 49879/49879 [00:26<00:00, 1883.72it/s]

Starting training loop from t=44891 to 49998...
[[ 1.24805490e-01 -2.00224754e-02  9.79113757e-01  6.59826106e-02
  -2.81645423e-01 -9.58029409e-01  1.07349974e+00 -1.10094585e-02
   1.46161093e-02 -1.07406884e+00  7.86073512e-01  1.07385114e+00
   6.92356283e-02  2.18653081e-02 -2.22678112e-01  3.41684704e-02
   4.16743775e-03 -2.08943972e-01 -3.53094259e-01  7.95659900e-02
  -1.07001987e+00  4.85074490e-03  1.07221871e+00 -3.85336261e-02
   1.73125626e-01 -5.46398544e-01  1.92476861e-03  1.92773810e-01
   1.99278177e-01 -3.32199291e-01  8.71589905e-01  8.66179969e-02
  -2.83497974e-01 -6.11906915e-01 -1.07384838e+00 -1.07375940e+00
  -2.35718691e-01 -1.08952095e-01  6.32604647e-02 -1.00274596e-01
  -1.52577137e-01 -6.11935545e-01  1.07245876e+00  2.05491127e-01
  -7.77170430e-01 -4.22984617e-01 -1.46901721e+00 -1.24020330e+00
  -1.75461200e-01 -2.44131284e-02]
 [-1.19935984e-01 -3.51038915e-02  6.21998590e-01  2.51349511e-01
  -1.08652975e-01 -2.73062461e-01 -6.54881259e-02 -4.288548


  y = column_or_1d(y, warn=True)


In [13]:
prices = np.array(prices)
log_prices = np.log(prices / prices[0]) 
X_log_returns = np.diff(log_prices, axis=0) 

In [15]:
prices

array([ 42286.97,  42314.41,  42313.19, ..., 114086.  , 114086.  ,
       114090.  ])