In [23]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler

In [24]:
import numpy as np
from sklearn.linear_model import RidgeClassifier

class RandomizedSignatureDrift:
    def __init__(self, rd=50, rm=0.05, rv=0.03, alpha=1e-3, tw=120):
        self.rd = rd
        self.rm = rm
        self.rv = rv
        self.alpha = alpha
        self.tw = tw
        self.activation = np.tanh 

    def _generate_reservoir_system(self, input_dim):
        self.A = np.random.normal(loc=self.rm, scale=np.sqrt(self.rv), 
                                  size=(input_dim, self.rd, self.rd))
        
        self.b = np.random.normal(loc=0, scale=np.sqrt(self.rv), 
                                  size=(input_dim, self.rd))

    def compute_signatures(self, X_augmented):
        T, input_dim = X_augmented.shape
        R = np.zeros((T, self.rd))
        R[0] = np.random.normal(0, 1, self.rd)
        
        Y = []
        for k in tqdm(range(self.tw, T)):
            for t in range(self.tw):
                update_sum = np.zeros(self.rd)
                for i in range(input_dim):
                    proj = np.dot(self.A[i], R[t]) + self.b[i]
                    update_sum += self.activation(proj) * (X_augmented[k-self.tw+t+1, i]-X_augmented[k-self.tw +t, i])
                    
                R[t+1] = R[t] + update_sum
            #if k < self.tw+10:
                #print(R[self.tw])
            Y.append(R[self.tw].copy())
        return Y

    def predict(self, prices):
        vol = np.array(prices)[1:,4]
        prices = np.array(prices)[:,0:4]
        log_prices = np.log(prices / prices[0]) 
        X_log_returns = np.diff(log_prices, axis=0) 
        #X_log_returns_lag1 = np.zeros_like(X_log_returns)
        #X_log_returns_lag1[0] = X_log_returns[0]
        #X_log_returns_lag1[1:] = X_log_returns[0:-1]
        y_returns = np.where(X_log_returns[:,3]>0, 1,-1)
        T_len, _ = X_log_returns.shape
        time_index = np.arange(T_len).reshape(-1, 1) * (1.0/T_len) # Normalized time
        X_augmented = np.hstack([time_index, X_log_returns])
        
        self._generate_reservoir_system(input_dim=X_augmented.shape[1])
        
        Y = self.compute_signatures(X_augmented)
        Y_len = len(Y)
        
        t_s = int(Y_len * 9 / 10)
                
        print(f"Starting training loop from t={t_s} to {T_len-1}...")

        X_train = Y[0 : t_s-120]
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        #print(X_train[0:10])
        
        y_train = y_returns[self.tw + 1 : self.tw + 1 + t_s - 120]
        
        ridge = RidgeClassifier(alpha=self.alpha, fit_intercept=True)
        ridge.fit(X_train, y_train)
        
        X_test = Y[t_s : Y_len-1]
        X_test = scaler.transform(X_test)

        predictions = ridge.predict(X_test)
            
        y_true = y_returns[self.tw + t_s + 1 : ]
        ascore = accuracy_score(y_true, predictions)
        print('percentage of 1: ', sum(predictions>0)/len(predictions))
        print('percentage of 1 in y true: ', sum(y_true>0)/len(y_true))

        return ascore, y_true, predictions

In [26]:
for coin in ['BTCUSDC','ETHUSDC','SOLUSDC','XRPUSDC','BNBUSDC']: 
    print('coin: ',coin)       
    coin_df = pd.read_csv(f'{coin}-1m-2years.csv') 
    coin_df['timestamp'] = pd.to_datetime(coin_df['timestamp'],dayfirst=True )
    coin_df.set_index('timestamp', inplace=True)
    coin_df = coin_df.sort_index()
    RESAMPLE_FREQ = '15min'
    prices = pd.DataFrame()
    prices['close'] = coin_df.close.resample(RESAMPLE_FREQ).last()
    prices['open'] = coin_df.open.resample(RESAMPLE_FREQ).first()
    prices['high'] = coin_df.high.resample(RESAMPLE_FREQ).max()
    prices['low'] = coin_df.low.resample(RESAMPLE_FREQ).min()
    prices['volume'] = coin_df.volume.resample(RESAMPLE_FREQ).sum()
    model = RandomizedSignatureDrift(rd=50, rm=0.00, rv=0.7, tw=60, alpha=0.1)
    predicted_returns, _,_ = model.predict(prices)
    print('score for ',coin,': ',predicted_returns)

coin:  BTCUSDC


100%|██████████| 61283/61283 [00:40<00:00, 1496.14it/s]


Starting training loop from t=55154 to 61342...
percentage of 1:  0.5912206266318538
percentage of 1 in y true:  0.5057114882506527
score for  BTCUSDC :  0.6491514360313316
coin:  ETHUSDC


100%|██████████| 61283/61283 [00:38<00:00, 1580.20it/s]


Starting training loop from t=55154 to 61342...
percentage of 1:  0.6258159268929504
percentage of 1 in y true:  0.5352480417754569
score for  ETHUSDC :  0.6502937336814621
coin:  SOLUSDC


100%|██████████| 61283/61283 [00:38<00:00, 1600.34it/s]


Starting training loop from t=55154 to 61342...
percentage of 1:  0.5775130548302873
percentage of 1 in y true:  0.5275783289817232
score for  SOLUSDC :  0.6452349869451697
coin:  XRPUSDC


100%|██████████| 61283/61283 [00:39<00:00, 1542.88it/s]


Starting training loop from t=55154 to 61342...
percentage of 1:  0.5110966057441253
percentage of 1 in y true:  0.5148498694516971
score for  XRPUSDC :  0.6718342036553525
coin:  BNBUSDC


100%|██████████| 61283/61283 [00:38<00:00, 1608.76it/s]


Starting training loop from t=55154 to 61342...
percentage of 1:  0.5654373368146214
percentage of 1 in y true:  0.5321475195822454
score for  BNBUSDC :  0.6445822454308094
