In [1]:
from firebase import firebase
import json
import pandas as pd
import numpy as np
from collections import deque

import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression


from datetime import timedelta
import datetime
#import missingno as msno
import tensorflow as tf
import IPython 
import IPython.display
from scipy import stats

INFO:tensorflow:Enabling eager execution
INFO:tensorflow:Enabling v2 tensorshape
INFO:tensorflow:Enabling resource variables
INFO:tensorflow:Enabling tensor equality
INFO:tensorflow:Enabling control flow v2


# Test strateggy for 1 day (train on April 25 test on april 26)

## Train data (April 25)

In [2]:
my_firebase = firebase.FirebaseApplication('https://test-random-305921-default-rtdb.firebaseio.com/', None)
#trades=my_firebase.get('/websocket_trades_v1_2021-03-12', '')

start='2021-04-25'
end='2021-04-26'
l=pd.date_range(start,pd.to_datetime(end)-timedelta(days=1),freq='d')
print(l)

dates=[d.strftime('%Y-%m-%d') for d in l]
dall={}
for date in dates:
  trades=my_firebase.get('/websocket_trades_v1_'+date, '')
  dall.update(trades)

DatetimeIndex(['2021-04-25'], dtype='datetime64[ns]', freq='D')


## Test data (April 26)

In [36]:
my_firebase = firebase.FirebaseApplication('https://test-random-305921-default-rtdb.firebaseio.com/', None)
#trades=my_firebase.get('/websocket_trades_v1_2021-03-12', '')

start='2021-04-26'
end='2021-04-27'
l=pd.date_range(start,pd.to_datetime(end)-timedelta(days=1),freq='d')
print(l)

dates=[d.strftime('%Y-%m-%d') for d in l]
dall_test={}
for date in dates:
  trades=my_firebase.get('/websocket_trades_v1_'+date, '')
  dall_test.update(trades)

DatetimeIndex(['2021-04-26'], dtype='datetime64[ns]', freq='D')


## Class helper for testing investment startegies

In [34]:
#df already has all the data we need

class strategy():
    def __init__(self, window_size=100, feature='TFI', low_boundary=-7,condition='low', transaction_fees=.00012, wanted_margin=.00012):
        self.window_size=window_size
        self.feature=feature
        self.low_boundary=low_boundary
        self.condition=condition
        self.transaction_fees=transaction_fees
        self.wanted_margin=wanted_margin


    def preprocess(self, trades):
        df=pd.DataFrame(trades).T.reset_index(drop=True)
        #datatypes
        df.time=pd.to_datetime(df.time)
        df["price"] = df.price.astype(float)
        df["last_size"] = df['last_size'].astype(float)
        df["best_bid"] = df['best_bid'].astype(float)
        df["best_ask"] = df['best_ask'].astype(float)
        
        #create new relevant columns
        df['bid_ask_spread']=df['best_ask']-df['best_bid']
        df['tfi'] = np.where(df['side'] == 'buy', df['last_size'], -df['last_size'])

        #TFI is sum of tfis
        df['TFI']=df['tfi'].rolling(window=self.window_size).sum()
        df.head()
        return(df)

    def backtest(self, trades):
        df=self.preprocess(trades)
        trade_current=False
        prices_buy=[]
        prices_sell=[]

        for i in range(self.window_size, len(df)):
            if (df['TFI'][i]<self.low_boundary and not trade_current): #or (abs(tfi)<.3 and not trade_current):
                
                price_buy=df['best_ask'][i]
                prices_buy.append(price_buy)
                trade_current=True 
            
            #sell signal
            if trade_current and df['best_bid'][i]>(1+self.transaction_fees+ self.wanted_margin)*price_buy:
            #if trade_current and tfi>5:
                price_sell=df['best_bid'][i]
                prices_sell.append(price_sell)
                trade_current=False
            
        if len(prices_buy)==len(prices_sell):
            rois=np.array(prices_sell)/np.array(prices_buy)-self.transaction_fees
            total_roi=np.prod(rois)
        else: 
            prices_buy.pop(-1)
            rois=np.array(prices_sell)/np.array(prices_buy)-self.transaction_fees
            total_roi=np.prod(rois)
        return total_roi, rois, prices_buy,prices_sell

# A first investment strategy

In [5]:
strategy1=strategy(window_size=100)
strategy1.backtest(dall)

(1.004712160431377,
 array([1.00016081, 1.00014708, 1.00039404, 1.00014104, 1.0001853 ,
        1.00015987, 1.00024007, 1.00025002, 1.00013346, 1.0001625 ,
        1.00022505, 1.00017137, 1.00012396, 1.00013183, 1.00016986,
        1.0001258 , 1.0001219 , 1.00016626, 1.00016981, 1.00018304,
        1.00023157, 1.0001324 , 1.00017614, 1.00024435, 1.00022341,
        1.00013061]),
 [50353.8,
  50283.67,
  50016.01,
  49800.01,
  49820.35,
  49844.89,
  49852.1,
  49862.3,
  49594.28,
  49698.67,
  50050.01,
  50280.01,
  50295.0,
  50390.85,
  50403.55,
  50365.21,
  50351.59,
  50129.47,
  50274.99,
  50289.57,
  50146.19,
  50000.01,
  49976.0,
  49980.0,
  49998.22,
  49997.02],
 [50367.94,
  50297.1,
  50041.72,
  49813.01,
  49835.56,
  49858.84,
  49870.05,
  49880.75,
  49606.85,
  49712.71,
  50067.28,
  50294.66,
  50307.27,
  50403.54,
  50418.16,
  50377.59,
  50363.77,
  50143.82,
  50289.56,
  50304.81,
  50163.82,
  50012.63,
  49990.8,
  49998.21,
  50015.39,
  50009.55])

# Tune investment strategies

In [29]:
window_sizes_grid=np.arange(10,270,30)
low_boundaries_grid=np.arange(-15,-5,1)

In [35]:
strat=dict()
for window_size in window_sizes_grid:
    for low_boundary in low_boundaries_grid:
        strategy_instance=strategy(window_size=window_size, low_boundary=low_boundary)
        roi=strategy_instance.backtest(dall)[0]
        print(roi, window_size, low_boundary)
        strat[roi]={'window_size':window_size, 'low_boundary':low_boundary}

1.0008535141046524 10 -15
1.0008535141046524 10 -14
1.0008535141046524 10 -13
1.0009965120441051 10 -12
1.0011912982927977 10 -11
1.0014859817125883 10 -10
1.0017851711396006 10 -9
1.002105123244459 10 -8
1.0027854982252127 10 -7
1.0047811581995876 10 -6
1.0017686173392195 40 -15
1.0018907319464403 40 -14
1.0023356036597681 40 -13
1.003439406147874 40 -12
1.0029951437738798 40 -11
1.0035154958395245 40 -10
1.0038871037864696 40 -9
1.0052522269736708 40 -8
1.0049969626871884 40 -7
1.003826645876222 40 -6
1.002542425244445 70 -15
1.002857867971432 70 -14
1.0040403939949116 70 -13
1.0040646041317043 70 -12
1.0038208873742793 70 -11
1.0052967853545915 70 -10
1.0046025095443547 70 -9
1.0056496138139377 70 -8
1.006097344716119 70 -7
1.0042489077190733 70 -6
1.00332223740909 100 -15
1.0053212136541134 100 -14
1.0052384269980643 100 -13
1.0056564926640226 100 -12
1.0059736878924914 100 -11
1.0063345346146026 100 -10
1.0060525056279035 100 -9
1.005028383982726 100 -8
1.004712160431377 100 -7
1.

In [41]:
pd.DataFrame(strat).T.reset_index().rename({'index': 'roi'}, axis=1).sort_values(by='roi', ascending=False)

Unnamed: 0,roi,window_size,low_boundary
81,1.009585,250,-12
61,1.009392,190,-12
71,1.008757,220,-12
83,1.008634,250,-10
80,1.008575,250,-13
...,...,...,...
8,1.001769,40,-15
3,1.001486,10,-10
2,1.001191,10,-11
1,1.000997,10,-12


# Best strategy for April 25

In [44]:

best_strategy=strategy(window_size=250, low_boundary=-12)
best_strategy.backtest(dall)

(1.009585435987873,
 array([1.00012047, 1.00021451, 1.00034803, 1.00026728, 1.00013956,
        1.00015521, 1.00013346, 1.00017062, 1.00016093, 1.00015282,
        1.00014969, 1.00061129, 1.00016105, 1.00015331, 1.00024248,
        1.00026864, 1.00029787, 1.00015277, 1.0002899 , 1.00012083,
        1.0001219 , 1.00015262, 1.00012947, 1.00016626, 1.00038292,
        1.00019832, 1.00032847, 1.00023157, 1.0001324 , 1.00017614,
        1.00024435, 1.00022341, 1.00015952, 1.00035107, 1.00024141,
        1.00013526, 1.00022404, 1.00019504, 1.00041744, 1.00014529,
        1.00014848, 1.00013438, 1.00012865, 1.00016232, 1.00016808,
        1.00013146]),
 [50360.75,
  49983.78,
  50018.31,
  49757.7,
  49776.99,
  49816.1,
  49594.28,
  49997.4,
  50011.94,
  49482.72,
  49501.25,
  49514.91,
  49564.65,
  49578.22,
  49602.92,
  49429.25,
  49800.13,
  49821.45,
  49841.27,
  49868.97,
  50351.59,
  50363.78,
  50387.48,
  50129.47,
  50147.06,
  50169.51,
  50282.26,
  50146.19,
  50000.01,
 

In [47]:
a,b,c,d=best_strategy.backtest(dall_test)

In [53]:
df['TFI'].describe()

count    77922.000000
mean        -0.057107
std          3.818618
min        -31.709849
25%         -1.550465
50%          0.080349
75%          1.718433
max         26.332942
Name: TFI, dtype: float64

# Backtest best strategy for all days of data we have

In [64]:
my_firebase = firebase.FirebaseApplication('https://test-random-305921-default-rtdb.firebaseio.com/', None)
#trades=my_firebase.get('/websocket_trades_v1_2021-03-12', '')

start='2021-03-13'
end='2021-04-30'
l=pd.date_range(start,pd.to_datetime(end)-timedelta(days=1),freq='d')
print(l)

rois=dict()
dates=[d.strftime('%Y-%m-%d') for d in l]
for date in dates:
  trades=my_firebase.get('/websocket_trades_v1_'+date, '')
  #dall.update(trades)
  rois[date]=dict()
  a,b,c,d=best_strategy.backtest(trades)
  rois[date]['roi']=a
  rois[date]['n_trades']=len(b)
  print(rois)

DatetimeIndex(['2021-03-13', '2021-03-14', '2021-03-15', '2021-03-16',
               '2021-03-17', '2021-03-18', '2021-03-19', '2021-03-20',
               '2021-03-21', '2021-03-22', '2021-03-23', '2021-03-24',
               '2021-03-25', '2021-03-26', '2021-03-27', '2021-03-28',
               '2021-03-29', '2021-03-30', '2021-03-31', '2021-04-01',
               '2021-04-02', '2021-04-03', '2021-04-04', '2021-04-05',
               '2021-04-06', '2021-04-07', '2021-04-08', '2021-04-09',
               '2021-04-10', '2021-04-11', '2021-04-12', '2021-04-13',
               '2021-04-14', '2021-04-15', '2021-04-16', '2021-04-17',
               '2021-04-18', '2021-04-19', '2021-04-20', '2021-04-21',
               '2021-04-22', '2021-04-23', '2021-04-24', '2021-04-25',
               '2021-04-26', '2021-04-27', '2021-04-28', '2021-04-29'],
              dtype='datetime64[ns]', freq='D')
{'2021-03-13': {'roi': 1.0133107885914738, 'n_trades': 64}}
{'2021-03-13': {'roi': 1.01331078859147

In [65]:
results=pd.DataFrame(rois).T

In [66]:
results.to_csv('rois_best_strategy.csv')

In [67]:
results

Unnamed: 0,roi,n_trades
2021-03-13,1.013311,64.0
2021-03-14,1.001672,7.0
2021-03-15,1.001537,8.0
2021-03-16,1.009076,40.0
2021-03-17,1.007467,38.0
2021-03-18,1.008636,45.0
2021-03-19,1.008693,45.0
2021-03-20,1.009376,52.0
2021-03-21,1.003121,15.0
2021-03-22,1.006292,33.0


In [69]:
np.prod(results['roi'])

1.3634594167580398

In [58]:
test=dict()
test['2013-06-06']=dict()
test['2013-06-06']['roi']=1.01
test['2013-06-06']['n_trades']=50

In [57]:
a,b,c,d=best_strategy.backtest(trades)

In [60]:
pd.DataFrame(test).T

Unnamed: 0,n_trades,roi
2013-06-06,50.0,1.01
